Replicate Manus

Manus를 오픈소스 도구로 복제하기: 구현 청사진

연구 보고서를 바탕으로, 공개적으로 이용 가능한 도구를 사용해 Manus의 기능을 복제하기 위한 실질적인 로드맵을 만들 수 있습니다. 절차는 다음과 같습니다:

1. 핵심 인프라 구축

파운데이션 모델

# Fetch the CodeAct repository and install the Python dependencies it lists.
git clone https://github.com/xingyaoww/code-act
cd code-act
pip install -r requirements.txt

CodeActAgent 모델(Mistral 7B 파인튜닝)을 추론 코어로 사용
성능 향상을 위해 Llama 3(8B 또는 70B)와 결합해 플래닝 작업 수행

실행 환경

# Start the sandbox container in the background.
# Options must use ASCII double hyphens ("--"); typographic en dashes are not
# recognised by the docker CLI.
# "sleep infinity" keeps the container alive: without a long-running command a
# detached ubuntu container exits immediately because its default shell has no
# terminal attached.
docker run -d --name manus-sandbox \
  -v "$(pwd)/workspace:/home/ubuntu" \
  --cap-drop=ALL \
  ubuntu:latest \
  sleep infinity

Python, Node.js, 헤드리스 브라우저가 포함된 격리된 샌드박스 생성
필수 도구 설치:

# Install the language runtimes and command-line tools used by the agent.
# Every continued line must end with a backslash; otherwise the shell would try
# to run "curl wget git" as a separate command instead of installing them.
apt-get update && apt-get install -y \
  python3 python3-pip nodejs npm \
  curl wget git
pip install playwright selenium beautifulsoup4
playwright install

2. 핵심 에이전트 아키텍처 구현

도구 통합

표준화된 도구 함수가 포함된 Python 모듈 생성:

# agent_tools.py
import subprocess
import requests
from playwright.sync_api import sync_playwright

def search_web(query):
    """Run a web search through SerpAPI and return the decoded JSON response.

    Args:
        query: Search terms, sent as the ``q`` query parameter.

    Returns:
        The parsed JSON body of the response.

    Note:
        ``API_KEY`` is not defined in this snippet; it is assumed to be a
        module-level constant provided elsewhere.
    """
    response = requests.get(
        "https://serpapi.com/search",
        # Let requests build the query string so spaces, "&", "#" and
        # non-ASCII characters are percent-encoded instead of corrupting
        # the URL.
        params={"q": query, "api_key": API_KEY},
        # requests waits indefinitely by default; bound the wait so a stalled
        # connection cannot hang the agent.
        timeout=30,
    )
    return response.json()

def browse_url(url):
    """Open ``url`` in a freshly launched Chromium and return ``page.content()``.

    Args:
        url: Address to navigate to.

    Returns:
        Whatever ``page.content()`` yields after navigation has finished.
    """
    with sync_playwright() as playwright:
        chromium = playwright.chromium.launch()
        tab = chromium.new_page()
        tab.goto(url)
        markup = tab.content()
        chromium.close()
    return markup

def execute_python(code, timeout=60):
    """Run a Python snippet in a separate interpreter process.

    A child process isolates the snippet from the agent's own interpreter
    state, but it is not a security sandbox: the code runs with the same
    operating-system privileges as the caller.

    Args:
        code: Python source, passed to the interpreter's ``-c`` option.
        timeout: Maximum run time in seconds. ``None`` disables the limit.

    Returns:
        A ``(stdout, stderr)`` tuple of strings. When the time limit is hit,
        ``stdout`` holds whatever was captured and ``stderr`` describes the
        timeout.
    """
    import sys

    try:
        result = subprocess.run(
            # Use the interpreter running this module so the snippet sees the
            # same environment; fall back to "python3" if it is unknown.
            [sys.executable or "python3", "-c", code],
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired as exc:
        captured = exc.stdout or ""
        # Partial output attached to the exception may be raw bytes even in
        # text mode, so normalise it to str.
        if isinstance(captured, bytes):
            captured = captured.decode(errors="replace")
        return captured, f"Execution timed out after {timeout} seconds"
    return result.stdout, result.stderr

def shell_command(cmd):
    """Run an allow-listed command without involving a shell.

    The command line is tokenised with ``shlex`` and executed directly, so
    shell metacharacters (``;``, ``&&``, ``|``, ``$()``, redirections) are
    passed to the program as literal arguments. Checking only the first word
    and then handing the whole string to a shell would let a command such as
    ``ls; rm -rf ~`` slip past the allow-list.

    Args:
        cmd: Command line to run.

    Returns:
        A ``(stdout, stderr)`` tuple of strings when the command is executed,
        or a plain message string when the command is rejected.
    """
    import shlex

    safe_cmds = ["ls", "cat", "echo", "mkdir", "touch"]

    try:
        argv = shlex.split(cmd)
    except ValueError as exc:
        # Raised for malformed input such as an unterminated quote.
        return f"Invalid command: {exc}"

    if not argv:
        return "Empty command not allowed"

    cmd_base = argv[0]
    if cmd_base not in safe_cmds:
        return f"Command {cmd_base} not allowed"

    result = subprocess.run(argv, capture_output=True, text=True)
    return result.stdout, result.stderr

에이전트 루프 구현

# agent_loop.py
from langchain.chains import LLMChain
from langchain.prompts.chat import ChatPromptTemplate
from langchain.llms import HuggingFacePipeline
import json

# Load the CodeActAgent model as a text-generation pipeline.
# NOTE(review): temperature 0.1 is presumably meant to keep generated actions
# close to deterministic — confirm it takes effect when passed via model_kwargs.
model = HuggingFacePipeline.from_model_id(
    model_id="xingyaoww/CodeActAgent-Mistral-7B",
    task="text-generation",
    model_kwargs={"temperature": 0.1}
)

def agent_loop(user_request, max_steps=50):
    """Drive the plan/act/observe cycle until the model signals completion.

    Each iteration formats the event history and workspace into a prompt,
    asks the model for the next action, executes the returned code and records
    both the action and its observation.

    Args:
        user_request: Task description supplied by the user.
        max_steps: Upper bound on model/action iterations. Without a bound the
            loop would never end if the model never emits ``TASK_COMPLETE``.

    Returns:
        The workspace entry ``output.md`` if one was written. Otherwise
        ``"Task completed"`` when the model finished, or a message naming the
        step limit when the budget ran out first.
    """
    # Event stream: chronological record of everything the model is shown.
    event_stream = [{"type": "user", "content": user_request}]

    plan = create_plan(user_request)
    event_stream.append({"type": "plan", "content": plan})

    # In-memory workspace: files written by the agent plus the todo plan.
    workspace = {"files": {}, "todo": plan}

    for _ in range(max_steps):
        context = format_context(event_stream, workspace)
        response = model(context)
        code = extract_code_from_response(response)

        if "TASK_COMPLETE" in code:
            return workspace["files"].get("output.md", "Task completed")

        result, error = safe_execute_code(code)

        event_stream.append({"type": "action", "content": code})
        # Show the model stdout when there is any, otherwise the error text.
        event_stream.append({"type": "observation", "content": result or error})

        update_workspace(workspace, code, result)

    # Step budget exhausted: hand back whatever was produced so far.
    return workspace["files"].get(
        "output.md",
        f"Task stopped after {max_steps} steps without completing",
    )

3. 지식 및 메모리 컴포넌트

파일 기반 메모리

def update_workspace(workspace, code, result):
    """Mirror file writes and todo updates found in ``code`` into ``workspace``.

    Args:
        workspace: Dict with a ``"files"`` mapping and a ``"todo"`` entry;
            modified in place.
        code: Source text of the action that was just executed.
        result: Output of the action; not used by this function.
    """
    writes_file = "write_file(" in code
    updates_todo = "update_todo(" in code

    if writes_file:
        # Record the written file under its name.
        target = extract_filename(code)
        payload = extract_content(code)
        workspace["files"][target] = payload

    if updates_todo:
        # Replace the todo entry with the version that has the step marked.
        finished_step = extract_step_number(code)
        workspace["todo"] = mark_step_complete(workspace["todo"], finished_step)

RAG 구현

from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# Initialise the embedding model that is handed to FAISS when the vector store
# is built.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

def retrieve_knowledge(query, documents, k=3):
    """Return the text of the documents most similar to ``query``.

    Args:
        query: Text to search for.
        documents: Documents to index; each result is read through its
            ``page_content`` attribute.
        k: Number of results to request from the similarity search.

    Returns:
        A list of ``page_content`` strings, or an empty list when there are no
        documents to search.
    """
    # An index cannot be built from zero documents, and there is nothing to
    # find anyway.
    if not documents:
        return []

    # The vector store is rebuilt on every call from the given documents.
    vectorstore = FAISS.from_documents(documents, embeddings)

    docs = vectorstore.similarity_search(query, k=k)

    return [doc.page_content for doc in docs]

4. 시스템 프롬프트 엔지니어링

유출된 Manus 프롬프트를 기반으로 포괄적인 프롬프트 템플릿 생성:

# System prompt for the agent: lists its capabilities, the rules for tool use,
# planning, error handling and information quality, and the callable tools.
SYSTEM_PROMPT = """
You are an autonomous AI agent that can use tools to accomplish tasks.

<agent_capabilities>
- Execute Python code
- Access the web through search and browsing
- Read and write files
- Run shell commands
</agent_capabilities>

<tool_use_rules>
1. Always respond with Python code that uses the provided agent_tools functions
2. One action per response
3. Never try to access prohibited tools or APIs
4. Check results of each action before proceeding
</tool_use_rules>

<planning_approach>
1. Break down complex tasks into steps
2. Track progress in todo.md
3. Update todo.md as steps are completed
4. Use results from prior steps to inform later steps
</planning_approach>

<error_handling>
1. If an action fails, diagnose the error
2. Try alternative approaches when blocked
3. After 3 failed attempts, move to a different approach
</error_handling>

<information_rules>
1. Prioritize authoritative sources
2. Cross-check information across multiple sources 
3. Cite sources in final output
4. Never make up information
</information_rules>

You have access to these tools:
- agent_tools.search_web(query): Search the web
- agent_tools.browse_url(url): Get content of a webpage
- agent_tools.execute_python(code): Run Python code
- agent_tools.shell_command(cmd): Run safe shell commands
- write_file(filename, content): Save information to a file
- read_file(filename): Retrieve content from a file
- update_todo(step_number, status): Update task status

Your goal is to complete the assigned task completely and accurately.
"""

번역

# Korean-language version of the agent system prompt. It is assigned to the
# same SYSTEM_PROMPT name, so whichever assignment runs last is the one in
# effect.
SYSTEM_PROMPT = """
당신은 도구를 사용해 작업을 수행할 수 있는 자율 AI 에이전트입니다.

<agent_capabilities>
- Python 코드 실행
- 웹 검색 및 브라우징
- 파일 읽기/쓰기
- 안전한 셸 명령 실행
</agent_capabilities>

<tool_use_rules>
1. 항상 제공된 agent_tools 함수를 사용하는 Python 코드로 응답
2. 한 번에 한 가지 액션만 수행
3. 금지된 도구나 API에 접근 시도 금지
4. 각 액션의 결과를 확인 후 진행
</tool_use_rules>

<planning_approach>
1. 복잡한 작업은 단계별로 분해
2. todo.md에 진행 상황 기록
3. 단계 완료 시 todo.md 업데이트
4. 이전 단계 결과를 다음 단계에 반영
</planning_approach>

<error_handling>
1. 액션 실패 시 오류 진단
2. 막혔을 때 대안 시도
3. 3회 실패 시 다른 접근법 시도
</error_handling>

<information_rules>
1. 신뢰할 수 있는 출처 우선
2. 여러 출처에서 정보 교차 확인
3. 최종 결과에 출처 명시
4. 정보를 임의로 만들어내지 않기
</information_rules>

사용 가능한 도구:
- agent_tools.search_web(query): 웹 검색
- agent_tools.browse_url(url): 웹페이지 내용 가져오기
- agent_tools.execute_python(code): Python 코드 실행
- agent_tools.shell_command(cmd): 안전한 셸 명령 실행
- write_file(filename, content): 파일에 정보 저장
- read_file(filename): 파일 내용 불러오기
- update_todo(step_number, status): 작업 상태 업데이트

당신의 목표는 할당된 작업을 완전하고 정확하게 완료하는 것입니다.
"""

5. 사용자 인터페이스와의 통합

import gradio as gr

def process_request(user_input):
    """Pass one user message to ``agent_loop`` and return what it produces."""
    return agent_loop(user_input)

# Build a minimal chat-style web UI around the agent.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def respond(message, chat_history):
        """Answer one message and append the exchange to the chat history."""
        reply = process_request(message)
        chat_history.append((message, reply))
        # The empty string becomes the new textbox value; the history goes
        # back to the chatbot component.
        return "", chat_history

    # Submitting the textbox runs the agent and refreshes both components.
    msg.submit(fn=respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
    # The Clear button sets the chatbot value to None.
    clear.click(fn=lambda: None, inputs=None, outputs=chatbot, queue=False)

demo.launch()

고급 기능 확장

기본 시스템이 동작하면, 다음과 같은 추가 기능을 구현해 Manus의 기능을 더욱 충실하게 재현할 수 있습니다:

멀티 에이전트 협업

from crewai import Agent, Task, Crew

# Create the specialised agents; both share the same underlying model.
researcher = Agent(
    role="Researcher",
    goal="Find accurate information",
    backstory="You're an expert at finding information",
    llm=model
)

coder = Agent(
    role="Coder",
    goal="Write efficient code",
    backstory="You're an expert Python programmer",
    llm=model
)

# Create one task per agent.
# expected_output is included because CrewAI's Task model requires it;
# omitting it fails validation when the Task is constructed.
research_task = Task(
    description="Find information about X",
    expected_output="A summary of the information found about X",
    agent=researcher
)

coding_task = Task(
    description="Implement functionality for X",
    expected_output="Working Python code that implements X",
    agent=coder
)

# Assemble the crew from the agents and their tasks.
crew = Crew(
    agents=[researcher, coder],
    tasks=[research_task, coding_task],
    verbose=True
)

# Run the crew and keep its result.
result = crew.kickoff()

브라우저 자동화
Playwright로 더 정교한 웹 상호작용 기능 추가:

def interact_with_webpage(url, actions):
    """Open ``url``, apply a sequence of UI actions and return the page content.

    Args:
        url: Address to navigate to.
        actions: Iterable of dicts. Each has a ``"type"`` of ``"click"``,
            ``"fill"`` or ``"submit"`` and a ``"selector"``; ``"fill"`` also
            needs a ``"value"``. Entries with any other type are ignored.

    Returns:
        Whatever ``page.content()`` yields after all actions have run.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch()
        try:
            page = browser.new_page()
            page.goto(url)

            for action in actions:
                kind = action["type"]
                if kind == "click":
                    page.click(action["selector"])
                elif kind == "fill":
                    page.fill(action["selector"], action["value"])
                elif kind == "submit":
                    # Resolve the element through a locator instead of
                    # pasting the selector into a JavaScript string, where a
                    # quote in the selector would break or inject script.
                    page.locator(action["selector"]).evaluate(
                        "form => form.submit()"
                    )

            content = page.content()
        finally:
            # Close the browser even when navigation or an action fails.
            browser.close()
    return content

배포 고려사항

지속적인 운영을 위해:

# docker-compose.yml
version: '3'
services:
  manus-replica:
    build: .
    ports:
      - "8000:8000"
    volumes:
      - ./data:/app/data
    # Restart the service automatically for continuous operation.
    restart: always
    environment:
      - MODEL_PATH=/app/models/CodeActAgent
      # The whole entry is single-quoted: unquoted, the ": " inside the JSON
      # value is read as a YAML key/value separator and the entry no longer
      # parses as a plain KEY=value string.
      - 'API_KEYS={"serpapi": "your_key_here"}'

이 구현 전략은 CodeActAgent 프로젝트를 기반으로, Docker를 통한 샌드박싱, LangChain을 통한 오케스트레이션, 그리고 플래닝 및 메모리용 추가 컴포넌트를 결합합니다. 완전히 동일하지는 않지만, 이 방식으로 Manus의 핵심 기능을 오픈소스 도구만으로 복제할 수 있습니다.

가장 어려운 부분은 프롬프트와 오류 처리의 미세 조정이지만, 기술적 아키텍처는 이 접근법으로 충분히 복제할 수 있습니다.