# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2024 @ CAMEL-AI.org. All Rights Reserved. ===========
import os
from pathlib import Path
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from camel.types import ModelPlatformType, ModelType
from termcolor import colored

from crab import Benchmark, create_benchmark
from crab.agents.backend_models.camel_rag_model import CamelRAGModel
from crab.agents.policies import SingleAgentPolicy
from crab.benchmarks.template import template_benchmark_config
 | 25 | + | 
 | 26 | + | 
 | 27 | +def start_benchmark(benchmark: Benchmark, agent: SingleAgentPolicy):  | 
 | 28 | +    for step in range(20):  | 
 | 29 | +        print("=" * 40)  | 
 | 30 | +        print(f"Start agent step {step}:")  | 
 | 31 | +        observation = benchmark.observe()["template_env"]  | 
 | 32 | +        print(f"Current environment observation: {observation}")  | 
 | 33 | +        response = agent.chat(  | 
 | 34 | +            {  | 
 | 35 | +                "template_env": [  | 
 | 36 | +                    (f"Current environment observation: {observation}", 0),  | 
 | 37 | +                ]  | 
 | 38 | +            }  | 
 | 39 | +        )  | 
 | 40 | +        print(colored(f"Agent take action: {response}", "blue"))  | 
 | 41 | + | 
 | 42 | +        for action in response:  | 
 | 43 | +            response = benchmark.step(  | 
 | 44 | +                action=action.name,  | 
 | 45 | +                parameters=action.arguments,  | 
 | 46 | +                env_name=action.env,  | 
 | 47 | +            )  | 
 | 48 | +            print(  | 
 | 49 | +                colored(  | 
 | 50 | +                    f'Action "{action.name}" success, stat: '  | 
 | 51 | +                    f"{response.evaluation_results}",  | 
 | 52 | +                    "green",  | 
 | 53 | +                )  | 
 | 54 | +            )  | 
 | 55 | +            if response.terminated:  | 
 | 56 | +                print("=" * 40)  | 
 | 57 | +                print(  | 
 | 58 | +                    colored(  | 
 | 59 | +                        f"Task finished, result: {response.evaluation_results}",  | 
 | 60 | +                        "green"  | 
 | 61 | +                    )  | 
 | 62 | +                )  | 
 | 63 | +                return  | 
 | 64 | + | 
 | 65 | + | 
 | 66 | +def prepare_vim_docs():  | 
 | 67 | +    """Prepare Vim documentation for RAG"""  | 
 | 68 | +    print(colored("Starting Vim documentation preparation...", "yellow"))  | 
 | 69 | +    base_url = "https://vimdoc.sourceforge.net/htmldoc/usr_07.html"  | 
 | 70 | +    content_dir = "vim_docs"  | 
 | 71 | +    os.makedirs(content_dir, exist_ok=True)  | 
 | 72 | +      | 
 | 73 | +    print(colored("Fetching main page...", "yellow"))  | 
 | 74 | +    response = requests.get(base_url)  | 
 | 75 | +    soup = BeautifulSoup(response.text, 'html.parser')  | 
 | 76 | +      | 
 | 77 | +    # Process the main page first  | 
 | 78 | +    main_content = soup.get_text(separator='\n', strip=True)  | 
 | 79 | +    with open(os.path.join(content_dir, "main.txt"), 'w', encoding='utf-8') as f:  | 
 | 80 | +        f.write(f"Source: {base_url}\n\n{main_content}")  | 
 | 81 | +      | 
 | 82 | +    links = [link for link in soup.find_all('a')   | 
 | 83 | +             if link.get('href') and not link.get('href').startswith(('#', 'http'))]  | 
 | 84 | +    total_links = len(links)  | 
 | 85 | +    print(colored(f"Found {total_links} documentation pages to process", "yellow"))  | 
 | 86 | +      | 
 | 87 | +    processed_files = []  | 
 | 88 | +    for idx, link in enumerate(links, 1):  | 
 | 89 | +        href = link.get('href')  | 
 | 90 | +        full_url = urljoin(base_url, href)  | 
 | 91 | +        try:  | 
 | 92 | +            print(colored(f"Processing page {idx}/{total_links}: {href}", "yellow"))  | 
 | 93 | +              | 
 | 94 | +            # Fetch and process page  | 
 | 95 | +            page_response = requests.get(full_url)  | 
 | 96 | +            page_soup = BeautifulSoup(page_response.text, 'html.parser')  | 
 | 97 | +            for tag in page_soup(['script', 'style']):  | 
 | 98 | +                tag.decompose()  | 
 | 99 | +            content = page_soup.get_text(separator='\n', strip=True)  | 
 | 100 | +              | 
 | 101 | +            # Save content  | 
 | 102 | +            filename = os.path.join(content_dir, f"{href.replace('/', '_')}.txt")  | 
 | 103 | +            with open(filename, 'w', encoding='utf-8') as f:  | 
 | 104 | +                f.write(f"Source: {full_url}\n\n{content}")  | 
 | 105 | +            processed_files.append(filename)  | 
 | 106 | +            print(colored(f"✓ Saved {href}", "green"))  | 
 | 107 | +              | 
 | 108 | +        except Exception as e:  | 
 | 109 | +            print(colored(f"✗ Error processing {full_url}: {e}", "red"))  | 
 | 110 | +      | 
 | 111 | +    print(colored("Documentation preparation completed!", "green"))  | 
 | 112 | +    return processed_files  | 
 | 113 | + | 
 | 114 | + | 
 | 115 | +if __name__ == "__main__":  | 
 | 116 | +    print(colored("=== Starting RAG-enhanced benchmark ===", "cyan"))  | 
 | 117 | +      | 
 | 118 | +    # Initialize benchmark and environment  | 
 | 119 | +    print(colored("\nInitializing benchmark environment...", "yellow"))  | 
 | 120 | +    benchmark = create_benchmark(template_benchmark_config)  | 
 | 121 | +    task, action_space = benchmark.start_task("0")  | 
 | 122 | +    env_descriptions = benchmark.get_env_descriptions()  | 
 | 123 | + | 
 | 124 | +    doc_files = prepare_vim_docs()  | 
 | 125 | +      | 
 | 126 | +    print(colored("\nInitializing RAG model...", "yellow"))  | 
 | 127 | +    rag_model = CamelRAGModel(  | 
 | 128 | +        model="gpt-4o",  | 
 | 129 | +        model_platform=ModelPlatformType.OPENAI,  | 
 | 130 | +        parameters={"temperature": 0.7}  | 
 | 131 | +    )  | 
 | 132 | +      | 
 | 133 | +    print(colored("Processing documents for RAG...", "yellow"))  | 
 | 134 | +    for doc_file in doc_files:  | 
 | 135 | +        print(colored(f"Processing {doc_file}...", "yellow"))  | 
 | 136 | +        rag_model.process_documents(doc_file)  | 
 | 137 | +    print(colored("RAG model initialization complete!", "green"))  | 
 | 138 | +      | 
 | 139 | +    print(colored("\nSetting up agent...", "yellow"))  | 
 | 140 | +    agent = SingleAgentPolicy(model_backend=rag_model)  | 
 | 141 | +    agent.reset(task.description, action_space, env_descriptions)  | 
 | 142 | +      | 
 | 143 | +    print(colored("\nStarting benchmark execution:", "cyan"))  | 
 | 144 | +    print("Start performing task: " + colored(f'"{task.description}"', "green"))  | 
 | 145 | +    start_benchmark(benchmark, agent)  | 
 | 146 | +    benchmark.reset()  | 