Feat: Condense Event History #21

Merged 48 commits on Aug 5, 2024.

Commits
fd5adae
Edited Makefile
khushvind Jul 6, 2024
78be88f
updated num_retries
khushvind Jul 6, 2024
efc1c55
Merge remote-tracking branch 'upstream/main' into MemGPT
khushvind Jul 7, 2024
9ee63e1
Fix ruff issues and added summarizer
khushvind Jul 9, 2024
a4e2a18
Merge remote-tracking branch 'upstream/main'
khushvind Jul 10, 2024
507a67e
Merge remote-tracking branch 'upstream/main' into MemGPT
khushvind Jul 10, 2024
ccffdcc
added summarize function
khushvind Jul 14, 2024
4c3482f
Integrated MemGPT like functionality
khushvind Jul 14, 2024
4b1bf53
Integrated MemGPT like Functionality
khushvind Jul 14, 2024
de1b94b
Merge remote-tracking branch 'upstream/main'
khushvind Jul 15, 2024
5fc4ce4
Merge remote-tracking branch 'upstream/main' into MemGPT_Summarize_St…
khushvind Jul 15, 2024
85a8f4b
Retriving Summary
khushvind Jul 15, 2024
2845c2c
removed bugs
khushvind Jul 15, 2024
7a8299b
removed pip install -q -U google-generativeai from Makefile
khushvind Jul 16, 2024
a7a4c8a
removed bugs
khushvind Jul 18, 2024
0428dcc
Merge remote-tracking branch 'upstream/main' into MemGPT
khushvind Jul 19, 2024
bd9d14f
corrected the max_input_token
khushvind Jul 19, 2024
22e92d7
moved condenser configs to LLMConfig
khushvind Jul 19, 2024
d2b1ae1
fixed issue causing error in test on linux
khushvind Jul 20, 2024
1afd574
Merge remote-tracking branch 'upstream/main' into MemGPT
khushvind Jul 20, 2024
c43ed97
converted each message to Message class with additional attributes
khushvind Jul 21, 2024
6080071
Moved condenser functions to LLM class
khushvind Jul 22, 2024
515e038
removed condenser.py file
khushvind Jul 23, 2024
44d3c9d
Removed ContextWindowExceededError - TokenLimitExceededError already …
khushvind Jul 23, 2024
d93f5ee
Merge remote-tracking branch 'upstream/main' into MemGPT
khushvind Jul 25, 2024
0fece3f
Merge branch 'main' into MemGPT
khushvind Jul 25, 2024
d59be73
build condenser as mixin class
khushvind Jul 26, 2024
85b715d
build condenser as mixin class
khushvind Jul 26, 2024
7c5606d
Merge remote-tracking branch 'origin' into MemGPT
khushvind Jul 26, 2024
d9b3aae
Merge remote-tracking branch 'origin/MemGPT' into MemGPT
khushvind Jul 26, 2024
2a81073
Merge remote-tracking branch 'upstream/main' into MemGPT
khushvind Jul 26, 2024
140253c
replaced get_response with the original llm.completion
khushvind Jul 27, 2024
c7a3713
returning summarize_action to agent controller to add to memory
khushvind Jul 28, 2024
754a9c3
Merge remote-tracking branch 'upstream/main' into MemGPT
khushvind Jul 28, 2024
490b192
removed bug - pass summary in prompt
khushvind Jul 30, 2024
2162c91
modified summarize_messages
khushvind Jul 30, 2024
a90edc8
Merge remote-tracking branch 'upstream/main' into MemGPT
khushvind Jul 31, 2024
8d7bc30
Merge remote-tracking branch 'upstream/main' into MemGPT
khushvind Jul 31, 2024
9fc79cb
Skip win32 for getuid
SmartManoj Jul 31, 2024
cd8bfdf
catch Exception
SmartManoj Aug 5, 2024
a8befd6
update test
SmartManoj Aug 5, 2024
af79038
Merge branch 'kevin' into pr_2937
SmartManoj Aug 5, 2024
93a5d47
remove basellm
SmartManoj Aug 5, 2024
eb310da
for vision PR
SmartManoj Aug 5, 2024
34f9ebd
Revert "update test"
SmartManoj Aug 5, 2024
c8d1930
Merge branch 'kevin' into pr_2937
SmartManoj Aug 5, 2024
ae55b14
update get_token_count
SmartManoj Aug 5, 2024
7a0b2a1
Update llm.py
SmartManoj Aug 5, 2024
Changes from 1 commit:
Integrated MemGPT like functionality
khushvind committed Jul 14, 2024
commit 4c3482f8a0549d1567c40c5ec2e63eca038ffc3f
97 changes: 53 additions & 44 deletions agenthub/codeact_agent/codeact_agent.py
@@ -33,6 +33,7 @@
from opendevin.events.serialization.event import truncate_content
from opendevin.llm.llm import LLM
from opendevin.memory.condenser import summarize_messages
from opendevin.memory.history import ShortTermHistory
from opendevin.runtime.plugins import (
AgentSkillsRequirement,
JupyterRequirement,
@@ -233,16 +234,16 @@ def step(self, state: State) -> Action:
],
temperature=0.0,
)
print('Response: ', response)
# print('Response: ', response)
except (ContextWindowExceededError, TokenLimitExceededError):
# Handle the specific exception
print('An error occurred: ')
attempt += 1
# If we got a context alert, try trimming the messages length, then try again
if self.llm.is_over_token_limit(messages):
# A separate API call to run a summarizer
messages = self.summarize_messages_inplace(messages)

self.condense(state=state)
messages = self._get_messages(state=state)
# Try step again
# return self.step(user_message, first_message=first_message, return_dicts=return_dicts)
else:
@@ -254,9 +255,10 @@ def step(self, state: State) -> Action:

return self.action_parser.parse(response)

def summarize_messages_inplace(
def condense(
self,
messages: list[dict],
state: State,
# history: ShortTermHistory,
# cutoff=None,
# preserve_last_N_messages=True,
# disallow_tool_as_first=True
@@ -266,17 +268,31 @@ def summarize_messages_inplace(
# Start at index 1 (past the system message),
# and collect messages for summarization until we reach the desired truncation token fraction (eg 50%)
# Do not allow truncation of the last N messages, since these are needed for in-context examples of function calling

# TODO: Check the functioning of this get_token_count function.
history: ShortTermHistory = state.history
messages = self._get_messages(state=state)
token_counts = [self.llm.get_token_count([message]) for message in messages]
print(token_counts)
message_buffer_token_count = sum(token_counts[2:]) # no system message
MESSAGE_SUMMARY_TRUNC_TOKEN_FRAC = 0.75
desired_token_count_to_summarize = int(
message_buffer_token_count * MESSAGE_SUMMARY_TRUNC_TOKEN_FRAC
)
candidate_messages_to_summarize = messages[2:]
token_counts = token_counts[2:]
candidate_messages_to_summarize = []
# token_counts = token_counts[2:]

last_summarized_event_id = None
tokens_so_far = 0
for event in history.get_events():
message = (
get_action_message(event)
if isinstance(event, Action)
else get_observation_message(event)
)
if message:
candidate_messages_to_summarize.append(message)
tokens_so_far += self.llm.get_token_count([message])
if tokens_so_far > desired_token_count_to_summarize:
last_summarized_event_id = event.id
break

# TODO: Add functionality for preserving last N messages
MESSAGE_SUMMARY_TRUNC_KEEP_N_LAST = 3
@@ -298,26 +314,16 @@ def summarize_messages_inplace(
f"Summarize error: tried to run summarize, but couldn't find enough messages to compress [len={len(messages)}, preserve_N={MESSAGE_SUMMARY_TRUNC_KEEP_N_LAST}]"
)

tokens_so_far = 0
cutoff = 0
for i, msg in enumerate(candidate_messages_to_summarize):
cutoff = i
tokens_so_far += token_counts[i]
if tokens_so_far > desired_token_count_to_summarize:
break
# Account for system message
cutoff += 1

# Try to make an assistant message come after the cutoff
try:
print(f"Selected cutoff {cutoff} was a 'user', shifting one...")
if messages[cutoff]['role'] == 'user':
new_cutoff = cutoff + 1
if messages[new_cutoff]['role'] == 'user':
print(f"Shifted cutoff {new_cutoff} is still a 'user', ignoring...")
cutoff = new_cutoff
except IndexError:
pass
# TODO: Try to make an assistant message come after the cutoff
# try:
# print(f"Selected cutoff {cutoff} was a 'user', shifting one...")
# if messages[cutoff]['role'] == 'user':
# new_cutoff = cutoff + 1
# if messages[new_cutoff]['role'] == 'user':
# print(f"Shifted cutoff {new_cutoff} is still a 'user', ignoring...")
# cutoff = new_cutoff
# except IndexError:
# pass

# TODO: Customize this function to be used by OpenDevin.
# # Make sure the cutoff isn't on a 'tool' or 'function'
@@ -326,17 +332,16 @@ def summarize_messages_inplace(
# printd(f"Selected cutoff {cutoff} was a 'tool', shifting one...")
# cutoff += 1

message_sequence_to_summarize = messages[
2:cutoff
] # do NOT get rid of the system message
message_sequence_to_summarize = candidate_messages_to_summarize

if len(message_sequence_to_summarize) <= 1:
# This prevents a potential infinite loop of summarizing the same message over and over
raise SummarizeError(
f"Summarize error: tried to run summarize, but couldn't find enough messages to compress [len={len(message_sequence_to_summarize)} <= 1]"
)
else:
print(
f'Attempting to summarize {len(message_sequence_to_summarize)} messages [2:{cutoff}] of {len(messages)}'
f'Attempting to summarize with last summarized event id = {last_summarized_event_id}'
)

# TODO: (Check) I don't think this is needed because max_tokens is already define in opendevin.
@@ -349,10 +354,13 @@ def summarize_messages_inplace(
# LLM_MAX_TOKENS[self.model] if (self.model is not None and self.model in LLM_MAX_TOKENS) else LLM_MAX_TOKENS["DEFAULT"]
# )

summary = summarize_messages(
summary_action = summarize_messages(
message_sequence_to_summarize=message_sequence_to_summarize, llm=self.llm
)
print(f'Got summary: {summary}')
summary_action.last_summarized_event_id = last_summarized_event_id
print(f'Got summary: {summary_action}')
history.add_summary(summary_action)
print('Added to history')

# TODO: Look into this
# # Metadata that's useful for the agent to see
@@ -382,14 +390,13 @@ def summarize_messages_inplace(

# print(f"Ran summarizer, messages length {prior_len} -> {len(self.messages)}")

print('Old No of tokens, ' + str(self.llm.get_token_count(messages)) + '\n')
new_messages = (
messages[:2]
+ [{'role': 'assistant', 'content': summary}]
+ messages[cutoff:]
)
print('New No of tokens, ' + str(self.llm.get_token_count(new_messages)) + '\n')
return new_messages
# print('Old No of tokens, ' + str(self.llm.get_token_count(messages)) + '\n')
# new_messages = (
# messages[:2]
# + [{'role': 'assistant', 'content': summary}]
# + messages[cutoff:]
# )
# print('New No of tokens, ' + str(self.llm.get_token_count(new_messages)) + '\n')

def search_memory(self, query: str) -> list[str]:
raise NotImplementedError('Implement this abstract method')
@@ -408,6 +415,8 @@ def _get_messages(self, state: State) -> list[dict[str, str]]:
else get_observation_message(event)
)

# print (event.id , " : ", message)

# add regular message
if message:
messages.append(message)
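
Taken together, the new condense() path replaces the old cutoff arithmetic: it walks the event history in order, accumulating per-message token counts until it crosses a target fraction of the message buffer, and records the id of the last event it will summarize. A minimal sketch of that selection loop, assuming an event_to_message() adapter (a hypothetical name wrapping get_action_message/get_observation_message) and the 0.75 truncation fraction used in the diff:

    MESSAGE_SUMMARY_TRUNC_TOKEN_FRAC = 0.75

    def select_events_to_summarize(history, llm, message_buffer_token_count):
        # Target: summarize roughly 75% of the non-system message tokens.
        desired = int(message_buffer_token_count * MESSAGE_SUMMARY_TRUNC_TOKEN_FRAC)
        candidates = []
        tokens_so_far = 0
        last_summarized_event_id = None
        for event in history.get_events():
            message = event_to_message(event)  # hypothetical adapter; may return None
            if message:
                candidates.append(message)
                tokens_so_far += llm.get_token_count([message])
                if tokens_so_far > desired:
                    last_summarized_event_id = event.id
                    break
        return candidates, last_summarized_event_id
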
5 changes: 5 additions & 0 deletions opendevin/core/exceptions.py
@@ -88,3 +88,8 @@ class SummarizeError(Exception):

def __init__(self, message='Error Summarizing The Memory'):
super().__init__(message)


class InvalidSummaryResponseError(Exception):
def __init__(self, message='Invalid summary response'):
super().__init__(message)
39 changes: 35 additions & 4 deletions opendevin/events/action/agent.py
@@ -31,18 +31,49 @@ def message(self) -> str:
return f"Let me dive into my memories to find what you're looking for! Searching for: '{self.query}'. This might take a moment."


# @dataclass
# class AgentSummarizeAction(Action):
# summary: str
# action: str = ActionType.SUMMARIZE
# _chunk_start: int = -1
# _chunk_end: int = -1

# @property
# def message(self) -> str:
# return self.summary

# def __str__(self) -> str:
# ret = '**AgentSummarizeAction**\n'
# ret += f'SUMMARY: {self.summary}'
# return


@dataclass
class AgentSummarizeAction(Action):
summary: str
"""
Action to summarize a list of events.

Attributes:
- summarized_actions: A sentence summarizing all the actions.
- summarized_observations: A few sentences summarizing all the observations.
"""

summarized_actions: str = ''
summarized_observations: str = ''
action: str = ActionType.SUMMARIZE
# _chunk_start: int = -1
# _chunk_end: int = -1
last_summarized_event_id = None
is_delegate_summary: bool = False

@property
def message(self) -> str:
return self.summary
return self.summarized_observations

def __str__(self) -> str:
ret = '**AgentSummarizeAction**\n'
ret += f'SUMMARY: {self.summary}'
ret += f'SUMMARIZED ACTIONS: {self.summarized_actions}\n'
ret += f'SUMMARIZED OBSERVATIONS: {self.summarized_observations}\n'
return ret


@@ -54,7 +85,7 @@ class AgentFinishAction(Action):

@property
def message(self) -> str:
if self.thought != "":
if self.thought != '':
return self.thought
return "All done! What's next on the agenda?"

2 changes: 2 additions & 0 deletions opendevin/events/serialization/action.py
@@ -5,6 +5,7 @@
AgentFinishAction,
AgentRecallAction,
AgentRejectAction,
AgentSummarizeAction,
ChangeAgentStateAction,
)
from opendevin.events.action.browse import BrowseInteractiveAction, BrowseURLAction
@@ -33,6 +34,7 @@
ModifyTaskAction,
ChangeAgentStateAction,
MessageAction,
AgentSummarizeAction,
)

ACTION_TYPE_TO_CLASS = {action_class.action: action_class for action_class in actions} # type: ignore[attr-defined]
56 changes: 14 additions & 42 deletions opendevin/memory/condenser.py
@@ -1,7 +1,11 @@
from opendevin.core.logger import opendevin_logger as logger
from opendevin.llm.llm import LLM

from .prompts import MESSAGE_SUMMARY_WARNING_FRAC, SUMMARY_PROMPT_SYSTEM
from .prompts import (
MESSAGE_SUMMARY_WARNING_FRAC,
SUMMARY_PROMPT_SYSTEM,
parse_summary_response,
)


class MemoryCondenser:
@@ -79,44 +83,12 @@ def summarize_messages(message_sequence_to_summarize: list[dict], llm: LLM):
)

print(f'summarize_messages gpt reply: {response.choices[0]}')
reply = response.choices[0].message.content
return reply


# def summarize_messages(
# agent_state: AgentState,
# message_sequence_to_summarize: List[Message],
# insert_acknowledgement_assistant_message: bool = True,
# ):
# """Summarize a message sequence using GPT"""
# # we need the context_window
# context_window = agent_state.llm_config.context_window

# summary_prompt = SUMMARY_PROMPT_SYSTEM
# summary_input = _format_summary_history(message_sequence_to_summarize)
# summary_input_tkns = count_tokens(summary_input)
# if summary_input_tkns > MESSAGE_SUMMARY_WARNING_FRAC * context_window:
# trunc_ratio = (MESSAGE_SUMMARY_WARNING_FRAC * context_window / summary_input_tkns) * 0.8 # For good measure...
# cutoff = int(len(message_sequence_to_summarize) * trunc_ratio)
# summary_input = str(
# [summarize_messages(agent_state, message_sequence_to_summarize=message_sequence_to_summarize[:cutoff])]
# + message_sequence_to_summarize[cutoff:]
# )

# dummy_user_id = uuid.uuid4()
# dummy_agent_id = uuid.uuid4()
# message_sequence = []
# message_sequence.append(Message(user_id=dummy_user_id, agent_id=dummy_agent_id, role="system", text=summary_prompt))
# if insert_acknowledgement_assistant_message:
# message_sequence.append(Message(user_id=dummy_user_id, agent_id=dummy_agent_id, role="assistant", text=MESSAGE_SUMMARY_REQUEST_ACK))
# message_sequence.append(Message(user_id=dummy_user_id, agent_id=dummy_agent_id, role="user", text=summary_input))

# response = create(
# llm_config=agent_state.llm_config,
# user_id=agent_state.user_id,
# messages=message_sequence,
# )

# printd(f"summarize_messages gpt reply: {response.choices[0]}")
# reply = response.choices[0].message.content
# return reply
# reply = response.choices[0].message.content
# print ("Response After Summarizing")
# print (response)

action_response = response['choices'][0]['message']['content']
action = parse_summary_response(action_response)
# action._chunk_start = 2
# action._chunk_end = len(message_sequence)+1
return action
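
A sketch of how the rewritten summarize_messages is meant to be called (the message list shape here is an assumption; in the agent it comes from _get_messages):

    from opendevin.memory.condenser import summarize_messages

    messages = [
        {'role': 'user', 'content': 'Find the section on sequence diagrams.'},
        {'role': 'assistant', 'content': 'Searching the PDF now...'},
    ]
    # llm: a configured opendevin.llm.llm.LLM instance
    summary_action = summarize_messages(
        message_sequence_to_summarize=messages, llm=llm
    )
    # summary_action is an AgentSummarizeAction parsed from the model's JSON reply
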
16 changes: 15 additions & 1 deletion opendevin/memory/history.py
@@ -4,6 +4,7 @@
from opendevin.events.action.action import Action
from opendevin.events.action.agent import (
AgentDelegateAction,
AgentSummarizeAction,
ChangeAgentStateAction,
)
from opendevin.events.action.empty import NullAction
@@ -41,6 +42,12 @@ def __init__(self):
self.start_id = -1
self.end_id = -1
self.delegates = {}
self.summary = None
self.last_summarized_event_id = None

def add_summary(self, summary_action: AgentSummarizeAction):
self.summary = summary_action
self.last_summarized_event_id = summary_action.last_summarized_event_id

def set_event_stream(self, event_stream: EventStream):
self._event_stream = event_stream
@@ -76,7 +83,14 @@ def get_events(self, reverse: bool = False) -> Iterable[Event]:
# and filter out events that were included in a summary

# filter out the events from a delegate of the current agent
if not any(
if (
self.last_summarized_event_id is not None
and self.summary is not None
and event.id <= self.last_summarized_event_id
):
summary_action = self.summary
yield summary_action
elif not any(
# except for the delegate action and observation themselves, currently
# AgentDelegateAction has id = delegate_start
# AgentDelegateObservation has id = delegate_end
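
The net effect of the new branch in get_events is that every event with an id at or below last_summarized_event_id is represented by the stored summary rather than yielded itself. A simplified sketch of that intended replacement (deduplicated so the summary is emitted once, and assuming events arrive in ascending id order):

    def events_with_summary(events, summary, last_summarized_event_id):
        summary_emitted = False
        for event in events:
            if (
                last_summarized_event_id is not None
                and summary is not None
                and event.id <= last_summarized_event_id
            ):
                # Event is covered by the summary: emit the summary once,
                # skip the raw event.
                if not summary_emitted:
                    summary_emitted = True
                    yield summary
            else:
                yield event
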
80 changes: 68 additions & 12 deletions opendevin/memory/prompts.py
@@ -1,15 +1,71 @@
WORD_LIMIT = 100
from opendevin.core.exceptions import (
InvalidSummaryResponseError,
LLMMalformedActionError,
LLMResponseError,
)
from opendevin.core.logger import opendevin_logger as logger
from opendevin.core.utils import json
from opendevin.events.action.agent import AgentSummarizeAction
from opendevin.events.event import EventSource
from opendevin.events.serialization.action import action_from_dict

WORD_LIMIT = 200
MESSAGE_SUMMARY_WARNING_FRAC = 0.75
SUMMARY_PROMPT_SYSTEM = f"""
Your job is to summarize a history of previous messages in a conversation between an AI persona and a human.
The conversation you are given is a from a fixed context window and may not be complete.
Messages sent by the AI are marked with the 'assistant' role.
The AI 'assistant' can also make calls to functions, whose outputs can be seen in messages with the 'function' role.
Things the AI says in the message content are considered inner monologue and are not seen by the user.
The only AI messages seen by the user are from when the AI uses 'send_message'.
Messages the user sends are in the 'user' role.
The 'user' role is also used for important system events, such as login events and heartbeat events (heartbeats run the AI's program without user action, allowing the AI to act without prompting from the user sending them a message).
Summarize what happened in the conversation from the perspective of the AI (use the first person).
Keep your summary less than {WORD_LIMIT} words, do NOT exceed this word limit.
# SUMMARY_PROMPT_SYSTEM = f"""
# Your job is to summarize a history of previous messages in a conversation between an AI persona and a human.
# The conversation you are given is a from a fixed context window and may not be complete.
# Messages sent by the AI are marked with the 'assistant' role.
# The AI 'assistant' can also make calls to functions, whose outputs can be seen in messages with the 'function' role.
# Things the AI says in the message content are considered inner monologue and are not seen by the user.
# The only AI messages seen by the user are from when the AI uses 'send_message'.
# Messages the user sends are in the 'user' role.
# The 'user' role is also used for important system events, such as login events and heartbeat events (heartbeats run the AI's program without user action, allowing the AI to act without prompting from the user sending them a message).
# Summarize what happened in the conversation from the perspective of the AI (use the first person).
# Keep your summary less than {WORD_LIMIT} words, do NOT exceed this word limit.
# Only output the summary, do NOT include anything else in your output.
# """

SUMMARY_PROMPT_SYSTEM = """
Your job is to summarize a history of previous messages in a conversation between an AI persona and a human. The conversation you are given is a from a fixed context window and may not be complete. Keep your summary less than {WORD_LIMIT} words, do NOT exceed this word limit.
Only output the summary, do NOT include anything else in your output.
Given the following actions and observations, create a JSON response with:
- "action": "summarize"
- args:
- "summarized_actions": A precise sentence summarizing all the provided actions, written in the first person.
- "summarized_observations": A few precise sentences summarizing all the provided observations, written in the third person.
Example:
{
"action": "summarize",
"args": {
"summarized_actions": "I located the UML specification PDF, parsed its content, and searched for information about sequence diagrams.",
"summarized_observations": "The agent encountered a UnicodeDecodeError when initially searching the PDF text, but was able to resolve this by installing the PyPDF2 library and successfully extracting relevant information about sequence diagrams."
}
}
Make sure to include in observations any relevant information that the agent needs to remember.
%(events)s
"""


def parse_summary_response(response: str) -> AgentSummarizeAction:
"""
Parses a JSON summary of events.
Parameters:
- response: The response string to be parsed
Returns:
- The summary action output by the model
"""
try:
action_dict = json.loads(response)
action = action_from_dict(action_dict)
if action is None or not isinstance(action, AgentSummarizeAction):
error_message = f'Expected a summarize action, but the response got {str(type(action)) if action else None}'
logger.error(error_message)
raise InvalidSummaryResponseError(error_message)
action._source = EventSource.AGENT # type: ignore
except (LLMResponseError, LLMMalformedActionError) as e:
logger.error(f'Failed to parse summary response: {str(e)}')
raise InvalidSummaryResponseError(
f'Failed to parse the response: {str(e)}'
) from e
return action
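
To see the parser round-trip, feed it a reply shaped like the example embedded in SUMMARY_PROMPT_SYSTEM (the raw string below is invented to match that shape):

    raw = '''{
        "action": "summarize",
        "args": {
            "summarized_actions": "I located the UML specification PDF and searched it.",
            "summarized_observations": "A UnicodeDecodeError was resolved by installing PyPDF2."
        }
    }'''
    action = parse_summary_response(raw)
    assert isinstance(action, AgentSummarizeAction)
    print(action.summarized_actions)
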
3,921 changes: 3,921 additions & 0 deletions output.txt

Large diffs are not rendered by default.