Integrating MemGPT-like Functionality #2937

Status: Closed. khushvind wants to merge 42 commits (changes shown from 38 commits).

Commits (42):

fd5adae  Edited Makefile (khushvind, Jul 6, 2024)
78be88f  updated num_retries (khushvind, Jul 6, 2024)
efc1c55  Merge remote-tracking branch 'upstream/main' into MemGPT (khushvind, Jul 7, 2024)
9ee63e1  Fix ruff issues and added summarizer (khushvind, Jul 9, 2024)
a4e2a18  Merge remote-tracking branch 'upstream/main' (khushvind, Jul 10, 2024)
507a67e  Merge remote-tracking branch 'upstream/main' into MemGPT (khushvind, Jul 10, 2024)
ccffdcc  added summarize function (khushvind, Jul 14, 2024)
4c3482f  Integrated MemGPT like functionality (khushvind, Jul 14, 2024)
4b1bf53  Integrated MemGPT like Functionality (khushvind, Jul 14, 2024)
de1b94b  Merge remote-tracking branch 'upstream/main' (khushvind, Jul 15, 2024)
5fc4ce4  Merge remote-tracking branch 'upstream/main' into MemGPT_Summarize_St… (khushvind, Jul 15, 2024)
85a8f4b  Retriving Summary (khushvind, Jul 15, 2024)
2845c2c  removed bugs (khushvind, Jul 15, 2024)
7a8299b  removed pip install -q -U google-generativeai from Makefile (khushvind, Jul 16, 2024)
a7a4c8a  removed bugs (khushvind, Jul 18, 2024)
0428dcc  Merge remote-tracking branch 'upstream/main' into MemGPT (khushvind, Jul 19, 2024)
bd9d14f  corrected the max_input_token (khushvind, Jul 19, 2024)
22e92d7  moved condenser configs to LLMConfig (khushvind, Jul 19, 2024)
d2b1ae1  fixed issue causing error in test on linux (khushvind, Jul 20, 2024)
1afd574  Merge remote-tracking branch 'upstream/main' into MemGPT (khushvind, Jul 20, 2024)
c43ed97  converted each message to Message class with additional attributes (khushvind, Jul 21, 2024)
6080071  Moved condenser functions to LLM class (khushvind, Jul 22, 2024)
515e038  removed condenser.py file (khushvind, Jul 23, 2024)
44d3c9d  Removed ContextWindowExceededError - TokenLimitExceededError already … (khushvind, Jul 23, 2024)
d93f5ee  Merge remote-tracking branch 'upstream/main' into MemGPT (khushvind, Jul 25, 2024)
0fece3f  Merge branch 'main' into MemGPT (khushvind, Jul 25, 2024)
d59be73  build condenser as mixin class (khushvind, Jul 26, 2024)
85b715d  build condenser as mixin class (khushvind, Jul 26, 2024)
7c5606d  Merge remote-tracking branch 'origin' into MemGPT (khushvind, Jul 26, 2024)
d9b3aae  Merge remote-tracking branch 'origin/MemGPT' into MemGPT (khushvind, Jul 26, 2024)
2a81073  Merge remote-tracking branch 'upstream/main' into MemGPT (khushvind, Jul 26, 2024)
140253c  replaced get_response with the original llm.completion (khushvind, Jul 27, 2024)
c7a3713  returning summarize_action to agent controller to add to memory (khushvind, Jul 28, 2024)
754a9c3  Merge remote-tracking branch 'upstream/main' into MemGPT (khushvind, Jul 28, 2024)
490b192  removed bug - pass summary in prompt (khushvind, Jul 30, 2024)
2162c91  modified summarize_messages (khushvind, Jul 30, 2024)
a90edc8  Merge remote-tracking branch 'upstream/main' into MemGPT (khushvind, Jul 31, 2024)
8d7bc30  Merge remote-tracking branch 'upstream/main' into MemGPT (khushvind, Jul 31, 2024)
34caa62  Merged with latest main (khushvind, Aug 20, 2024)
f30a572  updated prompt for condenser (khushvind, Aug 21, 2024)
13a9f64  removed print summary message (khushvind, Aug 22, 2024)
d25e19e  Modified how agent_controller handles summarization actions (khushvind, Aug 30, 2024)

3 changes: 3 additions & 0 deletions agenthub/codeact_agent/action_parser.py
@@ -5,6 +5,7 @@
Action,
AgentDelegateAction,
AgentFinishAction,
AgentSummarizeAction,
CmdRunAction,
IPythonRunCellAction,
MessageAction,
@@ -32,6 +33,8 @@ def __init__(self):
self.default_parser = CodeActActionParserMessage()

def parse(self, response) -> Action:
if isinstance(response, AgentSummarizeAction):
return response
action_str = self.parse_response(response)
return self.parse_action(action_str)

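The pass-through above exists because, with condense=True, llm.completion may return an AgentSummarizeAction instead of a model response when the prompt overflows the context window. That wiring lives in the LLM class and is not shown in this diff; a minimal sketch of the assumed behavior (hypothetical, with names and import paths approximated from this PR):

from opendevin.condenser.condenser import CondenserMixin  # assumed path
from opendevin.core.exceptions import TokenLimitExceededError  # assumed path
from opendevin.llm.messages import Message


class LLM(CondenserMixin):
    def completion(self, messages: list[Message], condense: bool = False, **kwargs):
        try:
            # _do_completion stands in for the underlying litellm call
            return self._do_completion(messages=messages, **kwargs)
        except TokenLimitExceededError:
            if condense:
                # Hand back a summary action; the agent controller stores it
                # in history, and the agent retries with a shorter prompt.
                return self.condense(messages)
            raise
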
112 changes: 76 additions & 36 deletions agenthub/codeact_agent/codeact_agent.py
@@ -12,6 +12,7 @@
Action,
AgentDelegateAction,
AgentFinishAction,
AgentSummarizeAction,
CmdRunAction,
IPythonRunCellAction,
MessageAction,
@@ -24,6 +25,7 @@
from opendevin.events.observation.observation import Observation
from opendevin.events.serialization.event import truncate_content
from opendevin.llm.llm import LLM
from opendevin.llm.messages import Message
from opendevin.runtime.plugins import (
AgentSkillsRequirement,
JupyterRequirement,
@@ -121,25 +123,39 @@ def action_to_str(self, action: Action) -> str:
return f'{action.thought}\n<execute_browse>\n{action.inputs["task"]}\n</execute_browse>'
elif isinstance(action, MessageAction):
return action.content
elif isinstance(action, AgentSummarizeAction):
return (
'Summary of all Action and Observations till now. \n'
+ 'Action: '
+ action.summarized_actions
+ '\nObservation: '
+ action.summarized_observations
)
elif isinstance(action, AgentFinishAction) and action.source == 'agent':
return action.thought
return ''

def get_action_message(self, action: Action) -> dict[str, str] | None:
def get_action_message(self, action: Action) -> Message | None:
message = None
if (
isinstance(action, AgentDelegateAction)
or isinstance(action, CmdRunAction)
or isinstance(action, IPythonRunCellAction)
or isinstance(action, MessageAction)
or isinstance(action, AgentSummarizeAction)
or (isinstance(action, AgentFinishAction) and action.source == 'agent')
):
return {
message = {
'role': 'user' if action.source == 'user' else 'assistant',
'content': self.action_to_str(action),
}
return None
if message:
return Message(message=message, condensable=True, event_id=action.id)
else:
return None

def get_observation_message(self, obs: Observation) -> dict[str, str] | None:
def get_observation_message(self, obs: Observation) -> Message | None:
message = None
max_message_chars = self.llm.config.max_message_chars
if isinstance(obs, CmdOutputObservation):
content = 'OBSERVATION:\n' + truncate_content(
@@ -148,7 +164,7 @@ def get_observation_message(self, obs: Observation) -> dict[str, str] | None:
content += (
f'\n[Command {obs.command_id} finished with exit code {obs.exit_code}]'
)
return {'role': 'user', 'content': content}
message = {'role': 'user', 'content': content}
elif isinstance(obs, IPythonRunCellObservation):
content = 'OBSERVATION:\n' + obs.content
# replace base64 images with a placeholder
@@ -160,13 +176,16 @@ def get_observation_message(self, obs: Observation) -> dict[str, str] | None:
)
content = '\n'.join(splitted)
content = truncate_content(content, max_message_chars)
return {'role': 'user', 'content': content}
message = {'role': 'user', 'content': content}
elif isinstance(obs, AgentDelegateObservation):
content = 'OBSERVATION:\n' + truncate_content(
str(obs.outputs), max_message_chars
)
return {'role': 'user', 'content': content}
return None
message = {'role': 'user', 'content': content}
if message:
return Message(message=message, condensable=True, event_id=obs.id)
else:
return None

def reset(self) -> None:
"""Resets the CodeAct Agent."""
@@ -191,48 +210,69 @@ def step(self, state: State) -> Action:
if latest_user_message and latest_user_message.strip() == '/exit':
return AgentFinishAction()

# prepare what we want to send to the LLM
messages: list[dict[str, str]] = self._get_messages(state)

response = self.llm.completion(
messages=messages,
stop=[
'</execute_ipython>',
'</execute_bash>',
'</execute_browse>',
],
temperature=0.0,
)
response = None
# give it multiple chances to get a response
# if it fails, we'll try to condense memory
attempt = 0
while not response and attempt < self.llm.config.attempts_to_condense:
# prepare what we want to send to the LLM
messages: list[Message] = self._get_messages(state)
print('No of tokens, ' + str(self.llm.get_token_count(messages)) + '\n')
response = self.llm.completion(
messages=messages,
stop=[
'</execute_ipython>',
'</execute_bash>',
'</execute_browse>',
],
temperature=0.0,
condense=True,
)
attempt += 1

return self.action_parser.parse(response)

def _get_messages(self, state: State) -> list[dict[str, str]]:
def search_memory(self, query: str) -> list[str]:
raise NotImplementedError('Implement this abstract method')

def _get_messages(self, state: State) -> list[Message]:
messages = [
{'role': 'system', 'content': self.system_message},
{'role': 'user', 'content': self.in_context_example},
Message(
message={'role': 'system', 'content': self.system_message},
condensable=False,
),
Message(
message={'role': 'user', 'content': self.in_context_example},
condensable=False,
),
]

if state.history.summary:
summary_message = self.get_action_message(state.history.summary)
if summary_message:
messages.append(summary_message)
for event in state.history.get_events():
# create a regular message from an event
if isinstance(event, Action):
message = self.get_action_message(event)
elif isinstance(event, Observation):
message = self.get_observation_message(event)
else:
raise ValueError(f'Unknown event type: {type(event)}')

# add regular message
if message:
messages.append(message)
if event.id > state.history.last_summarized_event_id:
# create a regular message from an event
if isinstance(event, Action):
message = self.get_action_message(event)
elif isinstance(event, Observation):
message = self.get_observation_message(event)
else:
raise ValueError(f'Unknown event type: {type(event)}')
# add regular message
if message:
messages.append(message)

# the latest user message is important:
# we want to remind the agent of the environment constraints
latest_user_message = next(
(m for m in reversed(messages) if m['role'] == 'user'), None
(m for m in reversed(messages) if m.message['role'] == 'user'), None
)

# add a reminder to the prompt
if latest_user_message:
latest_user_message['content'] += (
latest_user_message.message['content'] += (
f'\n\nENVIRONMENT REMINDER: You have {state.max_iterations - state.iteration} turns left to complete the task. When finished reply with <finish></finish>'
)

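The Message class imported from opendevin.llm.messages is not included in this diff. A minimal sketch of the shape the code above assumes, reconstructed from usage (the actual file may differ):

from dataclasses import dataclass


@dataclass
class Message:
    """A chat message plus the metadata the condenser needs.

    Reconstructed from usage in this PR: `message` holds the usual
    {'role': ..., 'content': ...} dict, `condensable` marks whether the
    condenser may fold the message into a summary, and `event_id` ties
    the message back to the event stream so last_summarized_event_id
    can be tracked.
    """

    message: dict[str, str]
    condensable: bool = True
    event_id: int = -1
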
146 changes: 146 additions & 0 deletions opendevin/condenser/condenser.py
@@ -0,0 +1,146 @@
from opendevin.core.exceptions import (
SummarizeError,
)
from opendevin.core.logger import opendevin_logger as logger
from opendevin.events.action import (
AgentSummarizeAction,
)
from opendevin.llm.messages import Message

from .prompts import (
MESSAGE_SUMMARY_WARNING_FRAC,
SUMMARY_PROMPT_SYSTEM,
parse_summary_response,
)


class CondenserMixin:
"""Condenses a group of condensable messages as done by MemGPT."""

def condense(
self,
messages: list[Message],
):
        # Start past the system and example messages, and collect messages
        # for summarization until we reach the desired truncation token
        # fraction (e.g. 50%).
# Do not allow truncation for in-context examples of function calling
token_counts = [
self.get_token_count([message]) # type: ignore
for message in messages
if message.condensable
]
message_buffer_token_count = sum(token_counts) # no system and example message

desired_token_count_to_summarize = int(
message_buffer_token_count * self.config.message_summary_trunc_tokens_frac # type: ignore
)

        candidate_messages_to_summarize = []
        tokens_so_far = 0
        last_summarized_event_id = None  # stays None if the threshold is never reached
for message in messages:
enyst (Collaborator) commented on Jul 29, 2024:

We gave up using State, but then we still need to keep the user message that is likely to be the task. That is computed here: https://github.com/OpenDevin/OpenDevin/blob/84a6e90dc2e8b1e096af33f7545fa1969853c7d4/opendevin/controller/state/state.py#L169

Maybe we can set it as non-condensable in the agent, before we get here? Otherwise we're losing relevant information, I think.

khushvind (Author) replied:

Yes, something needs to be done about user messages; we are currently losing relevant information. But marking user messages as non-condensable alone may not be sufficient, since some user messages might not contribute much relevant information about the final task.

Currently summarize_messages() takes summarized_actions and summarized_observations as args. Adding one more arg, summarized_user_messages, might also help: it would summarize all the user messages so far.

enyst (Collaborator) replied on Jul 30, 2024:

> It is currently losing relevant information. But marking user messages as non-condensable alone may not be sufficient.

I suggest not doing it for all user messages, but only for the most likely task. The method linked above, get_current_user_intent(), returns a single user message: the one most likely to be the current task. If we set that one as non-condensable, it will already improve the behavior. For SWE-bench, that method returns the initial user message, which is definitely the task itself.

For interactive user sessions, you're right that it's more complicated. The method tries to guess the best user message and returns the last one after a FinishAction, i.e. the user message just after one task ended and a new one started. That's a best guess, of course, that the new task is in that message, but IMHO it will do for now. Do you think it's likely it won't work?

> I was thinking, maybe adding one more arg, summarized_user_messages, might also help. This will summarize all the user messages till now.

I see what you mean. In the previous version (PR 2021), we summarized the user messages in a summary of their own, as you suggest, once there were no more agent messages (so at a later stage), again excepting the current user intent. The intent as detected (or attempted 😅) remained uncondensed as long as possible. Doing something like this might still be a good option, but I don't think it's necessary for this PR.

The important part, it seems to me, is that if we miss the initial user message, we risk the agent not having enough information about the task to perform correctly.
if message.condensable:
candidate_messages_to_summarize.append(message)
tokens_so_far += self.get_token_count([message]) # type: ignore
if tokens_so_far > desired_token_count_to_summarize:
last_summarized_event_id = message.event_id
break

# TODO: Add functionality for preserving last N messages
# MESSAGE_SUMMARY_TRUNC_KEEP_N_LAST = 3
enyst (Collaborator) commented:

Just to note, a minor thought: 2 or 4 messages would seem a better default, because a lot of the time they will be action, obs, action, obs... Not all the time, but if we pick something, we might want to target the common behavior.

khushvind (Author) replied:

Hmm, makes sense to keep it at 2 or 4.

# if preserve_last_N_messages:
# candidate_messages_to_summarize = candidate_messages_to_summarize[:-MESSAGE_SUMMARY_TRUNC_KEEP_N_LAST]
# token_counts = token_counts[:-MESSAGE_SUMMARY_TRUNC_KEEP_N_LAST]
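A minimal sketch of the preservation logic the TODO above describes, using the 2-or-4 default the review thread settles on (hypothetical; the PR leaves this unimplemented):

from opendevin.llm.messages import Message

MESSAGE_SUMMARY_TRUNC_KEEP_N_LAST = 4  # roughly two action/observation pairs


def drop_preserved_tail(candidates: list[Message]) -> list[Message]:
    """Exclude the newest N condensable messages from summarization so the
    agent keeps its most recent action/observation context verbatim."""
    if len(candidates) <= MESSAGE_SUMMARY_TRUNC_KEEP_N_LAST:
        return []
    return candidates[:-MESSAGE_SUMMARY_TRUNC_KEEP_N_LAST]
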

logger.debug(
f'message_summary_trunc_tokens_frac={self.config.message_summary_trunc_tokens_frac}' # type: ignore
)
# logger.debug(f'MESSAGE_SUMMARY_TRUNC_KEEP_N_LAST={MESSAGE_SUMMARY_TRUNC_KEEP_N_LAST}')
logger.debug(f'token_counts={token_counts}')
logger.debug(f'message_buffer_token_count={message_buffer_token_count}')
logger.debug(
f'desired_token_count_to_summarize={desired_token_count_to_summarize}'
)
logger.debug(
f'len(candidate_messages_to_summarize)={len(candidate_messages_to_summarize)}'
)

if len(candidate_messages_to_summarize) == 0:
raise SummarizeError(
f"Summarize error: tried to run summarize, but couldn't find enough messages to compress [len={len(messages)}]"
)

# TODO: Try to make an assistant message come after the cutoff

message_sequence_to_summarize = candidate_messages_to_summarize

if len(message_sequence_to_summarize) <= 1:
# This prevents a potential infinite loop of summarizing the same message over and over
raise SummarizeError(
f"Summarize error: tried to run summarize, but couldn't find enough messages to compress [len={len(message_sequence_to_summarize)} <= 1]"
)
else:
print(
f'Attempting to summarize with last summarized event id = {last_summarized_event_id}'
)

action_response = self.summarize_messages(
message_sequence_to_summarize=message_sequence_to_summarize
)
summary_action: AgentSummarizeAction = parse_summary_response(action_response)
summary_action.last_summarized_event_id = (
last_summarized_event_id if last_summarized_event_id else -1
)
return summary_action

def _format_summary_history(self, message_history: list[dict]) -> str:
# TODO use existing prompt formatters for this (eg ChatML)
return '\n'.join([f'{m["role"]}: {m["content"]}' for m in message_history])

def summarize_messages(self, message_sequence_to_summarize: list[Message]):
"""Summarize a message sequence using LLM"""
context_window = self.config.max_input_tokens # type: ignore
summary_prompt = SUMMARY_PROMPT_SYSTEM
summary_input = self._format_summary_history(
self.get_text_messages(message_sequence_to_summarize) # type: ignore
)
summary_input_tkns = self.get_token_count(summary_input) # type: ignore
if context_window is None:
raise ValueError('context_window should not be None')
if summary_input_tkns > MESSAGE_SUMMARY_WARNING_FRAC * context_window:
trunc_ratio = (
MESSAGE_SUMMARY_WARNING_FRAC * context_window / summary_input_tkns
) * 0.8 # For good measure...
cutoff = int(len(message_sequence_to_summarize) * trunc_ratio)
            curr_summary = parse_summary_response(
                self.summarize_messages(
                    message_sequence_to_summarize=message_sequence_to_summarize[:cutoff]
                )
            )
            curr_summary_message = (
                'Summary of all Action and Observations till now. \n'
                + 'Action: '
                + curr_summary.summarized_actions
                + '\nObservation: '
                + curr_summary.summarized_observations
            )
            pruned_sequence = [
                Message({'role': 'assistant', 'content': curr_summary_message})
            ] + message_sequence_to_summarize[cutoff:]
            summary_input = self._format_summary_history(
                self.get_text_messages(pruned_sequence)  # type: ignore
            )

message_sequence = []
message_sequence.append(Message({'role': 'system', 'content': summary_prompt}))
message_sequence.append(Message({'role': 'user', 'content': summary_input}))

response = self.completion( # type: ignore
messages=message_sequence,
stop=[
'</execute_ipython>',
'</execute_bash>',
'</execute_browse>',
],
temperature=0.0,
)

print(f'summarize_messages gpt reply: {response.choices[0]}')

action_response = response['choices'][0]['message']['content']
return action_response
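The prompts module referenced at the top of this file (SUMMARY_PROMPT_SYSTEM, MESSAGE_SUMMARY_WARNING_FRAC, parse_summary_response) is not shown in this diff. A minimal sketch of the contract parse_summary_response is assumed to fulfil, inferred from how condense() and summarize_messages() use it (hypothetical reconstruction):

import json

from opendevin.events.action import AgentSummarizeAction


def parse_summary_response(response: str) -> AgentSummarizeAction:
    """Assumes the summarizer LLM replies with JSON of the form
    {"args": {"summarized_actions": "...", "summarized_observations": "..."}}
    and maps it onto the fields AgentSummarizeAction carries in this PR.
    """
    parsed = json.loads(response)
    return AgentSummarizeAction(
        summarized_actions=parsed['args']['summarized_actions'],
        summarized_observations=parsed['args']['summarized_observations'],
    )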