⚡ [agents] Refactor tool handling and add dependency management support

YuanmingLeee · YuanmingLeee · commit e7fe3d6a2358 · 2025-07-23T02:09:35.000+08:00
- Utilize `safe_fileio` for secure file I/O within tool handling, and introduce a dependency retrieval method (`deps`) to manage runtime requirements.

Improves code modularity to better align with the agent processing flows for code generation and execution.
diff --git a/exp/agents.py b/exp/agents.py
@@ -4,16 +4,16 @@
 
 from jinja2 import Template
 from langchain_core.language_models import LanguageModelInput
-from langchain_core.messages import ToolMessage, HumanMessage, SystemMessage, BaseMessage, AIMessage
+from langchain_core.messages import ToolMessage, HumanMessage, SystemMessage, BaseMessage, AIMessage, ToolCall
 from langchain_core.runnables import Runnable
 from langgraph.checkpoint.memory import MemorySaver
 from langgraph.func import task, entrypoint
 from langgraph.prebuilt import ToolNode
 from rich.console import Console
 
-from exp.utils import get_vllm_with_tools
+from exp.utils import get_vllm_with_tools, safe_fileio
 from mle.function import (
-    read_file, create_file, write_file, list_files,
+    read_file, create_file, list_files,
     create_directory, preview_csv_data, preview_zip_structure, unzip_data
 )
 from mle.utils import clean_json_string
@@ -149,39 +149,52 @@
     textwrap.dedent(
         """
         You are a **Machine Learning Engineer** tasked with implementing a solution based on the provided requirements by the advisor.
-        You will be given the whole project plan, and each task will be provided to you one by one.
         Requirements: {{ advisor_report | tojson(indent=2) }}
-        Implementation Plan: {{ plan | tojson(indent=2) }}
         Working Directory: {{ working_dir }}
         Environment: {{ env | tojson(indent=2) }}
         
-        Your task is to generate the complete, working Python code that implements the solution. Call the write_file, mkdir, and read_file function tools to inspect and generate the necessary files.
-        IMPORTANT: 
-        1. Generate a single file `solution.py` that contains all the code for the solution, including imports, constants, functions, classes, main guard (`if __name__ == "__main__":`), argument parsing (if needed), execution logic, and docstrings.
-        Focus on:
+        Your task is to generate the complete, working Python code. Focus points to consider:
         1. Clean, readable code
         2. Proper data handling
         3. Model implementation
         4. Training and evaluation logic
         5. Kaggle submission format
+        """.strip()
+    )
+)
+
+CODE_PROMPT = Template(
+    textwrap.dedent(
+        """
+        Implement Python code to solve the following task:
+        ## Task: {{ task }}
+        {{ description }}
         
-        After finalizing the code, you will call the `create_file` function to save the code to `solution.py`
-        After the tool calling result is given back, you should also provide the dependencies required to run the code and the command to run the code in a JSON format:
+        Make sure to follow the requirements and provide the code in a single Python file. Call any necessary tools to inspect the data.
+        Once ready, call the ` create_file ` tool to save the code.
+        
+        The code should include:
+        1. A single file `solution.py` that contains all the code for the solution, including imports, constants, functions, classes, main guard (`if __name__ == "__main__":`), argument parsing (if needed), execution logic, and docstrings.
+        2. Overwrite existing code; do not supply diffs or partial patches.
+        """.strip()
+    )
+)
+
+CODER_DEPS_PROMPT = Template(
+    textwrap.dedent(
+        """
+        Look at the latest created code in the chat history and analyze the dependencies required to run the code.
+        Providing the dependencies required and the command to run the code in a JSON format.
+        Example (for JSON schema illustration only):
         {
             "dependency": ["pkg1", "pkg2", "..."],
-            "command": "python solution.py"
+            "command": "python solution.py",
+            "entryfile": "solution.py"
         }
         """.strip()
     )
 )
 
-CODE_PROMPT = Template(
-    """
-    ## Task: {{ task }}
-    {{ description }}
-    """
-)
-
 
 class AdviseAgent:
     console: Console = None
@@ -342,7 +355,6 @@ class CodeAgent:
 
     class State(TypedDict):
         advisor_report: dict
-        plan: dict
         task: str
         description: str
         env: dict
@@ -358,16 +370,15 @@ def __new__(cls, model_name, working_dir='.', console=None):
             working_dir: the working directory.
             console: the console to use.
         """
-        tools =             [
-                read_file,
-                create_file,
-                write_file,
-                list_files,
-                create_directory,
-                preview_csv_data,
-                preview_zip_structure,
-                unzip_data,
-            ]
+        tools = [
+            safe_fileio(working_dir)(read_file),
+            safe_fileio(working_dir, path_params=["path"])(create_file),
+            safe_fileio(working_dir)(list_files),
+            safe_fileio(working_dir, path_params=["path"])(create_directory),
+            safe_fileio(working_dir, path_params=["path"])(preview_csv_data),
+            safe_fileio(working_dir, path_params=["path"])(preview_zip_structure),
+            safe_fileio(working_dir, path_params=["extract_path"])(unzip_data),
+        ]
 
         cls.model = get_vllm_with_tools(model_name, tools)
         cls.working_dir = working_dir
@@ -377,7 +388,22 @@ def __new__(cls, model_name, working_dir='.', console=None):
 
     @staticmethod
     @task
-    def code(task: str, description: str, first_call=True) -> AIMessage:
+    def setup(advisor_report: dict, env: dict):
+        """Seed the shared chat history with the rendered system prompt when it is empty."""
+        if CodeAgent.chat_history:
+            # History already populated; leave it untouched.
+            return
+
+        system_prompt = CODER_SYSTEM_PROMPT.render(
+            working_dir=CodeAgent.working_dir,
+            advisor_report=advisor_report,
+            env=env,
+        )
+        CodeAgent.chat_history.append(SystemMessage(content=system_prompt))
+
+    @staticmethod
+    @task
+    def code(task: str, description: str, first_call=True) -> AIMessage | dict:
         """
         Handle the query from the model query response.
         Args:
@@ -396,10 +422,29 @@ def code(task: str, description: str, first_call=True) -> AIMessage:
                         )
                     )
                 )
-            message = CodeAgent.model.invoke(CodeAgent.chat_history)
+            message: AIMessage = CodeAgent.model.invoke(CodeAgent.chat_history)
 
             CodeAgent.chat_history.append(message)
-        return message
+            return message
+
+    @staticmethod
+    @task
+    def deps() -> dict:
+        """
+        Ask the model for the dependencies and the run command of the latest generated code.
+        Returns:
+            The reply decoded by json.loads, or the result of clean_json_string if that fails.
+        """
+        CodeAgent.chat_history.append(
+            HumanMessage(content=CODER_DEPS_PROMPT.render())
+        )
+        message = CodeAgent.model.invoke(CodeAgent.chat_history)
+
+        CodeAgent.chat_history.append(message)
+        try:
+            return json.loads(message.content)
+        except json.JSONDecodeError:
+            return clean_json_string(message.content)
 
     @staticmethod
     @entrypoint(checkpointer=checkpointer)
@@ -411,18 +456,7 @@ def graph(state: State) -> dict:
         Returns:
             The code for the task.
         """
-        # Set up the chat history with the system prompt if not already set
-        if len(CodeAgent.chat_history) == 0:
-            CodeAgent.chat_history.append(
-                SystemMessage(
-                    content=CODER_SYSTEM_PROMPT.render(
-                        working_dir=CodeAgent.working_dir,
-                        advisor_report=state['advisor_report'],
-                        plan=state['plan'],
-                        env=state['env'],
-                    )
-                )
-            )
+        CodeAgent.setup(state['advisor_report'], state['env'])
 
         try_times = 5
         while try_times > 0:
@@ -432,20 +466,28 @@ def graph(state: State) -> dict:
                 first_call=(try_times == 5)
             ).result()
             try_times -= 1
-
             if isinstance(message, AIMessage):
-                # If the message is an AIMessage, check if it need tool calls
                 if message.tool_calls:
-                    message = CodeAgent.tool_node.invoke({
-                        "messages": CodeAgent.chat_history[-1:],
-                    })
+                    CodeAgent.console.print(f"Calling tools {[tool['name'] for tool in message.tool_calls]}")
+                    message = CodeAgent.tool_node.invoke(
+                        {
+                            "messages": [message],
+                        }
+                    )
                     CodeAgent.chat_history.extend(message['messages'])
+
+                    # If the tool `create_file` succeeded, break the loop
+                    if any(
+                        isinstance(msg, ToolMessage) and msg.name == "create_file" and
+                        msg.content and "error" not in msg.content.lower()
+                        for msg in message['messages']
+                    ):
+                        break
                 else:
-                    # If no tool calls, return the message
-                    CodeAgent.chat_history.append(message)
                     break
             else:
                 break
 
-        CodeAgent.console.print(CodeAgent.chat_history)
-        return message
+        # Check the dependencies and command to run the code
+        deps = CodeAgent.deps().result()
+        return deps
diff --git a/exp/kaggle_solver.py b/exp/kaggle_solver.py
@@ -43,6 +43,7 @@
 
 from mlebench.registry import registry
 
+from mle.function import read_file
 from mle.utils import print_in_box
 
 
diff --git a/exp/utils.py b/exp/utils.py
@@ -4,15 +4,19 @@
 Email: yuanmingleee@gmail.com
 Date: Jul 12, 2025
 """
+import inspect
 import logging
 import mimetypes
 import os
 import random
+import tempfile
 import venv
 import zipfile
 from collections import defaultdict
+from functools import wraps
 from io import StringIO
 from pathlib import Path
+from typing import Callable, Any
 
 import pandas as pd
 from langchain.chat_models import init_chat_model
@@ -223,3 +227,50 @@ def create_virtualenv(cwd='.', path='.venv'):
     builder = venv.EnvBuilder(with_pip=True)
     builder.create(venv_path)
     return venv_path.absolute() / 'bin' / 'python' if os.name != 'nt' else venv_path / 'Scripts' / 'python.exe'
+
+
+
+def safe_fileio(working_dir: str, path_params: str | list[str] | None = None) -> Callable:
+    """Build a decorator that confines a file tool's path arguments to allowed directories.
+
+    Each argument named in ``path_params`` is resolved (relative paths against
+    ``working_dir``) and replaced by the absolute path; a path outside ``working_dir``
+    and the system temp directory raises ``PermissionError``, a non-path ``TypeError``.
+    """
+    working_dir = Path(working_dir).resolve()
+    temp_dir = Path(tempfile.gettempdir()).resolve()
+    # Normalise once: a single name, a list of names, or None are all accepted.
+    path_params_ = [path_params] if isinstance(path_params, str) else (path_params or [])
+
+    def decorator(func: Callable):
+        @wraps(func)
+        def wrapper(*args, **kwargs) -> Any:
+            sig = inspect.signature(func)
+            bound = sig.bind(*args, **kwargs)
+            bound.apply_defaults()
+
+            # Validate and rewrite specified path parameters
+            for param in path_params_:
+                original = bound.arguments.get(param)
+                if original is None:
+                    continue
+                if not isinstance(original, (str, Path)):
+                    raise TypeError(f"Expected str or Path for '{param}', got {type(original)}")
+                # Joining onto working_dir leaves an absolute path unchanged.
+                abs_path = (working_dir / original).resolve()
+                # Compare path components, not string prefixes: "/work/dir2" must not
+                # be accepted as being inside "/work/dir".
+                if not (abs_path.is_relative_to(working_dir) or abs_path.is_relative_to(temp_dir)):
+                    raise PermissionError(f"Access denied: {abs_path} is outside allowed directories")
+                bound.arguments[param] = abs_path
+
+            # Change CWD temporarily
+            prev_cwd = os.getcwd()
+            os.chdir(working_dir)
+            try:
+                return func(*bound.args, **bound.kwargs)
+            finally:
+                os.chdir(prev_cwd)
+
+        return wrapper
+    return decorator

Original file line number	Diff line number	Diff line change
`@@ -43,6 +43,7 @@`
`43`	`43`
`44`	`44`	`from mlebench.registry import registry`
`45`	`45`
	`46`	`+from mle.function import read_file`
`46`	`47`	`from mle.utils import print_in_box`
`47`	`48`
`48`	`49`