Skip to content
10 changes: 5 additions & 5 deletions pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,10 @@ def main():
"--timeout", type=int, default=3600, help="Timeout in seconds for agent execution"
)
parser.add_argument(
"--stream",
action="store_true",
default=False,
help="Use streaming execution (default: False, uses non-streaming)",
"--reasoning-effort",
default="default",
choices=["default", "minimal", "low", "medium", "high"],
help="Reasoning effort level for supported models (default: None)",
)

# Output configuration
Expand Down Expand Up @@ -138,7 +138,7 @@ def main():
timeout=args.timeout,
exp_name=run_exp_name,
output_dir=run_output_dir,
stream=args.stream,
reasoning_effort=args.reasoning_effort,
)

pipeline.run_evaluation(args.tasks)
Expand Down
11 changes: 11 additions & 0 deletions src/agents/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""
MCPMark Agent Module
====================

Provides a unified agent implementation using LiteLLM for model interactions
and minimal MCP server management.
"""

from .mcpmark_agent import MCPMarkAgent

__all__ = ["MCPMarkAgent"]
11 changes: 11 additions & 0 deletions src/agents/mcp/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""
MCP (Model Context Protocol) Components
========================================

Minimal MCP server implementations for MCPMark.
"""

from .stdio_server import MCPStdioServer
from .http_server import MCPHttpServer

__all__ = ["MCPStdioServer", "MCPHttpServer"]
78 changes: 78 additions & 0 deletions src/agents/mcp/http_server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
"""
Minimal MCP HTTP Server Implementation
=======================================

Provides HTTP-based MCP server communication for services like GitHub.
"""

import asyncio
from contextlib import AsyncExitStack
from typing import Any, Dict, List, Optional

from mcp import ClientSession
from mcp.client.streamable_http import streamablehttp_client

class MCPHttpServer:
    """
    HTTP-based MCP client using the official MCP Python SDK
    (Streamable HTTP transport).

    Use it either as an async context manager (``async with``) or by calling
    :meth:`start` / :meth:`stop` explicitly. ``timeout`` (seconds) bounds the
    session initialization, ``list_tools`` and ``call_tool`` requests.
    """

    def __init__(
        self,
        url: str,
        headers: Optional[Dict[str, str]] = None,
        timeout: int = 30,
    ):
        """
        Store connection settings; no network activity happens here.

        Args:
            url: Endpoint of the MCP server; trailing slashes are stripped.
            headers: Optional HTTP headers passed to the transport.
            timeout: Per-request timeout in seconds.
        """
        self.url = url.rstrip("/")
        self.headers = headers or {}
        self.timeout = timeout

        # Owns the transport and session contexts so they close together.
        self._stack: Optional[AsyncExitStack] = None
        self.session: Optional[ClientSession] = None
        # Tool definitions from the first list_tools() call of this session.
        self._tools_cache: Optional[List[Dict[str, Any]]] = None

    async def __aenter__(self):
        """Start the client and return it."""
        await self.start()
        return self

    async def __aexit__(self, exc_type, exc, tb):
        """Stop the client; exceptions from the body are not suppressed."""
        await self.stop()

    async def start(self):
        """
        Open Streamable HTTP transport and initialize MCP session.

        If the client is already started, the previous session is closed
        first so it is not leaked. If any step fails (including a timeout
        while initializing), everything opened so far is closed before the
        exception propagates.

        Raises:
            asyncio.TimeoutError: If session initialization exceeds ``timeout``.
        """
        if self._stack is not None:
            # A second start() would otherwise orphan the previous stack.
            await self.stop()

        self._stack = AsyncExitStack()
        try:
            read_stream, write_stream, _ = await self._stack.enter_async_context(
                streamablehttp_client(self.url, headers=self.headers)
            )
            self.session = await self._stack.enter_async_context(
                ClientSession(read_stream, write_stream)
            )
            await asyncio.wait_for(self.session.initialize(), timeout=self.timeout)
        except BaseException:
            # __aexit__ is never invoked when __aenter__ raises, so clean up
            # here (BaseException so cancellation is covered too), then re-raise.
            await self.stop()
            raise

    async def stop(self):
        """Close the session/transport cleanly. Safe to call when not started."""
        # Reset state first so the object is consistent even if closing raises.
        stack, self._stack = self._stack, None
        self.session = None
        self._tools_cache = None
        if stack is not None:
            await stack.aclose()

    async def list_tools(self) -> List[Dict[str, Any]]:
        """
        Return tool definitions (cached).

        The first call queries the server; later calls return the same cached
        list until :meth:`stop` clears it.

        Raises:
            RuntimeError: If the client has not been started.
            asyncio.TimeoutError: If the request exceeds ``timeout``.
        """
        if self._tools_cache is not None:
            return self._tools_cache
        if not self.session:
            raise RuntimeError("MCP HTTP client not started")

        resp = await asyncio.wait_for(self.session.list_tools(), timeout=self.timeout)
        self._tools_cache = [t.model_dump() for t in resp.tools]
        return self._tools_cache

    async def call_tool(self, name: str, arguments: Dict[str, Any]) -> Any:
        """
        Invoke a remote tool and return the structured result.

        Args:
            name: Tool name.
            arguments: Arguments passed to the tool.

        Returns:
            The SDK result object converted with ``model_dump()``.

        Raises:
            RuntimeError: If the client has not been started.
            asyncio.TimeoutError: If the call exceeds ``timeout``.
        """
        if not self.session:
            raise RuntimeError("MCP HTTP client not started")

        result = await asyncio.wait_for(
            self.session.call_tool(name, arguments), timeout=self.timeout
        )
        return result.model_dump()
46 changes: 46 additions & 0 deletions src/agents/mcp/stdio_server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""
Minimal MCP Stdio Server Implementation
========================================

Provides stdio-based MCP server communication for services like
Notion, Filesystem, Playwright, and Postgres.
"""

import asyncio
import os
from contextlib import AsyncExitStack
from typing import Any, Dict, List, Optional

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

class MCPStdioServer:
    """
    Lightweight wrapper around the official MCP Python SDK.

    Use it either as an async context manager (``async with``) or by calling
    :meth:`start` / :meth:`stop` explicitly. ``timeout`` (seconds) bounds the
    session initialization, ``list_tools`` and ``call_tool`` requests.
    """

    def __init__(
        self,
        command: str,
        args: List[str],
        env: Optional[Dict[str, str]] = None,
        timeout: int = 120,
    ):
        """
        Store the launch parameters; nothing is started here.

        Args:
            command: Executable used to start the server.
            args: Command-line arguments for ``command``.
            env: Extra environment variables; they are layered on top of the
                current process environment and win on key conflicts.
            timeout: Per-request timeout in seconds.
        """
        self.params = StdioServerParameters(
            command=command, args=args, env={**os.environ, **(env or {})}
        )
        self.timeout = timeout
        # Owns the transport and session contexts so they close together.
        self._stack: Optional[AsyncExitStack] = None
        self._streams = None
        self.session: Optional[ClientSession] = None

    async def __aenter__(self):
        """Start the server connection and return this wrapper."""
        await self.start()
        return self

    async def __aexit__(self, exc_type, exc, tb):
        """Stop the connection; exceptions from the body are not suppressed."""
        await self.stop()

    async def start(self):
        """
        Open the stdio transport and initialize the MCP session.

        If already started, the previous session is closed first so it is
        not leaked. If any step fails (including a timeout while
        initializing), everything opened so far is closed before the
        exception propagates.

        Raises:
            asyncio.TimeoutError: If session initialization exceeds ``timeout``.
        """
        if self._stack is not None:
            # A second start would otherwise orphan the previous stack.
            await self.stop()

        self._stack = AsyncExitStack()
        try:
            read, write = await self._stack.enter_async_context(
                stdio_client(self.params)
            )
            self.session = await self._stack.enter_async_context(
                ClientSession(read, write)
            )
            await asyncio.wait_for(self.session.initialize(), timeout=self.timeout)
        except BaseException:
            # __aexit__ is never invoked when __aenter__ raises, so clean up
            # here (BaseException so cancellation is covered too), then re-raise.
            await self.stop()
            raise

    async def stop(self):
        """Close the session/transport. Safe to call when not started."""
        # Reset state first so the object is consistent even if closing raises.
        stack, self._stack = self._stack, None
        self.session = None
        if stack is not None:
            await stack.aclose()

    async def list_tools(self) -> List[Dict[str, Any]]:
        """
        Return the server's tool definitions as plain dicts.

        Raises:
            RuntimeError: If the client has not been started.
            asyncio.TimeoutError: If the request exceeds ``timeout``.
        """
        if self.session is None:
            raise RuntimeError("MCP stdio client not started")

        resp = await asyncio.wait_for(self.session.list_tools(), timeout=self.timeout)
        return [t.model_dump() for t in resp.tools]

    async def call_tool(self, name: str, arguments: Dict[str, Any]) -> Any:
        """
        Invoke a tool and return the result converted with ``model_dump()``.

        Args:
            name: Tool name.
            arguments: Arguments passed to the tool.

        Raises:
            RuntimeError: If the client has not been started.
            asyncio.TimeoutError: If the call exceeds ``timeout``.
        """
        if self.session is None:
            raise RuntimeError("MCP stdio client not started")

        result = await asyncio.wait_for(
            self.session.call_tool(name, arguments), timeout=self.timeout
        )
        return result.model_dump()  # as in list_tools: convert to a dict
Loading