From a2db8ebd5437e38ad8e70d66cf193f559a19859d Mon Sep 17 00:00:00 2001
From: mmltt-star
Date: Wed, 17 Dec 2025 16:05:06 +0800
Subject: [PATCH 1/3] support Chinese URLs

---
 config/benchmark/browsecomp-zh.yaml          |  2 +-
 src/tool/mcp_servers/searching_mcp_server.py | 20 ++++++++++++++++----
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/config/benchmark/browsecomp-zh.yaml b/config/benchmark/browsecomp-zh.yaml
index 8dfb1e58..d44e1cba 100644
--- a/config/benchmark/browsecomp-zh.yaml
+++ b/config/benchmark/browsecomp-zh.yaml
@@ -11,7 +11,7 @@ data:
   whitelist: [] # Optional: List of specific task_ids to run
 
 execution:
-  max_tasks: null # null = no limit, or specify a number
+  max_tasks: 5 # null = no limit, or specify a number
   max_concurrent: 5 # Number of parallel tasks
   pass_at_k: 1 # Number of attempts per task
 
diff --git a/src/tool/mcp_servers/searching_mcp_server.py b/src/tool/mcp_servers/searching_mcp_server.py
index 4187e4a1..ab02877f 100644
--- a/src/tool/mcp_servers/searching_mcp_server.py
+++ b/src/tool/mcp_servers/searching_mcp_server.py
@@ -15,6 +15,7 @@ import asyncio
 
 from .utils.smart_request import smart_request, request_to_json
 from src.logging.logger import setup_mcp_logging
+import urllib.parse
 
 SERPER_API_KEY = os.environ.get("SERPER_API_KEY", "")
 
@@ -77,6 +78,12 @@ def filter_google_search_result(result_content: str) -> str:
             if "snippet" in item:
                 del item["snippet"]
 
+    # Decode percent-encoded UTF-8 so Chinese URLs are human-readable
+    if "organic" in data:
+        for item in data["organic"]:
+            if "link" in item:
+                item["link"] = urllib.parse.unquote(item["link"])
+
     # Return filtered JSON
     return json.dumps(data, ensure_ascii=False, indent=2)
 
@@ -220,7 +227,7 @@ async def wiki_get_page_content(entity: str, first_sentences: int = 10) -> str:
         # TODO: Context Engineering Needed
         result_parts.append(f"Content: {page.content}")
 
-        result_parts.append(f"URL: {page.url}")
+        result_parts.append(f"URL: {urllib.parse.unquote(page.url)}")
 
         return "\n\n".join(result_parts)
 
@@ -420,7 +427,7 @@ async def search_wiki_revision(
             revisions_details.append(
                 f"{i}. Revision ID: {revision_id}\n"
                 f"   Timestamp: {formatted_time}\n"
-                f"   URL: {rev_url}"
+                f"   URL: {urllib.parse.unquote(rev_url)}"
             )
 
         if revisions_details:
@@ -469,6 +476,11 @@ async def search_archived_webpage(url: str, year: int, month: int, day: int) ->
         original_url = url
         url = f"https://{url}"
         protocol_hint = f"[NOTE]: Automatically added 'https://' to URL '{original_url}' -> '{url}'\n\n"
+    url_chinese = urllib.parse.unquote(url)
+    if url_chinese != url:
+        protocol_hint += f"[NOTE]: Automatically translated URL '{url}' -> '{url_chinese}'\n\n"
+        url = url_chinese
+
     hint_message = ""
 
     if ".wikipedia.org" in url:
@@ -591,7 +603,7 @@ async def search_archived_webpage(url: str, year: int, month: int, day: int) ->
                 f"Archive Found: Archived version located\n\n"
                 f"Original URL: {url}\n"
                 f"Requested Date: {year:04d}-{month:02d}-{day:02d}\n"
-                f"Archived URL: {archived_url}\n"
+                f"Archived URL: {urllib.parse.unquote(archived_url)}\n"
                 f"Archived Timestamp: {formatted_time}\n"
             )
             + "\n\nHint: You can also use the `scrape_website` tool to get the webpage content of a URL."
@@ -648,7 +660,7 @@ async def search_archived_webpage(url: str, year: int, month: int, day: int) ->
             + (
                 f"Archive Found: Most recent archived version\n\n"
                 f"Original URL: {url}\n"
-                f"Archived URL: {archived_url}\n"
+                f"Archived URL: {urllib.parse.unquote(archived_url)}\n"
                 f"Archived Timestamp: {formatted_time}\n"
            )
             + "\n\nHint: You can also use the `scrape_website` tool to get the webpage content of a URL."
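
For reference, a minimal sketch of what the `urllib.parse.unquote` calls in this patch do; the Wikipedia URL below is only an illustrative example, not taken from the benchmark data:

    import urllib.parse

    # Percent-encoded UTF-8, as typically returned by search APIs and wiki libraries
    encoded = "https://zh.wikipedia.org/wiki/%E4%B8%AD%E6%96%87"

    # unquote() decodes %XX escape sequences as UTF-8 by default
    decoded = urllib.parse.unquote(encoded)
    print(decoded)  # https://zh.wikipedia.org/wiki/中文
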
From de97e03e26f5307d6dca38661b3d151be679534f Mon Sep 17 00:00:00 2001
From: mmltt-star
Date: Wed, 17 Dec 2025 16:06:01 +0800
Subject: [PATCH 2/3] reformat

---
 src/tool/mcp_servers/searching_mcp_server.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/tool/mcp_servers/searching_mcp_server.py b/src/tool/mcp_servers/searching_mcp_server.py
index ab02877f..9002b391 100644
--- a/src/tool/mcp_servers/searching_mcp_server.py
+++ b/src/tool/mcp_servers/searching_mcp_server.py
@@ -478,9 +478,10 @@ async def search_archived_webpage(url: str, year: int, month: int, day: int) ->
         protocol_hint = f"[NOTE]: Automatically added 'https://' to URL '{original_url}' -> '{url}'\n\n"
     url_chinese = urllib.parse.unquote(url)
     if url_chinese != url:
-        protocol_hint += f"[NOTE]: Automatically translated URL '{url}' -> '{url_chinese}'\n\n"
+        protocol_hint += (
+            f"[NOTE]: Automatically translated URL '{url}' -> '{url_chinese}'\n\n"
+        )
         url = url_chinese
-
     hint_message = ""
 
     if ".wikipedia.org" in url:

From 079012bc8d8e888e48cfae151634c42129e5c6bb Mon Sep 17 00:00:00 2001
From: mmltt-star
Date: Wed, 17 Dec 2025 16:07:52 +0800
Subject: [PATCH 3/3] recover config

---
 config/benchmark/browsecomp-zh.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/benchmark/browsecomp-zh.yaml b/config/benchmark/browsecomp-zh.yaml
index d44e1cba..8dfb1e58 100644
--- a/config/benchmark/browsecomp-zh.yaml
+++ b/config/benchmark/browsecomp-zh.yaml
@@ -11,7 +11,7 @@ data:
   whitelist: [] # Optional: List of specific task_ids to run
 
 execution:
-  max_tasks: 5 # null = no limit, or specify a number
+  max_tasks: null # null = no limit, or specify a number
   max_concurrent: 5 # Number of parallel tasks
   pass_at_k: 1 # Number of attempts per task
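
One caveat worth keeping in mind when reviewing this series: `unquote` also decodes percent-encoded reserved characters such as `%2F`, so the decoded string can differ structurally from the original URL and is safest to treat as display-only. A small illustrative check (the example.com URL is hypothetical, not from the codebase):

    import urllib.parse

    # %2F is an escaped "/" inside a path segment; decoding collapses it
    # into a real path separator, changing the URL's structure
    url = "https://example.com/a%2Fb?q=%E4%B8%AD%E6%96%87"
    print(urllib.parse.unquote(url))  # https://example.com/a/b?q=中文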