forked from iwanghc/mcp_web_search
-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathcli.py
More file actions
158 lines (144 loc) · 5.45 KB
/
cli.py
File metadata and controls
158 lines (144 loc) · 5.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#!/usr/bin/env python3
"""
基于 Playwright 的 Google 搜索 CLI 工具
Google search CLI tool based on Playwright
"""
import argparse
import asyncio
import json
import sys
from pathlib import Path
from google_search.engine import google_search, get_google_search_page_html
from common.types import CommandOptions
from common import logger
# 获取版本信息
# Get version information
def get_version() -> str:
    """Return the version string shown by the CLI's ``--version`` flag."""
    major, minor, patch = 1, 0, 0
    return f"{major}.{minor}.{patch}"
def _build_parser():
    """Create the argument parser that defines every CLI option.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    parser = argparse.ArgumentParser(
        prog="google-search-cli", description="基于 Playwright 的 Google 搜索 CLI 工具"
    )
    parser.add_argument("query", help="搜索关键词")
    parser.add_argument("-l", "--limit", type=int, default=10, help="结果数量限制 (默认: 10)")
    parser.add_argument(
        "-t", "--timeout", type=int, default=30000, help="超时时间(毫秒) (默认: 30000)"
    )
    # Deprecated flag: still accepted so existing invocations keep working,
    # but its value is not forwarded to the search engine.
    parser.add_argument(
        "--no-headless", action="store_true", help="已废弃: 总是先尝试无头模式,如果遇到人机验证会自动切换到有头模式"
    )
    parser.add_argument(
        "--state-file",
        default="./browser-state.json",
        help="浏览器状态文件路径 (默认: ./browser-state.json)",
    )
    parser.add_argument("--no-save-state", action="store_true", help="不保存浏览器状态")
    parser.add_argument(
        "-b",
        "--basic-view",
        "--gbv",
        dest="basic_view",
        action="store_true",
        help="使用 Google Basic Variant (gbv=1),绕过 JS 驱动的检测(较小、无 JS 的旧界面)。别名: -b, --gbv",
    )
    parser.add_argument(
        "--manual-captcha",
        dest="manual_captcha",
        action="store_true",
        help="允许在检测到 CAPTCHA 时进行交互式手动解答(会阻塞直到用户在终端按回车)。仅在交互式会话中有效。",
    )
    parser.add_argument(
        "--get-html", action="store_true", help="获取搜索结果页面的原始HTML而不是解析结果"
    )
    parser.add_argument("--save-html", action="store_true", help="将HTML保存到文件")
    parser.add_argument("--html-output", help="HTML输出文件路径")
    parser.add_argument(
        "-V", "--version", action="version", version=f"%(prog)s {get_version()}"
    )
    return parser


def _build_options(args):
    """Map the parsed CLI arguments onto the CommandOptions given to the engine."""
    return CommandOptions(
        limit=args.limit,
        timeout=args.timeout,
        state_file=args.state_file,
        no_save_state=args.no_save_state,
        basic_view=args.basic_view,
        manual_captcha=args.manual_captcha,
    )


def _print_json(payload):
    """Print *payload* to stdout as indented JSON, keeping non-ASCII text readable."""
    print(json.dumps(payload, indent=2, ensure_ascii=False))


async def _run_get_html(args, options):
    """Fetch the raw results page and print a JSON summary of it.

    The HTML itself is not printed; only its length and the paths reported
    by the engine are.
    """
    html_result = await get_google_search_page_html(
        args.query,
        options,
        args.save_html,  # store_true flag, so this is already a bool
        args.html_output,
    )
    _print_json(
        {
            "query": html_result.query,
            "url": html_result.url,
            "html_length": len(html_result.html),
            "saved_path": html_result.saved_path,
            "screenshot_path": html_result.screenshot_path,
        }
    )
    if args.save_html:
        print(
            f"\nHTML已保存到: {html_result.saved_path} (HTML saved to: {html_result.saved_path})"
        )
    if html_result.screenshot_path:
        print(
            f"截图已保存到: {html_result.screenshot_path} (Screenshot saved to: {html_result.screenshot_path})"
        )


async def _run_search(args, options):
    """Run the search and print the query plus title/link/snippet of each result."""
    search_result = await google_search(args.query, options)
    _print_json(
        {
            "query": search_result.query,
            "results": [
                {
                    "title": result.title,
                    "link": result.link,
                    "snippet": result.snippet,
                }
                for result in search_result.results
            ],
        }
    )


async def main(argv=None):
    """Parse the command line, run the requested operation and print JSON.

    With ``--get-html`` a summary of the fetched results page is printed;
    otherwise the parsed search results are printed.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``, which is the
            original behaviour.

    Raises:
        SystemExit: with status 1 when the operation fails or is
            interrupted. argparse also raises it for ``--help``,
            ``--version`` and invalid arguments.
    """
    args = _build_parser().parse_args(argv)
    try:
        # Built inside the try block so a failure constructing the options is
        # reported like any other failure, as it was originally.
        options = _build_options(args)
        if args.get_html:
            await _run_get_html(args, options)
        else:
            await _run_search(args, options)
    except KeyboardInterrupt:
        # NOTE(review): under asyncio.run on Python 3.11+ the first Ctrl+C is
        # delivered as task cancellation, so this handler may not fire there.
        # Diagnostics go to stderr so stdout carries only the result output.
        print("\n搜索被用户中断 (Search interrupted by user)", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report any failure and exit non-zero.
        print(f"搜索失败: {e} (Search failed: {e})", file=sys.stderr)
        sys.exit(1)
if __name__ == "__main__":
    # Script entry point: run the async main() to completion.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # On Python 3.11+ asyncio.run turns Ctrl+C into cancellation of the
        # main task and then raises KeyboardInterrupt from asyncio.run itself,
        # so it has to be handled here to avoid a traceback.
        print("\n搜索被用户中断 (Search interrupted by user)", file=sys.stderr)
        sys.exit(1)