77import os
88import json
99import subprocess
10- from . import crawl , crawl_to_r2
11- from .crawler import get_binary_path
10+
11+ # Import the package by its absolute name rather than with relative imports
12+ import pathik
13+ from pathik .crawler import get_binary_path
1214
1315def main ():
1416 """Main entry point for the CLI"""
15- parser = argparse .ArgumentParser (description = "Pathik - A fast web crawler with Python integration" )
17+ parser = argparse .ArgumentParser (
18+ description = "Pathik - A fast web crawler with Python integration" ,
19+ epilog = """
20+ Note: This Python CLI uses subcommands (crawl, r2, kafka, version) rather than flags.
21+ For example:
22+ pathik kafka https://example.com
23+ pathik crawl -o ./output https://example.com
24+
25+ If you prefer flag-style syntax, use the Go binary directly:
26+ ./pathik -kafka https://example.com
27+ ./pathik -crawl -outdir ./output https://example.com
28+ """ ,
29+ formatter_class = argparse .RawDescriptionHelpFormatter
30+ )
1631 subparsers = parser .add_subparsers (dest = "command" , help = "Command to run" )
1732
1833 # Crawl command
@@ -35,19 +50,33 @@ def main():
3550 kafka_parser .add_argument ("-t" , "--topic" , help = "Kafka topic to stream to" )
3651 kafka_parser .add_argument ("-c" , "--content" , choices = ["html" , "markdown" , "both" ], default = "both" ,
3752 help = "Content type to stream (html, markdown, or both)" )
53+ kafka_parser .add_argument ("--session" , help = "Session ID to include with messages (for multi-user environments)" )
3854
3955 # Version command
4056 version_parser = subparsers .add_parser ("version" , help = "Print version information" )
4157
42- args = parser .parse_args ()
58+ try :
59+ args = parser .parse_args ()
60+ except SystemExit as e :
61+ # Check if user might be using Go binary syntax with dashes
62+ for i , arg in enumerate (sys .argv [1 :]):
63+ if arg .startswith ('-' ) and not arg .startswith ('--' ) and arg not in ['-o' , '-s' , '-u' , '-b' , '-t' , '-c' ]:
64+ print ("\n Error: It seems you're using Go binary syntax with the Python CLI." )
65+ print ("The Python CLI uses subcommands instead of flags:" )
66+ print (" ✅ Correct: pathik kafka https://example.com" )
67+ print (" ❌ Incorrect: pathik -kafka https://example.com" )
68+ print ("\n Available subcommands: crawl, r2, kafka, version" )
69+ return 1
70+ # If not caught by our check, let the original error propagate
71+ return e .code
4372
4473 if not args .command :
4574 parser .print_help ()
4675 return 1
4776
4877 try :
4978 if args .command == "crawl" :
50- result = crawl (
79+ result = pathik . crawl (
5180 urls = args .urls ,
5281 output_dir = args .outdir ,
5382 parallel = not args .sequential
@@ -72,7 +101,7 @@ def main():
72101 print (f"\n Results saved to: { results_file } " )
73102
74103 elif args .command == "r2" :
75- result = crawl_to_r2 (
104+ result = pathik . crawl_to_r2 (
76105 urls = args .urls ,
77106 uuid_str = args .uuid ,
78107 parallel = not args .sequential
@@ -101,11 +130,17 @@ def main():
101130 if args .content and args .content != "both" :
102131 cmd .extend (["-content" , args .content ])
103132
133+ # Add topic if specified
134+ if args .topic :
135+ cmd .extend (["-topic" , args .topic ])
136+
137+ # Add session ID if provided
138+ if args .session :
139+ cmd .extend (["-session" , args .session ])
140+
104141 # Add Kafka-specific options if provided
105142 if args .brokers :
106143 os .environ ["KAFKA_BROKERS" ] = args .brokers
107- if args .topic :
108- os .environ ["KAFKA_TOPIC" ] = args .topic
109144
110145 # Add URLs
111146 cmd .extend (args .urls )
@@ -126,8 +161,7 @@ def main():
126161 return 1
127162
128163 elif args .command == "version" :
129- from . import __version__ # Importing here to avoid circular imports
130- print (f"Pathik v{ __version__ } " )
164+ print (f"Pathik v{ pathik .__version__ } " )
131165 return 0
132166
133167 return 0
0 commit comments