-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmain.py
213 lines (176 loc) · 8.06 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
import argparse
import os
import sys
from pathlib import Path
from typing import Dict, List, Type
from tqdm import tqdm
from core.analyzer import Analyzer
from scanners.base_scanner import BaseScanner
from core.utils import get_scanner_categories
def get_available_scanners() -> Dict[str, dict]:
    """Build a flat lookup of every scanner keyed by ``"<category>.<scanner>"``.

    Walks the mapping returned by ``get_scanner_categories()`` and, for each
    entry found under a category's ``"scanners"`` key, stores that entry's
    data under its dotted full name.

    Returns:
        A dictionary mapping full scanner names to the scanner data taken
        from the category mapping.
    """
    return {
        f"{group}.{name}": info
        for group, group_info in get_scanner_categories().items()
        for name, info in group_info["scanners"].items()
    }
def list_available_scanners():
    """Print each scanner category with its scanners, then usage hints.

    For every category returned by ``get_scanner_categories()`` the category
    name and description are printed, followed by one line per scanner
    showing its dotted full name and description.
    """
    print("Available scanner categories:")
    for group, group_info in get_scanner_categories().items():
        print(f"\n {group} - {group_info['description']}")
        for name, info in group_info["scanners"].items():
            # The full scanner ID is "<category>.<scanner>".
            print(f" - {group}.{name}: {info['description']}")

    # Closing hints on how to select scanners from the command line.
    print("\nUse --scans with specific scanner IDs (e.g., --scans prompt.xml_tags chain.unsafe_input)")
    print("Or use category names to run all scanners in that category (e.g., --scans prompt chain.unsafe_input)")
def parse_arguments(argv=None):
    """Parse command line arguments.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, so existing
            callers that pass nothing behave exactly as before. Supplying a
            list lets the parser be driven programmatically.

    Returns:
        An ``argparse.Namespace`` with the attributes ``path``,
        ``list_scans``, ``scans``, ``debug``, ``offline``, ``json``,
        ``json_dir``, ``comprehensive``, ``log_dir`` and ``github_url``.
    """
    parser = argparse.ArgumentParser(
        description="LLM code scanner to detect potential security issues and best practices"
    )

    # Positional target; optional so a bare invocation scans the current directory.
    parser.add_argument(
        "path", nargs="?", default=".",
        help="Path to file or directory to scan (default: current directory)",
    )
    parser.add_argument(
        "--list_scans", action="store_true",
        help="List available scanners and exit",
    )
    # nargs="+" means --scans, when given, always carries at least one name;
    # when omitted the attribute is None.
    parser.add_argument(
        "--scans", nargs="+",
        help="Specific scanners to run (default: all scanners)",
    )
    parser.add_argument(
        "--debug", action="store_true",
        help="Enable debug mode for more verbose output",
    )
    parser.add_argument(
        "--offline", action="store_true",
        help="Run in offline mode without fetching remote prompts",
    )
    parser.add_argument(
        "--json", action="store_true",
        help="Output results to a JSON file",
    )
    parser.add_argument(
        "--json-dir", type=str, default="scan_results",
        help="Directory where JSON reports will be saved (default: scan_results)",
    )
    parser.add_argument(
        "--comprehensive", action="store_true",
        help="Enable comprehensive logging of all scanned elements, not just issues",
    )
    parser.add_argument(
        "--log-dir", type=str, default="logs",
        help="Directory where comprehensive logs will be saved (default: logs)",
    )
    parser.add_argument(
        "--github_url", type=str,
        help="Base GitHub URL to prepend to relative file paths in reports",
    )

    return parser.parse_args(argv)
def _select_scanner_names(requested_names, categories, all_scanners):
    """Expand ``--scans`` values into an ordered, duplicate-free list of full scanner IDs.

    Args:
        requested_names: The list given to ``--scans``, or ``None`` when the
            option was omitted.
        categories: Category mapping as returned by ``get_scanner_categories()``.
        all_scanners: Flat mapping as returned by ``get_available_scanners()``.

    Returns:
        Full scanner names (``"<category>.<scanner>"``) in first-seen order.
    """
    if not requested_names:
        # If no scanners specified, run all
        return list(all_scanners)

    selected = []
    for requested in requested_names:
        if '.' not in requested and requested in categories:
            # A bare category name selects every scanner in that category.
            selected.extend(
                f"{requested}.{scanner_name}"
                for scanner_name in categories[requested]["scanners"]
            )
        elif requested in all_scanners:
            # Exact full scanner ID.
            selected.append(requested)
        else:
            # Fall back to a prefix match against the full scanner IDs;
            # computed once and reused for both the check and the result.
            matches = [name for name in all_scanners if name.startswith(requested)]
            if matches:
                selected.extend(matches)
            else:
                print(f"Warning: Unknown scanner '{requested}' - skipping")

    # Overlapping requests (e.g. a category plus one of its scanners) would
    # otherwise register the same scanner more than once. dict.fromkeys
    # drops the repeats while keeping first-seen order.
    return list(dict.fromkeys(selected))


def _collect_target_files(target_path):
    """Return the .py / .ipynb files to analyze for ``target_path``.

    A path that is an existing file with a ``.py`` or ``.ipynb`` suffix is
    returned on its own; anything else is searched recursively as a directory.
    """
    if target_path.is_file() and target_path.suffix in ('.py', '.ipynb'):
        print(f"Analyzing single file: {target_path}")
        return [target_path]

    py_files = list(target_path.glob("**/*.py"))
    ipynb_files = list(target_path.glob("**/*.ipynb"))
    print(f"Found {len(py_files)} Python files and {len(ipynb_files)} Jupyter notebooks to analyze")
    return py_files + ipynb_files


def main():
    """Run the scanner CLI: parse arguments, register scanners, analyze files, report.

    Steps, in order:
      1. Parse arguments; with ``--list_scans`` print the scanner list and return.
      2. With ``--debug`` set the ``DEBUG`` environment variable to ``"1"``.
      3. Create the ``Analyzer`` and register the selected scanners.
      4. Analyze every collected ``.py`` / ``.ipynb`` file.
      5. Report to the console, and additionally to JSON (``--json``) and/or a
         comprehensive log (``--comprehensive``).
    """
    print("Starting scanner...")
    args = parse_arguments()

    # List available scanners if requested
    if args.list_scans:
        list_available_scanners()
        return

    # Enable more verbose output in debug mode
    if args.debug:
        os.environ['DEBUG'] = "1"
        print("Debug mode enabled")

    # Initialize the analyzer with scan settings
    analyzer = Analyzer(offline_mode=args.offline)

    # Get categories and all available scanners
    categories = get_scanner_categories()
    all_scanners = get_available_scanners()

    # Determine which scanners to run
    scanners_to_run = _select_scanner_names(args.scans, categories, all_scanners)

    # Initialize and register the selected scanners
    for full_name in scanners_to_run:
        if full_name not in all_scanners:
            continue
        scanner_data = all_scanners[full_name]
        scanner_cls = scanner_data["class"]
        # Work on a copy so the runtime override below does not write back
        # into the scanner registry's own params dict.
        scanner_params = dict(scanner_data.get("params", {}))

        # Apply runtime parameters
        if "offline_mode" in scanner_params:
            scanner_params["offline_mode"] = args.offline

        # Initialize the scanner, omitting parameters whose value is None
        scanner = scanner_cls(**{k: v for k, v in scanner_params.items() if v is not None})

        # Register the scanner
        analyzer.register_scanner(scanner)
        print(f"Registered scanner: {full_name}")

    # Handle both file and directory paths
    files_to_analyze = _collect_target_files(Path(args.path))

    issues = []
    print("Scanning...")
    for file_path in tqdm(files_to_analyze):
        # Skip the scanner itself to avoid false positives.
        # NOTE(review): this is a substring test on the whole path, so any
        # file under a path containing "scanner" is skipped unless its name
        # ends with "_test.py" - confirm this breadth is intended.
        if "scanner" in str(file_path) and not file_path.name.endswith("_test.py"):
            continue

        if args.debug:
            print(f"\nAnalyzing file: {file_path}")

        issues.extend(analyzer.analyze_file(file_path))

    # Report issues
    # Always output to console (reporters are imported where they are used)
    from reporters.console_reporter import ConsoleReporter
    console_reporter = ConsoleReporter()
    console_reporter.report(issues)

    # Additionally output to JSON if requested
    if args.json:
        from reporters.json_reporter import JSONReporter
        json_reporter = JSONReporter(output_dir=args.json_dir,
                                     github_url=args.github_url,
                                     scan_target=args.path)
        json_path = json_reporter.report(issues)
        print(f"JSON report saved to: {json_path}")

    # Create comprehensive report if requested
    if args.comprehensive:
        from reporters.comprehensive_reporter import ComprehensiveReporter
        comprehensive_reporter = ComprehensiveReporter(output_dir=args.log_dir)

        # Collect scanned elements from each scanner that recorded any
        for scanner in analyzer.scanners:
            scanned = getattr(scanner, 'scanned_elements', None)
            if not scanned:
                continue
            for element_type, elements in scanned.items():
                for element in elements:
                    comprehensive_reporter.add_scanned_element(element_type, element)

        # Generate comprehensive report
        log_path = comprehensive_reporter.report(issues)
        print(f"Comprehensive log saved to: {log_path}")


# Run the CLI only when this file is executed as a script.
if __name__ == "__main__":
    main()