27 changes: 15 additions & 12 deletions libs/openant-core/context/application_context.py
@@ -31,6 +31,7 @@

from anthropic import Anthropic
from dotenv import load_dotenv
from utilities.file_io import open_utf8, read_json, write_json

# Load environment variables
load_dotenv()
Expand Down Expand Up @@ -208,7 +209,8 @@ def gather_context_sources(repo_path: Path) -> dict[str, str]:
filepath = repo_path / filename
if filepath.exists():
try:
content = filepath.read_text(errors="ignore")
with open_utf8(filepath, errors="ignore") as _f:
content = _f.read()
# Limit size to avoid token overflow
if len(content) > 10000:
content = content[:10000] + "\n\n[... truncated ...]"
@@ -289,7 +291,8 @@ def detect_entry_points(repo_path: Path) -> str:
continue

try:
content = py_file.read_text(errors="ignore")
with open_utf8(py_file, errors="ignore") as _f:
content = _f.read()
rel_path = py_file.relative_to(repo_path)

for category, patterns in ENTRY_POINT_PATTERNS.items():
@@ -308,7 +311,8 @@ def detect_entry_points(repo_path: Path) -> str:
continue

try:
content = js_file.read_text(errors="ignore")
with open_utf8(js_file, errors="ignore") as _f:
content = _f.read()
rel_path = js_file.relative_to(repo_path)

if re.search(r"express\(\)|require\(['\"]express['\"]\)", content):
@@ -340,15 +344,17 @@ def check_manual_override(repo_path: Path) -> ApplicationContext | None:
continue

try:
content = filepath.read_text()

if filename.endswith('.json'):
# Direct JSON format
data = json.loads(content)
data = read_json(filepath)
data['source'] = 'manual'
return ApplicationContext(**data)

elif filename.endswith('.md'):
# .md files need raw text so regex can extract the embedded JSON block.
with open_utf8(filepath) as _f:
content = _f.read()

if filename.endswith('.md'):
# Markdown format - check for JSON code block
json_match = re.search(r'```json\s*(.*?)\s*```', content, re.DOTALL)
if json_match:
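For reference, the markdown override format this regex targets would look something like the sketch below. The filename and the JSON fields are illustrative: the candidate filenames scanned by check_manual_override and the ApplicationContext dataclass fields are not part of the hunks shown.

````markdown
<!-- filename illustrative: one of the override files check_manual_override scans -->
Free-form notes are fine here; only the fenced json block below is parsed.

```json
{
  "source": "manual"
}
```
````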
@@ -545,8 +551,7 @@ def save_context(context: ApplicationContext, output_path: Path) -> None:
output_path = Path(output_path)
output_path.parent.mkdir(parents=True, exist_ok=True)

with open(output_path, 'w') as f:
json.dump(asdict(context), f, indent=2)
write_json(output_path, asdict(context))

print(f"Context saved to {output_path}", file=sys.stderr)

@@ -560,9 +565,7 @@ def load_context(input_path: Path) -> ApplicationContext:
Returns:
ApplicationContext loaded from file.
"""
with open(input_path) as f:
data = json.load(f)

data = read_json(input_path)
# Mark as manual to skip validation (already validated when saved)
original_source = data.get('source', 'llm')
data['source'] = 'manual' # Temporarily bypass validation
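The new helpers themselves are not part of the hunks shown. A minimal sketch of utilities/file_io.py consistent with these call sites (UTF-8 everywhere, the old indent=2 preserved on writes) would be:

```python
# Sketch only, inferred from the call sites in this PR; the real
# utilities/file_io.py is not included in the diff.
import json


def open_utf8(path, mode="r", errors=None):
    """Open a text file as UTF-8; errors="ignore" skips undecodable bytes."""
    return open(path, mode, encoding="utf-8", errors=errors)


def read_json(path):
    """Parse a JSON file, letting json.JSONDecodeError and OSError propagate."""
    with open_utf8(path) as f:
        return json.load(f)


def write_json(path, data):
    """Write data as pretty-printed JSON, matching the replaced indent=2 dumps."""
    with open_utf8(path, mode="w") as f:
        json.dump(data, f, indent=2)
```

Whatever the real implementation looks like, read_json has to keep raising json.JSONDecodeError and OSError, because the except clauses in checkpoint.py below still catch exactly those.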
9 changes: 3 additions & 6 deletions libs/openant-core/core/analyzer.py
@@ -27,6 +27,7 @@

# Import existing analysis machinery
from utilities.llm_client import AnthropicClient, get_global_tracker
from utilities.file_io import read_json, write_json
from utilities.json_corrector import JSONCorrector
from utilities.rate_limiter import get_rate_limiter, is_rate_limit_error, is_retryable_error

@@ -330,9 +331,7 @@ def run_analysis(

# Load dataset
print(f"[Analyze] Loading dataset: {dataset_path}", file=sys.stderr)
with open(dataset_path) as f:
dataset = json.load(f)

dataset = read_json(dataset_path)
units = dataset.get("units", [])

# Diff filter: if upstream parse stamped diff_selected on units (PR-diff
@@ -513,9 +512,7 @@ def _summary_callback(finding, usage=None):
"code_by_route": code_by_route,
}

with open(results_path, "w") as f:
json.dump(experiment_result, f, indent=2)

write_json(results_path, experiment_result)
print(f"\n[Analyze] Results written to {results_path}", file=sys.stderr)

# Checkpoints are preserved as a permanent artifact alongside results.
18 changes: 6 additions & 12 deletions libs/openant-core/core/checkpoint.py
@@ -27,6 +27,7 @@
from datetime import datetime, timezone

from utilities.safe_filename import safe_filename
from utilities.file_io import read_json, write_json
from pathlib import Path


@@ -79,8 +80,7 @@ def load(self) -> dict[str, dict]:
continue
filepath = os.path.join(self.dir, filename)
try:
with open(filepath, "r") as f:
data = json.load(f)
data = read_json(filepath)
unit_id = data.get("id")
if unit_id:
results[unit_id] = data
@@ -130,9 +130,7 @@ def save(self, unit_id: str, data: dict):
filename = self._safe_filename(unit_id) + ".json"
filepath = os.path.join(self.dir, filename)
data["id"] = unit_id # ensure id is always present
with open(filepath, "w") as f:
json.dump(data, f, indent=2)

write_json(filepath, data)
def write_summary(
self,
total_units: int,
@@ -168,9 +166,7 @@ def write_summary(
}
if usage is not None:
data["usage"] = usage
with open(filepath, "w") as f:
json.dump(data, f, indent=2)

write_json(filepath, data)
@staticmethod
def read_summary(checkpoint_dir: str) -> dict | None:
"""Read _summary.json from a checkpoint directory.
@@ -182,8 +178,7 @@ def read_summary(checkpoint_dir: str) -> dict | None:
if not os.path.isfile(filepath):
return None
try:
with open(filepath, "r") as f:
return json.load(f)
return read_json(filepath)
except (json.JSONDecodeError, OSError):
return None

@@ -241,8 +236,7 @@ def status(checkpoint_dir: str) -> dict:
continue
filepath = os.path.join(checkpoint_dir, filename)
try:
with open(filepath, "r") as f:
data = json.load(f)
data = read_json(filepath)
except (json.JSONDecodeError, OSError):
errors += 1
error_breakdown["unreadable"] = error_breakdown.get("unreadable", 0) + 1
6 changes: 3 additions & 3 deletions libs/openant-core/core/diff_filter.py
@@ -30,10 +30,11 @@

from __future__ import annotations

import json
import sys
from dataclasses import dataclass, asdict

from utilities.file_io import read_json


# Scope constants (must match internal/git/manifest.go).
SCOPE_CHANGED_FILES = "changed_files"
@@ -65,8 +66,7 @@ def to_dict(self) -> dict:

def load_manifest(path: str) -> dict:
"""Read and minimally validate a diff manifest file."""
with open(path, "r", encoding="utf-8") as f:
m = json.load(f)
m = read_json(path)
scope = m.get("scope")
if scope not in _VALID_SCOPES:
raise ValueError(
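For context, the smallest manifest load_manifest would accept looks like the sketch below. Only the scope field is validated in the hunk above; the path and any other fields emitted by internal/git/manifest.go are assumptions.

```python
# Illustrative only: exercises the validation shown above. Real manifests
# from internal/git/manifest.go presumably also carry the changed-file list.
import json

from core.diff_filter import load_manifest

with open("diff.manifest.json", "w", encoding="utf-8") as f:
    json.dump({"scope": "changed_files"}, f)  # SCOPE_CHANGED_FILES

manifest = load_manifest("diff.manifest.json")  # ValueError on an unknown scope
```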
8 changes: 3 additions & 5 deletions libs/openant-core/core/dynamic_tester.py
@@ -12,6 +12,7 @@

from core.schemas import DynamicTestStepResult, UsageInfo
from core import tracking
from utilities.file_io import read_json, write_json


def run_tests(
@@ -51,9 +52,7 @@ def run_tests(
os.makedirs(output_dir, exist_ok=True)

# Check how many findings to test
with open(pipeline_output_path) as f:
pipeline_data = json.load(f)

pipeline_data = read_json(pipeline_output_path)
findings = pipeline_data.get("findings", [])
testable = [
f for f in findings
@@ -65,8 +64,7 @@

if not testable:
results_path = os.path.join(output_dir, "dynamic_test_results.json")
with open(results_path, "w") as f:
json.dump({"findings_tested": 0, "results": []}, f, indent=2)
write_json(results_path, {"findings_tested": 0, "results": []})

return DynamicTestStepResult(
results_json_path=results_path,
9 changes: 3 additions & 6 deletions libs/openant-core/core/enhancer.py
@@ -17,6 +17,7 @@
from core import tracking
from core.progress import ProgressReporter
from utilities.rate_limiter import configure_rate_limiter
from utilities.file_io import read_json, write_json


def enhance_dataset(
@@ -69,9 +70,7 @@ def enhance_dataset(

# Load dataset
print(f"[Enhance] Loading dataset: {dataset_path}", file=sys.stderr)
with open(dataset_path) as f:
dataset = json.load(f)

dataset = read_json(dataset_path)
units = dataset.get("units", [])
print(f"[Enhance] Units to enhance: {len(units)}", file=sys.stderr)

@@ -138,9 +137,7 @@ def _on_restored(count: int):

# Write enhanced dataset
os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
with open(output_path, "w") as f:
json.dump(enhanced, f, indent=2)

write_json(output_path, enhanced)
print(f"[Enhance] Enhanced dataset: {output_path}", file=sys.stderr)
print(f"[Enhance] Classifications: {classifications}", file=sys.stderr)
if error_count:
42 changes: 13 additions & 29 deletions libs/openant-core/core/parser_adapter.py
@@ -16,6 +16,7 @@
from pathlib import Path

from core.schemas import ParseResult
from utilities.file_io import read_json, write_json

# Root of openant-core (where parsers/ lives)
_CORE_ROOT = Path(__file__).parent.parent
@@ -161,9 +162,7 @@ def _maybe_apply_diff_filter(
)
return

with open(result.dataset_path, "r") as f:
dataset = json.load(f)

dataset = read_json(result.dataset_path)
# Dataset may be a dict with "units" or a raw list.
if isinstance(dataset, dict):
units = dataset.get("units", [])
@@ -172,14 +171,11 @@

stats = apply_diff_filter(units, manifest)

with open(result.dataset_path, "w") as f:
json.dump(dataset, f, indent=2)

write_json(result.dataset_path, dataset)
# Expose stats on the ParseResult via a side-channel file; the parse
# step_context reads this when assembling parse.report.json.
diff_report_path = os.path.join(output_dir, "diff_filter.report.json")
with open(diff_report_path, "w") as f:
json.dump(stats.to_dict(), f, indent=2)
write_json(diff_report_path, stats.to_dict())

print(
f" Diff filter ({stats.scope}): {stats.selected}/{stats.total} units selected"
@@ -245,9 +241,7 @@ def _load_module(name, filename):

print(f"\n[Reachability Filter] Filtering to {processing_level} units...", file=sys.stderr)

with open(call_graph_path, "r") as f:
call_graph_data = json.load(f)

call_graph_data = read_json(call_graph_path)
functions = call_graph_data.get("functions", {})
call_graph = call_graph_data.get("call_graph", {})
reverse_call_graph = call_graph_data.get("reverse_call_graph", {})
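The three .get() calls above pin down the top-level keys of the call-graph artifact; a toy instance would look like the sketch below, where the keys are grounded in the diff but the value shapes are assumptions.

```python
# Keys match the reads above; the value shapes are assumptions.
call_graph_data = {
    "functions": {"app.main": {"file": "app/main.py"}},    # per-function metadata (shape assumed)
    "call_graph": {"app.main": ["app.db.query"]},          # caller -> callees (assumed)
    "reverse_call_graph": {"app.db.query": ["app.main"]},  # callee -> callers (assumed)
}
```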
@@ -352,12 +346,8 @@ def _parse_python(repo_path: str, output_dir: str, processing_level: str, skip_t
dataset = _apply_reachability_filter(dataset, output_dir, processing_level)

# Write outputs
with open(dataset_path, "w") as f:
json.dump(dataset, f, indent=2)

with open(analyzer_output_path, "w") as f:
json.dump(analyzer_output, f, indent=2)

write_json(dataset_path, dataset)
write_json(analyzer_output_path, analyzer_output)
units_count = len(dataset.get("units", []))
print(f" Python parser complete: {units_count} units", file=sys.stderr)

@@ -413,8 +403,7 @@ def _parse_javascript(repo_path: str, output_dir: str, processing_level: str, sk
# Count units
units_count = 0
if os.path.exists(dataset_path):
with open(dataset_path) as f:
data = json.load(f)
data = read_json(dataset_path)
units_count = len(data.get("units", []))

print(f" JavaScript parser complete: {units_count} units", file=sys.stderr)
@@ -470,8 +459,7 @@ def _parse_go(repo_path: str, output_dir: str, processing_level: str, skip_tests
# Count units
units_count = 0
if os.path.exists(dataset_path):
with open(dataset_path) as f:
data = json.load(f)
data = read_json(dataset_path)
units_count = len(data.get("units", []))

print(f" Go parser complete: {units_count} units", file=sys.stderr)
@@ -530,8 +518,7 @@ def _parse_c(repo_path: str, output_dir: str, processing_level: str, skip_tests:
# Count units
units_count = 0
if os.path.exists(dataset_path):
with open(dataset_path) as f:
data = json.load(f)
data = read_json(dataset_path)
units_count = len(data.get("units", []))

print(f" C/C++ parser complete: {units_count} units", file=sys.stderr)
@@ -590,8 +577,7 @@ def _parse_ruby(repo_path: str, output_dir: str, processing_level: str, skip_tes
# Count units
units_count = 0
if os.path.exists(dataset_path):
with open(dataset_path) as f:
data = json.load(f)
data = read_json(dataset_path)
units_count = len(data.get("units", []))

print(f" Ruby parser complete: {units_count} units", file=sys.stderr)
@@ -650,8 +636,7 @@ def _parse_php(repo_path: str, output_dir: str, processing_level: str, skip_test
# Count units
units_count = 0
if os.path.exists(dataset_path):
with open(dataset_path) as f:
data = json.load(f)
data = read_json(dataset_path)
units_count = len(data.get("units", []))

print(f" PHP parser complete: {units_count} units", file=sys.stderr)
@@ -710,8 +695,7 @@ def _parse_zig(repo_path: str, output_dir: str, processing_level: str, skip_test
# Count units
units_count = 0
if os.path.exists(dataset_path):
with open(dataset_path) as f:
data = json.load(f)
data = read_json(dataset_path)
units_count = len(data.get("units", []))

print(f" Zig parser complete: {units_count} units", file=sys.stderr)