27 changes: 15 additions & 12 deletions libs/openant-core/context/application_context.py
@@ -31,6 +31,7 @@

from anthropic import Anthropic
from dotenv import load_dotenv
from utilities.file_io import open_utf8, read_json, write_json

# Load environment variables
load_dotenv()
Expand Down Expand Up @@ -208,7 +209,8 @@ def gather_context_sources(repo_path: Path) -> dict[str, str]:
filepath = repo_path / filename
if filepath.exists():
try:
content = filepath.read_text(errors="ignore")
with open_utf8(filepath, errors="ignore") as _f:
content = _f.read()
# Limit size to avoid token overflow
if len(content) > 10000:
content = content[:10000] + "\n\n[... truncated ...]"
@@ -289,7 +291,8 @@ def detect_entry_points(repo_path: Path) -> str:
continue

try:
content = py_file.read_text(errors="ignore")
with open_utf8(py_file, errors="ignore") as _f:
content = _f.read()
rel_path = py_file.relative_to(repo_path)

for category, patterns in ENTRY_POINT_PATTERNS.items():
@@ -308,7 +311,8 @@ def detect_entry_points(repo_path: Path) -> str:
continue

try:
content = js_file.read_text(errors="ignore")
with open_utf8(js_file, errors="ignore") as _f:
content = _f.read()
rel_path = js_file.relative_to(repo_path)

if re.search(r"express\(\)|require\(['\"]express['\"]\)", content):
@@ -340,15 +344,17 @@ def check_manual_override(repo_path: Path) -> ApplicationContext | None:
continue

try:
content = filepath.read_text()

if filename.endswith('.json'):
# Direct JSON format
data = json.loads(content)
data = read_json(filepath)
data['source'] = 'manual'
return ApplicationContext(**data)

elif filename.endswith('.md'):
# .md files need raw text so regex can extract the embedded JSON block.
with open_utf8(filepath) as _f:
content = _f.read()

if filename.endswith('.md'):
# Markdown format - check for JSON code block
json_match = re.search(r'```json\s*(.*?)\s*```', content, re.DOTALL)
if json_match:
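For reference, the markdown override format this regex targets would look something like the sketch below. The filename and the JSON fields are illustrative: the candidate filenames scanned by check_manual_override and the ApplicationContext dataclass fields are not part of the hunks shown.

````markdown
<!-- filename illustrative: one of the override files check_manual_override scans -->
Free-form notes are fine here; only the fenced json block below is parsed.

```json
{
  "source": "manual"
}
```
````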
@@ -545,8 +551,7 @@ def save_context(context: ApplicationContext, output_path: Path) -> None:
output_path = Path(output_path)
output_path.parent.mkdir(parents=True, exist_ok=True)

with open(output_path, 'w') as f:
json.dump(asdict(context), f, indent=2)
write_json(output_path, asdict(context))

print(f"Context saved to {output_path}", file=sys.stderr)

@@ -560,9 +565,7 @@ def load_context(input_path: Path) -> ApplicationContext:
Returns:
ApplicationContext loaded from file.
"""
with open(input_path) as f:
data = json.load(f)

data = read_json(input_path)
# Mark as manual to skip validation (already validated when saved)
original_source = data.get('source', 'llm')
data['source'] = 'manual' # Temporarily bypass validation
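The new helpers themselves are not part of the hunks shown. A minimal sketch of utilities/file_io.py consistent with these call sites (UTF-8 everywhere, the old indent=2 preserved on writes) would be:

```python
# Sketch only, inferred from the call sites in this PR; the real
# utilities/file_io.py is not included in the diff.
import json


def open_utf8(path, mode="r", errors=None):
    """Open a text file as UTF-8; errors="ignore" skips undecodable bytes."""
    return open(path, mode, encoding="utf-8", errors=errors)


def read_json(path):
    """Parse a JSON file, letting json.JSONDecodeError and OSError propagate."""
    with open_utf8(path) as f:
        return json.load(f)


def write_json(path, data):
    """Write data as pretty-printed JSON, matching the replaced indent=2 dumps."""
    with open_utf8(path, mode="w") as f:
        json.dump(data, f, indent=2)
```

Whatever the real implementation looks like, read_json has to keep raising json.JSONDecodeError and OSError, because the except clauses in checkpoint.py below still catch exactly those.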
9 changes: 3 additions & 6 deletions libs/openant-core/core/analyzer.py
@@ -27,6 +27,7 @@

# Import existing analysis machinery
from utilities.llm_client import AnthropicClient, get_global_tracker
from utilities.file_io import read_json, write_json
from utilities.json_corrector import JSONCorrector
from utilities.rate_limiter import get_rate_limiter, is_rate_limit_error, is_retryable_error

@@ -330,9 +331,7 @@ def run_analysis(

# Load dataset
print(f"[Analyze] Loading dataset: {dataset_path}", file=sys.stderr)
with open(dataset_path) as f:
dataset = json.load(f)

dataset = read_json(dataset_path)
units = dataset.get("units", [])

# Diff filter: if upstream parse stamped diff_selected on units (PR-diff
@@ -513,9 +512,7 @@ def _summary_callback(finding, usage=None):
"code_by_route": code_by_route,
}

with open(results_path, "w") as f:
json.dump(experiment_result, f, indent=2)

write_json(results_path, experiment_result)
print(f"\n[Analyze] Results written to {results_path}", file=sys.stderr)

# Checkpoints are preserved as a permanent artifact alongside results.
18 changes: 6 additions & 12 deletions libs/openant-core/core/checkpoint.py
@@ -27,6 +27,7 @@
from datetime import datetime, timezone

from utilities.safe_filename import safe_filename
from utilities.file_io import read_json, write_json
from pathlib import Path


@@ -79,8 +80,7 @@ def load(self) -> dict[str, dict]:
continue
filepath = os.path.join(self.dir, filename)
try:
with open(filepath, "r") as f:
data = json.load(f)
data = read_json(filepath)
unit_id = data.get("id")
if unit_id:
results[unit_id] = data
@@ -130,9 +130,7 @@ def save(self, unit_id: str, data: dict):
filename = self._safe_filename(unit_id) + ".json"
filepath = os.path.join(self.dir, filename)
data["id"] = unit_id # ensure id is always present
with open(filepath, "w") as f:
json.dump(data, f, indent=2)

write_json(filepath, data)
def write_summary(
self,
total_units: int,
@@ -168,9 +166,7 @@ def write_summary(
}
if usage is not None:
data["usage"] = usage
with open(filepath, "w") as f:
json.dump(data, f, indent=2)

write_json(filepath, data)
@staticmethod
def read_summary(checkpoint_dir: str) -> dict | None:
"""Read _summary.json from a checkpoint directory.
@@ -182,8 +178,7 @@ def read_summary(checkpoint_dir: str) -> dict | None:
if not os.path.isfile(filepath):
return None
try:
with open(filepath, "r") as f:
return json.load(f)
return read_json(filepath)
except (json.JSONDecodeError, OSError):
return None

@@ -241,8 +236,7 @@ def status(checkpoint_dir: str) -> dict:
continue
filepath = os.path.join(checkpoint_dir, filename)
try:
with open(filepath, "r") as f:
data = json.load(f)
data = read_json(filepath)
except (json.JSONDecodeError, OSError):
errors += 1
error_breakdown["unreadable"] = error_breakdown.get("unreadable", 0) + 1
6 changes: 3 additions & 3 deletions libs/openant-core/core/diff_filter.py
@@ -30,10 +30,11 @@

from __future__ import annotations

import json
import sys
from dataclasses import dataclass, asdict

from utilities.file_io import read_json


# Scope constants (must match internal/git/manifest.go).
SCOPE_CHANGED_FILES = "changed_files"
@@ -65,8 +66,7 @@ def to_dict(self) -> dict:

def load_manifest(path: str) -> dict:
"""Read and minimally validate a diff manifest file."""
with open(path, "r", encoding="utf-8") as f:
m = json.load(f)
m = read_json(path)
scope = m.get("scope")
if scope not in _VALID_SCOPES:
raise ValueError(
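For context, the smallest manifest load_manifest would accept looks like the sketch below. Only the scope field is validated in the hunk above; the path and any other fields emitted by internal/git/manifest.go are assumptions.

```python
# Illustrative only: exercises the validation shown above. Real manifests
# from internal/git/manifest.go presumably also carry the changed-file list.
import json

from core.diff_filter import load_manifest

with open("diff.manifest.json", "w", encoding="utf-8") as f:
    json.dump({"scope": "changed_files"}, f)  # SCOPE_CHANGED_FILES

manifest = load_manifest("diff.manifest.json")  # ValueError on an unknown scope
```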
8 changes: 3 additions & 5 deletions libs/openant-core/core/dynamic_tester.py
@@ -12,6 +12,7 @@

from core.schemas import DynamicTestStepResult, UsageInfo
from core import tracking
from utilities.file_io import read_json, write_json


def run_tests(
@@ -51,9 +52,7 @@ def run_tests(
os.makedirs(output_dir, exist_ok=True)

# Check how many findings to test
with open(pipeline_output_path) as f:
pipeline_data = json.load(f)

pipeline_data = read_json(pipeline_output_path)
findings = pipeline_data.get("findings", [])
testable = [
f for f in findings
@@ -65,8 +64,7 @@

if not testable:
results_path = os.path.join(output_dir, "dynamic_test_results.json")
with open(results_path, "w") as f:
json.dump({"findings_tested": 0, "results": []}, f, indent=2)
write_json(results_path, {"findings_tested": 0, "results": []})

return DynamicTestStepResult(
results_json_path=results_path,
9 changes: 3 additions & 6 deletions libs/openant-core/core/enhancer.py
@@ -17,6 +17,7 @@
from core import tracking
from core.progress import ProgressReporter
from utilities.rate_limiter import configure_rate_limiter
from utilities.file_io import read_json, write_json


def enhance_dataset(
@@ -69,9 +70,7 @@ def enhance_dataset(

# Load dataset
print(f"[Enhance] Loading dataset: {dataset_path}", file=sys.stderr)
with open(dataset_path) as f:
dataset = json.load(f)

dataset = read_json(dataset_path)
units = dataset.get("units", [])
print(f"[Enhance] Units to enhance: {len(units)}", file=sys.stderr)

@@ -138,9 +137,7 @@ def _on_restored(count: int):

# Write enhanced dataset
os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
with open(output_path, "w") as f:
json.dump(enhanced, f, indent=2)

write_json(output_path, enhanced)
print(f"[Enhance] Enhanced dataset: {output_path}", file=sys.stderr)
print(f"[Enhance] Classifications: {classifications}", file=sys.stderr)
if error_count:
42 changes: 13 additions & 29 deletions libs/openant-core/core/parser_adapter.py
@@ -16,6 +16,7 @@
from pathlib import Path

from core.schemas import ParseResult
from utilities.file_io import read_json, write_json

# Root of openant-core (where parsers/ lives)
_CORE_ROOT = Path(__file__).parent.parent
@@ -161,9 +162,7 @@ def _maybe_apply_diff_filter(
)
return

with open(result.dataset_path, "r") as f:
dataset = json.load(f)

dataset = read_json(result.dataset_path)
# Dataset may be a dict with "units" or a raw list.
if isinstance(dataset, dict):
units = dataset.get("units", [])
@@ -172,14 +171,11 @@

stats = apply_diff_filter(units, manifest)

with open(result.dataset_path, "w") as f:
json.dump(dataset, f, indent=2)

write_json(result.dataset_path, dataset)
# Expose stats on the ParseResult via a side-channel file; the parse
# step_context reads this when assembling parse.report.json.
diff_report_path = os.path.join(output_dir, "diff_filter.report.json")
with open(diff_report_path, "w") as f:
json.dump(stats.to_dict(), f, indent=2)
write_json(diff_report_path, stats.to_dict())

print(
f" Diff filter ({stats.scope}): {stats.selected}/{stats.total} units selected"
@@ -245,9 +241,7 @@ def _load_module(name, filename):

print(f"\n[Reachability Filter] Filtering to {processing_level} units...", file=sys.stderr)

with open(call_graph_path, "r") as f:
call_graph_data = json.load(f)

call_graph_data = read_json(call_graph_path)
functions = call_graph_data.get("functions", {})
call_graph = call_graph_data.get("call_graph", {})
reverse_call_graph = call_graph_data.get("reverse_call_graph", {})
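The three .get() calls above pin down the top-level keys of the call-graph artifact; a toy instance would look like the sketch below, where the keys are grounded in the diff but the value shapes are assumptions.

```python
# Keys match the reads above; the value shapes are assumptions.
call_graph_data = {
    "functions": {"app.main": {"file": "app/main.py"}},    # per-function metadata (shape assumed)
    "call_graph": {"app.main": ["app.db.query"]},          # caller -> callees (assumed)
    "reverse_call_graph": {"app.db.query": ["app.main"]},  # callee -> callers (assumed)
}
```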
@@ -352,12 +346,8 @@ def _parse_python(repo_path: str, output_dir: str, processing_level: str, skip_t
dataset = _apply_reachability_filter(dataset, output_dir, processing_level)

# Write outputs
with open(dataset_path, "w") as f:
json.dump(dataset, f, indent=2)

with open(analyzer_output_path, "w") as f:
json.dump(analyzer_output, f, indent=2)

write_json(dataset_path, dataset)
write_json(analyzer_output_path, analyzer_output)
units_count = len(dataset.get("units", []))
print(f" Python parser complete: {units_count} units", file=sys.stderr)

@@ -413,8 +403,7 @@ def _parse_javascript(repo_path: str, output_dir: str, processing_level: str, sk
# Count units
units_count = 0
if os.path.exists(dataset_path):
with open(dataset_path) as f:
data = json.load(f)
data = read_json(dataset_path)
units_count = len(data.get("units", []))

print(f" JavaScript parser complete: {units_count} units", file=sys.stderr)
@@ -470,8 +459,7 @@ def _parse_go(repo_path: str, output_dir: str, processing_level: str, skip_tests
# Count units
units_count = 0
if os.path.exists(dataset_path):
with open(dataset_path) as f:
data = json.load(f)
data = read_json(dataset_path)
units_count = len(data.get("units", []))

print(f" Go parser complete: {units_count} units", file=sys.stderr)
@@ -530,8 +518,7 @@ def _parse_c(repo_path: str, output_dir: str, processing_level: str, skip_tests:
# Count units
units_count = 0
if os.path.exists(dataset_path):
with open(dataset_path) as f:
data = json.load(f)
data = read_json(dataset_path)
units_count = len(data.get("units", []))

print(f" C/C++ parser complete: {units_count} units", file=sys.stderr)
@@ -590,8 +577,7 @@ def _parse_ruby(repo_path: str, output_dir: str, processing_level: str, skip_tes
# Count units
units_count = 0
if os.path.exists(dataset_path):
with open(dataset_path) as f:
data = json.load(f)
data = read_json(dataset_path)
units_count = len(data.get("units", []))

print(f" Ruby parser complete: {units_count} units", file=sys.stderr)
@@ -650,8 +636,7 @@ def _parse_php(repo_path: str, output_dir: str, processing_level: str, skip_test
# Count units
units_count = 0
if os.path.exists(dataset_path):
with open(dataset_path) as f:
data = json.load(f)
data = read_json(dataset_path)
units_count = len(data.get("units", []))

print(f" PHP parser complete: {units_count} units", file=sys.stderr)
@@ -710,8 +695,7 @@ def _parse_zig(repo_path: str, output_dir: str, processing_level: str, skip_test
# Count units
units_count = 0
if os.path.exists(dataset_path):
with open(dataset_path) as f:
data = json.load(f)
data = read_json(dataset_path)
units_count = len(data.get("units", []))

print(f" Zig parser complete: {units_count} units", file=sys.stderr)