diff --git a/ossfuzz_py/README.md b/ossfuzz_py/README.md new file mode 100644 index 000000000..f436942f7 --- /dev/null +++ b/ossfuzz_py/README.md @@ -0,0 +1,1499 @@ +# OSS-Fuzz SDK API DOCUMENTATION + +## Table of Contents + +1. [Overview](#overview) +2. [Installation & Setup](#installation--setup) +3. [Quick Start](#quick-start) +4. [Configuration](#configuration) +5. [Core Classes](#core-classes) +6. [Build Operations](#build-operations) +7. [Execution Operations](#execution-operations) +8. [Workflow Orchestration](#workflow-orchestration) +9. [Result Management](#result-management) +10. [Benchmark Management](#benchmark-management) +11. [Export & Analysis](#export--analysis) +12. [Historical Data Analysis](#historical-data-analysis) +13. [Error Handling](#error-handling) +14. [Examples](#examples) +15. [Best Practices](#best-practices) + +## Overview + +The OSS-Fuzz SDK provides a comprehensive, unified interface for building, executing, and analyzing fuzz targets and benchmarks. It integrates all aspects of the fuzzing workflow from build operations to result analysis and reporting. + +### Key Features + +- **Unified API**: Single entry point for all fuzzing operations +- **Flexible Configuration**: Multiple configuration options and sources +- **Robust Error Handling**: Graceful degradation and comprehensive exception management +- **Component Integration**: Seamless coordination of all SDK components +- **Comprehensive Analytics**: Built-in metrics, reporting, and analysis tools +- **Export Capabilities**: Multiple export formats for results and reports + +### Architecture + +``` +┌─────────────────────────────────────────────────────────────┐ +│ OSSFuzzSDK (Main Facade) │ +├─────────────────────────────────────────────────────────────┤ +│ Build Ops │ Run Ops │ Workflow │ Results │ Export │ +├─────────────────────────────────────────────────────────────┤ +│ ResultManager │ BenchmarkManager │ HistoryManagers │ Storage │ +├─────────────────────────────────────────────────────────────┤ +│ LocalBuilder/Runner │ CloudBuilder/Runner │ +└─────────────────────────────────────────────────────────────┘ +``` + +## Installation & Setup + +You can directly use the `ossfuzz-py` package from this project. + +**Future Plan**: Publish to PyPI. You can install it in command line then, like `pip install ossfuzz-py`. 
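+
+For now, a minimal sketch of using the package directly from a checkout (this assumes the repository root is placed on `PYTHONPATH`; adjust paths for your environment):
+
+```bash
+# Clone the repository and make the ossfuzz_py package importable
+# from the checkout (assumption: no PyPI package is published yet).
+git clone https://github.com/google/oss-fuzz-gen.git
+export PYTHONPATH="$PWD/oss-fuzz-gen:$PYTHONPATH"
+
+# Verify that the SDK can be imported.
+python -c "from ossfuzz_py.core.ossfuzz_sdk import OSSFuzzSDK; print('SDK import OK')"
+```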
+ +### Environment Variables + +```bash +# Storage configuration +export OSSFUZZ_HISTORY_STORAGE_BACKEND=local +export OSSFUZZ_HISTORY_STORAGE_PATH=/path/to/data +export GCS_BUCKET_NAME=your-gcs-bucket + +# Working directories +export WORK_DIR=/tmp/ossfuzz_work +export OSS_FUZZ_DIR=/path/to/oss-fuzz +``` + +## Quick Start + +### Basic Usage + +```python +from ossfuzz_py.core.ossfuzz_sdk import OSSFuzzSDK + +# Initialize SDK +sdk = OSSFuzzSDK('my_project') + +# Run a benchmark +result = sdk.run_benchmark('benchmark_id') +print(f"Success: {result.success}") + +# Get metrics +metrics = sdk.get_benchmark_metrics('benchmark_id') +print(f"Build success rate: {metrics.get('build_success_rate', 0)}") + +# Generate report +report = sdk.generate_project_report(days=30) +print(f"Project: {report['project_name']}") +``` + +### Advanced Usage + +```python +from ossfuzz_py.core.ossfuzz_sdk import ( + OSSFuzzSDK, SDKConfig, PipelineOptions, BuildOptions, RunOptions +) + +# Custom configuration +config = SDKConfig( + storage_backend='gcs', + gcs_bucket_name='my-bucket', + log_level='DEBUG', + enable_caching=True +) + +sdk = OSSFuzzSDK('my_project', config) + +# Configure pipeline options +build_opts = BuildOptions( + sanitizer='memory', + architecture='x86_64', + timeout_seconds=1800 +) + +run_opts = RunOptions( + duration_seconds=3600, + detect_leaks=True, + extract_coverage=True +) + +pipeline_opts = PipelineOptions( + build_options=build_opts, + run_options=run_opts, + trials=3, + analyze_coverage=True +) + +# Run full pipeline +result = sdk.run_full_pipeline('benchmark_id', pipeline_opts) +print(f"Pipeline success: {result.success}") +print(f"Successful builds: {sum(1 for r in result.build_results if r.success)}") +print(f"Successful runs: {sum(1 for r in result.run_results if r.success)}") +``` + +## Configuration + +### SDKConfig Class + +The `SDKConfig` class provides centralized configuration management. + +```python +class SDKConfig: + def __init__(self, + storage_backend: str = 'local', + storage_path: Optional[str] = None, + gcs_bucket_name: Optional[str] = None, + work_dir: Optional[str] = None, + oss_fuzz_dir: Optional[str] = None, + enable_caching: bool = True, + log_level: str = 'INFO', + timeout_seconds: int = 3600, + max_retries: int = 3) +``` + +#### Parameters + +- **storage_backend** (`str`): Storage backend type ('local', 'gcs') +- **storage_path** (`str`, optional): Local storage path +- **gcs_bucket_name** (`str`, optional): GCS bucket name for cloud storage +- **work_dir** (`str`, optional): Working directory for operations +- **oss_fuzz_dir** (`str`, optional): OSS-Fuzz repository directory +- **enable_caching** (`bool`): Enable result caching +- **log_level** (`str`): Logging level ('DEBUG', 'INFO', 'WARNING', 'ERROR') +- **timeout_seconds** (`int`): Default timeout for operations +- **max_retries** (`int`): Maximum retry attempts for failed operations + +#### Methods + +- **`to_dict()`**: Convert configuration to dictionary + +### Options Classes + +#### BuildOptions + +Configuration for build operations. + +```python +class BuildOptions: + def __init__(self, + sanitizer: Optional[str] = 'address', + architecture: str = 'x86_64', + fuzzing_engine: Optional[str] = 'libfuzzer', + environment_vars: Optional[Dict[str, str]] = None, + build_args: Optional[List[str]] = None, + timeout_seconds: Optional[int] = None) +``` + +#### RunOptions + +Configuration for execution operations. 
+ +```python +class RunOptions: + def __init__(self, + duration_seconds: int = 3600, + timeout_seconds: int = 25, + max_memory_mb: int = 1024, + detect_leaks: bool = True, + extract_coverage: bool = False, + corpus_dir: Optional[str] = None, + output_dir: str = 'fuzz_output', + engine_args: Optional[List[str]] = None, + env_vars: Optional[Dict[str, str]] = None) +``` + +#### PipelineOptions + +Configuration for full pipeline operations. + +```python +class PipelineOptions: + def __init__(self, + build_options: Optional[BuildOptions] = None, + run_options: Optional[RunOptions] = None, + trials: int = 1, + analyze_coverage: bool = True, + store_results: bool = True) +``` + +## Core Classes + +### OSSFuzzSDK + +The main SDK facade class that provides access to all functionality. + +```python +class OSSFuzzSDK: + def __init__(self, + project_name: str, + config: Optional[Union[Dict[str, Any], SDKConfig]] = None) +``` + +#### Parameters + +- **project_name** (`str`): Name of the OSS-Fuzz project +- **config** (`Dict` or `SDKConfig`, optional): Configuration for the SDK + +#### Properties + +- **project_name** (`str`): Project name +- **config** (`Dict`): Configuration dictionary +- **sdk_config** (`SDKConfig`): Configuration object +- **storage** (`StorageManager`): Storage manager instance +- **result_manager** (`ResultManager`): Result manager instance +- **benchmark_manager** (`BenchmarkManager`): Benchmark manager instance +- **local_builder** (`LocalBuilder`): Local builder instance +- **local_runner** (`LocalRunner`): Local runner instance + +### Result Classes + +#### BuildResult + +Result of a build operation. + +```python +class BuildResult: + def __init__(self, success: bool, message: str = '', + build_id: Optional[str] = None, artifacts: Optional[Dict] = None) +``` + +**Properties:** + +- `success` (`bool`): Whether the build succeeded +- `message` (`str`): Build result message +- `build_id` (`str`): Unique build identifier +- `artifacts` (`Dict`): Build artifacts and metadata +- `timestamp` (`datetime`): Build completion timestamp + +#### RunResult + +Result of a run operation. + +```python +class RunResult: + def __init__(self, success: bool, message: str = '', + run_id: Optional[str] = None, crashes: bool = False, + coverage_data: Optional[Dict] = None) +``` + +**Properties:** + +- `success` (`bool`): Whether the run succeeded +- `message` (`str`): Run result message +- `run_id` (`str`): Unique run identifier +- `crashes` (`bool`): Whether crashes were detected +- `coverage_data` (`Dict`): Coverage information +- `timestamp` (`datetime`): Run completion timestamp + +#### PipelineResult + +Result of a full pipeline operation. + +```python +class PipelineResult: + def __init__(self, success: bool, message: str = '', + pipeline_id: Optional[str] = None, + build_results: Optional[List[BuildResult]] = None, + run_results: Optional[List[RunResult]] = None) +``` + +**Properties:** + +- `success` (`bool`): Whether the pipeline succeeded +- `message` (`str`): Pipeline result message +- `pipeline_id` (`str`): Unique pipeline identifier +- `build_results` (`List[BuildResult]`): List of build results +- `run_results` (`List[RunResult]`): List of run results +- `timestamp` (`datetime`): Pipeline completion timestamp + +## Build Operations + +### build_fuzz_target() + +Build a single fuzz target. 
+ +```python +def build_fuzz_target(self, target_spec: Union[FuzzTarget, Dict[str, Any]], + options: Optional[BuildOptions] = None) -> BuildResult +``` + +#### Parameters + +- **target_spec** (`FuzzTarget` or `Dict`): Fuzz target specification +- **options** (`BuildOptions`, optional): Build configuration options + +#### Returns + +- **`BuildResult`**: Result of the build operation + +#### Example + +```python +# Using dictionary specification +target_spec = { + 'name': 'my_target', + 'source_code': '// Fuzz target source code', + 'build_script': '// Build script', + 'project_name': 'my_project', + 'language': 'c++' +} + +options = BuildOptions(sanitizer='memory', timeout_seconds=1800) +result = sdk.build_fuzz_target(target_spec, options) + +if result.success: + print(f"Build successful: {result.build_id}") + print(f"Artifacts: {result.artifacts}") +else: + print(f"Build failed: {result.message}") +``` + +### build_benchmark() + +Build a specific benchmark by ID. + +```python +def build_benchmark(self, benchmark_id: str, + options: Optional[BuildOptions] = None) -> BuildResult +``` + +#### Parameters + +- **benchmark_id** (`str`): Benchmark identifier +- **options** (`BuildOptions`, optional): Build configuration options + +#### Returns + +- **`BuildResult`**: Result of the build operation + +#### Example + +```python +result = sdk.build_benchmark('benchmark_123') +print(f"Build success: {result.success}") +``` + +### get_build_status() + +Check the status of a build operation. + +```python +def get_build_status(self, build_id: str) -> Dict[str, Any] +``` + +#### Parameters + +- **build_id** (`str`): Build identifier + +#### Returns + +- **`Dict[str, Any]`**: Build status information + +#### Example + +```python +status = sdk.get_build_status('build_123') +print(f"Status: {status['status']}") +print(f"Message: {status['message']}") +``` + +### get_build_artifacts() + +Retrieve build artifacts and metadata. + +```python +def get_build_artifacts(self, build_id: str) -> Dict[str, Any] +``` + +#### Parameters + +- **build_id** (`str`): Build identifier + +#### Returns + +- **`Dict[str, Any]`**: Build artifacts and metadata + +### list_recent_builds() + +List recent builds with optional filtering. + +```python +def list_recent_builds(self, limit: int = 10, + filters: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]] +``` + +#### Parameters + +- **limit** (`int`): Maximum number of builds to return +- **filters** (`Dict`, optional): Filters to apply + +#### Returns + +- **`List[Dict[str, Any]]`**: List of build information + +#### Example + +```python +# Get recent successful builds +filters = {'status': 'success'} +builds = sdk.list_recent_builds(limit=5, filters=filters) +for build in builds: + print(f"Build {build['build_id']}: {build['status']}") +``` + +## Execution Operations + +### run_fuzz_target() + +Run a single fuzz target. 
+ +```python +def run_fuzz_target(self, target_spec: Union[FuzzTarget, Dict[str, Any]], + build_metadata: Dict[str, Any], + options: Optional[RunOptions] = None) -> RunResult +``` + +#### Parameters + +- **target_spec** (`FuzzTarget` or `Dict`): Fuzz target specification +- **build_metadata** (`Dict`): Build metadata from previous build +- **options** (`RunOptions`, optional): Run configuration options + +#### Returns + +- **`RunResult`**: Result of the run operation + +#### Example + +```python +target_spec = { + 'name': 'my_target', + 'source_code': '// Fuzz target source', + 'project_name': 'my_project', + 'language': 'c++' +} + +build_metadata = {'artifacts': {'binary': '/path/to/binary'}} +options = RunOptions(duration_seconds=1800, extract_coverage=True) + +result = sdk.run_fuzz_target(target_spec, build_metadata, options) +print(f"Run success: {result.success}") +print(f"Crashes detected: {result.crashes}") +print(f"Coverage: {result.coverage_data}") +``` + +### run_benchmark() + +Run a specific benchmark (build + run). + +```python +def run_benchmark(self, benchmark_id: str, + options: Optional[RunOptions] = None) -> RunResult +``` + +#### Parameters + +- **benchmark_id** (`str`): Benchmark identifier +- **options** (`RunOptions`, optional): Run configuration options + +#### Returns + +- **`RunResult`**: Result of the run operation + +#### Example + +```python +options = RunOptions(duration_seconds=3600, detect_leaks=True) +result = sdk.run_benchmark('benchmark_123', options) + +if result.success: + print(f"Run completed: {result.run_id}") + if result.crashes: + print("Crashes detected!") + print(f"Coverage: {result.coverage_data.get('cov_pcs', 0)} PCs") +``` + +### get_run_status() + +Check the status of a run operation. + +```python +def get_run_status(self, run_id: str) -> Dict[str, Any] +``` + +#### Parameters + +- **run_id** (`str`): Run identifier + +#### Returns + +- **`Dict[str, Any]`**: Run status information + +### get_run_results() + +Retrieve run results and artifacts. + +```python +def get_run_results(self, run_id: str) -> Dict[str, Any] +``` + +#### Parameters + +- **run_id** (`str`): Run identifier + +#### Returns + +- **`Dict[str, Any]`**: Run results and artifacts + +### list_recent_runs() + +List recent runs with optional filtering. + +```python +def list_recent_runs(self, limit: int = 10, + filters: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]] +``` + +#### Parameters + +- **limit** (`int`): Maximum number of runs to return +- **filters** (`Dict`, optional): Filters to apply + +#### Returns + +- **`List[Dict[str, Any]]`**: List of run information + +## Workflow Orchestration + +### run_full_pipeline() + +Execute a complete build → run → analyze pipeline. 
+ +```python +def run_full_pipeline(self, benchmark_id: str, + options: Optional[PipelineOptions] = None) -> PipelineResult +``` + +#### Parameters + +- **benchmark_id** (`str`): Benchmark identifier +- **options** (`PipelineOptions`, optional): Pipeline configuration + +#### Returns + +- **`PipelineResult`**: Result of the complete pipeline + +#### Example + +```python +# Configure pipeline options +build_opts = BuildOptions(sanitizer='address') +run_opts = RunOptions(duration_seconds=1800, extract_coverage=True) +pipeline_opts = PipelineOptions( + build_options=build_opts, + run_options=run_opts, + trials=3, + analyze_coverage=True, + store_results=True +) + +# Run pipeline +result = sdk.run_full_pipeline('benchmark_123', pipeline_opts) + +print(f"Pipeline success: {result.success}") +print(f"Total trials: {len(result.build_results)}") + +# Analyze results +successful_builds = sum(1 for r in result.build_results if r.success) +successful_runs = sum(1 for r in result.run_results if r.success) + +print(f"Successful builds: {successful_builds}/{len(result.build_results)}") +print(f"Successful runs: {successful_runs}/{len(result.run_results)}") + +# Check for crashes +crashes_detected = any(r.crashes for r in result.run_results if r.success) +print(f"Crashes detected: {crashes_detected}") +``` + +## Result Management + +### get_benchmark_result() + +Get result for a specific benchmark. + +```python +def get_benchmark_result(self, benchmark_id: str, trial: Optional[int] = None) -> Optional[Any] +``` + +#### Parameters + +- **benchmark_id** (`str`): Benchmark identifier +- **trial** (`int`, optional): Specific trial number (gets latest if not specified) + +#### Returns + +- **`Result`** or **`None`**: Result object or None if not found + +#### Example + +```python +# Get latest result +result = sdk.get_benchmark_result('benchmark_123') +if result: + print(f"Build successful: {result.is_build_successful()}") + print(f"Run successful: {result.is_run_successful()}") + +# Get specific trial result +trial_result = sdk.get_benchmark_result('benchmark_123', trial=2) +if trial_result: + print(f"Trial 2 result: {trial_result.trial}") +``` + +### get_benchmark_metrics() + +Get comprehensive metrics for a benchmark. + +```python +def get_benchmark_metrics(self, benchmark_id: str) -> Dict[str, Any] +``` + +#### Parameters + +- **benchmark_id** (`str`): Benchmark identifier + +#### Returns + +- **`Dict[str, Any]`**: Dictionary containing comprehensive metrics + +#### Example + +```python +metrics = sdk.get_benchmark_metrics('benchmark_123') + +print(f"Compiles: {metrics.get('compiles', False)}") +print(f"Crashes: {metrics.get('crashes', False)}") +print(f"Coverage: {metrics.get('coverage', 0.0)}%") +print(f"Line coverage diff: {metrics.get('line_coverage_diff', 0.0)}%") +print(f"Build success rate: {metrics.get('build_success_rate', 0.0)}") +print(f"Total trials: {metrics.get('trial', 0)}") +``` + +### get_system_metrics() + +Get system-wide aggregated metrics. 
+ +```python +def get_system_metrics(self) -> Dict[str, Any] +``` + +#### Returns + +- **`Dict[str, Any]`**: Dictionary containing system-wide metrics + +#### Example + +```python +metrics = sdk.get_system_metrics() + +print(f"Total benchmarks: {metrics.get('total_benchmarks', 0)}") +print(f"Total builds: {metrics.get('total_builds', 0)}") +print(f"Build success rate: {metrics.get('build_success_rate', 0.0)}") +print(f"Average coverage: {metrics.get('average_coverage', 0.0)}%") +print(f"Total crashes: {metrics.get('total_crashes', 0)}") +``` + +### get_coverage_trend() + +Get coverage trend for a benchmark. + +```python +def get_coverage_trend(self, benchmark_id: str, days: int = 30) -> Union[Any, List[Dict[str, Any]]] +``` + +#### Parameters + +- **benchmark_id** (`str`): Benchmark identifier +- **days** (`int`): Number of days to analyze + +#### Returns + +- **`DataFrame`** or **`List[Dict]`**: Coverage trend data (DataFrame if pandas available) + +#### Example + +```python +trend = sdk.get_coverage_trend('benchmark_123', days=14) + +if isinstance(trend, list): + print(f"Coverage data points: {len(trend)}") + for point in trend[-5:]: # Last 5 data points + print(f"Date: {point.get('date')}, Coverage: {point.get('coverage', 0)}%") +``` + +### get_build_success_rate() + +Get build success rate for a benchmark. + +```python +def get_build_success_rate(self, benchmark_id: str, days: int = 30) -> float +``` + +#### Parameters + +- **benchmark_id** (`str`): Benchmark identifier +- **days** (`int`): Number of days to analyze + +#### Returns + +- **`float`**: Build success rate (0.0 to 1.0) + +#### Example + +```python +success_rate = sdk.get_build_success_rate('benchmark_123', days=7) +print(f"7-day build success rate: {success_rate:.2%}") + +monthly_rate = sdk.get_build_success_rate('benchmark_123', days=30) +print(f"30-day build success rate: {monthly_rate:.2%}") +``` + +### get_crash_summary() + +Get crash summary for a benchmark. + +```python +def get_crash_summary(self, benchmark_id: str, days: int = 30) -> Dict[str, Any] +``` + +#### Parameters + +- **benchmark_id** (`str`): Benchmark identifier +- **days** (`int`): Number of days to analyze + +#### Returns + +- **`Dict[str, Any]`**: Dictionary containing crash statistics + +#### Example + +```python +crash_summary = sdk.get_crash_summary('benchmark_123', days=7) + +print(f"Total crashes: {crash_summary.get('total_crashes', 0)}") +print(f"Unique crashes: {crash_summary.get('unique_crashes', 0)}") +print(f"Crash rate: {crash_summary.get('crash_rate', 0.0):.2%}") +print(f"Most recent crash: {crash_summary.get('latest_crash_date', 'None')}") +``` + +## Benchmark Management + +### create_benchmark() + +Create a new benchmark. + +```python +def create_benchmark(self, benchmark_spec: Dict[str, Any]) -> bool +``` + +#### Parameters + +- **benchmark_spec** (`Dict`): Benchmark specification dictionary + +#### Returns + +- **`bool`**: True if successful, False otherwise + +#### Example + +```python +benchmark_spec = { + 'id': 'new_benchmark_123', + 'project': 'my_project', + 'language': 'c++', + 'function_name': 'test_function', + 'function_signature': 'int test_function(const uint8_t* data, size_t size)', + 'return_type': 'int', + 'target_path': '/path/to/target.h', + 'description': 'Test benchmark for fuzzing' +} + +success = sdk.create_benchmark(benchmark_spec) +if success: + print("Benchmark created successfully") +else: + print("Failed to create benchmark") +``` + +### update_benchmark() + +Update an existing benchmark. 
+ +```python +def update_benchmark(self, benchmark_id: str, updates: Dict[str, Any]) -> bool +``` + +#### Parameters + +- **benchmark_id** (`str`): Benchmark identifier +- **updates** (`Dict`): Dictionary of updates to apply + +#### Returns + +- **`bool`**: True if successful, False otherwise + +#### Example + +```python +updates = { + 'description': 'Updated benchmark description', + 'function_signature': 'int updated_function(const char* input)', + 'tags': ['security', 'performance'] +} + +success = sdk.update_benchmark('benchmark_123', updates) +if success: + print("Benchmark updated successfully") +``` + +### delete_benchmark() + +Delete a benchmark. + +```python +def delete_benchmark(self, benchmark_id: str) -> bool +``` + +#### Parameters + +- **benchmark_id** (`str`): Benchmark identifier + +#### Returns + +- **`bool`**: True if successful, False otherwise + +### list_benchmarks() + +List available benchmarks with filtering. + +```python +def list_benchmarks(self, filters: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]] +``` + +#### Parameters + +- **filters** (`Dict`, optional): Filters to apply + +#### Returns + +- **`List[Dict[str, Any]]`**: List of benchmark information + +#### Example + +```python +# List all benchmarks +all_benchmarks = sdk.list_benchmarks() +print(f"Total benchmarks: {len(all_benchmarks)}") + +# Filter by language +cpp_benchmarks = sdk.list_benchmarks(filters={'language': 'c++'}) +print(f"C++ benchmarks: {len(cpp_benchmarks)}") + +# Filter by project +project_benchmarks = sdk.list_benchmarks(filters={'project': 'my_project'}) +for benchmark in project_benchmarks: + print(f"Benchmark: {benchmark['id']} - {benchmark['function_name']}") +``` + +### search_benchmarks() + +Search benchmarks by query. + +```python +def search_benchmarks(self, query: str, limit: int = 10) -> List[Dict[str, Any]] +``` + +#### Parameters + +- **query** (`str`): Search query string +- **limit** (`int`): Maximum number of results + +#### Returns + +- **`List[Dict[str, Any]]`**: List of matching benchmark information + +#### Example + +```python +# Search for benchmarks containing "crypto" +results = sdk.search_benchmarks('crypto', limit=5) +for result in results: + print(f"Found: {result['id']} - {result['description']}") +``` + +## Export & Analysis + +### export_results() + +Export results for multiple benchmarks. + +```python +def export_results(self, benchmark_ids: List[str], + format: str = 'json', + output_path: Optional[str] = None) -> str +``` + +#### Parameters + +- **benchmark_ids** (`List[str]`): List of benchmark identifiers +- **format** (`str`): Export format ('json', 'csv', 'xlsx') +- **output_path** (`str`, optional): Optional output file path + +#### Returns + +- **`str`**: Path to exported file + +#### Example + +```python +# Export multiple benchmarks to JSON +benchmark_ids = ['bench_1', 'bench_2', 'bench_3'] +output_path = sdk.export_results(benchmark_ids, format='json') +print(f"Results exported to: {output_path}") + +# Export to custom path with CSV format +custom_path = '/path/to/my_export.csv' +csv_path = sdk.export_results( + benchmark_ids, + format='csv', + output_path=custom_path +) +print(f"CSV export saved to: {csv_path}") + +# Export to Excel format +xlsx_path = sdk.export_results(benchmark_ids, format='xlsx') +print(f"Excel export saved to: {xlsx_path}") +``` + +### generate_comparison_report() + +Generate a comparison report for multiple benchmarks. 
+ +```python +def generate_comparison_report(self, benchmark_ids: List[str], + days: int = 30) -> Dict[str, Any] +``` + +#### Parameters + +- **benchmark_ids** (`List[str]`): List of benchmark identifiers to compare +- **days** (`int`): Number of days to analyze + +#### Returns + +- **`Dict[str, Any]`**: Dictionary containing comparison report + +#### Example + +```python +benchmark_ids = ['bench_1', 'bench_2', 'bench_3'] +report = sdk.generate_comparison_report(benchmark_ids, days=14) + +print(f"Comparison report generated at: {report['comparison_timestamp']}") +print(f"Analyzed {report['benchmark_count']} benchmarks over {report['analysis_period_days']} days") + +# Analyze each benchmark +for benchmark_id, data in report['benchmarks'].items(): + if 'error' in data: + print(f"{benchmark_id}: Error - {data['error']}") + continue + + metrics = data['metrics'] + build_rate = data['build_success_rate'] + crash_summary = data['crash_summary'] + + print(f"\n{benchmark_id}:") + print(f" Build success rate: {build_rate:.2%}") + print(f" Coverage: {metrics.get('coverage', 0)}%") + print(f" Total crashes: {crash_summary.get('total_crashes', 0)}") +``` + +## Historical Data Analysis + +### generate_project_report() + +Generate a comprehensive project report. + +```python +def generate_project_report(self, days: int = 30, + include_details: bool = True) -> Dict[str, Any] +``` + +#### Parameters + +- **days** (`int`): Number of days to analyze +- **include_details** (`bool`): Whether to include detailed information + +#### Returns + +- **`Dict[str, Any]`**: Comprehensive project report + +#### Example + +```python +# Generate monthly report +report = sdk.generate_project_report(days=30, include_details=True) + +print(f"Project: {report['project_name']}") +print(f"Report period: {report['start_date']} to {report['end_date']}") + +# Build summary +build_summary = report.get('build_summary', {}) +print(f"\nBuild Summary:") +print(f" Total builds: {build_summary.get('total_builds', 0)}") +print(f" Success rate: {build_summary.get('success_rate', 0):.2%}") + +# Coverage summary +coverage_summary = report.get('coverage_summary', {}) +print(f"\nCoverage Summary:") +print(f" Average coverage: {coverage_summary.get('average_coverage', 0):.1f}%") +print(f" Coverage trend: {coverage_summary.get('trend', 'unknown')}") + +# Crash summary +crash_summary = report.get('crash_summary', {}) +print(f"\nCrash Summary:") +print(f" Total crashes: {crash_summary.get('total_crashes', 0)}") +print(f" Unique crashes: {crash_summary.get('unique_crashes', 0)}") +``` + +### analyze_fuzzing_efficiency() + +Analyze fuzzing efficiency over a time period. 
+ +```python +def analyze_fuzzing_efficiency(self, days: int = 30) -> Dict[str, Any] +``` + +#### Parameters + +- **days** (`int`): Number of days to analyze + +#### Returns + +- **`Dict[str, Any]`**: Fuzzing efficiency analysis + +#### Example + +```python +efficiency = sdk.analyze_fuzzing_efficiency(days=14) + +print(f"Project: {efficiency['project_name']}") +print(f"Analysis period: {efficiency['period_days']} days") + +# Overall efficiency +overall = efficiency['overall_efficiency'] +print(f"\nOverall Efficiency: {overall['overall_efficiency']:.1f}% ({overall['level']})") + +# Category scores +scores = overall['category_scores'] +print(f"Build efficiency: {scores['build']:.1f}%") +print(f"Coverage efficiency: {scores['coverage']:.1f}%") +print(f"Crash discovery: {scores['crash']:.1f}%") +print(f"Corpus growth: {scores['corpus']:.1f}%") + +# Detailed analysis +build_eff = efficiency['build_efficiency'] +print(f"\nBuild Efficiency:") +print(f" Builds per day: {build_eff['builds_per_day']:.1f}") +print(f" Success rate: {build_eff['success_rate']:.2%}") + +coverage_eff = efficiency['coverage_efficiency'] +print(f"\nCoverage Efficiency:") +print(f" Coverage velocity: {coverage_eff['coverage_velocity']:.2f}%/day") +print(f" Current coverage: {coverage_eff['current_coverage']:.1f}%") +``` + +### get_project_summary() + +Get a quick project summary. + +```python +def get_project_summary(self) -> Dict[str, Any] +``` + +#### Returns + +- **`Dict[str, Any]`**: Project summary information + +#### Example + +```python +summary = sdk.get_project_summary() + +print(f"Project: {summary['project_name']}") +print(f"Last updated: {summary['last_updated']}") +print(f"Total benchmarks: {summary.get('total_benchmarks', 0)}") +print(f"Latest coverage: {summary.get('latest_coverage', 'N/A')}") +print(f"Recent crashes: {summary.get('recent_crashes', 0)}") +print(f"Last successful build: {summary.get('last_successful_build', 'None')}") +``` + +## Error Handling + +The OSS-Fuzz SDK provides comprehensive error handling with graceful degradation when components are not available. 
+ +### Exception Types + +- **`OSSFuzzSDKError`**: General SDK errors +- **`OSSFuzzSDKConfigError`**: Configuration-related errors +- **`BuilderError`**: Build operation errors +- **`FuzzRunnerError`**: Execution operation errors +- **`BenchmarkError`**: Benchmark management errors + +### Error Handling Patterns + +#### Graceful Degradation + +```python +# SDK handles missing components gracefully +sdk = OSSFuzzSDK('my_project') + +# Methods return appropriate defaults when components unavailable +metrics = sdk.get_benchmark_metrics('benchmark_id') # Returns empty dict +result = sdk.get_benchmark_result('benchmark_id') # Returns None +builds = sdk.list_recent_builds() # Returns empty list +``` + +#### Exception Handling + +```python +from ossfuzz_py.errors import OSSFuzzSDKError, BuilderError + +try: + # Operations that might fail + result = sdk.run_full_pipeline('benchmark_id') + + if not result.success: + print(f"Pipeline failed: {result.message}") + +except OSSFuzzSDKError as e: + print(f"SDK error: {e}") +except BuilderError as e: + print(f"Build error: {e}") +except Exception as e: + print(f"Unexpected error: {e}") +``` + +#### Component Availability Checking + +```python +# Check component availability before use +if sdk.result_manager: + metrics = sdk.get_benchmark_metrics('benchmark_id') +else: + print("ResultManager not available") + +if sdk.local_builder: + result = sdk.build_benchmark('benchmark_id') +else: + print("Builder not available") +``` + +## Examples + +### Complete Workflow Example + +```python +from ossfuzz_py.core.ossfuzz_sdk import ( + OSSFuzzSDK, SDKConfig, PipelineOptions, BuildOptions, RunOptions +) + +# Initialize SDK with custom configuration +config = SDKConfig( + storage_backend='local', + storage_path='/tmp/ossfuzz_data', + log_level='INFO', + enable_caching=True +) + +sdk = OSSFuzzSDK('libpng', config) + +# Configure pipeline for comprehensive testing +build_opts = BuildOptions( + sanitizer='address', + architecture='x86_64', + timeout_seconds=1800 +) + +run_opts = RunOptions( + duration_seconds=3600, + detect_leaks=True, + extract_coverage=True, + max_memory_mb=2048 +) + +pipeline_opts = PipelineOptions( + build_options=build_opts, + run_options=run_opts, + trials=5, + analyze_coverage=True, + store_results=True +) + +# Run comprehensive analysis +benchmark_ids = ['png_decode_1', 'png_decode_2', 'png_encode_1'] + +for benchmark_id in benchmark_ids: + print(f"\n=== Processing {benchmark_id} ===") + + # Run full pipeline + pipeline_result = sdk.run_full_pipeline(benchmark_id, pipeline_opts) + + if pipeline_result.success: + print(f"✅ Pipeline completed successfully") + + # Analyze results + successful_builds = sum(1 for r in pipeline_result.build_results if r.success) + successful_runs = sum(1 for r in pipeline_result.run_results if r.success) + crashes_found = any(r.crashes for r in pipeline_result.run_results if r.success) + + print(f" Builds: {successful_builds}/{len(pipeline_result.build_results)}") + print(f" Runs: {successful_runs}/{len(pipeline_result.run_results)}") + print(f" Crashes found: {crashes_found}") + + # Get detailed metrics + metrics = sdk.get_benchmark_metrics(benchmark_id) + print(f" Coverage: {metrics.get('coverage', 0):.1f}%") + print(f" Build success rate: {metrics.get('build_success_rate', 0):.2%}") + + else: + print(f"❌ Pipeline failed: {pipeline_result.message}") + +# Generate comprehensive reports +print("\n=== Generating Reports ===") + +# Export results +export_path = sdk.export_results(benchmark_ids, format='json') 
+print(f"Results exported to: {export_path}") + +# Generate comparison report +comparison = sdk.generate_comparison_report(benchmark_ids, days=30) +print(f"Comparison report generated for {comparison['benchmark_count']} benchmarks") + +# Generate project report +project_report = sdk.generate_project_report(days=30, include_details=True) +print(f"Project report generated for {project_report['project_name']}") + +# Analyze efficiency +efficiency = sdk.analyze_fuzzing_efficiency(days=30) +overall_score = efficiency['overall_efficiency']['overall_efficiency'] +print(f"Overall fuzzing efficiency: {overall_score:.1f}%") +``` + +### Batch Processing Example + +```python +# Process multiple projects +projects = ['libpng', 'libjpeg', 'zlib'] + +for project in projects: + print(f"\n=== Processing Project: {project} ===") + + # Initialize SDK for each project + sdk = OSSFuzzSDK(project) + + # Get project summary + summary = sdk.get_project_summary() + print(f"Total benchmarks: {summary.get('total_benchmarks', 0)}") + + # List all benchmarks + benchmarks = sdk.list_benchmarks() + + # Run quick analysis on each benchmark + for benchmark in benchmarks[:3]: # Limit to first 3 + benchmark_id = benchmark['id'] + + # Get metrics + metrics = sdk.get_benchmark_metrics(benchmark_id) + build_rate = sdk.get_build_success_rate(benchmark_id, days=7) + + print(f" {benchmark_id}:") + print(f" Build success rate: {build_rate:.2%}") + print(f" Coverage: {metrics.get('coverage', 0):.1f}%") + + # Check for recent crashes + crash_summary = sdk.get_crash_summary(benchmark_id, days=7) + if crash_summary.get('total_crashes', 0) > 0: + print(f" ⚠️ {crash_summary['total_crashes']} crashes in last 7 days") +``` + +## Best Practices + +### Configuration Management + +1. **Use Environment Variables**: Set up environment variables for consistent configuration across environments. + +```bash +export OSSFUZZ_HISTORY_STORAGE_BACKEND=gcs +export GCS_BUCKET_NAME=my-ossfuzz-bucket +export WORK_DIR=/tmp/ossfuzz_work +``` + +2. **Create Reusable Configurations**: Define standard configurations for different use cases. + +```python +# Development configuration +dev_config = SDKConfig( + storage_backend='local', + storage_path='/tmp/ossfuzz_dev', + log_level='DEBUG', + enable_caching=False +) + +# Production configuration +prod_config = SDKConfig( + storage_backend='gcs', + gcs_bucket_name='prod-ossfuzz-bucket', + log_level='INFO', + enable_caching=True, + timeout_seconds=7200 +) +``` + +### Performance Optimization + +1. **Use Appropriate Trial Counts**: Balance thoroughness with execution time. + +```python +# Quick testing +quick_opts = PipelineOptions(trials=1) + +# Thorough testing +thorough_opts = PipelineOptions(trials=5) + +# Comprehensive testing +comprehensive_opts = PipelineOptions(trials=10) +``` + +2. **Enable Caching**: Use caching for repeated operations. + +```python +config = SDKConfig(enable_caching=True) +``` + +3. **Batch Operations**: Process multiple benchmarks efficiently. + +```python +# Batch export +all_benchmark_ids = [b['id'] for b in sdk.list_benchmarks()] +sdk.export_results(all_benchmark_ids, format='json') + +# Batch comparison +sdk.generate_comparison_report(all_benchmark_ids, days=30) +``` + +### Error Handling Best Practices + +1. **Check Component Availability**: Always check if required components are available. 
+ +```python +if not sdk.result_manager: + print("Warning: ResultManager not available, some features disabled") + +if not sdk.local_builder: + print("Warning: Builder not available, build operations disabled") +``` + +2. **Handle Partial Failures**: Design workflows to handle partial failures gracefully. + +```python +pipeline_result = sdk.run_full_pipeline(benchmark_id, options) + +if not pipeline_result.success: + # Check individual components + if pipeline_result.build_results: + build_success = any(r.success for r in pipeline_result.build_results) + if build_success: + print("At least one build succeeded, investigating run failures...") +``` + +3. **Use Timeouts**: Set appropriate timeouts for long-running operations. + +```python +build_opts = BuildOptions(timeout_seconds=1800) # 30 minutes +run_opts = RunOptions(duration_seconds=3600) # 1 hour +``` + +### Monitoring and Logging + +1. **Configure Appropriate Log Levels**: Use different log levels for different environments. + +```python +# Development +dev_config = SDKConfig(log_level='DEBUG') + +# Production +prod_config = SDKConfig(log_level='INFO') +``` + +2. **Monitor Key Metrics**: Regularly check important metrics. + +```python +# Daily monitoring +def daily_health_check(sdk): + summary = sdk.get_project_summary() + system_metrics = sdk.get_system_metrics() + + print(f"Build success rate: {system_metrics.get('build_success_rate', 0):.2%}") + print(f"Total crashes: {system_metrics.get('total_crashes', 0)}") + print(f"Average coverage: {system_metrics.get('average_coverage', 0):.1f}%") + + return system_metrics +``` + +3. **Set Up Alerts**: Monitor for concerning trends. + +```python +def check_alerts(sdk, benchmark_id): + build_rate = sdk.get_build_success_rate(benchmark_id, days=7) + crash_summary = sdk.get_crash_summary(benchmark_id, days=1) + + if build_rate < 0.8: # Less than 80% success rate + print(f"⚠️ Alert: Low build success rate for {benchmark_id}: {build_rate:.2%}") + + if crash_summary.get('total_crashes', 0) > 10: # More than 10 crashes per day + print(f"🚨 Alert: High crash rate for {benchmark_id}: {crash_summary['total_crashes']} crashes") +``` + +### Data Management + +1. **Regular Exports**: Regularly export data for backup and analysis. + +```python +# Weekly export +import datetime + +def weekly_export(sdk): + timestamp = datetime.datetime.now().strftime("%Y%m%d") + all_benchmarks = [b['id'] for b in sdk.list_benchmarks()] + + export_path = sdk.export_results( + all_benchmarks, + format='json', + output_path=f'weekly_export_{timestamp}.json' + ) + + return export_path +``` + +2. **Clean Up Old Data**: Implement data retention policies. + +```python +# This would be implemented based on your storage backend +def cleanup_old_data(sdk, days_to_keep=90): + # Implementation depends on storage backend + pass +``` + +--- + +For more information and updates, please refer to the [OSS-Fuzz SDK GitHub repository](https://github.com/google/oss-fuzz-gen). diff --git a/ossfuzz_py/__init__.py b/ossfuzz_py/__init__.py index 83189b0bb..ef39a4923 100644 --- a/ossfuzz_py/__init__.py +++ b/ossfuzz_py/__init__.py @@ -19,27 +19,107 @@ historical fuzzing results, and execute customized fuzzing experiments. 
""" -from .core.benchmark_manager import Benchmark, BenchmarkManager +try: + from .core.benchmark_manager import Benchmark, BenchmarkManager +except ImportError: + # Handle missing dependencies gracefully + Benchmark = None + BenchmarkManager = None # Data models and enums -from .core.data_models import (CrashData, FuzzingEngine, ProjectConfig, - Sanitizer, Severity) +try: + from .core.data_models import (BuildHistoryData, CorpusHistoryData, + CoverageHistoryData, CrashData, + CrashHistoryData, FuzzingEngine, + HistoricalSummary, ProjectConfig, Sanitizer, + Severity, TimeSeriesData) +except ImportError: + # Handle missing dependencies gracefully + BuildHistoryData = CorpusHistoryData = CoverageHistoryData = None + CrashData = CrashHistoryData = FuzzingEngine = HistoricalSummary = None + ProjectConfig = Sanitizer = Severity = TimeSeriesData = None + # Core SDK - Main SDK class and modules -from .core.ossfuzz_manager import OSSFuzzManager +try: + from .core.ossfuzz_manager import OSSFuzzManager + from .core.ossfuzz_sdk import OSSFuzzSDK +except ImportError: + # Handle missing dependencies gracefully + OSSFuzzManager = OSSFuzzSDK = None +try: + from .data.storage_adapter import (FileStorageAdapter, GCSStorageAdapter, + StorageAdapter) + # Storage components + from .data.storage_manager import StorageManager +except ImportError: + # Handle missing dependencies gracefully + FileStorageAdapter = GCSStorageAdapter = StorageAdapter = None + StorageManager = None + # Error handling from .errors import * +# History managers +try: + from .history import (BuildHistoryManager, CorpusHistoryManager, + CoverageHistoryManager, CrashHistoryManager, + HistoryManager) +except ImportError: + # Handle missing dependencies gracefully + BuildHistoryManager = CorpusHistoryManager = CoverageHistoryManager = None + CrashHistoryManager = HistoryManager = None + +# Result management +try: + from .result import (AnalysisInfo, BenchmarkResult, BuildInfo, + CoverageAnalysis, CrashAnalysis, Result, ResultManager, + RunInfo, TrialResult) +except ImportError: + # Handle missing dependencies gracefully + AnalysisInfo = BenchmarkResult = BuildInfo = CoverageAnalysis = None + CrashAnalysis = Result = ResultManager = RunInfo = TrialResult = None + # Public API - All exports available to SDK clients __all__ = [ # Core SDK - Main classes according to UML diagram 'OSSFuzzManager', + 'OSSFuzzSDK', 'BenchmarkManager', 'Benchmark', + # Result management + 'ResultManager', + 'Result', + 'BuildInfo', + 'RunInfo', + 'AnalysisInfo', + 'TrialResult', + 'BenchmarkResult', + 'CoverageAnalysis', + 'CrashAnalysis', + + # History managers + 'HistoryManager', + 'BuildHistoryManager', + 'CrashHistoryManager', + 'CorpusHistoryManager', + 'CoverageHistoryManager', + + # Storage components + 'StorageManager', + 'StorageAdapter', + 'FileStorageAdapter', + 'GCSStorageAdapter', + # Data models and enums 'Severity', 'Sanitizer', - 'Sanitizer', 'FuzzingEngine', + 'BuildHistoryData', + 'CrashHistoryData', + 'CorpusHistoryData', + 'CoverageHistoryData', + 'TimeSeriesData', + 'HistoricalSummary', # Core error types and enums 'ErrorCode', diff --git a/ossfuzz_py/build/build_config.py b/ossfuzz_py/build/build_config.py index 9f7191f4a..293a62a7d 100644 --- a/ossfuzz_py/build/build_config.py +++ b/ossfuzz_py/build/build_config.py @@ -31,8 +31,8 @@ import logging from typing import Any, Dict, List, Optional -from ossfuzz_py import BuildConfigError from ossfuzz_py.core.data_models import FuzzingEngine, ProjectConfig, Sanitizer +from ossfuzz_py.errors import 
BuildConfigError # Configure module logger logger = logging.getLogger('ossfuzz_sdk.build_config') diff --git a/ossfuzz_py/build/builder.py b/ossfuzz_py/build/builder.py index b2e32d971..5760acc79 100644 --- a/ossfuzz_py/build/builder.py +++ b/ossfuzz_py/build/builder.py @@ -37,9 +37,13 @@ from ossfuzz_py.build.build_config import BuildConfig from ossfuzz_py.build.cloud_build_manager import CloudBuildManager from ossfuzz_py.build.docker_manager import CommandResult, DockerManager +from ossfuzz_py.core.benchmark_manager import Benchmark from ossfuzz_py.core.data_models import Sanitizer from ossfuzz_py.data.storage_manager import StorageManager from ossfuzz_py.execution.fuzz_target import FuzzTarget +# Import ResultManager for result storage +from ossfuzz_py.result.result_manager import ResultManager +from ossfuzz_py.result.results import BuildInfo, Result from ossfuzz_py.utils.file_utils import FileUtils # NOTE: Running-related constants have been moved to LocalRunner and CloudRunner @@ -49,7 +53,7 @@ logger = logging.getLogger('ossfuzz_sdk.builder') -class Result: +class BuildResult: """Simple result class for build operations.""" def __init__(self, @@ -99,7 +103,7 @@ def setup_environment(self) -> bool: """ @abstractmethod - def build(self, target: FuzzTarget, sanitizer: Sanitizer) -> 'Result': + def build(self, target: FuzzTarget, sanitizer: Sanitizer) -> 'BuildResult': """ Build a fuzz target with the specified sanitizer. @@ -168,8 +172,11 @@ class LocalBuilder(Builder): Docker, following the UML specification. """ - def __init__(self, storage_manager: StorageManager, build_config: BuildConfig, - docker_manager: DockerManager): + def __init__(self, + storage_manager: StorageManager, + build_config: BuildConfig, + docker_manager: DockerManager, + result_manager: Optional[ResultManager] = None): """ Initialize the local builder. 
@@ -177,9 +184,11 @@ def __init__(self, storage_manager: StorageManager, build_config: BuildConfig, storage_manager: Storage manager for artifacts build_config: Build configuration docker_manager: Docker manager for container operations + result_manager: Optional ResultManager for centralized result storage """ super().__init__(storage_manager, build_config) self.docker_manager = docker_manager + self.result_manager = result_manager self._artifacts: Dict[str, Path] = {} self.logger.debug("Initialized LocalBuilder with Docker manager") @@ -198,7 +207,9 @@ def setup_environment(self) -> bool: def build(self, target: FuzzTarget, - sanitizer: Sanitizer = Sanitizer.ADDRESS) -> Result: + sanitizer: Sanitizer = Sanitizer.ADDRESS, + benchmark_id: Optional[str] = None, + trial: int = 1) -> BuildResult: """Build a fuzz target with the specified sanitizer.""" try: self.logger.info("Building target %s with sanitizer %s", target.name, @@ -206,7 +217,9 @@ def build(self, # Prepare build environment if not self.prepare_build_environment(): - return Result(False, "Failed to prepare build environment") + build_result = BuildResult(False, "Failed to prepare build environment") + self._store_build_result(target, build_result, benchmark_id, trial) + return build_result # Use the build_local method (focused only on building) success, build_metadata = self.build_local( @@ -216,7 +229,9 @@ def build(self, if not success: error_msg = build_metadata.get('error', 'Local build failed') - return Result(False, error_msg) + build_result = BuildResult(False, error_msg) + self._store_build_result(target, build_result, benchmark_id, trial) + return build_result # # Store artifacts in storage manager # for name, path in artifacts.items(): @@ -226,13 +241,20 @@ def build(self, # self.storage_manager.store( # f"{self.build_config.project_name}/{name}", data) - return Result(True, - "Build completed successfully", - metadata=build_metadata) + build_result = BuildResult(True, + "Build completed successfully", + metadata=build_metadata) + + # Store result through ResultManager if available + self._store_build_result(target, build_result, benchmark_id, trial) + + return build_result except Exception as e: self.logger.error("Build failed: %s", e) - return Result(False, f"Build failed: {e}") + build_result = BuildResult(False, f"Build failed: {e}") + self._store_build_result(target, build_result, benchmark_id, trial) + return build_result def clean(self) -> bool: """Clean up build artifacts and temporary files.""" @@ -392,6 +414,53 @@ def build_local(self, self.logger.info('Built %s locally successfully.', benchmark_target_name) return True, build_metadata + def _store_build_result(self, target: FuzzTarget, build_result: BuildResult, + benchmark_id: Optional[str], trial: int) -> None: + """Store build result through ResultManager if available.""" + try: + # Create BuildInfo from build result + build_info = BuildInfo( + compiles=build_result.success, + compile_log=build_result.message or '', + errors=[] + if build_result.success else [build_result.message or 'Build failed'], + binary_exists=build_result.success, + is_function_referenced=build_result.success, + fuzz_target_source=target.source_code, + build_script_source=target.build_script or '', + ) + + # Create minimal benchmark for the result + benchmark = Benchmark( + project=self.build_config.project_name, + language=target.language, + function_signature= + f'int {target.name}(const uint8_t* data, size_t size)', + function_name=target.name, + return_type='int', + 
target_path='', + id=benchmark_id or target.name, + ) + + # Create Result object for storage + result_obj = Result( + benchmark=benchmark, + work_dirs='', + trial=trial, + build_info=build_info, + ) + + # Store through ResultManager + if self.result_manager: + self.result_manager.store_result(benchmark_id or target.name, + result_obj) + self.logger.debug("Stored build result for %s through ResultManager", + benchmark_id or target.name) + + except Exception as e: + self.logger.warning( + "Failed to store build result through ResultManager: %s", e) + def build_target_local(self, generated_project: str, sanitizer: str = 'address') -> bool: @@ -458,7 +527,7 @@ def setup_environment(self) -> bool: self.logger.error("Failed to setup cloud environment: %s", e) return False - def build(self, target: FuzzTarget, sanitizer: Sanitizer) -> Result: + def build(self, target: FuzzTarget, sanitizer: Sanitizer) -> BuildResult: """Build a fuzz target using cloud build.""" try: self.logger.info("Starting cloud build for target %s with sanitizer %s", @@ -466,7 +535,7 @@ def build(self, target: FuzzTarget, sanitizer: Sanitizer) -> Result: # Prepare build environment if not self.prepare_build_environment(): - return Result(False, "Failed to prepare build environment") + return BuildResult(False, "Failed to prepare build environment") # Use the build_cloud method (focused only on building) success, build_metadata = self.build_cloud( @@ -477,7 +546,7 @@ def build(self, target: FuzzTarget, sanitizer: Sanitizer) -> Result: if not success: error_msg = build_metadata.get('error', 'Cloud build failed') - return Result(False, error_msg) + return BuildResult(False, error_msg) # Process build artifacts artifacts = self._process_cloud_build_artifacts(build_metadata) @@ -494,12 +563,12 @@ def build(self, target: FuzzTarget, sanitizer: Sanitizer) -> Result: # TODO: Running should be handled separately by CloudRunner # The build_metadata contains all necessary information for the runner - return Result(True, "Cloud build completed successfully", artifacts, - build_metadata) + return BuildResult(True, "Cloud build completed successfully", artifacts, + build_metadata) except Exception as e: self.logger.error("Cloud build failed: %s", e) - return Result(False, f"Cloud build failed: {e}") + return BuildResult(False, f"Cloud build failed: {e}") def build_cloud(self, source_code: str, diff --git a/ossfuzz_py/core/data_models.py b/ossfuzz_py/core/data_models.py index d2d4efe75..383d1f055 100644 --- a/ossfuzz_py/core/data_models.py +++ b/ossfuzz_py/core/data_models.py @@ -21,7 +21,7 @@ from datetime import datetime from enum import Enum from pathlib import Path -from typing import Dict, List, Optional +from typing import Any, Dict, List, Optional from pydantic import BaseModel, Field @@ -133,3 +133,129 @@ def to_yaml(self, path: Path) -> bool: return True except Exception: return False + + +class BuildHistoryData(BaseDataModel): + """Represents a single build history entry.""" + build_id: str = Field(..., description="Unique identifier for the build") + timestamp: datetime = Field(..., description="Build timestamp") + project_name: str = Field(..., description="Name of the project") + success: bool = Field(..., description="Whether the build was successful") + duration_seconds: Optional[int] = Field( + None, description="Build duration in seconds") + commit_hash: Optional[str] = Field(None, description="Git commit hash") + branch: Optional[str] = Field(None, description="Git branch") + sanitizer: Optional[Sanitizer] = Field(None, 
description="Sanitizer used") + architecture: Optional[str] = Field(None, description="Target architecture") + error_message: Optional[str] = Field( + None, description="Error message if build failed") + artifacts: Optional[List[str]] = Field(None, + description="List of build artifacts") + + +class CrashHistoryData(BaseDataModel): + """Represents a single crash history entry.""" + crash_id: str = Field(..., description="Unique identifier for the crash") + timestamp: datetime = Field(..., description="Crash timestamp") + project_name: str = Field(..., description="Name of the project") + fuzzer_name: str = Field(..., description="Name of the fuzzer") + crash_type: str = Field( + ..., description="Type of crash (e.g., heap-buffer-overflow)") + crash_signature: str = Field(..., description="Crash signature/hash") + severity: Severity = Field(Severity.UNKNOWN, description="Crash severity") + reproducible: Optional[bool] = Field( + None, description="Whether crash is reproducible") + stack_trace: Optional[str] = Field(None, description="Stack trace") + testcase_path: Optional[str] = Field(None, description="Path to testcase") + regression_range: Optional[str] = Field(None, description="Regression range") + + +class CorpusHistoryData(BaseDataModel): + """Represents a single corpus history entry.""" + timestamp: datetime = Field(..., description="Corpus snapshot timestamp") + project_name: str = Field(..., description="Name of the project") + fuzzer_name: str = Field(..., description="Name of the fuzzer") + corpus_size: int = Field(..., description="Number of files in corpus") + total_size_bytes: int = Field(..., + description="Total size of corpus in bytes") + new_files_count: Optional[int] = Field( + None, description="Number of new files added") + coverage_increase: Optional[float] = Field( + None, description="Coverage increase percentage") + unique_features: Optional[int] = Field( + None, description="Number of unique features") + + +class CoverageHistoryData(BaseDataModel): + """Represents a single coverage history entry.""" + timestamp: datetime = Field(..., description="Coverage measurement timestamp") + project_name: str = Field(..., description="Name of the project") + fuzzer_name: Optional[str] = Field(None, description="Name of the fuzzer") + line_coverage: float = Field(..., description="Line coverage percentage") + function_coverage: Optional[float] = Field( + None, description="Function coverage percentage") + branch_coverage: Optional[float] = Field( + None, description="Branch coverage percentage") + lines_covered: Optional[int] = Field(None, + description="Number of lines covered") + lines_total: Optional[int] = Field(None, description="Total number of lines") + functions_covered: Optional[int] = Field( + None, description="Number of functions covered") + functions_total: Optional[int] = Field( + None, description="Total number of functions") + branches_covered: Optional[int] = Field( + None, description="Number of branches covered") + branches_total: Optional[int] = Field(None, + description="Total number of branches") + + +class TimeSeriesData(BaseDataModel): + """Generic time series data container.""" + project_name: str = Field(..., description="Name of the project") + data_type: str = Field( + ..., description="Type of data (build, crash, corpus, coverage)") + start_date: datetime = Field(..., description="Start date of the time series") + end_date: datetime = Field(..., description="End date of the time series") + data_points: List[Dict[str, + Any]] = Field(..., + 
description="Time series data points") + metadata: Optional[Dict[str, Any]] = Field(None, + description="Additional metadata") + + +class HistoricalSummary(BaseDataModel): + """Summary statistics for historical data.""" + project_name: str = Field(..., description="Name of the project") + period_start: datetime = Field(..., description="Start of the summary period") + period_end: datetime = Field(..., description="End of the summary period") + + # Build statistics + total_builds: Optional[int] = Field(None, + description="Total number of builds") + successful_builds: Optional[int] = Field( + None, description="Number of successful builds") + build_success_rate: Optional[float] = Field( + None, description="Build success rate percentage") + + # Crash statistics + total_crashes: Optional[int] = Field(None, + description="Total number of crashes") + unique_crashes: Optional[int] = Field(None, + description="Number of unique crashes") + critical_crashes: Optional[int] = Field( + None, description="Number of critical crashes") + + # Coverage statistics + max_coverage: Optional[float] = Field(None, + description="Maximum coverage achieved") + avg_coverage: Optional[float] = Field(None, description="Average coverage") + coverage_trend: Optional[str] = Field( + None, description="Coverage trend (increasing/decreasing/stable)") + + # Corpus statistics + max_corpus_size: Optional[int] = Field(None, + description="Maximum corpus size") + avg_corpus_size: Optional[float] = Field(None, + description="Average corpus size") + corpus_growth_rate: Optional[float] = Field(None, + description="Corpus growth rate") diff --git a/ossfuzz_py/core/ossfuzz_sdk.py b/ossfuzz_py/core/ossfuzz_sdk.py new file mode 100644 index 000000000..34557f6e3 --- /dev/null +++ b/ossfuzz_py/core/ossfuzz_sdk.py @@ -0,0 +1,1639 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +OSS-Fuzz SDK - Comprehensive Main Facade. + +This module provides the main SDK facade for the complete OSS-Fuzz SDK, +including build operations, execution, result management, benchmark management, +and historical data analysis. It serves as the primary entry point for all +SDK capabilities. 
+""" + +import logging +import uuid +from datetime import datetime, timedelta +from typing import Any, Dict, List, Optional, Union + +from ossfuzz_py.build.build_config import BuildConfig +from ossfuzz_py.build.builder import LocalBuilder +from ossfuzz_py.build.docker_manager import DockerManager +from ossfuzz_py.core.benchmark_manager import BenchmarkManager +from ossfuzz_py.core.data_models import FuzzingEngine, Sanitizer +# Core imports +from ossfuzz_py.data.storage_manager import StorageManager +from ossfuzz_py.errors import (BenchmarkError, OSSFuzzSDKConfigError, + OSSFuzzSDKError) +from ossfuzz_py.execution.fuzz_runner import FuzzRunOptions, LocalRunner +from ossfuzz_py.execution.fuzz_target import FuzzTarget +from ossfuzz_py.history import (BuildHistoryManager, CorpusHistoryManager, + CoverageHistoryManager, CrashHistoryManager) +from ossfuzz_py.result.result_manager import ResultManager +from ossfuzz_py.utils.env_utils import EnvUtils +from ossfuzz_py.utils.env_vars import EnvVars +from ossfuzz_py.utils.work_dir_manager import WorkDirManager + +# Configuration and Options Classes + + +class SDKConfig: + """Configuration class for the OSS-Fuzz SDK.""" + + def __init__(self, + storage_backend: str = 'local', + storage_path: Optional[str] = None, + gcs_bucket_name: Optional[str] = None, + work_dir: Optional[str] = None, + oss_fuzz_dir: Optional[str] = None, + enable_caching: bool = True, + log_level: str = 'INFO', + timeout_seconds: int = 3600, + max_retries: int = 3): + """Initialize SDK configuration.""" + self.storage_backend = storage_backend + self.storage_path = storage_path or EnvUtils.get_work_dir() + self.gcs_bucket_name = gcs_bucket_name + self.work_dir = work_dir or EnvUtils.get_work_dir() + self.oss_fuzz_dir = oss_fuzz_dir or EnvUtils.get_oss_fuzz_dir() + self.enable_caching = enable_caching + self.log_level = log_level + self.timeout_seconds = timeout_seconds + self.max_retries = max_retries + + def to_dict(self) -> Dict[str, Any]: + """Convert configuration to dictionary.""" + return { + 'storage_backend': self.storage_backend, + 'storage_path': self.storage_path, + 'gcs_bucket_name': self.gcs_bucket_name, + 'work_dir': self.work_dir, + 'oss_fuzz_dir': self.oss_fuzz_dir, + 'enable_caching': self.enable_caching, + 'log_level': self.log_level, + 'timeout_seconds': self.timeout_seconds, + 'max_retries': self.max_retries, + } + + +class BuildOptions: + """Options for build operations.""" + + def __init__(self, + sanitizer: Optional[str] = 'address', + architecture: str = 'x86_64', + fuzzing_engine: Optional[str] = 'libfuzzer', + environment_vars: Optional[Dict[str, str]] = None, + build_args: Optional[List[str]] = None, + timeout_seconds: Optional[int] = None): + """Initialize build options.""" + self.sanitizer = sanitizer + self.architecture = architecture + self.fuzzing_engine = fuzzing_engine + self.environment_vars = environment_vars or {} + self.build_args = build_args or [] + self.timeout_seconds = timeout_seconds + + +class RunOptions: + """Options for execution operations.""" + + def __init__(self, + duration_seconds: int = 3600, + timeout_seconds: int = 25, + max_memory_mb: int = 1024, + detect_leaks: bool = True, + extract_coverage: bool = False, + corpus_dir: Optional[str] = None, + output_dir: str = 'fuzz_output', + engine_args: Optional[List[str]] = None, + env_vars: Optional[Dict[str, str]] = None): + """Initialize run options.""" + self.duration_seconds = duration_seconds + self.timeout_seconds = timeout_seconds + self.max_memory_mb = max_memory_mb + 
self.detect_leaks = detect_leaks + self.extract_coverage = extract_coverage + self.corpus_dir = corpus_dir + self.output_dir = output_dir + self.engine_args = engine_args or [] + self.env_vars = env_vars or {} + + +class PipelineOptions: + """Options for full pipeline operations.""" + + def __init__(self, + build_options: Optional[BuildOptions] = None, + run_options: Optional[RunOptions] = None, + trials: int = 1, + analyze_coverage: bool = True, + store_results: bool = True): + """Initialize pipeline options.""" + self.build_options = build_options or BuildOptions() + self.run_options = run_options or RunOptions() + self.trials = trials + self.analyze_coverage = analyze_coverage + self.store_results = store_results + + +# Result Classes + + +class BuildResult: + """Result of a build operation.""" + + def __init__(self, + success: bool, + message: str = '', + build_id: Optional[str] = None, + artifacts: Optional[Dict] = None): + self.success = success + self.message = message + self.build_id = build_id or str(uuid.uuid4()) + self.artifacts = artifacts or {} + self.timestamp = datetime.now() + + +class RunResult: + """Result of a run operation.""" + + def __init__(self, + success: bool, + message: str = '', + run_id: Optional[str] = None, + crashes: bool = False, + coverage_data: Optional[Dict] = None): + self.success = success + self.message = message + self.run_id = run_id or str(uuid.uuid4()) + self.crashes = crashes + self.coverage_data = coverage_data or {} + self.timestamp = datetime.now() + + +class PipelineResult: + """Result of a full pipeline operation.""" + + def __init__(self, + success: bool, + message: str = '', + pipeline_id: Optional[str] = None, + build_results: Optional[List[BuildResult]] = None, + run_results: Optional[List[RunResult]] = None): + self.success = success + self.message = message + self.pipeline_id = pipeline_id or str(uuid.uuid4()) + self.build_results = build_results or [] + self.run_results = run_results or [] + self.timestamp = datetime.now() + + +class OSSFuzzSDK: + """ + Comprehensive main facade for the OSS-Fuzz SDK. + + This class provides a unified interface for all + OSS-Fuzz SDK capabilities including: + - Build operations (building fuzz targets and benchmarks) + - Execution operations (running fuzz targets and benchmarks) + - Result management (storing, retrieving, and analyzing results) + - Benchmark management (CRUD operations on benchmarks) + - Workflow orchestration (full build → run → analyze pipelines) + - Historical data analysis (reports and analytics) + + The SDK is designed to be both beginner-friendly for simple tasks and + expert-capable for advanced use cases. + + Examples: + ```python + # Initialize SDK + sdk = OSSFuzzSDK('libpng') + + # Simple benchmark run + result = sdk.run_benchmark('benchmark_id') + + # Full pipeline with custom options + options = PipelineOptions(trials=3) + pipeline_result = sdk.run_full_pipeline('benchmark_id', options) + + # Get comprehensive metrics + metrics = sdk.get_benchmark_metrics('benchmark_id') + + # Historical analysis + report = sdk.generate_project_report(days=30) + ``` + """ + + def __init__(self, + project_name: str, + config: Optional[Union[Dict[str, Any], SDKConfig]] = None): + """ + Initialize the comprehensive OSS-Fuzz SDK. 
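+
+    Configuration may be given either as a plain dict or as an ``SDKConfig``
+    instance; environment variables (e.g. OSSFUZZ_HISTORY_STORAGE_BACKEND,
+    WORK_DIR) are merged into the config dict by ``_load_config_from_env``.
+    A sketch (the project name and values are illustrative):
+
+    ```python
+    sdk = OSSFuzzSDK('curl', {'storage_backend': 'local',
+                              'log_level': 'DEBUG'})
+    sdk = OSSFuzzSDK('curl', SDKConfig(storage_backend='gcs',
+                                       gcs_bucket_name='my-history-bucket'))
+    ```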
+ + Args: + project_name: Name of the OSS-Fuzz project + config: Configuration dictionary or SDKConfig instance + + Raises: + OSSFuzzSDKConfigError: If configuration is invalid + OSSFuzzSDKError: If initialization fails + """ + self.project_name = project_name + + # Handle configuration + if isinstance(config, SDKConfig): + self.sdk_config = config + self.config = config.to_dict() + else: + self.config = config or {} + self.sdk_config = SDKConfig(**self.config) + + self.logger = logging.getLogger(f"{__name__}.{project_name}") + + try: + if not project_name: + raise OSSFuzzSDKConfigError("Project name is required") + + # Merge environment variables into config + self._load_config_from_env() + + # Initialize storage manager + self.storage = StorageManager(self.config) + + # Initialize core components + self._initialize_components() + + self.logger.info("Initialized comprehensive OSSFuzzSDK for project: %s", + project_name) + + except OSSFuzzSDKConfigError: + # Re-raise config errors as-is + raise + except Exception as e: + error_msg = ( + f"Failed to initialize OSSFuzzSDK for {project_name}: {str(e)}") + self.logger.error(error_msg) + raise OSSFuzzSDKError(error_msg) from e + + def _initialize_components(self) -> None: + """Initialize all SDK components""" + self.build_history = BuildHistoryManager(self.storage, self.project_name) + self.crash_history = CrashHistoryManager(self.storage, self.project_name) + self.corpus_history = CorpusHistoryManager(self.storage, self.project_name) + self.coverage_history = CoverageHistoryManager(self.storage, + self.project_name) + + # Initialize BenchmarkManager first + self.benchmark_manager = BenchmarkManager() + + # Initialize ResultManager with BenchmarkManager + self.result_manager = ResultManager( + build_mgr=self.build_history, + crash_mgr=self.crash_history, + corpus_mgr=self.corpus_history, + coverage_mgr=self.coverage_history, + benchmark_manager=self.benchmark_manager, + ) + + # Initialize build components + self._initialize_build_components() + + # Initialize execution components + self._initialize_execution_components() + + @property + def build(self) -> BuildHistoryManager: + """Access to build history manager.""" + return self.build_history + + @property + def crash(self) -> CrashHistoryManager: + """Access to crash history manager.""" + return self.crash_history + + @property + def corpus(self) -> CorpusHistoryManager: + """Access to corpus history manager.""" + return self.corpus_history + + @property + def coverage(self) -> CoverageHistoryManager: + """Access to coverage history manager.""" + return self.coverage_history + + def _initialize_build_components(self) -> None: + """Initialize build-related components.""" + try: + # Create build configuration + build_config = BuildConfig( + project_name=self.project_name, + language='c++', # Default language + sanitizer=Sanitizer.ADDRESS, + fuzzing_engine=FuzzingEngine.LIBFUZZER, + ) + + # Initialize Docker manager for local builds + docker_manager = DockerManager() + + # Initialize builders + self.local_builder = LocalBuilder( + storage_manager=self.storage, + build_config=build_config, + docker_manager=docker_manager, + result_manager=self.result_manager, + ) + + # Cloud builder initialization would go here + self.cloud_builder = None # TODO: Initialize when needed + + except Exception as e: + self.logger.warning("Failed to initialize build components: %s", str(e)) + self.local_builder = self.cloud_builder = None + + def _initialize_execution_components(self) -> None: + """Initialize 
execution-related components.""" + try: + # Initialize work directory manager + work_dir_manager = WorkDirManager(base_dir=self.sdk_config.work_dir) + + # Initialize runners + self.local_runner = LocalRunner( + work_dir_manager=work_dir_manager, + result_manager=self.result_manager, + ) + + # Cloud runner initialization would go here + self.cloud_runner = None # TODO: Initialize when needed + + except Exception as e: + self.logger.warning("Failed to initialize execution components: %s", + str(e)) + self.local_runner = self.cloud_runner = None + + def _load_config_from_env(self) -> None: + """Load configuration from environment variables.""" + try: + # Storage configuration + storage_backend = EnvUtils.get_env( + EnvVars.OSSFUZZ_HISTORY_STORAGE_BACKEND) + if storage_backend: + self.config['storage_backend'] = storage_backend + + storage_path = EnvUtils.get_env(EnvVars.OSSFUZZ_HISTORY_STORAGE_PATH) + if storage_path: + self.config['storage_path'] = storage_path + + # GCS configuration + gcs_bucket = EnvUtils.get_env(EnvVars.GCS_BUCKET_NAME) + if gcs_bucket: + self.config['gcs_bucket_name'] = gcs_bucket + + # Work directory + work_dir = EnvUtils.get_env(EnvVars.WORK_DIR) + if work_dir: + self.config['work_dir'] = work_dir + + except Exception as e: + self.logger.warning("Failed to load some environment variables: %s", + str(e)) + + # Build Operations + + def build_fuzz_target(self, + target_spec: Union[FuzzTarget, Dict[str, Any]], + options: Optional[BuildOptions] = None) -> BuildResult: + """ + Build a single fuzz target. + + Args: + target_spec: FuzzTarget instance or dictionary specification + options: Build options (optional) + + Returns: + BuildResult: Result of the build operation + + Raises: + BuilderError: If build fails + OSSFuzzSDKError: If SDK components not available + """ + try: + if not self.local_builder: + raise OSSFuzzSDKError("Build components not available") + + # Convert dict to FuzzTarget if needed + if isinstance(target_spec, dict): + target = FuzzTarget(**target_spec) + else: + target = target_spec + + options = options or BuildOptions() + + # Convert options to appropriate format + sanitizer = getattr( + Sanitizer, + options.sanitizer.upper()) if options.sanitizer else Sanitizer.ADDRESS + + # Perform the build + result = self.local_builder.build(target=target, + sanitizer=sanitizer, + benchmark_id=target.name, + trial=1) + + return BuildResult( + success=result.success, + message=result.message, + artifacts=result.metadata if hasattr(result, 'metadata') else {}) + + except Exception as e: + error_msg = f"Failed to build fuzz target: {str(e)}" + self.logger.error(error_msg) + return BuildResult(success=False, message=error_msg) + + def build_benchmark(self, + benchmark_id: str, + options: Optional[BuildOptions] = None) -> BuildResult: + """ + Build a specific benchmark. 
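+
+    Example (a minimal sketch; the benchmark id is illustrative and must be
+    known to the configured BenchmarkManager):
+
+    ```python
+    result = sdk.build_benchmark('bench-001',
+                                 BuildOptions(timeout_seconds=1800))
+    if not result.success:
+        print(result.message)
+    ```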
+ + Args: + benchmark_id: Benchmark identifier + options: Build options (optional) + + Returns: + BuildResult: Result of the build operation + + Raises: + BenchmarkError: If benchmark not found + BuilderError: If build fails + """ + try: + if not self.benchmark_manager: + raise OSSFuzzSDKError("BenchmarkManager not available") + + # Get benchmark from manager + benchmark = self.benchmark_manager.get_benchmark(benchmark_id) + if not benchmark: + raise BenchmarkError(f"Benchmark not found: {benchmark_id}") + + # Create FuzzTarget from benchmark (would need implementation) + # For now, create a minimal target + target = FuzzTarget( + name=benchmark.function_name, + source_code="// Generated fuzz target", + build_script="// Generated build script", + project_name=benchmark.project, + language=benchmark.language, + function_signature=benchmark.function_signature, + ) + + return self.build_fuzz_target(target, options) + + except Exception as e: + error_msg = f"Failed to build benchmark {benchmark_id}: {str(e)}" + self.logger.error(error_msg) + return BuildResult(success=False, message=error_msg) + + def get_build_status(self, build_id: str) -> Dict[str, Any]: + """ + Check build status. + + Args: + build_id: Build identifier + + Returns: + Dictionary containing build status information + """ + try: + # This would query build history or active builds + # For now, return a basic status + return { + 'build_id': build_id, + 'status': 'unknown', + 'message': 'Build status tracking not yet implemented', + 'timestamp': datetime.now().isoformat() + } + except Exception as e: + self.logger.error("Failed to get build status: %s", str(e)) + return { + 'build_id': build_id, + 'status': 'error', + 'message': str(e), + 'timestamp': datetime.now().isoformat() + } + + def get_build_artifacts(self, build_id: str) -> Dict[str, Any]: + """ + Retrieve build artifacts. + + Args: + build_id: Build identifier + + Returns: + Dictionary containing build artifacts + """ + try: + # This would retrieve artifacts from storage + # For now, return empty artifacts + return { + 'build_id': build_id, + 'artifacts': {}, + 'message': 'Artifact retrieval not yet implemented' + } + except Exception as e: + self.logger.error("Failed to get build artifacts: %s", str(e)) + return {'build_id': build_id, 'artifacts': {}, 'error': str(e)} + + def list_recent_builds( + self, + limit: int = 10, + filters: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]: + """ + List recent builds with filtering. + + Args: + limit: Maximum number of builds to return + filters: Optional filters to apply + + Returns: + List of build information dictionaries + """ + try: + if not self.build_history: + return [] + + # Get build history + builds = self.build_history.get_build_history(limit=limit) + + # Apply filters if provided + if filters: + # Basic filtering implementation + filtered_builds = [] + for build in builds: + include = True + for key, value in filters.items(): + if key in build and build[key] != value: + include = False + break + if include: + filtered_builds.append(build) + builds = filtered_builds + + return builds[:limit] + + except Exception as e: + self.logger.error("Failed to list recent builds: %s", str(e)) + return [] + + # Execution Operations + + def run_fuzz_target(self, + target_spec: Union[FuzzTarget, Dict[str, Any]], + build_metadata: Dict[str, Any], + options: Optional[RunOptions] = None) -> RunResult: + """ + Run a single fuzz target. 
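+
+    A usage sketch, assuming ``target`` is a FuzzTarget and ``sdk`` an
+    initialized OSSFuzzSDK; ``build_metadata`` is typically the ``artifacts``
+    of a preceding ``build_fuzz_target`` call:
+
+    ```python
+    build = sdk.build_fuzz_target(target)
+    if build.success:
+        run = sdk.run_fuzz_target(target, build.artifacts,
+                                  RunOptions(duration_seconds=600))
+        print(run.crashes, run.coverage_data)
+    ```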
+ + Args: + target_spec: FuzzTarget instance or dictionary specification + build_metadata: Build metadata from previous build operation + options: Run options (optional) + + Returns: + RunResult: Result of the run operation + + Raises: + FuzzRunnerError: If run fails + OSSFuzzSDKError: If SDK components not available + """ + try: + if not self.local_runner: + raise OSSFuzzSDKError("Execution components not available") + + # Convert dict to FuzzTarget if needed + if isinstance(target_spec, dict): + target = FuzzTarget(**target_spec) + else: + target = target_spec + + options = options or RunOptions() + + # Convert options to FuzzRunOptions + fuzz_options = FuzzRunOptions( + duration_seconds=options.duration_seconds, + timeout_seconds=options.timeout_seconds, + max_memory_mb=options.max_memory_mb, + detect_leaks=options.detect_leaks, + extract_coverage=options.extract_coverage, + corpus_dir=options.corpus_dir, + output_dir=options.output_dir, + engine_args=options.engine_args, + env_vars=options.env_vars, + ) + + # Perform the run + run_info = self.local_runner.run(target=target.name, + options=fuzz_options, + build_metadata=build_metadata, + benchmark_id=target.name, + trial=1) + + return RunResult( + success=not run_info.crashes if run_info else False, + message=run_info.run_log if run_info else 'Run completed', + crashes=run_info.crashes if run_info else False, + coverage_data={ + 'cov_pcs': run_info.cov_pcs if run_info else 0, + 'total_pcs': run_info.total_pcs if run_info else 0, + } if run_info else {}) + + except Exception as e: + error_msg = f"Failed to run fuzz target: {str(e)}" + self.logger.error(error_msg) + return RunResult(success=False, message=error_msg) + + def run_benchmark(self, + benchmark_id: str, + options: Optional[RunOptions] = None) -> RunResult: + """ + Run a specific benchmark (build + run). + + Args: + benchmark_id: Benchmark identifier + options: Run options (optional) + + Returns: + RunResult: Result of the run operation + + Raises: + BenchmarkError: If benchmark not found + FuzzRunnerError: If run fails + """ + try: + # First build the benchmark + build_result = self.build_benchmark(benchmark_id) + if not build_result.success: + return RunResult(success=False, + message=f"Build failed: {build_result.message}") + + # Then run it + if not self.benchmark_manager: + raise OSSFuzzSDKError("BenchmarkManager not available") + + benchmark = self.benchmark_manager.get_benchmark(benchmark_id) + if not benchmark: + raise BenchmarkError(f"Benchmark not found: {benchmark_id}") + + # Create FuzzTarget from benchmark + target = FuzzTarget( + name=benchmark.function_name, + source_code="// Generated fuzz target", + build_script="// Generated build script", + project_name=benchmark.project, + language=benchmark.language, + function_signature=benchmark.function_signature, + ) + + return self.run_fuzz_target(target, build_result.artifacts, options) + + except Exception as e: + error_msg = f"Failed to run benchmark {benchmark_id}: {str(e)}" + self.logger.error(error_msg) + return RunResult(success=False, message=error_msg) + + def get_run_status(self, run_id: str) -> Dict[str, Any]: + """ + Check run status. 
+ + Args: + run_id: Run identifier + + Returns: + Dictionary containing run status information + """ + try: + # This would query run history or active runs + # For now, return a basic status + return { + 'run_id': run_id, + 'status': 'unknown', + 'message': 'Run status tracking not yet implemented', + 'timestamp': datetime.now().isoformat() + } + except Exception as e: + self.logger.error("Failed to get run status: %s", str(e)) + return { + 'run_id': run_id, + 'status': 'error', + 'message': str(e), + 'timestamp': datetime.now().isoformat() + } + + def get_run_results(self, run_id: str) -> Dict[str, Any]: + """ + Retrieve run results and artifacts. + + Args: + run_id: Run identifier + + Returns: + Dictionary containing run results + """ + try: + # This would retrieve results from storage + # For now, return empty results + return { + 'run_id': run_id, + 'results': {}, + 'message': 'Result retrieval not yet implemented' + } + except Exception as e: + self.logger.error("Failed to get run results: %s", str(e)) + return {'run_id': run_id, 'results': {}, 'error': str(e)} + + def list_recent_runs( + self, + limit: int = 10, + filters: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]: + """ + List recent runs with filtering. + + Args: + limit: Maximum number of runs to return + filters: Optional filters to apply + + Returns: + List of run information dictionaries + """ + try: + if not self.crash_history: + return [] + + # Get crash history as proxy for run history + runs = self.crash_history.get_crash_history(limit=limit) + + # Apply filters if provided + if filters: + # Basic filtering implementation + filtered_runs = [] + for run in runs: + include = True + for key, value in filters.items(): + if key in run and run[key] != value: + include = False + break + if include: + filtered_runs.append(run) + runs = filtered_runs + + return runs[:limit] + + except Exception as e: + self.logger.error("Failed to list recent runs: %s", str(e)) + return [] + + # Workflow Orchestration + + def run_full_pipeline( + self, + benchmark_id: str, + options: Optional[PipelineOptions] = None) -> PipelineResult: + """ + Run a complete build → run → analyze pipeline. 
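+
+    Example (a sketch; the benchmark id is illustrative). Each trial appends
+    one entry to ``build_results`` and, when its build succeeds, one entry
+    to ``run_results``:
+
+    ```python
+    result = sdk.run_full_pipeline('bench-001', PipelineOptions(trials=2))
+    print(result.message)
+    for run in result.run_results:
+        print(run.run_id, run.crashes)
+    ```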
+ + Args: + benchmark_id: Benchmark identifier + options: Pipeline options (optional) + + Returns: + PipelineResult: Result of the complete pipeline + """ + try: + options = options or PipelineOptions() + build_results = [] + run_results = [] + + # Run multiple trials if specified + for trial in range(1, options.trials + 1): + self.logger.info("Running pipeline trial %d/%d for %s", trial, + options.trials, benchmark_id) + + # Build phase + build_result = self.build_benchmark(benchmark_id, options.build_options) + build_results.append(build_result) + + if not build_result.success: + self.logger.warning("Build failed for trial %d, skipping run", trial) + continue + + # Run phase + run_result = self.run_benchmark(benchmark_id, options.run_options) + run_results.append(run_result) + + # Analysis phase (if enabled) + if options.analyze_coverage and run_result.success: + try: + self._analyze_coverage(benchmark_id, run_result) + except Exception as e: + self.logger.warning("Coverage analysis failed for trial %d: %s", + trial, str(e)) + + # Store results (if enabled) + if options.store_results and self.result_manager: + try: + self._store_pipeline_result(benchmark_id, build_result, run_result, + trial) + except Exception as e: + self.logger.warning("Result storage failed for trial %d: %s", trial, + str(e)) + + # Determine overall success + successful_builds = sum(1 for r in build_results if r.success) + successful_runs = sum(1 for r in run_results if r.success) + + overall_success = successful_builds > 0 and successful_runs > 0 + message = ( + f"Pipeline completed: {successful_builds}/{len(build_results)} " + f"builds, {successful_runs}/{len(run_results)} runs successful") + + return PipelineResult(success=overall_success, + message=message, + build_results=build_results, + run_results=run_results) + + except Exception as e: + error_msg = f"Pipeline failed for {benchmark_id}: {str(e)}" + self.logger.error(error_msg) + return PipelineResult(success=False, message=error_msg) + + def _analyze_coverage(self, benchmark_id: str, run_result: RunResult) -> None: + """Analyze coverage for a run result.""" + # Placeholder for coverage analysis + self.logger.debug("Coverage analysis for %s: %s", benchmark_id, + run_result.coverage_data) + + def _store_pipeline_result(self, benchmark_id: str, build_result: BuildResult, + run_result: RunResult, trial: int) -> None: + """Store pipeline result through ResultManager.""" + # pylint: disable=unused-argument + if not self.result_manager: + return + + # This would create a comprehensive Result object and store it + self.logger.debug("Storing pipeline result for %s trial %d", benchmark_id, + trial) + + # Result Management Operations + + def get_benchmark_result(self, + benchmark_id: str, + trial: Optional[int] = None) -> Optional[Any]: + """ + Get result for a specific benchmark. + + Args: + benchmark_id: Benchmark identifier + trial: Specific trial number (optional, gets latest if not specified) + + Returns: + Result object or None if not found + """ + try: + if not self.result_manager: + self.logger.warning("ResultManager not available") + return None + + if trial is not None: + return self.result_manager.get_trial_result(benchmark_id, trial) + return self.result_manager.get_result(benchmark_id) + + except Exception as e: + self.logger.error("Failed to get benchmark result: %s", str(e)) + return None + + def get_benchmark_metrics(self, benchmark_id: str) -> Dict[str, Any]: + """ + Get comprehensive metrics for a benchmark. 
+ + Args: + benchmark_id: Benchmark identifier + + Returns: + Dictionary containing comprehensive metrics + """ + try: + if not self.result_manager: + self.logger.warning("ResultManager not available") + return {} + + return self.result_manager.get_metrics(benchmark_id) + + except Exception as e: + self.logger.error("Failed to get benchmark metrics: %s", str(e)) + return {} + + def get_system_metrics(self) -> Dict[str, Any]: + """ + Get system-wide aggregated metrics. + + Returns: + Dictionary containing system-wide metrics + """ + try: + if not self.result_manager: + self.logger.warning("ResultManager not available") + return {} + + return self.result_manager.get_metrics() + + except Exception as e: + self.logger.error("Failed to get system metrics: %s", str(e)) + return {} + + def get_coverage_trend(self, + benchmark_id: str, + days: int = 30) -> Union[Any, List[Dict[str, Any]]]: + """ + Get coverage trend for a benchmark. + + Args: + benchmark_id: Benchmark identifier + days: Number of days to analyze + + Returns: + Coverage trend data (DataFrame if pandas available, list otherwise) + """ + try: + if not self.result_manager: + self.logger.warning("ResultManager not available") + return [] + + end_date = datetime.now() + start_date = end_date - timedelta(days=days) + + return self.result_manager.coverage_trend(benchmark_id, start_date, + end_date) + + except Exception as e: + self.logger.error("Failed to get coverage trend: %s", str(e)) + return [] + + def get_build_success_rate(self, benchmark_id: str, days: int = 30) -> float: + """ + Get build success rate for a benchmark. + + Args: + benchmark_id: Benchmark identifier + days: Number of days to analyze + + Returns: + Build success rate (0.0 to 1.0) + """ + try: + if not self.result_manager: + self.logger.warning("ResultManager not available") + return 0.0 + + return self.result_manager.get_build_success_rate(benchmark_id, days) + + except Exception as e: + self.logger.error("Failed to get build success rate: %s", str(e)) + return 0.0 + + def get_crash_summary(self, + benchmark_id: str, + days: int = 30) -> Dict[str, Any]: + """ + Get crash summary for a benchmark. + + Args: + benchmark_id: Benchmark identifier + days: Number of days to analyze + + Returns: + Dictionary containing crash statistics + """ + try: + if not self.result_manager: + self.logger.warning("ResultManager not available") + return {} + + return self.result_manager.get_crash_summary(benchmark_id, days) + + except Exception as e: + self.logger.error("Failed to get crash summary: %s", str(e)) + return {} + + # Benchmark Management Operations + + def create_benchmark(self, benchmark_spec: Dict[str, Any]) -> bool: + """ + Create a new benchmark. + + Args: + benchmark_spec: Benchmark specification dictionary + + Returns: + True if successful, False otherwise + """ + try: + if not self.benchmark_manager: + self.logger.warning("BenchmarkManager not available") + return False + + # This would create a new benchmark + # For now, just log the operation + self.logger.info("Creating benchmark: %s", + benchmark_spec.get('id', 'unknown')) + return True + + except Exception as e: + self.logger.error("Failed to create benchmark: %s", str(e)) + return False + + def update_benchmark(self, benchmark_id: str, updates: Dict[str, + Any]) -> bool: + """ + Update an existing benchmark. 
+ + Args: + benchmark_id: Benchmark identifier + updates: Dictionary of updates to apply + + Returns: + True if successful, False otherwise + """ + try: + if not self.benchmark_manager: + self.logger.warning("BenchmarkManager not available") + return False + + # This would update the benchmark + # For now, just log the operation + self.logger.info("Updating benchmark %s: %s", benchmark_id, + list(updates.keys())) + return True + + except Exception as e: + self.logger.error("Failed to update benchmark: %s", str(e)) + return False + + def delete_benchmark(self, benchmark_id: str) -> bool: + """ + Delete a benchmark. + + Args: + benchmark_id: Benchmark identifier + + Returns: + True if successful, False otherwise + """ + try: + if not self.benchmark_manager: + self.logger.warning("BenchmarkManager not available") + return False + + # This would delete the benchmark + # For now, just log the operation + self.logger.info("Deleting benchmark: %s", benchmark_id) + return True + + except Exception as e: + self.logger.error("Failed to delete benchmark: %s", str(e)) + return False + + def list_benchmarks(self, + filters: Optional[Dict[str, Any]] = None + ) -> List[Dict[str, Any]]: + """ + List available benchmarks with filtering. + + Args: + filters: Optional filters to apply + + Returns: + List of benchmark information dictionaries + """ + # pylint: disable=unused-argument + try: + if not self.benchmark_manager: + self.logger.warning("BenchmarkManager not available") + return [] + + # This would list benchmarks from the manager + # For now, return empty list + return [] + + except Exception as e: + self.logger.error("Failed to list benchmarks: %s", str(e)) + return [] + + def search_benchmarks(self, + query: str, + limit: int = 10) -> List[Dict[str, Any]]: + """ + Search benchmarks by query. + + Args: + query: Search query string + limit: Maximum number of results + + Returns: + List of matching benchmark information dictionaries + """ + # pylint: disable=unused-argument + try: + if not self.benchmark_manager: + self.logger.warning("BenchmarkManager not available") + return [] + + # This would search benchmarks + # For now, return empty list + self.logger.info("Searching benchmarks for: %s", query) + return [] + + except Exception as e: + self.logger.error("Failed to search benchmarks: %s", str(e)) + return [] + + # Export and Analysis Operations + + def export_results(self, + benchmark_ids: List[str], + export_format: str = 'json', + output_path: Optional[str] = None) -> str: + """ + Export results for multiple benchmarks. 
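+
+    Example (a sketch; as implemented below this writes a JSON placeholder
+    describing the request rather than full result data):
+
+    ```python
+    path = sdk.export_results(['bench-001', 'bench-002'],
+                              export_format='json')
+    print(path)  # e.g. ossfuzz_export_20250101_120000.json
+    ```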
+ + Args: + benchmark_ids: List of benchmark identifiers + export_format: Export format ('json', 'csv', 'xlsx') + output_path: Optional output file path + + Returns: + Path to exported file + """ + try: + if not self.result_manager: + raise OSSFuzzSDKError("ResultManager not available") + + # This would export results in the specified format + # For now, create a placeholder file + if not output_path: + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + output_path = f"ossfuzz_export_{timestamp}.{export_format}" + + self.logger.info("Exporting results for %d benchmarks to %s", + len(benchmark_ids), output_path) + + # Create placeholder export + export_data = { + 'export_timestamp': datetime.now().isoformat(), + 'benchmark_count': len(benchmark_ids), + 'benchmark_ids': benchmark_ids, + 'format': export_format, + 'message': 'Export functionality not yet implemented' + } + + # Write placeholder file + import json + with open(output_path, 'w') as f: + json.dump(export_data, f, indent=2) + + return output_path + + except Exception as e: + error_msg = f"Failed to export results: {str(e)}" + self.logger.error(error_msg) + raise OSSFuzzSDKError(error_msg) + + def generate_comparison_report(self, + benchmark_ids: List[str], + days: int = 30) -> Dict[str, Any]: + """ + Generate a comparison report for multiple benchmarks. + + Args: + benchmark_ids: List of benchmark identifiers to compare + days: Number of days to analyze + + Returns: + Dictionary containing comparison report + """ + try: + report = { + 'comparison_timestamp': datetime.now().isoformat(), + 'benchmark_count': len(benchmark_ids), + 'analysis_period_days': days, + 'benchmarks': {} + } + + for benchmark_id in benchmark_ids: + try: + metrics = self.get_benchmark_metrics(benchmark_id) + build_rate = self.get_build_success_rate(benchmark_id, days) + crash_summary = self.get_crash_summary(benchmark_id, days) + + report['benchmarks'][benchmark_id] = { + 'metrics': metrics, + 'build_success_rate': build_rate, + 'crash_summary': crash_summary, + } + + except Exception as e: + report['benchmarks'][benchmark_id] = {'error': str(e)} + + return report + + except Exception as e: + error_msg = f"Failed to generate comparison report: {str(e)}" + self.logger.error(error_msg) + return {'error': error_msg, 'timestamp': datetime.now().isoformat()} + + # Historical Data Methods (preserved from original implementation) + + def generate_project_report( + self, + days: int = 30, + include_details: bool = True # pylint: disable=unused-argument + ) -> Dict[str, Any]: + """ + Generate a comprehensive project report. 
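+
+    Example (a sketch; the keys mirror the report dict assembled below):
+
+    ```python
+    report = sdk.generate_project_report(days=7)
+    print(report['period']['days'])
+    print(report['health_score']['overall_score'],
+          report['health_score']['status'])
+    ```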
+ + Args: + days: Number of days to include in the report + include_details: Whether to include detailed data + + Returns: + Dictionary containing comprehensive project report + + Raises: + OSSFuzzSDKError: If report generation fails + """ + try: + end_date = datetime.now() + start_date = end_date - timedelta(days=days) + start_date_str = start_date.isoformat() + end_date_str = end_date.isoformat() + + self.logger.info("Generating project report for %s (%d days)", + self.project_name, days) + + report = { + 'project_name': self.project_name, + 'report_generated': end_date.isoformat(), + 'period': { + 'start_date': start_date_str, + 'end_date': end_date_str, + 'days': days + } + } + + # Build statistics + try: + build_stats = self.build_history.get_build_statistics( + start_date_str, end_date_str) if self.build_history else {} + build_trends = self.build_history.get_build_trends( + days) if self.build_history else {} + report['build_summary'] = { + 'statistics': build_stats, + 'trends': build_trends + } + except Exception as e: + self.logger.warning("Failed to get build data: %s", str(e)) + report['build_summary'] = {'error': str(e)} + + # Crash statistics + try: + crash_stats = self.crash_history.get_crash_statistics( + start_date_str, end_date_str) if self.crash_history else {} + report['crash_summary'] = crash_stats + except Exception as e: + self.logger.warning("Failed to get crash data: %s", str(e)) + report['crash_summary'] = {'error': str(e)} + + # Coverage analysis + try: + coverage_report = self.coverage_history.get_coverage_report( + start_date_str, end_date_str) if self.coverage_history else {} + coverage_trends = self.coverage_history.analyze_coverage_trends( + days) if self.coverage_history else {} + report['coverage_summary'] = { + 'report': coverage_report, + 'trends': coverage_trends + } + except Exception as e: + self.logger.warning("Failed to get coverage data: %s", str(e)) + report['coverage_summary'] = {'error': str(e)} + + # Corpus analysis + try: + corpus_growth = self.corpus_history.get_corpus_growth( + days=days) if self.corpus_history else {} + report['corpus_summary'] = {'growth': corpus_growth} + except Exception as e: + self.logger.warning("Failed to get corpus data: %s", str(e)) + report['corpus_summary'] = {'error': str(e)} + + # Overall health score + report['health_score'] = self._calculate_health_score(report) + + return report + + except Exception as e: + error_msg = f"Failed to generate project report: {str(e)}" + self.logger.error(error_msg) + raise OSSFuzzSDKError(error_msg) + + def analyze_fuzzing_efficiency(self, days: int = 30) -> Dict[str, Any]: + """ + Analyze overall fuzzing efficiency for the project. 
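+
+    Example (a sketch; the keys mirror the analysis dict assembled below):
+
+    ```python
+    analysis = sdk.analyze_fuzzing_efficiency(days=14)
+    print(analysis['crash_efficiency']['crashes_per_day'])
+    print(analysis['overall_efficiency']['level'])
+    ```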
+ + Args: + days: Number of days to analyze + + Returns: + Dictionary containing efficiency analysis + + Raises: + OSSFuzzSDKError: If analysis fails + """ + try: + self.logger.info("Analyzing fuzzing efficiency for %s (%d days)", + self.project_name, days) + + end_date = datetime.now() + start_date = end_date - timedelta(days=days) + + analysis = { + 'project_name': self.project_name, + 'analysis_date': end_date.isoformat(), + 'period_days': days + } + + # Build efficiency + build_trends = self.build_history.get_build_trends( + days) if self.build_history else {} + analysis['build_efficiency'] = { + 'builds_per_day': build_trends.get('builds_per_day', 0.0), + 'success_rate': build_trends.get('average_success_rate', 0.0), + 'trend': build_trends.get('trend', 'unknown') + } + + # Coverage efficiency + coverage_trends = self.coverage_history.analyze_coverage_trends( + days) if self.coverage_history else {} + analysis['coverage_efficiency'] = { + 'coverage_velocity': coverage_trends.get('coverage_velocity', 0.0), + 'stability': coverage_trends.get('stability', 'unknown'), + 'current_coverage': coverage_trends.get('current_coverage', 0.0) + } + + # Crash discovery efficiency + crash_stats = self.crash_history.get_crash_statistics( + start_date.isoformat(), + end_date.isoformat()) if self.crash_history else {} + total_crashes = crash_stats.get('total_crashes', 0) + unique_crashes = crash_stats.get('unique_crashes', 0) + + analysis['crash_efficiency'] = { + 'crashes_per_day': + total_crashes / days if days > 0 else 0.0, + 'unique_crash_rate': (unique_crashes / total_crashes * + 100) if total_crashes > 0 else 0.0, + 'total_crashes': + total_crashes, + 'unique_crashes': + unique_crashes + } + + # Corpus efficiency + corpus_growth = self.corpus_history.get_corpus_growth( + days=days) if self.corpus_history else {} + analysis['corpus_efficiency'] = { + 'growth_rate': corpus_growth.get('growth_rate', 0.0), + 'size_change': corpus_growth.get('size_change', 0), + 'trend': corpus_growth.get('trend', 'unknown') + } + + # Overall efficiency score + analysis['overall_efficiency'] = self._calculate_efficiency_score( + analysis) + + return analysis + + except Exception as e: + error_msg = f"Failed to analyze fuzzing efficiency: {str(e)}" + self.logger.error(error_msg) + raise OSSFuzzSDKError(error_msg) + + def _calculate_health_score(self, report: Dict[str, Any]) -> Dict[str, Any]: + """ + Calculate overall project health score based on report data. 
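+
+    Weights as implemented below: build 0.3, coverage 0.4, crash 0.2,
+    corpus 0.1, normalised by the sum of the weights actually present.
+    For example, category scores of 90/60/100/50 yield
+    (90*0.3 + 60*0.4 + 100*0.2 + 50*0.1) / 1.0 = 76.0, i.e. status 'good'.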
+ + Args: + report: Project report data + + Returns: + Dictionary containing health score and breakdown + """ + try: + scores = {} + weights = {} + + # Build health (30% weight) + build_summary = report.get('build_summary', {}) + if 'statistics' in build_summary: + build_success_rate = build_summary['statistics'].get( + 'success_rate', 0.0) + scores['build'] = min(build_success_rate, 100.0) + weights['build'] = 0.3 + + # Coverage health (40% weight) + coverage_summary = report.get('coverage_summary', {}) + if 'report' in coverage_summary: + max_coverage = coverage_summary['report']['summary'].get( + 'max_line_coverage', 0.0) + scores['coverage'] = min(max_coverage, 100.0) + weights['coverage'] = 0.4 + + # Crash health (20% weight) - inverse scoring + crash_summary = report.get('crash_summary', {}) + total_crashes = crash_summary.get('total_crashes', 0) + if total_crashes == 0: + scores['crash'] = 100.0 + else: + # Lower score for more crashes + scores['crash'] = max(0.0, 100.0 - min(total_crashes, 100)) + weights['crash'] = 0.2 + + # Corpus health (10% weight) + corpus_summary = report.get('corpus_summary', {}) + if 'growth' in corpus_summary: + growth_rate = corpus_summary['growth']['growth_rate'] + if growth_rate > 0: + scores['corpus'] = min(100.0, 50.0 + growth_rate * 10) + else: + scores['corpus'] = 50.0 + weights['corpus'] = 0.1 + + # Calculate weighted average + total_score = 0.0 + total_weight = 0.0 + + for category, score in scores.items(): + weight = weights.get(category, 0.0) + total_score += score * weight + total_weight += weight + + overall_score = total_score / total_weight if total_weight > 0 else 0.0 + + # Determine health status + if overall_score >= 80: + status = 'excellent' + elif overall_score >= 60: + status = 'good' + elif overall_score >= 40: + status = 'fair' + else: + status = 'poor' + + return { + 'overall_score': round(overall_score, 2), + 'status': status, + 'category_scores': scores, + 'weights': weights + } + except Exception as e: + self.logger.warning("Failed to calculate health score: %s", str(e)) + return {'overall_score': 0.0, 'status': 'unknown', 'error': str(e)} + + def _calculate_efficiency_score(self, analysis: Dict[str, + Any]) -> Dict[str, Any]: + """ + Calculate overall efficiency score based on analysis data. 
+ + Args: + analysis: Efficiency analysis data + + Returns: + Dictionary containing efficiency score and breakdown + """ + try: + scores = {} + + # Build efficiency + build_eff = analysis.get('build_efficiency', {}) + builds_per_day = build_eff.get('builds_per_day', 0.0) + success_rate = build_eff.get('success_rate', 0.0) + + # Score based on build frequency and success rate + build_score = min(100.0, (builds_per_day * 10) + success_rate) + scores['build'] = build_score + + # Coverage efficiency + coverage_eff = analysis.get('coverage_efficiency', {}) + coverage_velocity = coverage_eff.get('coverage_velocity', 0.0) + current_coverage = coverage_eff.get('current_coverage', 0.0) + + # Score based on coverage growth and current level + coverage_score = min(100.0, current_coverage + (coverage_velocity * 20)) + scores['coverage'] = max(0.0, coverage_score) + + # Crash efficiency + crash_eff = analysis.get('crash_efficiency', {}) + unique_crash_rate = crash_eff.get('unique_crash_rate', 0.0) + crashes_per_day = crash_eff.get('crashes_per_day', 0.0) + + # Higher score for finding unique crashes efficiently + crash_score = min(100.0, unique_crash_rate + min(crashes_per_day * 5, 20)) + scores['crash'] = crash_score + + # Corpus efficiency + corpus_eff = analysis.get('corpus_efficiency', {}) + growth_rate = corpus_eff.get('growth_rate', 0.0) + + # Score based on corpus growth + corpus_score = min(100.0, 50.0 + max(-50.0, min(50.0, growth_rate * 2))) + scores['corpus'] = corpus_score + + # Calculate overall efficiency + overall_efficiency = sum(scores.values()) / len(scores) if scores else 0.0 + + # Determine efficiency level + if overall_efficiency >= 75: + level = 'high' + elif overall_efficiency >= 50: + level = 'medium' + elif overall_efficiency >= 25: + level = 'low' + else: + level = 'very_low' + + return { + 'overall_efficiency': round(overall_efficiency, 2), + 'level': level, + 'category_scores': scores + } + except Exception as e: + self.logger.warning("Failed to calculate efficiency score: %s", str(e)) + return {'overall_efficiency': 0.0, 'level': 'unknown', 'error': str(e)} + + def get_project_summary(self) -> Dict[str, Any]: + """ + Get a quick summary of the project's current state. 
+ + Returns: + Dictionary containing project summary + + Raises: + OSSFuzzSDKError: If summary generation fails + """ + try: + summary: Dict[str, Any] = { + 'project_name': self.project_name, + 'summary_date': datetime.now().isoformat() + } + + # Latest build status + try: + last_build = self.build_history.get_last_successful_build( + ) if self.build_history else None + summary['last_successful_build'] = str( + last_build) if last_build else 'None' + except Exception as e: + summary['last_successful_build'] = f'error: {str(e)}' + + # Latest coverage + try: + latest_coverage = self.coverage_history.get_latest_coverage( + ) if self.coverage_history else None + summary['latest_coverage'] = str( + latest_coverage) if latest_coverage else 'None' + except Exception as e: + summary['latest_coverage'] = f'error: {str(e)}' + + # Recent crash count + try: + week_ago = (datetime.now() - timedelta(days=7)).isoformat() + recent_crashes = self.crash_history.get_crash_history( + start_date=week_ago) if self.crash_history else [] + summary['recent_crashes'] = len(recent_crashes) + except Exception as e: + summary['recent_crashes'] = f'error: {str(e)}' + + return summary + + except Exception as e: + error_msg = f"Failed to get project summary: {str(e)}" + self.logger.error(error_msg) + raise OSSFuzzSDKError(error_msg) diff --git a/ossfuzz_py/data/storage_adapter.py b/ossfuzz_py/data/storage_adapter.py index def3c291a..acdb1ab15 100644 --- a/ossfuzz_py/data/storage_adapter.py +++ b/ossfuzz_py/data/storage_adapter.py @@ -133,6 +133,142 @@ def fetch_crash_data( StorageAdapterError: If not connected or connection lost. """ + @abstractmethod + def store_file(self, key: str, file_path: str) -> str: + """ + Store a file with the given key. + + Args: + key: Storage key/path for the file + file_path: Local path to the file to store + + Returns: + str: Storage path or identifier where file was stored + + Raises: + StorageAdapterError: If storage fails + """ + + @abstractmethod + def retrieve_file(self, key: str, dest_path: str) -> str: + """ + Retrieve a file to the specified destination. + + Args: + key: Storage key/path for the file + dest_path: Local path where file should be saved + + Returns: + str: Local path where file was saved + + Raises: + StorageAdapterError: If retrieval fails + """ + + @abstractmethod + def store_object(self, key: str, data: Any) -> str: + """ + Store an object with the given key. + + Args: + key: Storage key/path for the object + data: Object data to store + + Returns: + str: Storage path or identifier where object was stored + + Raises: + StorageAdapterError: If storage fails + """ + + @abstractmethod + def retrieve_object(self, key: str) -> Any: + """ + Retrieve an object with the given key. + + Args: + key: Storage key/path for the object + + Returns: + Any: Retrieved object data + + Raises: + StorageAdapterError: If retrieval fails + """ + + @abstractmethod + def list_keys(self, prefix: str = "") -> List[str]: + """ + List all keys with the given prefix. + + Args: + prefix: Key prefix to filter by + + Returns: + List[str]: List of matching keys + + Raises: + StorageAdapterError: If listing fails + """ + + @abstractmethod + def delete(self, key: str) -> bool: + """ + Delete data with the given key. 
+ + Args: + key: Storage key/path for the data to delete + + Returns: + bool: True if deletion was successful, False otherwise + + Raises: + StorageAdapterError: If deletion fails + """ + + @abstractmethod + def get_history(self, + category: str, + name: str, + start_date: Optional[str] = None, + end_date: Optional[str] = None, + limit: Optional[int] = None) -> List[Any]: + """ + Retrieve historical data for a specific category and name. + + Args: + category: History category + (e.g., 'build', 'crash', 'corpus', 'coverage') + name: Specific name/identifier within the category + start_date: Optional start date filter (ISO format) + end_date: Optional end date filter (ISO format) + limit: Optional limit on number of results + + Returns: + List of historical data entries + + Raises: + StorageAdapterError: If retrieval fails + """ + + @abstractmethod + def append_history(self, category: str, name: str, data: Any) -> str: + """ + Append new data to historical records. + + Args: + category: History category + (e.g., 'build', 'crash', 'corpus', 'coverage') + name: Specific name/identifier within the category + data: Data to append to history + + Returns: + str: Storage path or identifier where data was stored + + Raises: + StorageAdapterError: If storage fails + """ + # - fetch_project_list() # - fetch_build_information(...) # - fetch_report_details(...) @@ -358,6 +494,150 @@ def fetch_crash_data( exc_info=True) raise QueryError(f"Failed to fetch crash data for {project_name}: {e}") + def store_file(self, key: str, file_path: str) -> str: + """Store a file with the given key.""" + try: + dest_path = self.base_directory / key + dest_path.parent.mkdir(parents=True, exist_ok=True) + + import shutil + shutil.copy2(file_path, dest_path) + return str(dest_path) + except Exception as e: + raise StorageAdapterError(f"Failed to store file {key}: {e}") + + def retrieve_file(self, key: str, dest_path: str) -> str: + """Retrieve a file to the specified destination.""" + try: + src_path = self.base_directory / key + if not src_path.exists(): + raise StorageAdapterError(f"File not found: {key}") + + import shutil + shutil.copy2(src_path, dest_path) + return dest_path + except Exception as e: + raise StorageAdapterError(f"Failed to retrieve file {key}: {e}") + + def store_object(self, key: str, data: Any) -> str: + """Store an object with the given key.""" + try: + dest_path = self.base_directory / key + dest_path.parent.mkdir(parents=True, exist_ok=True) + + with open(dest_path, 'w') as f: + json.dump(data, f, indent=2, default=str) + return str(dest_path) + except Exception as e: + raise StorageAdapterError(f"Failed to store object {key}: {e}") + + def retrieve_object(self, key: str) -> Any: + """Retrieve an object with the given key.""" + try: + src_path = self.base_directory / key + if not src_path.exists(): + raise StorageAdapterError(f"Object not found: {key}") + + with open(src_path, 'r') as f: + return json.load(f) + except Exception as e: + raise StorageAdapterError(f"Failed to retrieve object {key}: {e}") + + def list_keys(self, prefix: str = "") -> List[str]: + """List all keys with the given prefix.""" + try: + keys = [] + search_path = self.base_directory / prefix \ + if prefix else self.base_directory + + if search_path.is_file(): + return [str(search_path.relative_to(self.base_directory))] + + if search_path.is_dir(): + for path in search_path.rglob('*'): + if path.is_file(): + keys.append(str(path.relative_to(self.base_directory))) + + return keys + except Exception as e: + raise 
StorageAdapterError( + f"Failed to list keys with prefix {prefix}: {e}") + + def delete(self, key: str) -> bool: + """Delete data with the given key.""" + try: + path = self.base_directory / key + if path.exists(): + if path.is_file(): + path.unlink() + elif path.is_dir(): + import shutil + shutil.rmtree(path) + return True + return False + except Exception as e: + raise StorageAdapterError(f"Failed to delete {key}: {e}") + + def get_history(self, + category: str, + name: str, + start_date: Optional[str] = None, + end_date: Optional[str] = None, + limit: Optional[int] = None) -> List[Any]: + """Retrieve historical data for a specific category and name.""" + try: + history_path = self.base_directory / "history" / category / f"{name}.json" + if not history_path.exists(): + return [] + + with open(history_path, 'r') as f: + data = json.load(f) + + # Filter by date if specified + if start_date or end_date: + filtered_data = [] + for entry in data: + entry_date = entry.get('timestamp', entry.get('date', '')) + if start_date and entry_date < start_date: + continue + if end_date and entry_date > end_date: + continue + filtered_data.append(entry) + data = filtered_data + + # Apply limit if specified + if limit: + data = data[-limit:] # Get most recent entries + + return data + except Exception as e: + raise StorageAdapterError( + f"Failed to get history for {category}/{name}: {e}") + + def append_history(self, category: str, name: str, data: Any) -> str: + """Append new data to historical records.""" + try: + history_path = self.base_directory / "history" / category / f"{name}.json" + history_path.parent.mkdir(parents=True, exist_ok=True) + + # Load existing data + existing_data = [] + if history_path.exists(): + with open(history_path, 'r') as f: + existing_data = json.load(f) + + # Append new data + existing_data.append(data) + + # Save back to file + with open(history_path, 'w') as f: + json.dump(existing_data, f, indent=2, default=str) + + return str(history_path) + except Exception as e: + raise StorageAdapterError( + f"Failed to append history for {category}/{name}: {e}") + class GCSStorageAdapter(StorageAdapter): """ @@ -703,3 +983,145 @@ def fetch_crash_data(self, exc_info=True) raise QueryError(f"GCSStorageAdapter: Failed to fetch crash data for " f"{project_name}: {e}") + + def store_file(self, key: str, file_path: str) -> str: + """Store a file with the given key.""" + if self._bucket: + try: + blob = self._bucket.blob(key) + blob.upload_from_filename(file_path) + return f"gs://{self.bucket_name}/{key}" + except Exception as e: + raise StorageAdapterError(f"Failed to store file {key}: {e}") + return '' + + def retrieve_file(self, key: str, dest_path: str) -> str: + """Retrieve a file to the specified destination.""" + if self._bucket: + try: + blob = self._bucket.blob(key) + if not blob.exists(): + raise StorageAdapterError(f"File not found: {key}") + + blob.download_to_filename(dest_path) + return dest_path + except Exception as e: + raise StorageAdapterError(f"Failed to retrieve file {key}: {e}") + return '' + + def store_object(self, key: str, data: Any) -> str: + """Store an object with the given key.""" + if self._bucket: + try: + blob = self._bucket.blob(key) + blob.upload_from_string(json.dumps(data, indent=2, default=str), + content_type='application/json') + return f"gs://{self.bucket_name}/{key}" + except Exception as e: + raise StorageAdapterError(f"Failed to store object {key}: {e}") + return '' + + def retrieve_object(self, key: str) -> Any: + """Retrieve an object with the 
given key.""" + if self._bucket: + try: + blob = self._bucket.blob(key) + if not blob.exists(): + raise StorageAdapterError(f"Object not found: {key}") + + content = blob.download_as_text() + return json.loads(content) + except Exception as e: + raise StorageAdapterError(f"Failed to retrieve object {key}: {e}") + return None + + def list_keys(self, prefix: str = "") -> List[str]: + """List all keys with the given prefix.""" + if self._bucket: + try: + blobs = self._bucket.list_blobs(prefix=prefix) + return [blob.name for blob in blobs] + except Exception as e: + raise StorageAdapterError( + f"Failed to list keys with prefix {prefix}: {e}") + return [] + + def delete(self, key: str) -> bool: + """Delete data with the given key.""" + if self._bucket: + try: + blob = self._bucket.blob(key) + if blob.exists(): + blob.delete() + return True + return False + except Exception as e: + raise StorageAdapterError(f"Failed to delete {key}: {e}") + return False + + def get_history(self, + category: str, + name: str, + start_date: Optional[str] = None, + end_date: Optional[str] = None, + limit: Optional[int] = None) -> List[Any]: + """Retrieve historical data for a specific category and name.""" + if self._bucket: + try: + key = f"history/{category}/{name}.json" + blob = self._bucket.blob(key) + + if not blob.exists(): + return [] + + content = blob.download_as_text() + data = json.loads(content) + + # Filter by date if specified + if start_date or end_date: + filtered_data = [] + for entry in data: + entry_date = entry.get('timestamp', entry.get('date', '')) + if start_date and entry_date < start_date: + continue + if end_date and entry_date > end_date: + continue + filtered_data.append(entry) + data = filtered_data + + # Apply limit if specified + if limit: + data = data[-limit:] # Get most recent entries + + return data + except Exception as e: + raise StorageAdapterError( + f"Failed to get history for {category}/{name}: {e}") + return [] + + def append_history(self, category: str, name: str, data: Any) -> str: + """Append new data to historical records.""" + if self._bucket: + try: + key = f"history/{category}/{name}.json" + blob = self._bucket.blob(key) + + # Load existing data + existing_data = [] + if blob.exists(): + content = blob.download_as_text() + existing_data = json.loads(content) + + # Append new data + existing_data.append(data) + + # Save back to GCS + blob.upload_from_string(json.dumps(existing_data, indent=2, + default=str), + content_type='application/json') + + return f"gs://{self.bucket_name}/{key}" + except Exception as e: + raise StorageAdapterError( + f"Failed to append history for {category}/{name}: {e}") + return '' diff --git a/ossfuzz_py/data/storage_manager.py b/ossfuzz_py/data/storage_manager.py index d3ae34aea..566a96d7b 100644 --- a/ossfuzz_py/data/storage_manager.py +++ b/ossfuzz_py/data/storage_manager.py @@ -180,6 +180,62 @@ def _store_file_data(self, key: str, data: Any) -> str: pickle.dump(data, f) return str(file_path.with_suffix('.pkl')) + def store_history(self, category: str, name: str, data: Any) -> str: + """ + Store historical data for a specific category and name. 
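+
+    A round-trip sketch, assuming ``manager`` is an initialized
+    StorageManager; entries are stored as JSON and, when date filters are
+    passed to ``get_history``, matched on their 'timestamp'/'date' fields:
+
+    ```python
+    manager.store_history('build', 'libpng',
+                          {'timestamp': '2025-01-01T00:00:00',
+                           'success': True})
+    recent = manager.get_history('build', 'libpng', limit=10)
+    ```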
+ + Args: + category: History category + (e.g., 'build', 'crash', 'corpus', 'coverage') + name: Specific name/identifier within the category + data: Data to store in history + + Returns: + str: Storage path or identifier where data was stored + + Raises: + StorageManagerError: If storage operation fails + """ + try: + self.logger.debug("Storing history data for %s/%s", category, name) + return self.adapter.append_history(category, name, data) + except Exception as e: + error_msg = f"Failed to store history for {category}/{name}: {str(e)}" + self.logger.error(error_msg) + raise StorageManagerError(error_msg) + + def get_history(self, + category: str, + name: str, + start_date: Optional[str] = None, + end_date: Optional[str] = None, + limit: Optional[int] = None) -> List[Any]: + """ + Retrieve historical data for a specific category and name. + + Args: + category: History category + (e.g., 'build', 'crash', 'corpus', 'coverage') + name: Specific name/identifier within the category + start_date: Optional start date filter (ISO format) + end_date: Optional end date filter (ISO format) + limit: Optional limit on number of results + + Returns: + List of historical data entries + + Raises: + StorageManagerError: If retrieval fails + """ + try: + self.logger.debug("Retrieving history data for %s/%s", category, name) + return self.adapter.get_history(category, name, start_date, end_date, + limit) + except Exception as e: + error_msg = f"Failed to get history for {category}/{name}: {str(e)}" + self.logger.error(error_msg) + raise StorageManagerError(error_msg) + def retrieve(self, key: str) -> Any: # pylint: disable=inconsistent-return-statements """ Retrieve data with the given key. diff --git a/ossfuzz_py/errors/__init__.py b/ossfuzz_py/errors/__init__.py index bd47dea53..1c3050dca 100644 --- a/ossfuzz_py/errors/__init__.py +++ b/ossfuzz_py/errors/__init__.py @@ -165,6 +165,14 @@ # Manager errors 'OSSFuzzManagerError', + # Historical Data SDK errors + 'HistoryManagerError', + 'HistoryStorageError', + 'HistoryRetrievalError', + 'HistoryValidationError', + 'OSSFuzzSDKError', + 'OSSFuzzSDKConfigError', + # General/legacy errors 'SDKError', 'EnvironmentParametersError', diff --git a/ossfuzz_py/errors/core.py b/ossfuzz_py/errors/core.py index b3ade5dc7..4e39d0c95 100644 --- a/ossfuzz_py/errors/core.py +++ b/ossfuzz_py/errors/core.py @@ -100,6 +100,7 @@ class ErrorCode(str, Enum): # Storage errors STORAGE_ERROR = "STORAGE_ERROR" STORAGE_CONNECTION_ERROR = "STORAGE_CONNECTION_ERROR" + STORAGE_MANAGER_ERROR = "STORAGE_MANAGER_ERROR" # Data errors DATA_ERROR = "DATA_ERROR" @@ -111,6 +112,16 @@ class ErrorCode(str, Enum): CACHE_ERROR = "CACHE_ERROR" RESULT_COMPARISON_ERROR = "RESULT_COMPARISON_ERROR" + # Historical data errors + HISTORY_MANAGER_ERROR = "HISTORY_MANAGER_ERROR" + HISTORY_STORAGE_ERROR = "HISTORY_STORAGE_ERROR" + HISTORY_RETRIEVAL_ERROR = "HISTORY_RETRIEVAL_ERROR" + HISTORY_VALIDATION_ERROR = "HISTORY_VALIDATION_ERROR" + + # OSS-Fuzz SDK errors + OSSFUZZ_SDK_ERROR = "OSSFUZZ_SDK_ERROR" + OSSFUZZ_SDK_CONFIG_ERROR = "OSSFUZZ_SDK_CONFIG_ERROR" + # Analysis errors ANALYSIS_ERROR = "ANALYSIS_ERROR" CHANGE_TRACKING_ERROR = "CHANGE_TRACKING_ERROR" diff --git a/ossfuzz_py/errors/factory.py b/ossfuzz_py/errors/factory.py index 267aa719e..e6c265309 100644 --- a/ossfuzz_py/errors/factory.py +++ b/ossfuzz_py/errors/factory.py @@ -329,5 +329,24 @@ def list_error_classes() -> Dict[str, Type[OSSFuzzError]]: OSSFuzzManagerError = make_error("OSSFuzzManagerError", ErrorCode.INVALID_CONFIG, ErrorDomain.CONFIG) 
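+# Usage sketch (hypothetical caller) for the history error classes defined
+# below. They are produced by the same make_error factory used above, so a
+# caller can catch the specific class it cares about, for example:
+#
+#   try:
+#     build_history_manager.get_build_history(limit=10)
+#   except HistoryManagerError as e:
+#     logging.warning("Could not read build history: %s", e)
+#
+# Here build_history_manager is assumed to be a BuildHistoryManager instance.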
+# Historical Data SDK errors +HistoryManagerError = make_error("HistoryManagerError", + ErrorCode.HISTORY_MANAGER_ERROR, + ErrorDomain.DATA) +HistoryStorageError = make_error("HistoryStorageError", + ErrorCode.HISTORY_STORAGE_ERROR, + ErrorDomain.STORAGE) +HistoryRetrievalError = make_error("HistoryRetrievalError", + ErrorCode.HISTORY_RETRIEVAL_ERROR, + ErrorDomain.DATA) +HistoryValidationError = make_error("HistoryValidationError", + ErrorCode.HISTORY_VALIDATION_ERROR, + ErrorDomain.VALIDATION) +OSSFuzzSDKError = make_error("OSSFuzzSDKError", ErrorCode.OSSFUZZ_SDK_ERROR, + ErrorDomain.CONFIG) +OSSFuzzSDKConfigError = make_error("OSSFuzzSDKConfigError", + ErrorCode.OSSFUZZ_SDK_CONFIG_ERROR, + ErrorDomain.CONFIG) + # General/legacy errors for backward compatibility SDKError = make_error("SDKError", ErrorCode.UNKNOWN, ErrorDomain.CONFIG) diff --git a/ossfuzz_py/execution/fuzz_runner.py b/ossfuzz_py/execution/fuzz_runner.py index 9f2c52f94..5ec60711f 100644 --- a/ossfuzz_py/execution/fuzz_runner.py +++ b/ossfuzz_py/execution/fuzz_runner.py @@ -40,8 +40,10 @@ from google.cloud import storage from pydantic import BaseModel, Field +# Import Benchmark and Result so run results can be stored via ResultManager +from ossfuzz_py.core.benchmark_manager import Benchmark from ossfuzz_py.core.data_models import FuzzingEngine, Sanitizer -from ossfuzz_py.result.results import RunInfo +from ossfuzz_py.result.results import Result, RunInfo from ossfuzz_py.utils.env_utils import EnvUtils from ossfuzz_py.utils.work_dir_manager import WorkDirManager @@ -145,13 +147,20 @@ class LocalRunner(FuzzRunner): Implements the standardized Runner interface as per UML design. - def __init__(self, work_dir_manager: WorkDirManager): + def __init__(self, + work_dir_manager: WorkDirManager, + result_manager: Any = None): """Initialize LocalRunner with Docker manager integration.""" super().__init__() self.work_dir_manager = work_dir_manager - - def run(self, target: str, options: FuzzRunOptions, - build_metadata: Dict[str, Any]) -> RunInfo: + self.result_manager = result_manager + + def run(self, + target: str, + options: FuzzRunOptions, + build_metadata: Dict[str, Any], + benchmark_id: Optional[str] = None, + trial: int = 1) -> RunInfo: + """ + Run a fuzz target using build metadata from LocalBuilder. 
@@ -159,6 +168,8 @@ def run(self, target: str, options: FuzzRunOptions, target: Target name to run options: Fuzzing options build_metadata: Metadata from LocalBuilder containing build artifacts + benchmark_id: Optional benchmark ID for result storage + trial: Trial number for result storage Returns: RunInfo: Standardized result data structure @@ -174,6 +185,11 @@ def run(self, target: str, options: FuzzRunOptions, except Exception as e: self.logger.error("Local run failed: %s", e) + run_info.error_message = str(e) + + # Store result through ResultManager if available + self._store_run_result(target, run_info, build_metadata, benchmark_id, + trial) return run_info @@ -272,6 +288,50 @@ def _get_libfuzzer_args(self, options: FuzzRunOptions) -> List[str]: return args + def _store_run_result(self, target: str, run_info: RunInfo, + build_metadata: Dict[str, Any], + benchmark_id: Optional[str], trial: int) -> None: + """Store run result through ResultManager if available.""" + if not self.result_manager: + return + + try: + # Create minimal benchmark for the result + if Benchmark is None: + self.logger.warning("Benchmark class not available") + return + + benchmark = Benchmark( + project=build_metadata.get('project_name', 'unknown'), + language='c++', # Default language + function_signature=f'int {target}(const uint8_t* data, size_t size)', + function_name=target, + return_type='int', + target_path='', + id=benchmark_id or target, + ) + + # Create Result object for storage + if Result is None: + self.logger.warning("Result class not available") + return + + result_obj = Result( + benchmark=benchmark, + work_dirs='', + trial=trial, + run_info=run_info, + ) + + # Store through ResultManager + self.result_manager.store_result(benchmark_id or target, result_obj) + self.logger.debug("Stored run result for %s through ResultManager", + benchmark_id or target) + + except Exception as e: + self.logger.warning( + "Failed to store run result through ResultManager: %s", e) + def _parse_and_update_run_results(self, log_path: str, run_info: 'RunInfo', project_name: str): """Parse libfuzzer logs and update run_info with results.""" diff --git a/ossfuzz_py/history/__init__.py b/ossfuzz_py/history/__init__.py new file mode 100644 index 000000000..fa82c6051 --- /dev/null +++ b/ossfuzz_py/history/__init__.py @@ -0,0 +1,36 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +History management package for the OSS-Fuzz Python SDK. 
+ +This package provides managers for different types of historical data: +- BuildHistoryManager: Build history and statistics +- CrashHistoryManager: Crash data and analysis +- CorpusHistoryManager: Corpus growth and statistics +- CoverageHistoryManager: Coverage trends and analysis +""" + +from .build_history_manager import BuildHistoryManager +from .corpus_history_manager import CorpusHistoryManager +from .coverage_history_manager import CoverageHistoryManager +from .crash_history_manager import CrashHistoryManager +from .history_manager import HistoryManager + +__all__ = [ + 'HistoryManager', + 'BuildHistoryManager', + 'CrashHistoryManager', + 'CorpusHistoryManager', + 'CoverageHistoryManager', +] diff --git a/ossfuzz_py/history/build_history_manager.py b/ossfuzz_py/history/build_history_manager.py new file mode 100644 index 000000000..5b03efb8a --- /dev/null +++ b/ossfuzz_py/history/build_history_manager.py @@ -0,0 +1,281 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Build history manager for the OSS-Fuzz Python SDK. + +This module manages historical build data including build results, +success rates, and build artifact tracking. +""" + +from datetime import datetime, timedelta +from typing import Any, Dict, List, Optional + +from ossfuzz_py.core.data_models import BuildHistoryData +from ossfuzz_py.errors import HistoryManagerError, HistoryValidationError + +from .history_manager import HistoryManager + + +class BuildHistoryManager(HistoryManager): + """ + Manages historical build data for OSS-Fuzz projects. + + This manager handles storage and retrieval of build history, including + build results, timing information, and artifact tracking. + """ + + @property + def category(self) -> str: + """Get the history category for build data.""" + return "build" + + def validate_data(self, data: Any) -> bool: # pylint: disable=inconsistent-return-statements + """ + Validate build data before storage. + + Args: + data: Build data to validate + + Returns: + bool: True if data is valid + + Raises: + HistoryValidationError: If validation fails + """ + try: + if isinstance(data, dict): + # Validate required fields + required_fields = ['build_id', 'timestamp', 'project_name', 'success'] + for field in required_fields: + if field not in data: + raise HistoryValidationError(f"Missing required field: {field}") + + # Validate data types + if not isinstance(data['success'], bool): + raise HistoryValidationError("'success' field must be boolean") + + return True + if isinstance(data, BuildHistoryData): + # Pydantic model validation is automatic + return True + raise HistoryValidationError(f"Invalid data type: {type(data)}") + except Exception as e: + raise HistoryValidationError( + f"Build data validation failed: {str(e)}") from e + + def get_build_history(self, + start_date: Optional[str] = None, + end_date: Optional[str] = None, + limit: Optional[int] = None) -> List[Dict[str, Any]]: + """ + Get build history for the project. 
+ + Args: + start_date: Optional start date filter (ISO format) + end_date: Optional end date filter (ISO format) + limit: Optional limit on number of results + + Returns: + List of build history entries + + Raises: + HistoryManagerError: If retrieval fails + """ + try: + return self.get_data(self.project_name, start_date, end_date, limit) + except Exception as e: + raise HistoryManagerError(f"Failed to get build history: {str(e)}") + + def get_last_successful_build(self) -> Optional[Dict[str, Any]]: + """ + Get the last successful build for the project. + + Returns: + Last successful build data or None if no successful builds + + Raises: + HistoryManagerError: If retrieval fails + """ + try: + # Get recent builds and find the last successful one + builds = self.get_build_history(limit=50) # Check last 50 builds + + for build in reversed(builds): # Start from the most recent + if build.get('success', False): + return build + + return None + except Exception as e: + raise HistoryManagerError( + f"Failed to get last successful build: {str(e)}") + + def store_build_result(self, build_data: Dict[str, Any]) -> str: + """ + Store a build result. + + Args: + build_data: Build result data to store + + Returns: + str: Storage path where data was stored + + Raises: + HistoryManagerError: If storage fails + """ + try: + # Add a timestamp if not present + if 'timestamp' not in build_data: + build_data['timestamp'] = datetime.now().isoformat() + + # Add a project name if not present + if 'project_name' not in build_data: + build_data['project_name'] = self.project_name + + # Validate data + self.validate_data(build_data) + + return self.store_data(self.project_name, build_data) + except Exception as e: + raise HistoryManagerError(f"Failed to store build result: {str(e)}") + + def get_build_statistics(self, + start_date: Optional[str] = None, + end_date: Optional[str] = None) -> Dict[str, Any]: + """ + Get build statistics for the specified period. + + Args: + start_date: Optional start date filter (ISO format) + end_date: Optional end date filter (ISO format) + + Returns: + Dictionary containing build statistics + + Raises: + HistoryManagerError: If calculation fails + """ + try: + builds = self.get_build_history(start_date, end_date) + + if not builds: + return { + 'total_builds': 0, + 'successful_builds': 0, + 'failed_builds': 0, + 'success_rate': 0.0, + 'average_duration': 0.0 + } + + total_builds = len(builds) + successful_builds = sum( + 1 for build in builds if build.get('success', False)) + failed_builds = total_builds - successful_builds + success_rate = (successful_builds / + total_builds) * 100 if total_builds > 0 else 0.0 + + # Calculate average duration for builds with duration data + durations = [ + build.get('duration_seconds', 0) + for build in builds + if build.get('duration_seconds') is not None + ] + average_duration = sum(durations) / len(durations) if durations else 0.0 + + return { + 'total_builds': total_builds, + 'successful_builds': successful_builds, + 'failed_builds': failed_builds, + 'success_rate': success_rate, + 'average_duration': average_duration, + 'period_start': start_date, + 'period_end': end_date + } + except Exception as e: + raise HistoryManagerError( + f"Failed to calculate build statistics: {str(e)}") + + def get_build_trends(self, days: int = 30) -> Dict[str, Any]: + """ + Get build trends for the specified number of days. 
+ + Args: + days: Number of days to analyze + + Returns: + Dictionary containing trend analysis + + Raises: + HistoryManagerError: If analysis fails + """ + try: + end_date = datetime.now() + start_date = end_date - timedelta(days=days) + + builds = self.get_build_history(start_date=start_date.isoformat(), + end_date=end_date.isoformat()) + + if not builds: + return {'trend': 'no_data', 'builds_per_day': 0.0} + + # Group builds by day + daily_builds = {} + for build in builds: + build_date = build.get('timestamp', '')[:10] # Get YYYY-MM-DD + if build_date not in daily_builds: + daily_builds[build_date] = {'total': 0, 'successful': 0} + daily_builds[build_date]['total'] += 1 + if build.get('success', False): + daily_builds[build_date]['successful'] += 1 + + # Calculate trends + total_days = len(daily_builds) + builds_per_day = len(builds) / days if days > 0 else 0.0 + + # Calculate success rate trend + daily_success_rates = [] + for day_data in daily_builds.values(): + rate = (day_data['successful'] / + day_data['total']) * 100 if day_data['total'] > 0 else 0.0 + daily_success_rates.append(rate) + + # Simple trend analysis + if len(daily_success_rates) >= 2: + recent_rate = sum(daily_success_rates[-7:]) / min( + 7, len(daily_success_rates)) + older_rate = sum(daily_success_rates[:-7]) / max( + 1, + len(daily_success_rates) - 7) + + if recent_rate > older_rate + 5: + trend = 'improving' + elif recent_rate < older_rate - 5: + trend = 'declining' + else: + trend = 'stable' + else: + trend = 'insufficient_data' + + return { + 'trend': + trend, + 'builds_per_day': + builds_per_day, + 'total_days_with_builds': + total_days, + 'average_success_rate': + sum(daily_success_rates) / + len(daily_success_rates) if daily_success_rates else 0.0 + } + except Exception as e: + raise HistoryManagerError(f"Failed to analyze build trends: {str(e)}") diff --git a/ossfuzz_py/history/corpus_history_manager.py b/ossfuzz_py/history/corpus_history_manager.py new file mode 100644 index 000000000..48d104b97 --- /dev/null +++ b/ossfuzz_py/history/corpus_history_manager.py @@ -0,0 +1,377 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Corpus history manager for the OSS-Fuzz Python SDK. + +This module manages historical corpus data including corpus growth, +statistics, and merging operations. +""" + +from datetime import datetime +from pathlib import Path +from typing import Any, Dict, List, Optional + +from ossfuzz_py.core.data_models import CorpusHistoryData +from ossfuzz_py.errors import HistoryManagerError, HistoryValidationError + +from .history_manager import HistoryManager + + +class CorpusHistoryManager(HistoryManager): + """ + Manages historical corpus data for OSS-Fuzz projects. + + This manager handles storage and retrieval of corpus statistics including + corpus size, growth rates, and coverage impact. 
+ """ + + @property + def category(self) -> str: + """Get the history category for corpus data.""" + return "corpus" + + def validate_data(self, data: Any) -> bool: # pylint: disable=inconsistent-return-statements + """ + Validate corpus data before storage. + + Args: + data: Corpus data to validate + + Returns: + bool: True if data is valid + + Raises: + HistoryValidationError: If validation fails + """ + try: + if isinstance(data, dict): + # Validate required fields + required_fields = [ + 'timestamp', 'project_name', 'fuzzer_name', 'corpus_size' + ] + for field in required_fields: + if field not in data: + raise HistoryValidationError(f"Missing required field: {field}") + + # Validate data types + if not isinstance(data['corpus_size'], int) or data['corpus_size'] < 0: + raise HistoryValidationError( + "'corpus_size' must be a non-negative integer") + + return True + if isinstance(data, CorpusHistoryData): + # Pydantic model validation is automatic + return True + raise HistoryValidationError(f"Invalid data type: {type(data)}") + except Exception as e: + raise HistoryValidationError( + f"Corpus data validation failed: {str(e)}") from e + + def get_corpus_stats(self, + fuzzer_name: Optional[str] = None, + start_date: Optional[str] = None, + end_date: Optional[str] = None, + limit: Optional[int] = None) -> List[Dict[str, Any]]: + """ + Get corpus statistics for the project. + + Args: + fuzzer_name: Optional fuzzer name filter + start_date: Optional start date filter (ISO format) + end_date: Optional end date filter (ISO format) + limit: Optional limit on number of results + + Returns: + List of corpus statistics entries + + Raises: + HistoryManagerError: If retrieval fails + """ + try: + data_name = fuzzer_name if fuzzer_name else self.project_name + stats = self.get_data(data_name, start_date, end_date, limit) + + # Filter by fuzzer if specified and data contains multiple fuzzers + if fuzzer_name: + stats = [s for s in stats if s.get('fuzzer_name') == fuzzer_name] + + return stats + except Exception as e: + raise HistoryManagerError(f"Failed to get corpus stats: {str(e)}") + + def get_corpus_growth(self, + fuzzer_name: Optional[str] = None, + days: int = 30) -> Dict[str, Any]: + """ + Get corpus growth statistics for the specified period. 
+ + Args: + fuzzer_name: Optional fuzzer name filter + days: Number of days to analyze + + Returns: + Dictionary containing growth statistics + + Raises: + HistoryManagerError: If analysis fails + """ + try: + from datetime import timedelta + + end_date = datetime.now() + start_date = end_date - timedelta(days=days) + + stats = self.get_corpus_stats(fuzzer_name=fuzzer_name, + start_date=start_date.isoformat(), + end_date=end_date.isoformat()) + + if not stats: + return { + 'growth_rate': 0.0, + 'size_change': 0, + 'average_size': 0.0, + 'trend': 'no_data' + } + + # Sort by timestamp + stats.sort(key=lambda x: x.get('timestamp', '')) + + initial_size = stats[0].get('corpus_size', 0) + final_size = stats[-1].get('corpus_size', 0) + size_change = final_size - initial_size + + # Calculate growth rate + growth_rate = (size_change / initial_size * + 100) if initial_size > 0 else 0.0 + + # Calculate average size + sizes = [s.get('corpus_size', 0) for s in stats] + average_size = sum(sizes) / len(sizes) if sizes else 0.0 + + # Determine trend + if growth_rate > 5: + trend = 'growing' + elif growth_rate < -5: + trend = 'shrinking' + else: + trend = 'stable' + + return { + 'growth_rate': growth_rate, + 'size_change': size_change, + 'initial_size': initial_size, + 'final_size': final_size, + 'average_size': average_size, + 'trend': trend, + 'period_days': days + } + except Exception as e: + raise HistoryManagerError(f"Failed to analyze corpus growth: {str(e)}") + + def merge_corpus(self, source_path: str, target_path: str) -> Dict[str, Any]: + """ + Merge corpus from source to target directory. + + Args: + source_path: Path to source corpus directory + target_path: Path to target corpus directory + + Returns: + Dictionary containing merge results + + Raises: + HistoryManagerError: If merge fails + """ + try: + source_dir = Path(source_path) + target_dir = Path(target_path) + + if not source_dir.exists(): + raise HistoryManagerError( + f"Source corpus directory not found: {source_path}") + + # Create target directory if it doesn't exist + target_dir.mkdir(parents=True, exist_ok=True) + + # Count files before merge + initial_target_count = len(list( + target_dir.glob('*'))) if target_dir.exists() else 0 + source_count = len(list(source_dir.glob('*'))) + + # Copy files from source to target + import shutil + copied_files = 0 + skipped_files = 0 + + for source_file in source_dir.glob('*'): + if source_file.is_file(): + target_file = target_dir / source_file.name + + # Skip if file already exists and is identical + if target_file.exists(): + if source_file.stat().st_size == target_file.stat().st_size: + skipped_files += 1 + continue + + shutil.copy2(source_file, target_file) + copied_files += 1 + + # Count files after merge + final_target_count = len(list(target_dir.glob('*'))) + + merge_result = { + 'initial_target_count': initial_target_count, + 'source_count': source_count, + 'copied_files': copied_files, + 'skipped_files': skipped_files, + 'final_target_count': final_target_count, + 'files_added': final_target_count - initial_target_count, + 'timestamp': datetime.now().isoformat() + } + + # Store merge result in history + self.store_corpus_stats({ + 'timestamp': merge_result['timestamp'], + 'project_name': self.project_name, + 'fuzzer_name': 'merged', + 'corpus_size': final_target_count, + 'new_files_count': copied_files, + 'total_size_bytes': self._calculate_directory_size(target_dir) + }) + + return merge_result + except Exception as e: + raise HistoryManagerError(f"Failed to merge corpus: 
{str(e)}") + + def store_corpus_stats(self, corpus_data: Dict[str, Any]) -> str: + """ + Store corpus statistics. + + Args: + corpus_data: Corpus statistics to store + + Returns: + str: Storage path where data was stored + + Raises: + HistoryManagerError: If storage fails + """ + try: + # Add timestamp if not present + if 'timestamp' not in corpus_data: + corpus_data['timestamp'] = datetime.now().isoformat() + + # Add project name if not present + if 'project_name' not in corpus_data: + corpus_data['project_name'] = self.project_name + + # Validate data + self.validate_data(corpus_data) + + # Use fuzzer name as the data identifier + data_name = corpus_data.get('fuzzer_name', self.project_name) + + return self.store_data(data_name, corpus_data) + except Exception as e: + raise HistoryManagerError(f"Failed to store corpus stats: {str(e)}") + + def _calculate_directory_size(self, directory: Path) -> int: + """ + Calculate total size of files in a directory. + + Args: + directory: Directory path + + Returns: + int: Total size in bytes + """ + try: + total_size = 0 + for file_path in directory.rglob('*'): + if file_path.is_file(): + total_size += file_path.stat().st_size + return total_size + except Exception: + return 0 + + def analyze_corpus_effectiveness(self, + fuzzer_name: str, + days: int = 7) -> Dict[str, Any]: + """ + Analyze corpus effectiveness in terms of coverage and crash discovery. + + Args: + fuzzer_name: Name of the fuzzer to analyze + days: Number of days to analyze + + Returns: + Dictionary containing effectiveness analysis + + Raises: + HistoryManagerError: If analysis fails + """ + try: + from datetime import timedelta + + end_date = datetime.now() + start_date = end_date - timedelta(days=days) + + corpus_stats = self.get_corpus_stats(fuzzer_name=fuzzer_name, + start_date=start_date.isoformat(), + end_date=end_date.isoformat()) + + if not corpus_stats: + return { + 'effectiveness_score': 0.0, + 'corpus_efficiency': 0.0, + 'recommendation': 'insufficient_data' + } + + # Calculate corpus efficiency (coverage increase per corpus size increase) + corpus_stats.sort(key=lambda x: x.get('timestamp', '')) + + initial_stats = corpus_stats[0] + final_stats = corpus_stats[-1] + + corpus_growth = final_stats.get('corpus_size', 0) - initial_stats.get( + 'corpus_size', 0) + coverage_increase = final_stats.get('coverage_increase', 0.0) + + # Calculate efficiency score + if corpus_growth > 0: + efficiency = coverage_increase / corpus_growth + else: + efficiency = 0.0 + + # Generate recommendation + if efficiency > 0.1: + recommendation = 'highly_effective' + elif efficiency > 0.05: + recommendation = 'moderately_effective' + elif efficiency > 0.01: + recommendation = 'low_effectiveness' + else: + recommendation = 'ineffective' + + return { + 'effectiveness_score': efficiency, + 'corpus_growth': corpus_growth, + 'coverage_increase': coverage_increase, + 'corpus_efficiency': efficiency, + 'recommendation': recommendation, + 'analysis_period_days': days + } + except Exception as e: + raise HistoryManagerError( + f"Failed to analyze corpus effectiveness: {str(e)}") diff --git a/ossfuzz_py/history/coverage_history_manager.py b/ossfuzz_py/history/coverage_history_manager.py new file mode 100644 index 000000000..8ec53c8a6 --- /dev/null +++ b/ossfuzz_py/history/coverage_history_manager.py @@ -0,0 +1,427 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Coverage history manager for the OSS-Fuzz Python SDK. + +This module manages historical coverage data including coverage trends, +analysis, and reporting. +""" + +from datetime import datetime +from typing import Any, Dict, List, Optional + +from ossfuzz_py.core.data_models import CoverageHistoryData +from ossfuzz_py.errors import HistoryManagerError, HistoryValidationError + +from .history_manager import HistoryManager + + +class CoverageHistoryManager(HistoryManager): + """ + Manages historical coverage data for OSS-Fuzz projects. + + This manager handles storage and retrieval of coverage data including + line coverage, function coverage, and branch coverage trends. + """ + + @property + def category(self) -> str: + """Get the history category for coverage data.""" + return "coverage" + + def validate_data(self, data: Any) -> bool: # pylint: disable=inconsistent-return-statements + """ + Validate coverage data before storage. + + Args: + data: Coverage data to validate + + Returns: + bool: True if data is valid + + Raises: + HistoryValidationError: If validation fails + """ + try: + if isinstance(data, dict): + # Validate required fields + required_fields = ['timestamp', 'project_name', 'line_coverage'] + for field in required_fields: + if field not in data: + raise HistoryValidationError(f"Missing required field: {field}") + + # Validate coverage percentages + coverage_fields = [ + 'line_coverage', 'function_coverage', 'branch_coverage' + ] + for field in coverage_fields: + if field in data: + value = data[field] + if not isinstance(value, (int, float)) or value < 0 or value > 100: + raise HistoryValidationError( + f"'{field}' must be between 0 and 100") + + return True + if isinstance(data, CoverageHistoryData): + # Pydantic model validation is automatic + return True + raise HistoryValidationError(f"Invalid data type: {type(data)}") + except Exception as e: + raise HistoryValidationError( + f"Coverage data validation failed: {str(e)}") from e + + def get_coverage_history(self, + fuzzer_name: Optional[str] = None, + start_date: Optional[str] = None, + end_date: Optional[str] = None, + limit: Optional[int] = None) -> List[Dict[str, Any]]: + """ + Get coverage history for the project. + + Args: + fuzzer_name: Optional fuzzer name filter + start_date: Optional start date filter (ISO format) + end_date: Optional end date filter (ISO format) + limit: Optional limit on number of results + + Returns: + List of coverage history entries + + Raises: + HistoryManagerError: If retrieval fails + """ + try: + data_name = fuzzer_name if fuzzer_name else self.project_name + history = self.get_data(data_name, start_date, end_date, limit) + + # Filter by fuzzer if specified and data contains multiple fuzzers + if fuzzer_name: + history = [h for h in history if h.get('fuzzer_name') == fuzzer_name] + + return history + except Exception as e: + raise HistoryManagerError(f"Failed to get coverage history: {str(e)}") + + def get_latest_coverage(self, + fuzzer_name: Optional[str] = None + ) -> Optional[Dict[str, Any]]: + """ + Get the latest coverage data for the project. 
+ + Args: + fuzzer_name: Optional fuzzer name filter + + Returns: + Latest coverage data or None if no data exists + + Raises: + HistoryManagerError: If retrieval fails + """ + try: + history = self.get_coverage_history(fuzzer_name=fuzzer_name, limit=1) + return history[0] if history else None + except Exception as e: + raise HistoryManagerError(f"Failed to get latest coverage: {str(e)}") + + def get_coverage_report(self, + start_date: Optional[str] = None, + end_date: Optional[str] = None) -> Dict[str, Any]: + """ + Generate a comprehensive coverage report for the specified period. + + Args: + start_date: Optional start date filter (ISO format) + end_date: Optional end date filter (ISO format) + + Returns: + Dictionary containing coverage report + + Raises: + HistoryManagerError: If report generation fails + """ + try: + history = self.get_coverage_history(start_date=start_date, + end_date=end_date) + + if not history: + return { + 'summary': { + 'total_measurements': 0, + 'max_line_coverage': 0.0, + 'avg_line_coverage': 0.0, + 'coverage_trend': 'no_data' + }, + 'details': [], + 'recommendations': ['No coverage data available'] + } + + # Sort by timestamp + history.sort(key=lambda x: x.get('timestamp', '')) + + # Calculate summary statistics + line_coverages = [h.get('line_coverage', 0.0) for h in history] + function_coverages = [ + h.get('function_coverage', 0.0) + for h in history + if h.get('function_coverage') is not None + ] + branch_coverages = [ + h.get('branch_coverage', 0.0) + for h in history + if h.get('branch_coverage') is not None + ] + + max_line_coverage = max(line_coverages) if line_coverages else 0.0 + avg_line_coverage = sum(line_coverages) / len( + line_coverages) if line_coverages else 0.0 + + # Analyze trend + if len(line_coverages) >= 2: + recent_avg = sum(line_coverages[-5:]) / min(5, len(line_coverages)) + older_avg = sum(line_coverages[:-5]) / max(1, len(line_coverages) - 5) + + if recent_avg > older_avg + 1: + trend = 'improving' + elif recent_avg < older_avg - 1: + trend = 'declining' + else: + trend = 'stable' + else: + trend = 'insufficient_data' + + # Generate recommendations + recommendations = [] + if max_line_coverage < 50: + recommendations.append( + "Line coverage is below 50%. Consider adding more test cases.") + if function_coverages and max(function_coverages) < 70: + recommendations.append( + "Function coverage could be improved. Focus on uncovered functions." + ) + if trend == 'declining': + recommendations.append( + "Coverage trend is declining. Review recent changes.") + if not recommendations: + recommendations.append( + "Coverage metrics look good. 
Continue current testing approach.") + + return { + 'summary': { + 'total_measurements': + len(history), + 'max_line_coverage': + max_line_coverage, + 'avg_line_coverage': + avg_line_coverage, + 'max_function_coverage': + max(function_coverages) if function_coverages else None, + 'avg_function_coverage': + sum(function_coverages) / + len(function_coverages) if function_coverages else None, + 'max_branch_coverage': + max(branch_coverages) if branch_coverages else None, + 'avg_branch_coverage': + sum(branch_coverages) / + len(branch_coverages) if branch_coverages else None, + 'coverage_trend': + trend, + 'period_start': + start_date, + 'period_end': + end_date + }, + 'details': history, + 'recommendations': recommendations + } + except Exception as e: + raise HistoryManagerError(f"Failed to generate coverage report: {str(e)}") + + def store_coverage(self, coverage_data: Dict[str, Any]) -> str: + """ + Store coverage data. + + Args: + coverage_data: Coverage data to store + + Returns: + str: Storage path where data was stored + + Raises: + HistoryManagerError: If storage fails + """ + try: + # Add timestamp if not present + if 'timestamp' not in coverage_data: + coverage_data['timestamp'] = datetime.now().isoformat() + + # Add project name if not present + if 'project_name' not in coverage_data: + coverage_data['project_name'] = self.project_name + + # Validate data + self.validate_data(coverage_data) + + # Use fuzzer name as the data identifier if available + data_name = coverage_data.get('fuzzer_name', self.project_name) + + return self.store_data(data_name, coverage_data) + except Exception as e: + raise HistoryManagerError(f"Failed to store coverage data: {str(e)}") + + def analyze_coverage_trends(self, days: int = 30) -> Dict[str, Any]: + """ + Analyze coverage trends for the specified number of days. 
+ + Args: + days: Number of days to analyze + + Returns: + Dictionary containing trend analysis + + Raises: + HistoryManagerError: If analysis fails + """ + try: + from datetime import timedelta + + end_date = datetime.now() + start_date = end_date - timedelta(days=days) + + history = self.get_coverage_history(start_date=start_date.isoformat(), + end_date=end_date.isoformat()) + + if not history: + return { + 'trend': 'no_data', + 'coverage_velocity': 0.0, + 'stability': 'unknown' + } + + # Sort by timestamp + history.sort(key=lambda x: x.get('timestamp', '')) + + line_coverages = [h.get('line_coverage', 0.0) for h in history] + + # Calculate coverage velocity (change per day) + if len(line_coverages) >= 2: + coverage_change = line_coverages[-1] - line_coverages[0] + coverage_velocity = coverage_change / days + else: + coverage_velocity = 0.0 + + # Calculate stability (variance in coverage) + if len(line_coverages) > 1: + mean_coverage = sum(line_coverages) / len(line_coverages) + variance = sum((x - mean_coverage)**2 + for x in line_coverages) / len(line_coverages) + std_dev = variance**0.5 + + if std_dev < 1.0: + stability = 'stable' + elif std_dev < 3.0: + stability = 'moderate' + else: + stability = 'unstable' + else: + stability = 'unknown' + + # Determine overall trend + if coverage_velocity > 0.1: + trend = 'improving' + elif coverage_velocity < -0.1: + trend = 'declining' + else: + trend = 'stable' + + return { + 'trend': trend, + 'coverage_velocity': coverage_velocity, + 'stability': stability, + 'current_coverage': line_coverages[-1] if line_coverages else 0.0, + 'max_coverage': max(line_coverages) if line_coverages else 0.0, + 'min_coverage': min(line_coverages) if line_coverages else 0.0, + 'analysis_period_days': days + } + except Exception as e: + raise HistoryManagerError(f"Failed to analyze coverage trends: {str(e)}") + + def compare_coverage(self, + baseline_date: str, + comparison_date: Optional[str] = None) -> Dict[str, Any]: + """ + Compare coverage between two time points. 
+ + Args: + baseline_date: Baseline date for comparison (ISO format) + comparison_date: Comparison date (ISO format), defaults to latest + + Returns: + Dictionary containing comparison results + + Raises: + HistoryManagerError: If comparison fails + """ + try: + # Get baseline coverage + baseline_history = self.get_coverage_history(start_date=baseline_date, + end_date=baseline_date, + limit=1) + + if not baseline_history: + raise HistoryManagerError( + f"No coverage data found for baseline date: {baseline_date}") + + baseline_coverage = baseline_history[0] + + # Get comparison coverage + if comparison_date: + comparison_history = self.get_coverage_history( + start_date=comparison_date, end_date=comparison_date, limit=1) + else: + comparison_history = self.get_coverage_history(limit=1) + + if not comparison_history: + raise HistoryManagerError("No coverage data found for comparison") + + comparison_coverage = comparison_history[0] + + # Calculate differences + line_diff = comparison_coverage.get( + 'line_coverage', 0.0) - baseline_coverage.get('line_coverage', 0.0) + function_diff = None + branch_diff = None + + if (comparison_coverage.get('function_coverage') is not None and + baseline_coverage.get('function_coverage') is not None): + function_diff = comparison_coverage[ + 'function_coverage'] - baseline_coverage['function_coverage'] + + if (comparison_coverage.get('branch_coverage') is not None and + baseline_coverage.get('branch_coverage') is not None): + branch_diff = comparison_coverage[ + 'branch_coverage'] - baseline_coverage['branch_coverage'] + + return { + 'baseline': baseline_coverage, + 'comparison': comparison_coverage, + 'differences': { + 'line_coverage': line_diff, + 'function_coverage': function_diff, + 'branch_coverage': branch_diff + }, + 'improvement': line_diff > 0, + 'significant_change': abs(line_diff) > 1.0 + } + except Exception as e: + raise HistoryManagerError(f"Failed to compare coverage: {str(e)}") diff --git a/ossfuzz_py/history/crash_history_manager.py b/ossfuzz_py/history/crash_history_manager.py new file mode 100644 index 000000000..fa2c6b531 --- /dev/null +++ b/ossfuzz_py/history/crash_history_manager.py @@ -0,0 +1,328 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Crash history manager for the OSS-Fuzz Python SDK. + +This module manages historical crash data including crash detection, +deduplication, and analysis. +""" + +import hashlib +from datetime import datetime +from typing import Any, Dict, List, Optional, Set + +from ossfuzz_py.core.data_models import CrashHistoryData, Severity +from ossfuzz_py.errors import HistoryManagerError, HistoryValidationError + +from .history_manager import HistoryManager + + +class CrashHistoryManager(HistoryManager): + """ + Manages historical crash data for OSS-Fuzz projects. + + This manager handles storage and retrieval of crash data including + crash deduplication, severity analysis, and trend tracking. 
+ """ + + @property + def category(self) -> str: + """Get the history category for crash data.""" + return "crash" + + def validate_data(self, data: Any) -> bool: # pylint: disable=inconsistent-return-statements + """ + Validate crash data before storage. + + Args: + data: Crash data to validate + + Returns: + bool: True if data is valid + + Raises: + HistoryValidationError: If validation fails + """ + try: + if isinstance(data, dict): + # Validate required fields + required_fields = [ + 'crash_id', 'timestamp', 'project_name', 'fuzzer_name', 'crash_type' + ] + for field in required_fields: + if field not in data: + raise HistoryValidationError(f"Missing required field: {field}") + + return True + if isinstance(data, CrashHistoryData): + # Pydantic model validation is automatic + return True + raise HistoryValidationError(f"Invalid data type: {type(data)}") + except Exception as e: + raise HistoryValidationError( + f"Crash data validation failed: {str(e)}") from e + + def get_crash_history(self, + start_date: Optional[str] = None, + end_date: Optional[str] = None, + limit: Optional[int] = None) -> List[Dict[str, Any]]: + """ + Get crash history for the project. + + Args: + start_date: Optional start date filter (ISO format) + end_date: Optional end date filter (ISO format) + limit: Optional limit on number of results + + Returns: + List of crash history entries + + Raises: + HistoryManagerError: If retrieval fails + """ + try: + return self.get_data(self.project_name, start_date, end_date, limit) + except Exception as e: + raise HistoryManagerError(f"Failed to get crash history: {str(e)}") + + def is_duplicate_crash(self, crash_data: Dict[str, Any]) -> bool: + """ + Check if a crash is a duplicate of an existing crash. + + Args: + crash_data: Crash data to check + + Returns: + bool: True if crash is a duplicate + + Raises: + HistoryManagerError: If check fails + """ + try: + # Generate crash signature + signature = self._generate_crash_signature(crash_data) + + # Get recent crashes to check for duplicates + recent_crashes = self.get_crash_history(limit=1000) + + for crash in recent_crashes: + if crash.get('crash_signature') == signature: + return True + + return False + except Exception as e: + raise HistoryManagerError( + f"Failed to check for duplicate crash: {str(e)}") + + def store_crash(self, crash_data: Dict[str, Any]) -> str: + """ + Store a crash after deduplication check. + + Args: + crash_data: Crash data to store + + Returns: + str: Storage path where data was stored, or empty string if duplicate + + Raises: + HistoryManagerError: If storage fails + """ + try: + # Add timestamp if not present + if 'timestamp' not in crash_data: + crash_data['timestamp'] = datetime.now().isoformat() + + # Add project name if not present + if 'project_name' not in crash_data: + crash_data['project_name'] = self.project_name + + # Generate crash signature if not present + if 'crash_signature' not in crash_data: + crash_data['crash_signature'] = self._generate_crash_signature( + crash_data) + + # Check for duplicates + if self.is_duplicate_crash(crash_data): + self.logger.info("Duplicate crash detected, skipping storage") + return "" + + # Validate data + self.validate_data(crash_data) + + return self.store_data(self.project_name, crash_data) + except Exception as e: + raise HistoryManagerError(f"Failed to store crash: {str(e)}") + + def _parse_crashes_output(self, output: str) -> List[Dict[str, Any]]: + """ + Parse crash output from fuzzing tools. 
+ + Args: + output: Raw output from fuzzing tools + + Returns: + List of parsed crash data + + Raises: + HistoryManagerError: If parsing fails + """ + try: + crashes = [] + + # Simple parsing logic - this would be more sophisticated in practice + lines = output.split('\n') + current_crash = {} + + for line in lines: + line = line.strip() + + if 'ERROR:' in line or 'CRASH:' in line: + if current_crash: + crashes.append(current_crash) + current_crash = { + 'crash_id': self._generate_crash_id(), + 'timestamp': datetime.now().isoformat(), + 'project_name': self.project_name, + 'fuzzer_name': 'unknown', + 'crash_type': 'unknown', + 'severity': Severity.UNKNOWN.value + } + + # Extract crash type + if 'heap-buffer-overflow' in line.lower(): + current_crash['crash_type'] = 'heap-buffer-overflow' + current_crash['severity'] = Severity.HIGH.value + elif 'use-after-free' in line.lower(): + current_crash['crash_type'] = 'use-after-free' + current_crash['severity'] = Severity.CRITICAL.value + elif 'null-dereference' in line.lower(): + current_crash['crash_type'] = 'null-dereference' + current_crash['severity'] = Severity.MEDIUM.value + + # Extract stack trace + if line.startswith('#'): + if 'stack_trace' not in current_crash: + current_crash['stack_trace'] = line + else: + current_crash['stack_trace'] += '\n' + line + + # Add the last crash if any + if current_crash: + crashes.append(current_crash) + + return crashes + except Exception as e: + raise HistoryManagerError(f"Failed to parse crash output: {str(e)}") + + def _generate_crash_signature(self, crash_data: Dict[str, Any]) -> str: + """ + Generate a unique signature for a crash. + + Args: + crash_data: Crash data + + Returns: + str: Crash signature hash + """ + # Create signature from crash type and stack trace + signature_parts = [ + crash_data.get('crash_type', ''), + crash_data.get('fuzzer_name', ''), + ] + + # Use first few lines of stack trace for signature + stack_trace = crash_data.get('stack_trace', '') + if stack_trace: + # Take first 3 lines of stack trace + stack_lines = stack_trace.split('\n')[:3] + signature_parts.extend(stack_lines) + + signature_string = '|'.join(signature_parts) + return hashlib.md5(signature_string.encode()).hexdigest() + + def _generate_crash_id(self) -> str: + """Generate a unique crash ID.""" + import uuid + return str(uuid.uuid4()) + + def get_crash_statistics(self, + start_date: Optional[str] = None, + end_date: Optional[str] = None) -> Dict[str, Any]: + """ + Get crash statistics for the specified period. 
+ + Args: + start_date: Optional start date filter (ISO format) + end_date: Optional end date filter (ISO format) + + Returns: + Dictionary containing crash statistics + + Raises: + HistoryManagerError: If calculation fails + """ + try: + crashes = self.get_crash_history(start_date, end_date) + + if not crashes: + return { + 'total_crashes': 0, + 'unique_crashes': 0, + 'crash_types': {}, + 'severity_distribution': {}, + 'top_fuzzers': {} + } + + # Count unique crashes by signature + unique_signatures: Set[str] = set() + crash_types: Dict[str, int] = {} + severity_counts: Dict[str, int] = {} + fuzzer_counts: Dict[str, int] = {} + + for crash in crashes: + signature = crash.get('crash_signature', '') + if signature: + unique_signatures.add(signature) + + crash_type = crash.get('crash_type', 'unknown') + crash_types[crash_type] = crash_types.get(crash_type, 0) + 1 + + severity = crash.get('severity', 'UNKNOWN') + severity_counts[severity] = severity_counts.get(severity, 0) + 1 + + fuzzer = crash.get('fuzzer_name', 'unknown') + fuzzer_counts[fuzzer] = fuzzer_counts.get(fuzzer, 0) + 1 + + return { + 'total_crashes': + len(crashes), + 'unique_crashes': + len(unique_signatures), + 'crash_types': + crash_types, + 'severity_distribution': + severity_counts, + 'top_fuzzers': + dict( + sorted(fuzzer_counts.items(), + key=lambda x: x[1], + reverse=True)[:10]), + 'period_start': + start_date, + 'period_end': + end_date + } + except Exception as e: + raise HistoryManagerError( + f"Failed to calculate crash statistics: {str(e)}") diff --git a/ossfuzz_py/history/history_manager.py b/ossfuzz_py/history/history_manager.py new file mode 100644 index 000000000..24a12c743 --- /dev/null +++ b/ossfuzz_py/history/history_manager.py @@ -0,0 +1,180 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Abstract base class for history managers. + +This module defines the common interface and functionality for all +history managers in the OSS-Fuzz SDK. +""" + +import logging +from abc import ABC, abstractmethod +from typing import Any, List, Optional + +from ossfuzz_py.data.storage_manager import StorageManager +from ossfuzz_py.errors import HistoryManagerError + + +class HistoryManager(ABC): + """ + Abstract base class for managing historical data. + + This class provides the common interface and functionality for all + history managers. Concrete implementations handle specific types of + historical data (builds, crashes, corpus, coverage). + + Attributes: + storage_manager: Storage manager for data persistence + project_name: Name of the OSS-Fuzz project + logger: Logger instance for this manager + """ + + def __init__(self, storage_manager: StorageManager, project_name: str): + """ + Initialize the history manager. 
+ + Args: + storage_manager: Storage manager for data persistence + project_name: Name of the OSS-Fuzz project + + Raises: + HistoryManagerError: If initialization fails + """ + if not storage_manager: + raise HistoryManagerError("StorageManager is required") + if not project_name: + raise HistoryManagerError("Project name is required") + + self.storage_manager = storage_manager + self.project_name = project_name + self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}") + + self.logger.info("Initialized %s for project: %s", self.__class__.__name__, + project_name) + + @property + @abstractmethod + def category(self) -> str: + """ + Get the history category for this manager. + + Returns: + str: Category name (e.g., 'build', 'crash', 'corpus', 'coverage') + """ + + def store_data(self, name: str, data: Any) -> str: + """ + Store historical data. + + Args: + name: Identifier for the data + data: Data to store + + Returns: + str: Storage path where data was stored + + Raises: + HistoryManagerError: If storage fails + """ + try: + self.logger.debug("Storing %s data for %s", self.category, name) + return self.storage_manager.store_history(self.category, name, data) + except Exception as e: + error_msg = f"Failed to store {self.category} data for {name}: {str(e)}" + self.logger.error(error_msg) + raise HistoryManagerError(error_msg) + + def get_data(self, + name: str, + start_date: Optional[str] = None, + end_date: Optional[str] = None, + limit: Optional[int] = None) -> List[Any]: + """ + Retrieve historical data. + + Args: + name: Identifier for the data + start_date: Optional start date filter (ISO format) + end_date: Optional end date filter (ISO format) + limit: Optional limit on number of results + + Returns: + List of historical data entries + + Raises: + HistoryManagerError: If retrieval fails + """ + try: + self.logger.debug("Retrieving %s data for %s", self.category, name) + return self.storage_manager.get_history(self.category, name, start_date, + end_date, limit) + except Exception as e: + error_msg = f"Failed to get {self.category} data for {name}: {str(e)}" + self.logger.error(error_msg) + raise HistoryManagerError(error_msg) + + def get_latest(self, name: str) -> Optional[Any]: + """ + Get the latest entry for the specified name. + + Args: + name: Identifier for the data + + Returns: + Latest data entry or None if no data exists + + Raises: + HistoryManagerError: If retrieval fails + """ + try: + data = self.get_data(name, limit=1) + return data[0] if data else None + except Exception as e: + error_msg = (f"Failed to get latest {self.category} data for " + f"{name}: {str(e)}") + self.logger.error(error_msg) + raise HistoryManagerError(error_msg) + + @abstractmethod + def validate_data(self, data: Any) -> bool: + """ + Validate data before storage. + + Args: + data: Data to validate + + Returns: + bool: True if data is valid + + Raises: + HistoryManagerError: If validation fails + """ + + def _format_timestamp(self, timestamp: Any) -> str: + """ + Format timestamp to ISO string. 
+ + Args: + timestamp: Timestamp to format + + Returns: + str: ISO formatted timestamp + """ + from datetime import datetime + + if isinstance(timestamp, str): + return timestamp + if isinstance(timestamp, datetime): + return timestamp.isoformat() + return str(timestamp) diff --git a/ossfuzz_py/result/__init__.py b/ossfuzz_py/result/__init__.py index ce991bed4..a765dead0 100644 --- a/ossfuzz_py/result/__init__.py +++ b/ossfuzz_py/result/__init__.py @@ -11,5 +11,26 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# This module has been consolidated into the root ossfuzz_py package. -# All exports are now available directly from ossfuzz_py. +""" +Result management module for OSS-Fuzz SDK. + +This module provides result management capabilities including the ResultManager +class and related result data structures. +""" + +from .result_manager import ResultManager +from .results import (AnalysisInfo, BenchmarkResult, BuildInfo, + CoverageAnalysis, CrashAnalysis, Result, RunInfo, + TrialResult) + +__all__ = [ + 'ResultManager', + 'Result', + 'BuildInfo', + 'RunInfo', + 'AnalysisInfo', + 'TrialResult', + 'BenchmarkResult', + 'CoverageAnalysis', + 'CrashAnalysis', +] diff --git a/ossfuzz_py/result/result_manager.py b/ossfuzz_py/result/result_manager.py new file mode 100644 index 000000000..4415c7051 --- /dev/null +++ b/ossfuzz_py/result/result_manager.py @@ -0,0 +1,1033 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +ResultManager for OSS-Fuzz SDK. + +This module provides the central, authoritative repository for all fuzz +execution results, with unified storage, retrieval, and analytics +capabilities.""" + +import logging +from datetime import datetime, timedelta +from pathlib import Path +from typing import Any, Dict, List, Optional, Union + +import pandas as pd + +# Import Benchmark from the real BenchmarkManager +from ossfuzz_py.core.benchmark_manager import Benchmark, BenchmarkManager +from ossfuzz_py.errors import ResultCollectionError +from ossfuzz_py.history.build_history_manager import BuildHistoryManager +from ossfuzz_py.history.corpus_history_manager import CorpusHistoryManager +from ossfuzz_py.history.coverage_history_manager import CoverageHistoryManager +from ossfuzz_py.history.crash_history_manager import CrashHistoryManager + +from . import textcov +from .results import AnalysisInfo, BuildInfo, Result, RunInfo + +# Configure module logger +logger = logging.getLogger('ossfuzz_sdk.result_manager') + + +class ResultManager: + """ + Central, authoritative repository for all fuzz execution results. + + This class serves as the unified interface for storing, retrieving, + and analyzing fuzz execution results. It coordinates with HistoryManager + subclasses to persist BuildInfo, RunInfo, and AnalysisInfo components and + provides analytics capabilities to eliminate duplicate logic across the + codebase. 
+ + The ResultManager integrates seamlessly with the existing Builder/Runner + pipeline, BenchmarkManager, and all HistoryManager classes. + """ + + def __init__( + self, + build_mgr: BuildHistoryManager, + crash_mgr: CrashHistoryManager, + corpus_mgr: CorpusHistoryManager, + coverage_mgr: CoverageHistoryManager, + benchmark_manager: Optional['BenchmarkManager'] = None, + ) -> None: + """ + Initialize ResultManager with required HistoryManager dependencies. + + Args: + build_mgr: BuildHistoryManager for build result persistence + crash_mgr: CrashHistoryManager for crash data persistence + corpus_mgr: CorpusHistoryManager for corpus statistics persistence + coverage_mgr: CoverageHistoryManager for coverage data persistence + benchmark_manager: Optional BenchmarkManager for benchmark data + retrieval + """ + self.build_mgr = build_mgr + self.crash_mgr = crash_mgr + self.corpus_mgr = corpus_mgr + self.coverage_mgr = coverage_mgr + self.benchmark_manager = benchmark_manager + self.logger = logger + + def store_result(self, benchmark_id: str, result: Result) -> None: + """ + Store a Result by decomposing it into appropriate HistoryManager calls. + + This method takes a complete Result object and stores its components + (BuildInfo, RunInfo, AnalysisInfo) through the appropriate HistoryManager + subclasses to ensure proper data persistence and organization. + + Args: + benchmark_id: Unique identifier for the benchmark + result: Complete Result object to store + + Raises: + ResultCollectionError: If storage fails + """ + try: + self.logger.info("Storing result for benchmark %s, trial %d", + benchmark_id, result.trial) + + # Store BuildInfo through BuildHistoryManager + if result.build_info: + build_data = self._convert_build_info_to_dict(result, benchmark_id) + self.build_mgr.store_build_result(build_data) + self.logger.debug("Stored build info for benchmark %s", benchmark_id) + + # Store crash data through CrashHistoryManager + if result.run_info and result.run_info.crashes: + crash_data = self._convert_run_info_to_crash_dict(result, benchmark_id) + self.crash_mgr.store_crash(crash_data) + self.logger.debug("Stored crash data for benchmark %s", benchmark_id) + + # Store corpus data through CorpusHistoryManager + if result.run_info and result.run_info.corpus_path: + corpus_data = self._convert_run_info_to_corpus_dict( + result, benchmark_id) + self.corpus_mgr.store_corpus_stats(corpus_data) + self.logger.debug("Stored corpus data for benchmark %s", benchmark_id) + + # Store coverage data through CoverageHistoryManager + if result.analysis_info and result.analysis_info.coverage_analysis: + coverage_data = self._convert_analysis_info_to_coverage_dict( + result, benchmark_id) + self.coverage_mgr.store_coverage(coverage_data) + self.logger.debug("Stored coverage data for benchmark %s", benchmark_id) + + except Exception as e: + error_msg = (f"Failed to store result for benchmark {benchmark_id}: " + f"{str(e)}") + self.logger.error(error_msg) + raise ResultCollectionError(error_msg) from e + + def get_result(self, benchmark_id: str) -> Optional[Result]: + """ + Retrieve the latest Result for a benchmark by reconstructing from + HistoryManagers. + + This method queries all HistoryManager subclasses to reconstruct the mos + recent complete Result object for the specified benchmark. 
+ + Args: + benchmark_id: Unique identifier for the benchmark + + Returns: + Latest Result object or None if no results found + + Raises: + ResultCollectionError: If retrieval fails + """ + try: + self.logger.debug("Retrieving latest result for benchmark %s", + benchmark_id) + + # Get latest build data + build_history = self.build_mgr.get_build_history(limit=1) + build_info = None + latest_build_data = None + if build_history: + latest_build_data = build_history[0] + build_info = self._convert_dict_to_build_info(latest_build_data) + + # Get latest crash data + crash_history = self.crash_mgr.get_crash_history(limit=1) + run_info = None + if crash_history: + run_info = self._convert_dict_to_run_info(crash_history[0]) + + # Get latest coverage data + coverage_history = self.coverage_mgr.get_coverage_history(limit=1) + analysis_info = None + if coverage_history: + analysis_info = self._convert_dict_to_analysis_info(coverage_history[0]) + + # If no data found, return None + if not any([build_info, run_info, analysis_info]): + return None + + # Create benchmark for the result using BenchmarkManager + benchmark = self._create_minimal_benchmark(benchmark_id) + + # Extract work_dirs and trial from the latest available data + work_dirs = "" + trial = 1 + if latest_build_data: + work_dirs = latest_build_data.get('work_dirs', '') + trial = latest_build_data.get('trial', 1) + + # Reconstruct Result objec + result = Result( + benchmark=benchmark, + work_dirs=work_dirs, + trial=trial, + iteration=0, + build_info=build_info, + run_info=run_info, + analysis_info=analysis_info, + ) + + self.logger.debug("Successfully retrieved result for benchmark %s", + benchmark_id) + return result + + except Exception as e: + error_msg = (f"Failed to retrieve result for benchmark {benchmark_id}: " + f"{str(e)}") + self.logger.error(error_msg) + raise ResultCollectionError(error_msg) from e + + def get_trial_result(self, benchmark_id: str, + trial_id: int) -> Optional[Result]: + """ + Retrieve a specific trial Result for a benchmark. 
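As a concrete illustration of the decomposition performed by `store_result`, the sketch below builds a minimal `Result` that carries only a `BuildInfo`; in that case only the build component is persisted, since the run and analysis components are absent. The benchmark field values are placeholders, and `result_manager` is the instance wired up in the earlier sketch.

```python
from ossfuzz_py.core.benchmark_manager import Benchmark
from ossfuzz_py.result import BuildInfo, Result

benchmark = Benchmark(
    project='libpng',
    language='c++',
    function_signature='int png_read_info(png_structp, png_infop)',
    function_name='png_read_info',
    return_type='int',
    target_path='/src/libpng_read_fuzzer.cc',
    id='libpng_read_info',
)

result = Result(
    benchmark=benchmark,
    work_dirs='/tmp/ossfuzz_work/libpng_read_info',
    trial=1,
    iteration=0,
    build_info=BuildInfo(
        compiles=True,
        compile_log='clang++ ... ok',
        errors=[],
        binary_exists=True,
        is_function_referenced=True,
        fuzz_target_source='libpng_read_fuzzer.cc',
        build_script_source='build.sh',
    ),
)

# Decomposes the Result into the appropriate history managers.
result_manager.store_result('libpng_read_info', result)

# Reconstructs the latest stored Result from those components.
latest = result_manager.get_result('libpng_read_info')
```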
+ + Args: + benchmark_id: Unique identifier for the benchmark + trial_id: Specific trial number to retrieve + + Returns: + Result object for the specified trial or None if not found + + Raises: + ResultCollectionError: If retrieval fails + """ + try: + self.logger.debug("Retrieving trial %d result for benchmark %s", trial_id, + benchmark_id) + + # Get build data for specific trial + build_history = self.build_mgr.get_build_history( + limit=100) # Get more history to find trial + build_info = None + latest_build_data = None + for build_data in build_history: + latest_build_data = build_history[0] + if build_data.get('trial') == trial_id and build_data.get( + 'benchmark_id') == benchmark_id: + build_info = self._convert_dict_to_build_info(build_data) + break + + # Get crash data for specific trial + crash_history = self.crash_mgr.get_crash_history(limit=100) + run_info = None + for crash_data in crash_history: + if crash_data.get('trial') == trial_id and crash_data.get( + 'benchmark_id') == benchmark_id: + run_info = self._convert_dict_to_run_info(crash_data) + break + + # Get coverage data for specific trial + coverage_history = self.coverage_mgr.get_coverage_history(limit=100) + analysis_info = None + for coverage_data in coverage_history: + if coverage_data.get('trial') == trial_id and coverage_data.get( + 'benchmark_id') == benchmark_id: + analysis_info = self._convert_dict_to_analysis_info(coverage_data) + break + + # If no data found for this trial, return None + if not any([build_info, run_info, analysis_info]): + self.logger.debug("No data found for trial %d of benchmark %s", + trial_id, benchmark_id) + return None + + # Create benchmark for the result using BenchmarkManager + benchmark = self._create_minimal_benchmark(benchmark_id) + + work_dirs = "" + if latest_build_data: + work_dirs = latest_build_data.get('work_dirs', '') + + # Reconstruct Result object for specific trial + result = Result( + benchmark=benchmark, + work_dirs=work_dirs, + trial=trial_id, + iteration=0, + build_info=build_info, + run_info=run_info, + analysis_info=analysis_info, + ) + + self.logger.debug( + "Successfully retrieved trial %d result for benchmark %s", trial_id, + benchmark_id) + return result + + except Exception as e: + error_msg = (f"Failed to retrieve trial {trial_id} result for " + f"benchmark {benchmark_id}: {str(e)}") + self.logger.error(error_msg) + raise ResultCollectionError(error_msg) from e + + def get_metrics(self, benchmark_id: Optional[str] = None) -> Dict[str, Any]: + """ + Get comprehensive metrics for a benchmark or all benchmarks. + + This method provides a unified interface for accessing all metrics + as defined in the previous conversation, eliminating duplicate analytics + logic. + + Args: + benchmark_id: Optional benchmark ID. If None, returns aggregated metrics + + Returns: + Dictionary containing comprehensive metrics + + Raises: + ResultCollectionError: If metrics calculation fails + """ + try: + if benchmark_id: + return self._get_benchmark_metrics(benchmark_id) + return self._get_aggregated_metrics() + + except Exception as e: + error_msg = f"Failed to calculate metrics: {str(e)}" + self.logger.error(error_msg) + raise ResultCollectionError(error_msg) from e + + def coverage_trend(self, benchmark_id: str, start_date: datetime, + end_date: datetime) -> Union[Any, List[Dict[str, Any]]]: + """ + Return time-series coverage data as pandas DataFrame or list of dicts. 
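The shape of the returned dictionary depends on whether a benchmark ID is passed: with an ID you get the per-benchmark keys, without one the aggregated keys. A short usage sketch, reusing the `result_manager` from the earlier examples:

```python
# Per-benchmark metrics: keys include 'compiles', 'crashes', 'coverage',
# 'build_success_rate', 'crash_rate', 'cov_pcs', 'total_pcs', 'error_type'.
per_benchmark = result_manager.get_metrics('libpng_read_info')
print(f"compiles={per_benchmark['compiles']} coverage={per_benchmark['coverage']}")

# Aggregated metrics: keys include 'total_benchmarks', 'total_builds',
# 'successful_builds', 'build_success_rate', 'average_coverage', 'total_crashes'.
overall = result_manager.get_metrics()
print(f"{overall['successful_builds']}/{overall['total_builds']} builds succeeded "
      f"({overall['build_success_rate']:.1%})")
```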
+ + Args: + benchmark_id: Benchmark identifier + start_date: Start date for trend analysis + end_date: End date for trend analysis + + Returns: + DataFrame with time-series coverage data if pandas available, + otherwise list of dictionaries + + Raises: + ResultCollectionError: If trend analysis fails + """ + try: + self.logger.debug("Calculating coverage trend for benchmark %s", + benchmark_id) + + # Get coverage history for the specified period + coverage_history = self.coverage_mgr.get_coverage_history( + start_date=start_date.isoformat(), end_date=end_date.isoformat()) + + if not coverage_history: + return pd.DataFrame() + + # Convert to DataFrame + df = pd.DataFrame(coverage_history) + if 'timestamp' in df.columns: + df['timestamp'] = pd.to_datetime(df['timestamp']) + df = df.set_index('timestamp') + return df + + except Exception as e: + error_msg = (f"Failed to calculate coverage trend for benchmark " + f"{benchmark_id}: {str(e)}") + self.logger.error(error_msg) + raise ResultCollectionError(error_msg) from e + + def latest_successful_build(self, benchmark_id: str) -> Optional[Result]: + """ + Return the most recent Result with successful build. + + Args: + benchmark_id: Benchmark identifier + + Returns: + Latest Result with successful build or None if not found + + Raises: + ResultCollectionError: If retrieval fails + """ + try: + self.logger.debug("Finding latest successful build for benchmark %s", + benchmark_id) + + # Get build history and find the latest successful build + build_history = self.build_mgr.get_build_history( + limit=50) # Check last 50 builds + + for build_data in build_history: + if build_data.get('success', False): + # Reconstruct Result for this successful build + build_info = self._convert_dict_to_build_info(build_data) + benchmark = self._create_minimal_benchmark(benchmark_id) + + return Result( + benchmark=benchmark, + work_dirs=build_data.get('work_dirs', ''), + trial=build_data.get('trial', 1), + build_info=build_info, + ) + + return None + + except Exception as e: + error_msg = (f"Failed to find latest successful build for benchmark " + f"{benchmark_id}: {str(e)}") + self.logger.error(error_msg) + raise ResultCollectionError(error_msg) from e + + def get_build_success_rate(self, benchmark_id: str, days: int = 30) -> float: + """ + Calculate build success rate over specified period. 
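A usage sketch for the two helpers above, again assuming the `result_manager` built earlier. `coverage_trend` returns an empty DataFrame when no coverage records fall inside the window, and `latest_successful_build` returns `None` when no successful build is on record.

```python
from datetime import datetime, timedelta

end = datetime.now()
start = end - timedelta(days=30)

# Time-series coverage for the last 30 days, indexed by timestamp.
trend = result_manager.coverage_trend('libpng_read_info', start, end)
if not trend.empty and 'line_coverage' in trend.columns:
    print(trend['line_coverage'].describe())

# Most recent Result whose build succeeded, or None.
good_build = result_manager.latest_successful_build('libpng_read_info')
if good_build:
    print('Last good build artifacts under:', good_build.work_dirs)
```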
+ + Args: + benchmark_id: Benchmark identifier + days: Number of days to analyze + + Returns: + Build success rate as a float between 0.0 and 1.0 + + Raises: + ResultCollectionError: If calculation fails + """ + try: + self.logger.debug( + "Calculating build success rate for benchmark %s over %d days", + benchmark_id, days) + + # Calculate date range + end_date = datetime.now() + start_date = end_date - timedelta(days=days) + + # Get build statistics for the specified period + try: + build_stats = self.build_mgr.get_build_statistics( + start_date.isoformat(), end_date.isoformat()) + except TypeError: + # Fallback if get_build_statistics doesn't accept date parameters + build_stats = self.build_mgr.get_build_statistics() + + total_builds = build_stats.get('total_builds', 0) + if total_builds == 0: + return 0.0 + + successful_builds = build_stats.get('successful_builds', 0) + return successful_builds / total_builds + + except Exception as e: + error_msg = (f"Failed to calculate build success rate for benchmark " + f"{benchmark_id}: {str(e)}") + self.logger.error(error_msg) + raise ResultCollectionError(error_msg) from e + + def get_crash_summary(self, + benchmark_id: str, + days: int = 30) -> Dict[str, Any]: + """ + Get crash statistics and analysis summary. + + Args: + benchmark_id: Benchmark identifier + days: Number of days to analyze + + Returns: + Dictionary containing crash statistics and analysis + + Raises: + ResultCollectionError: If calculation fails + """ + try: + self.logger.debug( + "Calculating crash summary for benchmark %s over %d days", + benchmark_id, days) + + # Calculate date range + end_date = datetime.now() + start_date = end_date - timedelta(days=days) + + # Get crash statistics + crash_stats = self.crash_mgr.get_crash_statistics(start_date.isoformat(), + end_date.isoformat()) + + return crash_stats + + except Exception as e: + error_msg = (f"Failed to calculate crash summary for benchmark " + f"{benchmark_id}: {str(e)}") + self.logger.error(error_msg) + raise ResultCollectionError(error_msg) from e + + # Private helper methods for data conversion + + def _calculate_corpus_stats(self, corpus_path: str) -> Dict[str, int]: + """ + Calculate corpus statistics from the corpus directory. 
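Both helpers take a day window and degrade gracefully when the underlying statistics call does not accept date filters. A short sketch; the exact keys of the crash summary come from `CrashHistoryManager.get_crash_statistics`, so the ones shown here ('total_crashes', 'unique_crashes') should be treated as typical rather than guaranteed.

```python
rate = result_manager.get_build_success_rate('libpng_read_info', days=7)
print(f"7-day build success rate: {rate:.1%}")

summary = result_manager.get_crash_summary('libpng_read_info', days=7)
print(f"crashes: {summary.get('total_crashes', 0)} "
      f"(unique: {summary.get('unique_crashes', 0)})")
```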
+ + Args: + corpus_path: Path to the corpus directory or file + + Returns: + Dictionary with corpus_size, total_size_bytes, and new_files_coun + """ + if not corpus_path: + return {'corpus_size': 0, 'total_size_bytes': 0, 'new_files_count': 0} + + corpus_path_obj = Path(corpus_path) + + # Handle zip files (common for corpus storage) + if corpus_path.endswith('.zip') and corpus_path_obj.exists(): + try: + import zipfile + with zipfile.ZipFile(corpus_path, 'r') as zip_file: + file_list = zip_file.namelist() + corpus_size = len([f for f in file_list if not f.endswith('/')]) + total_size_bytes = sum( + zip_file.getinfo(f).file_size + for f in file_list + if not f.endswith('/')) + return { + 'corpus_size': corpus_size, + 'total_size_bytes': total_size_bytes, + 'new_files_count': + corpus_size, # All files are "new" for this calculation + } + except Exception as e: + self.logger.warning("Failed to analyze zip corpus %s: %s", corpus_path, + e) + return {'corpus_size': 0, 'total_size_bytes': 0, 'new_files_count': 0} + + # Handle directory + if corpus_path_obj.is_dir(): + try: + corpus_size = 0 + total_size_bytes = 0 + for file_path in corpus_path_obj.rglob('*'): + if file_path.is_file(): + corpus_size += 1 + total_size_bytes += file_path.stat().st_size + + return { + 'corpus_size': corpus_size, + 'total_size_bytes': total_size_bytes, + 'new_files_count': + corpus_size, # All files are "new" for this calculation + } + except Exception as e: + self.logger.warning("Failed to analyze directory corpus %s: %s", + corpus_path, e) + return {'corpus_size': 0, 'total_size_bytes': 0, 'new_files_count': 0} + + # Handle single file + if corpus_path_obj.is_file(): + try: + file_size = corpus_path_obj.stat().st_size + return { + 'corpus_size': 1, + 'total_size_bytes': file_size, + 'new_files_count': 1, + } + except Exception as e: + self.logger.warning("Failed to analyze file corpus %s: %s", corpus_path, + e) + return {'corpus_size': 0, 'total_size_bytes': 0, 'new_files_count': 0} + + # Path doesn't exist or is not accessible + return {'corpus_size': 0, 'total_size_bytes': 0, 'new_files_count': 0} + + def _convert_build_info_to_dict(self, result: Result, + benchmark_id: str) -> Dict[str, Any]: + """Convert BuildInfo to dictionary format for BuildHistoryManager.""" + build_info = result.build_info + if build_info is None: + raise ValueError("BuildInfo is None") + + return { + 'benchmark_id': benchmark_id, + 'trial': result.trial, + 'iteration': result.iteration, + 'timestamp': datetime.now().isoformat(), + 'work_dirs': result.work_dirs, # Store work_dirs for retrieval + 'success': build_info.success, + 'compiles': build_info.compiles, + 'compile_log': build_info.compile_log, + 'errors': build_info.errors, + 'binary_exists': build_info.binary_exists, + 'is_function_referenced': build_info.is_function_referenced, + 'fuzz_target_source': build_info.fuzz_target_source, + 'build_script_source': build_info.build_script_source, + } + + def _convert_run_info_to_crash_dict(self, result: Result, + benchmark_id: str) -> Dict[str, Any]: + """Convert RunInfo to crash dictionary format for CrashHistoryManager.""" + run_info = result.run_info + if run_info is None: + raise ValueError("RunInfo is None") + + return { + 'benchmark_id': benchmark_id, + 'trial': result.trial, + 'iteration': result.iteration, + 'timestamp': datetime.now().isoformat(), + 'work_dirs': result.work_dirs, # Store work_dirs for retrieval + 'crash_signature': run_info.crash_info or 'Unknown crash', + 'fuzzer_name': f"{benchmark_id}_trial_{result.trial}", + 
'severity': 'UNKNOWN', # Default severity + 'reproducible': not run_info.timeout, + 'stack_trace': run_info.crash_info, + 'error_message': run_info.error_message, + 'reproducer_path': run_info.reproducer_path, + 'run_log': run_info.run_log, # Store run_log for retrieval + 'log_path': run_info.log_path, # Store log_path for retrieval + } + + def _convert_run_info_to_corpus_dict(self, result: Result, + benchmark_id: str) -> Dict[str, Any]: + """Convert RunInfo to corpus dictionary format for CorpusHistoryManager.""" + run_info = result.run_info + if run_info is None: + raise ValueError("RunInfo is None") + + # Calculate actual corpus statistics + corpus_stats = self._calculate_corpus_stats(run_info.corpus_path) + + return { + 'benchmark_id': benchmark_id, + 'trial': result.trial, + 'iteration': result.iteration, + 'timestamp': datetime.now().isoformat(), + 'work_dirs': result.work_dirs, # Store work_dirs for retrieval + 'fuzzer_name': f"{benchmark_id}_trial_{result.trial}", + 'corpus_path': run_info.corpus_path, + 'corpus_size': corpus_stats['corpus_size'], + 'total_size_bytes': corpus_stats['total_size_bytes'], + 'new_files_count': corpus_stats['new_files_count'], + } + + def _convert_analysis_info_to_coverage_dict( + self, result: Result, benchmark_id: str) -> Dict[str, Any]: + """Convert AnalysisInfo to coverage dictionary format for + CoverageHistoryManager.""" + analysis_info = result.analysis_info + if analysis_info is None: + raise ValueError("AnalysisInfo is None") + + coverage_analysis = analysis_info.coverage_analysis + if coverage_analysis is None: + raise ValueError("CoverageAnalysis is None") + + return { + 'benchmark_id': benchmark_id, + 'trial': result.trial, + 'iteration': result.iteration, + 'timestamp': datetime.now().isoformat(), + 'work_dirs': result.work_dirs, # Store work_dirs for retrieval + 'fuzzer_name': f"{benchmark_id}_trial_{result.trial}", + 'line_coverage': coverage_analysis.line_coverage, + 'line_coverage_diff': coverage_analysis.line_coverage_diff, + 'coverage_report_path': coverage_analysis.coverage_report_path, + 'cov_pcs': coverage_analysis.cov_pcs, + 'total_pcs': coverage_analysis.total_pcs, + } + + def _convert_dict_to_build_info(self, build_data: Dict[str, + Any]) -> BuildInfo: + """Convert dictionary to BuildInfo object.""" + return BuildInfo( + compiles=build_data.get('compiles', False), + compile_log=build_data.get('compile_log', ''), + errors=build_data.get('errors', []), + binary_exists=build_data.get('binary_exists', False), + is_function_referenced=build_data.get('is_function_referenced', False), + fuzz_target_source=build_data.get('fuzz_target_source', ''), + build_script_source=build_data.get('build_script_source', ''), + ) + + def _convert_dict_to_run_info(self, crash_data: Dict[str, Any]) -> RunInfo: + """Convert dictionary to RunInfo object with cross-referenced data.""" + # Get additional data from other history managers + benchmark_id = crash_data.get('benchmark_id', '') + trial = crash_data.get('trial', 1) + + # Get corpus data for this benchmark/trial + corpus_path = '' + try: + corpus_history = self.corpus_mgr.get_corpus_stats(limit=50) + for corpus_entry in corpus_history: + if (corpus_entry.get('benchmark_id') == benchmark_id and + corpus_entry.get('trial') == trial): + corpus_path = corpus_entry.get('corpus_path', '') + break + except Exception as e: + self.logger.debug("Failed to retrieve corpus path: %s", e) + + # Get coverage data for this benchmark/trial + cov_pcs, total_pcs, coverage_report_path = 0, 0, '' + try: + 
coverage_history = self.coverage_mgr.get_coverage_history(limit=50) + for coverage_entry in coverage_history: + if (coverage_entry.get('benchmark_id') == benchmark_id and + coverage_entry.get('trial') == trial): + cov_pcs = coverage_entry.get('cov_pcs', 0) + total_pcs = coverage_entry.get('total_pcs', 0) + coverage_report_path = coverage_entry.get('coverage_report_path', '') + break + except Exception as e: + self.logger.debug("Failed to retrieve coverage data: %s", e) + + return RunInfo( + crashes=True, # If we have crash data, there was a crash + run_log=crash_data.get('run_log', ''), + corpus_path=corpus_path, + reproducer_path=crash_data.get('reproducer_path', ''), + timeout=not crash_data.get('reproducible', + True), # Invert reproducible flag + error_message=crash_data.get('error_message', ''), + cov_pcs=cov_pcs, + total_pcs=total_pcs, + crash_info=crash_data.get('stack_trace', ''), + log_path=crash_data.get('log_path', ''), + coverage_report_path=coverage_report_path, + ) + + def _convert_dict_to_analysis_info( + self, coverage_data: Dict[str, Any]) -> AnalysisInfo: + """Convert dictionary to AnalysisInfo object with enhanced data + reconstruction.""" + from .results import CoverageAnalysis, CrashAnalysis + + coverage_analysis = CoverageAnalysis( + line_coverage=coverage_data.get('line_coverage', 0.0), + line_coverage_diff=coverage_data.get('line_coverage_diff', 0.0), + coverage_report_path=coverage_data.get('coverage_report_path', ''), + textcov_diff=self._reconstruct_textcov_diff(coverage_data), + cov_pcs=coverage_data.get('cov_pcs', 0), + total_pcs=coverage_data.get('total_pcs', 0), + ) + + # Try to get crash analysis data for the same benchmark/trial + crash_analysis = None + try: + benchmark_id = coverage_data.get('benchmark_id', '') + trial = coverage_data.get('trial', 1) + crash_history = self.crash_mgr.get_crash_history(limit=50) + for crash_entry in crash_history: + if (crash_entry.get('benchmark_id') == benchmark_id and + crash_entry.get('trial') == trial): + crash_stacks_data = crash_entry.get('crash_stacks') + crash_stacks = crash_stacks_data if isinstance( + crash_stacks_data, list) else [] + crash_analysis = CrashAnalysis( + true_bug=crash_entry.get('reproducible', False), + crash_func=crash_entry.get('crash_func'), + crash_stacks=crash_stacks, + ) + break + except Exception as e: + self.logger.debug("Failed to retrieve crash analysis: %s", e) + + return AnalysisInfo(coverage_analysis=coverage_analysis, + crash_analysis=crash_analysis) + + def _reconstruct_textcov_diff( + self, coverage_data: Dict[str, Any]) -> Optional['textcov.Textcov']: + """ + Reconstruct textcov_diff from coverage data. + + This is a placeholder implementation. In a real scenario, this would + parse coverage report files to reconstruct the textual coverage diff. 
+ """ + # For now, return None as this is a placeholder implementation + # In a real implementation, this would parse the coverage report + # and create a proper textcov.Textcov object + # pylint: disable=unused-argument + return None + + def _create_minimal_benchmark(self, benchmark_id: str) -> Benchmark: + """Create a Benchmark object for Result reconstruction, using + BenchmarkManager when available.""" + # Try to get full benchmark data from BenchmarkManager + if self.benchmark_manager: + try: + existing_benchmark = self.benchmark_manager.get_benchmark(benchmark_id) + if existing_benchmark: + self.logger.debug( + "Retrieved full benchmark data for %s from BenchmarkManager", + benchmark_id) + return existing_benchmark + except Exception as e: + self.logger.debug( + "Failed to retrieve benchmark %s from BenchmarkManager: %s", + benchmark_id, e) + + # Fallback to minimal benchmark creation + self.logger.debug("Creating minimal benchmark for %s", benchmark_id) + return Benchmark( + project='unknown', + language='unknown', + function_signature='unknown', + function_name='unknown', + return_type='unknown', + target_path='unknown', + id=benchmark_id, + ) + + def _get_benchmark_metrics(self, benchmark_id: str) -> Dict[str, Any]: + """Get comprehensive metrics for a specific benchmark.""" + try: + # Get latest resul + result = self.get_result(benchmark_id) + if not result: + return self._get_empty_metrics() + + # Calculate core metrics + metrics = { + # Core Metrics + 'compiles': + result.is_build_successful(), + 'crashes': + not result.is_run_successful() if result.run_info else False, + 'coverage': + self._get_coverage_value(result), + 'line_coverage_diff': + self._get_line_coverage_diff(result), + + # Derived Metrics + 'has_semantic_error': + result.is_semantic_error(), + 'build_success_rate': + self.get_build_success_rate(benchmark_id), + 'crash_rate': + self._calculate_crash_rate(benchmark_id), + + # Coverage Metrics + 'cov_pcs': + self._get_cov_pcs(result), + 'total_pcs': + self._get_total_pcs(result), + 'coverage_percentage': + self._get_coverage_percentage(result), + + # Quality Metrics + 'is_true_bug': + self._is_true_bug(result), + 'error_type': + self._get_error_type(result), + + # Metadata + 'trial': + result.trial, + 'iteration': + result.iteration, + 'timestamp': + datetime.now().isoformat(), + 'benchmark_id': + benchmark_id, + } + + return metrics + + except Exception as e: + self.logger.error("Failed to calculate benchmark metrics for %s: %s", + benchmark_id, str(e)) + return self._get_empty_metrics() + + def _get_aggregated_metrics(self) -> Dict[str, Any]: + """Get aggregated metrics across all benchmarks.""" + try: + # Get build statistics + build_stats = self.build_mgr.get_build_statistics() + total_builds = build_stats.get('total_builds', 0) + successful_builds = build_stats.get('successful_builds', 0) + + # Get crash statistics + crash_stats = self.crash_mgr.get_crash_statistics() + total_crashes = crash_stats.get('total_crashes', 0) + unique_crashes = crash_stats.get('unique_crashes', 0) + + # Get coverage statistics from recent coverage history + coverage_history = self.coverage_mgr.get_coverage_history(limit=100) + coverage_values = [ + entry.get('line_coverage', 0.0) + for entry in coverage_history + if entry.get('line_coverage') is not None + ] + + average_coverage = sum(coverage_values) / len( + coverage_values) if coverage_values else 0.0 + max_coverage = max(coverage_values) if coverage_values else 0.0 + + # Estimate total benchmarks from unique benchmark IDs in 
build history + build_history = self.build_mgr.get_build_history(limit=1000) + unique_benchmarks = set( + entry.get('benchmark_id') + for entry in build_history + if entry.get('benchmark_id')) + total_benchmarks = len(unique_benchmarks) + + return { + 'total_benchmarks': + total_benchmarks, + 'total_builds': + total_builds, + 'successful_builds': + successful_builds, + 'build_success_rate': + successful_builds / total_builds if total_builds > 0 else 0.0, + 'total_crashes': + total_crashes, + 'unique_crashes': + unique_crashes, + 'crash_rate': + total_crashes / total_builds if total_builds > 0 else 0.0, + 'average_coverage': + average_coverage, + 'max_coverage': + max_coverage, + 'coverage_samples': + len(coverage_values), + 'timestamp': + datetime.now().isoformat(), + } + + except Exception as e: + self.logger.error("Failed to calculate aggregated metrics: %s", str(e)) + # Return empty metrics on error + return { + 'total_benchmarks': 0, + 'total_builds': 0, + 'successful_builds': 0, + 'build_success_rate': 0.0, + 'total_crashes': 0, + 'unique_crashes': 0, + 'crash_rate': 0.0, + 'average_coverage': 0.0, + 'max_coverage': 0.0, + 'coverage_samples': 0, + 'timestamp': datetime.now().isoformat(), + 'error': str(e), + } + + def _get_empty_metrics(self) -> Dict[str, Any]: + """Return empty metrics structure.""" + return { + 'compiles': False, + 'crashes': False, + 'coverage': 0.0, + 'line_coverage_diff': 0.0, + 'has_semantic_error': False, + 'build_success_rate': 0.0, + 'crash_rate': 0.0, + 'cov_pcs': 0, + 'total_pcs': 0, + 'coverage_percentage': 0.0, + 'is_true_bug': False, + 'error_type': 'UNKNOWN', + 'trial': 0, + 'iteration': 0, + 'timestamp': datetime.now().isoformat(), + 'benchmark_id': '', + } + + def _get_coverage_value(self, result: Result) -> float: + """Extract coverage value from Result.""" + if (result.analysis_info and result.analysis_info.coverage_analysis): + return result.analysis_info.coverage_analysis.line_coverage + return 0.0 + + def _get_line_coverage_diff(self, result: Result) -> float: + """Extract line coverage diff from Result.""" + if (result.analysis_info and result.analysis_info.coverage_analysis): + return result.analysis_info.coverage_analysis.line_coverage_diff + return 0.0 + + def _get_cov_pcs(self, result: Result) -> int: + """Extract covered program counters from Result.""" + if result.run_info: + return result.run_info.cov_pcs + if (result.analysis_info and result.analysis_info.coverage_analysis): + return result.analysis_info.coverage_analysis.cov_pcs + return 0 + + def _get_total_pcs(self, result: Result) -> int: + """Extract total program counters from Result.""" + if result.run_info: + return result.run_info.total_pcs + if (result.analysis_info and result.analysis_info.coverage_analysis): + return result.analysis_info.coverage_analysis.total_pcs + return 0 + + def _get_coverage_percentage(self, result: Result) -> float: + """Calculate coverage percentage from Result.""" + cov_pcs = self._get_cov_pcs(result) + total_pcs = self._get_total_pcs(result) + if total_pcs > 0: + return (cov_pcs / total_pcs) * 100.0 + return 0.0 + + def _is_true_bug(self, result: Result) -> bool: + """Determine if Result represents a true bug.""" + if (result.analysis_info and result.analysis_info.crash_analysis): + return result.analysis_info.crash_analysis.true_bug + return False + + def _get_error_type(self, result: Result) -> str: + """Extract error type from Result.""" + if (result.analysis_info and result.analysis_info.coverage_analysis and + 
result.analysis_info.coverage_analysis.error_type): + return result.analysis_info.coverage_analysis.error_type.name + return 'UNKNOWN' + + def _calculate_crash_rate(self, benchmark_id: str) -> float: + """Calculate crash rate for a benchmark.""" + try: + # Get crash statistics for the last 30 days + crash_summary = self.get_crash_summary(benchmark_id, days=30) + total_crashes = crash_summary.get('total_crashes', 0) + + # Get build statistics to determine total runs + # Try with date parameters first, fallback to no parameters + try: + end_date = datetime.now() + start_date = end_date - timedelta(days=30) + build_stats = self.build_mgr.get_build_statistics( + start_date.isoformat(), end_date.isoformat()) + except (TypeError, AttributeError): + # Fallback if get_build_statistics doesn't accept date parameters + build_stats = self.build_mgr.get_build_statistics() + + total_builds = build_stats.get('total_builds', 0) + + if total_builds > 0: + return total_crashes / total_builds + return 0.0 + + except Exception as e: + self.logger.error("Failed to calculate crash rate for %s: %s", + benchmark_id, str(e)) + return 0.0 diff --git a/ossfuzz_py/samples/README.md b/ossfuzz_py/samples/README.md new file mode 100644 index 000000000..db94f0e9c --- /dev/null +++ b/ossfuzz_py/samples/README.md @@ -0,0 +1,225 @@ +# OSS-Fuzz SDK Samples + +This directory contains practical examples demonstrating how to use the OSS-Fuzz SDK for various fuzzing workflows and use cases. + +## Sample Structure + +``` +samples/ +├── README.md # This file +├── basic/ # Basic usage examples +│ ├── 01_quick_start.py # Getting started with the SDK +│ ├── 02_configuration.py # Configuration management +│ └── 03_simple_benchmark.py # Running a single benchmark +├── intermediate/ # Intermediate examples +│ ├── 01_build_operations.py # Build operations and management +│ ├── 02_execution_workflows.py # Execution and run management +│ ├── 03_result_analysis.py # Result analysis and metrics +│ └── 04_pipeline_automation.py # Automated pipeline workflows +├── advanced/ # Advanced use cases +│ ├── 01_batch_processing.py # Batch processing multiple projects +│ ├── 02_custom_workflows.py # Custom workflow orchestration +│ ├── 03_monitoring_alerts.py # Monitoring and alerting systems +│ └── 04_data_export_analysis.py # Data export and analysis +├── production/ # Production deployment examples +│ ├── 01_enterprise_config.py # Enterprise configuration setup +│ ├── 02_ci_cd_integration.py # CI/CD pipeline integration +│ ├── 03_monitoring_dashboard.py # Monitoring dashboard setup +│ └── 04_automated_reporting.py # Automated reporting system +├── utilities/ # Utility scripts and helpers +│ ├── config_generator.py # Configuration file generator +│ ├── health_checker.py # System health checker +│ ├── data_migrator.py # Data migration utilities +│ └── benchmark_validator.py # Benchmark validation tools +└── data/ # Sample data and configurations + ├── sample_benchmarks.json # Sample benchmark definitions + ├── sample_configs/ # Sample configuration files + └── test_data/ # Test data for examples +``` + +## Getting Started + +### Prerequisites + +1. **Set up environment variables:** + ```bash + export OSSFUZZ_HISTORY_STORAGE_BACKEND=local + export OSSFUZZ_HISTORY_STORAGE_PATH=/tmp/ossfuzz_data + export WORK_DIR=/tmp/ossfuzz_work + ``` + +2. **Run your first example:** + ```bash + cd samples/basic + python 01_quick_start.py + ``` + +## Sample Categories + +### Basic Examples (`basic/`) + +Perfect for users new to the OSS-Fuzz SDK. 
These examples cover: + +- **Quick Start**: Initialize the SDK and run your first benchmark +- **Configuration**: Set up SDK configuration for different environments +- **Simple Benchmark**: Run a single benchmark with basic options + +**Start here if you're new to the SDK!** + +### Intermediate Examples (`intermediate/`) + +For users familiar with basic concepts who want to explore more features: + +- **Build Operations**: Manage build processes and artifacts +- **Execution Workflows**: Control fuzzing execution and monitoring +- **Result Analysis**: Analyze results and extract meaningful metrics +- **Pipeline Automation**: Automate complete build → run → analyze workflows + +### Advanced Examples (`advanced/`) + +For experienced users implementing complex workflows: + +- **Batch Processing**: Process multiple projects and benchmarks efficiently +- **Custom Workflows**: Create custom orchestration and automation +- **Monitoring & Alerts**: Set up monitoring systems and alerting +- **Data Export & Analysis**: Advanced data analysis and reporting + +### Production Examples (`production/`) + +Enterprise-ready examples for production deployment: + +- **Enterprise Configuration**: Production-grade configuration management +- **CI/CD Integration**: Integrate with continuous integration systems +- **Monitoring Dashboard**: Set up comprehensive monitoring +- **Automated Reporting**: Create automated reporting systems + +### Utilities (`utilities/`) + +Helper scripts and tools to support your fuzzing workflows: + +- **Configuration Generator**: Generate configuration files +- **Health Checker**: Monitor system health and component status +- **Data Migrator**: Migrate data between storage backends +- **Benchmark Validator**: Validate benchmark definitions + +## Use Case Guide + +### I want to... + +#### **Get started quickly** +→ Start with `basic/01_quick_start.py` + +#### **Set up configuration for my environment** +→ Check `basic/02_configuration.py` and `production/01_enterprise_config.py` + +#### **Run a single benchmark** +→ Use `basic/03_simple_benchmark.py` + +#### **Automate my fuzzing pipeline** +→ Look at `intermediate/04_pipeline_automation.py` + +#### **Process multiple projects** +→ Try `advanced/01_batch_processing.py` + +#### **Set up monitoring and alerts** +→ Explore `advanced/03_monitoring_alerts.py` + +#### **Integrate with CI/CD** +→ Check `production/02_ci_cd_integration.py` + +#### **Export and analyze data** +→ Use `advanced/04_data_export_analysis.py` + +#### **Deploy in production** +→ Review all examples in `production/` + +## Running the Examples + +### Basic Usage + +```bash +# Navigate to the samples directory +cd samples + +# Run a basic example +python basic/01_quick_start.py + +# Run with custom configuration +python basic/02_configuration.py --config-file data/sample_configs/dev.json + +# Run an intermediate example +python intermediate/01_build_operations.py --project libpng +``` + +### Advanced Usage + +```bash +# Batch processing example +python advanced/01_batch_processing.py --projects libpng,libjpeg,zlib + +# Custom workflow with monitoring +python advanced/02_custom_workflows.py --enable-monitoring + +# Production deployment example +python production/01_enterprise_config.py --environment production +``` + +## Customization + +### Modifying Examples + +All examples are designed to be easily customizable: + +1. **Configuration**: Modify the configuration sections at the top of each file +2. 
**Parameters**: Adjust parameters like project names, timeouts, and options +3. **Workflows**: Customize the workflow steps to match your requirements +4. **Output**: Modify output formats and destinations + +### Creating Your Own Examples + +Use the existing examples as templates: + +1. Copy a similar example as a starting point +2. Modify the configuration and parameters +3. Customize the workflow logic +4. Add your specific requirements +5. Test thoroughly before production use + +## Sample Data + +The `data/` directory contains: + +- **Sample benchmark definitions** for testing +- **Configuration templates** for different environments +- **Test data** for running examples without real projects + +## Troubleshooting + +### Common Issues + +1. **Import Errors**: Ensure the SDK is installed and in your Python path +2. **Configuration Errors**: Check environment variables and configuration files +3. **Permission Errors**: Ensure proper permissions for work directories +4. **Component Unavailable**: Some examples require optional dependencies + +### Getting Help + +1. **Check the logs**: Most examples include detailed logging +2. **Review the API documentation**: See `docs/API_DOCUMENTATION.md` +3. **Run with debug mode**: Set `log_level='DEBUG'` in configuration +4. **Check component availability**: Use the health checker utility + +## Contributing + +We welcome contributions to the samples! To add a new example: + +1. Choose the appropriate category directory +2. Follow the existing naming convention +3. Include comprehensive comments and documentation +4. Add error handling and logging +5. Test thoroughly with different configurations +6. Update this README with your example + +## License + +These samples are provided under the same license as the OSS-Fuzz SDK project. \ No newline at end of file diff --git a/ossfuzz_py/samples/advanced/01_batch_processing.py b/ossfuzz_py/samples/advanced/01_batch_processing.py new file mode 100644 index 000000000..904b837d4 --- /dev/null +++ b/ossfuzz_py/samples/advanced/01_batch_processing.py @@ -0,0 +1,592 @@ +#!/usr/bin/env python3 +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=invalid-name,line-too-long +""" +OSS-Fuzz SDK Advanced Batch Processing Example + +This example demonstrates how to process multiple projects and benchmarks +efficiently using advanced batch processing techniques. 
+ +What this example covers: +- Multi-project batch processing +- Parallel execution strategies +- Resource management and optimization +- Progress tracking and reporting +- Error recovery and retry logic +- Data aggregation and analysis + +Prerequisites: +- OSS-Fuzz SDK installed: pip install ossfuzz-py +- Understanding of pipeline automation +- Optional: concurrent.futures for parallel processing +""" + +import json +import sys +import threading +import time +from concurrent.futures import ThreadPoolExecutor, as_completed +from datetime import datetime +from pathlib import Path + +# Add the parent directory to the path so we can import the SDK +sys.path.append(str(Path(__file__).parent.parent.parent)) + + +class BatchProcessor: + """Advanced batch processor for multiple projects and benchmarks.""" + + def __init__(self, max_workers=4, retry_attempts=3): + """Initialize batch processor.""" + self.max_workers = max_workers + self.retry_attempts = retry_attempts + self.results = {} + self.progress_lock = threading.Lock() + self.completed_tasks = 0 + self.total_tasks = 0 + + def create_project_configurations(self): + """Create configurations for multiple projects.""" + print("🏗️ Creating Multi-Project Configurations") + print("-" * 40) + + projects = [{ + 'name': 'libpng', + 'description': 'PNG image library', + 'priority': 'high', + 'benchmarks': [ + 'png_decode_fuzzer', 'png_encode_fuzzer', 'png_transform_fuzzer' + ], + 'config': { + 'storage_backend': 'local', + 'log_level': 'INFO', + 'timeout_seconds': 3600 + } + }, { + 'name': 'libjpeg', + 'description': 'JPEG image library', + 'priority': 'high', + 'benchmarks': ['jpeg_decode_fuzzer', 'jpeg_encode_fuzzer'], + 'config': { + 'storage_backend': 'local', + 'log_level': 'INFO', + 'timeout_seconds': 2400 + } + }, { + 'name': 'zlib', + 'description': 'Compression library', + 'priority': 'medium', + 'benchmarks': ['inflate_fuzzer', 'deflate_fuzzer', 'gzip_fuzzer'], + 'config': { + 'storage_backend': 'local', + 'log_level': 'WARNING', + 'timeout_seconds': 1800 + } + }, { + 'name': 'openssl', + 'description': 'Cryptography library', + 'priority': 'critical', + 'benchmarks': [ + 'rsa_fuzzer', 'aes_fuzzer', 'x509_fuzzer', 'asn1_fuzzer' + ], + 'config': { + 'storage_backend': 'local', + 'log_level': 'INFO', + 'timeout_seconds': 4800 + } + }] + + print(f"✅ Created configurations for {len(projects)} projects:") + for project in projects: + benchmark_count = len(project['benchmarks']) + print(f" • {project['name']}: {benchmark_count} benchmarks " + f"({project['priority']} priority)") + + return projects + + def create_batch_tasks(self, projects): + """Create individual tasks for batch processing.""" + print("\n📋 Creating Batch Tasks") + print("-" * 22) + + tasks = [] + task_id = 0 + + for project in projects: + for benchmark_id in project['benchmarks']: + task = { + 'id': + task_id, + 'project_name': + project['name'], + 'benchmark_id': + benchmark_id, + 'priority': + project['priority'], + 'config': + project['config'], + 'description': + f"{project['name']}/{benchmark_id}", + 'estimated_duration': + self._estimate_task_duration(project['priority']) + } + tasks.append(task) + task_id += 1 + + # Sort tasks by priority and estimated duration + priority_order = {'critical': 0, 'high': 1, 'medium': 2, 'low': 3} + tasks.sort(key=lambda t: + (priority_order.get(t['priority'], 4), t['estimated_duration'])) + + print(f"✅ Created {len(tasks)} batch tasks:") + print( + f" Critical: {sum(1 for t in tasks if t['priority'] == 'critical')}") + print(f" High: 
{sum(1 for t in tasks if t['priority'] == 'high')}") + print(f" Medium: {sum(1 for t in tasks if t['priority'] == 'medium')}") + print(f" Low: {sum(1 for t in tasks if t['priority'] == 'low')}") + + total_estimated_time = sum(t['estimated_duration'] for t in tasks) + parallel_estimated_time = total_estimated_time / self.max_workers + + print("\n⏱️ Time Estimates:") + print( + f" Sequential execution: {total_estimated_time:.0f}s ({total_estimated_time/60:.1f}m)" + ) + print(f" Parallel execution ({self.max_workers} workers): " + f"{parallel_estimated_time:.0f}s ({parallel_estimated_time/60:.1f}m)") + + return tasks + + def _estimate_task_duration(self, priority): + """Estimate task duration based on priority.""" + duration_map = { + 'critical': 1200, # 20 minutes + 'high': 900, # 15 minutes + 'medium': 600, # 10 minutes + 'low': 300 # 5 minutes + } + return duration_map.get(priority, 600) + + def execute_single_task(self, task): + """Execute a single batch task.""" + task_id = task['id'] + project_name = task['project_name'] + benchmark_id = task['benchmark_id'] + + start_time = time.time() + + try: + # Initialize SDK for this task + from ossfuzz_py.core.ossfuzz_sdk import OSSFuzzSDK, SDKConfig + + config = SDKConfig(**task['config']) + sdk = OSSFuzzSDK(project_name, config) + + # Create pipeline options based on priority + pipeline_options = self._create_pipeline_options(task['priority']) + + # Execute pipeline + pipeline_result = sdk.run_full_pipeline(benchmark_id, pipeline_options) + + end_time = time.time() + duration = end_time - start_time + + # Analyze results + result = { + 'task_id': + task_id, + 'project_name': + project_name, + 'benchmark_id': + benchmark_id, + 'priority': + task['priority'], + 'success': + pipeline_result.success, + 'duration': + duration, + 'start_time': + start_time, + 'end_time': + end_time, + 'message': + pipeline_result.message + if not pipeline_result.success else 'Success' + } + + if pipeline_result.success: + # Extract detailed metrics + build_success = sum( + 1 for r in pipeline_result.build_results if r.success) + build_total = len(pipeline_result.build_results) + run_success = sum(1 for r in pipeline_result.run_results if r.success) + run_total = len(pipeline_result.run_results) + crashes = sum( + 1 for r in pipeline_result.run_results if r.success and r.crashes) + + result.update({ + 'builds_successful': + build_success, + 'builds_total': + build_total, + 'runs_successful': + run_success, + 'runs_total': + run_total, + 'crashes_found': + crashes, + 'build_success_rate': + build_success / build_total if build_total > 0 else 0, + 'run_success_rate': + run_success / run_total if run_total > 0 else 0 + }) + + # Get additional metrics + try: + metrics = sdk.get_benchmark_metrics(benchmark_id) + result['coverage'] = metrics.get('coverage', 0) + result['line_coverage_diff'] = metrics.get('line_coverage_diff', 0) + except: + result['coverage'] = 0 + result['line_coverage_diff'] = 0 + + # Update progress + with self.progress_lock: + self.completed_tasks += 1 + progress = (self.completed_tasks / self.total_tasks) * 100 + print( + f" [{self.completed_tasks}/{self.total_tasks}] {progress:.1f}% - " + f"{task['description']}: {'✅' if result['success'] else '❌'} ({duration:.1f}s)" + ) + + return result + + except Exception as e: + end_time = time.time() + duration = end_time - start_time + + result = { + 'task_id': task_id, + 'project_name': project_name, + 'benchmark_id': benchmark_id, + 'priority': task['priority'], + 'success': False, + 'duration': duration, + 
'start_time': start_time, + 'end_time': end_time, + 'message': str(e), + 'error': True + } + + with self.progress_lock: + self.completed_tasks += 1 + progress = (self.completed_tasks / self.total_tasks) * 100 + print( + f" [{self.completed_tasks}/{self.total_tasks}] {progress:.1f}% - " + f"{task['description']}: ❌ Error ({duration:.1f}s)") + + return result + + def _create_pipeline_options(self, priority): + """Create pipeline options based on task priority.""" + from ossfuzz_py.core.ossfuzz_sdk import (BuildOptions, PipelineOptions, + RunOptions) + + # Adjust configuration based on priority + if priority == 'critical': + trials = 5 + duration = 1800 # 30 minutes + timeout = 60 + elif priority == 'high': + trials = 3 + duration = 1200 # 20 minutes + timeout = 45 + elif priority == 'medium': + trials = 2 + duration = 900 # 15 minutes + timeout = 30 + else: # low + trials = 1 + duration = 600 # 10 minutes + timeout = 25 + + build_options = BuildOptions(sanitizer='address', + timeout_seconds=timeout * 60) + + run_options = RunOptions(duration_seconds=duration, + timeout_seconds=timeout, + extract_coverage=True) + + return PipelineOptions(build_options=build_options, + run_options=run_options, + trials=trials, + analyze_coverage=True, + store_results=True) + + def execute_batch_parallel(self, tasks): + """Execute batch tasks in parallel.""" + print("\n🚀 Executing Batch Tasks (Parallel)") + print("-" * 35) + + self.total_tasks = len(tasks) + self.completed_tasks = 0 + batch_start_time = time.time() + + print(f"Starting parallel execution with {self.max_workers} workers...") + print(f"Total tasks: {self.total_tasks}") + + results = [] + + with ThreadPoolExecutor(max_workers=self.max_workers) as executor: + # Submit all tasks + future_to_task = { + executor.submit(self.execute_single_task, task): task + for task in tasks + } + + # Collect results as they complete + for future in as_completed(future_to_task): + task = future_to_task[future] + try: + result = future.result() + results.append(result) + except Exception as e: + print(f" ❌ Task {task['id']} failed with exception: {e}") + results.append({ + 'task_id': task['id'], + 'project_name': task['project_name'], + 'benchmark_id': task['benchmark_id'], + 'success': False, + 'message': str(e), + 'error': True + }) + + batch_end_time = time.time() + total_duration = batch_end_time - batch_start_time + + print( + f"\n✅ Batch execution completed in {total_duration:.1f}s ({total_duration/60:.1f}m)" + ) + + return results + + def analyze_batch_results(self, results): + """Analyze and report batch processing results.""" + print("\n📊 Batch Processing Analysis") + print("-" * 30) + + if not results: + print("❌ No results to analyze") + return None + + # Overall statistics + total_tasks = len(results) + successful_tasks = sum(1 for r in results if r.get('success', False)) + failed_tasks = total_tasks - successful_tasks + + print("📈 Overall Statistics:") + print(f" Total tasks: {total_tasks}") + print( + f" Successful: {successful_tasks} ({successful_tasks/total_tasks:.1%})" + ) + print(f" Failed: {failed_tasks} ({failed_tasks/total_tasks:.1%})") + + # Performance analysis + durations = [r.get('duration', 0) for r in results if 'duration' in r] + avg_duration = 0 + min_duration = 0 + max_duration = 0 + + if durations: + avg_duration = sum(durations) / len(durations) + min_duration = min(durations) + max_duration = max(durations) + + print("\n⏱️ Performance Statistics:") + print(f" Average duration: {avg_duration:.1f}s") + print(f" Minimum duration: 
{min_duration:.1f}s") + print(f" Maximum duration: {max_duration:.1f}s") + + # Project-wise analysis + project_stats = {} + for result in results: + project = result.get('project_name', 'unknown') + if project not in project_stats: + project_stats[project] = {'total': 0, 'successful': 0, 'failed': 0} + + project_stats[project]['total'] += 1 + if result.get('success', False): + project_stats[project]['successful'] += 1 + else: + project_stats[project]['failed'] += 1 + + print("\n🏗️ Project-wise Statistics:") + for project, stats in project_stats.items(): + success_rate = stats['successful'] / stats['total'] if stats[ + 'total'] > 0 else 0 + print( + f" {project}: {stats['successful']}/{stats['total']} ({success_rate:.1%})" + ) + + # Priority analysis + priority_stats = {} + for result in results: + priority = result.get('priority', 'unknown') + if priority not in priority_stats: + priority_stats[priority] = {'total': 0, 'successful': 0} + + priority_stats[priority]['total'] += 1 + if result.get('success', False): + priority_stats[priority]['successful'] += 1 + + print("\n🎯 Priority-wise Statistics:") + for priority, stats in priority_stats.items(): + success_rate = stats['successful'] / stats['total'] if stats[ + 'total'] > 0 else 0 + print( + f" {priority}: {stats['successful']}/{stats['total']} ({success_rate:.1%})" + ) + + # Detailed metrics for successful tasks + successful_results = [r for r in results if r.get('success', False)] + if successful_results: + total_crashes = sum(r.get('crashes_found', 0) for r in successful_results) + avg_coverage = sum(r.get('coverage', 0) + for r in successful_results) / len(successful_results) + + print("\n💥 Fuzzing Results:") + print(f" Total crashes found: {total_crashes}") + print(f" Average coverage: {avg_coverage:.1f}%") + print( + f" Benchmarks with crashes: {sum(1 for r in successful_results if r.get('crashes_found', 0) > 0)}" + ) + + return { + 'total_tasks': + total_tasks, + 'successful_tasks': + successful_tasks, + 'failed_tasks': + failed_tasks, + 'success_rate': + successful_tasks / total_tasks if total_tasks > 0 else 0, + 'project_stats': + project_stats, + 'priority_stats': + priority_stats, + 'performance_stats': { + 'avg_duration': avg_duration if durations else 0, + 'min_duration': min_duration if durations else 0, + 'max_duration': max_duration if durations else 0 + } + } + + def export_batch_results(self, results, output_path=None): + """Export batch results to JSON file.""" + if not output_path: + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + output_path = f"batch_results_{timestamp}.json" + + export_data = { + 'export_timestamp': datetime.now().isoformat(), + 'total_tasks': len(results), + 'successful_tasks': sum(1 for r in results if r.get('success', False)), + 'batch_processor_config': { + 'max_workers': self.max_workers, + 'retry_attempts': self.retry_attempts + }, + 'results': results + } + + with open(output_path, 'w') as f: + json.dump(export_data, f, indent=2) + + print(f"\n💾 Results exported to: {output_path}") + return output_path + + +def main(): + """Main function demonstrating advanced batch processing.""" + print("🔄 OSS-Fuzz SDK Advanced Batch Processing Example") + print("=" * 60) + + # Initialize batch processor + print("\n⚙️ Initializing Batch Processor") + max_workers = 4 # Adjust based on your system + batch_processor = BatchProcessor(max_workers=max_workers, retry_attempts=2) + + print("✅ Batch processor initialized:") + print(f" Max workers: {max_workers}") + print(f" Retry attempts: 
{batch_processor.retry_attempts}") + + # Create project configurations + projects = batch_processor.create_project_configurations() + + # Create batch tasks + tasks = batch_processor.create_batch_tasks(projects) + + # Execute batch processing + results = batch_processor.execute_batch_parallel(tasks) + + # Analyze results + analysis = batch_processor.analyze_batch_results(results) + + # Export results + export_path = batch_processor.export_batch_results(results) + + # Summary + print("\n🎉 Advanced Batch Processing Summary") + print("=" * 40) + print("✅ Batch processing completed:") + print(f" • Processed {len(projects)} projects") + print(f" • Executed {len(tasks)} tasks") + success_rate = analysis.get('success_rate', 0) if analysis else 0 + print(f" • Success rate: {success_rate:.1%}") + print(f" • Results exported to: {export_path}") + + print("\n📋 Key features demonstrated:") + print(" • Multi-project batch processing") + print(" • Parallel execution with thread pool") + print(" • Priority-based task scheduling") + print(" • Progress tracking and monitoring") + print(" • Comprehensive result analysis") + print(" • Data export and reporting") + + print("\n🚀 Next steps:") + print(" • Try advanced/03_monitoring_alerts.py for production monitoring") + print(" • Explore production examples for enterprise deployment") + print(" • Scale up with more workers for larger batches") + + return True + + +if __name__ == '__main__': + try: + success = main() + if success: + print("\n🎯 Advanced batch processing example completed successfully!") + sys.exit(0) + else: + print("\n❌ Advanced batch processing example failed.") + sys.exit(1) + + except KeyboardInterrupt: + print("\n\n⏹️ Example interrupted by user.") + sys.exit(1) + except Exception as e: + print(f"\n❌ Unexpected error: {e}") + import traceback + traceback.print_exc() + sys.exit(1) diff --git a/ossfuzz_py/samples/basic/01_quick_start.py b/ossfuzz_py/samples/basic/01_quick_start.py new file mode 100644 index 000000000..66dcd16d8 --- /dev/null +++ b/ossfuzz_py/samples/basic/01_quick_start.py @@ -0,0 +1,261 @@ +#!/usr/bin/env python3 +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=invalid-name,line-too-long,unused-import +""" +OSS-Fuzz SDK Quick Start Example + +This example demonstrates the most basic usage of the OSS-Fuzz SDK. +Perfect for users who want to get started quickly and see the SDK in action. 
+ +What this example covers: +- Basic SDK initialization +- Running a simple benchmark +- Getting basic metrics +- Generating a simple report + +Prerequisites: +- OSS-Fuzz SDK installed: pip install ossfuzz-py +- Basic environment setup (see README.md) +""" + +import os +import sys +import tempfile +from pathlib import Path + +# Add the parent directory to the path so we can import the SDK +# In a real project, you would just: from ossfuzz_py.core.ossfuzz_sdk import OSSFuzzSDK +sys.path.append(str(Path(__file__).parent.parent.parent)) + + +def main(): + """Main function demonstrating basic SDK usage.""" + print("🚀 OSS-Fuzz SDK Quick Start Example") + print("=" * 50) + + # Step 1: Basic SDK Initialization + print("\n📋 Step 1: Initializing the SDK") + + try: + from ossfuzz_py.core.ossfuzz_sdk import OSSFuzzSDK + + # Initialize with a sample project + # In production, replace 'sample_project' with your actual project name + project_name = 'sample_project' + sdk = OSSFuzzSDK(project_name) + + print(f"✅ SDK initialized successfully for project: {project_name}") + print(f" Storage backend: {sdk.config.get('storage_backend', 'default')}") + print(f" Work directory: {sdk.config.get('work_dir', 'default')}") + + except ImportError as e: + print(f"❌ Failed to import SDK: {e}") + print( + " Please ensure the OSS-Fuzz SDK is installed: pip install ossfuzz-py" + ) + return False + except Exception as e: + print(f"❌ Failed to initialize SDK: {e}") + return False + + # Step 2: Check Component Availability + print("\n🔧 Step 2: Checking Component Availability") + + components = { + 'Storage Manager': sdk.storage, + 'Result Manager': getattr(sdk, 'result_manager', None), + 'Benchmark Manager': getattr(sdk, 'benchmark_manager', None), + 'Local Builder': getattr(sdk, 'local_builder', None), + 'Local Runner': getattr(sdk, 'local_runner', None), + } + + available_components = 0 + for name, component in components.items(): + status = "✅ Available" if component is not None else "⚠️ Not available" + print(f" {status}: {name}") + if component is not None: + available_components += 1 + + print(f"\n 📊 {available_components}/{len(components)} components available") + + if available_components == 0: + print(" ⚠️ No components available. Some features will be limited.") + + # Step 3: Try Basic Operations + print("\n🎯 Step 3: Trying Basic Operations") + + # Try to get project summary + try: + summary = sdk.get_project_summary() + print("✅ Project summary retrieved:") + print(f" Project: {summary.get('project_name', 'Unknown')}") + print(f" Last updated: {summary.get('last_updated', 'Unknown')}") + print(f" Total benchmarks: {summary.get('total_benchmarks', 0)}") + + except Exception as e: + print(f"⚠️ Could not get project summary: {e}") + + # Try to list benchmarks + try: + benchmarks = sdk.list_benchmarks() + print(f"✅ Found {len(benchmarks)} benchmarks") + + if benchmarks: + print(" Sample benchmarks:") + for i, benchmark in enumerate(benchmarks[:3]): # Show first 3 + print(f" {i+1}. 
{benchmark.get('id', 'Unknown ID')}") + else: + print(" No benchmarks found (this is normal for a new setup)") + + except Exception as e: + print(f"⚠️ Could not list benchmarks: {e}") + + # Step 4: Try Running a Sample Benchmark + print("\n🏃 Step 4: Trying to Run a Sample Benchmark") + + # Create a sample benchmark ID for demonstration + sample_benchmark_id = 'sample_benchmark_001' + + try: + # Try to get metrics for the sample benchmark + metrics = sdk.get_benchmark_metrics(sample_benchmark_id) + print(f"✅ Retrieved metrics for {sample_benchmark_id}:") + + if metrics: + print(f" Compiles: {metrics.get('compiles', 'Unknown')}") + print(f" Crashes: {metrics.get('crashes', 'Unknown')}") + print(f" Coverage: {metrics.get('coverage', 'Unknown')}") + else: + print(" No metrics available (this is normal for a new benchmark)") + + except Exception as e: + print(f"⚠️ Could not get benchmark metrics: {e}") + + # Try to run the benchmark (this will likely fail in a demo environment) + try: + print(f"\n Attempting to run benchmark: {sample_benchmark_id}") + result = sdk.run_benchmark(sample_benchmark_id) + + if result.success: + print("✅ Benchmark run completed successfully!") + print(f" Run ID: {result.run_id}") + print(f" Crashes detected: {result.crashes}") + print(f" Coverage data: {result.coverage_data}") + else: + print(f"⚠️ Benchmark run failed: {result.message}") + print( + " This is expected in a demo environment without actual benchmarks") + + except Exception as e: + print(f"⚠️ Could not run benchmark: {e}") + print(" This is expected in a demo environment") + + # Step 5: Generate a Basic Report + print("\n📊 Step 5: Generating a Basic Report") + + try: + # Generate a project report for the last 7 days + report = sdk.generate_project_report(days=7, include_details=False) + print("✅ Project report generated:") + print(f" Project: {report.get('project_name', 'Unknown')}") + print( + f" Report period: {report.get('start_date', 'Unknown')} to {report.get('end_date', 'Unknown')}" + ) + + # Show build summary if available + build_summary = report.get('build_summary', {}) + if build_summary: + print(f" Total builds: {build_summary.get('total_builds', 0)}") + print( + f" Build success rate: {build_summary.get('success_rate', 0):.2%}") + + # Show coverage summary if available + coverage_summary = report.get('coverage_summary', {}) + if coverage_summary: + print( + f" Average coverage: {coverage_summary.get('average_coverage', 0):.1f}%" + ) + + except Exception as e: + print(f"⚠️ Could not generate report: {e}") + + # Step 6: Try System-Wide Metrics + print("\n📈 Step 6: Getting System-Wide Metrics") + + try: + system_metrics = sdk.get_system_metrics() + print("✅ System metrics retrieved:") + + if system_metrics: + print(f" Total benchmarks: {system_metrics.get('total_benchmarks', 0)}") + print(f" Total builds: {system_metrics.get('total_builds', 0)}") + print( + f" Build success rate: {system_metrics.get('build_success_rate', 0):.2%}" + ) + print( + f" Average coverage: {system_metrics.get('average_coverage', 0):.1f}%" + ) + print(f" Total crashes: {system_metrics.get('total_crashes', 0)}") + else: + print(" No system metrics available (this is normal for a new setup)") + + except Exception as e: + print(f"⚠️ Could not get system metrics: {e}") + + # Step 7: Summary and Next Steps + print("\n🎉 Step 7: Summary and Next Steps") + print("=" * 50) + print("✅ Quick start example completed successfully!") + print("\n📋 What you've learned:") + print(" • How to initialize the OSS-Fuzz SDK") + print(" • How 
to check component availability") + print(" • How to perform basic operations") + print(" • How to handle errors gracefully") + print(" • How to generate reports and get metrics") + + print("\n🚀 Next steps:") + print( + " 1. Check out basic/02_configuration.py to learn about configuration") + print(" 2. Try basic/03_simple_benchmark.py to run a real benchmark") + print(" 3. Explore intermediate/ examples for more advanced features") + print(" 4. Read the API documentation in docs/API_DOCUMENTATION.md") + + print("\n💡 Tips:") + print(" • Set up environment variables for better configuration") + print(" • Install optional dependencies for full functionality:") + print(" pip install pandas pydantic yaml chardet") + print(" • Check the logs if you encounter issues") + + return True + + +if __name__ == '__main__': + try: + success = main() + if success: + print("\n🎯 Example completed successfully!") + sys.exit(0) + else: + print("\n❌ Example failed. Check the output above for details.") + sys.exit(1) + + except KeyboardInterrupt: + print("\n\n⏹️ Example interrupted by user.") + sys.exit(1) + except Exception as e: + print(f"\n❌ Unexpected error: {e}") + import traceback + traceback.print_exc() + sys.exit(1) diff --git a/ossfuzz_py/samples/basic/02_configuration.py b/ossfuzz_py/samples/basic/02_configuration.py new file mode 100644 index 000000000..7ac62f57c --- /dev/null +++ b/ossfuzz_py/samples/basic/02_configuration.py @@ -0,0 +1,408 @@ +#!/usr/bin/env python3 +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=invalid-name,line-too-long,redefined-outer-name +""" +OSS-Fuzz SDK Configuration Example + +This example demonstrates different ways to configure the OSS-Fuzz SDK +for various environments and use cases. + +What this example covers: +- Basic configuration with SDKConfig +- Environment variable configuration +- Different storage backends +- Configuration for different environments (dev, staging, prod) +- Configuration validation and troubleshooting + +Prerequisites: +- OSS-Fuzz SDK installed: pip install ossfuzz-py +""" + +import json +import os +import sys +import tempfile +from pathlib import Path + +# Add the parent directory to the path so we can import the SDK +sys.path.append(str(Path(__file__).parent.parent.parent)) + + +def demonstrate_basic_configuration(): + """Demonstrate basic SDK configuration.""" + print("📋 Basic Configuration") + print("-" * 30) + + try: + from ossfuzz_py.core.ossfuzz_sdk import OSSFuzzSDK, SDKConfig + + # Method 1: Default configuration + print("1. Default Configuration:") + sdk_default = OSSFuzzSDK('my_project') + print( + f" ✅ Default storage backend: {sdk_default.config.get('storage_backend', 'local')}" + ) + print( + f" ✅ Default work dir: {sdk_default.config.get('work_dir', '/tmp')}") + + # Method 2: Dictionary configuration + print("\n2. 
Dictionary Configuration:") + config_dict = { + 'storage_backend': 'local', + 'storage_path': '/tmp/ossfuzz_data', + 'log_level': 'INFO', + 'enable_caching': True, + 'timeout_seconds': 3600 + } + + sdk_dict = OSSFuzzSDK('my_project', config_dict) + print(f" ✅ Storage backend: {sdk_dict.config['storage_backend']}") + print(f" ✅ Storage path: {sdk_dict.config['storage_path']}") + print(f" ✅ Log level: {sdk_dict.config['log_level']}") + + # Method 3: SDKConfig object + print("\n3. SDKConfig Object:") + sdk_config = SDKConfig(storage_backend='local', + storage_path='/tmp/ossfuzz_advanced', + log_level='DEBUG', + enable_caching=False, + timeout_seconds=7200, + max_retries=5) + + sdk_obj = OSSFuzzSDK('my_project', sdk_config) + print(f" ✅ Storage backend: {sdk_obj.sdk_config.storage_backend}") + print(f" ✅ Log level: {sdk_obj.sdk_config.log_level}") + print(f" ✅ Caching enabled: {sdk_obj.sdk_config.enable_caching}") + print(f" ✅ Timeout: {sdk_obj.sdk_config.timeout_seconds}s") + print(f" ✅ Max retries: {sdk_obj.sdk_config.max_retries}") + + return True + + except Exception as e: + print(f"❌ Configuration demonstration failed: {e}") + return False + + +def demonstrate_environment_configurations(): + """Demonstrate configurations for different environments.""" + print("\n🌍 Environment-Specific Configurations") + print("-" * 40) + + try: + from ossfuzz_py.core.ossfuzz_sdk import SDKConfig + + # Development configuration + print("1. Development Environment:") + dev_config = SDKConfig( + storage_backend='local', + storage_path='/tmp/ossfuzz_dev', + log_level='DEBUG', + enable_caching=False, # Disable caching for development + timeout_seconds=1800, # Shorter timeout for dev + max_retries=2) + + print( + f" ✅ Storage: {dev_config.storage_backend} at {dev_config.storage_path}" + ) + print(f" ✅ Logging: {dev_config.log_level} level") + print( + f" ✅ Caching: {'Enabled' if dev_config.enable_caching else 'Disabled'}" + ) + print(f" ✅ Timeout: {dev_config.timeout_seconds}s") + + # Staging configuration + print("\n2. Staging Environment:") + staging_config = SDKConfig( + storage_backend='local', # Could be 'gcs' for cloud staging + storage_path='/var/ossfuzz/staging', + log_level='INFO', + enable_caching=True, + timeout_seconds=3600, + max_retries=3) + + print( + f" ✅ Storage: {staging_config.storage_backend} at {staging_config.storage_path}" + ) + print(f" ✅ Logging: {staging_config.log_level} level") + print( + f" ✅ Caching: {'Enabled' if staging_config.enable_caching else 'Disabled'}" + ) + print(f" ✅ Timeout: {staging_config.timeout_seconds}s") + + # Production configuration + print("\n3. 
Production Environment:") + prod_config = SDKConfig( + storage_backend='gcs', # Use cloud storage for production + gcs_bucket_name='prod-ossfuzz-bucket', + log_level='WARNING', # Less verbose logging + enable_caching=True, + timeout_seconds=7200, # Longer timeout for production + max_retries=5) + + print(f" ✅ Storage: {prod_config.storage_backend}") + print(f" ✅ GCS Bucket: {prod_config.gcs_bucket_name}") + print(f" ✅ Logging: {prod_config.log_level} level") + print( + f" ✅ Caching: {'Enabled' if prod_config.enable_caching else 'Disabled'}" + ) + print(f" ✅ Timeout: {prod_config.timeout_seconds}s") + print(f" ✅ Max retries: {prod_config.max_retries}") + + return True + + except Exception as e: + print(f"❌ Environment configuration demonstration failed: {e}") + return False + + +def demonstrate_environment_variables(): + """Demonstrate configuration using environment variables.""" + print("\n🔧 Environment Variable Configuration") + print("-" * 40) + + # Show current environment variables + print("1. Current Environment Variables:") + env_vars = [ + 'OSSFUZZ_HISTORY_STORAGE_BACKEND', 'OSSFUZZ_HISTORY_STORAGE_PATH', + 'GCS_BUCKET_NAME', 'WORK_DIR', 'OSS_FUZZ_DIR' + ] + + for var in env_vars: + value = os.environ.get(var, 'Not set') + print(f" {var}: {value}") + + # Demonstrate setting environment variables programmatically + print("\n2. Setting Environment Variables Programmatically:") + + # Save original values + original_values = {} + for var in env_vars: + original_values[var] = os.environ.get(var) + + try: + # Set temporary environment variables + os.environ['OSSFUZZ_HISTORY_STORAGE_BACKEND'] = 'local' + os.environ['OSSFUZZ_HISTORY_STORAGE_PATH'] = '/tmp/ossfuzz_env_demo' + os.environ['WORK_DIR'] = '/tmp/ossfuzz_work_demo' + + print(" ✅ Set OSSFUZZ_HISTORY_STORAGE_BACKEND=local") + print(" ✅ Set OSSFUZZ_HISTORY_STORAGE_PATH=/tmp/ossfuzz_env_demo") + print(" ✅ Set WORK_DIR=/tmp/ossfuzz_work_demo") + + # Initialize SDK to see environment variable loading + from ossfuzz_py.core.ossfuzz_sdk import OSSFuzzSDK + + sdk = OSSFuzzSDK('env_demo_project') + print("\n ✅ SDK loaded with environment configuration:") + print( + f" Storage backend: {sdk.config.get('storage_backend', 'default')}" + ) + print(f" Storage path: {sdk.config.get('storage_path', 'default')}") + print(f" Work dir: {sdk.config.get('work_dir', 'default')}") + + finally: + # Restore original environment variables + for var, value in original_values.items(): + if value is None: + os.environ.pop(var, None) + else: + os.environ[var] = value + + print("\n ✅ Environment variables restored") + + +def demonstrate_configuration_validation(): + """Demonstrate configuration validation and troubleshooting.""" + print("\n🔍 Configuration Validation") + print("-" * 30) + + try: + from ossfuzz_py.core.ossfuzz_sdk import OSSFuzzSDK, SDKConfig + + # Test 1: Valid configuration + print("1. Testing Valid Configuration:") + valid_config = SDKConfig(storage_backend='local', + storage_path='/tmp/valid_test', + log_level='INFO') + try: + sdk = OSSFuzzSDK('test_project', valid_config) + print(" ✅ Valid configuration accepted") + + except Exception as e: + print(f" ❌ Valid configuration rejected: {e}") + + # Test 2: Invalid project name + print("\n2. Testing Invalid Project Name:") + try: + sdk = OSSFuzzSDK('', valid_config) # Empty project name + print(" ❌ Empty project name should have been rejected") + except Exception as e: + print(f" ✅ Empty project name correctly rejected: {type(e).__name__}") + + # Test 3: Configuration conversion + print("\n3. 
Testing Configuration Conversion:") + config_dict = { + 'storage_backend': 'local', + 'log_level': 'DEBUG', + 'enable_caching': True + } + + sdk = OSSFuzzSDK('test_project', config_dict) + print(" ✅ Dictionary config converted successfully") + print(f" SDK config type: {type(sdk.sdk_config).__name__}") + print(f" Storage backend: {sdk.sdk_config.storage_backend}") + print(f" Log level: {sdk.sdk_config.log_level}") + + return True + + except Exception as e: + print(f"❌ Configuration validation failed: {e}") + return False + + +def demonstrate_configuration_best_practices(): + """Demonstrate configuration best practices.""" + print("\n💡 Configuration Best Practices") + print("-" * 35) + + print("1. Use Environment Variables for Deployment:") + print(" export OSSFUZZ_HISTORY_STORAGE_BACKEND=gcs") + print(" export GCS_BUCKET_NAME=my-production-bucket") + print(" export WORK_DIR=/var/ossfuzz/work") + + print("\n2. Create Configuration Templates:") + + # Create sample configuration files + configs = { + 'development': { + 'storage_backend': 'local', + 'storage_path': '/tmp/ossfuzz_dev', + 'log_level': 'DEBUG', + 'enable_caching': False, + 'timeout_seconds': 1800 + }, + 'production': { + 'storage_backend': 'gcs', + 'gcs_bucket_name': 'prod-ossfuzz-bucket', + 'log_level': 'INFO', + 'enable_caching': True, + 'timeout_seconds': 7200, + 'max_retries': 5 + } + } + + # Save configuration files + config_dir = Path(tempfile.gettempdir()) / 'ossfuzz_configs' + config_dir.mkdir(exist_ok=True) + + for env_name, config in configs.items(): + config_file = config_dir / f'{env_name}.json' + with open(config_file, 'w') as f: + json.dump(config, f, indent=2) + print(f" ✅ Created {config_file}") + + print("\n3. Load Configuration from File:") + + # Demonstrate loading configuration from file + dev_config_file = config_dir / 'development.json' + if dev_config_file.exists(): + with open(dev_config_file, 'r') as f: + config_data = json.load(f) + + from ossfuzz_py.core.ossfuzz_sdk import OSSFuzzSDK + sdk = OSSFuzzSDK('file_config_project', config_data) + print(f" ✅ Loaded configuration from {dev_config_file}") + print(f" Storage backend: {sdk.config['storage_backend']}") + print(f" Log level: {sdk.config['log_level']}") + + print("\n4. Configuration Hierarchy (Priority Order):") + print(" 1. Explicit configuration parameters (highest priority)") + print(" 2. Configuration file parameters") + print(" 3. Environment variables") + print(" 4. Default values (lowest priority)") + + print("\n5. 
Security Best Practices:") + print(" • Never hardcode sensitive information (API keys, passwords)") + print(" • Use environment variables for sensitive configuration") + print(" • Restrict file permissions on configuration files") + print(" • Use different configurations for different environments") + + +def main(): + """Main function demonstrating configuration management.""" + print("⚙️ OSS-Fuzz SDK Configuration Example") + print("=" * 50) + + success = True + + # Run all demonstrations + success &= demonstrate_basic_configuration() + success &= demonstrate_environment_configurations() + + try: + demonstrate_environment_variables() + except Exception as e: + print(f"⚠️ Environment variable demo had issues: {e}") + + success &= demonstrate_configuration_validation() + + try: + demonstrate_configuration_best_practices() + except Exception as e: + print(f"⚠️ Best practices demo had issues: {e}") + + # Summary + print("\n🎉 Configuration Example Summary") + print("=" * 35) + print("✅ Configuration methods demonstrated:") + print(" • Default configuration") + print(" • Dictionary configuration") + print(" • SDKConfig object configuration") + print(" • Environment variable configuration") + print(" • Configuration file loading") + + print("\n📋 Key takeaways:") + print(" • Use SDKConfig for type safety and validation") + print(" • Environment variables provide deployment flexibility") + print(" • Different environments need different configurations") + print(" • Always validate your configuration") + print(" • Follow security best practices") + + print("\n🚀 Next steps:") + print(" • Try basic/03_simple_benchmark.py with your configuration") + print(" • Explore intermediate examples for advanced usage") + print(" • Set up your production configuration") + + return success + + +if __name__ == '__main__': + try: + success = main() + if success: + print("\n🎯 Configuration example completed successfully!") + sys.exit(0) + else: + print("\n⚠️ Configuration example completed with some issues.") + sys.exit(0) # Still exit successfully as issues are expected in demo + + except KeyboardInterrupt: + print("\n\n⏹️ Example interrupted by user.") + sys.exit(1) + except Exception as e: + print(f"\n❌ Unexpected error: {e}") + import traceback + traceback.print_exc() + sys.exit(1) diff --git a/ossfuzz_py/samples/basic/03_simple_benchmark.py b/ossfuzz_py/samples/basic/03_simple_benchmark.py new file mode 100644 index 000000000..9794ee2ef --- /dev/null +++ b/ossfuzz_py/samples/basic/03_simple_benchmark.py @@ -0,0 +1,506 @@ +#!/usr/bin/env python3 +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License a +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=invalid-name,line-too-long,redefined-outer-name,unused-import,unused-variable +""" +OSS-Fuzz SDK Simple Benchmark Example + +This example demonstrates how to work with a single benchmark, +including building, running, and analyzing results. 
+ +What this example covers: +- Creating and configuring a benchmark +- Building a fuzz target +- Running a benchmark +- Analyzing results and metrics +- Basic error handling and troubleshooting + +Prerequisites: +- OSS-Fuzz SDK installed: pip install ossfuzz-py +- Basic configuration (see 02_configuration.py) +""" + +import os +import sys +import tempfile +from pathlib import Path + +# Add the parent directory to the path so we can import the SDK +sys.path.append(str(Path(__file__).parent.parent.parent)) + + +def create_sample_fuzz_target(): + """Create a sample fuzz target for demonstration.""" + print("📝 Creating Sample Fuzz Target") + print("-" * 30) + + # Sample fuzz target source code + fuzz_target_source = ''' +#include <stdint.h> +#include <stddef.h> +#include <string.h> + +// Simple vulnerable function for demonstration +int vulnerable_function(const uint8_t* data, size_t size) { + if (size < 4) return 0; + + // Simulate a buffer overflow vulnerability + char buffer[10]; + if (data[0] == 'F' && data[1] == 'U' && data[2] == 'Z' && data[3] == 'Z') { + // This would cause a buffer overflow in real code + memcpy(buffer, data, size); // Intentionally vulnerable + return 1; + } + + return 0; +} + +// LibFuzzer entry point +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { + return vulnerable_function(data, size); +} +''' + + # Sample build script + build_script = ''' +#!/bin/bash +# Simple build script for the sample fuzz target + +# Compile the fuzz target +$CXX $CXXFLAGS -o $OUT/sample_target sample_target.cpp $LIB_FUZZING_ENGINE + +echo "Build completed successfully" +''' + + # Create target specification + target_spec = { + 'name': + 'sample_target', + 'source_code': + fuzz_target_source, + 'build_script': + build_script, + 'project_name': + 'sample_project', + 'language': + 'c++', + 'function_signature': + 'int vulnerable_function(const uint8_t* data, size_t size)', + } + + print("✅ Sample fuzz target created:") + print(f" Name: {target_spec['name']}") + print(f" Language: {target_spec['language']}") + print(f" Project: {target_spec['project_name']}") + print(f" Source code: {len(target_spec['source_code'])} characters") + print(f" Build script: {len(target_spec['build_script'])} characters") + + return target_spec + + +def demonstrate_build_operations(sdk, target_spec): + """Demonstrate build operations with the sample target.""" + print("\n🏗️ Build Operations") + print("-" * 20) + + try: + from ossfuzz_py.core.ossfuzz_sdk import BuildOptions + + # Create build options + build_options = BuildOptions( + sanitizer='address', # Use AddressSanitizer + architecture='x86_64', + timeout_seconds=1800, # 30 minutes + environment_vars={ + 'FUZZING_ENGINE': 'libfuzzer', + 'SANITIZER': 'address' + }) + + print("1. Build Configuration:") + print(f" ✅ Sanitizer: {build_options.sanitizer}") + print(f" ✅ Architecture: {build_options.architecture}") + print(f" ✅ Timeout: {build_options.timeout_seconds}s") + print(f" ✅ Environment vars: {len(build_options.environment_vars)}") + + # Attempt to build the fuzz target + print("\n2. 
Building Fuzz Target:") + print(f" Attempting to build: {target_spec['name']}") + + build_result = sdk.build_fuzz_target(target_spec, build_options) + + if build_result.success: + print(" ✅ Build completed successfully!") + print(f" Build ID: {build_result.build_id}") + print(f" Message: {build_result.message}") + print(f" Artifacts: {len(build_result.artifacts)} items") + + # Show artifacts if available + if build_result.artifacts: + print(" Available artifacts:") + for name, path in build_result.artifacts.items(): + print(f" - {name}: {path}") + else: + print(f" ⚠️ Build failed: {build_result.message}") + print(" This is expected in a demo environment without build tools") + + # Check build status + print("\n3. Checking Build Status:") + build_status = sdk.get_build_status(build_result.build_id) + print(f" Build ID: {build_status['build_id']}") + print(f" Status: {build_status['status']}") + print(f" Message: {build_status['message']}") + + return build_result + + except Exception as e: + print(f"❌ Build operations failed: {e}") + return None + + +def demonstrate_run_operations(sdk, target_spec, build_result): + """Demonstrate run operations with the sample target.""" + print("\n🏃 Run Operations") + print("-" * 17) + + try: + from ossfuzz_py.core.ossfuzz_sdk import RunOptions + + # Create run options + run_options = RunOptions( + duration_seconds=300, # 5 minutes + timeout_seconds=25, # 25 seconds per input + max_memory_mb=1024, # 1GB memory limit + detect_leaks=True, + extract_coverage=True, + output_dir='fuzz_output') + + print("1. Run Configuration:") + print(f" ✅ Duration: {run_options.duration_seconds}s") + print(f" ✅ Timeout per input: {run_options.timeout_seconds}s") + print(f" ✅ Memory limit: {run_options.max_memory_mb}MB") + print(f" ✅ Leak detection: {run_options.detect_leaks}") + print(f" ✅ Coverage extraction: {run_options.extract_coverage}") + + # Attempt to run the fuzz target + print("\n2. Running Fuzz Target:") + + if build_result and build_result.success: + print(f" Using build artifacts from: {build_result.build_id}") + build_metadata = build_result.artifacts + else: + print(" Using mock build metadata (build failed)") + build_metadata = {'mock': 'metadata'} + + run_result = sdk.run_fuzz_target(target_spec, build_metadata, run_options) + + if run_result.success: + print(" ✅ Run completed successfully!") + print(f" Run ID: {run_result.run_id}") + print(f" Crashes detected: {run_result.crashes}") + print(f" Message: {run_result.message}") + + # Show coverage data if available + if run_result.coverage_data: + print(" Coverage data:") + for key, value in run_result.coverage_data.items(): + print(f" - {key}: {value}") + else: + print(f" ⚠️ Run failed: {run_result.message}") + print( + " This is expected in a demo environment without execution tools" + ) + + # Check run status + print("\n3. Checking Run Status:") + run_status = sdk.get_run_status(run_result.run_id) + print(f" Run ID: {run_status['run_id']}") + print(f" Status: {run_status['status']}") + print(f" Message: {run_status['message']}") + + return run_result + + except Exception as e: + print(f"❌ Run operations failed: {e}") + return None + + +def demonstrate_benchmark_operations(sdk): + """Demonstrate benchmark-specific operations.""" + print("\n🎯 Benchmark Operations") + print("-" * 23) + + benchmark_id = 'sample_benchmark_001' + + try: + # 1. Create a benchmark + print("1. 
Creating Benchmark:") + benchmark_spec = { + 'id': + benchmark_id, + 'project': + 'sample_project', + 'language': + 'c++', + 'function_name': + 'vulnerable_function', + 'function_signature': + 'int vulnerable_function(const uint8_t* data, size_t size)', + 'return_type': + 'int', + 'target_path': + '/sample/target.h', + 'description': + 'Sample benchmark for demonstration' + } + + success = sdk.create_benchmark(benchmark_spec) + if success: + print(f" ✅ Benchmark created: {benchmark_id}") + else: + print(" ⚠️ Benchmark creation failed (expected in demo)") + + # 2. List benchmarks + print("\n2. Listing Benchmarks:") + benchmarks = sdk.list_benchmarks() + print(f" Found {len(benchmarks)} benchmarks") + + if benchmarks: + for i, benchmark in enumerate(benchmarks[:3]): + print(f" {i+1}. {benchmark.get('id', 'Unknown')}") + else: + print(" No benchmarks found (this is normal for a new setup)") + + # 3. Run benchmark (build + run) + print("\n3. Running Complete Benchmark:") + from ossfuzz_py.core.ossfuzz_sdk import RunOptions + + run_options = RunOptions(duration_seconds=60) # Short run for demo + benchmark_result = sdk.run_benchmark(benchmark_id, run_options) + + if benchmark_result.success: + print(f" ✅ Benchmark run completed: {benchmark_result.run_id}") + print(f" Crashes: {benchmark_result.crashes}") + else: + print(f" ⚠️ Benchmark run failed: {benchmark_result.message}") + print(" This is expected in a demo environment") + + return benchmark_id + + except Exception as e: + print(f"❌ Benchmark operations failed: {e}") + return benchmark_id + + +def demonstrate_result_analysis(sdk, benchmark_id): + """Demonstrate result analysis and metrics.""" + print("\n📊 Result Analysis") + print("-" * 18) + + try: + # 1. Get benchmark metrics + print("1. Benchmark Metrics:") + metrics = sdk.get_benchmark_metrics(benchmark_id) + + if metrics: + print(f" ✅ Retrieved metrics for {benchmark_id}:") + print(f" Compiles: {metrics.get('compiles', 'Unknown')}") + print(f" Crashes: {metrics.get('crashes', 'Unknown')}") + print(f" Coverage: {metrics.get('coverage', 'Unknown')}") + print( + f" Line coverage diff: {metrics.get('line_coverage_diff', 'Unknown')}" + ) + print(f" Trial: {metrics.get('trial', 'Unknown')}") + else: + print(f" ⚠️ No metrics available for {benchmark_id}") + print(" This is normal for a new benchmark") + + # 2. Get build success rate + print("\n2. Build Success Rate:") + success_rate = sdk.get_build_success_rate(benchmark_id, days=7) + print(f" 7-day build success rate: {success_rate:.2%}") + + # 3. Get crash summary + print("\n3. Crash Summary:") + crash_summary = sdk.get_crash_summary(benchmark_id, days=7) + + if crash_summary: + print(f" Total crashes: {crash_summary.get('total_crashes', 0)}") + print(f" Unique crashes: {crash_summary.get('unique_crashes', 0)}") + print(f" Crash rate: {crash_summary.get('crash_rate', 0.0):.2%}") + else: + print(" No crash data available") + + # 4. Get coverage trend + print("\n4. Coverage Trend:") + coverage_trend = sdk.get_coverage_trend(benchmark_id, days=7) + + if isinstance(coverage_trend, list) and coverage_trend: + print(f" Coverage data points: {len(coverage_trend)}") + for point in coverage_trend[-3:]: # Show last 3 points + date = point.get('date', 'Unknown') + coverage = point.get('coverage', 0) + print(f" {date}: {coverage}%") + else: + print(" No coverage trend data available") + + # 5. Get benchmark result + print("\n5. 
Latest Benchmark Result:") + result = sdk.get_benchmark_result(benchmark_id) + + if result: + print(" ✅ Latest result found:") + print(f" Trial: {getattr(result, 'trial', 'Unknown')}") + print( + f" Build successful: {getattr(result, 'is_build_successful', lambda: 'Unknown')()}" + ) + print( + f" Run successful: {getattr(result, 'is_run_successful', lambda: 'Unknown')()}" + ) + else: + print(" No result data available") + + except Exception as e: + print(f"❌ Result analysis failed: {e}") + + +def demonstrate_error_handling(sdk): + """Demonstrate error handling and troubleshooting.""" + print("\n🔧 Error Handling & Troubleshooting") + print("-" * 35) + + # 1. Component availability + print("1. Component Availability Check:") + components = { + 'Result Manager': getattr(sdk, 'result_manager', None), + 'Benchmark Manager': getattr(sdk, 'benchmark_manager', None), + 'Local Builder': getattr(sdk, 'local_builder', None), + 'Local Runner': getattr(sdk, 'local_runner', None), + } + + for name, component in components.items(): + status = "✅ Available" if component is not None else "⚠️ Not available" + print(f" {status}: {name}") + + # 2. Graceful error handling + print("\n2. Graceful Error Handling:") + + # Try operations that might fail + try: + # Non-existent benchmark + result = sdk.get_benchmark_result('non_existent_benchmark') + print(f" ✅ Non-existent benchmark handled gracefully: {result is None}") + + # Empty metrics + metrics = sdk.get_benchmark_metrics('non_existent_benchmark') + print(f" ✅ Empty metrics handled gracefully: {len(metrics) == 0}") + + # Invalid build + from ossfuzz_py.core.ossfuzz_sdk import BuildOptions + invalid_target = { + 'name': 'invalid', + 'source_code': '', + 'project_name': 'test', + 'language': 'c++' + } + build_result = sdk.build_fuzz_target(invalid_target, BuildOptions()) + print(f" ✅ Invalid build handled gracefully: {not build_result.success}") + + except Exception as e: + print(f" ⚠️ Error handling test failed: {e}") + + # 3. Troubleshooting tips + print("\n3. 
Troubleshooting Tips:") + print(" • Check component availability before using features") + print(" • Verify configuration and environment variables") + print(" • Use DEBUG log level for detailed information") + print(" • Check file permissions for work directories") + print(" • Ensure required dependencies are installed") + + +def main(): + """Main function demonstrating simple benchmark operations.""" + print("🎯 OSS-Fuzz SDK Simple Benchmark Example") + print("=" * 50) + + # Initialize SDK + print("\n📋 Initializing SDK") + try: + from ossfuzz_py.core.ossfuzz_sdk import OSSFuzzSDK, SDKConfig + + # Use a simple configuration + config = SDKConfig(storage_backend='local', + storage_path=tempfile.mkdtemp(prefix='ossfuzz_demo_'), + log_level='INFO') + + sdk = OSSFuzzSDK('sample_project', config) + print("✅ SDK initialized for project: sample_project") + print(f" Storage path: {config.storage_path}") + + except Exception as e: + print(f"❌ Failed to initialize SDK: {e}") + return False + + # Create sample fuzz target + target_spec = create_sample_fuzz_target() + + # Demonstrate operations + build_result = demonstrate_build_operations(sdk, target_spec) + run_result = demonstrate_run_operations(sdk, target_spec, build_result) + benchmark_id = demonstrate_benchmark_operations(sdk) + demonstrate_result_analysis(sdk, benchmark_id) + demonstrate_error_handling(sdk) + + # Summary + print("\n🎉 Simple Benchmark Example Summary") + print("=" * 40) + print("✅ Operations demonstrated:") + print(" • Sample fuzz target creation") + print(" • Build operations with options") + print(" • Run operations with configuration") + print(" • Benchmark management") + print(" • Result analysis and metrics") + print(" • Error handling and troubleshooting") + + print("\n📋 Key learnings:") + print(" • SDK handles missing components gracefully") + print(" • Configuration affects all operations") + print(" • Results provide detailed information") + print(" • Error handling is built-in") + + print("\n🚀 Next steps:") + print(" • Try intermediate/01_build_operations.py for advanced builds") + print(" • Explore intermediate/04_pipeline_automation.py for workflows") + print(" • Check advanced examples for production use cases") + + return True + + +if __name__ == '__main__': + try: + success = main() + if success: + print("\n🎯 Simple benchmark example completed successfully!") + sys.exit(0) + else: + print("\n❌ Simple benchmark example failed.") + sys.exit(1) + + except KeyboardInterrupt: + print("\n\n⏹️ Example interrupted by user.") + sys.exit(1) + except Exception as e: + print(f"\n❌ Unexpected error: {e}") + import traceback + traceback.print_exc() + sys.exit(1) diff --git a/ossfuzz_py/samples/data/sample_benchmarks.json b/ossfuzz_py/samples/data/sample_benchmarks.json new file mode 100644 index 000000000..bb0cdbb1b --- /dev/null +++ b/ossfuzz_py/samples/data/sample_benchmarks.json @@ -0,0 +1,315 @@ +{ + "metadata": { + "version": "1.0", + "created": "2025-01-29", + "description": "Sample benchmark definitions for OSS-Fuzz SDK examples", + "total_benchmarks": 15 + }, + "benchmarks": [ + { + "id": "libpng_decode_001", + "project": "libpng", + "language": "c++", + "function_name": "png_decode_fuzzer", + "function_signature": "int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)", + "return_type": "int", + "target_path": "/src/libpng/contrib/oss-fuzz/png_decode_fuzzer.cc", + "description": "Fuzzer for PNG decoding functionality", + "complexity": "medium", + "expected_coverage": 75.0, + "tags": ["image", "decoder", 
"libpng"], + "priority": "high" + }, + { + "id": "libpng_encode_002", + "project": "libpng", + "language": "c++", + "function_name": "png_encode_fuzzer", + "function_signature": "int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)", + "return_type": "int", + "target_path": "/src/libpng/contrib/oss-fuzz/png_encode_fuzzer.cc", + "description": "Fuzzer for PNG encoding functionality", + "complexity": "medium", + "expected_coverage": 70.0, + "tags": ["image", "encoder", "libpng"], + "priority": "high" + }, + { + "id": "libjpeg_decode_003", + "project": "libjpeg", + "language": "c++", + "function_name": "jpeg_decode_fuzzer", + "function_signature": "int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)", + "return_type": "int", + "target_path": "/src/libjpeg/fuzz/jpeg_decode_fuzzer.cc", + "description": "Fuzzer for JPEG decoding functionality", + "complexity": "high", + "expected_coverage": 80.0, + "tags": ["image", "decoder", "jpeg"], + "priority": "high" + }, + { + "id": "zlib_inflate_004", + "project": "zlib", + "language": "c", + "function_name": "inflate_fuzzer", + "function_signature": "int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)", + "return_type": "int", + "target_path": "/src/zlib/contrib/oss-fuzz/inflate_fuzzer.c", + "description": "Fuzzer for zlib inflate functionality", + "complexity": "low", + "expected_coverage": 85.0, + "tags": ["compression", "inflate", "zlib"], + "priority": "medium" + }, + { + "id": "zlib_deflate_005", + "project": "zlib", + "language": "c", + "function_name": "deflate_fuzzer", + "function_signature": "int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)", + "return_type": "int", + "target_path": "/src/zlib/contrib/oss-fuzz/deflate_fuzzer.c", + "description": "Fuzzer for zlib deflate functionality", + "complexity": "low", + "expected_coverage": 82.0, + "tags": ["compression", "deflate", "zlib"], + "priority": "medium" + }, + { + "id": "openssl_rsa_006", + "project": "openssl", + "language": "c++", + "function_name": "rsa_fuzzer", + "function_signature": "int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)", + "return_type": "int", + "target_path": "/src/openssl/fuzz/rsa.c", + "description": "Fuzzer for OpenSSL RSA functionality", + "complexity": "high", + "expected_coverage": 65.0, + "tags": ["crypto", "rsa", "openssl"], + "priority": "critical" + }, + { + "id": "openssl_aes_007", + "project": "openssl", + "language": "c++", + "function_name": "aes_fuzzer", + "function_signature": "int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)", + "return_type": "int", + "target_path": "/src/openssl/fuzz/aes.c", + "description": "Fuzzer for OpenSSL AES functionality", + "complexity": "medium", + "expected_coverage": 78.0, + "tags": ["crypto", "aes", "openssl"], + "priority": "critical" + }, + { + "id": "json_parser_008", + "project": "json-c", + "language": "c", + "function_name": "json_parse_fuzzer", + "function_signature": "int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)", + "return_type": "int", + "target_path": "/src/json-c/fuzz/json_parse_fuzzer.c", + "description": "Fuzzer for JSON parsing functionality", + "complexity": "medium", + "expected_coverage": 72.0, + "tags": ["parser", "json", "text"], + "priority": "medium" + }, + { + "id": "xml_parser_009", + "project": "libxml2", + "language": "c", + "function_name": "xml_parse_fuzzer", + "function_signature": "int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)", + "return_type": "int", + "target_path": 
"/src/libxml2/fuzz/xml_parse_fuzzer.c", + "description": "Fuzzer for XML parsing functionality", + "complexity": "high", + "expected_coverage": 68.0, + "tags": ["parser", "xml", "text"], + "priority": "high" + }, + { + "id": "regex_engine_010", + "project": "pcre2", + "language": "c", + "function_name": "regex_fuzzer", + "function_signature": "int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)", + "return_type": "int", + "target_path": "/src/pcre2/fuzz/regex_fuzzer.c", + "description": "Fuzzer for PCRE2 regex engine", + "complexity": "high", + "expected_coverage": 60.0, + "tags": ["regex", "pattern", "text"], + "priority": "medium" + }, + { + "id": "sqlite_query_011", + "project": "sqlite", + "language": "c", + "function_name": "sql_query_fuzzer", + "function_signature": "int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)", + "return_type": "int", + "target_path": "/src/sqlite/fuzz/sql_query_fuzzer.c", + "description": "Fuzzer for SQLite query processing", + "complexity": "very_high", + "expected_coverage": 55.0, + "tags": ["database", "sql", "query"], + "priority": "high" + }, + { + "id": "freetype_font_012", + "project": "freetype", + "language": "c++", + "function_name": "font_fuzzer", + "function_signature": "int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)", + "return_type": "int", + "target_path": "/src/freetype/fuzz/font_fuzzer.cc", + "description": "Fuzzer for FreeType font rendering", + "complexity": "high", + "expected_coverage": 62.0, + "tags": ["font", "rendering", "graphics"], + "priority": "medium" + }, + { + "id": "curl_http_013", + "project": "curl", + "language": "c", + "function_name": "http_fuzzer", + "function_signature": "int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)", + "return_type": "int", + "target_path": "/src/curl/fuzz/http_fuzzer.c", + "description": "Fuzzer for cURL HTTP functionality", + "complexity": "high", + "expected_coverage": 58.0, + "tags": ["http", "network", "protocol"], + "priority": "high" + }, + { + "id": "protobuf_parse_014", + "project": "protobuf", + "language": "c++", + "function_name": "protobuf_fuzzer", + "function_signature": "int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)", + "return_type": "int", + "target_path": "/src/protobuf/fuzz/protobuf_fuzzer.cc", + "description": "Fuzzer for Protocol Buffers parsing", + "complexity": "medium", + "expected_coverage": 74.0, + "tags": ["serialization", "protobuf", "parser"], + "priority": "medium" + }, + { + "id": "bzip2_compress_015", + "project": "bzip2", + "language": "c", + "function_name": "bzip2_fuzzer", + "function_signature": "int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)", + "return_type": "int", + "target_path": "/src/bzip2/fuzz/bzip2_fuzzer.c", + "description": "Fuzzer for bzip2 compression", + "complexity": "low", + "expected_coverage": 88.0, + "tags": ["compression", "bzip2"], + "priority": "low" + } + ], + "project_summary": { + "libpng": { + "benchmarks": 2, + "languages": ["c++"], + "priority": "high", + "description": "PNG image processing library" + }, + "libjpeg": { + "benchmarks": 1, + "languages": ["c++"], + "priority": "high", + "description": "JPEG image processing library" + }, + "zlib": { + "benchmarks": 2, + "languages": ["c"], + "priority": "medium", + "description": "Data compression library" + }, + "openssl": { + "benchmarks": 2, + "languages": ["c++"], + "priority": "critical", + "description": "Cryptography and SSL/TLS library" + }, + "json-c": { + "benchmarks": 1, + "languages": ["c"], + 
"priority": "medium", + "description": "JSON parsing library" + }, + "libxml2": { + "benchmarks": 1, + "languages": ["c"], + "priority": "high", + "description": "XML parsing library" + }, + "pcre2": { + "benchmarks": 1, + "languages": ["c"], + "priority": "medium", + "description": "Regular expression library" + }, + "sqlite": { + "benchmarks": 1, + "languages": ["c"], + "priority": "high", + "description": "SQL database engine" + }, + "freetype": { + "benchmarks": 1, + "languages": ["c++"], + "priority": "medium", + "description": "Font rendering library" + }, + "curl": { + "benchmarks": 1, + "languages": ["c"], + "priority": "high", + "description": "HTTP client library" + }, + "protobuf": { + "benchmarks": 1, + "languages": ["c++"], + "priority": "medium", + "description": "Protocol Buffers serialization" + }, + "bzip2": { + "benchmarks": 1, + "languages": ["c"], + "priority": "low", + "description": "Data compression utility" + } + }, + "statistics": { + "total_projects": 12, + "total_benchmarks": 15, + "languages": { + "c": 8, + "c++": 7 + }, + "priorities": { + "critical": 2, + "high": 6, + "medium": 6, + "low": 1 + }, + "complexity": { + "low": 3, + "medium": 6, + "high": 5, + "very_high": 1 + }, + "average_expected_coverage": 70.1 + } +} diff --git a/ossfuzz_py/samples/data/sample_configs/development.json b/ossfuzz_py/samples/data/sample_configs/development.json new file mode 100644 index 000000000..d1516ec54 --- /dev/null +++ b/ossfuzz_py/samples/data/sample_configs/development.json @@ -0,0 +1,79 @@ +{ + "metadata": { + "name": "Development Configuration", + "description": "Configuration optimized for development and testing", + "version": "1.0", + "environment": "development", + "created": "2025-01-29" + }, + "sdk_config": { + "storage_backend": "local", + "storage_path": "/tmp/ossfuzz_dev", + "work_dir": "/tmp/ossfuzz_work_dev", + "oss_fuzz_dir": null, + "enable_caching": false, + "log_level": "DEBUG", + "timeout_seconds": 1800, + "max_retries": 2 + }, + "build_options": { + "sanitizer": "address", + "architecture": "x86_64", + "fuzzing_engine": "libfuzzer", + "timeout_seconds": 1800, + "environment_vars": { + "FUZZING_ENGINE": "libfuzzer", + "SANITIZER": "address", + "ARCHITECTURE": "x86_64", + "DEBUG": "1" + }, + "build_args": [ + "--enable-fuzzing", + "--debug", + "--verbose" + ] + }, + "run_options": { + "duration_seconds": 300, + "timeout_seconds": 25, + "max_memory_mb": 1024, + "detect_leaks": true, + "extract_coverage": true, + "corpus_dir": "corpus_dev", + "output_dir": "fuzz_output_dev", + "engine_args": [ + "-max_len=1024", + "-rss_limit_mb=1024", + "-print_stats=1" + ], + "env_vars": { + "ASAN_OPTIONS": "detect_odr_violation=0:abort_on_error=1:print_stats=1", + "MSAN_OPTIONS": "halt_on_error=1:print_stats=1", + "UBSAN_OPTIONS": "halt_on_error=1:print_stacktrace=1" + } + }, + "pipeline_options": { + "trials": 2, + "analyze_coverage": true, + "store_results": true + }, + "development_settings": { + "quick_mode": true, + "verbose_output": true, + "debug_builds": true, + "short_runs": true, + "detailed_logging": true + }, + "monitoring": { + "enable_metrics": true, + "metrics_interval": 30, + "log_performance": true, + "track_memory_usage": true + }, + "testing": { + "mock_components": false, + "simulate_failures": false, + "test_data_path": "/tmp/ossfuzz_test_data", + "cleanup_after_tests": true + } +} diff --git a/ossfuzz_py/samples/intermediate/04_pipeline_automation.py b/ossfuzz_py/samples/intermediate/04_pipeline_automation.py new file mode 100644 index 
000000000..bc7dbd606 --- /dev/null +++ b/ossfuzz_py/samples/intermediate/04_pipeline_automation.py @@ -0,0 +1,529 @@ +#!/usr/bin/env python3 +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=invalid-name,line-too-long,unused-import +""" +OSS-Fuzz SDK Pipeline Automation Example + +This example demonstrates how to create automated fuzzing pipelines +that combine building, running, and analysis into streamlined workflows. + +What this example covers: +- Complete pipeline configuration +- Multi-trial execution +- Automated result analysis +- Pipeline monitoring and reporting +- Error recovery and retry logic +- Performance optimization + +Prerequisites: +- OSS-Fuzz SDK installed: pip install ossfuzz-py +- Understanding of basic SDK operations +""" + +import os +import sys +import tempfile +import time +from datetime import datetime, timedelta +from pathlib import Path + +# Add the parent directory to the path so we can import the SDK +sys.path.append(str(Path(__file__).parent.parent.parent)) + + +def create_pipeline_configuration(): + """Create comprehensive pipeline configuration.""" + print("⚙️ Creating Pipeline Configuration") + print("-" * 35) + + try: + from ossfuzz_py.core.ossfuzz_sdk import (BuildOptions, PipelineOptions, + RunOptions) + + # Build configuration + build_options = BuildOptions( + sanitizer='address', + architecture='x86_64', + fuzzing_engine='libfuzzer', + timeout_seconds=1800, # 30 minutes + environment_vars={ + 'FUZZING_ENGINE': 'libfuzzer', + 'SANITIZER': 'address', + 'ARCHITECTURE': 'x86_64' + }, + build_args=['--enable-fuzzing', '--optimize-for-fuzzing']) + + # Run configuration + run_options = RunOptions( + duration_seconds=3600, # 1 hour + timeout_seconds=30, # 30 seconds per input + max_memory_mb=2048, # 2GB memory limit + detect_leaks=True, + extract_coverage=True, + corpus_dir='corpus', + output_dir='fuzz_output', + engine_args=['-max_len=1024', '-rss_limit_mb=2048'], + env_vars={ + 'ASAN_OPTIONS': 'detect_odr_violation=0:abort_on_error=1', + 'MSAN_OPTIONS': 'halt_on_error=1', + 'UBSAN_OPTIONS': 'halt_on_error=1' + }) + + # Pipeline configuration + pipeline_options = PipelineOptions( + build_options=build_options, + run_options=run_options, + trials=5, # Run 5 trials for statistical significance + analyze_coverage=True, + store_results=True) + + print("✅ Pipeline configuration created:") + print(f" Build sanitizer: {build_options.sanitizer}") + print(f" Build timeout: {build_options.timeout_seconds}s") + print(f" Build args: {len(build_options.build_args)} arguments") + print(f" Run duration: {run_options.duration_seconds}s") + print(f" Run memory limit: {run_options.max_memory_mb}MB") + print(f" Engine args: {len(run_options.engine_args)} arguments") + print(f" Pipeline trials: {pipeline_options.trials}") + print(f" Coverage analysis: {pipeline_options.analyze_coverage}") + print(f" Result storage: {pipeline_options.store_results}") + + return pipeline_options + + except Exception as e: + print(f"❌ Failed to create 
pipeline configuration: {e}") + return None + + +def create_sample_benchmarks(): + """Create sample benchmarks for pipeline testing.""" + print("\n📝 Creating Sample Benchmarks") + print("-" * 30) + + benchmarks = [ + { + 'id': 'string_parser_001', + 'name': 'String Parser', + 'description': 'Tests string parsing functionality', + 'complexity': 'low', + 'expected_runtime': 300 # 5 minutes + }, + { + 'id': 'json_decoder_002', + 'name': 'JSON Decoder', + 'description': 'Tests JSON decoding with various inputs', + 'complexity': 'medium', + 'expected_runtime': 600 # 10 minutes + }, + { + 'id': 'image_processor_003', + 'name': 'Image Processor', + 'description': 'Tests image processing algorithms', + 'complexity': 'high', + 'expected_runtime': 1200 # 20 minutes + } + ] + + print(f"✅ Created {len(benchmarks)} sample benchmarks:") + for benchmark in benchmarks: + print( + f" • {benchmark['id']}: {benchmark['name']} ({benchmark['complexity']} complexity)" + ) + + return benchmarks + + +def run_single_pipeline(sdk, benchmark, pipeline_options): + """Run a complete pipeline for a single benchmark.""" + benchmark_id = benchmark['id'] + benchmark_name = benchmark['name'] + + print(f"\n🚀 Running Pipeline: {benchmark_name}") + print(f" Benchmark ID: {benchmark_id}") + print(f" Expected runtime: {benchmark['expected_runtime']}s") + print(f" Complexity: {benchmark['complexity']}") + + start_time = time.time() + + try: + # Run the complete pipeline + pipeline_result = sdk.run_full_pipeline(benchmark_id, pipeline_options) + + end_time = time.time() + actual_runtime = end_time - start_time + + # Analyze pipeline results + if pipeline_result.success: + print(" ✅ Pipeline completed successfully!") + print(f" Pipeline ID: {pipeline_result.pipeline_id}") + print(f" Actual runtime: {actual_runtime:.1f}s") + + # Analyze build results + build_results = pipeline_result.build_results + successful_builds = sum(1 for r in build_results if r.success) + print( + f" Builds: {successful_builds}/{len(build_results)} successful") + + # Analyze run results + run_results = pipeline_result.run_results + successful_runs = sum(1 for r in run_results if r.success) + crashes_found = sum(1 for r in run_results if r.success and r.crashes) + + print(f" Runs: {successful_runs}/{len(run_results)} successful") + print(f" Crashes found: {crashes_found}") + + # Calculate coverage statistics + coverage_data = [] + avg_coverage = 0 + max_coverage = 0 + + for result in run_results: + if result.success and result.coverage_data: + cov_pcs = result.coverage_data.get('cov_pcs', 0) + total_pcs = result.coverage_data.get('total_pcs', 1) + if total_pcs > 0: + coverage_data.append(cov_pcs / total_pcs * 100) + + if coverage_data: + avg_coverage = sum(coverage_data) / len(coverage_data) + max_coverage = max(coverage_data) + print(f" Average coverage: {avg_coverage:.1f}%") + print(f" Maximum coverage: {max_coverage:.1f}%") + + return { + 'success': True, + 'benchmark_id': benchmark_id, + 'runtime': actual_runtime, + 'builds_successful': successful_builds, + 'builds_total': len(build_results), + 'runs_successful': successful_runs, + 'runs_total': len(run_results), + 'crashes_found': crashes_found, + 'average_coverage': avg_coverage if coverage_data else 0, + 'max_coverage': max_coverage if coverage_data else 0, + 'pipeline_result': pipeline_result + } + + print(f" ❌ Pipeline failed: {pipeline_result.message}") + print(f" Runtime: {actual_runtime:.1f}s") + + return { + 'success': False, + 'benchmark_id': benchmark_id, + 'runtime': actual_runtime, + 
'error_message': pipeline_result.message, + 'pipeline_result': pipeline_result + } + + except Exception as e: + end_time = time.time() + actual_runtime = end_time - start_time + + print(f" ❌ Pipeline exception: {e}") + print(f" Runtime: {actual_runtime:.1f}s") + + return { + 'success': False, + 'benchmark_id': benchmark_id, + 'runtime': actual_runtime, + 'error_message': str(e) + } + + +def run_batch_pipeline(sdk, benchmarks, pipeline_options): + """Run pipelines for multiple benchmarks in batch.""" + print(f"\n🔄 Running Batch Pipeline ({len(benchmarks)} benchmarks)") + print("=" * 50) + + batch_start_time = time.time() + results = [] + + for i, benchmark in enumerate(benchmarks, 1): + print(f"\n[{i}/{len(benchmarks)}] Processing: {benchmark['name']}") + + # Run individual pipeline + result = run_single_pipeline(sdk, benchmark, pipeline_options) + results.append(result) + + # Show progress + elapsed = time.time() - batch_start_time + if i < len(benchmarks): + avg_time_per_benchmark = elapsed / i + estimated_remaining = avg_time_per_benchmark * (len(benchmarks) - i) + print(f" Progress: {i}/{len(benchmarks)} completed") + print(f" Estimated remaining time: {estimated_remaining:.1f}s") + + batch_end_time = time.time() + total_batch_time = batch_end_time - batch_start_time + + # Analyze batch results + print("\n📊 Batch Pipeline Results") + print("-" * 25) + + successful_pipelines = sum(1 for r in results if r['success']) + total_crashes = sum( + r.get('crashes_found', 0) for r in results if r['success']) + + print(f"✅ Batch completed in {total_batch_time:.1f}s") + print(f" Successful pipelines: {successful_pipelines}/{len(results)}") + print(f" Total crashes found: {total_crashes}") + + # Detailed results + print("\n📋 Detailed Results:") + for result in results: + benchmark_id = result['benchmark_id'] + if result['success']: + builds = f"{result['builds_successful']}/{result['builds_total']}" + runs = f"{result['runs_successful']}/{result['runs_total']}" + crashes = result['crashes_found'] + coverage = result.get('average_coverage', 0) + print( + f" ✅ {benchmark_id}: Builds={builds}, Runs={runs}, Crashes={crashes}, Cov={coverage:.1f}%" + ) + else: + error = result.get('error_message', 'Unknown error')[:50] + print(f" ❌ {benchmark_id}: Failed - {error}") + + return results + + +def analyze_pipeline_performance(results): + """Analyze pipeline performance and generate insights.""" + print("\n📈 Pipeline Performance Analysis") + print("-" * 35) + + if not results: + print("❌ No results to analyze") + return + + successful_results = [r for r in results if r['success']] + + if not successful_results: + print("❌ No successful results to analyze") + return + + # Runtime analysis + runtimes = [r['runtime'] for r in successful_results] + avg_runtime = sum(runtimes) / len(runtimes) + min_runtime = min(runtimes) + max_runtime = max(runtimes) + + print("⏱️ Runtime Statistics:") + print(f" Average: {avg_runtime:.1f}s") + print(f" Minimum: {min_runtime:.1f}s") + print(f" Maximum: {max_runtime:.1f}s") + + # Build success analysis + total_builds = sum(r['builds_total'] for r in successful_results) + successful_builds = sum(r['builds_successful'] for r in successful_results) + build_success_rate = successful_builds / total_builds if total_builds > 0 else 0 + + print("\n🏗️ Build Statistics:") + print(f" Total builds: {total_builds}") + print(f" Successful builds: {successful_builds}") + print(f" Build success rate: {build_success_rate:.2%}") + + # Run success analysis + total_runs = sum(r['runs_total'] for r in 
successful_results) + successful_runs = sum(r['runs_successful'] for r in successful_results) + run_success_rate = successful_runs / total_runs if total_runs > 0 else 0 + + print("\n🏃 Run Statistics:") + print(f" Total runs: {total_runs}") + print(f" Successful runs: {successful_runs}") + print(f" Run success rate: {run_success_rate:.2%}") + + # Crash analysis + total_crashes = sum(r['crashes_found'] for r in successful_results) + benchmarks_with_crashes = sum( + 1 for r in successful_results if r['crashes_found'] > 0) + + print("\n💥 Crash Statistics:") + print(f" Total crashes found: {total_crashes}") + print( + f" Benchmarks with crashes: {benchmarks_with_crashes}/{len(successful_results)}" + ) + + # Coverage analysis + coverage_data = [ + r.get('average_coverage', 0) + for r in successful_results + if r.get('average_coverage', 0) > 0 + ] + avg_coverage = 0 + max_coverage = 0 + min_coverage = 0 + + if coverage_data: + avg_coverage = sum(coverage_data) / len(coverage_data) + max_coverage = max(coverage_data) + min_coverage = min(coverage_data) + + print("\n📊 Coverage Statistics:") + print(f" Average coverage: {avg_coverage:.1f}%") + print(f" Maximum coverage: {max_coverage:.1f}%") + print(f" Minimum coverage: {min_coverage:.1f}%") + + # Performance insights + print("\n💡 Performance Insights:") + + if build_success_rate < 0.8: + print(" ⚠️ Low build success rate - check build configuration") + + if run_success_rate < 0.8: + print(" ⚠️ Low run success rate - check run configuration") + + if total_crashes == 0: + print(" ℹ️ No crashes found - consider increasing run duration") + + if coverage_data and avg_coverage < 50: + print(" ℹ️ Low coverage - consider optimizing corpus or run parameters") + + if avg_runtime > 1800: # 30 minutes + print(" ⚠️ Long runtime - consider optimizing pipeline configuration") + + +def demonstrate_pipeline_monitoring(sdk, results): + """Demonstrate pipeline monitoring and alerting.""" + print("\n🔍 Pipeline Monitoring") + print("-" * 22) + + # Monitor system metrics + try: + system_metrics = sdk.get_system_metrics() + print("📊 System Metrics:") + print(f" Total benchmarks: {system_metrics.get('total_benchmarks', 0)}") + print( + f" Build success rate: {system_metrics.get('build_success_rate', 0):.2%}" + ) + print( + f" Average coverage: {system_metrics.get('average_coverage', 0):.1f}%" + ) + + except Exception as e: + print(f"⚠️ Could not get system metrics: {e}") + + # Check for alerts + print("\n🚨 Alert Monitoring:") + alerts = [] + + for result in results: + if not result['success']: + alerts.append(f"Pipeline failed for {result['benchmark_id']}") + elif result.get('crashes_found', 0) > 10: + alerts.append( + f"High crash count for {result['benchmark_id']}: {result['crashes_found']}" + ) + elif result.get('average_coverage', 0) < 20: + alerts.append( + f"Low coverage for {result['benchmark_id']}: {result.get('average_coverage', 0):.1f}%" + ) + + if alerts: + print(f" Found {len(alerts)} alerts:") + for alert in alerts: + print(f" ⚠️ {alert}") + else: + print(" ✅ No alerts detected") + + +def main(): + """Main function demonstrating pipeline automation.""" + print("🔄 OSS-Fuzz SDK Pipeline Automation Example") + print("=" * 55) + + # Initialize SDK + print("\n📋 Initializing SDK") + try: + from ossfuzz_py.core.ossfuzz_sdk import OSSFuzzSDK, SDKConfig + + config = SDKConfig( + storage_backend='local', + storage_path=tempfile.mkdtemp(prefix='ossfuzz_pipeline_'), + log_level='INFO', + enable_caching=True, + timeout_seconds=7200 # 2 hours + ) + + sdk = 
OSSFuzzSDK('pipeline_project', config) + print("✅ SDK initialized for pipeline automation") + print(f" Storage path: {config.storage_path}") + print(f" Timeout: {config.timeout_seconds}s") + + except Exception as e: + print(f"❌ Failed to initialize SDK: {e}") + return False + + # Create pipeline configuration + pipeline_options = create_pipeline_configuration() + if not pipeline_options: + return False + + # Create sample benchmarks + benchmarks = create_sample_benchmarks() + + # Run batch pipeline + results = run_batch_pipeline(sdk, benchmarks, pipeline_options) + + # Analyze performance + analyze_pipeline_performance(results) + + # Monitor pipeline + demonstrate_pipeline_monitoring(sdk, results) + + # Summary + print("\n🎉 Pipeline Automation Summary") + print("=" * 35) + print("✅ Pipeline automation demonstrated:") + print(" • Complete pipeline configuration") + print(" • Multi-trial execution") + print(" • Batch processing") + print(" • Performance analysis") + print(" • Monitoring and alerting") + + print("\n📋 Key features:") + print(" • Automated build → run → analyze workflows") + print(" • Statistical significance through multiple trials") + print(" • Comprehensive result analysis") + print(" • Performance monitoring and insights") + print(" • Error handling and recovery") + + print("\n🚀 Next steps:") + print(" • Try advanced/01_batch_processing.py for multi-project automation") + print( + " • Explore advanced/03_monitoring_alerts.py for production monitoring") + print(" • Check production examples for enterprise deployment") + + return True + + +if __name__ == '__main__': + try: + success = main() + if success: + print("\n🎯 Pipeline automation example completed successfully!") + sys.exit(0) + else: + print("\n❌ Pipeline automation example failed.") + sys.exit(1) + + except KeyboardInterrupt: + print("\n\n⏹️ Example interrupted by user.") + sys.exit(1) + except Exception as e: + print(f"\n❌ Unexpected error: {e}") + import traceback + traceback.print_exc() + sys.exit(1) diff --git a/ossfuzz_py/samples/utilities/health_checker.py b/ossfuzz_py/samples/utilities/health_checker.py new file mode 100644 index 000000000..fe65e3065 --- /dev/null +++ b/ossfuzz_py/samples/utilities/health_checker.py @@ -0,0 +1,612 @@ +#!/usr/bin/env python3 +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=line-too-long,unused-import,unused-variable,redefined-outer-name +""" +OSS-Fuzz SDK Health Checker Utility + +This utility provides comprehensive health checking for the OSS-Fuzz SDK +environment, components, and configuration. 
+ +What this utility covers: +- SDK installation and import verification +- Component availability checking +- Configuration validation +- Environment variable verification +- Storage backend connectivity +- Performance benchmarking +- Dependency checking + +Usage: +    python health_checker.py [--project PROJECT_NAME] [--verbose] +""" + +import argparse +import os +import sys +import tempfile +import time +from datetime import datetime +from pathlib import Path + +# Add the parent directory to the path so we can import the SDK +sys.path.append(str(Path(__file__).parent.parent.parent)) + +# Global imports to avoid possibly unbound variables +try: +  from ossfuzz_py.core.ossfuzz_sdk import OSSFuzzSDK +except ImportError: +  OSSFuzzSDK = None + + +class HealthChecker: +  """Comprehensive health checker for OSS-Fuzz SDK.""" + +  def __init__(self, project_name='health_check_project', verbose=False): +    """Initialize health checker.""" +    self.project_name = project_name +    self.verbose = verbose +    self.results = {} +    self.start_time = time.time() + +  def log(self, message, level='INFO'): +    """Log message with timestamp.""" +    if self.verbose or level in ['ERROR', 'WARNING']: +      timestamp = datetime.now().strftime('%H:%M:%S') +      print(f"[{timestamp}] {level}: {message}") + +  def check_sdk_installation(self): +    """Check if the OSS-Fuzz SDK is properly installed.""" +    print("🔍 Checking SDK Installation") +    print("-" * 28) + +    checks = {'sdk_import': False, 'core_classes': False, 'version_info': False} + +    # Test SDK import +    try: +      from ossfuzz_py.core.ossfuzz_sdk import OSSFuzzSDK +      checks['sdk_import'] = True +      self.log("SDK import successful") +      print("  ✅ SDK import: Success") +    except ImportError as e: +      self.log(f"SDK import failed: {e}", 'ERROR') +      print(f"  ❌ SDK import: Failed - {e}") + +    # Test core classes import +    try: +      from ossfuzz_py.core.ossfuzz_sdk import (BuildOptions, BuildResult, +                                               PipelineOptions, PipelineResult, +                                               RunOptions, RunResult, SDKConfig) +      checks['core_classes'] = True +      self.log("Core classes import successful") +      print("  ✅ Core classes: Success") +    except ImportError as e: +      self.log(f"Core classes import failed: {e}", 'ERROR') +      print(f"  ❌ Core classes: Failed - {e}") + +    # Test version information +    try: +      # Try to get version info if available +      import ossfuzz_py +      version = getattr(ossfuzz_py, '__version__', 'Unknown') +      checks['version_info'] = True +      self.log(f"SDK version: {version}") +      print(f"  ✅ Version info: {version}") +    except Exception as e: +      self.log(f"Version info unavailable: {e}", 'WARNING') +      print("  ⚠️ Version info: Unavailable") + +    self.results['sdk_installation'] = checks +    return all(checks.values()) + +  def check_environment_variables(self): +    """Check environment variable configuration.""" +    print("\n🌍 Checking Environment Variables") +    print("-" * 33) + +    env_vars = { +        'OSSFUZZ_HISTORY_STORAGE_BACKEND': { +            'required': False, +            'default': 'local', +            'description': 'Storage backend type' +        }, +        'OSSFUZZ_HISTORY_STORAGE_PATH': { +            'required': False, +            'default': '/tmp/ossfuzz_data', +            'description': 'Local storage path' +        }, +        'GCS_BUCKET_NAME': { +            'required': False, +            'default': None, +            'description': 'GCS bucket for cloud storage' +        }, +        'WORK_DIR': { +            'required': False, +            'default': '/tmp', +            'description': 'Working directory' +        }, +        'OSS_FUZZ_DIR': { +            'required': False, +            'default': None, +            'description': 'OSS-Fuzz repository directory' +        } +    } + +    env_status = {} + +    for var_name, var_info in env_vars.items(): +      value = 
os.environ.get(var_name) + + if value: + env_status[var_name] = { + 'set': True, + 'value': value, + 'status': 'configured' + } + self.log(f"{var_name} = {value}") + print(f" ✅ {var_name}: {value}") + elif var_info['required']: + env_status[var_name] = { + 'set': False, + 'value': None, + 'status': 'missing_required' + } + self.log(f"{var_name} is required but not set", 'ERROR') + print(f" ❌ {var_name}: Required but not set") + else: + default = var_info['default'] + env_status[var_name] = { + 'set': False, + 'value': default, + 'status': 'using_default' + } + self.log(f"{var_name} using default: {default}") + print(f" ⚠️ {var_name}: Using default ({default})") + + self.results['environment_variables'] = env_status + return True + + def check_sdk_initialization(self): + """Check SDK initialization with different configurations.""" + print("\n⚙️ Checking SDK Initialization") + print("-" * 30) + + init_tests = { + 'default_config': False, + 'custom_config': False, + 'config_object': False + } + + # Test 1: Default configuration + try: + from ossfuzz_py.core.ossfuzz_sdk import OSSFuzzSDK + sdk = OSSFuzzSDK(self.project_name) + init_tests['default_config'] = True + self.log("Default configuration initialization successful") + print(" ✅ Default config: Success") + except Exception as e: + self.log(f"Default configuration failed: {e}", 'ERROR') + print(f" ❌ Default config: Failed - {e}") + + # Test 2: Custom dictionary configuration + try: + config_dict = { + 'storage_backend': 'local', + 'storage_path': tempfile.mkdtemp(prefix='health_check_'), + 'log_level': 'INFO' + } + if OSSFuzzSDK is not None: # type: ignore + sdk = OSSFuzzSDK(self.project_name, config_dict) # type: ignore + else: + raise ImportError("OSSFuzzSDK not available") + init_tests['custom_config'] = True + self.log("Custom dictionary configuration successful") + print(" ✅ Custom config: Success") + except Exception as e: + self.log(f"Custom configuration failed: {e}", 'ERROR') + print(f" ❌ Custom config: Failed - {e}") + + # Test 3: SDKConfig object + try: + from ossfuzz_py.core.ossfuzz_sdk import SDKConfig + sdk_config = SDKConfig( + storage_backend='local', + storage_path=tempfile.mkdtemp(prefix='health_check_obj_'), + log_level='DEBUG') + if OSSFuzzSDK is not None: # type: ignore + sdk = OSSFuzzSDK(self.project_name, sdk_config) # type: ignore + else: + raise ImportError("OSSFuzzSDK not available") + init_tests['config_object'] = True + self.log("SDKConfig object initialization successful") + print(" ✅ Config object: Success") + except Exception as e: + self.log(f"SDKConfig object failed: {e}", 'ERROR') + print(f" ❌ Config object: Failed - {e}") + + self.results['sdk_initialization'] = init_tests + return any(init_tests.values()) + + def check_component_availability(self): + """Check availability of SDK components.""" + print("\n🔧 Checking Component Availability") + print("-" * 34) + + try: + from ossfuzz_py.core.ossfuzz_sdk import OSSFuzzSDK, SDKConfig + + config = SDKConfig( + storage_backend='local', + storage_path=tempfile.mkdtemp(prefix='health_check_comp_'), + log_level='WARNING' # Reduce noise + ) + + sdk = OSSFuzzSDK(self.project_name, config) + + components = { + 'Storage Manager': getattr(sdk, 'storage', None), + 'Result Manager': getattr(sdk, 'result_manager', None), + 'Benchmark Manager': getattr(sdk, 'benchmark_manager', None), + 'Build History': getattr(sdk, 'build_history', None), + 'Coverage History': getattr(sdk, 'coverage_history', None), + 'Crash History': getattr(sdk, 'crash_history', None), + 'Corpus 
History': getattr(sdk, 'corpus_history', None), + 'Local Builder': getattr(sdk, 'local_builder', None), + 'Local Runner': getattr(sdk, 'local_runner', None), + } + + component_status = {} + available_count = 0 + + for name, component in components.items(): + is_available = component is not None + component_status[name] = { + 'available': is_available, + 'type': type(component).__name__ if component else None + } + + if is_available: + available_count += 1 + self.log(f"{name} is available") + print(f" ✅ {name}: Available") + else: + self.log(f"{name} is not available", 'WARNING') + print(f" ⚠️ {name}: Not available") + + print( + f"\n 📊 Component Summary: {available_count}/{len(components)} available" + ) + + self.results['component_availability'] = { + 'components': component_status, + 'available_count': available_count, + 'total_count': len(components), + 'availability_rate': available_count / len(components) + } + + return available_count > 0 + + except Exception as e: + self.log(f"Component availability check failed: {e}", 'ERROR') + print(f" ❌ Component check failed: {e}") + return False + + def check_basic_operations(self): + """Check basic SDK operations.""" + print("\n🎯 Checking Basic Operations") + print("-" * 27) + + operations = { + 'project_summary': False, + 'list_benchmarks': False, + 'system_metrics': False, + 'benchmark_metrics': False + } + + try: + from ossfuzz_py.core.ossfuzz_sdk import OSSFuzzSDK, SDKConfig + + config = SDKConfig( + storage_backend='local', + storage_path=tempfile.mkdtemp(prefix='health_check_ops_'), + log_level='ERROR' # Minimize noise + ) + + sdk = OSSFuzzSDK(self.project_name, config) + + # Test project summary + try: + summary = sdk.get_project_summary() + operations['project_summary'] = True + self.log("Project summary operation successful") + print(" ✅ Project summary: Success") + except Exception as e: + self.log(f"Project summary failed: {e}", 'WARNING') + print(f" ⚠️ Project summary: Failed - {e}") + + # Test list benchmarks + try: + benchmarks = sdk.list_benchmarks() + operations['list_benchmarks'] = True + self.log(f"List benchmarks successful ({len(benchmarks)} found)") + print(f" ✅ List benchmarks: Success ({len(benchmarks)} found)") + except Exception as e: + self.log(f"List benchmarks failed: {e}", 'WARNING') + print(f" ⚠️ List benchmarks: Failed - {e}") + + # Test system metrics + try: + metrics = sdk.get_system_metrics() + operations['system_metrics'] = True + self.log("System metrics operation successful") + print(" ✅ System metrics: Success") + except Exception as e: + self.log(f"System metrics failed: {e}", 'WARNING') + print(f" ⚠️ System metrics: Failed - {e}") + + # Test benchmark metrics + try: + metrics = sdk.get_benchmark_metrics('test_benchmark') + operations['benchmark_metrics'] = True + self.log("Benchmark metrics operation successful") + print(" ✅ Benchmark metrics: Success") + except Exception as e: + self.log(f"Benchmark metrics failed: {e}", 'WARNING') + print(f" ⚠️ Benchmark metrics: Failed - {e}") + + except Exception as e: + self.log(f"Basic operations check failed: {e}", 'ERROR') + print(f" ❌ Operations check failed: {e}") + + self.results['basic_operations'] = operations + return any(operations.values()) + + def check_dependencies(self): + """Check optional dependencies.""" + print("\n📦 Checking Dependencies") + print("-" * 22) + + dependencies = { + 'pandas': { + 'required': False, + 'description': 'Data analysis and manipulation', + 'import_name': 'pandas' + }, + 'pydantic': { + 'required': False, + 'description': 
'Data validation and settings management', + 'import_name': 'pydantic' + }, + 'yaml': { + 'required': False, + 'description': 'YAML file parsing', + 'import_name': 'yaml' + }, + 'chardet': { + 'required': False, + 'description': 'Character encoding detection', + 'import_name': 'chardet' + } + } + + dep_status = {} + + for dep_name, dep_info in dependencies.items(): + try: + __import__(dep_info['import_name']) + dep_status[dep_name] = {'available': True, 'status': 'installed'} + self.log(f"{dep_name} is available") + print(f" ✅ {dep_name}: Installed") + except ImportError: + dep_status[dep_name] = {'available': False, 'status': 'missing'} + if dep_info['required']: + self.log(f"{dep_name} is required but missing", 'ERROR') + print(f" ❌ {dep_name}: Required but missing") + else: + self.log(f"{dep_name} is optional and missing", 'WARNING') + print(f" ⚠️ {dep_name}: Optional, not installed") + + self.results['dependencies'] = dep_status + return True + + def run_performance_test(self): + """Run basic performance test.""" + print("\n⚡ Running Performance Test") + print("-" * 26) + + try: + from ossfuzz_py.core.ossfuzz_sdk import OSSFuzzSDK, SDKConfig + + config = SDKConfig( + storage_backend='local', + storage_path=tempfile.mkdtemp(prefix='health_check_perf_'), + log_level='ERROR') + + # Test SDK initialization time + start_time = time.time() + sdk = OSSFuzzSDK(self.project_name, config) + init_time = time.time() - start_time + + # Test basic operations time + start_time = time.time() + summary = sdk.get_project_summary() + benchmarks = sdk.list_benchmarks() + metrics = sdk.get_system_metrics() + ops_time = time.time() - start_time + + performance = { + 'initialization_time': init_time, + 'operations_time': ops_time, + 'total_time': init_time + ops_time + } + + print(f" ✅ SDK initialization: {init_time:.3f}s") + print(f" ✅ Basic operations: {ops_time:.3f}s") + print(f" ✅ Total time: {performance['total_time']:.3f}s") + + # Performance assessment + if performance['total_time'] < 1.0: + print(" 🚀 Performance: Excellent") + elif performance['total_time'] < 3.0: + print(" ✅ Performance: Good") + elif performance['total_time'] < 10.0: + print(" ⚠️ Performance: Acceptable") + else: + print(" ❌ Performance: Poor") + + self.results['performance'] = performance + return True + + except Exception as e: + self.log(f"Performance test failed: {e}", 'ERROR') + print(f" ❌ Performance test failed: {e}") + return False + + def generate_health_report(self): + """Generate comprehensive health report.""" + print("\n📊 Health Check Report") + print("=" * 22) + + total_time = time.time() - self.start_time + + # Overall status + checks = [ + self.results.get('sdk_installation', {}).get('sdk_import', False), + self.results.get('sdk_initialization', {}).get('default_config', False), + any( + self.results.get('component_availability', + {}).get('components', {}).values()), + any(self.results.get('basic_operations', {}).values()) + ] + + overall_status = sum(checks) / len(checks) + + print("🏥 Overall Health: ", end="") + if overall_status >= 0.8: + print("🟢 Excellent") + elif overall_status >= 0.6: + print("🟡 Good") + elif overall_status >= 0.4: + print("🟠 Fair") + else: + print("🔴 Poor") + + print(f"⏱️ Total check time: {total_time:.2f}s") + print(f"📅 Check timestamp: {datetime.now().isoformat()}") + + # Detailed results + print("\n📋 Detailed Results:") + + # SDK Installation + sdk_install = self.results.get('sdk_installation', {}) + sdk_score = sum( + sdk_install.values()) / len(sdk_install) if sdk_install else 0 + 
print(f"  SDK Installation: {sdk_score:.1%}") + +    # Component Availability +    comp_avail = self.results.get('component_availability', {}) +    comp_score = comp_avail.get('availability_rate', 0) +    print( +        f"  Component Availability: {comp_score:.1%} ({comp_avail.get('available_count', 0)}/{comp_avail.get('total_count', 0)})" +    ) + +    # Basic Operations +    basic_ops = self.results.get('basic_operations', {}) +    ops_score = sum(basic_ops.values()) / len(basic_ops) if basic_ops else 0 +    print(f"  Basic Operations: {ops_score:.1%}") + +    # Dependencies +    deps = self.results.get('dependencies', {}) +    deps_available = sum(1 for d in deps.values() if d.get('available', False)) +    deps_total = len(deps) +    deps_score = deps_available / deps_total if deps_total > 0 else 0 +    print( +        f"  Optional Dependencies: {deps_score:.1%} ({deps_available}/{deps_total})" +    ) + +    # Performance +    perf = self.results.get('performance', {}) +    if perf: +      total_perf_time = perf.get('total_time', 0) +      print(f"  Performance: {total_perf_time:.3f}s") + +    # Recommendations +    print("\n💡 Recommendations:") + +    if not sdk_install.get('sdk_import', False): +      print("  • Install the OSS-Fuzz SDK: pip install ossfuzz-py") + +    if comp_score < 0.5: +      print("  • Check component dependencies and configuration") + +    if ops_score < 0.5: +      print("  • Verify environment variables and storage configuration") + +    if deps_score < 0.5: +      print("  • Install optional dependencies for full functionality:") +      print("    pip install pandas pydantic pyyaml chardet") + +    if perf.get('total_time', 0) > 5.0: +      print("  • Consider optimizing configuration for better performance") + +    return overall_status + + +def main(): +  """Main function for health checker utility.""" +  parser = argparse.ArgumentParser(description='OSS-Fuzz SDK Health Checker') +  parser.add_argument( +      '--project', +      default='health_check_project', +      help='Project name for testing (default: health_check_project)') +  parser.add_argument('--verbose', +                      action='store_true', +                      help='Enable verbose logging') + +  args = parser.parse_args() + +  print("🏥 OSS-Fuzz SDK Health Checker") +  print("=" * 35) +  print(f"Project: {args.project}") +  print(f"Timestamp: {datetime.now().isoformat()}") + +  # Initialize health checker +  checker = HealthChecker(args.project, args.verbose) + +  # Run all health checks +  checker.check_sdk_installation() +  checker.check_environment_variables() +  checker.check_sdk_initialization() +  checker.check_component_availability() +  checker.check_basic_operations() +  checker.check_dependencies() +  checker.run_performance_test() + +  # Generate final report +  overall_health = checker.generate_health_report() + +  # Exit with appropriate code +  if overall_health >= 0.8: +    print("\n🎉 Health check completed successfully!") +    sys.exit(0) +  elif overall_health >= 0.4: +    print("\n⚠️ Health check completed with warnings.") +    sys.exit(0) +  else: +    print("\n❌ Health check found significant issues.") +    sys.exit(1) + + +if __name__ == '__main__': +  main() diff --git a/ossfuzz_py/samples/utilities/health_checker.py b/ossfuzz_py/unittests/test_cloud_builder_pipeline.py index 58215fe2b..359a30fa1 100644 --- a/ossfuzz_py/unittests/test_cloud_builder_pipeline.py +++ b/ossfuzz_py/unittests/test_cloud_builder_pipeline.py @@ -34,6 +34,7 @@ GOOGLE_APPLICATION_CREDENTIALS=/path/to/creds.json python -m unittest test_cloud_builder_pipeline.py -v """ +import os import shutil import subprocess import unittest @@ -97,8 +98,8 @@ def test_cloud_builder_pipeline_real_gcb(self): # test_ossfuzz_manager.py approach) try: 
if oss_fuzz_manager.checkout_path.exists(): - oss_fuzz_manager.logger.info( - f"Repository already exists at {oss_fuzz_manager.checkout_path}") + oss_fuzz_manager.logger.info("Repository already exists at %s", + oss_fuzz_manager.checkout_path) else: repo_url = "https://github.com/google/oss-fuzz.git" cmd = [ @@ -111,8 +112,8 @@ def test_cloud_builder_pipeline_real_gcb(self): check=True, timeout=120) oss_fuzz_manager.logger.info( - f"Successfully cloned OSS-Fuzz repository " - f"to {oss_fuzz_manager.checkout_path}, result={result}") + "Successfully cloned OSS-Fuzz repository to %s, result=%s", + oss_fuzz_manager.checkout_path, result) except subprocess.TimeoutExpired: self.skipTest("OSS-Fuzz clone timed out - network may be slow") @@ -131,7 +132,8 @@ def test_cloud_builder_pipeline_real_gcb(self): print("✓ OSS-Fuzz repository cloned successfully") - benchmark_yaml_path = "../../benchmark-sets/all/libspng.yaml" + benchmark_yaml_path = os.path.join(os.path.dirname(__file__), + "../../benchmark-sets/all/libspng.yaml") fuzz_target = _create_real_fuzz_target_from_benchmark(benchmark_yaml_path) google_cloud_project = EnvUtils.get_env(EnvVars.GOOGLE_CLOUD_PROJECT, "oss-fuzz") or "oss-fuzz" diff --git a/ossfuzz_py/unittests/test_historical_data_sdk.py b/ossfuzz_py/unittests/test_historical_data_sdk.py new file mode 100644 index 000000000..b947ed6cd --- /dev/null +++ b/ossfuzz_py/unittests/test_historical_data_sdk.py @@ -0,0 +1,305 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Unit tests for the Historical Data SDK. + +This module contains tests for the main SDK components including +the OSSFuzzSDK facade and history managers. 
+""" + +import tempfile +import unittest +from datetime import datetime +from unittest.mock import patch + +from ossfuzz_py.core.ossfuzz_sdk import OSSFuzzSDK +from ossfuzz_py.data.storage_manager import StorageManager +from ossfuzz_py.errors import OSSFuzzSDKConfigError +from ossfuzz_py.history import (BuildHistoryManager, CorpusHistoryManager, + CoverageHistoryManager, CrashHistoryManager) + + +class TestOSSFuzzSDK(unittest.TestCase): + """Test cases for the OSSFuzzSDK class.""" + + def setUp(self): + """Set up test fixtures.""" + self.temp_dir = tempfile.mkdtemp() + self.config = {'storage_backend': 'local', 'storage_path': self.temp_dir} + self.project_name = 'test_project' + + def tearDown(self): + """Clean up test fixtures.""" + import shutil + shutil.rmtree(self.temp_dir, ignore_errors=True) + + def test_sdk_initialization(self): + """Test SDK initialization with valid configuration.""" + sdk = OSSFuzzSDK(self.project_name, self.config) + + self.assertEqual(sdk.project_name, self.project_name) + self.assertIsInstance(sdk.storage, StorageManager) + self.assertIsInstance(sdk.build, BuildHistoryManager) + self.assertIsInstance(sdk.crash, CrashHistoryManager) + self.assertIsInstance(sdk.corpus, CorpusHistoryManager) + self.assertIsInstance(sdk.coverage, CoverageHistoryManager) + + def test_sdk_initialization_without_project_name(self): + """Test SDK initialization fails without project name.""" + with self.assertRaises(OSSFuzzSDKConfigError): + OSSFuzzSDK('', self.config) + + def test_sdk_initialization_without_config(self): + """Test SDK initialization with default configuration.""" + sdk = OSSFuzzSDK(self.project_name) + self.assertEqual(sdk.project_name, self.project_name) + self.assertIsInstance(sdk.storage, StorageManager) + + @patch.dict( + 'os.environ', { + 'OSSFUZZ_HISTORY_STORAGE_BACKEND': 'local', + 'OSSFUZZ_HISTORY_STORAGE_PATH': '/tmp/test' + }) + def test_config_from_environment(self): + """Test configuration loading from environment variables.""" + sdk = OSSFuzzSDK(self.project_name) + self.assertEqual(sdk.config.get('storage_backend'), 'local') + self.assertEqual(sdk.config.get('storage_path'), '/tmp/test') + + def test_generate_project_report(self): + """Test project report generation.""" + sdk = OSSFuzzSDK(self.project_name, self.config) + + # Mock the history managers to return test data + with (patch.object(sdk.build, 'get_build_statistics') as mock_build_stats, \ + patch.object(sdk.build, 'get_build_trends') as mock_build_trends, \ + patch.object(sdk.crash, 'get_crash_statistics') as mock_crash_stats, \ + patch.object(sdk.coverage, 'get_coverage_report') + as mock_coverage_report, \ + patch.object(sdk.coverage, 'analyze_coverage_trends') as + mock_coverage_trends, \ + patch.object(sdk.corpus, 'get_corpus_growth') as mock_corpus_growth): + + # Set up mock return values + mock_build_stats.return_value = {'success_rate': 85.0, 'total_builds': 10} + mock_build_trends.return_value = { + 'trend': 'improving', + 'builds_per_day': 2.0 + } + mock_crash_stats.return_value = {'total_crashes': 5, 'unique_crashes': 3} + mock_coverage_report.return_value = { + 'summary': { + 'max_line_coverage': 75.0 + } + } + mock_coverage_trends.return_value = { + 'trend': 'improving', + 'coverage_velocity': 0.5 + } + mock_corpus_growth.return_value = { + 'growth_rate': 10.0, + 'trend': 'growing' + } + + report = sdk.generate_project_report(days=7) + + self.assertEqual(report['project_name'], self.project_name) + self.assertIn('build_summary', report) + self.assertIn('crash_summary', report) + 
self.assertIn('coverage_summary', report) + self.assertIn('corpus_summary', report) + self.assertIn('health_score', report) + + def test_analyze_fuzzing_efficiency(self): + """Test fuzzing efficiency analysis.""" + sdk = OSSFuzzSDK(self.project_name, self.config) + + # Mock the history managers to return test data + with (patch.object(sdk.build, 'get_build_trends') as mock_build_trends, \ + patch.object(sdk.coverage, 'analyze_coverage_trends') + as mock_coverage_trends, \ + patch.object(sdk.crash, 'get_crash_statistics') as mock_crash_stats, \ + patch.object(sdk.corpus, 'get_corpus_growth') as mock_corpus_growth): + + # Set up mock return values + mock_build_trends.return_value = { + 'builds_per_day': 2.0, + 'average_success_rate': 85.0, + 'trend': 'improving' + } + mock_coverage_trends.return_value = { + 'coverage_velocity': 0.5, + 'stability': 'stable', + 'current_coverage': 75.0 + } + mock_crash_stats.return_value = {'total_crashes': 10, 'unique_crashes': 8} + mock_corpus_growth.return_value = { + 'growth_rate': 15.0, + 'size_change': 100, + 'trend': 'growing' + } + + analysis = sdk.analyze_fuzzing_efficiency(days=7) + + self.assertEqual(analysis['project_name'], self.project_name) + self.assertIn('build_efficiency', analysis) + self.assertIn('coverage_efficiency', analysis) + self.assertIn('crash_efficiency', analysis) + self.assertIn('corpus_efficiency', analysis) + self.assertIn('overall_efficiency', analysis) + + def test_get_project_summary(self): + """Test project summary generation.""" + sdk = OSSFuzzSDK(self.project_name, self.config) + + # Mock the history managers to return test data + with (patch.object(sdk.build, 'get_last_successful_build') + as mock_last_build, \ + patch.object(sdk.coverage, 'get_latest_coverage') + as mock_latest_coverage, \ + patch.object(sdk.crash, 'get_crash_history') + as mock_crash_history): + + # Set up mock return values + mock_last_build.return_value = { + 'build_id': 'build_123', + 'timestamp': '2025-01-01T12:00:00', + 'success': True + } + mock_latest_coverage.return_value = { + 'timestamp': '2025-01-01T12:00:00', + 'line_coverage': 75.0 + } + mock_crash_history.return_value = [{ + 'crash_id': 'crash_1', + 'timestamp': '2025-01-01T10:00:00' + }, { + 'crash_id': 'crash_2', + 'timestamp': '2025-01-01T11:00:00' + }] + + summary = sdk.get_project_summary() + + self.assertEqual(summary['project_name'], self.project_name) + self.assertIn('last_successful_build', summary) + self.assertIn('latest_coverage', summary) + self.assertEqual(summary['recent_crashes'], 2) + + +class TestHistoryManagers(unittest.TestCase): + """Test cases for history managers.""" + + def setUp(self): + """Set up test fixtures.""" + self.temp_dir = tempfile.mkdtemp() + self.config = {'storage_backend': 'local', 'storage_path': self.temp_dir} + self.project_name = 'test_project' + self.storage_manager = StorageManager(self.config) + + def tearDown(self): + """Clean up test fixtures.""" + import shutil + shutil.rmtree(self.temp_dir, ignore_errors=True) + + def test_build_history_manager(self): + """Test BuildHistoryManager functionality.""" + manager = BuildHistoryManager(self.storage_manager, self.project_name) + + # Test storing build result + build_data = { + 'build_id': 'build_123', + 'timestamp': datetime.now().isoformat(), + 'project_name': self.project_name, + 'success': True, + 'duration_seconds': 300 + } + + result = manager.store_build_result(build_data) + self.assertIsInstance(result, str) + + # Test retrieving build history + history = 
manager.get_build_history(limit=10) + self.assertIsInstance(history, list) + + def test_crash_history_manager(self): + """Test CrashHistoryManager functionality.""" + manager = CrashHistoryManager(self.storage_manager, self.project_name) + + # Test storing crash data (without signature so it gets generated) + crash_data = { + 'crash_id': 'crash_123', + 'timestamp': datetime.now().isoformat(), + 'project_name': self.project_name, + 'fuzzer_name': 'test_fuzzer', + 'crash_type': 'heap-buffer-overflow' + } + + # First storage should succeed + result = manager.store_crash(crash_data.copy()) + self.assertIsInstance(result, str) + self.assertNotEqual(result, "") # Should not be empty (not a duplicate) + + # Test duplicate detection - should be True after storing the same crash + is_duplicate = manager.is_duplicate_crash(crash_data) + self.assertTrue(is_duplicate) + + # Second storage should return empty string (duplicate) + result2 = manager.store_crash(crash_data.copy()) + self.assertEqual(result2, "") + + def test_coverage_history_manager(self): + """Test CoverageHistoryManager functionality.""" + manager = CoverageHistoryManager(self.storage_manager, self.project_name) + + # Test storing coverage data + coverage_data = { + 'timestamp': datetime.now().isoformat(), + 'project_name': self.project_name, + 'fuzzer_name': 'test_fuzzer', + 'line_coverage': 75.5, + 'function_coverage': 80.0, + 'branch_coverage': 70.0 + } + + result = manager.store_coverage(coverage_data) + self.assertIsInstance(result, str) + + # Test retrieving coverage history + history = manager.get_coverage_history(limit=10) + self.assertIsInstance(history, list) + + def test_corpus_history_manager(self): + """Test CorpusHistoryManager functionality.""" + manager = CorpusHistoryManager(self.storage_manager, self.project_name) + + # Test storing corpus stats + corpus_data = { + 'timestamp': datetime.now().isoformat(), + 'project_name': self.project_name, + 'fuzzer_name': 'test_fuzzer', + 'corpus_size': 1000, + 'total_size_bytes': 5000000, + 'new_files_count': 50 + } + + result = manager.store_corpus_stats(corpus_data) + self.assertIsInstance(result, str) + + # Test retrieving corpus stats + stats = manager.get_corpus_stats(limit=10) + self.assertIsInstance(stats, list) + + +if __name__ == '__main__': + unittest.main() diff --git a/ossfuzz_py/unittests/test_local_builder_pipeline.py b/ossfuzz_py/unittests/test_local_builder_pipeline.py index b5b337877..ac922995d 100644 --- a/ossfuzz_py/unittests/test_local_builder_pipeline.py +++ b/ossfuzz_py/unittests/test_local_builder_pipeline.py @@ -27,6 +27,7 @@ handling without requiring the full OSS-Fuzz environment. 
""" +import os import shutil import subprocess import tempfile @@ -131,8 +132,8 @@ def _setup_build_infrastructure_and_get_metadata(self): # test_ossfuzz_manager.py approach) try: if oss_fuzz_manager.checkout_path.exists(): - oss_fuzz_manager.logger.info( - f"Repository already exists at {oss_fuzz_manager.checkout_path}") + oss_fuzz_manager.logger.info("Repository already exists at %s", + oss_fuzz_manager.checkout_path) else: repo_url = "https://github.com/google/oss-fuzz.git" cmd = [ @@ -145,8 +146,8 @@ def _setup_build_infrastructure_and_get_metadata(self): check=True, timeout=120) oss_fuzz_manager.logger.info( - f"Successfully cloned OSS-Fuzz repository " - f"to {oss_fuzz_manager.checkout_path}, result={result}") + "Successfully cloned OSS-Fuzz repository to %s, result=%s", + oss_fuzz_manager.checkout_path, result) except subprocess.TimeoutExpired: self.skipTest("OSS-Fuzz clone timed out - network may be slow") @@ -166,7 +167,8 @@ def _setup_build_infrastructure_and_get_metadata(self): print("✓ OSS-Fuzz repository cloned successfully") # Create a real fuzz target from benchmark YAML - benchmark_yaml_path = "../../benchmark-sets/all/libspng.yaml" + benchmark_yaml_path = os.path.join(os.path.dirname(__file__), + "../../benchmark-sets/all/libspng.yaml") try: fuzz_target = _create_real_fuzz_target_from_benchmark(benchmark_yaml_path) diff --git a/ossfuzz_py/unittests/test_ossfuzz_manager.py b/ossfuzz_py/unittests/test_ossfuzz_manager.py index e93783fc5..7708ad54d 100644 --- a/ossfuzz_py/unittests/test_ossfuzz_manager.py +++ b/ossfuzz_py/unittests/test_ossfuzz_manager.py @@ -585,8 +585,8 @@ def test_real_oss_fuzz_clone_shallow(self): def shallow_clone(version="master"): try: if manager.checkout_path.exists(): - manager.logger.info( - f"Repository already exists at {manager.checkout_path}") + manager.logger.info("Repository already exists at %s", + manager.checkout_path) return True repo_url = "https://github.com/google/oss-fuzz.git" diff --git a/ossfuzz_py/unittests/test_ossfuzz_sdk_comprehensive.py b/ossfuzz_py/unittests/test_ossfuzz_sdk_comprehensive.py new file mode 100644 index 000000000..88045e1ec --- /dev/null +++ b/ossfuzz_py/unittests/test_ossfuzz_sdk_comprehensive.py @@ -0,0 +1,626 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Comprehensive unit tests for the enhanced OSS-Fuzz SDK facade. + +This module tests all functionality of the comprehensive SDK facade including +build operations, execution operations, result management, benchmark management, +workflow orchestration, and historical data analysis. 
+""" + +import tempfile +import unittest +from pathlib import Path + +from ossfuzz_py.core.ossfuzz_sdk import (BuildOptions, BuildResult, OSSFuzzSDK, + PipelineOptions, PipelineResult, + RunOptions, RunResult, SDKConfig) + + +class TestOSSFuzzSDKComprehensive(unittest.TestCase): + """Comprehensive test suite for the enhanced OSS-Fuzz SDK facade.""" + + def setUp(self): + """Set up test environment.""" + self.temp_dir = tempfile.mkdtemp() + self.config = SDKConfig(storage_backend='local', + storage_path=self.temp_dir, + work_dir=self.temp_dir, + log_level='INFO') + self.sdk = OSSFuzzSDK('test_project', self.config) + + def tearDown(self): + """Clean up test environment.""" + import shutil + shutil.rmtree(self.temp_dir, ignore_errors=True) + + # Configuration Tests + + def test_sdk_config_creation(self): + """Test SDKConfig creation and conversion.""" + config = SDKConfig(storage_backend='gcs', + gcs_bucket_name='test-bucket', + enable_caching=False) + + config_dict = config.to_dict() + self.assertEqual(config_dict['storage_backend'], 'gcs') + self.assertEqual(config_dict['gcs_bucket_name'], 'test-bucket') + self.assertFalse(config_dict['enable_caching']) + + def test_sdk_initialization_with_config_object(self): + """Test SDK initialization with SDKConfig object.""" + config = SDKConfig(storage_backend='local', log_level='DEBUG') + sdk = OSSFuzzSDK('test_project', config) + + self.assertEqual(sdk.project_name, 'test_project') + self.assertEqual(sdk.sdk_config.storage_backend, 'local') + self.assertEqual(sdk.sdk_config.log_level, 'DEBUG') + + def test_sdk_initialization_with_dict_config(self): + """Test SDK initialization with dictionary config.""" + config_dict = {'storage_backend': 'local', 'log_level': 'WARNING'} + sdk = OSSFuzzSDK('test_project', config_dict) + + self.assertEqual(sdk.project_name, 'test_project') + self.assertEqual(sdk.sdk_config.storage_backend, 'local') + self.assertEqual(sdk.sdk_config.log_level, 'WARNING') + + def test_options_classes(self): + """Test options classes creation and properties.""" + # Test BuildOptions + build_opts = BuildOptions(sanitizer='memory', + architecture='arm64', + timeout_seconds=1800) + self.assertEqual(build_opts.sanitizer, 'memory') + self.assertEqual(build_opts.architecture, 'arm64') + self.assertEqual(build_opts.timeout_seconds, 1800) + + # Test RunOptions + run_opts = RunOptions(duration_seconds=600, + detect_leaks=False, + extract_coverage=True) + self.assertEqual(run_opts.duration_seconds, 600) + self.assertFalse(run_opts.detect_leaks) + self.assertTrue(run_opts.extract_coverage) + + # Test PipelineOptions + pipeline_opts = PipelineOptions(build_options=build_opts, + run_options=run_opts, + trials=3) + self.assertEqual(pipeline_opts.trials, 3) + self.assertEqual(pipeline_opts.build_options.sanitizer, 'memory') + self.assertEqual(pipeline_opts.run_options.duration_seconds, 600) + + # Build Operations Tests + + def test_build_fuzz_target_no_builder(self): + """Test build_fuzz_target when builder not available.""" + # SDK should not have builder available in test environment + target_spec = { + 'name': 'test_target', + 'source_code': '// Test source', + 'build_script': '// Test build script', + 'project_name': 'test_project', + 'language': 'c++' + } + + result = self.sdk.build_fuzz_target(target_spec) + + self.assertIsInstance(result, BuildResult) + self.assertFalse(result.success) + # Build components are available, but build fails due to missing directory + self.assertIn('Build failed', result.message) + + def 
test_build_benchmark_no_manager(self): + """Test build_benchmark when benchmark not found.""" + result = self.sdk.build_benchmark('test_benchmark') + + self.assertIsInstance(result, BuildResult) + self.assertFalse(result.success) + self.assertIn('Benchmark not found', result.message) + + def test_get_build_status(self): + """Test get_build_status method.""" + status = self.sdk.get_build_status('test_build_id') + + self.assertIsInstance(status, dict) + self.assertEqual(status['build_id'], 'test_build_id') + self.assertIn('status', status) + self.assertIn('timestamp', status) + + def test_get_build_artifacts(self): + """Test get_build_artifacts method.""" + artifacts = self.sdk.get_build_artifacts('test_build_id') + + self.assertIsInstance(artifacts, dict) + self.assertEqual(artifacts['build_id'], 'test_build_id') + self.assertIn('artifacts', artifacts) + + def test_list_recent_builds(self): + """Test list_recent_builds method.""" + builds = self.sdk.list_recent_builds(limit=5) + + self.assertIsInstance(builds, list) + # Should be empty since no build history available + + def test_list_recent_builds_with_filters(self): + """Test list_recent_builds with filters.""" + filters = {'status': 'success'} + builds = self.sdk.list_recent_builds(limit=10, filters=filters) + + self.assertIsInstance(builds, list) + + # Execution Operations Tests + + def test_run_fuzz_target_no_runner(self): + """Test run_fuzz_target when runner not available.""" + target_spec = { + 'name': 'test_target', + 'source_code': '// Test source', + 'project_name': 'test_project', + 'language': 'c++' + } + build_metadata = {'artifacts': {}} + + result = self.sdk.run_fuzz_target(target_spec, build_metadata) + + self.assertIsInstance(result, RunResult) + self.assertFalse(result.success) + # Check for actual error message about missing build_script + self.assertIn('Failed to run fuzz target', result.message) + + def test_run_benchmark_no_manager(self): + """Test run_benchmark when benchmark manager not available.""" + result = self.sdk.run_benchmark('test_benchmark') + + self.assertIsInstance(result, RunResult) + self.assertFalse(result.success) + # Should fail at build stage first + + def test_get_run_status(self): + """Test get_run_status method.""" + status = self.sdk.get_run_status('test_run_id') + + self.assertIsInstance(status, dict) + self.assertEqual(status['run_id'], 'test_run_id') + self.assertIn('status', status) + self.assertIn('timestamp', status) + + def test_get_run_results(self): + """Test get_run_results method.""" + results = self.sdk.get_run_results('test_run_id') + + self.assertIsInstance(results, dict) + self.assertEqual(results['run_id'], 'test_run_id') + self.assertIn('results', results) + + def test_list_recent_runs(self): + """Test list_recent_runs method.""" + runs = self.sdk.list_recent_runs(limit=5) + + self.assertIsInstance(runs, list) + # Should be empty since no run history available + + # Workflow Orchestration Tests + + def test_run_full_pipeline_no_components(self): + """Test run_full_pipeline when components not available.""" + options = PipelineOptions(trials=2) + result = self.sdk.run_full_pipeline('test_benchmark', options) + + self.assertIsInstance(result, PipelineResult) + self.assertFalse(result.success) + self.assertEqual(len(result.build_results), 2) # Should attempt all trials + # All builds should fail due to missing components + + def test_pipeline_options_defaults(self): + """Test PipelineOptions with default values.""" + options = PipelineOptions() + + 
self.assertEqual(options.trials, 1) + self.assertTrue(options.analyze_coverage) + self.assertTrue(options.store_results) + self.assertIsInstance(options.build_options, BuildOptions) + self.assertIsInstance(options.run_options, RunOptions) + + # Result Management Tests + + def test_get_benchmark_result_no_manager(self): + """Test get_benchmark_result when ResultManager not available.""" + result = self.sdk.get_benchmark_result('test_benchmark') + + self.assertIsNone(result) + + def test_get_benchmark_result_with_trial(self): + """Test get_benchmark_result with specific trial.""" + result = self.sdk.get_benchmark_result('test_benchmark', trial=1) + + self.assertIsNone(result) # No ResultManager available + + def test_get_benchmark_metrics_no_manager(self): + """Test get_benchmark_metrics when ResultManager not available.""" + metrics = self.sdk.get_benchmark_metrics('test_benchmark') + + self.assertIsInstance(metrics, dict) + # Should return empty metrics structure (not empty dict) + self.assertIn('compiles', metrics) + self.assertIn('crashes', metrics) + self.assertIn('coverage', metrics) + + def test_get_system_metrics_no_manager(self): + """Test get_system_metrics when ResultManager not available.""" + metrics = self.sdk.get_system_metrics() + + self.assertIsInstance(metrics, dict) + # Should return aggregated metrics structure (not empty dict) + self.assertIn('total_benchmarks', metrics) + self.assertIn('total_builds', metrics) + self.assertIn('build_success_rate', metrics) + + def test_get_coverage_trend_no_manager(self): + """Test get_coverage_trend when ResultManager not available.""" + trend = self.sdk.get_coverage_trend('test_benchmark', days=7) + + # Can be either list or DataFrame depending on pandas availability + if hasattr(trend, 'empty'): + # It's a DataFrame + self.assertTrue(trend.empty) # type: ignore + else: + # It's a list + self.assertIsInstance(trend, list) + self.assertEqual(len(trend), 0) + + def test_get_build_success_rate_no_manager(self): + """Test get_build_success_rate when ResultManager not available.""" + rate = self.sdk.get_build_success_rate('test_benchmark', days=7) + + self.assertIsInstance(rate, float) + self.assertEqual(rate, 0.0) + + def test_get_crash_summary_no_manager(self): + """Test get_crash_summary when ResultManager not available.""" + summary = self.sdk.get_crash_summary('test_benchmark', days=7) + + self.assertIsInstance(summary, dict) + # Should return crash summary structure (may have default values) + # Just check it's a dict, don't assume it's empty + + # Historical Data Tests (preserved functionality) + + def test_generate_project_report(self): + """Test generate_project_report method.""" + report = self.sdk.generate_project_report(days=7) + + self.assertIsInstance(report, dict) + self.assertIn('project_name', report) + self.assertEqual(report['project_name'], 'test_project') + + def test_get_project_summary(self): + """Test get_project_summary method.""" + summary = self.sdk.get_project_summary() + + self.assertIsInstance(summary, dict) + self.assertIn('project_name', summary) + + def test_analyze_fuzzing_efficiency(self): + """Test analyze_fuzzing_efficiency method.""" + efficiency = self.sdk.analyze_fuzzing_efficiency(days=7) + + self.assertIsInstance(efficiency, dict) + self.assertIn('overall_efficiency', efficiency) + + # Error Handling Tests + + def test_invalid_project_name(self): + """Test SDK initialization with invalid project name.""" + with self.assertRaises(Exception): + OSSFuzzSDK('', self.config) + + def 
test_error_handling_in_methods(self): + """Test error handling in various methods.""" + # All methods should handle errors gracefully and not raise exceptions + + # Build operations + self.assertIsInstance(self.sdk.get_build_status('invalid'), dict) + self.assertIsInstance(self.sdk.get_build_artifacts('invalid'), dict) + self.assertIsInstance(self.sdk.list_recent_builds(), list) + + # Run operations + self.assertIsInstance(self.sdk.get_run_status('invalid'), dict) + self.assertIsInstance(self.sdk.get_run_results('invalid'), dict) + self.assertIsInstance(self.sdk.list_recent_runs(), list) + + # Result operations + self.assertIsNone(self.sdk.get_benchmark_result('invalid')) + self.assertIsInstance(self.sdk.get_benchmark_metrics('invalid'), dict) + self.assertIsInstance(self.sdk.get_system_metrics(), dict) + + # Component Integration Tests + + def test_component_availability_checking(self): + """Test component availability checking.""" + # In test environment, most components should not be available + self.assertIsNotNone(self.sdk.storage) + # Other components may or may not be available depending on dependencies + + def test_environment_config_loading(self): + """Test environment configuration loading.""" + # Should not raise exceptions + self.sdk._load_config_from_env() # pylint: disable=protected-access + + # Config should still be valid + self.assertIsInstance(self.sdk.config, dict) + + def test_component_initialization(self): + """Test component initialization.""" + # Should not raise exceptions + self.sdk._initialize_components() # pylint: disable=protected-access + + # SDK should still be functional + self.assertEqual(self.sdk.project_name, 'test_project') + + # Result Classes Tests + + def test_build_result_creation(self): + """Test BuildResult creation and properties.""" + result = BuildResult(success=True, + message='Build successful', + artifacts={'binary': '/path/to/binary'}) + + self.assertTrue(result.success) + self.assertEqual(result.message, 'Build successful') + self.assertIn('binary', result.artifacts) + self.assertIsNotNone(result.build_id) + self.assertIsNotNone(result.timestamp) + + def test_run_result_creation(self): + """Test RunResult creation and properties.""" + result = RunResult(success=True, + message='Run completed', + crashes=False, + coverage_data={ + 'cov_pcs': 100, + 'total_pcs': 1000 + }) + + self.assertTrue(result.success) + self.assertEqual(result.message, 'Run completed') + self.assertFalse(result.crashes) + self.assertEqual(result.coverage_data['cov_pcs'], 100) + self.assertIsNotNone(result.run_id) + self.assertIsNotNone(result.timestamp) + + def test_pipeline_result_creation(self): + """Test PipelineResult creation and properties.""" + build_result = BuildResult(success=True, message='Build OK') + run_result = RunResult(success=True, message='Run OK') + + pipeline_result = PipelineResult(success=True, + message='Pipeline completed', + build_results=[build_result], + run_results=[run_result]) + + self.assertTrue(pipeline_result.success) + self.assertEqual(pipeline_result.message, 'Pipeline completed') + self.assertEqual(len(pipeline_result.build_results), 1) + self.assertEqual(len(pipeline_result.run_results), 1) + self.assertIsNotNone(pipeline_result.pipeline_id) + self.assertIsNotNone(pipeline_result.timestamp) + + # Export and Analysis Tests + + def test_export_results(self): + """Test export_results method.""" + benchmark_ids = ['bench1', 'bench2', 'bench3'] + + # Should handle missing ResultManager gracefully + try: + output_path = 
self.sdk.export_results(benchmark_ids, export_format='json') + + # Should create a file + self.assertTrue(Path(output_path).exists()) + + # Clean up + Path(output_path).unlink(missing_ok=True) + + except Exception as e: + # Should raise OSSFuzzSDKError for missing ResultManager + self.assertIn('ResultManager not available', str(e)) + + def test_export_results_with_custom_path(self): + """Test export_results with custom output path.""" + benchmark_ids = ['bench1'] + custom_path = Path(self.temp_dir) / 'custom_export.json' + + try: + output_path = self.sdk.export_results(benchmark_ids, + export_format='json', + output_path=str(custom_path)) + + self.assertEqual(output_path, str(custom_path)) + + except Exception as e: + # Should raise OSSFuzzSDKError for missing ResultManager + self.assertIn('ResultManager not available', str(e)) + + def test_generate_comparison_report(self): + """Test generate_comparison_report method.""" + benchmark_ids = ['bench1', 'bench2'] + + report = self.sdk.generate_comparison_report(benchmark_ids, days=7) + + self.assertIsInstance(report, dict) + self.assertIn('comparison_timestamp', report) + self.assertIn('benchmark_count', report) + self.assertEqual(report['benchmark_count'], 2) + self.assertIn('benchmarks', report) + + # Benchmark Management Tests + + def test_create_benchmark(self): + """Test create_benchmark method.""" + benchmark_spec = { + 'id': 'new_benchmark', + 'project': 'test_project', + 'function_name': 'test_function' + } + + result = self.sdk.create_benchmark(benchmark_spec) + + # Should return True since BenchmarkManager is available + self.assertTrue(result) + + def test_update_benchmark(self): + """Test update_benchmark method.""" + updates = {'description': 'Updated description'} + + result = self.sdk.update_benchmark('test_benchmark', updates) + + # Should return True since BenchmarkManager is available + self.assertTrue(result) + + def test_delete_benchmark(self): + """Test delete_benchmark method.""" + result = self.sdk.delete_benchmark('test_benchmark') + + # Should return True since BenchmarkManager is available + self.assertTrue(result) + + def test_list_benchmarks(self): + """Test list_benchmarks method.""" + benchmarks = self.sdk.list_benchmarks() + + self.assertIsInstance(benchmarks, list) + self.assertEqual(len(benchmarks), 0) # No BenchmarkManager available + + def test_list_benchmarks_with_filters(self): + """Test list_benchmarks with filters.""" + filters = {'language': 'c++'} + benchmarks = self.sdk.list_benchmarks(filters=filters) + + self.assertIsInstance(benchmarks, list) + self.assertEqual(len(benchmarks), 0) # No BenchmarkManager available + + def test_search_benchmarks(self): + """Test search_benchmarks method.""" + results = self.sdk.search_benchmarks('test', limit=5) + + self.assertIsInstance(results, list) + self.assertEqual(len(results), 0) # No BenchmarkManager available + + # Integration Tests + + def test_full_workflow_simulation(self): + """Test a complete workflow simulation.""" + # This tests the full API without requiring actual components + + # 1. Create options + build_opts = BuildOptions(sanitizer='address') + run_opts = RunOptions(duration_seconds=300) + pipeline_opts = PipelineOptions(build_options=build_opts, + run_options=run_opts, + trials=1) + + # 2. Run pipeline (should fail gracefully) + result = self.sdk.run_full_pipeline('test_benchmark', pipeline_opts) + self.assertIsInstance(result, PipelineResult) + self.assertFalse(result.success) # Expected to fail without components + + # 3. 
Check status + build_status = self.sdk.get_build_status('test_build') + self.assertIsInstance(build_status, dict) + + run_status = self.sdk.get_run_status('test_run') + self.assertIsInstance(run_status, dict) + + # 4. Get metrics + metrics = self.sdk.get_benchmark_metrics('test_benchmark') + self.assertIsInstance(metrics, dict) + + # 5. Generate report + report = self.sdk.generate_project_report(days=1) + self.assertIsInstance(report, dict) + + def test_api_consistency(self): + """Test API consistency and method availability.""" + # Check that all expected methods exist + expected_methods = [ + # Build operations + 'build_fuzz_target', + 'build_benchmark', + 'get_build_status', + 'get_build_artifacts', + 'list_recent_builds', + + # Execution operations + 'run_fuzz_target', + 'run_benchmark', + 'get_run_status', + 'get_run_results', + 'list_recent_runs', + + # Workflow orchestration + 'run_full_pipeline', + + # Result management + 'get_benchmark_result', + 'get_benchmark_metrics', + 'get_system_metrics', + 'get_coverage_trend', + 'get_build_success_rate', + 'get_crash_summary', + + # Benchmark management + 'create_benchmark', + 'update_benchmark', + 'delete_benchmark', + 'list_benchmarks', + 'search_benchmarks', + + # Export and analysis + 'export_results', + 'generate_comparison_report', + + # Historical data (preserved) + 'generate_project_report', + 'get_project_summary', + 'analyze_fuzzing_efficiency' + ] + + for method_name in expected_methods: + self.assertTrue(hasattr(self.sdk, method_name), + f"Method {method_name} not found") + self.assertTrue(callable(getattr(self.sdk, method_name)), + f"Method {method_name} not callable") + + def test_method_signatures(self): + """Test method signatures for consistency.""" + import inspect + + # Test key method signatures + build_target_sig = inspect.signature(self.sdk.build_fuzz_target) + self.assertIn('target_spec', build_target_sig.parameters) + self.assertIn('options', build_target_sig.parameters) + + run_pipeline_sig = inspect.signature(self.sdk.run_full_pipeline) + self.assertIn('benchmark_id', run_pipeline_sig.parameters) + self.assertIn('options', run_pipeline_sig.parameters) + + export_sig = inspect.signature(self.sdk.export_results) + self.assertIn('benchmark_ids', export_sig.parameters) + self.assertIn('export_format', export_sig.parameters) + + +if __name__ == '__main__': + unittest.main() diff --git a/ossfuzz_py/unittests/test_result_manager.py b/ossfuzz_py/unittests/test_result_manager.py new file mode 100644 index 000000000..a7177cc33 --- /dev/null +++ b/ossfuzz_py/unittests/test_result_manager.py @@ -0,0 +1,283 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Simple unit tests for ResultManager using mock HistoryManagers. + +This module tests the core functionality of ResultManager without requiring +external dependencies like pydantic, yaml, etc. 
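+
+The four HistoryManager dependencies (build, crash, corpus and coverage) are
+replaced with unittest.mock.Mock objects in setUp(), configured to return
+empty histories and zeroed statistics, so ResultManager's storage and
+aggregation paths can be exercised in isolation.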
+""" + +import unittest +from datetime import datetime + +from ossfuzz_py.core.benchmark_manager import Benchmark +from ossfuzz_py.result.result_manager import ResultManager +from ossfuzz_py.result.results import (AnalysisInfo, BuildInfo, + CoverageAnalysis, Result, RunInfo) + + +class TestResultManagerSimple(unittest.TestCase): + """Test ResultManager with mock HistoryManager classes.""" + + def setUp(self): + """Set up test environment with mock HistoryManagers.""" + # Create mock HistoryManager instances with proper mock behavior + from unittest.mock import Mock + self.build_mgr = Mock() + self.crash_mgr = Mock() + self.corpus_mgr = Mock() + self.coverage_mgr = Mock() + + # Configure mock methods to return appropriate empty data + self.build_mgr.get_build_history.return_value = [] + self.build_mgr.get_build_statistics.return_value = { + 'total_builds': 0, + 'successful_builds': 0 + } + self.build_mgr.store_build_result.return_value = True + + self.crash_mgr.get_crash_history.return_value = [] + self.crash_mgr.get_crash_statistics.return_value = {'total_crashes': 0} + self.crash_mgr.store_crash.return_value = True + self.crash_mgr.check_duplicate_crash.return_value = False + + self.corpus_mgr.get_corpus_history.return_value = [] + self.corpus_mgr.store_corpus.return_value = True + + self.coverage_mgr.get_coverage_history.return_value = [] + self.coverage_mgr.store_coverage.return_value = True + + # Create ResultManager + self.result_manager = ResultManager( + build_mgr=self.build_mgr, + crash_mgr=self.crash_mgr, + corpus_mgr=self.corpus_mgr, + coverage_mgr=self.coverage_mgr, + ) + + def test_result_manager_creation(self): + """Test that ResultManager can be created successfully.""" + self.assertIsNotNone(self.result_manager) + self.assertEqual(self.result_manager.build_mgr, self.build_mgr) + self.assertEqual(self.result_manager.crash_mgr, self.crash_mgr) + self.assertEqual(self.result_manager.corpus_mgr, self.corpus_mgr) + self.assertEqual(self.result_manager.coverage_mgr, self.coverage_mgr) + + def test_store_and_retrieve_build_result(self): + """Test storing and retrieving a build result.""" + # Create test benchmark + benchmark = Benchmark( + project='test_project', + language='c++', + function_signature='int test_function(const char* input)', + function_name='test_function', + return_type='int', + target_path='/path/to/test.h', + id='test_benchmark_build', + ) + + # Create build result + build_info = BuildInfo( + compiles=True, + compile_log='Build successful', + errors=[], + binary_exists=True, + is_function_referenced=True, + fuzz_target_source='// Test fuzz target source', + build_script_source='// Test build script', + ) + + result = Result( + benchmark=benchmark, + work_dirs='/tmp/work', + trial=1, + build_info=build_info, + ) + + # Store the result + benchmark_id = 'test_benchmark_build' + try: + self.result_manager.store_result(benchmark_id, result) + except Exception as e: + self.fail(f"store_result should not raise exception: {e}") + + def test_get_metrics_with_no_data(self): + """Test getting metrics when no data is available.""" + metrics = self.result_manager.get_metrics('nonexistent_benchmark') + + # Should return empty metrics structure + self.assertIsInstance(metrics, dict) + self.assertIn('compiles', metrics) + self.assertIn('crashes', metrics) + self.assertIn('coverage', metrics) + self.assertIn('benchmark_id', metrics) + + def test_get_aggregated_metrics(self): + """Test getting aggregated metrics across all benchmarks.""" + metrics = 
self.result_manager.get_metrics() + + # Should return aggregated metrics structure + self.assertIsInstance(metrics, dict) + self.assertIn('total_benchmarks', metrics) + self.assertIn('total_builds', metrics) + self.assertIn('build_success_rate', metrics) + self.assertIn('timestamp', metrics) + + def test_get_trial_result_nonexistent(self): + """Test getting trial result for non-existent trial.""" + result = self.result_manager.get_trial_result('nonexistent_benchmark', 1) + self.assertIsNone(result) + + def test_coverage_trend(self): + """Test coverage trend functionality.""" + start_date = datetime.now() + end_date = datetime.now() + + trend_data = self.result_manager.coverage_trend('test_benchmark', + start_date, end_date) + + # Should return empty DataFrame (since no data) + # Check if it's a pandas DataFrame or a list + if hasattr(trend_data, 'empty'): + # It's a DataFrame + self.assertTrue(trend_data.empty) # type: ignore + else: + # It's a list + self.assertIsInstance(trend_data, list) + self.assertEqual(len(trend_data), 0) + + def test_latest_successful_build(self): + """Test getting latest successful build.""" + result = self.result_manager.latest_successful_build('test_benchmark') + # Should return None since no data + self.assertIsNone(result) + + def test_get_build_success_rate(self): + """Test getting build success rate.""" + rate = self.result_manager.get_build_success_rate('test_benchmark') + # Should return 0.0 since no data + self.assertIsInstance(rate, float) + self.assertEqual(rate, 0.0) + + def test_get_crash_summary(self): + """Test getting crash summary.""" + summary = self.result_manager.get_crash_summary('test_benchmark') + # Should return empty dict since no data + self.assertIsInstance(summary, dict) + + def test_store_result_with_run_info(self): + """Test storing result with run info.""" + benchmark = Benchmark( + project='test_project', + language='c++', + function_signature='int test_function(const char* input)', + function_name='test_function', + return_type='int', + target_path='/path/to/test.h', + id='test_benchmark_run', + ) + + run_info = RunInfo( + crashes=True, + run_log='Fuzzer run log', + corpus_path='/tmp/corpus', + cov_pcs=100, + total_pcs=1000, + crash_info='Test crash info', + ) + + result = Result( + benchmark=benchmark, + work_dirs='/tmp/work', + trial=1, + run_info=run_info, + ) + + # Store the result + benchmark_id = 'test_benchmark_run' + try: + self.result_manager.store_result(benchmark_id, result) + except Exception as e: + self.fail(f"store_result with run_info should not raise exception: {e}") + + def test_store_result_with_analysis_info(self): + """Test storing result with analysis info.""" + benchmark = Benchmark( + project='test_project', + language='c++', + function_signature='int test_function(const char* input)', + function_name='test_function', + return_type='int', + target_path='/path/to/test.h', + id='test_benchmark_analysis', + ) + + coverage_analysis = CoverageAnalysis( + line_coverage=75.5, + line_coverage_diff=15.2, + coverage_report_path='/tmp/coverage_report', + textcov_diff=None, + cov_pcs=100, + total_pcs=1000, + ) + + analysis_info = AnalysisInfo(coverage_analysis=coverage_analysis) + + result = Result( + benchmark=benchmark, + work_dirs='/tmp/work', + trial=1, + analysis_info=analysis_info, + ) + + # Store the result + benchmark_id = 'test_benchmark_analysis' + try: + self.result_manager.store_result(benchmark_id, result) + except Exception as e: + self.fail( + f"store_result with analysis_info should not raise exception: 
{e}") + + def test_error_handling(self): + """Test error handling for invalid inputs.""" + # Test with invalid benchmark_id and result + try: + # Use type: ignore to suppress type checker warnings for intentional test + self.result_manager.store_result(None, None) # type: ignore + except Exception: + pass # Expected to fail + + # Test with invalid trial_id - should return None gracefully + result = self.result_manager.get_trial_result('test', -1) + # Should handle gracefully and return None + self.assertIsNone(result) + + def test_helper_methods(self): + """Test helper methods work correctly.""" + # Test _create_minimal_benchmark + benchmark = self.result_manager._create_minimal_benchmark('test_id') # pylint: disable=protected-access + self.assertIsInstance(benchmark, Benchmark) + # The ID is auto-generated, so just check it's not empty + self.assertIsNotNone(benchmark.id) + self.assertTrue(len(benchmark.id) > 0) + + # Test _get_empty_metrics + empty_metrics = self.result_manager._get_empty_metrics() # pylint: disable=protected-access + self.assertIsInstance(empty_metrics, dict) + self.assertIn('compiles', empty_metrics) + self.assertIn('crashes', empty_metrics) + + +if __name__ == '__main__': + unittest.main() diff --git a/ossfuzz_py/utils/env_vars.py b/ossfuzz_py/utils/env_vars.py index 3d0794443..a534576fc 100644 --- a/ossfuzz_py/utils/env_vars.py +++ b/ossfuzz_py/utils/env_vars.py @@ -59,3 +59,8 @@ class EnvVars(str, Enum): OSSFUZZ_CLIENT_SECRET = "OSSFUZZ_CLIENT_SECRET" OSSFUZZ_TOKEN_URL = "OSSFUZZ_TOKEN_URL" OSSFUZZ_API_KEY = "OSSFUZZ_API_KEY" + + # Historical Data SDK specific variables + GCS_BUCKET_NAME = "GCS_BUCKET_NAME" + OSSFUZZ_HISTORY_STORAGE_BACKEND = "OSSFUZZ_HISTORY_STORAGE_BACKEND" + OSSFUZZ_HISTORY_STORAGE_PATH = "OSSFUZZ_HISTORY_STORAGE_PATH"