diff --git a/config/defaults/presets.yaml b/config/defaults/presets.yaml new file mode 100644 index 0000000..0cc697d --- /dev/null +++ b/config/defaults/presets.yaml @@ -0,0 +1,64 @@ +# DeepSearch Configuration Presets +# Default configurations for various research scenarios +# +# Each preset combines provider, model, parameters, timeout, and prompt template +# for optimized performance in specific use cases. + +presets: + perplexity-sonar-pro: + description: "Academic research optimized for Perplexity with high reasoning effort" + provider: perplexity + model: sonar-reasoning-pro + timeout: 180 + prompt_template: gene_analysis_academic + provider_params: + return_citations: true + reasoning_effort: high # low, medium, high + search_recency_filter: month # hour, day, week, month, year + search_domain_filter: + - "pubmed.ncbi.nlm.nih.gov" + - "ncbi.nlm.nih.gov/pmc/" + - "www.ncbi.nlm.nih.gov" + - "europepmc.org" + - "biorxiv.org" + - "nature.com" + - "cell.com" + - "science.org" + system_prompt: null # Will be set dynamically with JSON schema + + perplexity-deep-research_JSON: + description: "Academic research optimized for Perplexity deep search, returning structured results." + provider: perplexity + model: sonar-deep-research + timeout: 500 + prompt_template: gene_analysis_deep_research + provider_params: + return_citations: true + # reasoning_effort: high # low, medium, high. Not applicable to deep-research + search_recency_filter: null # hour, day, week, month, year + search_domain_filter: + - "pubmed.ncbi.nlm.nih.gov" + - "ncbi.nlm.nih.gov/pmc/" + - "www.ncbi.nlm.nih.gov" + - "europepmc.org" + - "biorxiv.org" + - "nature.com" + - "cell.com" + - "science.org" + system_prompt: Respond with JSON conforming to the provided schema - no prose, no markdown. If you are unable to respond with JSON alone, make sure the text includes code-fenced schema compliant JSON covering all report content. If that is not possible, ensure the report includes tables that unambiguously capture the elements and their associations that would be captured if returning schema compliant JSON. Provided schema ```JSON {schema}``` + # Example additional preset for fast analysis + perplexity-fast: + description: "Fast analysis with lower reasoning effort for quick experiments" + provider: perplexity + model: sonar-small + timeout: 60 + prompt_template: gene_analysis_structured + provider_params: + return_citations: true + reasoning_effort: low + search_recency_filter: week + search_domain_filter: + - "pubmed.ncbi.nlm.nih.gov" + - "ncbi.nlm.nih.gov/pmc/" + - "nature.com" + system_prompt: null \ No newline at end of file diff --git a/config/defaults/templates.yaml b/config/defaults/templates.yaml new file mode 100644 index 0000000..10fc275 --- /dev/null +++ b/config/defaults/templates.yaml @@ -0,0 +1,120 @@ +# DeepSearch Prompt Templates +# Reusable prompt templates for different analysis approaches and provider optimizations +# +# Templates use {genes} and {context} placeholders that will be substituted at runtime + +templates: + gene_analysis_academic: + description: "Academic research-focused analysis with comprehensive literature review strategy" + optimized_for: [perplexity, consensus] + supports_json_schema: true + schema_instruction: | + You are an expert biologist. Analyze the provided genes in the given biological context. + + CRITICAL: Respond ONLY with valid JSON that exactly follows this schema structure: + {schema} + + Do not include any prose, markdown, explanatory text, or tags. Only the JSON structure. 
+ template: | + Perform comprehensive literature analysis for the following gene list in the + specified biological context. + + **Gene List**: {genes} + + **Biological Context**: {context} + + **Analysis Strategy**: + 1. Search current scientific literature for functional roles of each gene in the input list + 2. Identify clusters of genes that act together in pathways, processes, or cellular states + 3. Treat each cluster as a potential gene program within the list + 4. Interpret findings in light of both normal physiological roles and disease-specific alterations + 5. Prioritize well-established functions with strong literature support, but highlight emerging + evidence if contextually relevant + + **Guidelines**: + * Anchor all predictions in either the normal physiology and development of the cell type and + tissue specified in the context OR the alterations and dysregulations characteristic of the + specified disease + * Connect gene-level roles to program-level implications + * Consider gene interactions, regulatory networks, and pathway dynamics + * Highlight cases where multiple genes collectively strengthen evidence + * Ensure all claims are backed by experimental evidence with proper attribution + + Provide a structured analysis identifying biological programs and their predicted cellular + impacts within the given context. + + gene_analysis_structured: + description: "Structured analysis optimized for clear, systematic gene program identification" + optimized_for: [openai, edison] + supports_json_schema: true + schema_instruction: | + You are an expert biologist conducting systematic gene analysis. + + Please structure your response as JSON following this exact format: + {schema} + + Ensure all required fields are populated with accurate biological data and evidence. + template: | + Analyze the following genes in the specified biological context and provide + structured findings. + + **Genes to Analyze**: {genes} + + **Context**: {context} + + **Required Analysis**: + 1. For each gene, identify its primary biological functions + 2. Group genes by common pathways or biological processes + 3. Identify potential gene programs (clusters of functionally related genes) + 4. Assess the relevance to the specified biological context + + **Output Requirements**: + - Focus on well-established, peer-reviewed findings + - Prioritize recent research when available + - Include pathway and process associations + - Highlight gene-gene interactions where relevant + + Provide a systematic analysis organized by biological programs and functional clusters. + + gene_analysis_deep_research: + description: "Deep research analysis optimized for Perplexity deep research models with comprehensive findings" + optimized_for: [perplexity] + supports_json_schema: true + schema_instruction: | + You are an expert biologist conducting comprehensive deep research on genes in biological context. + + Please format your detailed analysis as JSON following this structure: + {schema} + + Include thorough findings with proper citations and evidence. Ensure all analysis is backed by current scientific literature. + template: | + Conduct comprehensive deep research and literature analysis for the following gene list within the specified biological context. + + **Gene List**: {genes} + + **Biological Context**: {context} + + **Deep Research Strategy**: + 1. Perform extensive literature search across current scientific databases + 2. Analyze functional roles and molecular mechanisms of each gene + 3. 
Identify functional clusters and biological programs within the gene list + 4. Examine gene interactions, regulatory networks, and pathway dynamics + 5. Assess both normal physiological roles and disease-associated alterations + 6. Prioritize well-established functions while highlighting emerging evidence + + **Research Guidelines**: + * Conduct thorough investigation of recent publications and reviews + * Focus on experimental evidence and peer-reviewed findings + * Consider tissue-specific and context-dependent gene functions + * Analyze gene co-expression patterns and functional annotations + * Include both direct and indirect gene interactions + * Provide detailed citations for all claims and findings + + **Output Requirements**: + * Comprehensive analysis with detailed biological insights + * Strong evidence base with proper attribution + * Clear identification of biological programs and functional clusters + * Assessment of predicted cellular impacts within the given context + * Integration of findings into coherent biological narrative + + Provide a thorough, evidence-based analysis identifying biological programs and their predicted cellular impacts within the specified context. diff --git a/config/schemas/preset-schema.yaml b/config/schemas/preset-schema.yaml new file mode 100644 index 0000000..63735e3 --- /dev/null +++ b/config/schemas/preset-schema.yaml @@ -0,0 +1,71 @@ +# JSON Schema for validating DeepSearch preset configurations +$schema: http://json-schema.org/draft-07/schema# +title: DeepSearch Preset Configuration Schema +description: Schema for validating DeepSearch configuration presets + +type: object +properties: + presets: + type: object + patternProperties: + "^[a-zA-Z0-9_-]+$": # Preset names: alphanumeric, underscore, hyphen + type: object + required: + - description + - provider + - model + - timeout + - prompt_template + - provider_params + properties: + description: + type: string + minLength: 10 + description: "Human-readable description of the preset" + + provider: + type: string + enum: [perplexity, openai, anthropic, edison, consensus] + description: "Research provider name" + + model: + type: string + minLength: 1 + description: "Model name for the provider" + + timeout: + type: integer + minimum: 30 + maximum: 600 + description: "Request timeout in seconds" + + prompt_template: + type: string + minLength: 1 + description: "Name of prompt template to use" + + provider_params: + type: object + description: "Provider-specific parameters" + properties: + return_citations: + type: boolean + reasoning_effort: + type: string + enum: [low, medium, high] + search_recency_filter: + type: [string, "null"] + enum: [hour, day, week, month, year, null] + search_domain_filter: + type: array + items: + type: string + format: hostname + system_prompt: + type: [string, "null"] + additionalProperties: true # Allow provider-specific params + additionalProperties: false + additionalProperties: false +required: + - presets +additionalProperties: false \ No newline at end of file diff --git a/config/schemas/template-schema.yaml b/config/schemas/template-schema.yaml new file mode 100644 index 0000000..39d05ba --- /dev/null +++ b/config/schemas/template-schema.yaml @@ -0,0 +1,49 @@ +# JSON Schema for validating DeepSearch template configurations +$schema: http://json-schema.org/draft-07/schema# +title: DeepSearch Template Configuration Schema +description: Schema for validating DeepSearch prompt templates + +type: object +properties: + templates: + type: object + 
patternProperties: + "^[a-zA-Z0-9_-]+$": # Template names: alphanumeric, underscore, hyphen + type: object + required: + - description + - optimized_for + - supports_json_schema + - template + properties: + description: + type: string + minLength: 10 + description: "Human-readable description of the template" + + optimized_for: + type: array + items: + type: string + enum: [perplexity, openai, anthropic, edison, consensus] + minItems: 1 + description: "List of providers this template is optimized for" + + supports_json_schema: + type: boolean + description: "Whether this template supports structured JSON output" + + schema_instruction: + type: string + minLength: 20 + description: "Custom instruction for schema compliance, use {schema} placeholder" + + template: + type: string + minLength: 50 + description: "The prompt template text with {genes} and {context} placeholders" + additionalProperties: false + additionalProperties: false +required: + - templates +additionalProperties: false \ No newline at end of file diff --git a/config/user/.gitignore b/config/user/.gitignore new file mode 100644 index 0000000..cb35bd7 --- /dev/null +++ b/config/user/.gitignore @@ -0,0 +1,3 @@ +# User configuration overrides - not tracked in git +*.yaml +*.yml \ No newline at end of file diff --git a/inputs/glioblastoma/test_context.txt b/inputs/glioblastoma/test_context.txt new file mode 100644 index 0000000..a4184e6 --- /dev/null +++ b/inputs/glioblastoma/test_context.txt @@ -0,0 +1 @@ +malignant glioblastoma cells \ No newline at end of file diff --git a/inputs/glioblastoma/test_genes.txt b/inputs/glioblastoma/test_genes.txt new file mode 100644 index 0000000..770b8ff --- /dev/null +++ b/inputs/glioblastoma/test_genes.txt @@ -0,0 +1 @@ +"CFAP43", "NEGR1", "DNAH12", "LRRC2", "VAT1L", "ZNF804B", "RBMS3", "SLC14A1", "GABRA5", "ZBBX", "ADAMTS18", "CFAP52", "GRM1", "MAP3K19", "FHAD1", "TCTEX1D1", "DNAAF1", "DCDC2", "AC005165.1", "COL21A1", "PKHD1", "ZNF521", "EPB41L4B", "ERICH3", "PLAGL1", "EXPH5", "SHISAL2B", "SATB1-AS1", "RERGL", "FRMPD2", "TOGARAM2", "AP003062.2", "BMP6", "NRG3", "CFAP61", "FAM81B", "SLC47A2", "TMEM232", "NWD2", "AC109466.1", "GABRG3", "DTHD1", "COL13A1", "COL23A1", "CFAP73", "RFTN1", "FYB2", "POSTN", "AL513323.1", "BANK1", "CHD5", "THBS1", "ADCY8", "ADGB", "AFF2", "DRC1", "CFAP206", "CFAP47", "PPM1H", "KIAA2012", "MAP7", "KSR2", "DNAH5", "LYPD6B", "WSCD2", "CACNA2D1", "LRRIQ1", "CPNE4", "LINC01088", "SCIN", "PRMT8", "LINGO2", "CASC1", "CCDC170", "AC092110.1", "VWA3A", "CA10", "AC013470.2", "SLC22A3", "GRM4", "COL26A1", "CFAP221", "CFAP157", "TTC29", "C7orf57", "HMCN1", "CFAP100", "U91319.1", "RSPH1", "NAALAD2", "IL6R", "CDH7", "KCNJ3", "AL356108.1" \ No newline at end of file diff --git a/planning/backwards_compatibility_bug_ds.md b/planning/backwards_compatibility_bug_ds.md new file mode 100644 index 0000000..9c873ec --- /dev/null +++ b/planning/backwards_compatibility_bug_ds.md @@ -0,0 +1,16 @@ + +scripts/run_deepsearch.py +Comment on lines +457 to +459 +```python + + # Backward compatibility +service = DeepSearchService(preferred_provider=args.preferred_provider, **config_overrides) +``` + +Copilot AI +2 minutes ago +When using backward compatibility mode (no preset specified) and providing config_overrides, the overrides are applied but there's a logical issue: if args.preferred_provider is specified along with args.model or provider parameter overrides, the initialization on line 458 passes both preferred_provider and **config_overrides. 
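+
+A minimal sketch of applying the overrides in the backward-compatibility path as well (the `resolve_config` helper name is illustrative, not part of the codebase): fold `preferred_provider` into the same override dict used in the preset path and merge everything once the preset is loaded.
+
+```python
+from __future__ import annotations
+
+from langpa.services.yaml_config import DeepSearchConfig, get_config_manager
+
+
+def resolve_config(
+    preset: str | None,
+    preferred_provider: str | None,
+    config_overrides: dict,
+    config_dir: str | None = None,
+) -> DeepSearchConfig:
+    """Load the preset and apply CLI overrides in both init paths (sketch)."""
+    manager = get_config_manager(config_dir)
+    config = manager.get_preset_config(preset or "perplexity-sonar-pro")
+
+    # Treat preferred_provider as just another override, then merge everything,
+    # so overrides are honored even when no preset is given.
+    final_overrides = dict(config_overrides)
+    if preferred_provider:
+        final_overrides["provider"] = preferred_provider
+    if final_overrides:
+        config = manager.merge_config_overrides(config, final_overrides)
+    return config
+```
+
+With this in place, --model, --reasoning-effort, and --search-recency take effect whether or not --preset is supplied.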
+ +Looking at the DeepSearchService code, when preset is None, it loads the default "perplexity-sonar-pro" preset and only applies the preferred_provider override if it's specified (line 55-57 in deepsearch_service.py). However, the **config_overrides passed here are ignored in backward compatibility mode because the service doesn't merge them. + +This means that in backward compatibility mode, --model, --reasoning-effort, and --search-recency arguments will be silently ignored, which could be confusing for users. Either document this limitation or ensure config_overrides are also applied in backward compatibility mode. \ No newline at end of file diff --git a/scripts/manage_config.py b/scripts/manage_config.py new file mode 100755 index 0000000..9967e14 --- /dev/null +++ b/scripts/manage_config.py @@ -0,0 +1,351 @@ +#!/usr/bin/env python3 +"""Configuration management CLI for DeepSearch YAML configurations. + +Provides commands for managing presets and templates without editing code. +""" + +from __future__ import annotations + +import argparse +import shutil +import subprocess +import sys +from pathlib import Path + +import yaml + +# Add src to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent / "src")) + +from langpa.services.yaml_config import ConfigurationError, YAMLConfigManager + + +def list_presets(config_manager: YAMLConfigManager) -> None: + """List all available presets with descriptions.""" + try: + presets = config_manager.list_available_presets() + if not presets: + print("No presets found.") + return + + print("Available Presets:") + print("-" * 50) + for name, description in presets.items(): + print(f" {name:20} - {description}") + except ConfigurationError as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) + + +def list_templates(config_manager: YAMLConfigManager) -> None: + """List all available templates with descriptions.""" + try: + templates = config_manager.list_available_templates() + if not templates: + print("No templates found.") + return + + print("Available Templates:") + print("-" * 50) + for name, description in templates.items(): + print(f" {name:20} - {description}") + except ConfigurationError as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) + + +def show_preset(config_manager: YAMLConfigManager, preset_name: str) -> None: + """Show complete details for a specific preset.""" + try: + config = config_manager.get_preset_config(preset_name) + + print(f"Preset: {preset_name}") + print("=" * (len(preset_name) + 8)) + print(f"Description: {config.description}") + print(f"Provider: {config.provider}") + print(f"Model: {config.model}") + print(f"Timeout: {config.timeout}s") + print(f"Prompt Template: {config.prompt_template}") + print("\nProvider Parameters:") + print(yaml.dump(config.provider_params, indent=2, default_flow_style=False)) + except ConfigurationError as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) + + +def show_template(config_manager: YAMLConfigManager, template_name: str) -> None: + """Show complete details for a specific template.""" + try: + template = config_manager.get_template(template_name) + metadata = config_manager.get_template_metadata(template_name) + + print(f"Template: {template_name}") + print("=" * (len(template_name) + 10)) + print(f"Description: {metadata['description']}") + print(f"Supports JSON Schema: {metadata['supports_json_schema']}") + print(f"Optimized For: {', '.join(metadata['optimized_for'])}") + + # Show schema instruction if present + if 
metadata.get("schema_instruction"): + print("\nSchema Instruction:") + print("-" * 40) + print(metadata["schema_instruction"]) + + print("\nTemplate Text:") + print("-" * 40) + print(template["template"]) + print("-" * 40) + print("\nExample Usage:") + print(" genes: ['TP53', 'BRCA1']") + print(" context: 'cancer tumor suppressor genes'") + + # Show formatted example + example_genes = ["TP53", "BRCA1"] + example_context = "cancer tumor suppressor genes" + formatted = config_manager.format_prompt_template( + template_name, example_genes, example_context + ) + print("\nFormatted Example (first 200 chars):") + print(formatted[:200] + "..." if len(formatted) > 200 else formatted) + except ConfigurationError as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) + + +def validate_config(config_manager: YAMLConfigManager, preset_name: str | None = None) -> None: + """Validate configuration files.""" + try: + if preset_name: + # Validate specific preset + config_manager.get_preset_config(preset_name) + print(f"✅ Preset '{preset_name}' is valid") + else: + # Validate all configurations + presets = config_manager.list_available_presets() + templates = config_manager.list_available_templates() + + print(f"✅ Loaded {len(presets)} presets successfully") + print(f"✅ Loaded {len(templates)} templates successfully") + print("✅ All configurations are valid") + except ConfigurationError as e: + print(f"❌ Validation failed: {e}", file=sys.stderr) + sys.exit(1) + + +def get_user_config_path(config_manager: YAMLConfigManager) -> Path: + """Get path to user configuration directory, creating if needed.""" + if config_manager.config_dir: + user_config_dir = config_manager.config_dir / "user" + else: + user_config_dir = Path.cwd() / "config" / "user" + + user_config_dir.mkdir(parents=True, exist_ok=True) + return user_config_dir + + +def copy_preset(config_manager: YAMLConfigManager, source_name: str, target_name: str) -> None: + """Copy a preset to create a new one.""" + try: + # Get source preset + source_config = config_manager.get_preset_config(source_name) + + # Load existing user presets or create new structure + user_config_dir = get_user_config_path(config_manager) + user_presets_file = user_config_dir / "presets.yaml" + + if user_presets_file.exists(): + with open(user_presets_file) as f: + user_data = yaml.safe_load(f) or {} + else: + user_data = {} + + if "presets" not in user_data: + user_data["presets"] = {} + + # Add new preset + user_data["presets"][target_name] = { + "description": f"Copy of {source_name} - {source_config.description}", + "provider": source_config.provider, + "model": source_config.model, + "timeout": source_config.timeout, + "prompt_template": source_config.prompt_template, + "provider_params": source_config.provider_params, + } + + # Write updated configuration + with open(user_presets_file, "w") as f: + yaml.dump(user_data, f, indent=2, default_flow_style=False) + + print(f"✅ Created preset '{target_name}' as copy of '{source_name}'") + print(f"📝 Edit with: python scripts/manage_config.py edit-preset {target_name}") + except ConfigurationError as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) + except Exception as e: + print(f"Error creating preset: {e}", file=sys.stderr) + sys.exit(1) + + +def edit_preset(config_manager: YAMLConfigManager, preset_name: str) -> None: + """Open preset for editing in user's default editor.""" + try: + user_config_dir = get_user_config_path(config_manager) + user_presets_file = user_config_dir / "presets.yaml" + + # Create file if 
it doesn't exist + if not user_presets_file.exists(): + print(f"Creating new user presets file at {user_presets_file}") + user_data = {"presets": {}} + with open(user_presets_file, "w") as f: + yaml.dump(user_data, f, indent=2, default_flow_style=False) + + # Open in editor + editor = shutil.which("code") or shutil.which("vim") or shutil.which("nano") + if not editor: + print(f"Please edit the file manually: {user_presets_file}") + return + + print(f"Opening {user_presets_file} in {editor}...") + subprocess.run([editor, str(user_presets_file)]) + + # Validate after editing + try: + config_manager._presets_cache = None # Clear cache + if preset_name in config_manager.list_available_presets(): + config_manager.get_preset_config(preset_name) + print(f"✅ Preset '{preset_name}' is valid after editing") + else: + print(f"ℹ️ Preset '{preset_name}' not found - may have been renamed or removed") + except ConfigurationError as e: + print(f"⚠️ Warning: Configuration has validation errors: {e}") + + except Exception as e: + print(f"Error editing preset: {e}", file=sys.stderr) + sys.exit(1) + + +def delete_preset(config_manager: YAMLConfigManager, preset_name: str) -> None: + """Delete a user preset.""" + try: + user_config_dir = get_user_config_path(config_manager) + user_presets_file = user_config_dir / "presets.yaml" + + if not user_presets_file.exists(): + print(f"No user presets file found. Cannot delete '{preset_name}'") + sys.exit(1) + + with open(user_presets_file) as f: + user_data = yaml.safe_load(f) or {} + + if "presets" not in user_data or preset_name not in user_data["presets"]: + print(f"Preset '{preset_name}' not found in user configurations") + sys.exit(1) + + del user_data["presets"][preset_name] + + with open(user_presets_file, "w") as f: + yaml.dump(user_data, f, indent=2, default_flow_style=False) + + print(f"✅ Deleted preset '{preset_name}'") + except Exception as e: + print(f"Error deleting preset: {e}", file=sys.stderr) + sys.exit(1) + + +def main() -> None: + """Main CLI entry point.""" + parser = argparse.ArgumentParser( + description="Manage DeepSearch YAML configuration presets and templates", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # List available configurations + python scripts/manage_config.py list-presets + python scripts/manage_config.py list-templates + + # Show configuration details + python scripts/manage_config.py show-preset perplexity-sonar-pro + python scripts/manage_config.py show-template gene_analysis_academic + + # Create and manage custom presets + python scripts/manage_config.py copy-preset perplexity-sonar-pro my-custom-preset + python scripts/manage_config.py edit-preset my-custom-preset + python scripts/manage_config.py validate-preset my-custom-preset + + # Validate all configurations + python scripts/manage_config.py validate + """, + ) + + parser.add_argument( + "--config-dir", + type=str, + help="Custom configuration directory (default: ./config or ~/.langpa/config)", + ) + + subparsers = parser.add_subparsers(dest="command", help="Available commands") + + # List commands + subparsers.add_parser("list-presets", help="List all available presets") + subparsers.add_parser("list-templates", help="List all available templates") + + # Show commands + show_preset_parser = subparsers.add_parser("show-preset", help="Show preset details") + show_preset_parser.add_argument("preset_name", help="Name of preset to show") + + show_template_parser = subparsers.add_parser("show-template", help="Show template details") + 
show_template_parser.add_argument("template_name", help="Name of template to show") + + # Validation commands + validate_parser = subparsers.add_parser("validate", help="Validate all configurations") + validate_preset_parser = subparsers.add_parser( + "validate-preset", help="Validate specific preset" + ) + validate_preset_parser.add_argument("preset_name", help="Name of preset to validate") + + # Preset management commands + copy_parser = subparsers.add_parser("copy-preset", help="Copy preset to create new one") + copy_parser.add_argument("source_name", help="Name of preset to copy") + copy_parser.add_argument("target_name", help="Name for new preset") + + edit_parser = subparsers.add_parser("edit-preset", help="Edit preset in default editor") + edit_parser.add_argument("preset_name", help="Name of preset to edit") + + delete_parser = subparsers.add_parser("delete-preset", help="Delete user preset") + delete_parser.add_argument("preset_name", help="Name of preset to delete") + + args = parser.parse_args() + + if not args.command: + parser.print_help() + sys.exit(1) + + # Initialize configuration manager + config_manager = YAMLConfigManager(args.config_dir) + + # Dispatch to appropriate command + if args.command == "list-presets": + list_presets(config_manager) + elif args.command == "list-templates": + list_templates(config_manager) + elif args.command == "show-preset": + show_preset(config_manager, args.preset_name) + elif args.command == "show-template": + show_template(config_manager, args.template_name) + elif args.command == "validate": + validate_config(config_manager) + elif args.command == "validate-preset": + validate_config(config_manager, args.preset_name) + elif args.command == "copy-preset": + copy_preset(config_manager, args.source_name, args.target_name) + elif args.command == "edit-preset": + edit_preset(config_manager, args.preset_name) + elif args.command == "delete-preset": + delete_preset(config_manager, args.preset_name) + else: + parser.print_help() + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/scripts/run_deepsearch.py b/scripts/run_deepsearch.py index b83500c..cdb958e 100644 --- a/scripts/run_deepsearch.py +++ b/scripts/run_deepsearch.py @@ -10,10 +10,13 @@ from dotenv import load_dotenv -from langpa.services import CitationResolver, DeepSearchService, OutputManager + +from langpa.services import DeepSearchService, OutputManager, CitationResolver +from langpa.services.yaml_config import get_config_manager from langpa.services.deepsearch_prompts import get_prompt_template, get_template_metadata from langpa.services.markdown_citation_extractor import extract_citations_from_markdown + # Ensure .env is loaded before instantiating clients load_dotenv() @@ -102,6 +105,11 @@ def parse_args() -> argparse.Namespace: "--preferred-provider", help="Preferred provider when initializing DeepSearchService (for backward compatibility).", ) + parser.add_argument( + "--config-dir", + help="Custom configuration directory path. Default: uses ./config or ~/.langpa/config " + "hierarchy with built-in defaults as fallback.", + ) parser.add_argument( "--template", help="Prompt template to use (e.g., 'gene_analysis_academic'). 
Default: preset's default " @@ -252,16 +260,16 @@ def load_context(args: argparse.Namespace) -> str: return context -def list_presets() -> None: +def list_presets(config_dir: str | None = None) -> None: """Display available configuration presets with complete details.""" print("Available configuration presets:") print("=" * 60) print() - preset_descriptions = DeepSearchService.get_available_presets() + preset_descriptions = DeepSearchService.get_available_presets(config_dir) for preset_name, description in preset_descriptions.items(): # Get the actual configuration for detailed display - config = DeepSearchService.get_preset_config(preset_name) + config = DeepSearchService.get_preset_config(preset_name, config_dir) print(f"{preset_name}") print(f" Description: {description}") @@ -290,16 +298,17 @@ def list_presets() -> None: print() -def list_templates() -> None: +def list_templates(config_dir: str | None = None) -> None: """Display available prompt templates with complete details.""" print("Available prompt templates:") print("=" * 60) print() - templates = DeepSearchService.get_available_templates() + templates = DeepSearchService.get_available_templates(config_dir) + config_manager = get_config_manager(config_dir) for template_name, description in templates.items(): # Get template metadata for detailed display - metadata = get_template_metadata(template_name) + metadata = config_manager.get_template_metadata(template_name) print(f"{template_name}") print(f" Description: {description}") @@ -309,12 +318,13 @@ def list_templates() -> None: print() -def show_template(template_name: str) -> None: +def show_template(template_name: str, config_dir: str | None = None) -> None: """Display complete details for a specific template.""" try: + config_manager = get_config_manager(config_dir) # Get template metadata and content - metadata = get_template_metadata(template_name) - template_config = get_prompt_template(template_name) + metadata = config_manager.get_template_metadata(template_name) + template_config = config_manager.get_template(template_name) template_text = template_config["template"] print(f"Template Details: {template_name}") @@ -350,12 +360,12 @@ def show_template(template_name: str) -> None: print("Use --list-templates to see available templates.") -def show_preset(preset_name: str) -> None: +def show_preset(preset_name: str, config_dir: str | None = None) -> None: """Display complete details for a specific preset.""" try: # Get preset configuration - config = DeepSearchService.get_preset_config(preset_name) - preset_descriptions = DeepSearchService.get_available_presets() + config = DeepSearchService.get_preset_config(preset_name, config_dir) + preset_descriptions = DeepSearchService.get_available_presets(config_dir) description = preset_descriptions.get(preset_name, "No description available") print(f"Preset Details: {preset_name}") @@ -436,18 +446,27 @@ def show_dry_run( # Show provider parameters print("Provider Parameters:") for key, value in service.config.provider_params.items(): - if key == "system_prompt" and value: - print(f" {key}: [JSON schema system prompt - truncated]") + if key == "system_prompt": + # Skip showing system_prompt here - we'll show it separately below + continue else: print(f" {key}: {value}") print() + # Show complete system prompt with schema injection + system_prompt = service._get_system_prompt_with_schema(args.template) + print("System Prompt:") + print("-" * 60) + print(system_prompt) + print("-" * 60) + print() + # Construct and show the prompt if 
args.custom_prompt: prompt = args.custom_prompt print("Custom Prompt:") else: - prompt = service.construct_prompt(genes, context, template_override=args.template) + prompt = service._construct_prompt(genes, context, template_override=args.template) print("Generated Prompt:") print("-" * 60) @@ -467,19 +486,19 @@ def main() -> None: # Handle informational operations first if args.list_presets: - list_presets() + list_presets(args.config_dir) return if args.list_templates: - list_templates() + list_templates(args.config_dir) return if args.show_template: - show_template(args.show_template) + show_template(args.show_template, args.config_dir) return if args.show_preset: - show_preset(args.show_preset) + show_preset(args.show_preset, args.config_dir) return offline_input = args.from_markdown or args.raw_input @@ -508,15 +527,18 @@ def main() -> None: provider_params["search_recency_filter"] = args.search_recency config_overrides["provider_params"] = provider_params - # Initialize service with preset support and overrides (only needed for live calls) - if not offline_input: - if args.preset: - service = DeepSearchService(preset=args.preset, **config_overrides) - else: - # Backward compatibility - service = DeepSearchService( - preferred_provider=args.preferred_provider, **config_overrides - ) + # Initialize service with preset support and overrides + if args.preset: + service = DeepSearchService( + preset=args.preset, config_dir=args.config_dir, **config_overrides + ) + else: + # Backward compatibility + service = DeepSearchService( + preferred_provider=args.preferred_provider, + config_dir=args.config_dir, + **config_overrides, + ) # Handle dry run if args.dry_run: diff --git a/src/langpa/services/deepsearch_configs.py b/src/langpa/services/deepsearch_configs.py deleted file mode 100644 index a153fab..0000000 --- a/src/langpa/services/deepsearch_configs.py +++ /dev/null @@ -1,114 +0,0 @@ -"""Configuration presets for DeepSearch service. - -This module provides well-tested configuration presets that combine provider settings, -model selection, and optimized parameters for different use cases. -""" - -from __future__ import annotations - -import copy -from dataclasses import dataclass -from typing import Any - - -@dataclass -class DeepSearchConfig: - """Configuration for DeepSearch service with provider-specific settings. 
- - Args: - provider: Research provider name (e.g., 'perplexity', 'openai') - model: Model to use with the provider - provider_params: Provider-specific parameters (dict or parameter object) - timeout: Request timeout in seconds - prompt_template: Name of prompt template to use - description: Human-readable description of this configuration - """ - - provider: str - model: str - provider_params: dict[str, Any] | Any - timeout: int = 180 - prompt_template: str = "gene_analysis_academic" - description: str = "" - - -# Well-tested configuration presets -PRESET_CONFIGS: dict[str, DeepSearchConfig] = { - "perplexity-sonar-pro": DeepSearchConfig( - provider="perplexity", - model="sonar-reasoning-pro", - provider_params={ - "return_citations": True, - "search_domain_filter": [ - "pubmed.ncbi.nlm.nih.gov", - "ncbi.nlm.nih.gov/pmc/", - "www.ncbi.nlm.nih.gov", - "europepmc.org", - "biorxiv.org", - "nature.com", - "cell.com", - "science.org", - ], - "reasoning_effort": "high", - "search_recency_filter": "month", - "system_prompt": None, # Will be set dynamically with JSON schema - }, - timeout=180, - prompt_template="gene_analysis_academic", - description="Academic research optimized for Perplexity with high reasoning effort", - ), -} - - -def get_preset_config(preset_name: str) -> DeepSearchConfig: - """Get a configuration preset by name. - - Args: - preset_name: Name of the preset to retrieve - - Returns: - DeepSearchConfig object with the preset configuration - - Raises: - ValueError: If preset_name is not found - """ - if preset_name not in PRESET_CONFIGS: - available = ", ".join(PRESET_CONFIGS.keys()) - raise ValueError(f"Unknown preset '{preset_name}'. Available presets: {available}") - - # Return a deep copy to prevent accidental modification of the original - return copy.deepcopy(PRESET_CONFIGS[preset_name]) - - -def list_available_presets() -> dict[str, str]: - """List all available configuration presets. - - Returns: - Dictionary mapping preset names to their descriptions - """ - return {name: config.description for name, config in PRESET_CONFIGS.items()} - - -def merge_config_overrides( - base_config: DeepSearchConfig, overrides: dict[str, Any] -) -> DeepSearchConfig: - """Merge configuration overrides into a base configuration. - - Args: - base_config: Base configuration to start with - overrides: Dictionary of field names to override values - - Returns: - New DeepSearchConfig with overrides applied - """ - # Start with a copy of the base config - merged = copy.deepcopy(base_config) - - # Apply overrides - for field_name, value in overrides.items(): - if hasattr(merged, field_name): - setattr(merged, field_name, value) - else: - raise ValueError(f"Unknown configuration field: {field_name}") - - return merged diff --git a/src/langpa/services/deepsearch_prompts.py b/src/langpa/services/deepsearch_prompts.py deleted file mode 100644 index 35437d7..0000000 --- a/src/langpa/services/deepsearch_prompts.py +++ /dev/null @@ -1,146 +0,0 @@ -"""Prompt templates for DeepSearch service. - -This module provides reusable prompt templates for different analysis approaches -and provider optimizations. -""" - -from __future__ import annotations - -from typing import Any - -# Registry of available prompt templates -PROMPT_TEMPLATES: dict[str, dict[str, Any]] = { - "gene_analysis_academic": { - "template": """Perform comprehensive literature analysis for the following gene list in the -specified biological context. - -**Gene List**: {genes} - -**Biological Context**: {context} - -**Analysis Strategy**: -1. 
Search current scientific literature for functional roles of each gene in the input list -2. Identify clusters of genes that act together in pathways, processes, or cellular states -3. Treat each cluster as a potential gene program within the list -4. Interpret findings in light of both normal physiological roles and disease-specific alterations -5. Prioritize well-established functions with strong literature support, but highlight emerging - evidence if contextually relevant - -**Guidelines**: -* Anchor all predictions in either the normal physiology and development of the cell type and - tissue specified in the context OR the alterations and dysregulations characteristic of the - specified disease -* Connect gene-level roles to program-level implications -* Consider gene interactions, regulatory networks, and pathway dynamics -* Highlight cases where multiple genes collectively strengthen evidence -* Ensure all claims are backed by experimental evidence with proper attribution - -Provide a structured analysis identifying biological programs and their predicted cellular -impacts within the given context.""", - "supports_json_schema": True, - "optimized_for": ["perplexity", "consensus"], - "description": "Academic research-focused analysis with comprehensive literature review " - "strategy", - }, - "gene_analysis_structured": { - "template": """Analyze the following genes in the specified biological context and provide -structured findings. - -**Genes to Analyze**: {genes} - -**Context**: {context} - -**Required Analysis**: -1. For each gene, identify its primary biological functions -2. Group genes by common pathways or biological processes -3. Identify potential gene programs (clusters of functionally related genes) -4. Assess the relevance to the specified biological context - -**Output Requirements**: -- Focus on well-established, peer-reviewed findings -- Prioritize recent research when available -- Include pathway and process associations -- Highlight gene-gene interactions where relevant - -Provide a systematic analysis organized by biological programs and functional clusters.""", - "supports_json_schema": True, - "optimized_for": ["openai", "edison"], - "description": "Structured analysis optimized for clear, systematic gene program " - "identification", - }, -} - - -def get_prompt_template(template_name: str) -> dict[str, Any]: - """Get a prompt template by name. - - Args: - template_name: Name of the template to retrieve - - Returns: - Dictionary containing template configuration - - Raises: - ValueError: If template_name is not found - """ - if template_name not in PROMPT_TEMPLATES: - available = ", ".join(PROMPT_TEMPLATES.keys()) - raise ValueError(f"Unknown template '{template_name}'. Available templates: {available}") - - return PROMPT_TEMPLATES[template_name].copy() - - -def list_available_templates() -> dict[str, str]: - """List all available prompt templates. - - Returns: - Dictionary mapping template names to their descriptions - """ - return {name: template["description"] for name, template in PROMPT_TEMPLATES.items()} - - -def format_prompt_template(template_name: str, genes: list[str], context: str) -> str: - """Format a prompt template with the provided genes and context. 
- - Args: - template_name: Name of the template to use - genes: List of gene symbols to analyze - context: Biological context for the analysis - - Returns: - Formatted prompt string ready for API call - - Raises: - ValueError: If template_name is not found - """ - template_config = get_prompt_template(template_name) - template_text = template_config["template"] - - # Format genes as comma-separated string - genes_str = ", ".join(genes) - - # Substitute placeholders - formatted_prompt = template_text.format(genes=genes_str, context=context) - - return formatted_prompt - - -def get_template_metadata(template_name: str) -> dict[str, Any]: - """Get metadata about a specific template. - - Args: - template_name: Name of the template - - Returns: - Dictionary with template metadata (supports_json_schema, optimized_for, description) - - Raises: - ValueError: If template_name is not found - """ - template_config = get_prompt_template(template_name) - - return { - "supports_json_schema": template_config["supports_json_schema"], - "optimized_for": template_config["optimized_for"], - "description": template_config["description"], - } diff --git a/src/langpa/services/deepsearch_service.py b/src/langpa/services/deepsearch_service.py index 55db121..2637ac5 100644 --- a/src/langpa/services/deepsearch_service.py +++ b/src/langpa/services/deepsearch_service.py @@ -9,15 +9,9 @@ from dotenv import load_dotenv from langpa.schemas import load_schema -from langpa.services.deepsearch_configs import ( +from langpa.services.yaml_config import ( DeepSearchConfig, - get_preset_config, - list_available_presets, - merge_config_overrides, -) -from langpa.services.deepsearch_prompts import ( - format_prompt_template, - list_available_templates, + get_config_manager, ) # Load environment variables @@ -31,67 +25,78 @@ def __init__( self, preferred_provider: str | None = None, preset: str | None = None, + config_dir: str | None = None, **config_overrides: Any, ) -> None: """Initialize the DeepSearch service. Args: preferred_provider: Preferred research provider (e.g., 'perplexity', 'openai') - If None, uses first available provider. (Backward compatibility) + If None, uses provider from preset. 
(Backward compatibility) preset: Configuration preset name (e.g., 'perplexity-sonar-pro') If None, uses 'perplexity-sonar-pro' as default + config_dir: Optional custom configuration directory **config_overrides: Override specific configuration fields """ self.client = DeepResearchClient() + self.config_manager = get_config_manager(config_dir) - # Load configuration (preset takes precedence over preferred_provider for new pattern) - if preset is not None: - self.config = get_preset_config(preset) - if config_overrides: - self.config = merge_config_overrides(self.config, config_overrides) - else: - # Backward compatibility: use preferred_provider with default preset - self.config = get_preset_config("perplexity-sonar-pro") - if preferred_provider: - # Override provider in default config for backward compatibility - self.config = merge_config_overrides(self.config, {"provider": preferred_provider}) + # Load base configuration from preset + preset_name = preset or "perplexity-sonar-pro" # Default preset + self.config = self.config_manager.get_preset_config(preset_name) + + # Apply overrides - handle preferred_provider as special case for backward compatibility + final_overrides = dict(config_overrides) + if preferred_provider: + final_overrides["provider"] = preferred_provider + + # Apply ALL overrides (fixes backward compatibility bug) + if final_overrides: + self.config = self.config_manager.merge_config_overrides(self.config, final_overrides) # Keep old attributes for backward compatibility self.preferred_provider = preferred_provider self._available_providers: list[str] | None = None @classmethod - def get_available_presets(cls) -> dict[str, str]: + def get_available_presets(cls, config_dir: str | None = None) -> dict[str, str]: """Get list of available configuration presets. + Args: + config_dir: Optional custom configuration directory + Returns: Dictionary mapping preset names to their descriptions """ - return list_available_presets() + return get_config_manager(config_dir).list_available_presets() @classmethod - def get_preset_config(cls, preset_name: str) -> DeepSearchConfig: + def get_preset_config(cls, preset_name: str, config_dir: str | None = None) -> DeepSearchConfig: """Get a specific preset configuration. Args: preset_name: Name of the preset to retrieve + config_dir: Optional custom configuration directory Returns: DeepSearchConfig object for the preset Raises: - ValueError: If preset_name is not found + ConfigurationError: If preset_name is not found """ - return get_preset_config(preset_name) + return get_config_manager(config_dir).get_preset_config(preset_name) @classmethod - def get_available_templates(cls) -> dict[str, str]: + def get_available_templates(cls, config_dir: str | None = None) -> dict[str, str]: """Get list of available prompt templates. + Args: + config_dir: Optional custom configuration directory + Returns: Dictionary mapping template names to their descriptions """ - return list_available_templates() + return get_config_manager(config_dir).list_available_templates() @property def available_providers(self) -> list[str]: @@ -116,6 +121,45 @@ def _get_provider(self) -> str: return providers[0] + def _get_system_prompt_with_schema(self, prompt_template: str | None = None) -> str: + """Generate system prompt with JSON schema injection. 
+ + Args: + prompt_template: Optional template name override + + Returns: + System prompt with schema injected + """ + # Load schema for response_format + schema = load_schema("deepsearch_results_schema.json") + + # Get provider params for current config + provider_params = dict(self.config.provider_params) + + # Determine schema instruction priority: preset system_prompt > template schema_instruction > default + if provider_params.get("system_prompt"): + # Preset has custom system_prompt - use it and substitute {schema} if present + instruction = provider_params["system_prompt"] + else: + # Get template metadata to access schema instruction + template_name = prompt_template or self.config.prompt_template + template_metadata = self.config_manager.get_template_metadata(template_name) + + # Use template's schema instruction or default fallback + if template_metadata.get("schema_instruction"): + instruction = template_metadata["schema_instruction"] + else: + # Default fallback for templates without schema_instruction + instruction = """You are an expert biologist. Analyze the provided genes in the given biological context. + +CRITICAL: Respond ONLY with valid JSON that exactly follows this schema structure: +{schema} + +Do not include any prose, markdown, explanatory text, or tags. Only the JSON structure.""" + + # Return system prompt with schema substitution + return instruction.format(schema=json.dumps(schema, indent=2)) + def _construct_prompt( self, genes: list[str], context: str, template_override: str | None = None ) -> str: @@ -134,8 +178,8 @@ def _construct_prompt( # Determine which template to use template_name = template_override or self.config.prompt_template - # Format the prompt using the template system - return format_prompt_template(template_name, genes, context) + # Format the prompt using the YAML template system + return self.config_manager.format_prompt_template(template_name, genes, context) def research_gene_list( self, @@ -185,24 +229,12 @@ def research_gene_list( # Note: timeout parameter is used for backward compatibility but not currently applied try: - # Load schema for response_format - schema = load_schema("deepsearch_results_schema.json") - # Prepare provider params from config provider_params = dict(self.config.provider_params) # Copy to avoid modifying original - # Set system_prompt with JSON schema (overwrites any preset system_prompt) - provider_params[ - "system_prompt" - ] = f"""You are an expert biologist. Analyze the provided genes in the given biological -context. + # Set system_prompt with schema injection + provider_params["system_prompt"] = self._get_system_prompt_with_schema(prompt_template) -CRITICAL: Respond ONLY with valid JSON that exactly follows this schema structure: -{json.dumps(schema, indent=2)} - -Ensure every citation object includes \"source_id\" matching DeepSearch/Perplexity numbering. - -Do not include any prose, markdown, explanatory text, or tags. Only the JSON structure.""" # Use configuration-driven research call result = self.client.research( diff --git a/src/langpa/services/yaml_config.py b/src/langpa/services/yaml_config.py new file mode 100644 index 0000000..61d2d2c --- /dev/null +++ b/src/langpa/services/yaml_config.py @@ -0,0 +1,402 @@ +"""YAML-based configuration system for DeepSearch service. + +This module provides configuration loading and validation for presets and templates +from YAML files with a clear loading hierarchy. 
+""" + +from __future__ import annotations + +import copy +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +import jsonschema +import yaml + + +@dataclass +class DeepSearchConfig: + """Configuration for DeepSearch service with provider-specific settings. + + Args: + provider: Research provider name (e.g., 'perplexity', 'openai') + model: Model to use with the provider + provider_params: Provider-specific parameters (dict or parameter object) + timeout: Request timeout in seconds + prompt_template: Name of prompt template to use + description: Human-readable description of this configuration + """ + + provider: str + model: str + provider_params: dict[str, Any] | Any + timeout: int = 180 + prompt_template: str = "gene_analysis_academic" + description: str = "" + + +class ConfigurationError(Exception): + """Raised when configuration loading or validation fails.""" + + pass + + +class YAMLConfigManager: + """Manages YAML-based configuration loading and validation.""" + + def __init__(self, config_dir: str | None = None): + """Initialize configuration manager. + + Args: + config_dir: Optional custom config directory. If None, uses default hierarchy: + 1. ./config/ (project config) + 2. ~/.langpa/config/ (user config) + """ + self.config_dir = Path(config_dir) if config_dir else None + self._presets_cache: dict[str, dict[str, Any]] | None = None + self._templates_cache: dict[str, dict[str, Any]] | None = None + + def _get_config_paths(self) -> list[Path]: + """Get list of configuration directory paths in loading priority order. + + Returns: + List of Path objects for config directories to check + """ + paths = [] + + if self.config_dir: + # Custom config directory specified + paths.append(self.config_dir) + else: + # Default hierarchy + project_config = Path.cwd() / "config" + user_config = Path.home() / ".langpa" / "config" + + # Add paths that exist + for path in [project_config, user_config]: + if path.exists(): + paths.append(path) + + # Always include defaults from package + package_defaults = Path(__file__).parent.parent.parent.parent / "config" / "defaults" + if package_defaults.exists(): + paths.append(package_defaults) + + return paths + + def _load_yaml_file(self, filename: str) -> dict[str, Any]: + """Load and merge YAML configuration from hierarchy of config directories. + + Args: + filename: Name of YAML file to load (e.g., 'presets.yaml') + + Returns: + Merged configuration dictionary + + Raises: + ConfigurationError: If no valid config files found or YAML parsing fails + """ + config_paths = self._get_config_paths() + merged_config: dict[str, Any] = {} + + configs_found = 0 + for config_dir in reversed(config_paths): # Start with defaults, overlay user configs + config_file = config_dir / filename + if config_file.exists(): + try: + with open(config_file) as f: + file_config = yaml.safe_load(f) or {} + + # Merge configurations (later configs override earlier ones) + if isinstance(file_config, dict): + merged_config.update(file_config) + configs_found += 1 + except yaml.YAMLError as e: + raise ConfigurationError(f"Invalid YAML in {config_file}: {e}") from e + except OSError as e: + raise ConfigurationError(f"Cannot read {config_file}: {e}") from e + + if configs_found == 0: + raise ConfigurationError(f"No {filename} found in any config directory") + + return merged_config + + def _validate_config(self, config: dict[str, Any], schema_name: str) -> None: + """Validate configuration against JSON schema. 
+ + Args: + config: Configuration dictionary to validate + schema_name: Name of schema file (e.g., 'preset-schema.yaml') + + Raises: + ConfigurationError: If validation fails + """ + # Find schema file + schema_paths = [] + for config_dir in self._get_config_paths(): + schema_dir = config_dir / "schemas" + if schema_dir.exists(): + schema_paths.append(schema_dir) + + schema_file = None + for schema_dir in schema_paths: + potential_schema = schema_dir / schema_name + if potential_schema.exists(): + schema_file = potential_schema + break + + if not schema_file: + # Schema validation is optional - warn but don't fail + return + + try: + with open(schema_file) as f: + schema = yaml.safe_load(f) + + jsonschema.validate(instance=config, schema=schema) + except jsonschema.ValidationError as e: + raise ConfigurationError(f"Configuration validation failed: {e.message}") from e + except (OSError, yaml.YAMLError) as e: + raise ConfigurationError(f"Cannot load schema {schema_file}: {e}") from e + + def _load_presets(self) -> dict[str, dict[str, Any]]: + """Load preset configurations from YAML files. + + Returns: + Dictionary of preset configurations + + Raises: + ConfigurationError: If loading or validation fails + """ + if self._presets_cache is not None: + return self._presets_cache + + config = self._load_yaml_file("presets.yaml") + self._validate_config(config, "preset-schema.yaml") + + if "presets" not in config: + raise ConfigurationError("Invalid presets.yaml: missing 'presets' key") + + self._presets_cache = config["presets"] + return self._presets_cache + + def _load_templates(self) -> dict[str, dict[str, Any]]: + """Load template configurations from YAML files. + + Returns: + Dictionary of template configurations + + Raises: + ConfigurationError: If loading or validation fails + """ + if self._templates_cache is not None: + return self._templates_cache + + config = self._load_yaml_file("templates.yaml") + self._validate_config(config, "template-schema.yaml") + + if "templates" not in config: + raise ConfigurationError("Invalid templates.yaml: missing 'templates' key") + + self._templates_cache = config["templates"] + return self._templates_cache + + def get_preset_config(self, preset_name: str) -> DeepSearchConfig: + """Get a configuration preset by name. + + Args: + preset_name: Name of the preset to retrieve + + Returns: + DeepSearchConfig object with the preset configuration + + Raises: + ConfigurationError: If preset_name is not found + """ + presets = self._load_presets() + + if preset_name not in presets: + available = ", ".join(presets.keys()) + raise ConfigurationError( + f"Unknown preset '{preset_name}'. Available presets: {available}" + ) + + preset_data = presets[preset_name] + + # Convert YAML data to DeepSearchConfig object + return DeepSearchConfig( + provider=preset_data["provider"], + model=preset_data["model"], + provider_params=copy.deepcopy(preset_data["provider_params"]), + timeout=preset_data.get("timeout", 180), + prompt_template=preset_data.get("prompt_template", "gene_analysis_academic"), + description=preset_data.get("description", ""), + ) + + def list_available_presets(self) -> dict[str, str]: + """List all available configuration presets. + + Returns: + Dictionary mapping preset names to their descriptions + """ + presets = self._load_presets() + return {name: config.get("description", "") for name, config in presets.items()} + + def get_template(self, template_name: str) -> dict[str, Any]: + """Get a prompt template by name. 
+ + Args: + template_name: Name of the template to retrieve + + Returns: + Dictionary containing template configuration + + Raises: + ConfigurationError: If template_name is not found + """ + templates = self._load_templates() + + if template_name not in templates: + available = ", ".join(templates.keys()) + raise ConfigurationError( + f"Unknown template '{template_name}'. Available templates: {available}" + ) + + return copy.deepcopy(templates[template_name]) + + def list_available_templates(self) -> dict[str, str]: + """List all available prompt templates. + + Returns: + Dictionary mapping template names to their descriptions + """ + templates = self._load_templates() + return {name: template.get("description", "") for name, template in templates.items()} + + def format_prompt_template(self, template_name: str, genes: list[str], context: str) -> str: + """Format a prompt template with the provided genes and context. + + Args: + template_name: Name of the template to use + genes: List of gene symbols to analyze + context: Biological context for the analysis + + Returns: + Formatted prompt string ready for API call + + Raises: + ConfigurationError: If template_name is not found + """ + template_config = self.get_template(template_name) + template_text = template_config["template"] + + # Format genes as comma-separated string + genes_str = ", ".join(genes) + + # Substitute placeholders + formatted_prompt = template_text.format(genes=genes_str, context=context) + + return formatted_prompt + + def get_template_metadata(self, template_name: str) -> dict[str, Any]: + """Get metadata about a specific template. + + Args: + template_name: Name of the template + + Returns: + Dictionary with template metadata (supports_json_schema, optimized_for, description, schema_instruction) + + Raises: + ConfigurationError: If template_name is not found + """ + template_config = self.get_template(template_name) + + return { + "supports_json_schema": template_config["supports_json_schema"], + "optimized_for": template_config["optimized_for"], + "description": template_config["description"], + "schema_instruction": template_config.get("schema_instruction"), + } + + def merge_config_overrides( + self, base_config: DeepSearchConfig, overrides: dict[str, Any] + ) -> DeepSearchConfig: + """Merge configuration overrides into a base configuration. + + Args: + base_config: Base configuration to start with + overrides: Dictionary of field names to override values + + Returns: + New DeepSearchConfig with overrides applied + + Raises: + ConfigurationError: If unknown field names provided + """ + # Start with a copy of the base config + merged = copy.deepcopy(base_config) + + # Apply overrides + for field_name, value in overrides.items(): + if hasattr(merged, field_name): + setattr(merged, field_name, value) + else: + raise ConfigurationError(f"Unknown configuration field: {field_name}") + + return merged + + +# Global instance for backward compatibility +_global_config_manager: YAMLConfigManager | None = None + + +def get_config_manager(config_dir: str | None = None) -> YAMLConfigManager: + """Get global configuration manager instance. 
+ + Args: + config_dir: Optional custom config directory + + Returns: + YAMLConfigManager instance + """ + global _global_config_manager + + if _global_config_manager is None or config_dir is not None: + _global_config_manager = YAMLConfigManager(config_dir) + + return _global_config_manager + + +# Convenience functions for backward compatibility +def get_preset_config(preset_name: str) -> DeepSearchConfig: + """Get a configuration preset by name.""" + return get_config_manager().get_preset_config(preset_name) + + +def list_available_presets() -> dict[str, str]: + """List all available configuration presets.""" + return get_config_manager().list_available_presets() + + +def format_prompt_template(template_name: str, genes: list[str], context: str) -> str: + """Format a prompt template with genes and context.""" + return get_config_manager().format_prompt_template(template_name, genes, context) + + +def list_available_templates() -> dict[str, str]: + """List all available prompt templates.""" + return get_config_manager().list_available_templates() + + +def get_template_metadata(template_name: str) -> dict[str, Any]: + """Get metadata about a specific template.""" + return get_config_manager().get_template_metadata(template_name) + + +def merge_config_overrides( + base_config: DeepSearchConfig, overrides: dict[str, Any] +) -> DeepSearchConfig: + """Merge configuration overrides into base configuration.""" + return get_config_manager().merge_config_overrides(base_config, overrides) diff --git a/tests/unit/test_deepsearch_configs.py b/tests/unit/test_deepsearch_configs.py deleted file mode 100644 index daba186..0000000 --- a/tests/unit/test_deepsearch_configs.py +++ /dev/null @@ -1,161 +0,0 @@ -"""Unit tests for DeepSearch configuration presets.""" - -from __future__ import annotations - -import pytest - -from langpa.services.deepsearch_configs import ( - PRESET_CONFIGS, - DeepSearchConfig, - get_preset_config, - list_available_presets, -) - - -@pytest.mark.unit -def test_deepsearch_config_dataclass() -> None: - """Test DeepSearchConfig dataclass structure and validation.""" - config = DeepSearchConfig( - provider="test_provider", - model="test_model", - provider_params={"test_param": "test_value"}, - timeout=120, - prompt_template="test_template", - description="Test configuration", - ) - - assert config.provider == "test_provider" - assert config.model == "test_model" - assert config.provider_params == {"test_param": "test_value"} - assert config.timeout == 120 - assert config.prompt_template == "test_template" - assert config.description == "Test configuration" - - -@pytest.mark.unit -def test_preset_configs_structure() -> None: - """Test that PRESET_CONFIGS contains expected presets.""" - # Should have at least the perplexity-sonar-pro preset - assert "perplexity-sonar-pro" in PRESET_CONFIGS - - # Each preset should be a DeepSearchConfig - for preset_name, config in PRESET_CONFIGS.items(): - assert isinstance(config, DeepSearchConfig) - assert isinstance(preset_name, str) - assert len(preset_name) > 0 - - # Each config should have all required fields - assert hasattr(config, "provider") - assert hasattr(config, "model") - assert hasattr(config, "provider_params") - assert hasattr(config, "timeout") - assert hasattr(config, "prompt_template") - assert hasattr(config, "description") - - -@pytest.mark.unit -def test_perplexity_sonar_pro_preset() -> None: - """Test the perplexity-sonar-pro preset configuration.""" - config = PRESET_CONFIGS["perplexity-sonar-pro"] - - assert config.provider == 
"perplexity" - assert config.model == "sonar-reasoning-pro" - assert config.timeout == 180 - assert config.prompt_template == "gene_analysis_academic" - assert "perplexity" in config.description.lower() - - # Provider params should be dictionary with expected Perplexity settings - params = config.provider_params - assert isinstance(params, dict) - assert "return_citations" in params - assert "reasoning_effort" in params - assert "search_recency_filter" in params - assert "search_domain_filter" in params - - # Should have academic domain filters - domain_filter = params["search_domain_filter"] - assert isinstance(domain_filter, list) - assert "pubmed.ncbi.nlm.nih.gov" in domain_filter - assert "ncbi.nlm.nih.gov/pmc/" in domain_filter - - -@pytest.mark.unit -def test_get_preset_config_valid() -> None: - """Test getting a valid preset configuration.""" - config = get_preset_config("perplexity-sonar-pro") - - assert isinstance(config, DeepSearchConfig) - assert config.provider == "perplexity" - assert config.model == "sonar-reasoning-pro" - - -@pytest.mark.unit -def test_get_preset_config_invalid() -> None: - """Test getting an invalid preset raises appropriate error.""" - with pytest.raises(ValueError) as exc_info: - get_preset_config("nonexistent-preset") - - assert "Unknown preset" in str(exc_info.value) - assert "nonexistent-preset" in str(exc_info.value) - - -@pytest.mark.unit -def test_list_available_presets() -> None: - """Test listing available presets returns correct structure.""" - presets = list_available_presets() - - assert isinstance(presets, dict) - assert len(presets) > 0 - assert "perplexity-sonar-pro" in presets - - # Each entry should map name to description - for name, description in presets.items(): - assert isinstance(name, str) - assert isinstance(description, str) - assert len(name) > 0 - assert len(description) > 0 - - -@pytest.mark.unit -def test_preset_config_immutability() -> None: - """Test that preset configs can be safely copied without side effects.""" - original_config = get_preset_config("perplexity-sonar-pro") - - # Modify the returned config - original_config.timeout = 999 - original_config.provider_params["new_param"] = "new_value" - - # Getting the preset again should return unmodified version - fresh_config = get_preset_config("perplexity-sonar-pro") - assert fresh_config.timeout == 180 # Original value - assert "new_param" not in fresh_config.provider_params - - -@pytest.mark.unit -def test_config_merge_functionality() -> None: - """Test configuration merging for overrides.""" - from langpa.services.deepsearch_configs import merge_config_overrides - - base_config = DeepSearchConfig( - provider="test", - model="base_model", - provider_params={"base_param": "base_value"}, - timeout=100, - prompt_template="base_template", - description="Base config", - ) - - overrides = { - "model": "override_model", - "timeout": 200, - "provider_params": {"override_param": "override_value"}, - } - - merged = merge_config_overrides(base_config, overrides) - - assert merged.provider == "test" # Unchanged - assert merged.model == "override_model" # Overridden - assert merged.timeout == 200 # Overridden - assert merged.provider_params == {"override_param": "override_value"} # Replaced - assert merged.prompt_template == "base_template" # Unchanged - assert merged.description == "Base config" # Unchanged diff --git a/tests/unit/test_deepsearch_prompts.py b/tests/unit/test_deepsearch_prompts.py deleted file mode 100644 index ca8f0c4..0000000 --- 
a/tests/unit/test_deepsearch_prompts.py +++ /dev/null @@ -1,202 +0,0 @@ -"""Unit tests for DeepSearch prompt template system.""" - -from __future__ import annotations - -import pytest - -from langpa.services.deepsearch_prompts import ( - PROMPT_TEMPLATES, - format_prompt_template, - get_prompt_template, - list_available_templates, -) - - -@pytest.mark.unit -def test_prompt_templates_structure() -> None: - """Test that PROMPT_TEMPLATES contains expected templates.""" - # Should have at least the gene_analysis_academic template - assert "gene_analysis_academic" in PROMPT_TEMPLATES - - # Each template should be a dictionary with required fields - for template_name, template in PROMPT_TEMPLATES.items(): - assert isinstance(template, dict) - assert isinstance(template_name, str) - assert len(template_name) > 0 - - # Each template should have required fields - assert "template" in template - assert "supports_json_schema" in template - assert "optimized_for" in template - assert "description" in template - - # Validate field types - assert isinstance(template["template"], str) - assert isinstance(template["supports_json_schema"], bool) - assert isinstance(template["optimized_for"], list) - assert isinstance(template["description"], str) - - -@pytest.mark.unit -def test_gene_analysis_academic_template() -> None: - """Test the gene_analysis_academic template configuration.""" - template = PROMPT_TEMPLATES["gene_analysis_academic"] - - assert template["supports_json_schema"] is True - assert "perplexity" in template["optimized_for"] - assert "academic" in template["description"].lower() - - # Template should contain placeholders for genes and context - template_text = template["template"] - assert "{genes}" in template_text - assert "{context}" in template_text - - # Should contain key analysis elements from original prompt - template_lower = template_text.lower() - assert "literature analysis" in template_lower or "analysis" in template_lower - assert "gene" in template_lower - assert "biological" in template_lower - - -@pytest.mark.unit -def test_get_prompt_template_valid() -> None: - """Test getting a valid prompt template.""" - template = get_prompt_template("gene_analysis_academic") - - assert isinstance(template, dict) - assert "template" in template - assert "supports_json_schema" in template - - -@pytest.mark.unit -def test_get_prompt_template_invalid() -> None: - """Test getting an invalid prompt template raises appropriate error.""" - with pytest.raises(ValueError) as exc_info: - get_prompt_template("nonexistent-template") - - assert "Unknown template" in str(exc_info.value) - assert "nonexistent-template" in str(exc_info.value) - - -@pytest.mark.unit -def test_list_available_templates() -> None: - """Test listing available prompt templates.""" - templates = list_available_templates() - - assert isinstance(templates, dict) - assert len(templates) > 0 - assert "gene_analysis_academic" in templates - - # Each entry should map name to description - for name, description in templates.items(): - assert isinstance(name, str) - assert isinstance(description, str) - assert len(name) > 0 - assert len(description) > 0 - - -@pytest.mark.unit -def test_format_prompt_template() -> None: - """Test formatting a prompt template with genes and context.""" - genes = ["TP53", "BRCA1"] - context = "cancer tumor suppressor genes" - - formatted = format_prompt_template("gene_analysis_academic", genes, context) - - assert isinstance(formatted, str) - assert len(formatted) > 100 # Should be substantial content - 
- # Should contain the provided genes and context - assert "TP53" in formatted - assert "BRCA1" in formatted - assert "cancer tumor suppressor genes" in formatted - - # Should not contain unsubstituted placeholders - assert "{genes}" not in formatted - assert "{context}" not in formatted - - -@pytest.mark.unit -def test_format_prompt_template_genes_formatting() -> None: - """Test that genes are properly formatted in templates.""" - genes = ["MYC", "EGFR", "KRAS"] - context = "oncogenes" - - formatted = format_prompt_template("gene_analysis_academic", genes, context) - - # Genes should be joined properly (likely comma-separated) - genes_str = ", ".join(genes) - assert genes_str in formatted - - # Each gene should appear in the formatted text - for gene in genes: - assert gene in formatted - - -@pytest.mark.unit -def test_format_prompt_template_invalid_template() -> None: - """Test formatting with invalid template name raises error.""" - with pytest.raises(ValueError): - format_prompt_template("invalid-template", ["TP53"], "cancer") - - -@pytest.mark.unit -def test_template_placeholder_substitution() -> None: - """Test that all placeholders in templates are properly substituted.""" - # Test with all templates to ensure they work - genes = ["TEST1", "TEST2"] - context = "test biological context" - - for template_name in PROMPT_TEMPLATES: - formatted = format_prompt_template(template_name, genes, context) - - # Should not contain any unsubstituted placeholders - assert "{genes}" not in formatted - assert "{context}" not in formatted - - # Should contain the test values - assert "TEST1" in formatted - assert "test biological context" in formatted - - -@pytest.mark.unit -def test_prompt_template_content_preservation() -> None: - """Test that the current prompt content is preserved in gene_analysis_academic template.""" - template = get_prompt_template("gene_analysis_academic") - template_text = template["template"] - - # Key elements from the original prompt should be preserved - template_lower = template_text.lower() - - # Analysis strategy elements - expected_elements = ["literature", "analysis", "gene", "biological", "program", "pathway"] - - found_elements = sum(1 for element in expected_elements if element in template_lower) - assert found_elements >= 4, "Template should preserve key analysis concepts" - - -@pytest.mark.unit -def test_template_optimization_metadata() -> None: - """Test that template optimization metadata is meaningful.""" - for _template_name, template in PROMPT_TEMPLATES.items(): - optimized_for = template["optimized_for"] - - # Should be optimized for at least one provider - assert len(optimized_for) > 0 - - # Provider names should be reasonable - for provider in optimized_for: - assert isinstance(provider, str) - assert len(provider) > 0 - assert provider.lower() in ["perplexity", "openai", "edison", "consensus", "anthropic"] - - -@pytest.mark.unit -def test_json_schema_support_consistency() -> None: - """Test that JSON schema support is consistently handled.""" - # All current templates should support JSON schema for structured output - for template_name, template in PROMPT_TEMPLATES.items(): - # Current implementation should support JSON schema - assert template["supports_json_schema"] is True, ( - f"Template {template_name} should support JSON schema" - ) diff --git a/tests/unit/test_yaml_config.py b/tests/unit/test_yaml_config.py new file mode 100644 index 0000000..f6d7364 --- /dev/null +++ b/tests/unit/test_yaml_config.py @@ -0,0 +1,526 @@ +"""Unit tests for YAML 
configuration system.""" + +from __future__ import annotations + +import tempfile +from pathlib import Path +from unittest.mock import patch + +import pytest +import yaml + +from langpa.services.yaml_config import ( + ConfigurationError, + DeepSearchConfig, + YAMLConfigManager, + format_prompt_template, + get_config_manager, + get_preset_config, + list_available_presets, + list_available_templates, +) + + +@pytest.mark.unit +def test_yaml_config_manager_init() -> None: + """Test YAMLConfigManager initialization.""" + # Test with no config directory + manager = YAMLConfigManager() + assert manager.config_dir is None + + # Test with custom config directory + with tempfile.TemporaryDirectory() as temp_dir: + manager = YAMLConfigManager(temp_dir) + assert manager.config_dir == Path(temp_dir) + + +@pytest.mark.unit +def test_load_presets_from_yaml() -> None: + """Test loading presets from YAML configuration.""" + with tempfile.TemporaryDirectory() as temp_dir: + config_dir = Path(temp_dir) + presets_file = config_dir / "presets.yaml" + + # Create test presets YAML + preset_config = { + "presets": { + "test-preset": { + "description": "Test preset", + "provider": "perplexity", + "model": "sonar-reasoning-pro", + "timeout": 180, + "prompt_template": "gene_analysis_academic", + "provider_params": { + "return_citations": True, + "reasoning_effort": "high", + }, + } + } + } + + with open(presets_file, "w") as f: + yaml.dump(preset_config, f) + + manager = YAMLConfigManager(str(config_dir)) + config = manager.get_preset_config("test-preset") + + assert config.provider == "perplexity" + assert config.model == "sonar-reasoning-pro" + assert config.timeout == 180 + assert config.prompt_template == "gene_analysis_academic" + assert config.provider_params["reasoning_effort"] == "high" + + +@pytest.mark.unit +def test_load_templates_from_yaml() -> None: + """Test loading templates from YAML configuration.""" + with tempfile.TemporaryDirectory() as temp_dir: + config_dir = Path(temp_dir) + templates_file = config_dir / "templates.yaml" + + # Create test templates YAML + template_config = { + "templates": { + "test-template": { + "description": "Test template", + "optimized_for": ["perplexity"], + "supports_json_schema": True, + "template": "Analyze genes: {genes} in context: {context}", + } + } + } + + with open(templates_file, "w") as f: + yaml.dump(template_config, f) + + manager = YAMLConfigManager(str(config_dir)) + template = manager.get_template("test-template") + + assert template["description"] == "Test template" + assert template["optimized_for"] == ["perplexity"] + assert template["supports_json_schema"] is True + assert template["template"] == "Analyze genes: {genes} in context: {context}" + + +@pytest.mark.unit +def test_config_hierarchy_loading() -> None: + """Test configuration loading hierarchy (user > project > defaults).""" + with tempfile.TemporaryDirectory() as temp_dir: + # Create base config + base_dir = Path(temp_dir) / "base" + base_dir.mkdir() + base_presets = base_dir / "presets.yaml" + + base_config = { + "presets": { + "test-preset": { + "description": "Base preset", + "provider": "perplexity", + "model": "sonar-small", + "timeout": 120, + "prompt_template": "gene_analysis_academic", + "provider_params": {"reasoning_effort": "low"}, + } + } + } + + with open(base_presets, "w") as f: + yaml.dump(base_config, f) + + # Mock the _get_config_paths to return our test directory + manager = YAMLConfigManager() + with patch.object(manager, "_get_config_paths", return_value=[base_dir]): + 
config = manager.get_preset_config("test-preset") + assert config.model == "sonar-small" + assert config.provider_params["reasoning_effort"] == "low" + + +@pytest.mark.unit +def test_preset_not_found_error() -> None: + """Test appropriate error when preset is not found.""" + with tempfile.TemporaryDirectory() as temp_dir: + config_dir = Path(temp_dir) + presets_file = config_dir / "presets.yaml" + + # Create empty presets file + with open(presets_file, "w") as f: + yaml.dump({"presets": {}}, f) + + manager = YAMLConfigManager(str(config_dir)) + + with pytest.raises(ConfigurationError) as exc_info: + manager.get_preset_config("nonexistent-preset") + + assert "Unknown preset 'nonexistent-preset'" in str(exc_info.value) + + +@pytest.mark.unit +def test_template_not_found_error() -> None: + """Test appropriate error when template is not found.""" + with tempfile.TemporaryDirectory() as temp_dir: + config_dir = Path(temp_dir) + templates_file = config_dir / "templates.yaml" + + # Create empty templates file + with open(templates_file, "w") as f: + yaml.dump({"templates": {}}, f) + + manager = YAMLConfigManager(str(config_dir)) + + with pytest.raises(ConfigurationError) as exc_info: + manager.get_template("nonexistent-template") + + assert "Unknown template 'nonexistent-template'" in str(exc_info.value) + + +@pytest.mark.unit +def test_format_prompt_template() -> None: + """Test prompt template formatting with genes and context.""" + with tempfile.TemporaryDirectory() as temp_dir: + config_dir = Path(temp_dir) + templates_file = config_dir / "templates.yaml" + + template_config = { + "templates": { + "test-template": { + "description": "Test template", + "optimized_for": ["perplexity"], + "supports_json_schema": True, + "template": "Analyze {genes} in {context} context.", + } + } + } + + with open(templates_file, "w") as f: + yaml.dump(template_config, f) + + manager = YAMLConfigManager(str(config_dir)) + formatted = manager.format_prompt_template("test-template", ["TP53", "BRCA1"], "cancer") + + assert formatted == "Analyze TP53, BRCA1 in cancer context." 
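
For orientation, here is a minimal, hypothetical caller sketch showing how the template formatting and metadata APIs exercised by these tests might be combined. The template name, gene list, and placeholder schema string are illustrative only and are not part of this diff; the plain-string substitution of `{schema}` is an assumption about how a caller might use `schema_instruction`, not an API provided by the module.

```python
# Hypothetical caller sketch (not part of this diff): combine the template
# formatting and metadata APIs that the tests above exercise.
from langpa.services.yaml_config import get_config_manager

manager = get_config_manager()

# Build the user prompt from a template shipped with the defaults.
prompt = manager.format_prompt_template(
    "gene_analysis_academic", ["TP53", "BRCA1"], "cancer tumor suppressor genes"
)

# If the template advertises JSON-schema support, derive a system prompt from
# its schema_instruction; the schema string below is only a placeholder.
metadata = manager.get_template_metadata("gene_analysis_academic")
system_prompt = None
if metadata["supports_json_schema"] and metadata["schema_instruction"]:
    placeholder_schema = '{"type": "object"}'
    # Simple substitution of the {schema} placeholder in the instruction text.
    system_prompt = metadata["schema_instruction"].replace("{schema}", placeholder_schema)
```
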
+ + +@pytest.mark.unit +def test_merge_config_overrides() -> None: + """Test merging configuration overrides.""" + base_config = DeepSearchConfig( + provider="perplexity", + model="sonar-small", + provider_params={"reasoning_effort": "low"}, + timeout=120, + ) + + manager = YAMLConfigManager() + overrides = {"model": "sonar-reasoning-pro", "timeout": 180} + + merged = manager.merge_config_overrides(base_config, overrides) + + assert merged.provider == "perplexity" # Unchanged + assert merged.model == "sonar-reasoning-pro" # Overridden + assert merged.timeout == 180 # Overridden + assert merged.provider_params == {"reasoning_effort": "low"} # Unchanged + + +@pytest.mark.unit +def test_merge_config_invalid_field() -> None: + """Test error when merging unknown configuration field.""" + base_config = DeepSearchConfig( + provider="perplexity", + model="sonar-small", + provider_params={}, + ) + + manager = YAMLConfigManager() + overrides = {"invalid_field": "value"} + + with pytest.raises(ConfigurationError) as exc_info: + manager.merge_config_overrides(base_config, overrides) + + assert "Unknown configuration field: invalid_field" in str(exc_info.value) + + +@pytest.mark.unit +def test_list_available_presets() -> None: + """Test listing available presets.""" + with tempfile.TemporaryDirectory() as temp_dir: + config_dir = Path(temp_dir) + presets_file = config_dir / "presets.yaml" + + preset_config = { + "presets": { + "preset1": { + "description": "First preset", + "provider": "perplexity", + "model": "test", + }, + "preset2": {"description": "Second preset", "provider": "openai", "model": "test"}, + } + } + + with open(presets_file, "w") as f: + yaml.dump(preset_config, f) + + manager = YAMLConfigManager(str(config_dir)) + presets = manager.list_available_presets() + + assert len(presets) == 2 + assert presets["preset1"] == "First preset" + assert presets["preset2"] == "Second preset" + + +@pytest.mark.unit +def test_list_available_templates() -> None: + """Test listing available templates.""" + with tempfile.TemporaryDirectory() as temp_dir: + config_dir = Path(temp_dir) + templates_file = config_dir / "templates.yaml" + + template_config = { + "templates": { + "template1": {"description": "First template", "optimized_for": ["perplexity"]}, + "template2": {"description": "Second template", "optimized_for": ["openai"]}, + } + } + + with open(templates_file, "w") as f: + yaml.dump(template_config, f) + + manager = YAMLConfigManager(str(config_dir)) + templates = manager.list_available_templates() + + assert len(templates) == 2 + assert templates["template1"] == "First template" + assert templates["template2"] == "Second template" + + +@pytest.mark.unit +def test_invalid_yaml_error() -> None: + """Test error handling for invalid YAML files.""" + with tempfile.TemporaryDirectory() as temp_dir: + config_dir = Path(temp_dir) + presets_file = config_dir / "presets.yaml" + + # Write invalid YAML + with open(presets_file, "w") as f: + f.write("invalid: yaml: content: [") + + manager = YAMLConfigManager(str(config_dir)) + + with pytest.raises(ConfigurationError) as exc_info: + manager.list_available_presets() + + assert "Invalid YAML" in str(exc_info.value) + + +@pytest.mark.unit +def test_missing_config_section_error() -> None: + """Test error when configuration file is missing required sections.""" + with tempfile.TemporaryDirectory() as temp_dir: + config_dir = Path(temp_dir) + presets_file = config_dir / "presets.yaml" + + # Write YAML without 'presets' key + with open(presets_file, "w") as f: + 
yaml.dump({"other_key": "value"}, f) + + manager = YAMLConfigManager(str(config_dir)) + + # Mock _get_config_paths to only return our test directory (no fallback to defaults) + with patch.object(manager, "_get_config_paths", return_value=[config_dir]): + with pytest.raises(ConfigurationError) as exc_info: + manager.list_available_presets() + + assert "missing 'presets' key" in str(exc_info.value) + + +@pytest.mark.unit +def test_global_config_manager_functions() -> None: + """Test global convenience functions work with default config.""" + # These functions should work with the built-in default configs + # even if no custom config directory is provided + presets = list_available_presets() + assert isinstance(presets, dict) + assert "perplexity-sonar-pro" in presets + + templates = list_available_templates() + assert isinstance(templates, dict) + assert "gene_analysis_academic" in templates + + # Test getting a default preset + config = get_preset_config("perplexity-sonar-pro") + assert config.provider == "perplexity" + assert config.model == "sonar-reasoning-pro" + + # Test formatting with default template + formatted = format_prompt_template("gene_analysis_academic", ["TP53"], "cancer") + assert "TP53" in formatted + assert "cancer" in formatted + + +@pytest.mark.unit +def test_template_metadata() -> None: + """Test getting template metadata.""" + with tempfile.TemporaryDirectory() as temp_dir: + config_dir = Path(temp_dir) + templates_file = config_dir / "templates.yaml" + + template_config = { + "templates": { + "test-template": { + "description": "Test template for metadata", + "optimized_for": ["perplexity", "openai"], + "supports_json_schema": True, + "schema_instruction": "Please format as JSON: {schema}", + "template": "Test template: {genes} {context}", + } + } + } + + with open(templates_file, "w") as f: + yaml.dump(template_config, f) + + manager = YAMLConfigManager(str(config_dir)) + metadata = manager.get_template_metadata("test-template") + + assert metadata["description"] == "Test template for metadata" + assert metadata["optimized_for"] == ["perplexity", "openai"] + assert metadata["supports_json_schema"] is True + assert metadata["schema_instruction"] == "Please format as JSON: {schema}" + + +@pytest.mark.unit +def test_template_metadata_without_schema_instruction() -> None: + """Test getting template metadata when schema_instruction is missing.""" + with tempfile.TemporaryDirectory() as temp_dir: + config_dir = Path(temp_dir) + templates_file = config_dir / "templates.yaml" + + template_config = { + "templates": { + "test-template": { + "description": "Test template without schema instruction", + "optimized_for": ["perplexity"], + "supports_json_schema": True, + "template": "Test template: {genes} {context}", + # Note: no schema_instruction field + } + } + } + + with open(templates_file, "w") as f: + yaml.dump(template_config, f) + + manager = YAMLConfigManager(str(config_dir)) + metadata = manager.get_template_metadata("test-template") + + assert metadata["description"] == "Test template without schema instruction" + assert metadata["optimized_for"] == ["perplexity"] + assert metadata["supports_json_schema"] is True + assert metadata["schema_instruction"] is None + + +@pytest.mark.unit +def test_schema_instruction_with_placeholder() -> None: + """Test that schema instruction properly handles {schema} placeholder.""" + with tempfile.TemporaryDirectory() as temp_dir: + config_dir = Path(temp_dir) + templates_file = config_dir / "templates.yaml" + + template_config = { + 
"templates": { + "test-template": { + "description": "Test template with schema placeholder", + "optimized_for": ["perplexity"], + "supports_json_schema": True, + "schema_instruction": """Custom instruction for schema compliance. + +Please format your response as JSON following this structure: +{schema} + +Ensure all fields are complete.""", + "template": "Test template: {genes} {context}", + } + } + } + + with open(templates_file, "w") as f: + yaml.dump(template_config, f) + + manager = YAMLConfigManager(str(config_dir)) + metadata = manager.get_template_metadata("test-template") + + instruction = metadata["schema_instruction"] + assert "{schema}" in instruction + assert "Custom instruction" in instruction + assert "Please format your response as JSON" in instruction + + +@pytest.mark.unit +def test_deepsearch_config_dataclass() -> None: + """Test DeepSearchConfig dataclass behavior.""" + config = DeepSearchConfig( + provider="perplexity", + model="sonar-reasoning-pro", + provider_params={"reasoning_effort": "high"}, + ) + + # Test default values + assert config.timeout == 180 + assert config.prompt_template == "gene_analysis_academic" + assert config.description == "" + + # Test explicit values + assert config.provider == "perplexity" + assert config.model == "sonar-reasoning-pro" + assert config.provider_params == {"reasoning_effort": "high"} + + +@pytest.mark.unit +def test_configuration_caching() -> None: + """Test that configuration is cached for performance.""" + with tempfile.TemporaryDirectory() as temp_dir: + config_dir = Path(temp_dir) + presets_file = config_dir / "presets.yaml" + + preset_config = { + "presets": { + "test-preset": { + "description": "Cached preset", + "provider": "perplexity", + "model": "sonar-small", + "timeout": 120, + "prompt_template": "gene_analysis_academic", + "provider_params": {}, + } + } + } + + with open(presets_file, "w") as f: + yaml.dump(preset_config, f) + + manager = YAMLConfigManager(str(config_dir)) + + # First call should load and cache + presets1 = manager._load_presets() + # Second call should return cached result + presets2 = manager._load_presets() + + # Should be the same object (cached) + assert presets1 is presets2 + + +@pytest.mark.unit +def test_config_manager_global_instance() -> None: + """Test global config manager instance management.""" + # Get default instance + manager1 = get_config_manager() + manager2 = get_config_manager() + + # Should be same instance + assert manager1 is manager2 + + # Getting with config_dir should create new instance + with tempfile.TemporaryDirectory() as temp_dir: + manager3 = get_config_manager(temp_dir) + assert manager3 is not manager1 + assert manager3.config_dir == Path(temp_dir)