diff --git a/assets/schemas/project.schema.json b/assets/schemas/project.schema.json new file mode 100644 index 0000000..a30f88c --- /dev/null +++ b/assets/schemas/project.schema.json @@ -0,0 +1,1189 @@ +{ + "$defs": { + "BatchConfig": { + "additionalProperties": true, + "description": "Batch processing configuration.", + "properties": { + "max_batches": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Max Batches" + }, + "max_consecutive_check_failures": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Max Consecutive Check Failures" + }, + "max_size": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Max Size" + }, + "send_delay": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Send Delay" + }, + "size": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Size" + }, + "tick": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Tick" + } + }, + "title": "BatchConfig", + "type": "object" + }, + "CachePolicy": { + "additionalProperties": true, + "description": "Caching options for model generation.", + "properties": { + "expiry": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": "1W", + "title": "Expiry" + }, + "per_epoch": { + "default": true, + "title": "Per Epoch", + "type": "boolean" + }, + "scopes": { + "additionalProperties": true, + "title": "Scopes", + "type": "object" + } + }, + "title": "CachePolicy", + "type": "object" + }, + "GenerateConfig": { + "additionalProperties": true, + "description": "Model generation options.", + "properties": { + "attempt_timeout": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Attempt Timeout" + }, + "batch": { 
+ "anyOf": [ + { + "type": "boolean" + }, + { + "type": "integer" + }, + { + "$ref": "#/$defs/BatchConfig" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Batch" + }, + "best_of": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Best Of" + }, + "cache": { + "anyOf": [ + { + "type": "boolean" + }, + { + "$ref": "#/$defs/CachePolicy" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Cache" + }, + "cache_prompt": { + "anyOf": [ + { + "const": "auto", + "type": "string" + }, + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Cache Prompt" + }, + "effort": { + "anyOf": [ + { + "enum": [ + "low", + "medium", + "high" + ], + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Effort" + }, + "extra_body": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Extra Body" + }, + "frequency_penalty": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Frequency Penalty" + }, + "internal_tools": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Internal Tools" + }, + "logit_bias": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Logit Bias" + }, + "logprobs": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Logprobs" + }, + "max_connections": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Max Connections" + }, + "max_retries": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Max Retries" + }, + "max_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + 
"default": null, + "title": "Max Tokens" + }, + "max_tool_output": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Max Tool Output" + }, + "num_choices": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Num Choices" + }, + "parallel_tool_calls": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Parallel Tool Calls" + }, + "presence_penalty": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Presence Penalty" + }, + "reasoning_effort": { + "anyOf": [ + { + "enum": [ + "none", + "minimal", + "low", + "medium", + "high", + "xhigh" + ], + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Reasoning Effort" + }, + "reasoning_history": { + "anyOf": [ + { + "enum": [ + "none", + "all", + "last", + "auto" + ], + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Reasoning History" + }, + "reasoning_summary": { + "anyOf": [ + { + "enum": [ + "none", + "concise", + "detailed", + "auto" + ], + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Reasoning Summary" + }, + "reasoning_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Reasoning Tokens" + }, + "response_schema": { + "anyOf": [ + { + "$ref": "#/$defs/ResponseSchema" + }, + { + "type": "null" + } + ], + "default": null + }, + "seed": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Seed" + }, + "stop_seqs": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Stop Seqs" + }, + "system_message": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "System 
Message" + }, + "temperature": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Temperature" + }, + "timeout": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Timeout" + }, + "top_k": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Top K" + }, + "top_logprobs": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Top Logprobs" + }, + "top_p": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Top P" + }, + "verbosity": { + "anyOf": [ + { + "enum": [ + "low", + "medium", + "high" + ], + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Verbosity" + } + }, + "title": "GenerateConfig", + "type": "object" + }, + "JSONSchema": { + "additionalProperties": true, + "description": "JSON Schema for type.", + "properties": { + "additionalProperties": true, + "anyOf": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/JSONSchema" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Anyof" + }, + "default": { + "default": null, + "title": "Default" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Description" + }, + "enum": { + "anyOf": [ + { + "items": {}, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Enum" + }, + "format": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Format" + }, + "items": { + "anyOf": [ + { + "$ref": "#/$defs/JSONSchema" + }, + { + "type": "null" + } + ], + "default": null + }, + "properties": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Properties" + }, + 
"required": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Required" + }, + "type": { + "anyOf": [ + { + "enum": [ + "string", + "integer", + "number", + "boolean", + "array", + "object", + "null" + ], + "type": "string" + }, + { + "items": { + "enum": [ + "string", + "integer", + "number", + "boolean", + "array", + "object", + "null" + ], + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Type" + } + }, + "title": "JSONSchema", + "type": "object" + }, + "JsonValue": {}, + "ModelConfig": { + "additionalProperties": true, + "description": "Model config.", + "properties": { + "args": { + "additionalProperties": true, + "title": "Args", + "type": "object" + }, + "base_url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Base Url" + }, + "config": { + "$ref": "#/$defs/GenerateConfig" + }, + "model": { + "title": "Model", + "type": "string" + } + }, + "required": [ + "model" + ], + "title": "ModelConfig", + "type": "object" + }, + "ResponseSchema": { + "additionalProperties": true, + "description": "Schema for model response when using Structured Output.", + "properties": { + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Description" + }, + "json_schema": { + "$ref": "#/$defs/JSONSchema" + }, + "name": { + "title": "Name", + "type": "string" + }, + "strict": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Strict" + } + }, + "required": [ + "name", + "json_schema" + ], + "title": "ResponseSchema", + "type": "object" + }, + "ScannerSpec": { + "additionalProperties": true, + "description": "Scanner used by scan.", + "properties": { + "file": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": 
"File" + }, + "name": { + "title": "Name", + "type": "string" + }, + "package_version": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Package Version" + }, + "params": { + "additionalProperties": true, + "title": "Params", + "type": "object" + }, + "version": { + "default": 0, + "title": "Version", + "type": "integer" + } + }, + "required": [ + "name" + ], + "title": "ScannerSpec", + "type": "object" + }, + "ValidationCase": { + "additionalProperties": true, + "description": "Validation case for comparing to scanner results.\n\nA `ValidationCase` specifies the ground truth for a scan of particular id (e.g. transcript id, message id, etc.\n\nUse `target` for single-value or dict validation.\nUse `labels` for validating resultsets with label-specific expectations.", + "properties": { + "id": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "type": "string" + }, + "type": "array" + } + ], + "title": "Id" + }, + "labels": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Labels" + }, + "target": { + "anyOf": [ + { + "$ref": "#/$defs/JsonValue" + }, + { + "type": "null" + } + ], + "default": null + } + }, + "required": [ + "id" + ], + "title": "ValidationCase", + "type": "object" + }, + "ValidationSet": { + "additionalProperties": true, + "description": "Validation set for a scanner.", + "properties": { + "cases": { + "items": { + "$ref": "#/$defs/ValidationCase" + }, + "title": "Cases", + "type": "array" + }, + "predicate": { + "anyOf": [ + { + "enum": [ + "gt", + "gte", + "lt", + "lte", + "eq", + "ne", + "contains", + "startswith", + "endswith", + "icontains", + "iequals" + ], + "type": "string" + }, + { + "type": "null" + } + ], + "default": "eq", + "title": "Predicate" + } + }, + "required": [ + "cases" + ], + "title": "ValidationSet", + "type": "object" + }, + "Worklist": { + "additionalProperties": true, + 
"description": "List of transcript ids to process for a scanner.", + "properties": { + "scanner": { + "title": "Scanner", + "type": "string" + }, + "transcripts": { + "items": { + "type": "string" + }, + "title": "Transcripts", + "type": "array" + } + }, + "required": [ + "scanner", + "transcripts" + ], + "title": "Worklist", + "type": "object" + } + }, + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": true, + "properties": { + "generate_config": { + "anyOf": [ + { + "$ref": "#/$defs/GenerateConfig" + }, + { + "type": "null" + } + ], + "default": null + }, + "limit": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Limit" + }, + "log_level": { + "anyOf": [ + { + "enum": [ + "debug", + "http", + "sandbox", + "info", + "warning", + "error", + "critical", + "notset" + ], + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Log Level" + }, + "max_processes": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Max Processes" + }, + "max_transcripts": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Max Transcripts" + }, + "metadata": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Metadata" + }, + "model": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Model" + }, + "model_args": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Model Args" + }, + "model_base_url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Model Base Url" + }, + "model_roles": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": 
"null" + } + ], + "default": null, + "title": "Model Roles" + }, + "name": { + "default": "job", + "title": "Name", + "type": "string" + }, + "results": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Results" + }, + "scanners": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/ScannerSpec" + }, + "type": "array" + }, + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Scanners" + }, + "shuffle": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Shuffle" + }, + "tags": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Tags" + }, + "transcripts": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Transcripts" + }, + "validation": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Validation" + }, + "worklist": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/Worklist" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Worklist" + } + }, + "title": "Scout Project", + "type": "object" +} diff --git a/assets/schemas/scanjob.schema.json b/assets/schemas/scanjob.schema.json new file mode 100644 index 0000000..8614033 --- /dev/null +++ b/assets/schemas/scanjob.schema.json @@ -0,0 +1,1189 @@ +{ + "$defs": { + "BatchConfig": { + "additionalProperties": true, + "description": "Batch processing configuration.", + "properties": { + "max_batches": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Max Batches" + }, + "max_consecutive_check_failures": { + "anyOf": [ + { + "type": "integer" + }, 
+ { + "type": "null" + } + ], + "default": null, + "title": "Max Consecutive Check Failures" + }, + "max_size": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Max Size" + }, + "send_delay": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Send Delay" + }, + "size": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Size" + }, + "tick": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Tick" + } + }, + "title": "BatchConfig", + "type": "object" + }, + "CachePolicy": { + "additionalProperties": true, + "description": "Caching options for model generation.", + "properties": { + "expiry": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": "1W", + "title": "Expiry" + }, + "per_epoch": { + "default": true, + "title": "Per Epoch", + "type": "boolean" + }, + "scopes": { + "additionalProperties": true, + "title": "Scopes", + "type": "object" + } + }, + "title": "CachePolicy", + "type": "object" + }, + "GenerateConfig": { + "additionalProperties": true, + "description": "Model generation options.", + "properties": { + "attempt_timeout": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Attempt Timeout" + }, + "batch": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "integer" + }, + { + "$ref": "#/$defs/BatchConfig" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Batch" + }, + "best_of": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Best Of" + }, + "cache": { + "anyOf": [ + { + "type": "boolean" + }, + { + "$ref": "#/$defs/CachePolicy" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Cache" + }, + "cache_prompt": { + "anyOf": [ + { + "const": "auto", + "type": "string" 
+ }, + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Cache Prompt" + }, + "effort": { + "anyOf": [ + { + "enum": [ + "low", + "medium", + "high" + ], + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Effort" + }, + "extra_body": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Extra Body" + }, + "frequency_penalty": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Frequency Penalty" + }, + "internal_tools": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Internal Tools" + }, + "logit_bias": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Logit Bias" + }, + "logprobs": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Logprobs" + }, + "max_connections": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Max Connections" + }, + "max_retries": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Max Retries" + }, + "max_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Max Tokens" + }, + "max_tool_output": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Max Tool Output" + }, + "num_choices": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Num Choices" + }, + "parallel_tool_calls": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Parallel Tool Calls" + }, + "presence_penalty": { + "anyOf": [ + { + "type": "number" + }, + { + 
"type": "null" + } + ], + "default": null, + "title": "Presence Penalty" + }, + "reasoning_effort": { + "anyOf": [ + { + "enum": [ + "none", + "minimal", + "low", + "medium", + "high", + "xhigh" + ], + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Reasoning Effort" + }, + "reasoning_history": { + "anyOf": [ + { + "enum": [ + "none", + "all", + "last", + "auto" + ], + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Reasoning History" + }, + "reasoning_summary": { + "anyOf": [ + { + "enum": [ + "none", + "concise", + "detailed", + "auto" + ], + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Reasoning Summary" + }, + "reasoning_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Reasoning Tokens" + }, + "response_schema": { + "anyOf": [ + { + "$ref": "#/$defs/ResponseSchema" + }, + { + "type": "null" + } + ], + "default": null + }, + "seed": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Seed" + }, + "stop_seqs": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Stop Seqs" + }, + "system_message": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "System Message" + }, + "temperature": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Temperature" + }, + "timeout": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Timeout" + }, + "top_k": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Top K" + }, + "top_logprobs": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Top Logprobs" + }, + 
"top_p": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Top P" + }, + "verbosity": { + "anyOf": [ + { + "enum": [ + "low", + "medium", + "high" + ], + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Verbosity" + } + }, + "title": "GenerateConfig", + "type": "object" + }, + "JSONSchema": { + "additionalProperties": true, + "description": "JSON Schema for type.", + "properties": { + "additionalProperties": true, + "anyOf": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/JSONSchema" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Anyof" + }, + "default": { + "default": null, + "title": "Default" + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Description" + }, + "enum": { + "anyOf": [ + { + "items": {}, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Enum" + }, + "format": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Format" + }, + "items": { + "anyOf": [ + { + "$ref": "#/$defs/JSONSchema" + }, + { + "type": "null" + } + ], + "default": null + }, + "properties": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Properties" + }, + "required": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Required" + }, + "type": { + "anyOf": [ + { + "enum": [ + "string", + "integer", + "number", + "boolean", + "array", + "object", + "null" + ], + "type": "string" + }, + { + "items": { + "enum": [ + "string", + "integer", + "number", + "boolean", + "array", + "object", + "null" + ], + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Type" + } + }, + 
"title": "JSONSchema", + "type": "object" + }, + "JsonValue": {}, + "ModelConfig": { + "additionalProperties": true, + "description": "Model config.", + "properties": { + "args": { + "additionalProperties": true, + "title": "Args", + "type": "object" + }, + "base_url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Base Url" + }, + "config": { + "$ref": "#/$defs/GenerateConfig" + }, + "model": { + "title": "Model", + "type": "string" + } + }, + "required": [ + "model" + ], + "title": "ModelConfig", + "type": "object" + }, + "ResponseSchema": { + "additionalProperties": true, + "description": "Schema for model response when using Structured Output.", + "properties": { + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Description" + }, + "json_schema": { + "$ref": "#/$defs/JSONSchema" + }, + "name": { + "title": "Name", + "type": "string" + }, + "strict": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Strict" + } + }, + "required": [ + "name", + "json_schema" + ], + "title": "ResponseSchema", + "type": "object" + }, + "ScannerSpec": { + "additionalProperties": true, + "description": "Scanner used by scan.", + "properties": { + "file": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "File" + }, + "name": { + "title": "Name", + "type": "string" + }, + "package_version": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Package Version" + }, + "params": { + "additionalProperties": true, + "title": "Params", + "type": "object" + }, + "version": { + "default": 0, + "title": "Version", + "type": "integer" + } + }, + "required": [ + "name" + ], + "title": "ScannerSpec", + "type": "object" + }, + "ValidationCase": { + "additionalProperties": true, + "description": "Validation case for 
comparing to scanner results.\n\nA `ValidationCase` specifies the ground truth for a scan of particular id (e.g. transcript id, message id, etc.\n\nUse `target` for single-value or dict validation.\nUse `labels` for validating resultsets with label-specific expectations.", + "properties": { + "id": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "type": "string" + }, + "type": "array" + } + ], + "title": "Id" + }, + "labels": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Labels" + }, + "target": { + "anyOf": [ + { + "$ref": "#/$defs/JsonValue" + }, + { + "type": "null" + } + ], + "default": null + } + }, + "required": [ + "id" + ], + "title": "ValidationCase", + "type": "object" + }, + "ValidationSet": { + "additionalProperties": true, + "description": "Validation set for a scanner.", + "properties": { + "cases": { + "items": { + "$ref": "#/$defs/ValidationCase" + }, + "title": "Cases", + "type": "array" + }, + "predicate": { + "anyOf": [ + { + "enum": [ + "gt", + "gte", + "lt", + "lte", + "eq", + "ne", + "contains", + "startswith", + "endswith", + "icontains", + "iequals" + ], + "type": "string" + }, + { + "type": "null" + } + ], + "default": "eq", + "title": "Predicate" + } + }, + "required": [ + "cases" + ], + "title": "ValidationSet", + "type": "object" + }, + "Worklist": { + "additionalProperties": true, + "description": "List of transcript ids to process for a scanner.", + "properties": { + "scanner": { + "title": "Scanner", + "type": "string" + }, + "transcripts": { + "items": { + "type": "string" + }, + "title": "Transcripts", + "type": "array" + } + }, + "required": [ + "scanner", + "transcripts" + ], + "title": "Worklist", + "type": "object" + } + }, + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": true, + "properties": { + "generate_config": { + "anyOf": [ + { + "$ref": "#/$defs/GenerateConfig" + }, + { + "type": "null" 
+ } + ], + "default": null + }, + "limit": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Limit" + }, + "log_level": { + "anyOf": [ + { + "enum": [ + "debug", + "http", + "sandbox", + "info", + "warning", + "error", + "critical", + "notset" + ], + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Log Level" + }, + "max_processes": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Max Processes" + }, + "max_transcripts": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Max Transcripts" + }, + "metadata": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Metadata" + }, + "model": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Model" + }, + "model_args": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Model Args" + }, + "model_base_url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Model Base Url" + }, + "model_roles": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Model Roles" + }, + "name": { + "default": "job", + "title": "Name", + "type": "string" + }, + "results": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Results" + }, + "scanners": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/ScannerSpec" + }, + "type": "array" + }, + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Scanners" + }, + "shuffle": { + "anyOf": [ + { + "type": 
"boolean" + }, + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Shuffle" + }, + "tags": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Tags" + }, + "transcripts": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Transcripts" + }, + "validation": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Validation" + }, + "worklist": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/Worklist" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Worklist" + } + }, + "title": "Scout Scan Job", + "type": "object" +} diff --git a/package.json b/package.json index f080bd1..8a15f2b 100644 --- a/package.json +++ b/package.json @@ -27,7 +27,8 @@ "Testing" ], "extensionDependencies": [ - "ms-python.python" + "ms-python.python", + "redhat.vscode-yaml" ], "activationEvents": [ "onWebviewPanel:inspect.logview", diff --git a/src/extension.ts b/src/extension.ts index 9d34ceb..e81c130 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -41,6 +41,7 @@ import { activateScoutCodeLens } from "./providers/codelens/scout-codelens-provi import { activateScoutScanManager } from "./providers/scout/scout-scan"; import { activateWorkspaceEnvironment } from "./providers/environment"; import { activateOpenScan } from "./providers/openscan"; +import { activateYamlSchemaProvider } from "./providers/yaml/yaml-schema-provider"; import { PackageManager } from "./core/package/manager"; const kInspectMinimumVersion = "0.3.8"; @@ -270,6 +271,14 @@ export async function activateScout( // Activate code lends activateScoutCodeLens(context); + // Activate YAML schema support for Scout config files + start("Setup YAML Schemas"); + const 
yamlDisposable = await activateYamlSchemaProvider(context); + if (yamlDisposable) { + context.subscriptions.push(yamlDisposable); + } + end("Setup YAML Schemas"); + return Promise.resolve([ scoutManager, [...scoutViewCommands, ...activityBarCommands, ...scanManagerCommands], diff --git a/src/providers/yaml/yaml-schema-provider.ts b/src/providers/yaml/yaml-schema-provider.ts new file mode 100644 index 0000000..202fdb7 --- /dev/null +++ b/src/providers/yaml/yaml-schema-provider.ts @@ -0,0 +1,207 @@ +import { readFileSync } from "fs"; +import { join } from "path"; + +import { Disposable, ExtensionContext, extensions, Uri } from "vscode"; + +import { log } from "../../core/log"; + +// Magic comment pattern for detecting scan job config files +const SCANJOB_MAGIC_PATTERN = /^#\s*scanjob\s*$/m; + +// Auto-detection patterns: both must be present for ScanJobConfig +const TRANSCRIPTS_PATTERN = /^transcripts:\s/m; +const SCANNERS_PATTERN = /^scanners:\s/m; + +// Schema identifier constants +const SCHEMA_ID = "scout-yaml-schemas"; +const PROJECT_SCHEMA_URI = `${SCHEMA_ID}:///project`; +const SCANJOB_SCHEMA_URI = `${SCHEMA_ID}:///scanjob`; + +/** + * Minimal shape of the API object exported by the `redhat.vscode-yaml` + * extension; only the `registerContributor` member used by this module is + * declared. + * + * `requestUri` receives a resource string and returns the schema URI to apply + * to it (or undefined for none); `requestContent` receives a schema URI and + * returns the schema text (or undefined). + * + * NOTE(review): declared locally rather than imported - confirm it matches + * the API of the installed YAML extension version. + */ +interface YamlExtensionApi { + registerContributor( + schemaId: string, + requestUri: (resource: string) => string | undefined, + requestContent: (uri: string) => string | undefined + ): void; +} + +/** + * Activates YAML schema support for Scout configuration files. 
// Closes the summary comment left open at the end of the preceding line. */

/** Number of leading characters of a YAML file kept for scan-job detection. */
const SCANJOB_DETECTION_PREFIX_CHARS = 2000;

/** Number of leading lines searched for the `# scanjob` magic comment. */
const SCANJOB_MAGIC_COMMENT_MAX_LINES = 10;

/** Cached head of a file, keyed by URI string, with the read time in ms. */
type ScanJobContentCache = Map<string, { content: string; timestamp: number }>;

/** Raw JSON text of the schemas bundled under `assets/schemas`. */
interface BundledSchemas {
  project: string;
  scanjob: string;
}

/**
 * Reads both bundled schema files from `schemasDir`.
 *
 * @param schemasDir Directory containing `project.schema.json` and
 *   `scanjob.schema.json`.
 * @returns The schema texts, or `undefined` (after logging) if either file
 *   cannot be read.
 */
function loadBundledSchemas(schemasDir: string): BundledSchemas | undefined {
  try {
    return {
      project: readFileSync(join(schemasDir, "project.schema.json"), "utf-8"),
      scanjob: readFileSync(join(schemasDir, "scanjob.schema.json"), "utf-8"),
    };
  } catch (error) {
    log.error(`Failed to load YAML schemas: ${String(error)}`);
    return undefined;
  }
}

/**
 * Activates YAML schema support for Scout configuration files.
 *
 * Provides:
 * - Schema validation and completion for scout.yaml (project config)
 * - Schema validation and completion for scan job config files
 *   (detected via magic comment or presence of transcripts+scanners fields)
 *
 * Every failure path is logged and reported as `undefined`; this function
 * does not throw.
 *
 * @param context Extension context; its `extensionPath` locates the bundled
 *   schema files.
 * @returns A disposable that clears the detection cache, or `undefined` when
 *   the YAML extension is missing, lacks `registerContributor`, or the
 *   schemas could not be loaded. Disposing does not unregister the
 *   contributor: the API used here exposes no way to do so.
 */
export async function activateYamlSchemaProvider(
  context: ExtensionContext
): Promise<Disposable | undefined> {
  const yamlExtension = extensions.getExtension("redhat.vscode-yaml");

  if (!yamlExtension) {
    log.info("YAML extension not found, skipping schema registration");
    return undefined;
  }

  try {
    // Activate the YAML extension if needed
    if (!yamlExtension.isActive) {
      await yamlExtension.activate();
    }

    // The exports object is untyped and may be absent, so keep `undefined`
    // in the type and let the guard below narrow it.
    const yamlApi = yamlExtension.exports as YamlExtensionApi | undefined;

    if (!yamlApi?.registerContributor) {
      log.warn("YAML extension API does not support registerContributor");
      return undefined;
    }

    // Load schemas from bundled assets
    const schemas = loadBundledSchemas(
      join(context.extensionPath, "assets", "schemas")
    );
    if (!schemas) {
      return undefined;
    }

    // Cache for file content checks
    const contentCache: ScanJobContentCache = new Map();
    const CACHE_TTL_MS = 5000; // 5 second cache

    /**
     * Request schema URI callback - determines which schema to use for a file
     */
    const onRequestSchemaURI = (resource: string): string | undefined => {
      try {
        const uri = Uri.parse(resource);
        const fileName = uri.path.split("/").pop() ?? "";

        // Check for scout.yaml (project config)
        if (fileName === "scout.yaml") {
          log.info(`YAML schema: matched scout.yaml, returning ${PROJECT_SCHEMA_URI}`);
          return PROJECT_SCHEMA_URI;
        }

        // Check for scan job config files
        const isYamlFile =
          fileName.endsWith(".yaml") || fileName.endsWith(".yml");
        if (isYamlFile && isScanJobConfig(uri, contentCache, CACHE_TTL_MS)) {
          return SCANJOB_SCHEMA_URI;
        }

        return undefined;
      } catch (error) {
        log.error(`Error in onRequestSchemaURI: ${String(error)}`);
        return undefined;
      }
    };

    /**
     * Request schema content callback - returns the schema JSON
     */
    const onRequestSchemaContent = (schemaUri: string): string | undefined => {
      try {
        log.info(`YAML schema: onRequestSchemaContent called with ${schemaUri}`);
        const uri = Uri.parse(schemaUri);

        if (uri.scheme !== SCHEMA_ID) {
          log.info(`YAML schema: scheme ${uri.scheme} doesn't match ${SCHEMA_ID}`);
          return undefined;
        }

        switch (uri.path) {
          case "/project":
            log.info(`YAML schema: returning project schema (${schemas.project.length} bytes)`);
            return schemas.project;
          case "/scanjob":
            log.info(`YAML schema: returning scanjob schema (${schemas.scanjob.length} bytes)`);
            return schemas.scanjob;
          default:
            log.info(`YAML schema: unknown path ${uri.path}`);
            return undefined;
        }
      } catch (error) {
        log.error(`Error in onRequestSchemaContent: ${String(error)}`);
        return undefined;
      }
    };

    // Register the schema contributor
    yamlApi.registerContributor(
      SCHEMA_ID,
      onRequestSchemaURI,
      onRequestSchemaContent
    );

    log.info("Registered YAML schema contributor for Scout configs");

    // Return a disposable that clears the cache
    return {
      dispose: () => {
        contentCache.clear();
      },
    };
  } catch (error) {
    log.error(`Failed to activate YAML schema provider: ${String(error)}`);
    return undefined;
  }
}

/**
 * Check if a file is a ScanJobConfig based on content.
 *
 * Detection methods:
 * 1. Magic comment: `# scanjob` in the first 10 lines
 * 2. Auto-detection: Both `transcripts:` and `scanners:` as top-level fields
 *
 * Only the first 2000 characters of the file are inspected, and that prefix
 * is what gets cached. The file is read from disk via `uri.fsPath`, so
 * unsaved editor changes are not seen.
 *
 * @param uri Location of the YAML file to inspect.
 * @param cache Prefix cache shared across calls; updated in place.
 * @param cacheTtlMs Maximum age in milliseconds of a usable cache entry.
 * @returns `true` if either detection method matches; `false` otherwise,
 *   including when the file cannot be read.
 */
function isScanJobConfig(
  uri: Uri,
  cache: ScanJobContentCache,
  cacheTtlMs: number
): boolean {
  try {
    const now = Date.now();
    const cacheKey = uri.toString();
    const cached = cache.get(cacheKey);

    let content: string;
    if (cached && now - cached.timestamp < cacheTtlMs) {
      content = cached.content;
    } else {
      // Drop every expired entry on a miss so the cache cannot grow without
      // bound over a long session. Deleting during Map iteration is safe.
      for (const [key, entry] of cache) {
        if (now - entry.timestamp >= cacheTtlMs) {
          cache.delete(key);
        }
      }

      // Cache only first portion for performance (enough for detection)
      content = readFileSync(uri.fsPath, "utf-8").slice(
        0,
        SCANJOB_DETECTION_PREFIX_CHARS
      );
      cache.set(cacheKey, { content, timestamp: now });
    }

    // Check for magic comment in first 10 lines
    const firstLines = content
      .split("\n")
      .slice(0, SCANJOB_MAGIC_COMMENT_MAX_LINES)
      .join("\n");
    if (SCANJOB_MAGIC_PATTERN.test(firstLines)) {
      return true;
    }

    // Auto-detect: check for both transcripts and scanners top-level fields
    return TRANSCRIPTS_PATTERN.test(content) && SCANNERS_PATTERN.test(content);
  } catch {
    // Deliberate best effort: the file may not exist or be readable, in
    // which case it is simply not treated as a scan job config.
    return false;
  }
}