diff --git a/rdf/contexts/ai.jsonld b/rdf/contexts/ai.jsonld new file mode 100644 index 0000000..57d5f17 --- /dev/null +++ b/rdf/contexts/ai.jsonld @@ -0,0 +1,150 @@ +{ + "@context": [ + "./base.jsonld", + { + "LLMModel": { + "@id": "om:LLMModel", + "@type": ["om:DataAsset", "dcat:DataService"] + }, + "AIApplication": { + "@id": "om:AIApplication", + "@type": ["om:DataAsset", "dcat:DataService"] + }, + "AIGovernancePolicy": { + "@id": "om:AIGovernancePolicy", + "@type": ["om:Entity", "om:Policy"] + }, + "PromptTemplate": { + "@id": "om:PromptTemplate", + "@type": "om:Entity" + }, + "McpServer": { + "@id": "om:MCPServer", + "@type": ["om:DataAsset", "dcat:DataService"] + }, + "AgentExecution": { + "@id": "om:AgentExecution", + "@type": ["om:Entity", "prov:Activity"] + }, + "McpExecution": { + "@id": "om:MCPExecution", + "@type": ["om:Entity", "prov:Activity"] + }, + + "applicationType": { + "@id": "om:applicationType", + "@type": "xsd:string" + }, + "developmentStage": { + "@id": "om:developmentStage", + "@type": "xsd:string" + }, + "modelType": { + "@id": "om:modelType", + "@type": "xsd:string" + }, + "modelCapabilities": { + "@id": "om:modelCapability", + "@type": "xsd:string", + "@container": "@set" + }, + "baseModel": { + "@id": "om:baseModel", + "@type": "@id" + }, + "trainingMetadata": { + "@id": "om:trainingMetadata", + "@type": "@json" + }, + "policyType": { + "@id": "om:policyType", + "@type": "xsd:string" + }, + "rules": { + "@id": "om:hasPolicyRule", + "@type": "@json", + "@container": "@set" + }, + "serverType": { + "@id": "om:serverType", + "@type": "xsd:string" + }, + "transportType": { + "@id": "om:transportType", + "@type": "xsd:string" + }, + "tools": { + "@id": "om:hasMCPTool", + "@type": "@json", + "@container": "@set" + }, + "resources": { + "@id": "om:hasMCPResource", + "@type": "@json", + "@container": "@set" + }, + "prompts": { + "@id": "om:hasMCPPrompt", + "@type": "@json", + "@container": "@set" + }, + + "models": { + "@id": "om:usesModel", 
+ "@type": "@id", + "@container": "@set" + }, + "promptTemplates": { + "@id": "om:usesPromptTemplate", + "@type": "@id", + "@container": "@set" + }, + "mcpServers": { + "@id": "om:usesMCPServer", + "@type": "@id", + "@container": "@set" + }, + "governancePolicies": { + "@id": "om:governedBy", + "@type": "@id", + "@container": "@set" + }, + + "executionStatus": { + "@id": "om:executionStatus", + "@type": "xsd:string" + }, + "startTime": { + "@id": "prov:startedAtTime", + "@type": "xsd:dateTime" + }, + "endTime": { + "@id": "prov:endedAtTime", + "@type": "xsd:dateTime" + }, + "agent": { + "@id": "om:executedAgent", + "@type": "@id" + }, + "mcpServer": { + "@id": "om:executedMCPServer", + "@type": "@id" + }, + "modelCalls": { + "@id": "om:hasModelCall", + "@type": "@json", + "@container": "@list" + }, + "toolCalls": { + "@id": "om:hasToolCall", + "@type": "@json", + "@container": "@list" + }, + "resourceAccesses": { + "@id": "om:hasResourceAccess", + "@type": "@json", + "@container": "@list" + } + } + ] +} diff --git a/rdf/contexts/learning.jsonld b/rdf/contexts/learning.jsonld new file mode 100644 index 0000000..bb3fb31 --- /dev/null +++ b/rdf/contexts/learning.jsonld @@ -0,0 +1,36 @@ +{ + "@context": [ + "./base.jsonld", + { + "LearningResource": { + "@id": "om:LearningResource", + "@type": ["om:Entity", "foaf:Document"] + }, + "resourceType": { + "@id": "om:resourceType", + "@type": "xsd:string" + }, + "category": { + "@id": "om:resourceCategory", + "@type": "xsd:string" + }, + "difficulty": { + "@id": "om:resourceDifficulty", + "@type": "xsd:string" + }, + "url": { + "@id": "dct:source", + "@type": "@id" + }, + "duration": { + "@id": "om:duration", + "@type": "xsd:string" + }, + "surfaces": { + "@id": "om:productSurface", + "@type": "xsd:string", + "@container": "@set" + } + } + ] +} diff --git a/rdf/contexts/service.jsonld b/rdf/contexts/service.jsonld index 40b1fa5..51b7971 100644 --- a/rdf/contexts/service.jsonld +++ b/rdf/contexts/service.jsonld @@ -55,6 
+55,14 @@ "@id": "om:DriveService", "@type": ["om:Service", "dcat:DataService"] }, + "LLMService": { + "@id": "om:LLMService", + "@type": ["om:Service", "dcat:DataService"] + }, + "MCPService": { + "@id": "om:MCPService", + "@type": ["om:Service", "dcat:DataService"] + }, "serviceType": { "@id": "om:serviceType", "@type": "xsd:string" @@ -107,6 +115,14 @@ "apiConnection": { "@id": "om:hasAPIConnection", "@type": "@json" + }, + "llmConnection": { + "@id": "om:hasLLMConnection", + "@type": "@json" + }, + "mcpConnection": { + "@id": "om:hasMCPConnection", + "@type": "@json" } } ] diff --git a/rdf/ontology/openmetadata.ttl b/rdf/ontology/openmetadata.ttl index 84cb16a..491d545 100644 --- a/rdf/ontology/openmetadata.ttl +++ b/rdf/ontology/openmetadata.ttl @@ -24,11 +24,12 @@ # OpenMetadata Complete Ontology om: a owl:Ontology ; - owl:versionInfo "1.0.0" ; + owl:versionInfo "1.13.0" ; dct:title "OpenMetadata Ontology" ; dct:description "Complete ontology for OpenMetadata covering all entities and relationships" ; dct:creator "OpenMetadata Team" ; dct:created "2025-08-24"^^xsd:date ; + dct:modified "2026-04-23"^^xsd:date ; dct:license ; owl:imports dcat: , prov: , skos: . @@ -110,6 +111,16 @@ om:DriveService a owl:Class ; rdfs:comment "File and document storage service" ; rdfs:subClassOf om:Service, dcat:DataService . +om:LLMService a owl:Class ; + rdfs:label "LLM Service" ; + rdfs:comment "Service managing Large Language Model providers (OpenAI, Anthropic, Bedrock, VertexAI, Ollama, etc.)" ; + rdfs:subClassOf om:Service, dcat:DataService . + +om:MCPService a owl:Class ; + rdfs:label "MCP Service" ; + rdfs:comment "Service for discovering and managing MCP (Model Context Protocol) servers" ; + rdfs:subClassOf om:Service, dcat:DataService . 
+ ################################################################# # Data Asset Classes ################################################################# @@ -214,6 +225,68 @@ om:Worksheet a owl:Class ; rdfs:comment "Worksheet within a spreadsheet" ; rdfs:subClassOf om:DataAsset . +################################################################# +# AI / LLM / MCP Classes (OpenMetadata 1.13) +################################################################# + +om:LLMModel a owl:Class ; + rdfs:label "LLM Model" ; + rdfs:comment "Registered Large Language Model deployment, fine-tune, or base model referenced by AI applications and agents" ; + rdfs:subClassOf om:DataAsset, dcat:DataService . + +om:AIApplication a owl:Class ; + rdfs:label "AI Application" ; + rdfs:comment "AI system such as a chatbot, agent, copilot, RAG application or multi-agent pipeline that may use multiple LLM models and tools" ; + rdfs:subClassOf om:DataAsset, dcat:DataService . + +om:AIGovernancePolicy a owl:Class ; + rdfs:label "AI Governance Policy" ; + rdfs:comment "Policy rules for AI/LLM usage, compliance, and risk management (model approval, data access, bias thresholds, cost controls, etc.)" ; + rdfs:subClassOf om:Entity, om:Policy . + +om:PromptTemplate a owl:Class ; + rdfs:label "Prompt Template" ; + rdfs:comment "Reusable prompt template with variables, system prompts and examples for consistent AI behavior" ; + rdfs:subClassOf om:Entity . + +om:MCPServer a owl:Class ; + rdfs:label "MCP Server" ; + rdfs:comment "Model Context Protocol server deployment that exposes tools, resources and prompts to AI applications" ; + rdfs:subClassOf om:DataAsset, dcat:DataService . + +om:AgentExecution a owl:Class ; + rdfs:label "Agent Execution" ; + rdfs:comment "Single execution run of an AI agent, tracking inputs, outputs, lineage, metrics and errors for observability and governance" ; + rdfs:subClassOf om:Entity, prov:Activity . 
+ +om:MCPExecution a owl:Class ; + rdfs:label "MCP Execution" ; + rdfs:comment "Single execution session of an MCP server, tracking tool calls, resource accesses, prompt uses and data lineage for audit and compliance" ; + rdfs:subClassOf om:Entity, prov:Activity . + +################################################################# +# Learning Classes (OpenMetadata 1.13) +################################################################# + +om:LearningResource a owl:Class ; + rdfs:label "Learning Resource" ; + rdfs:comment "Learning resource such as an in-product tutorial, Storylane walkthrough, video or article contextualized for product surfaces" ; + rdfs:subClassOf om:Entity, foaf:Document . + +################################################################# +# Column Context Classes (OpenMetadata 1.13) +################################################################# + +om:TableColumn a owl:Class ; + rdfs:label "Table Column" ; + rdfs:comment "Table column context type used to attach custom properties to columns of a Table" ; + rdfs:subClassOf om:Column . + +om:DashboardDataModelColumn a owl:Class ; + rdfs:label "Dashboard Data Model Column" ; + rdfs:comment "Dashboard data model column context type used to attach custom properties to columns of a DashboardDataModel" ; + rdfs:subClassOf om:Column . + ################################################################# # Metadata & Governance Classes ################################################################# @@ -1806,3 +1879,121 @@ om:dataModelType a owl:DatatypeProperty ; rdfs:comment "Type of data model (TableauDataModel, SupersetDataModel, MetabaseDataModel, LookMlView, LookMlExplore, PowerBIDataModel, QlikDataModel)" ; rdfs:domain om:DashboardDataModel ; rdfs:range xsd:string . 
+ +################################################################# +# AI / LLM / MCP Properties (OpenMetadata 1.13) +################################################################# + +om:modelType a owl:DatatypeProperty ; + rdfs:label "model type" ; + rdfs:comment "Type of LLM model (BaseModel, FineTuned, Quantized, Distilled, Adapter, Custom)" ; + rdfs:domain om:LLMModel ; + rdfs:range xsd:string . + +om:baseModel a owl:ObjectProperty ; + rdfs:label "base model" ; + rdfs:comment "Base model this model was trained or fine-tuned from" ; + rdfs:domain om:LLMModel ; + rdfs:range om:LLMModel . + +om:modelCapability a owl:DatatypeProperty ; + rdfs:label "model capability" ; + rdfs:comment "Capability exposed by the LLM model (TextGeneration, CodeGeneration, Embeddings, Chat, Vision, Audio, FunctionCalling, ToolUse)" ; + rdfs:domain om:LLMModel ; + rdfs:range xsd:string . + +om:applicationType a owl:DatatypeProperty ; + rdfs:label "application type" ; + rdfs:comment "Type of AI application (Chatbot, Agent, Copilot, Assistant, RAG, CodeGenerator, DataAnalyst, AutomationBot, MultiAgent, Custom)" ; + rdfs:domain om:AIApplication ; + rdfs:range xsd:string . + +om:developmentStage a owl:DatatypeProperty ; + rdfs:label "development stage" ; + rdfs:domain om:AIApplication ; + rdfs:range xsd:string . + +om:usesModel a owl:ObjectProperty ; + rdfs:label "uses model" ; + rdfs:comment "AI application or agent execution uses an LLM model" ; + rdfs:subPropertyOf prov:used ; + rdfs:range om:LLMModel . + +om:usesPromptTemplate a owl:ObjectProperty ; + rdfs:label "uses prompt template" ; + rdfs:subPropertyOf prov:used ; + rdfs:range om:PromptTemplate . + +om:usesMCPServer a owl:ObjectProperty ; + rdfs:label "uses MCP server" ; + rdfs:comment "AI application uses an MCP server for tools, resources or prompts" ; + rdfs:subPropertyOf prov:used ; + rdfs:domain om:AIApplication ; + rdfs:range om:MCPServer . 
+ +om:governedBy a owl:ObjectProperty ; + rdfs:label "governed by" ; + rdfs:comment "AI application, LLM model or MCP server is governed by an AI governance policy" ; + rdfs:range om:AIGovernancePolicy . + +om:policyType a owl:DatatypeProperty ; + rdfs:label "policy type" ; + rdfs:comment "Type of AI governance policy (ModelApproval, DataAccess, BiasThreshold, ComplianceCheck, CostControl, PerformanceStandard, SecurityControl)" ; + rdfs:domain om:AIGovernancePolicy ; + rdfs:range xsd:string . + +om:serverType a owl:DatatypeProperty ; + rdfs:label "MCP server type" ; + rdfs:comment "Type of MCP server (DataAccess, FileSystem, WebAPI, Database, Cloud, Security, Development, Communication, Custom)" ; + rdfs:domain om:MCPServer ; + rdfs:range xsd:string . + +om:transportType a owl:DatatypeProperty ; + rdfs:label "MCP transport type" ; + rdfs:comment "Transport protocol used by the MCP server" ; + rdfs:domain om:MCPServer ; + rdfs:range xsd:string . + +om:executionStatus a owl:DatatypeProperty ; + rdfs:label "execution status" ; + rdfs:comment "Status of an agent or MCP execution (Running, Success, Failed, Timeout, Cancelled; agent executions may also report PartialSuccess)" ; + rdfs:range xsd:string . + +om:executedAgent a owl:ObjectProperty ; + rdfs:label "executed agent" ; + rdfs:comment "Agent execution was an execution of this AI application" ; + rdfs:domain om:AgentExecution ; + rdfs:range om:AIApplication . + +om:executedMCPServer a owl:ObjectProperty ; + rdfs:label "executed MCP server" ; + rdfs:comment "MCP execution was an execution of this MCP server" ; + rdfs:domain om:MCPExecution ; + rdfs:range om:MCPServer . + +om:templateVariable a owl:DatatypeProperty ; + rdfs:label "template variable" ; + rdfs:domain om:PromptTemplate ; + rdfs:range xsd:string . 
+ +################################################################# +# Learning Properties (OpenMetadata 1.13) +################################################################# + +om:resourceType a owl:DatatypeProperty ; + rdfs:label "learning resource type" ; + rdfs:comment "Kind of learning asset (Storylane, Video, Article)" ; + rdfs:domain om:LearningResource ; + rdfs:range xsd:string . + +om:resourceCategory a owl:DatatypeProperty ; + rdfs:label "learning resource category" ; + rdfs:comment "Primary topic grouping (Discovery, Administration, DataGovernance, DataQuality, Observability, AI)" ; + rdfs:domain om:LearningResource ; + rdfs:range xsd:string . + +om:resourceDifficulty a owl:DatatypeProperty ; + rdfs:label "learning resource difficulty" ; + rdfs:comment "Suggested proficiency tier (Intro, Intermediate, Advanced)" ; + rdfs:domain om:LearningResource ; + rdfs:range xsd:string . diff --git a/rdf/shapes/openmetadata-shapes.ttl b/rdf/shapes/openmetadata-shapes.ttl index e8de716..926f0e1 100644 --- a/rdf/shapes/openmetadata-shapes.ttl +++ b/rdf/shapes/openmetadata-shapes.ttl @@ -296,4 +296,140 @@ om:UserTeamMembershipShape a sh:NodeShape ; sh:path om:memberOf ; sh:class om:Team ; sh:message "User can only be member of valid teams"@en ; + ] . 
+ +################################################################# +# AI / LLM / MCP Shapes (OpenMetadata 1.13) +################################################################# + +# LLM Model Shape +om:LLMModelShape a sh:NodeShape ; + sh:targetClass om:LLMModel ; + sh:property [ + sh:path om:modelType ; + sh:datatype xsd:string ; + sh:in ("BaseModel" "FineTuned" "Quantized" "Distilled" "Adapter" "Custom") ; + sh:maxCount 1 ; + sh:message "LLM model type must be one of the allowed values"@en ; + ] ; + sh:property [ + sh:path om:modelCapability ; + sh:datatype xsd:string ; + sh:in ("TextGeneration" "CodeGeneration" "Embeddings" "Chat" "Vision" "Audio" "FunctionCalling" "ToolUse") ; + sh:message "Model capability must be one of the allowed values"@en ; + ] ; + sh:property [ + sh:path om:baseModel ; + sh:class om:LLMModel ; + sh:maxCount 1 ; + sh:message "Base model must reference another LLM model"@en ; + ] . + +# AI Application Shape +om:AIApplicationShape a sh:NodeShape ; + sh:targetClass om:AIApplication ; + sh:property [ + sh:path om:applicationType ; + sh:datatype xsd:string ; + sh:in ("Chatbot" "Agent" "Copilot" "Assistant" "RAG" "CodeGenerator" "DataAnalyst" "AutomationBot" "MultiAgent" "Custom") ; + sh:maxCount 1 ; + sh:message "AI application type must be one of the allowed values"@en ; + ] ; + sh:property [ + sh:path om:usesModel ; + sh:class om:LLMModel ; + sh:message "usesModel must reference valid LLM models"@en ; + ] ; + sh:property [ + sh:path om:usesMCPServer ; + sh:class om:MCPServer ; + sh:message "usesMCPServer must reference valid MCP servers"@en ; + ] . 
+ +# AI Governance Policy Shape +om:AIGovernancePolicyShape a sh:NodeShape ; + sh:targetClass om:AIGovernancePolicy ; + sh:property [ + sh:path om:policyType ; + sh:datatype xsd:string ; + sh:in ("ModelApproval" "DataAccess" "BiasThreshold" "ComplianceCheck" "CostControl" "PerformanceStandard" "SecurityControl") ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:message "AI governance policy must have exactly one policy type"@en ; + ] . + +# MCP Server Shape +om:MCPServerShape a sh:NodeShape ; + sh:targetClass om:MCPServer ; + sh:property [ + sh:path om:serverType ; + sh:datatype xsd:string ; + sh:in ("DataAccess" "FileSystem" "WebAPI" "Database" "Cloud" "Security" "Development" "Communication" "Custom") ; + sh:maxCount 1 ; + sh:message "MCP server type must be one of the allowed values"@en ; + ] . + +# Agent Execution Shape +om:AgentExecutionShape a sh:NodeShape ; + sh:targetClass om:AgentExecution ; + sh:property [ + sh:path om:executionStatus ; + sh:datatype xsd:string ; + sh:in ("Running" "Success" "Failed" "Timeout" "Cancelled" "PartialSuccess") ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:message "Agent execution status must be exactly one of the allowed values"@en ; + ] ; + sh:property [ + sh:path om:executedAgent ; + sh:class om:AIApplication ; + sh:maxCount 1 ; + sh:message "Agent execution can reference at most one AI application"@en ; + ] . + +# MCP Execution Shape +om:MCPExecutionShape a sh:NodeShape ; + sh:targetClass om:MCPExecution ; + sh:property [ + sh:path om:executionStatus ; + sh:datatype xsd:string ; + sh:in ("Running" "Success" "Failed" "Timeout" "Cancelled") ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:message "MCP execution status must be exactly one of the allowed values"@en ; + ] ; + sh:property [ + sh:path om:executedMCPServer ; + sh:class om:MCPServer ; + sh:maxCount 1 ; + sh:message "MCP execution can reference at most one MCP server"@en ; + ] . 
+ +################################################################# +# Learning Shape (OpenMetadata 1.13) +################################################################# + +om:LearningResourceShape a sh:NodeShape ; + sh:targetClass om:LearningResource ; + sh:property [ + sh:path om:resourceType ; + sh:datatype xsd:string ; + sh:in ("Storylane" "Video" "Article") ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:message "Learning resource type must be Storylane, Video, or Article"@en ; + ] ; + sh:property [ + sh:path om:resourceCategory ; + sh:datatype xsd:string ; + sh:in ("Discovery" "Administration" "DataGovernance" "DataQuality" "Observability" "AI") ; + sh:maxCount 1 ; + sh:message "Learning resource category must be one of the allowed values"@en ; + ] ; + sh:property [ + sh:path om:resourceDifficulty ; + sh:datatype xsd:string ; + sh:in ("Intro" "Intermediate" "Advanced") ; + sh:maxCount 1 ; + sh:message "Learning resource difficulty must be Intro, Intermediate, or Advanced"@en ; ] . \ No newline at end of file diff --git a/schemas/api/ai/createAIApplication.json b/schemas/api/ai/createAIApplication.json new file mode 100644 index 0000000..cd91362 --- /dev/null +++ b/schemas/api/ai/createAIApplication.json @@ -0,0 +1,140 @@ +{ + "$id": "https://open-metadata.org/schema/api/ai/createAIApplication.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "CreateAIApplicationRequest", + "description": "Create AI Application entity request", + "type": "object", + "javaType": "org.openmetadata.schema.api.ai.CreateAIApplication", + "javaInterfaces": ["org.openmetadata.schema.CreateEntity"], + + "properties": { + "name": { + "description": "Name that identifies this AI application.", + "$ref": "../../type/basic.json#/definitions/entityName" + }, + "displayName": { + "description": "Display Name that identifies this AI application.", + "type": "string" + }, + "description": { + "description": "Description of the AI application. 
What it does and how it is used.", + "$ref": "../../type/basic.json#/definitions/markdown" + }, + "applicationType": { + "description": "Type of AI application", + "$ref": "../../entity/ai/aiApplication.json#/definitions/applicationType" + }, + "developmentStage": { + "description": "Development stage of the AI application", + "$ref": "../../entity/ai/aiApplication.json#/definitions/developmentStage" + }, + "modelConfigurations": { + "description": "Multiple LLM models this application can use for different purposes", + "type": "array", + "items": { + "$ref": "../../entity/ai/aiApplication.json#/definitions/modelConfiguration" + } + }, + "primaryModel": { + "description": "Primary/default LLM model used by this application", + "$ref": "../../type/basic.json#/definitions/fullyQualifiedEntityName" + }, + "promptTemplates": { + "description": "Prompt templates used by this application", + "$ref": "../../type/entityReferenceList.json" + }, + "tools": { + "description": "MCP tools or other tools available to this application", + "$ref": "../../type/entityReferenceList.json" + }, + "dataSources": { + "description": "Data sources the application can access", + "$ref": "../../type/entityReferenceList.json" + }, + "knowledgeBases": { + "description": "Knowledge bases or vector stores the application uses", + "$ref": "../../type/entityReferenceList.json" + }, + "upstreamApplications": { + "description": "Other AI applications this application depends on", + "$ref": "../../type/entityReferenceList.json" + }, + "downstreamApplications": { + "description": "AI applications that depend on this application", + "$ref": "../../type/entityReferenceList.json" + }, + "framework": { + "description": "Framework used to build the application", + "$ref": "../../entity/ai/aiApplication.json#/definitions/frameworkInfo" + }, + "governanceMetadata": { + "description": "Governance and compliance metadata", + "$ref": "../../entity/ai/aiApplication.json#/definitions/governanceMetadata" + }, + 
"biasMetrics": { + "description": "Bias evaluation metrics", + "$ref": "../../entity/ai/aiApplication.json#/definitions/biasMetrics" + }, + "performanceMetrics": { + "description": "Runtime performance metrics", + "$ref": "../../entity/ai/aiApplication.json#/definitions/performanceMetrics" + }, + "qualityMetrics": { + "description": "Quality metrics for responses", + "$ref": "../../entity/ai/aiApplication.json#/definitions/qualityMetrics" + }, + "safetyMetrics": { + "description": "Safety metrics", + "$ref": "../../entity/ai/aiApplication.json#/definitions/safetyMetrics" + }, + "testSuites": { + "description": "Test suites for validating this AI application", + "$ref": "../../type/entityReferenceList.json" + }, + "sourceCode": { + "description": "Link to source code repository", + "type": "string" + }, + "deploymentUrl": { + "description": "Production deployment endpoint", + "type": "string" + }, + "documentation": { + "description": "Link to external documentation", + "type": "string" + }, + "tags": { + "description": "Tags for this AI Application", + "type": "array", + "items": { + "$ref": "../../type/tagLabel.json" + }, + "default": null + }, + "owners": { + "description": "Owners of this AI Application", + "$ref": "../../type/entityReferenceList.json", + "default": null + }, + "extension": { + "description": "Entity extension data with custom attributes added to the entity.", + "$ref": "../../type/basic.json#/definitions/entityExtension" + }, + "domains" : { + "description": "Fully qualified names of the domains the AI Application belongs to.", + "type": "array", + "items": { + "type": "string" + } + }, + "dataProducts" : { + "description": "List of fully qualified names of data products this entity is part of.", + "type": "array", + "items" : { + "$ref" : "../../type/basic.json#/definitions/fullyQualifiedEntityName" + } + } + }, + "required": ["name", "applicationType"], + "additionalProperties": false +} diff --git 
a/schemas/api/ai/createAIGovernancePolicy.json b/schemas/api/ai/createAIGovernancePolicy.json new file mode 100644 index 0000000..f837699 --- /dev/null +++ b/schemas/api/ai/createAIGovernancePolicy.json @@ -0,0 +1,99 @@ +{ + "$id": "https://open-metadata.org/schema/api/ai/createAIGovernancePolicy.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "CreateAIGovernancePolicyRequest", + "description": "Create AI Governance Policy entity request", + "type": "object", + "javaType": "org.openmetadata.schema.api.ai.CreateAIGovernancePolicy", + "javaInterfaces": ["org.openmetadata.schema.CreateEntity"], + + "properties": { + "name": { + "description": "Name that identifies this AI governance policy.", + "$ref": "../../type/basic.json#/definitions/entityName" + }, + "displayName": { + "description": "Display Name that identifies this AI governance policy.", + "type": "string" + }, + "description": { + "description": "Description of the AI governance policy. Its requirements and scope.", + "$ref": "../../type/basic.json#/definitions/markdown" + }, + "policyType": { + "description": "Type of governance policy", + "$ref": "../../entity/ai/aiGovernancePolicy.json#/definitions/policyType" + }, + "rules": { + "description": "Policy rules and conditions", + "type": "array", + "items": { + "$ref": "../../entity/ai/aiGovernancePolicy.json#/definitions/policyRule" + } + }, + "biasThresholds": { + "description": "Bias detection thresholds", + "$ref": "../../entity/ai/aiGovernancePolicy.json#/definitions/biasThreshold" + }, + "dataAccessControls": { + "description": "Data access control requirements", + "$ref": "../../entity/ai/aiGovernancePolicy.json#/definitions/dataAccessControl" + }, + "costControls": { + "description": "Cost control thresholds and limits", + "$ref": "../../entity/ai/aiGovernancePolicy.json#/definitions/costControl" + }, + "complianceRequirements": { + "description": "Compliance and regulatory requirements", + "type": "array", + "items": { + 
"$ref": "../../entity/ai/aiGovernancePolicy.json#/definitions/complianceRequirement" + } + }, + "performanceStandards": { + "description": "Performance and quality standards", + "$ref": "../../entity/ai/aiGovernancePolicy.json#/definitions/performanceStandard" + }, + "appliesTo": { + "description": "Entities this policy applies to", + "$ref": "../../type/entityReferenceList.json" + }, + "enforcementLevel": { + "description": "How strictly the policy is enforced", + "type": "string", + "enum": ["Advisory", "Warning", "Blocking"], + "default": "Warning" + }, + "enabled": { + "description": "Whether this policy is currently active", + "type": "boolean", + "default": true + }, + "tags": { + "description": "Tags for this AI Governance Policy", + "type": "array", + "items": { + "$ref": "../../type/tagLabel.json" + }, + "default": null + }, + "owners": { + "description": "Owners of this AI Governance Policy", + "$ref": "../../type/entityReferenceList.json", + "default": null + }, + "extension": { + "description": "Entity extension data with custom attributes added to the entity.", + "$ref": "../../type/basic.json#/definitions/entityExtension" + }, + "domains" : { + "description": "Fully qualified names of the domains the AI Governance Policy belongs to.", + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": ["name", "policyType"], + "additionalProperties": false +} diff --git a/schemas/api/ai/createAgentExecution.json b/schemas/api/ai/createAgentExecution.json new file mode 100644 index 0000000..9dc63dd --- /dev/null +++ b/schemas/api/ai/createAgentExecution.json @@ -0,0 +1,103 @@ +{ + "$id": "https://open-metadata.org/schema/api/ai/createAgentExecution.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "CreateAgentExecutionRequest", + "description": "Create Agent Execution entity request", + "type": "object", + "javaType": "org.openmetadata.schema.api.ai.CreateAgentExecution", + + "properties": { + "agent": { + 
"description": "Reference to the AI agent that executed", + "$ref": "../../type/entityReference.json" + }, + "agentId": { + "description": "ID of the AI Agent", + "$ref": "../../type/basic.json#/definitions/uuid" + }, + "timestamp": { + "description": "Execution start timestamp", + "$ref": "../../type/basic.json#/definitions/timestamp" + }, + "endTimestamp": { + "description": "Execution end timestamp", + "$ref": "../../type/basic.json#/definitions/timestamp" + }, + "status": { + "description": "Execution status", + "$ref": "../../entity/ai/agentExecution.json#/definitions/executionStatus" + }, + "input": { + "description": "Input provided to the agent", + "type": "string" + }, + "output": { + "description": "Output generated by the agent", + "type": "string" + }, + "modelCalls": { + "description": "LLM model calls made during execution", + "type": "array", + "items": { + "$ref": "../../entity/ai/agentExecution.json#/definitions/modelCall" + } + }, + "dataAccessed": { + "description": "Data sources accessed during execution", + "type": "array", + "items": { + "$ref": "../../entity/ai/agentExecution.json#/definitions/dataAccess" + } + }, + "toolCalls": { + "description": "Tool calls made during execution", + "type": "array", + "items": { + "$ref": "../../entity/ai/agentExecution.json#/definitions/toolCall" + } + }, + "metrics": { + "description": "Performance and cost metrics", + "$ref": "../../entity/ai/agentExecution.json#/definitions/executionMetrics" + }, + "errorMessage": { + "description": "Error message if execution failed", + "type": "string" + }, + "errorStack": { + "description": "Error stack trace", + "type": "string" + }, + "complianceChecks": { + "description": "Compliance checks performed", + "type": "array", + "items": { + "$ref": "../../entity/ai/agentExecution.json#/definitions/complianceCheck" + } + }, + "executedBy": { + "description": "User or system that triggered the execution", + "type": "string" + }, + "sessionId": { + "description": "Session 
ID for grouping related executions", + "type": "string" + }, + "environment": { + "description": "Environment where execution occurred", + "type": "string", + "enum": ["Development", "Staging", "Production"] + }, + "agentVersion": { + "description": "Version of the agent at execution time", + "type": "string" + }, + "metadata": { + "description": "Additional execution metadata", + "type": "object", + "additionalProperties": {"type": "string"} + } + }, + "required": ["agent", "agentId", "timestamp", "status"], + "additionalProperties": false +} diff --git a/schemas/api/ai/createLLMModel.json b/schemas/api/ai/createLLMModel.json new file mode 100644 index 0000000..245cd4d --- /dev/null +++ b/schemas/api/ai/createLLMModel.json @@ -0,0 +1,108 @@ +{ + "$id": "https://open-metadata.org/schema/api/ai/createLLMModel.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "CreateLLMModelRequest", + "description": "Create LLM Model entity request", + "type": "object", + "javaType": "org.openmetadata.schema.api.ai.CreateLLMModel", + "javaInterfaces": ["org.openmetadata.schema.CreateEntity"], + + "properties": { + "name": { + "description": "Name that identifies this LLM model.", + "$ref": "../../type/basic.json#/definitions/entityName" + }, + "displayName": { + "description": "Display Name that identifies this LLM model.", + "type": "string" + }, + "description": { + "description": "Description of the LLM model. 
Its capabilities, use cases, and limitations.", + "$ref": "../../type/basic.json#/definitions/markdown" + }, + "baseModel": { + "description": "Base model name", + "type": "string" + }, + "modelVersion": { + "description": "Version of the model", + "type": "string" + }, + "modelProvider": { + "description": "Model provider (e.g., 'OpenAI', 'Anthropic', 'Meta')", + "type": "string" + }, + "modelSpecifications": { + "description": "Detailed model specifications", + "$ref": "../../entity/ai/llmModel.json#/definitions/modelSpecifications" + }, + "trainingMetadata": { + "description": "Training data and methodology information", + "$ref": "../../entity/ai/llmModel.json#/definitions/trainingMetadata" + }, + "modelEvaluation": { + "description": "Model performance metrics and evaluation results", + "$ref": "../../entity/ai/llmModel.json#/definitions/modelEvaluation" + }, + "costMetrics": { + "description": "Cost metrics for model usage", + "$ref": "../../entity/ai/llmModel.json#/definitions/costMetrics" + }, + "deploymentInfo": { + "description": "Deployment and availability information", + "$ref": "../../entity/ai/llmModel.json#/definitions/deploymentInfo" + }, + "governanceStatus": { + "description": "Governance status - tracks unauthorized/shadow AI models", + "type": "string", + "enum": ["Approved", "PendingReview", "Rejected", "Unauthorized"] + }, + "certifications": { + "description": "Certifications this model has received", + "type": "array", + "items": {"type": "string"} + }, + "regulatoryCompliance": { + "description": "Regulatory compliance standards met", + "type": "array", + "items": {"type": "string"} + }, + "service": { + "description": "Link to the LLM service where this model is hosted", + "$ref": "../../type/basic.json#/definitions/fullyQualifiedEntityName" + }, + "tags": { + "description": "Tags for this LLM Model", + "type": "array", + "items": { + "$ref": "../../type/tagLabel.json" + }, + "default": null + }, + "owners": { + "description": "Owners of 
this LLM Model", + "$ref": "../../type/entityReferenceList.json", + "default": null + }, + "extension": { + "description": "Entity extension data with custom attributes added to the entity.", + "$ref": "../../type/basic.json#/definitions/entityExtension" + }, + "domains" : { + "description": "Fully qualified names of the domains the LLM Model belongs to.", + "type": "array", + "items": { + "type": "string" + } + }, + "dataProducts" : { + "description": "List of fully qualified names of data products this entity is part of.", + "type": "array", + "items" : { + "$ref" : "../../type/basic.json#/definitions/fullyQualifiedEntityName" + } + } + }, + "required": ["name", "baseModel", "service"], + "additionalProperties": false +} diff --git a/schemas/api/ai/createMcpServer.json b/schemas/api/ai/createMcpServer.json new file mode 100644 index 0000000..e975038 --- /dev/null +++ b/schemas/api/ai/createMcpServer.json @@ -0,0 +1,137 @@ +{ + "$id": "https://open-metadata.org/schema/api/ai/createMcpServer.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "CreateMcpServerRequest", + "description": "Create MCP Server entity request", + "type": "object", + "javaType": "org.openmetadata.schema.api.ai.CreateMcpServer", + "javaInterfaces": ["org.openmetadata.schema.CreateEntity"], + + "properties": { + "name": { + "description": "Name that identifies this MCP Server.", + "$ref": "../../type/basic.json#/definitions/entityName" + }, + "displayName": { + "description": "Display Name that identifies this MCP Server.", + "type": "string" + }, + "description": { + "description": "Description of the MCP Server, its purpose, and capabilities.", + "$ref": "../../type/basic.json#/definitions/markdown" + }, + "service": { + "description": "Link to the MCP service that contains this server", + "$ref": "../../type/basic.json#/definitions/fullyQualifiedEntityName" + }, + "serverType": { + "description": "Type of MCP server based on its primary function", + "$ref": 
"../../entity/ai/mcpServer.json#/definitions/serverType" + }, + "transportType": { + "description": "Transport protocol used by the MCP server", + "$ref": "../../entity/ai/mcpServer.json#/definitions/transportType" + }, + "protocolVersion": { + "type": "string", + "description": "MCP protocol version supported by this server" + }, + "developmentStage": { + "description": "Development stage of the MCP server", + "$ref": "../../entity/ai/mcpServer.json#/definitions/developmentStage" + }, + "serverInfo": { + "description": "Information about the MCP server software", + "$ref": "../../entity/ai/mcpServer.json#/definitions/serverInfo" + }, + "connectionConfig": { + "description": "Connection configuration for the MCP server", + "$ref": "../../entity/ai/mcpServer.json#/definitions/connectionConfig" + }, + "capabilities": { + "description": "Capabilities supported by the MCP server", + "$ref": "../../entity/ai/mcpServer.json#/definitions/serverCapabilities" + }, + "tools": { + "description": "Tools exposed by this MCP Server", + "type": "array", + "items": { + "$ref": "../../entity/ai/mcpServer.json#/definitions/mcpTool" + }, + "default": null + }, + "resources": { + "description": "Resources exposed by this MCP Server", + "type": "array", + "items": { + "$ref": "../../entity/ai/mcpServer.json#/definitions/mcpResource" + }, + "default": null + }, + "prompts": { + "description": "Prompt templates exposed by this MCP Server", + "type": "array", + "items": { + "$ref": "../../entity/ai/mcpServer.json#/definitions/mcpPrompt" + }, + "default": null + }, + "governanceMetadata": { + "description": "Governance and compliance metadata", + "$ref": "../../entity/ai/mcpServer.json#/definitions/governanceMetadata" + }, + "dataAccessSummary": { + "description": "Summary of data access patterns", + "$ref": "../../entity/ai/mcpServer.json#/definitions/dataAccessSummary" + }, + "securityMetrics": { + "description": "Security metrics and settings", + "$ref": 
"../../entity/ai/mcpServer.json#/definitions/securityMetrics" + }, + "sourceCode": { + "description": "Link to source code repository", + "type": "string" + }, + "deploymentUrl": { + "description": "Deployment endpoint URL", + "type": "string" + }, + "documentation": { + "description": "Link to external documentation", + "type": "string" + }, + "tags": { + "description": "Tags for this MCP Server", + "type": "array", + "items": { + "$ref": "../../type/tagLabel.json" + }, + "default": null + }, + "owners": { + "description": "Owners of this MCP Server", + "$ref": "../../type/entityReferenceList.json", + "default": null + }, + "extension": { + "description": "Entity extension data with custom attributes added to the entity.", + "$ref": "../../type/basic.json#/definitions/entityExtension" + }, + "domains": { + "description": "Fully qualified names of the domains the MCP Server belongs to.", + "type": "array", + "items": { + "type": "string" + } + }, + "dataProducts": { + "description": "List of fully qualified names of data products this entity is part of.", + "type": "array", + "items": { + "$ref": "../../type/basic.json#/definitions/fullyQualifiedEntityName" + } + } + }, + "required": ["name", "serverType", "service"], + "additionalProperties": false +} diff --git a/schemas/api/ai/createPromptTemplate.json b/schemas/api/ai/createPromptTemplate.json new file mode 100644 index 0000000..5ca43bd --- /dev/null +++ b/schemas/api/ai/createPromptTemplate.json @@ -0,0 +1,88 @@ +{ + "$id": "https://open-metadata.org/schema/api/ai/createPromptTemplate.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "CreatePromptTemplateRequest", + "description": "Create Prompt Template entity request", + "type": "object", + "javaType": "org.openmetadata.schema.api.ai.CreatePromptTemplate", + "javaInterfaces": ["org.openmetadata.schema.CreateEntity"], + + "properties": { + "name": { + "description": "Name that identifies this prompt template.", + "$ref": 
"../../type/basic.json#/definitions/entityName" + }, + "displayName": { + "description": "Display Name that identifies this prompt template.", + "type": "string" + }, + "description": { + "description": "Description of the prompt template, its purpose, and usage.", + "$ref": "../../type/basic.json#/definitions/markdown" + }, + "templateContent": { + "description": "The actual prompt template text with variable placeholders", + "type": "string" + }, + "systemPrompt": { + "description": "Optional system prompt to accompany the template", + "type": "string" + }, + "variables": { + "description": "Variables used in the template", + "type": "array", + "items": { + "$ref": "../../entity/ai/promptTemplate.json#/definitions/templateVariable" + } + }, + "examples": { + "description": "Example usages of the template", + "type": "array", + "items": { + "$ref": "../../entity/ai/promptTemplate.json#/definitions/promptExample" + } + }, + "templateType": { + "description": "Type of prompt template", + "type": "string", + "enum": ["ChatCompletion", "TextGeneration", "CodeGeneration", "Embedding", "Classification", "Extraction", "Custom"] + }, + "templateVersion": { + "description": "Template version for tracking changes", + "type": "string" + }, + "tags": { + "description": "Tags for this Prompt Template", + "type": "array", + "items": { + "$ref": "../../type/tagLabel.json" + }, + "default": null + }, + "owners": { + "description": "Owners of this Prompt Template", + "$ref": "../../type/entityReferenceList.json", + "default": null + }, + "extension": { + "description": "Entity extension data with custom attributes added to the entity.", + "$ref": "../../type/basic.json#/definitions/entityExtension" + }, + "domains" : { + "description": "Fully qualified names of the domains the Prompt Template belongs to.", + "type": "array", + "items": { + "type": "string" + } + }, + "dataProducts" : { + "description": "List of fully qualified names of data products this entity is part of.", + 
"type": "array", + "items" : { + "$ref" : "../../type/basic.json#/definitions/fullyQualifiedEntityName" + } + } + }, + "required": ["name", "templateContent"], + "additionalProperties": false +} diff --git a/schemas/api/configuration/rdfConfiguration.json b/schemas/api/configuration/rdfConfiguration.json index 676f4f2..c868823 100644 --- a/schemas/api/configuration/rdfConfiguration.json +++ b/schemas/api/configuration/rdfConfiguration.json @@ -5,6 +5,14 @@ "description": "Configuration for RDF/Knowledge Graph support in OpenMetadata", "type": "object", "javaType": "org.openmetadata.schema.api.configuration.rdf.RdfConfiguration", + "definitions": { + "reasoningLevel": { + "description": "Level of reasoning/inference to apply to SPARQL queries", + "type": "string", + "enum": ["NONE", "RDFS", "OWL_LITE", "OWL_DL", "CUSTOM"], + "default": "NONE" + } + }, "properties": { "enabled": { "description": "Enable or disable RDF support", @@ -40,6 +48,21 @@ "type": "string", "format": "uri", "default": "https://open-metadata.org/" + }, + "inferenceEnabled": { + "description": "Enable inference/reasoning on SPARQL queries. When enabled, SPARQL queries will use the inference engine to derive additional triples based on the reasoning level.", + "type": "boolean", + "default": true + }, + "defaultInferenceLevel": { + "description": "Default reasoning level for SPARQL queries when inference is enabled. 
CUSTOM provides OpenMetadata-specific inference rules including transitive lineage traversal and inverse relationships.", + "$ref": "#/definitions/reasoningLevel", + "default": "CUSTOM" + }, + "cacheInferredTriples": { + "description": "Cache inferred triples for better query performance (requires more storage)", + "type": "boolean", + "default": false } }, "required": ["enabled", "storageType"], diff --git a/schemas/api/data/bulkColumnUpdatePreview.json b/schemas/api/data/bulkColumnUpdatePreview.json new file mode 100644 index 0000000..82973a9 --- /dev/null +++ b/schemas/api/data/bulkColumnUpdatePreview.json @@ -0,0 +1,98 @@ +{ + "$id": "https://open-metadata.org/schema/api/data/bulkColumnUpdatePreview.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "BulkColumnUpdatePreview", + "description": "Preview response for bulk column update showing what will change for each column.", + "type": "object", + "javaType": "org.openmetadata.schema.api.data.BulkColumnUpdatePreview", + "properties": { + "totalColumns": { + "description": "Total number of columns that will be updated.", + "type": "integer", + "minimum": 0 + }, + "columnPreviews": { + "description": "List of column update previews showing current vs new values.", + "type": "array", + "items": { + "$ref": "#/definitions/columnUpdatePreview" + } + } + }, + "required": ["totalColumns", "columnPreviews"], + "definitions": { + "columnUpdatePreview": { + "javaType": "org.openmetadata.schema.api.data.ColumnUpdatePreview", + "description": "Preview of changes for a single column showing diff between current and new values.", + "type": "object", + "properties": { + "columnFQN": { + "description": "Fully qualified name of the column.", + "type": "string" + }, + "entityType": { + "description": "Type of entity containing the column (table, dashboardDataModel, etc.).", + "type": "string" + }, + "entityFQN": { + "description": "Fully qualified name of the parent entity.", + "type": "string" + }, + 
"entityDisplayName": { + "description": "Display name of the parent entity.", + "type": "string" + }, + "serviceName": { + "description": "Name of the service.", + "type": "string" + }, + "databaseName": { + "description": "Name of the database (if applicable).", + "type": "string" + }, + "schemaName": { + "description": "Name of the schema (if applicable).", + "type": "string" + }, + "currentValues": { + "description": "Current column metadata values.", + "$ref": "#/definitions/columnMetadata" + }, + "newValues": { + "description": "New column metadata values that will be applied.", + "$ref": "#/definitions/columnMetadata" + }, + "hasChanges": { + "description": "True if there are actual changes between current and new values.", + "type": "boolean" + } + }, + "required": ["columnFQN", "entityType", "entityFQN", "currentValues", "newValues", "hasChanges"], + "additionalProperties": false + }, + "columnMetadata": { + "javaType": "org.openmetadata.schema.api.data.ColumnMetadata", + "description": "Column metadata including display name, description, tags, and glossary terms.", + "type": "object", + "properties": { + "displayName": { + "description": "Display name of the column.", + "type": "string" + }, + "description": { + "description": "Description of the column.", + "$ref": "../../type/basic.json#/definitions/markdown" + }, + "tags": { + "description": "Tags and glossary terms associated with the column.", + "type": "array", + "items": { + "$ref": "../../type/tagLabel.json" + } + } + }, + "additionalProperties": false + } + }, + "additionalProperties": false +} diff --git a/schemas/api/data/bulkColumnUpdateRequest.json b/schemas/api/data/bulkColumnUpdateRequest.json new file mode 100644 index 0000000..982675e --- /dev/null +++ b/schemas/api/data/bulkColumnUpdateRequest.json @@ -0,0 +1,101 @@ +{ + "$id": "https://open-metadata.org/schema/api/data/bulkColumnUpdateRequest.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": 
"BulkColumnUpdateRequest", + "description": "Bulk update request for updating column metadata (description, display name, tags, glossary terms) across entities. Supports two modes: 1) Search-based: provide columnName and filters to find and update all matching columns, 2) Explicit: provide specific list of column FQNs to update.", + "type": "object", + "javaType": "org.openmetadata.schema.api.data.BulkColumnUpdateRequest", + "properties": { + "columnName": { + "description": "Column name to search for (exact match, case-sensitive). When provided, the system will search for all columns with this name and apply updates based on filters.", + "type": "string" + }, + "entityTypes": { + "description": "Filter by entity types (e.g., table, dashboardDataModel). If not provided, searches across all supported types.", + "type": "array", + "items": { + "type": "string" + } + }, + "serviceName": { + "description": "Filter by service name.", + "type": "string" + }, + "databaseName": { + "description": "Filter by database name.", + "type": "string" + }, + "schemaName": { + "description": "Filter by schema name.", + "type": "string" + }, + "domainId": { + "description": "Filter by domain ID.", + "$ref": "../../type/basic.json#/definitions/uuid" + }, + "displayName": { + "description": "Display Name to apply to all matching columns.", + "type": "string" + }, + "description": { + "description": "Description to apply to all matching columns.", + "$ref": "../../type/basic.json#/definitions/markdown" + }, + "tags": { + "description": "Tags and glossary terms to apply to all matching columns. Provide an empty array to remove all tags.", + "type": "array", + "items": { + "$ref": "../../type/tagLabel.json" + }, + "default": null + }, + "columnUpdates": { + "description": "Explicit list of column updates (alternative to search-based mode). 
Use this when you want to update specific columns by FQN.", + "type": "array", + "items": { + "$ref": "#/definitions/columnUpdate" + } + }, + "dryRun": { + "description": "If true, performs a dry-run to preview which columns will be updated without actually making changes. Returns a list of columns that would be affected.", + "type": "boolean", + "default": false + } + }, + "definitions": { + "columnUpdate": { + "javaType": "org.openmetadata.schema.api.data.ColumnUpdate", + "description": "Individual column update with FQN and metadata changes.", + "type": "object", + "properties": { + "columnFQN": { + "description": "Fully qualified name of the column to update.", + "type": "string" + }, + "entityType": { + "description": "Type of entity containing the column (table or dashboardDataModel).", + "type": "string" + }, + "displayName": { + "description": "Display name of the column.", + "type": "string" + }, + "description": { + "description": "Description of the column.", + "$ref": "../../type/basic.json#/definitions/markdown" + }, + "tags": { + "description": "Tags and glossary terms associated with the column. 
Provide an empty array to remove all tags.", + "type": "array", + "items": { + "$ref": "../../type/tagLabel.json" + }, + "default": null + } + }, + "required": ["columnFQN", "entityType"], + "additionalProperties": false + } + }, + "additionalProperties": false +} diff --git a/schemas/api/data/columnGridResponse.json b/schemas/api/data/columnGridResponse.json new file mode 100644 index 0000000..0a970a8 --- /dev/null +++ b/schemas/api/data/columnGridResponse.json @@ -0,0 +1,215 @@ +{ + "$id": "https://open-metadata.org/schema/api/data/columnGridResponse.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "ColumnGridResponse", + "description": "Response containing all unique columns grouped by metadata similarity for grid-based editing.", + "type": "object", + "javaType": "org.openmetadata.schema.api.data.ColumnGridResponse", + "properties": { + "columns": { + "description": "List of unique column names with their metadata groups.", + "type": "array", + "items": { + "$ref": "#/definitions/columnGridItem" + } + }, + "totalUniqueColumns": { + "description": "Total number of unique column names.", + "type": "integer", + "minimum": 0 + }, + "totalOccurrences": { + "description": "Total number of column occurrences across all entities.", + "type": "integer", + "minimum": 0 + }, + "cursor": { + "description": "Cursor for pagination (Base64-encoded). 
Use this in the next request to get the next page of results.", + "type": "string" + } + }, + "required": ["columns", "totalUniqueColumns", "totalOccurrences"], + "definitions": { + "columnGridItem": { + "javaType": "org.openmetadata.schema.api.data.ColumnGridItem", + "description": "A unique column name with its metadata groups.", + "type": "object", + "properties": { + "columnName": { + "description": "Name of the column.", + "type": "string" + }, + "totalOccurrences": { + "description": "Total number of occurrences for this column name.", + "type": "integer", + "minimum": 0 + }, + "hasVariations": { + "description": "Whether this column has different metadata across occurrences.", + "type": "boolean" + }, + "metadataStatus": { + "description": "Aggregate metadata status across all occurrences. Uses worst-case: MISSING if any occurrence is missing, INCOMPLETE if any is incomplete, otherwise COMPLETE.", + "$ref": "#/definitions/metadataStatus" + }, + "groups": { + "description": "Metadata groups - columns with identical metadata are grouped together.", + "type": "array", + "items": { + "$ref": "#/definitions/columnMetadataGroup" + } + } + }, + "required": ["columnName", "totalOccurrences", "hasVariations", "groups"], + "additionalProperties": false + }, + "columnMetadataGroup": { + "javaType": "org.openmetadata.schema.api.data.ColumnMetadataGroup", + "description": "A group of columns with identical metadata.", + "type": "object", + "properties": { + "groupId": { + "description": "Unique identifier for this metadata group (hash of metadata values).", + "type": "string" + }, + "displayName": { + "description": "Display name (common across all columns in this group).", + "type": "string" + }, + "description": { + "description": "Description (common across all columns in this group).", + "$ref": "../../type/basic.json#/definitions/markdown" + }, + "tags": { + "description": "Tags (common across all columns in this group).", + "type": "array", + "items": { + "$ref": 
"../../type/tagLabel.json" + } + }, + "dataType": { + "description": "Data type (common across all columns in this group).", + "type": "string" + }, + "metadataStatus": { + "description": "Metadata completeness status for this group.", + "$ref": "#/definitions/metadataStatus" + }, + "occurrenceCount": { + "description": "Number of column occurrences in this group.", + "type": "integer", + "minimum": 1 + }, + "occurrences": { + "description": "List of column occurrences in this group.", + "type": "array", + "items": { + "$ref": "#/definitions/columnOccurrenceRef" + } + }, + "children": { + "description": "Nested columns for STRUCT, MAP, or UNION data types.", + "type": "array", + "items": { + "$ref": "#/definitions/columnChild" + } + } + }, + "required": ["groupId", "occurrenceCount", "occurrences"], + "additionalProperties": false + }, + "columnChild": { + "javaType": "org.openmetadata.schema.api.data.ColumnChild", + "description": "A child column within a STRUCT, MAP, or UNION type.", + "type": "object", + "properties": { + "name": { + "description": "Name of the child column.", + "type": "string" + }, + "fullyQualifiedName": { + "description": "Fully qualified name of the child column.", + "type": "string" + }, + "displayName": { + "description": "Display name of the child column.", + "type": "string" + }, + "description": { + "description": "Description of the child column.", + "$ref": "../../type/basic.json#/definitions/markdown" + }, + "dataType": { + "description": "Data type of the child column.", + "type": "string" + }, + "tags": { + "description": "Tags on the child column.", + "type": "array", + "items": { + "$ref": "../../type/tagLabel.json" + } + }, + "children": { + "description": "Nested children for deeply nested structures.", + "type": "array", + "items": { + "$ref": "#/definitions/columnChild" + } + } + }, + "required": ["name"], + "additionalProperties": false + }, + "columnOccurrenceRef": { + "javaType": 
"org.openmetadata.schema.api.data.ColumnOccurrenceRef", + "description": "Reference to a column occurrence.", + "type": "object", + "properties": { + "columnFQN": { + "description": "Fully qualified name of the column.", + "type": "string" + }, + "entityType": { + "description": "Type of entity containing the column.", + "type": "string" + }, + "entityFQN": { + "description": "Fully qualified name of the parent entity.", + "type": "string" + }, + "entityDisplayName": { + "description": "Display name of the parent entity.", + "type": "string" + }, + "serviceName": { + "description": "Name of the service.", + "type": "string" + }, + "databaseName": { + "description": "Name of the database (if applicable).", + "type": "string" + }, + "schemaName": { + "description": "Name of the schema (if applicable).", + "type": "string" + } + }, + "required": ["columnFQN", "entityType", "entityFQN"], + "additionalProperties": false + }, + "metadataStatus": { + "javaType": "org.openmetadata.schema.api.data.MetadataStatus", + "description": "Metadata completeness status for a column or group of columns.", + "type": "string", + "enum": ["MISSING", "INCOMPLETE", "INCONSISTENT", "COMPLETE"], + "javaEnums": [ + {"name": "MISSING", "description": "No description and no tags."}, + {"name": "INCOMPLETE", "description": "Has description or tags, but not both."}, + {"name": "INCONSISTENT", "description": "Metadata varies across occurrences (different descriptions, tags, or glossary terms)."}, + {"name": "COMPLETE", "description": "Has both description and tags, and is consistent across all occurrences."} + ] + } + }, + "additionalProperties": false +} diff --git a/schemas/api/data/columnGroupUpdateRequest.json b/schemas/api/data/columnGroupUpdateRequest.json new file mode 100644 index 0000000..6a25cea --- /dev/null +++ b/schemas/api/data/columnGroupUpdateRequest.json @@ -0,0 +1,39 @@ +{ + "$id": "https://open-metadata.org/schema/api/data/columnGroupUpdateRequest.json", + "$schema": 
"http://json-schema.org/draft-07/schema#", + "title": "ColumnGroupUpdateRequest", + "description": "Request to update metadata for a specific group of columns.", + "type": "object", + "javaType": "org.openmetadata.schema.api.data.ColumnGroupUpdateRequest", + "properties": { + "groupId": { + "description": "ID of the metadata group to update.", + "type": "string" + }, + "columnFQNs": { + "description": "List of column FQNs to update (all must be from the same group).", + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1 + }, + "displayName": { + "description": "New display name for the columns.", + "type": "string" + }, + "description": { + "description": "New description for the columns.", + "$ref": "../../type/basic.json#/definitions/markdown" + }, + "tags": { + "description": "New tags for the columns.", + "type": "array", + "items": { + "$ref": "../../type/tagLabel.json" + } + } + }, + "required": ["columnFQNs"], + "additionalProperties": false +} diff --git a/schemas/api/data/createAPIEndpoint.json b/schemas/api/data/createAPIEndpoint.json index b5ddf59..11cc537 100644 --- a/schemas/api/data/createAPIEndpoint.json +++ b/schemas/api/data/createAPIEndpoint.json @@ -87,6 +87,6 @@ "maxLength": 32 } }, - "required": ["name", "endpointURL", "apiCollection"], + "required": ["name", "apiCollection"], "additionalProperties": false } diff --git a/schemas/api/data/createDataContract.json b/schemas/api/data/createDataContract.json index 56181bb..36c7497 100644 --- a/schemas/api/data/createDataContract.json +++ b/schemas/api/data/createDataContract.json @@ -53,6 +53,14 @@ }, "default": null }, + "odcsQualityRules": { + "description": "ODCS quality rules for round-trip compatibility with ODCS export.", + "type": "array", + "items": { + "$ref": "../../entity/datacontract/odcs/odcsDataContract.json#/definitions/odcsQualityRule" + }, + "default": null + }, "owners": { "description": "Owners of this data contract.", "$ref": 
"../../type/entityReferenceList.json", diff --git a/schemas/api/data/createFile.json b/schemas/api/data/createFile.json index 2acb4b7..09ebdfd 100644 --- a/schemas/api/data/createFile.json +++ b/schemas/api/data/createFile.json @@ -47,6 +47,14 @@ "description": "File size in bytes", "type": "integer" }, + "columns": { + "description": "Column definitions for structured data files (CSV, etc.)", + "type": "array", + "items": { + "$ref": "../../entity/data/table.json#/definitions/column" + }, + "default": null + }, "checksum": { "description": "File checksum/hash", "type": "string" diff --git a/schemas/api/data/createGlossaryTerm.json b/schemas/api/data/createGlossaryTerm.json index c50ed84..a22a88f 100644 --- a/schemas/api/data/createGlossaryTerm.json +++ b/schemas/api/data/createGlossaryTerm.json @@ -52,6 +52,13 @@ "$ref": "../../entity/data/glossaryTerm.json#/definitions/termReference" } }, + "conceptMappings": { + "description": "Optional mappings to external concepts (e.g., SKOS alignments).", + "type": "array", + "items": { + "$ref": "../../entity/data/glossaryTerm.json#/definitions/conceptMapping" + } + }, "reviewers": { "description": "User or Team references of the reviewers for this glossary.", "$ref": "../../type/entityReferenceList.json" diff --git a/schemas/api/data/groupedColumnsResponse.json b/schemas/api/data/groupedColumnsResponse.json new file mode 100644 index 0000000..7ba0c8d --- /dev/null +++ b/schemas/api/data/groupedColumnsResponse.json @@ -0,0 +1,86 @@ +{ + "$id": "https://open-metadata.org/schema/api/data/groupedColumnsResponse.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "GroupedColumnsResponse", + "description": "Response containing columns grouped by name with occurrence information.", + "type": "object", + "javaType": "org.openmetadata.schema.api.data.GroupedColumnsResponse", + "properties": { + "columnName": { + "description": "Name of the column being grouped.", + "type": "string" + }, + "occurrences": { + 
"description": "List of column occurrences across different entities.", + "type": "array", + "items": { + "$ref": "#/definitions/columnOccurrence" + } + }, + "totalCount": { + "description": "Total number of occurrences for this column name.", + "type": "integer", + "minimum": 0 + } + }, + "required": ["columnName", "occurrences", "totalCount"], + "definitions": { + "columnOccurrence": { + "javaType": "org.openmetadata.schema.api.data.ColumnOccurrence", + "description": "Individual column occurrence with entity details.", + "type": "object", + "properties": { + "columnFQN": { + "description": "Fully qualified name of the column.", + "type": "string" + }, + "entityType": { + "description": "Type of entity containing the column (table or dashboardDataModel).", + "type": "string" + }, + "entityFQN": { + "description": "Fully qualified name of the parent entity.", + "type": "string" + }, + "entityDisplayName": { + "description": "Display name of the parent entity.", + "type": "string" + }, + "serviceName": { + "description": "Name of the service.", + "type": "string" + }, + "databaseName": { + "description": "Name of the database (if applicable).", + "type": "string" + }, + "schemaName": { + "description": "Name of the schema (if applicable).", + "type": "string" + }, + "displayName": { + "description": "Display name of the column.", + "type": "string" + }, + "description": { + "description": "Description of the column.", + "$ref": "../../type/basic.json#/definitions/markdown" + }, + "tags": { + "description": "Tags and glossary terms associated with the column.", + "type": "array", + "items": { + "$ref": "../../type/tagLabel.json" + } + }, + "dataType": { + "description": "Data type of the column.", + "type": "string" + } + }, + "required": ["columnFQN", "entityType", "entityFQN"], + "additionalProperties": false + } + }, + "additionalProperties": false +} diff --git a/schemas/api/data/searchColumnsRequest.json b/schemas/api/data/searchColumnsRequest.json new file 
mode 100644 index 0000000..8af5b75 --- /dev/null +++ b/schemas/api/data/searchColumnsRequest.json @@ -0,0 +1,38 @@ +{ + "$id": "https://open-metadata.org/schema/api/data/searchColumnsRequest.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "SearchColumnsRequest", + "description": "Request to search and group columns by name across different entity types.", + "type": "object", + "javaType": "org.openmetadata.schema.api.data.SearchColumnsRequest", + "properties": { + "columnName": { + "description": "Column name to search for (exact match, case-sensitive).", + "type": "string" + }, + "entityTypes": { + "description": "Filter by entity types. If not provided, searches across all supported types (table, dashboardDataModel, container, searchIndex).", + "type": "array", + "items": { + "type": "string" + } + }, + "serviceName": { + "description": "Filter by service name.", + "type": "string" + }, + "databaseName": { + "description": "Filter by database name.", + "type": "string" + }, + "schemaName": { + "description": "Filter by schema name.", + "type": "string" + }, + "domainId": { + "description": "Filter by domain ID.", + "$ref": "../../type/basic.json#/definitions/uuid" + } + }, + "additionalProperties": false +} diff --git a/schemas/api/data/updateColumn.json b/schemas/api/data/updateColumn.json index 6de95cc..ed00ce4 100644 --- a/schemas/api/data/updateColumn.json +++ b/schemas/api/data/updateColumn.json @@ -30,6 +30,10 @@ "description": "Set to true to remove the existing column constraint. Only applicable to table columns, ignored for dashboard data model columns. 
If both 'constraint' and 'removeConstraint' are provided, 'removeConstraint' takes precedence.", "type": "boolean", "default": false + }, + "extension": { + "description": "Entity extension data with custom attributes added to the column.", + "$ref": "../../type/basic.json#/definitions/entityExtension" } }, "additionalProperties": false diff --git a/schemas/api/domains/dataProductPortsView.json b/schemas/api/domains/dataProductPortsView.json new file mode 100644 index 0000000..0407fb3 --- /dev/null +++ b/schemas/api/domains/dataProductPortsView.json @@ -0,0 +1,47 @@ +{ + "$id": "https://open-metadata.org/schema/api/domains/dataProductPortsView.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "DataProductPortsView", + "description": "Combined view of input and output ports for a data product, optimized for lineage-like visualization with independent pagination for each port type. Returns full entity details for each port.", + "type": "object", + "javaType": "org.openmetadata.schema.api.domains.DataProductPortsView", + "definitions": { + "paginatedEntities": { + "type": "object", + "description": "A paginated list of full entity objects with total count", + "javaType": "org.openmetadata.schema.api.domains.PaginatedEntities", + "properties": { + "data": { + "description": "List of full entity objects in the current page", + "type": "array", + "items": { + "type": "object" + }, + "default": [] + }, + "paging": { + "description": "Pagination information including offset and total count", + "$ref": "../../type/paging.json" + } + }, + "required": ["data", "paging"], + "additionalProperties": false + } + }, + "properties": { + "entity": { + "description": "Reference to the data product entity", + "$ref": "../../type/entityReference.json" + }, + "inputPorts": { + "description": "Paginated list of input ports - full entity details of data assets consumed by this data product", + "$ref": "#/definitions/paginatedEntities" + }, + "outputPorts": { + 
"description": "Paginated list of output ports - full entity details of data assets produced/exposed by this data product", + "$ref": "#/definitions/paginatedEntities" + } + }, + "required": ["entity", "inputPorts", "outputPorts"], + "additionalProperties": false +} diff --git a/schemas/api/events/createNotificationTemplate.json b/schemas/api/events/createNotificationTemplate.json index 78cbb56..26a7240 100644 --- a/schemas/api/events/createNotificationTemplate.json +++ b/schemas/api/events/createNotificationTemplate.json @@ -29,7 +29,7 @@ "description": "Handlebars template content for rendering notifications", "type": "string", "minLength": 1, - "maxLength": 10240 + "maxLength": 65536 }, "owners": { "description": "Owners of this template", diff --git a/schemas/api/events/notificationTemplateRenderRequest.json b/schemas/api/events/notificationTemplateRenderRequest.json index 57d4558..084eec0 100644 --- a/schemas/api/events/notificationTemplateRenderRequest.json +++ b/schemas/api/events/notificationTemplateRenderRequest.json @@ -16,7 +16,7 @@ "description": "Handlebars template content for rendering notifications", "type": "string", "minLength": 1, - "maxLength": 10240 + "maxLength": 65536 }, "resource": { "type": "string", diff --git a/schemas/api/events/notificationTemplateValidationRequest.json b/schemas/api/events/notificationTemplateValidationRequest.json index 9a31835..8b15b73 100644 --- a/schemas/api/events/notificationTemplateValidationRequest.json +++ b/schemas/api/events/notificationTemplateValidationRequest.json @@ -10,7 +10,7 @@ "description": "The template body to validate", "type": "string", "minLength": 1, - "maxLength": 10240 + "maxLength": 65536 }, "templateSubject": { "description": "The template subject line to validate", diff --git a/schemas/api/feed/createPost.json b/schemas/api/feed/createPost.json index 2076633..b8fc91a 100644 --- a/schemas/api/feed/createPost.json +++ b/schemas/api/feed/createPost.json @@ -8,12 +8,8 @@ "message": { 
"description": "Message in Markdown format. See markdown support for more details.", "type": "string" - }, - "from": { - "description": "Name of the User posting the message", - "type": "string" } }, - "required": ["message", "from"], + "required": ["message"], "additionalProperties": false } diff --git a/schemas/api/feed/createThread.json b/schemas/api/feed/createThread.json index 31e75c5..71051ac 100644 --- a/schemas/api/feed/createThread.json +++ b/schemas/api/feed/createThread.json @@ -35,10 +35,6 @@ "description": "Message", "type": "string" }, - "from": { - "description": "Name of the User (regular user or bot) posting the message", - "type": "string" - }, "addressedTo": { "description": "User or team this thread is addressed to in format <#E::{entities}::{entityName}::{field}::{fieldValue}.", "$ref": "../../type/basic.json#/definitions/entityLink" @@ -69,6 +65,6 @@ "default": null } }, - "required": ["message", "from", "about"], + "required": ["message", "about"], "additionalProperties": false } diff --git a/schemas/api/learning/createLearningResource.json b/schemas/api/learning/createLearningResource.json new file mode 100644 index 0000000..24932e1 --- /dev/null +++ b/schemas/api/learning/createLearningResource.json @@ -0,0 +1,90 @@ +{ + "$id": "https://open-metadata.org/schema/api/learning/createLearningResource.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "CreateLearningResourceRequest", + "description": "Create Learning Resource API request", + "type": "object", + "javaType": "org.openmetadata.schema.api.learning.CreateLearningResource", + "javaInterfaces": [ + "org.openmetadata.schema.CreateEntity" + ], + "properties": { + "name": { + "$ref": "../../type/basic.json#/definitions/entityName" + }, + "displayName": { + "type": "string", + "maxLength": 120 + }, + "description": { + "$ref": "../../type/basic.json#/definitions/markdown" + }, + "resourceType": { + "$ref": 
"../../entity/learning/learningResource.json#/definitions/resourceType" + }, + "categories": { + "type": "array", + "items": { + "$ref": "../../entity/learning/learningResource.json#/definitions/resourceCategory" + }, + "minItems": 1 + }, + "difficulty": { + "$ref": "../../entity/learning/learningResource.json#/definitions/resourceDifficulty" + }, + "source": { + "$ref": "../../entity/learning/learningResource.json#/definitions/resourceSource" + }, + "estimatedDuration": { + "type": "integer", + "minimum": 0 + }, + "completionThreshold": { + "type": "number", + "minimum": 0, + "maximum": 100 + }, + "contexts": { + "type": "array", + "items": { + "$ref": "../../entity/learning/learningResource.json#/definitions/resourceContext" + }, + "minItems": 1 + }, + "status": { + "type": "string", + "enum": [ + "Draft", + "Active", + "Deprecated" + ], + "default": "Active" + }, + "owners": { + "$ref": "../../type/entityReferenceList.json", + "default": null + }, + "reviewers": { + "$ref": "../../type/entityReferenceList.json", + "default": null + }, + "tags": { + "type": "array", + "items": { + "$ref": "../../type/tagLabel.json" + }, + "default": null + }, + "extension": { + "$ref": "../../type/basic.json#/definitions/entityExtension" + } + }, + "required": [ + "name", + "resourceType", + "categories", + "source", + "contexts" + ], + "additionalProperties": false +} diff --git a/schemas/api/lineage/entityCountLineageRequest.json b/schemas/api/lineage/entityCountLineageRequest.json index 545079e..208c5c5 100644 --- a/schemas/api/lineage/entityCountLineageRequest.json +++ b/schemas/api/lineage/entityCountLineageRequest.json @@ -40,6 +40,18 @@ "maximum": 10, "default": 3 }, + "upstreamDepth": { + "description": "Maximum upstream depth to compute pagination info for when requested", + "type": "integer", + "minimum": 0, + "maximum": 10 + }, + "downstreamDepth": { + "description": "Maximum downstream depth to compute pagination info for when requested", + "type": "integer", + 
"minimum": 0, + "maximum": 10 + }, "isConnectedVia": { "description": "Connected Via", "type": "boolean" @@ -48,6 +60,20 @@ "description": "Query Filter", "type": "string" }, + "columnFilter": { + "description": "Filter lineage by specific column names. Use comma-separated list (e.g., 'col1,col2') to filter columns.", + "type": "string" + }, + "preservePaths": { + "description": "Preserve all paths when applying node-level filters. When true, intermediate nodes that don't match filters are kept if they're part of a path to matching nodes.", + "type": "boolean", + "default": false + }, + "includePaginationInfo": { + "description": "Include pagination totals and depth counts in the response", + "type": "boolean", + "default": false + }, "includeDeleted": { "description": "Include deleted entities", "type": "boolean", @@ -64,4 +90,4 @@ }, "required": ["fqn", "direction"], "additionalProperties": false -} \ No newline at end of file +} diff --git a/schemas/api/lineage/esLineageData.json b/schemas/api/lineage/esLineageData.json index a3c3044..bda8d5c 100644 --- a/schemas/api/lineage/esLineageData.json +++ b/schemas/api/lineage/esLineageData.json @@ -45,6 +45,10 @@ "description": "Sql Query associated.", "type": "string" }, + "sqlQueryKey": { + "description": "Key referencing the full SQL text in the parent document's lineageSqlQueries map. Set when the same SQL appears in multiple edges to avoid storing it repeatedly. Look up the actual SQL in lineageSqlQueries[sqlQueryKey].", + "type": "string" + }, "columns": { "description": "Columns associated.", "type": "array", @@ -92,6 +96,14 @@ "description": "Asset count in case of child assets lineage.", "type": "integer", "default": null + }, + "tempLineageTables": { + "description": "Lineage path through temporary/intermediate tables. 
Each element represents a hop with fromEntity and toEntity fields.", + "type": "array", + "items": { + "$ref": "../../type/entityLineage.json#/definitions/tempLineageTable" + }, + "default": null } }, "additionalProperties": false diff --git a/schemas/api/lineage/openlineage/openLineageFacets.json b/schemas/api/lineage/openlineage/openLineageFacets.json index 16181c7..2eea849 100644 --- a/schemas/api/lineage/openlineage/openLineageFacets.json +++ b/schemas/api/lineage/openlineage/openLineageFacets.json @@ -295,6 +295,9 @@ }, "ownership": { "$ref": "#/definitions/ownershipFacet" + }, + "columnLineage": { + "$ref": "#/definitions/columnLineageFacet" } }, "additionalProperties": true diff --git a/schemas/api/lineage/searchLineageRequest.json b/schemas/api/lineage/searchLineageRequest.json index cccdc3b..fbcdb28 100644 --- a/schemas/api/lineage/searchLineageRequest.json +++ b/schemas/api/lineage/searchLineageRequest.json @@ -49,6 +49,15 @@ "description": "Query Filter", "type": "string" }, + "columnFilter": { + "description": "Column-level lineage filter. Supports filtering by column names in fromColumns or toColumn fields.", + "type": "string" + }, + "preservePaths": { + "description": "When true, preserves all nodes in the path to filtered results. 
When false, only returns nodes matching the filter.", + "type": "boolean", + "default": true + }, "includeDeleted": { "description": "Include deleted entities", "type": "boolean", diff --git a/schemas/api/lineage/searchLineageResult.json b/schemas/api/lineage/searchLineageResult.json index 3762a98..27a2338 100644 --- a/schemas/api/lineage/searchLineageResult.json +++ b/schemas/api/lineage/searchLineageResult.json @@ -52,6 +52,10 @@ "downstreamEdges": { "description": "Downstream Edges for the node.", "existingJavaType": "java.util.Map" + }, + "paginationInfo": { + "description": "Optional pagination information for table-mode impact analysis.", + "existingJavaType": "org.openmetadata.schema.api.lineage.LineagePaginationInfo" } }, "additionalProperties": false diff --git a/schemas/api/search/orphanCleanupResponse.json b/schemas/api/search/orphanCleanupResponse.json new file mode 100644 index 0000000..24698d6 --- /dev/null +++ b/schemas/api/search/orphanCleanupResponse.json @@ -0,0 +1,13 @@ +{ + "$id": "https://open-metadata.org/schema/api/search/orphanCleanupResponse.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "OrphanCleanupResponse", + "description": "Response after cleaning up orphan indexes.", + "type": "object", + "javaType": "org.openmetadata.schema.api.search.OrphanCleanupResponse", + "properties": { + "deletedIndexes": { "description": "List of deleted index names.", "type": "array", "items": { "type": "string" } }, + "deletedCount": { "description": "Number of indexes deleted.", "type": "integer" } + }, + "additionalProperties": false +} diff --git a/schemas/api/search/searchStats.json b/schemas/api/search/searchStats.json new file mode 100644 index 0000000..0fc71fa --- /dev/null +++ b/schemas/api/search/searchStats.json @@ -0,0 +1,96 @@ +{ + "$id": "https://open-metadata.org/schema/api/search/searchStats.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "SearchStatsResponse", + "description": "Response 
containing search cluster statistics and index details.", + "type": "object", + "javaType": "org.openmetadata.schema.api.search.SearchStatsResponse", + "properties": { + "clusterHealth": { + "description": "Health status of the search cluster (GREEN, YELLOW, RED).", + "type": "string" + }, + "totalIndexes": { + "description": "Total number of indexes in the cluster.", + "type": "integer" + }, + "totalDocuments": { + "description": "Total number of documents across all indexes.", + "type": "integer", + "format": "int64" + }, + "totalSizeInBytes": { + "description": "Total storage size in bytes.", + "type": "integer", + "format": "int64" + }, + "totalSizeFormatted": { + "description": "Human-readable total storage size.", + "type": "string" + }, + "totalPrimaryShards": { + "description": "Total number of primary shards.", + "type": "integer" + }, + "totalReplicaShards": { + "description": "Total number of replica shards.", + "type": "integer" + }, + "indexes": { + "description": "List of index details.", + "type": "array", + "items": { + "$ref": "#/definitions/indexStats" + } + }, + "orphanIndexes": { + "description": "List of orphan indexes (indexes with zero aliases).", + "type": "array", + "items": { + "$ref": "#/definitions/orphanIndex" + } + }, + "isSearchIndexingRunning": { + "description": "Indicates if search indexing job is currently running. 
Orphan cleanup is not allowed while indexing is in progress.", + "type": "boolean", + "default": false + }, + "expectedIndexCount": { + "description": "Total number of expected indices from entity index mappings.", + "type": "integer" + }, + "missingIndexes": { + "description": "List of expected index names that are missing from the search cluster.", + "type": "array", + "items": { + "type": "string" + } + } + }, + "definitions": { + "indexStats": { + "type": "object", + "javaType": "org.openmetadata.schema.api.search.SearchStatsResponse$IndexStats", + "properties": { + "name": { "description": "Index name.", "type": "string" }, + "documents": { "description": "Number of documents in the index.", "type": "integer", "format": "int64" }, + "primaryShards": { "description": "Number of primary shards.", "type": "integer" }, + "replicaShards": { "description": "Number of replica shards.", "type": "integer" }, + "sizeInBytes": { "description": "Index size in bytes.", "type": "integer", "format": "int64" }, + "sizeFormatted": { "description": "Human-readable index size.", "type": "string" }, + "health": { "description": "Index health status.", "type": "string" }, + "aliases": { "description": "List of aliases pointing to this index.", "type": "array", "items": { "type": "string" } } + } + }, + "orphanIndex": { + "type": "object", + "javaType": "org.openmetadata.schema.api.search.SearchStatsResponse$OrphanIndex", + "properties": { + "name": { "description": "Orphan index name.", "type": "string" }, + "sizeInBytes": { "description": "Index size in bytes.", "type": "integer", "format": "int64" }, + "sizeFormatted": { "description": "Human-readable index size.", "type": "string" } + } + } + }, + "additionalProperties": false +} diff --git a/schemas/api/services/createLLMService.json b/schemas/api/services/createLLMService.json new file mode 100644 index 0000000..736bc1a --- /dev/null +++ b/schemas/api/services/createLLMService.json @@ -0,0 +1,63 @@ +{ + "$id": 
"https://open-metadata.org/schema/api/services/createLLMService.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "CreateLLMServiceRequest", + "description": "Create LLM service entity request", + "type": "object", + "javaType": "org.openmetadata.schema.api.services.CreateLLMService", + "javaInterfaces": ["org.openmetadata.schema.CreateEntity"], + + "properties": { + "name": { + "description": "Name that identifies this entity instance uniquely", + "$ref": "../../type/basic.json#/definitions/entityName" + }, + "displayName": { + "description": "Display Name that identifies this LLM service.", + "type": "string" + }, + "description": { + "description": "Description of LLM entity.", + "$ref": "../../type/basic.json#/definitions/markdown" + }, + "tags": { + "description": "Tags for this LLM Service.", + "type": "array", + "items": { + "$ref": "../../type/tagLabel.json" + }, + "default": null + }, + "serviceType": { + "$ref": "../../entity/services/llmService.json#/definitions/llmServiceType" + }, + "connection": { + "$ref": "../../entity/services/llmService.json#/definitions/llmConnection" + }, + "owners": { + "description": "Owners of this LLM service.", + "$ref": "../../type/entityReferenceList.json", + "default": null + }, + "dataProducts" : { + "description": "List of fully qualified names of data products this entity is part of.", + "type": "array", + "items" : { + "$ref" : "../../type/basic.json#/definitions/fullyQualifiedEntityName" + } + }, + "domains" : { + "description": "Fully qualified names of the domains the LLM Service belongs to.", + "type": "array", + "items": { + "type": "string" + } + }, + "ingestionRunner" : { + "description": "The ingestion agent responsible for executing the ingestion pipeline. 
It will be defined at runtime based on the Ingestion Agent of the service.", + "$ref": "../../type/entityReference.json" + } + }, + "required": ["name", "serviceType"], + "additionalProperties": false +} diff --git a/schemas/api/services/createMcpService.json b/schemas/api/services/createMcpService.json new file mode 100644 index 0000000..8185aa5 --- /dev/null +++ b/schemas/api/services/createMcpService.json @@ -0,0 +1,63 @@ +{ + "$id": "https://open-metadata.org/schema/api/services/createMcpService.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "CreateMcpServiceRequest", + "description": "Create MCP service entity request", + "type": "object", + "javaType": "org.openmetadata.schema.api.services.CreateMcpService", + "javaInterfaces": ["org.openmetadata.schema.CreateEntity"], + + "properties": { + "name": { + "description": "Name that identifies this entity instance uniquely", + "$ref": "../../type/basic.json#/definitions/entityName" + }, + "displayName": { + "description": "Display Name that identifies this MCP service.", + "type": "string" + }, + "description": { + "description": "Description of MCP service entity.", + "$ref": "../../type/basic.json#/definitions/markdown" + }, + "tags": { + "description": "Tags for this MCP Service.", + "type": "array", + "items": { + "$ref": "../../type/tagLabel.json" + }, + "default": null + }, + "serviceType": { + "$ref": "../../entity/services/mcpService.json#/definitions/mcpServiceType" + }, + "connection": { + "$ref": "../../entity/services/mcpService.json#/definitions/mcpConnection" + }, + "owners": { + "description": "Owners of this MCP service.", + "$ref": "../../type/entityReferenceList.json", + "default": null + }, + "dataProducts": { + "description": "List of fully qualified names of data products this entity is part of.", + "type": "array", + "items": { + "$ref": "../../type/basic.json#/definitions/fullyQualifiedEntityName" + } + }, + "domains": { + "description": "Fully qualified names of 
the domains the MCP Service belongs to.", + "type": "array", + "items": { + "type": "string" + } + }, + "ingestionRunner": { + "description": "The ingestion agent responsible for executing the ingestion pipeline.", + "$ref": "../../type/entityReference.json" + } + }, + "required": ["name", "serviceType"], + "additionalProperties": false +} diff --git a/schemas/api/teams/createTeam.json b/schemas/api/teams/createTeam.json index 45bf6dc..06d0436 100644 --- a/schemas/api/teams/createTeam.json +++ b/schemas/api/teams/createTeam.json @@ -67,6 +67,10 @@ }, "default": null }, + "defaultPersona": { + "description": "Default persona for all users in this team. Only applicable to Group type teams.", + "$ref": "../../type/basic.json#/definitions/uuid" + }, "owners": { "description": "Owners sof this team. ", "$ref": "../../type/entityReferenceList.json", diff --git a/schemas/api/tests/bundleSuiteBulkAddRequest.json b/schemas/api/tests/bundleSuiteBulkAddRequest.json new file mode 100644 index 0000000..3fa9355 --- /dev/null +++ b/schemas/api/tests/bundleSuiteBulkAddRequest.json @@ -0,0 +1,35 @@ +{ + "$id": "https://open-metadata.org/schema/api/tests/bundleSuiteBulkAddRequest.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "BundleSuiteBulkAddRequest", + "description": "Request object for bulk-adding test cases to a bundle. Use mode 'ids' to specify exact suites, or mode 'all' to select all suites with optional exclusions.", + "javaType": "org.openmetadata.schema.api.tests.BundleSuiteBulkAddRequest", + "properties": { + "selection": { + "description": "Configuration for selecting test cases to add to the bundle. 
Choose either 'ids' mode to specify exact suites, or 'all' mode to select all suites with optional exclusions.", + "oneOf": [ + { + "$ref": "bundleSuiteBulkAddRequestBulkByIds.json" + }, + { + "$ref": "bundleSuiteBulkAddRequestBulkAll.json" + } + ] + }, + "testSuiteId": { + "description": "TestSuite ID where we will be adding the test cases.", + "$ref": "../../type/basic.json#/definitions/uuid" + }, + "mode": { + "description": "Mode of bulk addition. 'ids' to specify exact test suite IDs, 'all' to select all suites with optional exclusions.", + "type": "string", + "enum": ["ids", "all"] + } + }, + "required": [ + "selection", + "testSuiteId", + "mode" + ], + "additionalProperties": false +} diff --git a/schemas/api/tests/bundleSuiteBulkAddRequestBulkAll.json b/schemas/api/tests/bundleSuiteBulkAddRequestBulkAll.json new file mode 100644 index 0000000..aa4e5d9 --- /dev/null +++ b/schemas/api/tests/bundleSuiteBulkAddRequestBulkAll.json @@ -0,0 +1,25 @@ +{ + "$id": "https://open-metadata.org/schema/api/tests/bundleSuiteBulkAddRequestBulkAll.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "BundleSuiteBulkAddRequestBulkAll", + "description": "Add all test suites, with an optional list of IDs to exclude.", + "javaType": "org.openmetadata.schema.api.tests.BundleSuiteBulkAddRequestBulkAll", + "type": "object", + "additionalProperties": false, + "properties": { + "filter": { + "description": "Optional filter to exclude specific test suites from the bulk selection.", + "type": "object", + "additionalProperties": false, + "properties": { + "excludeIds": { + "description": "List of test suite IDs to exclude from the bulk add.", + "type": "array", + "items": { + "$ref": "../../type/basic.json#/definitions/uuid" + } + } + } + } + } +} diff --git a/schemas/api/tests/bundleSuiteBulkAddRequestBulkByIds.json b/schemas/api/tests/bundleSuiteBulkAddRequestBulkByIds.json new file mode 100644 index 0000000..1e83e59 --- /dev/null +++ 
b/schemas/api/tests/bundleSuiteBulkAddRequestBulkByIds.json @@ -0,0 +1,20 @@ +{ + "$id": "https://open-metadata.org/schema/api/tests/bundleSuiteBulkAddRequestBulkByIds.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "BundleSuiteBulkAddRequestBulkByIds", + "description": "Add a specific set of test suites by their IDs.", + "javaType": "org.openmetadata.schema.api.tests.BundleSuiteBulkAddRequestBulkByIds", + "type": "object", + "required": ["ids"], + "additionalProperties": false, + "properties": { + "ids": { + "description": "List of test suite IDs to add.", + "type": "array", + "items": { + "$ref": "../../type/basic.json#/definitions/uuid" + }, + "minItems": 1 + } + } +} diff --git a/schemas/api/tests/createTestCase.json b/schemas/api/tests/createTestCase.json index a93a42e..e58fc63 100644 --- a/schemas/api/tests/createTestCase.json +++ b/schemas/api/tests/createTestCase.json @@ -65,6 +65,13 @@ "type": "string" }, "default": [] + }, + "topDimensions": { + "description": "Number of top dimension values to show before grouping the rest as Others. Controls the cardinality of dimensional test results. Defaults to 5 when not specified.", + "type": "integer", + "minimum": 1, + "maximum": 50, + "default": 5 } }, "required": ["name", "testDefinition", "entityLink"], diff --git a/schemas/api/tests/createTestDefinition.json b/schemas/api/tests/createTestDefinition.json index ca2f6a5..13891c4 100644 --- a/schemas/api/tests/createTestDefinition.json +++ b/schemas/api/tests/createTestDefinition.json @@ -47,6 +47,9 @@ "$ref": "../../tests/testDefinition.json#/definitions/testCaseParameterDefinition" } }, + "dataQualityDimension": { + "$ref": "../../tests/testDefinition.json#/definitions/dataQualityDimensions" + }, "supportedServices": { "description": "List of services that this test definition supports. 
When empty, it implies all services are supported.", "type": "array", @@ -61,8 +64,16 @@ "items": { "type": "string" } + }, + "sqlExpression": { + "description": "SQL expression template for custom SQL-based test definitions. Supports substitution variables: {table} and {column} for runtime entity references, and {{paramName}} for user-defined parameters.", + "$ref": "../../type/basic.json#/definitions/sqlQuery" + }, + "validatorClass": { + "description": "Class name of the validator to use for this test definition. If not provided for custom SQL-based tests, defaults to 'ruleLibrarySqlExpressionValidator'.", + "type": "string" } }, - "required": ["name", "description","entityType", "testPlatforms"], + "required": ["name", "entityType", "testPlatforms"], "additionalProperties": false } diff --git a/schemas/auth/generateToken.json b/schemas/auth/generateToken.json index 77af131..8fdbbc4 100644 --- a/schemas/auth/generateToken.json +++ b/schemas/auth/generateToken.json @@ -6,6 +6,10 @@ "type": "object", "javaType": "org.openmetadata.schema.auth.GenerateTokenRequest", "properties": { + "id": { + "description": "User ID for which to generate the token.", + "$ref": "../type/basic.json#/definitions/uuid" + }, "JWTTokenExpiry": { "$ref": "jwtAuth.json#/definitions/JWTTokenExpiry" } diff --git a/schemas/configuration/adminOpsConfiguration.json b/schemas/configuration/adminOpsConfiguration.json new file mode 100644 index 0000000..1f64e87 --- /dev/null +++ b/schemas/configuration/adminOpsConfiguration.json @@ -0,0 +1,56 @@ +{ + "$id": "https://open-metadata.org/schema/configuration/adminOpsConfiguration.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "AdminOpsConfiguration", + "description": "Configuration for the Admin Operations API that allows SaaS Hypervisor to trigger administrative operations.", + "type": "object", + "javaType": "org.openmetadata.schema.configuration.AdminOpsConfiguration", + "definitions": { + "adminOpsMtlsConfiguration": { + 
"title": "Admin Ops mTLS Configuration", + "description": "Mutual TLS authentication configuration for the Admin Ops API.", + "type": "object", + "javaType": "org.openmetadata.schema.configuration.AdminOpsMtlsConfiguration", + "properties": { + "enabled": { + "description": "Enable mTLS client certificate authentication.", + "type": "boolean", + "default": false + }, + "requireClientCert": { + "description": "Require client certificate when mTLS is enabled.", + "type": "boolean", + "default": true + }, + "allowedCNs": { + "description": "List of allowed certificate Common Names.", + "type": "array", + "items": { "type": "string" } + }, + "allowedSANs": { + "description": "List of allowed Subject Alternative Names.", + "type": "array", + "items": { "type": "string" } + }, + "allowedIssuers": { + "description": "List of allowed certificate issuer DNs.", + "type": "array", + "items": { "type": "string" } + } + }, + "additionalProperties": false + } + }, + "properties": { + "enabled": { + "title": "Enabled", + "description": "Whether the Admin Ops API is enabled.", + "type": "boolean", + "default": false + }, + "mtls": { + "$ref": "#/definitions/adminOpsMtlsConfiguration" + } + }, + "additionalProperties": false +} diff --git a/schemas/configuration/aiPlatformConfiguration.json b/schemas/configuration/aiPlatformConfiguration.json index baf8017..0f9b95d 100644 --- a/schemas/configuration/aiPlatformConfiguration.json +++ b/schemas/configuration/aiPlatformConfiguration.json @@ -7,27 +7,47 @@ "javaType": "org.openmetadata.schema.configuration.AiPlatformConfiguration", "definitions": { "grpcConfiguration": { - "javaType": "org.openmetadata.schema.configuration.GrpcConfiguration", - "type": "object", - "properties": { - "port": { - "description": "Host for the gRPC server", - "type": "integer" - }, - "maxInboundMessageSize": { - "description": "Port for the gRPC server", - "type": "integer" - }, - "keepAliveTime": { - "description": "Keep alive time for the gRPC server", - 
"type": "integer" - }, - "keepAliveTimeout": { - "description": "Keep alive timeout for the gRPC server", - "type": "integer" - } + "javaType": "org.openmetadata.schema.configuration.GrpcConfiguration", + "type": "object", + "properties": { + "port": { + "description": "Port for the gRPC server", + "type": "integer" }, - "required": ["port"] + "maxInboundMessageSize": { + "description": "Maximum inbound message size for the gRPC server", + "type": "integer" + }, + "keepAliveTime": { + "description": "Keep alive time for the gRPC server", + "type": "integer" + }, + "keepAliveTimeout": { + "description": "Keep alive timeout for the gRPC server", + "type": "integer" + } + }, + "required": ["port"] + }, + "contextMemoryConfiguration": { + "description": "Tuning for the Context Memory T0 (user preferences) block shipped to the AI Platform via the gRPC user_memory_context field.", + "javaType": "org.openmetadata.schema.configuration.ContextMemoryConfiguration", + "type": "object", + "properties": { + "tokenBudget": { + "description": "Maximum number of tokens worth of T0 user-preference memories to include. 
Memories are ranked by freshness + usage and filled in order until the budget or item cap is reached.", + "type": "integer", + "default": 500, + "minimum": 0 + }, + "maxItems": { + "description": "Maximum number of T0 user-preference memories to include regardless of token budget.", + "type": "integer", + "default": 5, + "minimum": 0 + } + }, + "additionalProperties": false } }, "properties": { @@ -59,6 +79,10 @@ "grpc": { "description": "gRPC configuration for the AI Platform server", "$ref": "#/definitions/grpcConfiguration" + }, + "contextMemory": { + "description": "Context Memory tuning for the T0 user-preference block assembled by Collate and shipped to the AI Platform.", + "$ref": "#/definitions/contextMemoryConfiguration" } }, "required": ["host", "port", "grpc"], diff --git a/schemas/configuration/appsPrivateConfiguration.json b/schemas/configuration/appsPrivateConfiguration.json index 03e11c4..210033d 100644 --- a/schemas/configuration/appsPrivateConfiguration.json +++ b/schemas/configuration/appsPrivateConfiguration.json @@ -16,10 +16,10 @@ "type": "string", "description": "Application Name" }, - "preview": { + "enabled": { "type": "boolean", - "description": "Flag to enable/disable preview for the application. If the app is in preview mode, it can't be installed.", - "default": false + "description": "Flag to enable/disable the application. 
If the app is not enabled, it can't be installed.", + "default": true }, "schedule": { "$ref": "../entity/applications/app.json#/definitions/appSchedule" diff --git a/schemas/configuration/authenticationConfiguration.json b/schemas/configuration/authenticationConfiguration.json index 7ca54ee..b8f30d7 100644 --- a/schemas/configuration/authenticationConfiguration.json +++ b/schemas/configuration/authenticationConfiguration.json @@ -53,7 +53,7 @@ "title": "Token Validation Algorithm", "description": "Token Validation Algorithm to use.", "type": "string", - "enum": ["RS256", "RS384", "RS512"], + "enum": ["RS256", "RS384", "RS512", "ES256", "ES384", "ES512"], "default": "RS256" }, "authority": { @@ -81,12 +81,18 @@ }, "jwtPrincipalClaimsMapping": { "title": "Jwt Principal Claims Mapping", - "description": "Jwt Principal Claim Mapping", + "description": "Jwt Principal Claim Mapping. Format: 'key:claim_name' where key must be 'username' or 'email'. Both username and email mappings are required.", "type": "array", "items": { - "type": "string" + "type": "string", + "pattern": "^(username|email):[a-zA-Z0-9_-]+$" } }, + "jwtTeamClaimMapping": { + "title": "Jwt Team Claim Mapping", + "description": "JWT claim name that contains team/department information. For SAML SSO, this is the attribute name (e.g., 'department') from the SAML assertion. For JWT, this is the claim name in the JWT token. The value from this claim will be used to automatically assign users to matching teams in OpenMetadata during login.", + "type": "string" + }, "enableSelfSignup": { "title": "Enable Self Sign Up", "description": "Enable Self Sign Up", @@ -112,8 +118,78 @@ "description": "Force secure flag on session cookies even when not using HTTPS directly. 
Enable this when running behind a proxy/load balancer that handles SSL termination.", "type": "boolean", "default": false + }, + "enableAutoRedirect": { + "title": "Enable Auto Redirect", + "description": "Enable automatic redirect from the sign-in page to the configured SSO provider.", + "type": "boolean", + "default": false } }, - "required": ["provider", "providerName", "publicKeyUrls", "authority", "callbackUrl", "clientId", "jwtPrincipalClaims"], + "required": ["provider", "providerName", "jwtPrincipalClaims"], + "allOf": [ + { + "if": { + "properties": { + "provider": { "const": "ldap" } + } + }, + "then": { + "required": ["ldapConfiguration"] + } + }, + { + "if": { + "properties": { + "provider": { "const": "saml" } + } + }, + "then": { + "required": ["samlConfiguration"] + } + }, + { + "if": { + "allOf": [ + { + "properties": { + "provider": { + "enum": ["google", "azure", "okta", "auth0", "aws-cognito", "custom-oidc"] + } + } + }, + { + "properties": { + "clientType": { "const": "public" } + } + } + ] + }, + "then": { + "required": ["publicKeyUrls", "authority", "callbackUrl", "clientId"] + } + }, + { + "if": { + "allOf": [ + { + "properties": { + "provider": { + "enum": ["google", "azure", "okta", "auth0", "aws-cognito", "custom-oidc"] + } + } + }, + { + "properties": { + "clientType": { "const": "confidential" } + } + } + ] + }, + "then": { + "required": ["authority", "callbackUrl", "clientId", "oidcConfiguration"] + } + } + ], "additionalProperties": false } diff --git a/schemas/configuration/authorizerConfiguration.json b/schemas/configuration/authorizerConfiguration.json index 372a2e7..60502f4 100644 --- a/schemas/configuration/authorizerConfiguration.json +++ b/schemas/configuration/authorizerConfiguration.json @@ -82,6 +82,11 @@ "description": "Use Roles from Provider", "type": "boolean", "default": false + }, + "defaultOAuthRole": { + "title": "Default OAuth Role", + "description": "Default role assigned to new OAuth users during self-signup. 
If not specified, users will be created without roles.", + "type": "string" } }, "required": ["className", "containerRequestFilter", "adminPrincipals", "principalDomain", "enforcePrincipalDomain", "enableSecureSocketConnection"], diff --git a/schemas/configuration/elasticSearchConfiguration.json b/schemas/configuration/elasticSearchConfiguration.json index e32aa23..38ca4c3 100644 --- a/schemas/configuration/elasticSearchConfiguration.json +++ b/schemas/configuration/elasticSearchConfiguration.json @@ -21,11 +21,11 @@ }, "properties": { "host": { - "description": "Elastic Search Host", + "description": "Elastic Search Host. Supports single host or comma-separated list for multiple hosts (e.g., 'localhost' or 'es-node1:9200,es-node2:9200,es-node3:9200').", "type": "string" }, "port": { - "description": "Elastic Search port", + "description": "Elastic Search port. Used when host does not include port. Ignored when using comma-separated hosts with ports.", "type": "integer" }, "scheme": { @@ -103,6 +103,26 @@ "description": "Index factory name", "type": "string" }, + "aws": { + "description": "AWS IAM authentication configuration for OpenSearch. IAM auth must be explicitly enabled. When enabled, uses standard AWS environment variables or configured credentials.", + "type": "object", + "javaType": "org.openmetadata.schema.service.configuration.elasticsearch.AwsConfiguration", + "extends": { + "$ref": "../security/credentials/awsBaseConfig.json" + }, + "properties": { + "enabled": { + "description": "Enable AWS IAM authentication for OpenSearch. When enabled, requires region to be configured. 
Defaults to false for backward compatibility.", + "type": "boolean", + "default": false + }, + "serviceName": { + "description": "AWS service name for signing (es for Elasticsearch/OpenSearch, aoss for OpenSearch Serverless)", + "type": "string", + "default": "es" + } + } + }, "naturalLanguageSearch": { "description": "Configuration for natural language search capabilities", "type": "object", @@ -113,11 +133,32 @@ "type": "boolean", "default": false }, + "semanticSearchEnabled": { + "description": "Enable or disable semantic search using vector embeddings", + "type": "boolean", + "default": false + }, + "keywordWeight": { + "description": "Weight for BM25 keyword search results in hybrid RRF pipeline (0.0-1.0)", + "type": "number", + "default": 0.4 + }, + "semanticWeight": { + "description": "Weight for semantic vector search results in hybrid RRF pipeline (0.0-1.0)", + "type": "number", + "default": 0.6 + }, "embeddingProvider": { "description": "The provider to use for generating vector embeddings (e.g., bedrock, openai).", "type": "string", "default": "bedrock" }, + "maxConcurrentEmbeddingRequests": { + "description": "Maximum number of concurrent embedding API requests. 
Controls the semaphore used to throttle calls to the embedding provider and prevent overwhelming HTTP/2 connection limits.", + "type": "integer", + "default": 10, + "minimum": 1 + }, "providerClass": { "description": "Fully qualified class name of the NLQService implementation to use", "type": "string", @@ -127,10 +168,9 @@ "description": "AWS Bedrock configuration for natural language processing", "type": "object", "properties": { - "region": { - "description": "AWS Region for Bedrock service", - "type": "string", - "default": "us-east-1" + "awsConfig": { + "description": "AWS credentials configuration for Bedrock service", + "$ref": "../security/credentials/awsBaseConfig.json" }, "modelId": { "description": "Bedrock model identifier to use for query transformation", @@ -146,19 +186,6 @@ "description": "Dimension of the embedding vector", "type": "integer", "default": 512 - }, - "accessKey": { - "description": "AWS access key for Bedrock service authentication", - "type": "string" - }, - "secretKey": { - "description": "AWS secret key for Bedrock service authentication", - "type": "string" - }, - "useIamRole": { - "description": "Set to true to use IAM role based authentication instead of access/secret keys.", - "type": "boolean", - "default": false } }, "additionalProperties": false @@ -173,6 +200,41 @@ } }, "additionalProperties": false + }, + "openai": { + "description": "OpenAI configuration for embedding generation. 
Supports both OpenAI and Azure OpenAI endpoints.", + "type": "object", + "javaType": "org.openmetadata.schema.service.configuration.elasticsearch.Openai", + "properties": { + "apiKey": { + "description": "API key for authenticating with OpenAI or Azure OpenAI.", + "type": "string" + }, + "embeddingModelId": { + "description": "OpenAI embedding model identifier (e.g., text-embedding-3-small, text-embedding-ada-002).", + "type": "string", + "default": "text-embedding-3-small" + }, + "embeddingDimension": { + "description": "Dimension of the embedding vector. Default is 1536 for text-embedding-3-small.", + "type": "integer", + "default": 1536 + }, + "endpoint": { + "description": "Custom endpoint URL. For Azure OpenAI, use the Azure resource endpoint (e.g., https://your-resource.openai.azure.com). Leave empty for standard OpenAI API.", + "type": "string" + }, + "deploymentName": { + "description": "Azure OpenAI deployment name. Required when using Azure OpenAI.", + "type": "string" + }, + "apiVersion": { + "description": "Azure OpenAI API version. 
Only used with Azure OpenAI.", + "type": "string", + "default": "2024-02-01" + } + }, + "additionalProperties": false } }, "additionalProperties": false diff --git a/schemas/configuration/glossaryTermRelationSettings.json b/schemas/configuration/glossaryTermRelationSettings.json new file mode 100644 index 0000000..f30e571 --- /dev/null +++ b/schemas/configuration/glossaryTermRelationSettings.json @@ -0,0 +1,107 @@ +{ + "$id": "https://open-metadata.org/schema/configuration/glossaryTermRelationSettings.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "GlossaryTermRelationSettings", + "description": "This schema defines the Glossary Term Relation Settings for configuring typed semantic relations between glossary terms.", + "type": "object", + "javaType": "org.openmetadata.schema.configuration.GlossaryTermRelationSettings", + "definitions": { + "relationCategory": { + "javaType": "org.openmetadata.schema.configuration.RelationCategory", + "description": "Category of the relation type.", + "type": "string", + "enum": ["hierarchical", "associative", "equivalence"] + }, + "relationCardinality": { + "javaType": "org.openmetadata.schema.configuration.RelationCardinality", + "description": "Preset cardinality for term-to-term relations.", + "type": "string", + "enum": ["ONE_TO_ONE", "ONE_TO_MANY", "MANY_TO_ONE", "MANY_TO_MANY", "CUSTOM"] + }, + "glossaryTermRelationType": { + "javaType": "org.openmetadata.schema.configuration.GlossaryTermRelationType", + "description": "Definition of a glossary term relation type.", + "type": "object", + "properties": { + "name": { + "description": "Unique name of the relation type (e.g., 'broader', 'synonym').", + "type": "string", + "pattern": "^[a-zA-Z][a-zA-Z0-9]*$" + }, + "displayName": { + "description": "Display name for the relation type.", + "type": "string" + }, + "description": { + "description": "Description of what this relation type represents.", + "type": "string" + }, + "inverseRelation": { + 
"description": "Name of the inverse relation type (e.g., 'narrower' for 'broader'). Null for symmetric relations.", + "type": "string" + }, + "rdfPredicate": { + "description": "RDF predicate URI for this relation (e.g., 'skos:broader').", + "type": "string", + "format": "uri" + }, + "cardinality": { + "description": "Preset cardinality for this relation type. CUSTOM lets you set explicit source/target maxima.", + "$ref": "#/definitions/relationCardinality" + }, + "sourceMax": { + "description": "Maximum number of relations of this type that can originate from a term. Null means unbounded.", + "type": ["integer", "null"], + "minimum": 1 + }, + "targetMax": { + "description": "Maximum number of relations of this type that can target a term. Null means unbounded.", + "type": ["integer", "null"], + "minimum": 1 + }, + "isSymmetric": { + "description": "Whether the relation is symmetric (A relates B implies B relates A).", + "type": "boolean", + "default": false + }, + "isTransitive": { + "description": "Whether the relation is transitive (A relates B, B relates C implies A relates C).", + "type": "boolean", + "default": false + }, + "isCrossGlossaryAllowed": { + "description": "Whether relations can be created between terms in different glossaries.", + "type": "boolean", + "default": true + }, + "category": { + "description": "Category of the relation.", + "$ref": "#/definitions/relationCategory" + }, + "isSystemDefined": { + "description": "Whether this is a system-defined relation type (cannot be deleted).", + "type": "boolean", + "default": false + }, + "color": { + "description": "Hex color code for visualizing this relation type in graphs (e.g., '#1890ff').", + "type": "string", + "pattern": "^#[0-9a-fA-F]{6}$" + } + }, + "required": ["name", "displayName", "category"], + "additionalProperties": false + } + }, + "properties": { + "relationTypes": { + "description": "List of configured glossary term relation types.", + "type": "array", + "items": { + "$ref": 
"#/definitions/glossaryTermRelationType" + }, + "default": [] + } + }, + "additionalProperties": false +} diff --git a/schemas/configuration/limitsConfiguration.json b/schemas/configuration/limitsConfiguration.json index 17b3723..d5b30cb 100644 --- a/schemas/configuration/limitsConfiguration.json +++ b/schemas/configuration/limitsConfiguration.json @@ -20,6 +20,16 @@ "description": "Limits Configuration File.", "type": "string", "default": "limits-config.yaml" + }, + "credits": { + "description": "Collate platform credits", + "existingJavaType": "java.math.BigDecimal", + "default": 1000 + }, + "billingCycleStart": { + "title": "Cycle Start", + "description": "The start of this limit cycle.", + "$ref": "../type/basic.json#/definitions/date" } }, "required": ["enable"], diff --git a/schemas/configuration/lineageSettings.json b/schemas/configuration/lineageSettings.json index 6a071e9..3026ea4 100644 --- a/schemas/configuration/lineageSettings.json +++ b/schemas/configuration/lineageSettings.json @@ -19,6 +19,102 @@ "type": "string", "enum": ["Edge", "Node"], "default": "Node" + }, + "graphPerformanceConfig": { + "type": "object", + "javaType": "org.openmetadata.schema.api.lineage.GraphPerformanceConfig", + "description": "Configuration for lineage graph performance and scalability", + "properties": { + "smallGraphThreshold": { + "description": "Node count threshold for small graphs (eligible for caching)", + "type": "integer", + "default": 5000, + "minimum": 100 + }, + "mediumGraphThreshold": { + "description": "Node count threshold for medium graphs (optimized batching)", + "type": "integer", + "default": 50000, + "minimum": 1000 + }, + "maxInMemoryNodes": { + "description": "Maximum nodes to keep in memory before switching to streaming", + "type": "integer", + "default": 100000, + "minimum": 10000 + }, + "smallGraphBatchSize": { + "description": "Batch size for fetching small graph nodes from search backend", + "type": "integer", + "default": 10000, + "minimum": 
1000, + "maximum": 10000 + }, + "mediumGraphBatchSize": { + "description": "Batch size for fetching medium graph nodes", + "type": "integer", + "default": 5000, + "minimum": 500, + "maximum": 10000 + }, + "largeGraphBatchSize": { + "description": "Batch size for fetching large graph nodes", + "type": "integer", + "default": 1000, + "minimum": 100, + "maximum": 5000 + }, + "streamingBatchSize": { + "description": "Batch size for streaming very large graphs", + "type": "integer", + "default": 500, + "minimum": 50, + "maximum": 1000 + }, + "enableCaching": { + "description": "Enable caching for small/medium graphs", + "type": "boolean", + "default": true + }, + "cacheTTLSeconds": { + "description": "Cache time-to-live in seconds", + "type": "integer", + "default": 300, + "minimum": 60, + "maximum": 3600 + }, + "maxCachedGraphs": { + "description": "Maximum number of graphs to cache", + "type": "integer", + "default": 100, + "minimum": 10, + "maximum": 1000 + }, + "enableProgressTracking": { + "description": "Enable progress tracking for long-running queries", + "type": "boolean", + "default": false + }, + "progressReportInterval": { + "description": "Report progress every N nodes processed", + "type": "integer", + "default": 1000, + "minimum": 100 + }, + "useScrollForLargeGraphs": { + "description": "Use Elasticsearch/OpenSearch scroll API for large result sets", + "type": "boolean", + "default": true + }, + "scrollTimeoutMinutes": { + "description": "Scroll context timeout in minutes", + "type": "integer", + "default": 5, + "minimum": 1, + "maximum": 30 + } + }, + "additionalProperties": false } }, "properties": { @@ -43,6 +139,10 @@ "pipelineViewMode": { "description": "Pipeline View Mode for Lineage.", "$ref": "#/definitions/pipelineViewMode" + }, + "graphPerformanceConfig": { + "description": "Performance configuration for lineage graph builder.", + "$ref": "#/definitions/graphPerformanceConfig" } }, "required": [ diff --git 
a/schemas/configuration/mcpConfiguration.json b/schemas/configuration/mcpConfiguration.json new file mode 100644 index 0000000..3f97ced --- /dev/null +++ b/schemas/configuration/mcpConfiguration.json @@ -0,0 +1,63 @@ +{ + "$id": "https://open-metadata.org/schema/entity/configuration/mcpConfiguration.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "MCPConfiguration", + "description": "This schema defines the Model Context Protocol (MCP) Server configuration", + "type": "object", + "javaType": "org.openmetadata.schema.api.configuration.MCPConfiguration", + "properties": { + "mcpServerName": { + "description": "Name of the MCP server", + "type": "string", + "default": "openmetadata-mcp-server" + }, + "mcpServerVersion": { + "description": "Version of the MCP server", + "type": "string", + "default": "1.0.0" + }, + "enabled": { + "description": "Enable or disable the MCP server", + "type": "boolean", + "default": true + }, + "path": { + "description": "Base path for MCP endpoints", + "type": "string", + "default": "/api/v1/mcp" + }, + "originValidationEnabled": { + "description": "Enable or disable origin validation for requests", + "type": "boolean", + "default": false + }, + "originHeaderUri": { + "description": "Expected origin header URI for validation", + "type": "string", + "default": "http://localhost" + }, + "baseUrl": { + "description": "Base URL for MCP OAuth endpoints. Used for OAuth metadata (issuer, endpoints). If not set, falls back to system settings. For clustered deployments, set this to the external-facing URL.", + "type": "string" + }, + "allowedOrigins": { + "description": "List of allowed origins for CORS on OAuth endpoints. Use specific origins for production security. 
Wildcard (*) is NOT recommended.", + "type": "array", + "items": { + "type": "string" + }, + "default": ["http://localhost:3000", "http://localhost:8585", "http://localhost:9090"] + }, + "connectTimeout": { + "description": "HTTP connection timeout in milliseconds for SSO provider metadata fetching. Default: 30000ms (30 seconds)", + "type": "integer", + "default": 30000 + }, + "readTimeout": { + "description": "HTTP read timeout in milliseconds for SSO provider metadata fetching. Default: 30000ms (30 seconds)", + "type": "integer", + "default": 30000 + } + }, + "additionalProperties": false +} diff --git a/schemas/configuration/profilerConfiguration.json b/schemas/configuration/profilerConfiguration.json index 1eb434c..9793b00 100644 --- a/schemas/configuration/profilerConfiguration.json +++ b/schemas/configuration/profilerConfiguration.json @@ -44,7 +44,9 @@ "thirdQuartile", "system", "histogram", - "cardinalityDistribution" + "cardinalityDistribution", + "valueRank", + "nullMissingCount" ] }, "metricConfigurationDefinition": { @@ -68,6 +70,26 @@ } }, "additionalProperties": false + }, + "sampleDataIngestionConfig": { + "type": "object", + "javaType": "org.openmetadata.schema.api.configuration.profiler.SampleDataIngestionConfig", + "description": "Define the configuration for sample data ingestion at the platform level. This configuration will override the source-level configuration for sample data collection.", + "properties": { + "storeSampleData": { + "title": "Enable Storing of Sample Data", + "type": "boolean", + "description": "Allows OpenMetadata to store the sample data. This setting will override the source configuration.", + "default": true + }, + "readSampleData": { + "title": "Enable Reading of Sample Data", + "type": "boolean", + "description": "Allows OpenMetadata to read the sample data. This setting does not save the sample data, but the sample data will temporarily be brought into the OpenMetadata infrastructure for processing. 
If reading is disabled but storing is enabled, reading will be enabled automatically.", + "default": true + } + }, + "additionalProperties": false } }, "properties": { @@ -76,6 +98,11 @@ "items": { "$ref": "#/definitions/metricConfigurationDefinition" } + }, + "sampleDataConfig": { + "title": "Sample Data Configuration", + "description": "Platform-level configuration for sample data collection. This setting will override the source configuration.", + "$ref": "#/definitions/sampleDataIngestionConfig" } }, "additionalProperties": false diff --git a/schemas/configuration/searchSettings.json b/schemas/configuration/searchSettings.json index 87fe868..bc9cd63 100644 --- a/schemas/configuration/searchSettings.json +++ b/schemas/configuration/searchSettings.json @@ -52,6 +52,16 @@ "$ref": "#/definitions/fieldValueBoost" }, "description": "Optional list of numeric field-based boosts applied globally." + }, + "keywordWeight": { + "description": "Weight for BM25 keyword search in hybrid RRF pipeline (0.0-1.0)", + "type": "number", + "default": 0.4 + }, + "semanticWeight": { + "description": "Weight for semantic vector search in hybrid RRF pipeline (0.0-1.0)", + "type": "number", + "default": 0.6 } }, "additionalProperties": false diff --git a/schemas/configuration/teamsAppConfiguration.json b/schemas/configuration/teamsAppConfiguration.json new file mode 100644 index 0000000..c635f6b --- /dev/null +++ b/schemas/configuration/teamsAppConfiguration.json @@ -0,0 +1,28 @@ +{ + "$id": "https://open-metadata.org/schema/entity/configuration/teamsAppConfiguration.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "TeamsAppConfiguration", + "description": "This schema defines the Microsoft Teams App configuration", + "type": "object", + "javaType": "org.openmetadata.schema.service.configuration.teamsApp.TeamsAppConfiguration", + "properties": { + "microsoftAppId": { + "title": "Microsoft App ID", + "description": "Azure AD Application (Client) ID for the 
Teams bot", + "type": "string" + }, + "microsoftAppPassword": { + "title": "Microsoft App Password", + "description": "Azure AD Client Secret for the Teams bot", + "type": "string", + "format": "password" + }, + "microsoftAppTenantId": { + "title": "Microsoft App Tenant ID", + "description": "Azure AD Tenant ID (optional, for single-tenant bots). Use 'common' for multi-tenant.", + "type": "string" + } + }, + "required": ["microsoftAppId", "microsoftAppPassword"], + "additionalProperties": false +} diff --git a/schemas/configuration/themeConfiguration.json b/schemas/configuration/themeConfiguration.json index c3d4f3a..3513a21 100644 --- a/schemas/configuration/themeConfiguration.json +++ b/schemas/configuration/themeConfiguration.json @@ -11,6 +11,16 @@ "type": "string", "pattern": "^#([A-Fa-f0-9]{6}|[A-Fa-f0-9]{3})$|^$" }, + "hoverColor": { + "description": "Hover color used in the UI, in hex code format or empty.", + "type": "string", + "pattern": "^#([A-Fa-f0-9]{6}|[A-Fa-f0-9]{3})$|^$" + }, + "selectedColor": { + "description": "Selected color used in the UI, in hex code format or empty.", + "type": "string", + "pattern": "^#([A-Fa-f0-9]{6}|[A-Fa-f0-9]{3})$|^$" + }, "errorColor": { "description": "Color used to indicate errors in the UI, in hex code format or empty", "type": "string", diff --git a/schemas/entity/ai/agentExecution.json b/schemas/entity/ai/agentExecution.json new file mode 100644 index 0000000..d2ca8c4 --- /dev/null +++ b/schemas/entity/ai/agentExecution.json @@ -0,0 +1,306 @@ +{ + "$id": "https://open-metadata.org/schema/entity/ai/agentExecution.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "AgentExecution", + "$comment": "@om-entity-type", + "description": "Agent Execution entity representing a single execution run of an AI agent. 
Tracks inputs, outputs, data lineage, performance metrics, and errors for observability and governance.", + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.AgentExecution", + "javaInterfaces": ["org.openmetadata.schema.EntityTimeSeriesInterface"], + "definitions": { + "executionStatus": { + "type": "string", + "enum": ["Running", "Success", "Failed", "Timeout", "Cancelled", "PartialSuccess"], + "javaType": "org.openmetadata.schema.entity.ai.ExecutionStatus", + "javaEnums": [ + {"name": "Running"}, + {"name": "Success"}, + {"name": "Failed"}, + {"name": "Timeout"}, + {"name": "Cancelled"}, + {"name": "PartialSuccess"} + ] + }, + "modelCall": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.ModelCall", + "description": "Individual LLM model call during execution", + "additionalProperties": false, + "properties": { + "model": { + "$ref": "../../type/entityReference.json", + "description": "LLM Model used" + }, + "prompt": { + "type": "string", + "description": "Prompt sent to the model" + }, + "response": { + "type": "string", + "description": "Response from the model" + }, + "inputTokens": { + "type": "integer", + "description": "Number of input tokens" + }, + "outputTokens": { + "type": "integer", + "description": "Number of output tokens" + }, + "latencyMs": { + "type": "number", + "description": "Latency in milliseconds" + }, + "cost": { + "type": "number", + "description": "Cost for this call" + }, + "timestamp": { + "$ref": "../../type/basic.json#/definitions/timestamp" + }, + "purpose": { + "type": "string", + "description": "Purpose of this model call" + } + } + }, + "dataAccess": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.DataAccess", + "description": "Data accessed during execution - CRITICAL for lineage and impact analysis", + "additionalProperties": false, + "properties": { + "dataSource": { + "$ref": "../../type/entityReference.json", + "description": "Reference to the data source 
(Table, Dashboard, etc.)" + }, + "accessType": { + "type": "string", + "enum": ["Read", "Write", "Update", "Delete"], + "description": "Type of access" + }, + "columns": { + "type": "array", + "items": {"type": "string"}, + "description": "Specific columns accessed" + }, + "recordCount": { + "type": "integer", + "description": "Number of records accessed" + }, + "query": { + "type": "string", + "description": "Query executed (if applicable)" + }, + "timestamp": { + "$ref": "../../type/basic.json#/definitions/timestamp" + }, + "piiAccessed": { + "type": "boolean", + "description": "Whether PII data was accessed" + }, + "sensitivityLevel": { + "type": "string", + "enum": ["Public", "Internal", "Confidential", "Restricted"] + } + }, + "required": ["dataSource", "accessType"] + }, + "toolCall": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.ToolCall", + "description": "Tool/function call made during execution", + "additionalProperties": false, + "properties": { + "tool": { + "$ref": "../../type/entityReference.json", + "description": "Reference to MCP Tool or custom tool" + }, + "parameters": { + "type": "object", + "description": "Parameters passed to the tool" + }, + "result": { + "type": "string", + "description": "Result from the tool" + }, + "success": { + "type": "boolean", + "description": "Whether the tool call succeeded" + }, + "latencyMs": { + "type": "number", + "description": "Latency in milliseconds" + }, + "timestamp": { + "$ref": "../../type/basic.json#/definitions/timestamp" + }, + "errorMessage": { + "type": "string", + "description": "Error message if failed" + } + } + }, + "executionMetrics": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.ExecutionMetrics", + "description": "Performance and cost metrics for the execution", + "additionalProperties": false, + "properties": { + "totalTokens": { + "type": "integer", + "description": "Total tokens used across all model calls" + }, + "totalCost": { + "type": 
"number", + "description": "Total cost in USD" + }, + "totalLatencyMs": { + "type": "number", + "description": "Total execution time in milliseconds" + }, + "modelCallCount": { + "type": "integer", + "description": "Number of model calls made" + }, + "toolCallCount": { + "type": "integer", + "description": "Number of tool calls made" + }, + "dataSourcesAccessed": { + "type": "integer", + "description": "Number of unique data sources accessed" + }, + "piiDataAccessed": { + "type": "boolean", + "description": "Whether any PII data was accessed" + } + } + }, + "complianceCheck": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.ComplianceCheck", + "description": "Compliance checks performed during execution", + "additionalProperties": false, + "properties": { + "checkName": { + "type": "string", + "description": "Name of the compliance check" + }, + "passed": { + "type": "boolean", + "description": "Whether the check passed" + }, + "details": { + "type": "string", + "description": "Details about the check result" + }, + "severity": { + "type": "string", + "enum": ["Low", "Medium", "High", "Critical"] + } + } + } + }, + "properties": { + "id": { + "description": "Unique identifier of the Agent Execution", + "$ref": "../../type/basic.json#/definitions/uuid" + }, + "agent": { + "description": "AI Agent that was executed", + "$ref": "../../type/entityReference.json" + }, + "agentId": { + "description": "ID of the AI Agent (for indexing)", + "$ref": "../../type/basic.json#/definitions/uuid" + }, + "timestamp": { + "description": "Execution start timestamp", + "$ref": "../../type/basic.json#/definitions/timestamp" + }, + "endTimestamp": { + "description": "Execution end timestamp", + "$ref": "../../type/basic.json#/definitions/timestamp" + }, + "status": { + "$ref": "#/definitions/executionStatus" + }, + "input": { + "description": "Input provided to the agent", + "type": "string" + }, + "output": { + "description": "Output generated by the agent", + 
"type": "string" + }, + "modelCalls": { + "description": "LLM model calls made during execution", + "type": "array", + "items": { + "$ref": "#/definitions/modelCall" + } + }, + "dataAccessed": { + "description": "Data sources accessed during execution - KEY FOR LINEAGE", + "type": "array", + "items": { + "$ref": "#/definitions/dataAccess" + } + }, + "toolCalls": { + "description": "Tool calls made during execution", + "type": "array", + "items": { + "$ref": "#/definitions/toolCall" + } + }, + "metrics": { + "$ref": "#/definitions/executionMetrics" + }, + "errorMessage": { + "description": "Error message if execution failed", + "type": "string" + }, + "errorStack": { + "description": "Error stack trace", + "type": "string" + }, + "complianceChecks": { + "description": "Compliance checks performed", + "type": "array", + "items": { + "$ref": "#/definitions/complianceCheck" + } + }, + "executedBy": { + "description": "User or system that triggered the execution", + "type": "string" + }, + "sessionId": { + "description": "Session ID for grouping related executions", + "type": "string" + }, + "environment": { + "description": "Environment where execution occurred", + "type": "string", + "enum": ["Development", "Staging", "Production"] + }, + "agentVersion": { + "description": "Version of the agent at execution time", + "type": "string" + }, + "metadata": { + "description": "Additional execution metadata", + "type": "object", + "additionalProperties": {"type": "string"} + }, + "deleted": { + "description": "When true, indicates the entity has been soft deleted", + "type": "boolean", + "default": false + } + }, + "required": ["agent", "agentId", "timestamp", "status"], + "additionalProperties": false +} diff --git a/schemas/entity/ai/aiApplication.json b/schemas/entity/ai/aiApplication.json new file mode 100644 index 0000000..24cc5ff --- /dev/null +++ b/schemas/entity/ai/aiApplication.json @@ -0,0 +1,557 @@ +{ + "$id": 
"https://open-metadata.org/schema/entity/ai/aiApplication.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "AIApplication", + "$comment": "@om-entity-type", + "description": "AI Application entity representing AI systems including chatbots, agents, copilots, and other AI-powered applications. Applications can use multiple LLM models and integrate with various tools and data sources.", + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.AIApplication", + "javaInterfaces": ["org.openmetadata.schema.EntityInterface"], + "definitions": { + "applicationType": { + "javaType": "org.openmetadata.schema.entity.ai.ApplicationType", + "description": "Type of AI application based on primary function and interaction pattern", + "type": "string", + "enum": [ + "Chatbot", + "Agent", + "Copilot", + "Assistant", + "RAG", + "CodeGenerator", + "DataAnalyst", + "AutomationBot", + "MultiAgent", + "Custom" + ], + "javaEnums": [ + {"name": "Chatbot"}, + {"name": "Agent"}, + {"name": "Copilot"}, + {"name": "Assistant"}, + {"name": "RAG"}, + {"name": "CodeGenerator"}, + {"name": "DataAnalyst"}, + {"name": "AutomationBot"}, + {"name": "MultiAgent"}, + {"name": "Custom"} + ] + }, + "developmentStage": { + "javaType": "org.openmetadata.schema.entity.ai.DevelopmentStage", + "description": "Development stage of the AI application. 'Unauthorized' indicates Shadow AI that needs governance review.", + "type": "string", + "enum": [ + "Proposal", + "Development", + "Testing", + "Staging", + "Production", + "Deprecated", + "Unauthorized" + ], + "javaEnums": [ + {"name": "Proposal"}, + {"name": "Development"}, + {"name": "Testing"}, + {"name": "Staging"}, + {"name": "Production"}, + {"name": "Deprecated"}, + {"name": "Unauthorized"} + ] + }, + "modelConfiguration": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.ModelConfiguration", + "description": "Configuration for one LLM model used by this application. 
Applications can have multiple model configurations for different purposes.", + "additionalProperties": false, + "properties": { + "model": { + "$ref": "../../type/entityReference.json", + "description": "Reference to LLMModel entity" + }, + "purpose": { + "description": "Purpose of this model in the application workflow", + "type": "string", + "enum": ["Primary", "Reasoning", "Embedding", "CodeGeneration", "Fallback", "CostOptimization"], + "javaType": "org.openmetadata.schema.entity.ai.ModelPurpose", + "javaEnums": [ + {"name": "Primary"}, + {"name": "Reasoning"}, + {"name": "Embedding"}, + {"name": "CodeGeneration"}, + {"name": "Fallback"}, + {"name": "CostOptimization"} + ] + }, + "selectionCriteria": { + "type": "object", + "description": "Criteria for when to use this model", + "properties": { + "queryTypes": { + "type": "array", + "items": {"type": "string"}, + "description": "Types of queries this model handles" + }, + "maxTokens": { + "type": "integer", + "description": "Use this model only if query is under this token count" + }, + "costThreshold": { + "type": "number", + "description": "Use this model if cost per query is under this threshold" + } + } + }, + "parameters": { + "type": "object", + "description": "Model-specific parameters for this application", + "properties": { + "temperature": {"type": "number"}, + "maxTokens": {"type": "integer"}, + "topP": {"type": "number"}, + "frequencyPenalty": {"type": "number"}, + "presencePenalty": {"type": "number"} + } + } + }, + "required": ["model", "purpose"] + }, + "governanceMetadata": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.GovernanceMetadata", + "description": "AI governance metadata for compliance and risk management", + "additionalProperties": false, + "properties": { + "registrationStatus": { + "type": "string", + "description": "Registration status - used to track Shadow AI", + "enum": ["Registered", "Unregistered", "PendingApproval", "Approved", "Rejected"] + }, + 
"registeredBy": {"type": "string"}, + "registeredAt": { + "$ref": "../../type/basic.json#/definitions/timestamp" + }, + "approvedBy": {"type": "string"}, + "approvedAt": { + "$ref": "../../type/basic.json#/definitions/timestamp" + }, + "riskAssessment": { + "type": "object", + "description": "Risk assessment for this AI application", + "properties": { + "riskLevel": { + "type": "string", + "enum": ["Low", "Medium", "High", "Critical"] + }, + "riskFactors": { + "type": "array", + "items": {"type": "string"}, + "description": "Identified risk factors" + }, + "mitigations": { + "type": "array", + "items": {"type": "string"}, + "description": "Risk mitigation measures in place" + }, + "assessedBy": {"type": "string"}, + "assessedAt": { + "$ref": "../../type/basic.json#/definitions/timestamp" + } + } + }, + "dataClassification": { + "type": "object", + "description": "Classification of data accessed by this application", + "properties": { + "accessesPII": { + "type": "boolean", + "description": "Does this application access Personally Identifiable Information" + }, + "accessesSensitiveData": { + "type": "boolean", + "description": "Does this application access sensitive business data" + }, + "dataCategories": { + "type": "array", + "items": {"type": "string"}, + "description": "Categories of data accessed" + }, + "dataRetentionPeriod": { + "type": "string", + "description": "Data retention period for application logs" + } + } + }, + "governancePolicies": { + "$ref": "../../type/entityReferenceList.json", + "description": "Governance policies applied to this application" + }, + "aiCompliance": { + "$ref": "../../type/aiCompliance.json", + "description": "AI compliance assessments for various regulatory frameworks (EU AI Act, NIST AI RMF, etc.)" + }, + "intakeNotes": { + "type": "string", + "description": "Notes from AI governance intake form or review process" + }, + "approvalComments": { + "type": "string", + "description": "Comments from governance council on 
approval/rejection decision" + } + } + }, + "biasMetrics": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.BiasMetrics", + "description": "Bias evaluation metrics for the AI application", + "additionalProperties": false, + "properties": { + "lastEvaluatedAt": { + "$ref": "../../type/basic.json#/definitions/timestamp" + }, + "evaluationMethod": { + "type": "string", + "description": "Method used for bias evaluation (e.g., Fairlearn, AI Fairness 360)" + }, + "overallBiasScore": { + "type": "number", + "description": "Overall bias score from 0-1, where higher values indicate more bias", + "minimum": 0, + "maximum": 1 + }, + "demographicParity": { + "type": "number", + "description": "Demographic parity score" + }, + "equalizedOdds": { + "type": "number", + "description": "Equalized odds score" + }, + "disparateImpact": { + "type": "number", + "description": "Disparate impact ratio" + }, + "dimensionScores": { + "type": "object", + "description": "Bias scores by demographic dimension", + "properties": { + "gender": {"type": "number"}, + "race": {"type": "number"}, + "age": {"type": "number"}, + "religion": {"type": "number"}, + "disability": {"type": "number"}, + "socioeconomic": {"type": "number"} + } + }, + "testDataset": { + "$ref": "../../type/entityReference.json", + "description": "Dataset used for bias evaluation" + }, + "biasDetected": { + "type": "boolean", + "description": "Whether significant bias was detected" + }, + "remediationSteps": { + "type": "array", + "items": {"type": "string"}, + "description": "Steps taken or recommended to remediate bias" + } + } + }, + "performanceMetrics": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.PerformanceMetrics", + "description": "Runtime performance metrics for the AI application", + "additionalProperties": false, + "properties": { + "totalExecutions": { + "type": "integer", + "description": "Total number of executions" + }, + "successRate": { + "type": "number", + 
"description": "Success rate (0-1)" + }, + "averageLatencyMs": { + "type": "number", + "description": "Average latency in milliseconds" + }, + "p95LatencyMs": { + "type": "number", + "description": "95th percentile latency in milliseconds" + }, + "p99LatencyMs": { + "type": "number", + "description": "99th percentile latency in milliseconds" + }, + "averageCost": { + "type": "number", + "description": "Average cost per execution" + }, + "totalCost": { + "type": "number", + "description": "Total cost across all executions" + }, + "currency": { + "type": "string", + "default": "USD" + }, + "lastExecutionAt": { + "$ref": "../../type/basic.json#/definitions/timestamp" + } + } + }, + "qualityMetrics": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.QualityMetrics", + "description": "Quality metrics for AI application responses", + "additionalProperties": false, + "properties": { + "answerRelevancy": { + "type": "number", + "description": "Answer relevancy score (0-1)" + }, + "contextPrecision": { + "type": "number", + "description": "Context precision score (0-1)" + }, + "faithfulness": { + "type": "number", + "description": "Faithfulness to source data (0-1)" + }, + "hallucinationRate": { + "type": "number", + "description": "Rate of hallucinations (0-1)" + } + } + }, + "safetyMetrics": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.SafetyMetrics", + "description": "Safety metrics for AI application", + "additionalProperties": false, + "properties": { + "piiLeakageRate": { + "type": "number", + "description": "Rate of PII leakage incidents" + }, + "harmfulContentRate": { + "type": "number", + "description": "Rate of harmful content generated" + }, + "promptInjectionAttempts": { + "type": "integer", + "description": "Number of prompt injection attempts detected" + }, + "blockedRequests": { + "type": "integer", + "description": "Number of requests blocked by safety filters" + } + } + }, + "frameworkInfo": { + "type": 
"object", + "javaType": "org.openmetadata.schema.entity.ai.FrameworkInfo", + "description": "Information about the framework used to build the application", + "additionalProperties": false, + "properties": { + "name": { + "type": "string", + "enum": ["LangChain", "LlamaIndex", "AutoGen", "CrewAI", "Semantic Kernel", "Haystack", "Custom"] + }, + "version": {"type": "string"}, + "language": { + "type": "string", + "enum": ["Python", "TypeScript", "JavaScript", "Java", "C#", "Go"] + } + } + } + }, + "properties": { + "id": { + "description": "Unique identifier of the AI Application.", + "$ref": "../../type/basic.json#/definitions/uuid" + }, + "name": { + "description": "Name that identifies this AI Application.", + "$ref": "../../type/basic.json#/definitions/entityName" + }, + "fullyQualifiedName": { + "description": "Fully qualified name of the AI Application.", + "$ref": "../../type/basic.json#/definitions/fullyQualifiedEntityName" + }, + "displayName": { + "description": "Display name for the AI Application.", + "type": "string" + }, + "description": { + "description": "Description of the AI Application, its purpose, and usage.", + "$ref": "../../type/basic.json#/definitions/markdown" + }, + "applicationType": { + "$ref": "#/definitions/applicationType" + }, + "developmentStage": { + "$ref": "#/definitions/developmentStage" + }, + "modelConfigurations": { + "description": "Multiple LLM models this application can use for different purposes", + "type": "array", + "items": { + "$ref": "#/definitions/modelConfiguration" + }, + "minItems": 1 + }, + "primaryModel": { + "description": "Primary/default LLM model used by this application", + "$ref": "../../type/entityReference.json" + }, + "promptTemplates": { + "description": "Prompt templates used by this application", + "$ref": "../../type/entityReferenceList.json" + }, + "tools": { + "description": "MCP tools or other tools available to this application", + "$ref": "../../type/entityReferenceList.json" + }, + 
"mcpServers": { + "description": "MCP servers this application connects to for tools, resources, and prompts", + "$ref": "../../type/entityReferenceList.json" + }, + "dataSources": { + "description": "Data sources (tables, APIs, etc.) this application can access", + "$ref": "../../type/entityReferenceList.json" + }, + "knowledgeBases": { + "description": "Vector databases, document stores used for RAG", + "$ref": "../../type/entityReferenceList.json" + }, + "upstreamApplications": { + "description": "Other AI applications this application depends on (multi-agent orchestration)", + "$ref": "../../type/entityReferenceList.json" + }, + "downstreamApplications": { + "description": "AI applications that depend on this application", + "$ref": "../../type/entityReferenceList.json" + }, + "framework": { + "$ref": "#/definitions/frameworkInfo" + }, + "governanceMetadata": { + "$ref": "#/definitions/governanceMetadata" + }, + "biasMetrics": { + "$ref": "#/definitions/biasMetrics" + }, + "performanceMetrics": { + "$ref": "#/definitions/performanceMetrics" + }, + "qualityMetrics": { + "$ref": "#/definitions/qualityMetrics" + }, + "safetyMetrics": { + "$ref": "#/definitions/safetyMetrics" + }, + "testSuites": { + "description": "Test suites for validating this AI application", + "$ref": "../../type/entityReferenceList.json" + }, + "sourceCode": { + "description": "Link to source code repository", + "type": "string" + }, + "deploymentUrl": { + "description": "Production deployment endpoint", + "type": "string" + }, + "documentation": { + "description": "Link to external documentation", + "type": "string" + }, + "owners": { + "description": "Owners of this AI Application", + "$ref": "../../type/entityReferenceList.json" + }, + "followers": { + "description": "Followers of this AI Application", + "$ref": "../../type/entityReferenceList.json" + }, + "domain": { + "description": "Domain the AI Application belongs to", + "$ref": "../../type/entityReference.json" + }, + 
"dataProducts": { + "description": "Data products this AI Application is part of", + "$ref": "../../type/entityReferenceList.json" + }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" + }, + "tags": { + "description": "Tags for this AI Application", + "type": "array", + "items": { + "$ref": "../../type/tagLabel.json" + }, + "default": null + }, + "version": { + "description": "Metadata version of the entity", + "$ref": "../../type/entityHistory.json#/definitions/entityVersion" + }, + "updatedAt": { + "description": "Last update time in Unix epoch milliseconds", + "$ref": "../../type/basic.json#/definitions/timestamp" + }, + "updatedBy": { + "description": "User who made the update", + "type": "string" + }, + "href": { + "description": "Link to this resource", + "$ref": "../../type/basic.json#/definitions/href" + }, + "changeDescription": { + "description": "Change that led to this version", + "$ref": "../../type/entityHistory.json#/definitions/changeDescription" + }, + "incrementalChangeDescription": { + "description": "Change that led to this version", + "$ref": "../../type/entityHistory.json#/definitions/changeDescription" + }, + "deleted": { + "description": "When true, indicates the entity has been soft deleted", + "type": "boolean", + "default": false + }, + "certification": { + "$ref": "../../type/assetCertification.json" + }, + "extension": { + "description": "Entity extension data with custom attributes", + "$ref": "../../type/basic.json#/definitions/entityExtension" + }, + "domains": { + "description": "Domains the AI Application belongs to", + "$ref": "../../type/entityReferenceList.json" + }, + "votes": { + "description": "Votes on the entity", + "$ref": "../../type/votes.json" + }, + "lifeCycle": { + "description": "Life Cycle properties of the entity", + "$ref": "../../type/lifeCycle.json" + }, + "sourceHash": { + "description": "Source hash of the entity", + "type": 
"string", + "minLength": 1, + "maxLength": 32 + } + }, + "required": ["id", "name", "applicationType", "modelConfigurations"], + "additionalProperties": false +} diff --git a/schemas/entity/ai/aiGovernancePolicy.json b/schemas/entity/ai/aiGovernancePolicy.json new file mode 100644 index 0000000..fcb3fe3 --- /dev/null +++ b/schemas/entity/ai/aiGovernancePolicy.json @@ -0,0 +1,402 @@ +{ + "$id": "https://open-metadata.org/schema/entity/ai/aiGovernancePolicy.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "AIGovernancePolicy", + "$comment": "@om-entity-type", + "description": "AI Governance Policy entity representing organizational policies and rules for AI/LLM usage, compliance, and risk management. Enforces standards for model approval, data access, bias thresholds, and regulatory compliance.", + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.AIGovernancePolicy", + "javaInterfaces": ["org.openmetadata.schema.EntityInterface"], + "definitions": { + "policyType": { + "type": "string", + "enum": ["ModelApproval", "DataAccess", "BiasThreshold", "ComplianceCheck", "CostControl", "PerformanceStandard", "SecurityControl"], + "javaType": "org.openmetadata.schema.entity.ai.PolicyType", + "javaEnums": [ + {"name": "ModelApproval"}, + {"name": "DataAccess"}, + {"name": "BiasThreshold"}, + {"name": "ComplianceCheck"}, + {"name": "CostControl"}, + {"name": "PerformanceStandard"}, + {"name": "SecurityControl"} + ] + }, + "policyRule": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.PolicyRule", + "description": "Individual rule within a governance policy", + "additionalProperties": false, + "properties": { + "name": { + "type": "string", + "description": "Rule name" + }, + "description": { + "type": "string", + "description": "Rule description" + }, + "ruleType": { + "type": "string", + "enum": ["Threshold", "Approval", "Prohibition", "Requirement", "Notification"], + "description": "Type of rule" + }, + 
"condition": { + "type": "string", + "description": "Condition expression (e.g., 'biasScore > 0.3')" + }, + "action": { + "type": "string", + "enum": ["Block", "Warn", "Notify", "Require_Approval", "Log"], + "description": "Action to take when rule is triggered" + }, + "severity": { + "type": "string", + "enum": ["Low", "Medium", "High", "Critical"] + }, + "enabled": { + "type": "boolean", + "default": true + } + }, + "required": ["name", "ruleType", "action"] + }, + "biasThreshold": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.BiasThreshold", + "description": "Bias threshold limits for LLM models", + "additionalProperties": false, + "properties": { + "maxOverallBiasScore": { + "type": "number", + "description": "Maximum allowed overall bias score (0-1)" + }, + "dimensionThresholds": { + "type": "object", + "description": "Maximum bias scores per dimension", + "properties": { + "gender": {"type": "number"}, + "race": {"type": "number"}, + "age": {"type": "number"}, + "religion": {"type": "number"}, + "disability": {"type": "number"}, + "nationality": {"type": "number"}, + "socioeconomic": {"type": "number"} + } + }, + "evaluationFrequency": { + "type": "string", + "enum": ["Continuous", "Daily", "Weekly", "Monthly", "OnDemand"], + "description": "How often to evaluate bias" + } + } + }, + "dataAccessControl": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.DataAccessControl", + "description": "Data access controls for AI agents", + "additionalProperties": false, + "properties": { + "allowedDataSources": { + "$ref": "../../type/entityReferenceList.json", + "description": "Allowed data sources" + }, + "prohibitedDataSources": { + "$ref": "../../type/entityReferenceList.json", + "description": "Prohibited data sources" + }, + "maxSensitivityLevel": { + "type": "string", + "enum": ["Public", "Internal", "Confidential", "Restricted"], + "description": "Maximum data sensitivity level allowed" + }, + "piiAccessAllowed": { + 
"type": "boolean", + "default": false, + "description": "Whether PII data access is allowed" + }, + "requireApprovalForSensitiveData": { + "type": "boolean", + "default": true + }, + "dataRetentionDays": { + "type": "integer", + "description": "How long execution data can be retained" + } + } + }, + "costControl": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.CostControl", + "description": "Cost control limits", + "additionalProperties": false, + "properties": { + "dailyBudget": { + "type": "number", + "description": "Maximum daily spend in USD" + }, + "monthlyBudget": { + "type": "number", + "description": "Maximum monthly spend in USD" + }, + "perExecutionLimit": { + "type": "number", + "description": "Maximum cost per execution" + }, + "warningThreshold": { + "type": "number", + "description": "Percentage threshold for warnings (0-100)" + }, + "currency": { + "type": "string", + "default": "USD" + } + } + }, + "complianceRequirement": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.ComplianceRequirement", + "description": "Regulatory compliance requirement", + "additionalProperties": false, + "properties": { + "standard": { + "type": "string", + "enum": ["GDPR", "HIPAA", "SOC2", "ISO27001", "NIST_AI_RMF", "EU_AI_Act", "OWASP_LLM_Top10", "Custom"], + "description": "Compliance standard" + }, + "requirements": { + "type": "array", + "items": {"type": "string"}, + "description": "Specific requirements" + }, + "attestationRequired": { + "type": "boolean", + "default": false, + "description": "Whether attestation is required" + }, + "auditFrequency": { + "type": "string", + "enum": ["Continuous", "Weekly", "Monthly", "Quarterly", "Annually"] + } + } + }, + "performanceStandard": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.PerformanceStandard", + "description": "Performance standards for AI agents", + "additionalProperties": false, + "properties": { + "minAccuracy": { + "type": "number", + 
"description": "Minimum required accuracy (0-1)" + }, + "maxLatencyMs": { + "type": "number", + "description": "Maximum allowed latency in milliseconds" + }, + "minSuccessRate": { + "type": "number", + "description": "Minimum success rate (0-1)" + }, + "maxErrorRate": { + "type": "number", + "description": "Maximum error rate (0-1)" + }, + "evaluationDataset": { + "$ref": "../../type/entityReference.json", + "description": "Dataset to use for evaluation" + } + } + }, + "policyViolation": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.PolicyViolation", + "description": "Policy violation record", + "additionalProperties": false, + "properties": { + "violatedRule": { + "type": "string", + "description": "Name of the violated rule" + }, + "violatingEntity": { + "$ref": "../../type/entityReference.json", + "description": "Entity that violated the policy" + }, + "timestamp": { + "$ref": "../../type/basic.json#/definitions/timestamp" + }, + "details": { + "type": "string", + "description": "Details about the violation" + }, + "resolved": { + "type": "boolean", + "default": false + }, + "resolvedBy": { + "type": "string" + }, + "resolvedAt": { + "$ref": "../../type/basic.json#/definitions/timestamp" + } + } + } + }, + "properties": { + "id": { + "description": "Unique identifier of the AI Governance Policy", + "$ref": "../../type/basic.json#/definitions/uuid" + }, + "name": { + "description": "Name that identifies this AI Governance Policy", + "$ref": "../../type/basic.json#/definitions/entityName" + }, + "fullyQualifiedName": { + "description": "Fully qualified name of the AI Governance Policy", + "$ref": "../../type/basic.json#/definitions/fullyQualifiedEntityName" + }, + "displayName": { + "description": "Display name for the AI Governance Policy", + "type": "string" + }, + "description": { + "description": "Description of the policy and its purpose", + "$ref": "../../type/basic.json#/definitions/markdown" + }, + "policyType": { + "$ref": 
"#/definitions/policyType" + }, + "rules": { + "description": "Rules that make up this policy", + "type": "array", + "items": { + "$ref": "#/definitions/policyRule" + } + }, + "biasThresholds": { + "$ref": "#/definitions/biasThreshold" + }, + "dataAccessControls": { + "$ref": "#/definitions/dataAccessControl" + }, + "costControls": { + "$ref": "#/definitions/costControl" + }, + "complianceRequirements": { + "description": "Compliance requirements this policy enforces", + "type": "array", + "items": { + "$ref": "#/definitions/complianceRequirement" + } + }, + "performanceStandards": { + "$ref": "#/definitions/performanceStandard" + }, + "appliesTo": { + "description": "Entities this policy applies to", + "$ref": "../../type/entityReferenceList.json" + }, + "enforcementLevel": { + "description": "How strictly to enforce this policy", + "type": "string", + "enum": ["Advisory", "Warning", "Blocking"], + "default": "Warning" + }, + "enabled": { + "description": "Whether this policy is currently active", + "type": "boolean", + "default": true + }, + "effectiveDate": { + "description": "Date when policy becomes effective", + "$ref": "../../type/basic.json#/definitions/timestamp" + }, + "expirationDate": { + "description": "Date when policy expires", + "$ref": "../../type/basic.json#/definitions/timestamp" + }, + "approvedBy": { + "description": "Who approved this policy", + "type": "string" + }, + "violations": { + "description": "Recent policy violations", + "type": "array", + "items": { + "$ref": "#/definitions/policyViolation" + } + }, + "owners": { + "description": "Owners of this AI Governance Policy", + "$ref": "../../type/entityReferenceList.json" + }, + "followers": { + "description": "Followers of this AI Governance Policy", + "$ref": "../../type/entityReferenceList.json" + }, + "domain": { + "description": "Domain the policy belongs to", + "$ref": "../../type/entityReference.json" + }, + "tags": { + "description": "Tags for this AI Governance Policy", + "type": 
"array", + "items": { + "$ref": "../../type/tagLabel.json" + } + }, + "version": { + "description": "Metadata version of the entity", + "$ref": "../../type/entityHistory.json#/definitions/entityVersion" + }, + "updatedAt": { + "description": "Last update time in Unix epoch milliseconds", + "$ref": "../../type/basic.json#/definitions/timestamp" + }, + "updatedBy": { + "description": "User who made the update", + "type": "string" + }, + "href": { + "description": "Link to this resource", + "$ref": "../../type/basic.json#/definitions/href" + }, + "changeDescription": { + "description": "Change that led to this version", + "$ref": "../../type/entityHistory.json#/definitions/changeDescription" + }, + "incrementalChangeDescription": { + "description": "Change that led to this version", + "$ref": "../../type/entityHistory.json#/definitions/changeDescription" + }, + "deleted": { + "description": "When true, indicates the entity has been soft deleted", + "type": "boolean", + "default": false + }, + "extension": { + "description": "Entity extension data with custom attributes", + "$ref": "../../type/basic.json#/definitions/entityExtension" + }, + "domains": { + "description": "Domains the policy belongs to", + "$ref": "../../type/entityReferenceList.json" + }, + "votes": { + "description": "Votes on the entity", + "$ref": "../../type/votes.json" + }, + "lifeCycle": { + "description": "Life Cycle properties of the entity", + "$ref": "../../type/lifeCycle.json" + }, + "sourceHash": { + "description": "Source hash of the entity", + "type": "string", + "minLength": 1, + "maxLength": 32 + } + }, + "required": ["id", "name", "policyType"], + "additionalProperties": false +} diff --git a/schemas/entity/ai/llmModel.json b/schemas/entity/ai/llmModel.json new file mode 100644 index 0000000..6d1ba71 --- /dev/null +++ b/schemas/entity/ai/llmModel.json @@ -0,0 +1,462 @@ +{ + "$id": "https://open-metadata.org/schema/entity/ai/llmModel.json", + "$schema": 
"http://json-schema.org/draft-07/schema#", + "title": "LLMModel", + "$comment": "@om-entity-type", + "description": "LLM Model entity representing a registered Large Language Model deployment, fine-tune, or base model. Models are independent entities that can be referenced by multiple AI agents.", + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.LLMModel", + "javaInterfaces": ["org.openmetadata.schema.EntityInterface"], + "definitions": { + "modelType": { + "javaType": "org.openmetadata.schema.entity.ai.ModelType", + "description": "Type of LLM model", + "type": "string", + "enum": ["BaseModel", "FineTuned", "Quantized", "Distilled", "Adapter", "Custom"], + "javaEnums": [ + {"name": "BaseModel"}, + {"name": "FineTuned"}, + {"name": "Quantized"}, + {"name": "Distilled"}, + {"name": "Adapter"}, + {"name": "Custom"} + ] + }, + "modelCapability": { + "type": "string", + "enum": ["TextGeneration", "CodeGeneration", "Embeddings", "Chat", "Vision", "Audio", "FunctionCalling", "ToolUse"] + }, + "trainingMetadata": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.TrainingMetadata", + "description": "Training or fine-tuning metadata - critical for data lineage and impact analysis", + "additionalProperties": false, + "properties": { + "baseModel": { + "type": "string", + "description": "Base model this was trained/fine-tuned from" + }, + "trainingType": { + "type": "string", + "enum": ["FullFineTune", "LoRA", "QLoRA", "PrefixTuning", "PromptTuning", "RLHF"] + }, + "trainingDatasets": { + "$ref": "../../type/entityReferenceList.json", + "description": "Datasets used for training - KEY FOR DATA LINEAGE" + }, + "validationDatasets": { + "$ref": "../../type/entityReferenceList.json", + "description": "Datasets used for validation" + }, + "dataLineage": { + "type": "array", + "description": "Detailed data lineage for training - tracks exactly what data was used", + "items": { + "type": "object", + "properties": { + "dataset": { + "$ref": 
"../../type/entityReference.json" + }, + "recordCount": { + "type": "integer", + "description": "Number of records used from this dataset" + }, + "dateRange": { + "type": "object", + "properties": { + "start": {"$ref": "../../type/basic.json#/definitions/timestamp"}, + "end": {"$ref": "../../type/basic.json#/definitions/timestamp"} + } + }, + "columns": { + "type": "array", + "items": {"type": "string"}, + "description": "Columns used from the dataset" + }, + "piiHandling": { + "type": "string", + "description": "How PII was handled in this dataset" + }, + "dataTransformations": { + "type": "array", + "items": {"type": "string"}, + "description": "Transformations applied to the data" + }, + "sensitivityLevel": { + "type": "string", + "enum": ["Public", "Internal", "Confidential", "Restricted"] + } + } + } + }, + "trainingPeriod": { + "type": "object", + "properties": { + "startDate": {"$ref": "../../type/basic.json#/definitions/timestamp"}, + "endDate": {"$ref": "../../type/basic.json#/definitions/timestamp"}, + "durationHours": {"type": "number"} + } + }, + "trainingCost": { + "type": "object", + "properties": { + "totalCost": {"type": "number"}, + "currency": {"type": "string", "default": "USD"}, + "computeHours": {"type": "number"}, + "resourceType": {"type": "string"} + } + }, + "hyperparameters": { + "type": "object", + "description": "Hyperparameters used for training", + "properties": { + "learningRate": {"type": "number"}, + "batchSize": {"type": "integer"}, + "epochs": {"type": "integer"}, + "optimizer": {"type": "string"}, + "warmupSteps": {"type": "integer"}, + "weightDecay": {"type": "number"} + } + }, + "trainingMetrics": { + "type": "object", + "description": "Metrics from training process", + "properties": { + "finalLoss": {"type": "number"}, + "accuracy": {"type": "number"}, + "perplexity": {"type": "number"}, + "validationLoss": {"type": "number"} + } + }, + "trainedBy": {"type": "string"}, + "trainingJobId": {"type": "string"} + } + }, + 
"modelEvaluation": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.ModelEvaluation", + "description": "Comprehensive evaluation metrics for the LLM model", + "additionalProperties": false, + "properties": { + "evaluatedAt": { + "$ref": "../../type/basic.json#/definitions/timestamp" + }, + "evaluationDataset": { + "$ref": "../../type/entityReference.json" + }, + "accuracyMetrics": { + "type": "object", + "properties": { + "accuracy": {"type": "number"}, + "precision": {"type": "number"}, + "recall": {"type": "number"}, + "f1Score": {"type": "number"}, + "bleuScore": {"type": "number"}, + "rougeScore": {"type": "number"} + } + }, + "biasMetrics": { + "type": "object", + "description": "LLM BIAS EVALUATION - critical for governance", + "properties": { + "overallBiasScore": { + "type": "number", + "description": "Overall bias score 0-1, higher = more biased" + }, + "demographicParity": {"type": "number"}, + "equalizedOdds": {"type": "number"}, + "disparateImpact": {"type": "number"}, + "dimensionScores": { + "type": "object", + "properties": { + "gender": {"type": "number"}, + "race": {"type": "number"}, + "age": {"type": "number"}, + "religion": {"type": "number"}, + "disability": {"type": "number"}, + "nationality": {"type": "number"}, + "socioeconomic": {"type": "number"} + } + }, + "testMethod": {"type": "string"}, + "testDataset": {"$ref": "../../type/entityReference.json"} + } + }, + "fairnessMetrics": { + "type": "object", + "properties": { + "groupFairness": {"type": "number"}, + "individualFairness": {"type": "number"}, + "counterfactualFairness": {"type": "number"} + } + }, + "robustnessMetrics": { + "type": "object", + "properties": { + "adversarialRobustness": {"type": "number"}, + "outlierSensitivity": {"type": "number"}, + "noiseRobustness": {"type": "number"} + } + }, + "safetyMetrics": { + "type": "object", + "properties": { + "toxicityScore": {"type": "number"}, + "harmfulContentRate": {"type": "number"}, + "piiLeakageRisk": 
{"type": "number"} + } + } + } + }, + "modelSpecifications": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.ModelSpecifications", + "description": "Technical specifications of the model", + "additionalProperties": false, + "properties": { + "contextWindow": { + "type": "integer", + "description": "Context window size in tokens" + }, + "maxOutputTokens": { + "type": "integer", + "description": "Maximum output tokens" + }, + "parametersCount": { + "type": "string", + "description": "Number of parameters (e.g., '7B', '70B', '175B')" + }, + "architecture": { + "type": "string", + "description": "Model architecture (e.g., 'Transformer', 'GPT', 'BERT')" + }, + "quantization": { + "type": "string", + "description": "Quantization method if applicable" + } + } + }, + "costMetrics": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.CostMetrics", + "description": "Cost metrics for using this model", + "additionalProperties": false, + "properties": { + "inputCostPer1kTokens": { + "type": "number", + "description": "Cost per 1000 input tokens" + }, + "outputCostPer1kTokens": { + "type": "number", + "description": "Cost per 1000 output tokens" + }, + "currency": { + "type": "string", + "default": "USD" + }, + "estimatedMonthlyUsage": { + "type": "integer", + "description": "Estimated monthly token usage" + }, + "estimatedMonthlyCost": { + "type": "number", + "description": "Estimated monthly cost" + } + } + }, + "deploymentInfo": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.DeploymentInfo", + "description": "Deployment information for the model", + "additionalProperties": false, + "properties": { + "deploymentType": { + "type": "string", + "enum": ["API", "SelfHosted", "Hybrid", "OnPremise"] + }, + "endpoint": { + "type": "string", + "description": "API endpoint for the model" + }, + "region": { + "type": "string", + "description": "Deployment region" + }, + "availabilityZones": { + "type": "array", + 
"items": {"type": "string"} + } + } + } + }, + "properties": { + "id": { + "description": "Unique identifier of the LLM Model", + "$ref": "../../type/basic.json#/definitions/uuid" + }, + "name": { + "description": "Name that identifies this LLM Model", + "$ref": "../../type/basic.json#/definitions/entityName" + }, + "fullyQualifiedName": { + "description": "Fully qualified name of the LLM Model", + "$ref": "../../type/basic.json#/definitions/fullyQualifiedEntityName" + }, + "displayName": { + "description": "Display name for the LLM Model", + "type": "string" + }, + "description": { + "description": "Description of the LLM Model, its purpose, and capabilities", + "$ref": "../../type/basic.json#/definitions/markdown" + }, + "service": { + "description": "OPTIONAL reference to LLMService where this model is hosted", + "$ref": "../../type/entityReference.json" + }, + "modelType": { + "$ref": "#/definitions/modelType" + }, + "baseModel": { + "description": "Base model name (e.g., 'gpt-4', 'claude-3-opus', 'llama-2-70b')", + "type": "string" + }, + "modelVersion": { + "description": "Version of the model", + "type": "string" + }, + "modelProvider": { + "description": "Model provider (e.g., 'OpenAI', 'Anthropic', 'Meta')", + "type": "string" + }, + "providerModelId": { + "description": "Provider's internal model ID", + "type": "string" + }, + "capabilities": { + "description": "Capabilities of this model", + "type": "array", + "items": { + "$ref": "#/definitions/modelCapability" + } + }, + "modelSpecifications": { + "$ref": "#/definitions/modelSpecifications" + }, + "trainingMetadata": { + "$ref": "#/definitions/trainingMetadata" + }, + "modelEvaluation": { + "$ref": "#/definitions/modelEvaluation" + }, + "usedByAgents": { + "description": "AI Agents that use this model", + "$ref": "../../type/entityReferenceList.json" + }, + "governanceStatus": { + "description": "Governance status - tracks unauthorized/shadow AI models", + "type": "string", + "enum": ["Approved", 
"PendingReview", "Rejected", "Unauthorized"] + }, + "certifications": { + "description": "Certifications this model has received", + "type": "array", + "items": {"type": "string"} + }, + "regulatoryCompliance": { + "description": "Regulatory compliance standards met", + "type": "array", + "items": {"type": "string"} + }, + "costMetrics": { + "$ref": "#/definitions/costMetrics" + }, + "deploymentInfo": { + "$ref": "#/definitions/deploymentInfo" + }, + "owners": { + "description": "Owners of this LLM Model", + "$ref": "../../type/entityReferenceList.json" + }, + "followers": { + "description": "Followers of this LLM Model", + "$ref": "../../type/entityReferenceList.json" + }, + "domain": { + "description": "Domain the LLM Model belongs to", + "$ref": "../../type/entityReference.json" + }, + "dataProducts": { + "description": "Data products this model is part of", + "$ref": "../../type/entityReferenceList.json" + }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" + }, + "tags": { + "description": "Tags for this LLM Model", + "type": "array", + "items": { + "$ref": "../../type/tagLabel.json" + }, + "default": null + }, + "version": { + "description": "Metadata version of the entity", + "$ref": "../../type/entityHistory.json#/definitions/entityVersion" + }, + "updatedAt": { + "description": "Last update time in Unix epoch milliseconds", + "$ref": "../../type/basic.json#/definitions/timestamp" + }, + "updatedBy": { + "description": "User who made the update", + "type": "string" + }, + "href": { + "description": "Link to this resource", + "$ref": "../../type/basic.json#/definitions/href" + }, + "changeDescription": { + "description": "Change that led to this version", + "$ref": "../../type/entityHistory.json#/definitions/changeDescription" + }, + "incrementalChangeDescription": { + "description": "Change that led to this version", + "$ref": 
"../../type/entityHistory.json#/definitions/changeDescription" + }, + "deleted": { + "description": "When true, indicates the entity has been soft deleted", + "type": "boolean", + "default": false + }, + "certification": { + "$ref": "../../type/assetCertification.json" + }, + "extension": { + "description": "Entity extension data with custom attributes", + "$ref": "../../type/basic.json#/definitions/entityExtension" + }, + "domains": { + "description": "Domains the LLMModel belongs to", + "$ref": "../../type/entityReferenceList.json" + }, + "votes": { + "description": "Votes on the entity", + "$ref": "../../type/votes.json" + }, + "lifeCycle": { + "description": "Life Cycle properties of the entity", + "$ref": "../../type/lifeCycle.json" + }, + "sourceHash": { + "description": "Source hash of the entity", + "type": "string", + "minLength": 1, + "maxLength": 32 + } + }, + "required": ["id", "name", "modelType", "baseModel"], + "additionalProperties": false +} diff --git a/schemas/entity/ai/mcpExecution.json b/schemas/entity/ai/mcpExecution.json new file mode 100644 index 0000000..9649ca7 --- /dev/null +++ b/schemas/entity/ai/mcpExecution.json @@ -0,0 +1,368 @@ +{ + "$id": "https://open-metadata.org/schema/entity/ai/mcpExecution.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "McpExecution", + "$comment": "@om-entity-type", + "description": "MCP Execution entity representing a single execution session of an MCP server. Tracks tool calls, resource accesses, prompt uses, and data lineage for audit trails and governance. 
This is a time-series entity for observability and compliance.", + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.McpExecution", + "javaInterfaces": ["org.openmetadata.schema.EntityTimeSeriesInterface"], + "definitions": { + "executionStatus": { + "type": "string", + "javaType": "org.openmetadata.schema.entity.ai.McpExecutionStatus", + "description": "Status of the MCP execution", + "enum": ["Running", "Success", "Failed", "Timeout", "Cancelled"], + "javaEnums": [ + {"name": "Running"}, + {"name": "Success"}, + {"name": "Failed"}, + {"name": "Timeout"}, + {"name": "Cancelled"} + ] + }, + "accessType": { + "type": "string", + "javaType": "org.openmetadata.schema.entity.ai.McpAccessType", + "description": "Type of data access", + "enum": ["Read", "Write", "Delete"], + "javaEnums": [ + {"name": "Read"}, + {"name": "Write"}, + {"name": "Delete"} + ] + }, + "sensitivityLevel": { + "type": "string", + "javaType": "org.openmetadata.schema.entity.ai.McpExecutionSensitivityLevel", + "description": "Sensitivity level of data accessed", + "enum": ["Public", "Internal", "Confidential", "Restricted"], + "javaEnums": [ + {"name": "Public"}, + {"name": "Internal"}, + {"name": "Confidential"}, + {"name": "Restricted"} + ] + }, + "complianceSeverity": { + "type": "string", + "javaType": "org.openmetadata.schema.entity.ai.McpComplianceSeverity", + "description": "Severity level for compliance checks", + "enum": ["Info", "Warning", "Error", "Critical"], + "javaEnums": [ + {"name": "Info"}, + {"name": "Warning"}, + {"name": "Error"}, + {"name": "Critical"} + ] + }, + "toolCallRecord": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.McpToolCallRecord", + "description": "Record of a tool invocation during execution", + "additionalProperties": false, + "properties": { + "toolName": { + "type": "string", + "description": "Name of the tool that was called" + }, + "parameters": { + "type": "object", + "description": "Parameters passed to the tool" 
+ }, + "result": { + "type": "string", + "description": "Result returned by the tool" + }, + "success": { + "type": "boolean", + "description": "Whether the tool call succeeded" + }, + "latencyMs": { + "type": "number", + "description": "Latency in milliseconds" + }, + "timestamp": { + "$ref": "../../type/basic.json#/definitions/timestamp", + "description": "When the tool was called" + }, + "errorMessage": { + "type": "string", + "description": "Error message if failed" + }, + "dataAccessed": { + "type": "array", + "items": { + "$ref": "#/definitions/dataAccessRecord" + }, + "description": "Data accessed during this tool call" + } + } + }, + "dataAccessRecord": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.McpDataAccessRecord", + "description": "Record of data accessed during execution", + "additionalProperties": false, + "properties": { + "dataSource": { + "$ref": "../../type/entityReference.json", + "description": "Reference to the data source accessed" + }, + "accessType": { + "$ref": "#/definitions/accessType" + }, + "recordCount": { + "type": "integer", + "description": "Number of records accessed" + }, + "columns": { + "type": "array", + "items": {"type": "string"}, + "description": "Columns accessed if applicable" + }, + "piiAccessed": { + "type": "boolean", + "description": "Whether PII data was accessed" + }, + "sensitivityLevel": { + "$ref": "#/definitions/sensitivityLevel" + }, + "timestamp": { + "$ref": "../../type/basic.json#/definitions/timestamp" + } + } + }, + "resourceAccessRecord": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.McpResourceAccessRecord", + "description": "Record of a resource access during execution", + "additionalProperties": false, + "properties": { + "resourceName": { + "type": "string", + "description": "Name of the resource accessed" + }, + "resourceUri": { + "type": "string", + "description": "URI of the resource accessed" + }, + "accessType": { + "$ref": 
"#/definitions/accessType" + }, + "success": { + "type": "boolean", + "description": "Whether the access succeeded" + }, + "timestamp": { + "$ref": "../../type/basic.json#/definitions/timestamp" + }, + "bytesTransferred": { + "type": "integer", + "description": "Bytes read or written" + } + } + }, + "promptUseRecord": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.McpPromptUseRecord", + "description": "Record of a prompt use during execution", + "additionalProperties": false, + "properties": { + "promptName": { + "type": "string", + "description": "Name of the prompt that was used" + }, + "arguments": { + "type": "object", + "description": "Arguments passed to the prompt" + }, + "timestamp": { + "$ref": "../../type/basic.json#/definitions/timestamp" + }, + "tokensGenerated": { + "type": "integer", + "description": "Number of tokens in the generated output" + } + } + }, + "complianceCheckRecord": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.McpComplianceCheckRecord", + "description": "Record of a compliance check during execution", + "additionalProperties": false, + "properties": { + "checkName": { + "type": "string", + "description": "Name of the compliance check" + }, + "passed": { + "type": "boolean", + "description": "Whether the check passed" + }, + "details": { + "type": "string", + "description": "Details about the check result" + }, + "severity": { + "$ref": "#/definitions/complianceSeverity" + }, + "timestamp": { + "$ref": "../../type/basic.json#/definitions/timestamp" + } + } + }, + "executionMetrics": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.McpExecutionMetrics", + "description": "Aggregated metrics for the execution", + "additionalProperties": false, + "properties": { + "totalToolCalls": { + "type": "integer", + "description": "Total number of tool calls" + }, + "successfulToolCalls": { + "type": "integer", + "description": "Number of successful tool calls" + }, + 
"totalResourceAccesses": { + "type": "integer", + "description": "Total number of resource accesses" + }, + "totalPromptUses": { + "type": "integer", + "description": "Total number of prompt uses" + }, + "piiDataAccessed": { + "type": "boolean", + "description": "Whether any PII data was accessed" + }, + "highSensitivityDataAccessed": { + "type": "boolean", + "description": "Whether confidential or restricted data was accessed" + }, + "highRiskOperations": { + "type": "integer", + "description": "Number of high-risk operations performed" + }, + "complianceViolations": { + "type": "integer", + "description": "Number of compliance violations detected" + } + } + } + }, + "properties": { + "id": { + "description": "Unique identifier of the MCP Execution", + "$ref": "../../type/basic.json#/definitions/uuid" + }, + "server": { + "description": "MCP Server that was executed", + "$ref": "../../type/entityReference.json" + }, + "serverId": { + "description": "ID of the MCP Server (for indexing)", + "$ref": "../../type/basic.json#/definitions/uuid" + }, + "timestamp": { + "description": "Execution start timestamp", + "$ref": "../../type/basic.json#/definitions/timestamp" + }, + "endTimestamp": { + "description": "Execution end timestamp", + "$ref": "../../type/basic.json#/definitions/timestamp" + }, + "durationMs": { + "description": "Total execution duration in milliseconds", + "type": "number" + }, + "status": { + "$ref": "#/definitions/executionStatus" + }, + "executedBy": { + "description": "User or system that initiated the execution", + "type": "string" + }, + "applicationContext": { + "description": "AI Application that triggered this execution, if applicable", + "$ref": "../../type/entityReference.json" + }, + "sessionId": { + "description": "Session ID for grouping related executions", + "type": "string" + }, + "toolCalls": { + "description": "Tool invocations during this execution", + "type": "array", + "items": { + "$ref": "#/definitions/toolCallRecord" + } + }, + 
"resourceAccesses": { + "description": "Resource accesses during this execution", + "type": "array", + "items": { + "$ref": "#/definitions/resourceAccessRecord" + } + }, + "promptUses": { + "description": "Prompt uses during this execution", + "type": "array", + "items": { + "$ref": "#/definitions/promptUseRecord" + } + }, + "dataAccessed": { + "description": "All data sources accessed during execution", + "type": "array", + "items": { + "$ref": "#/definitions/dataAccessRecord" + } + }, + "complianceChecks": { + "description": "Compliance checks performed during execution", + "type": "array", + "items": { + "$ref": "#/definitions/complianceCheckRecord" + } + }, + "metrics": { + "$ref": "#/definitions/executionMetrics" + }, + "errorMessage": { + "description": "Error message if execution failed", + "type": "string" + }, + "errorStack": { + "description": "Error stack trace if available", + "type": "string" + }, + "environment": { + "description": "Environment where execution occurred", + "type": "string", + "enum": ["Development", "Testing", "Staging", "Production"] + }, + "serverVersion": { + "description": "Version of the MCP server at execution time", + "type": "string" + }, + "protocolVersion": { + "description": "MCP protocol version used", + "type": "string" + }, + "metadata": { + "description": "Additional execution metadata", + "type": "object", + "additionalProperties": {"type": "string"} + }, + "deleted": { + "description": "When true, indicates the entity has been soft deleted", + "type": "boolean", + "default": false + } + }, + "required": ["server", "serverId", "timestamp", "status"], + "additionalProperties": false +} diff --git a/schemas/entity/ai/mcpServer.json b/schemas/entity/ai/mcpServer.json new file mode 100644 index 0000000..a38b396 --- /dev/null +++ b/schemas/entity/ai/mcpServer.json @@ -0,0 +1,975 @@ +{ + "$id": "https://open-metadata.org/schema/entity/ai/mcpServer.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": 
"McpServer", + "$comment": "@om-entity-type", + "description": "MCP (Model Context Protocol) Server entity representing an MCP server deployment for AI governance. MCP servers expose tools, resources, and prompts that can be used by AI applications. This entity enables comprehensive governance including risk assessment, compliance, shadow AI detection, and data access tracking.", + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.McpServer", + "javaInterfaces": ["org.openmetadata.schema.EntityInterface"], + "definitions": { + "serverType": { + "javaType": "org.openmetadata.schema.entity.ai.McpServerType", + "description": "Type of MCP server based on its primary function", + "type": "string", + "enum": [ + "DataAccess", + "FileSystem", + "WebAPI", + "Database", + "Cloud", + "Security", + "Development", + "Communication", + "Custom" + ], + "javaEnums": [ + {"name": "DataAccess"}, + {"name": "FileSystem"}, + {"name": "WebAPI"}, + {"name": "Database"}, + {"name": "Cloud"}, + {"name": "Security"}, + {"name": "Development"}, + {"name": "Communication"}, + {"name": "Custom"} + ] + }, + "transportType": { + "javaType": "org.openmetadata.schema.entity.ai.McpTransportType", + "description": "Transport protocol used by the MCP server", + "type": "string", + "enum": [ + "Stdio", + "SSE", + "StreamableHTTP" + ], + "javaEnums": [ + {"name": "Stdio"}, + {"name": "SSE"}, + {"name": "StreamableHTTP"} + ] + }, + "developmentStage": { + "javaType": "org.openmetadata.schema.entity.ai.McpDevelopmentStage", + "description": "Development stage of the MCP server. 
'Unauthorized' indicates Shadow AI that needs governance review.", + "type": "string", + "enum": [ + "Proposal", + "Development", + "Testing", + "Staging", + "Production", + "Deprecated", + "Unauthorized" + ], + "javaEnums": [ + {"name": "Proposal"}, + {"name": "Development"}, + {"name": "Testing"}, + {"name": "Staging"}, + {"name": "Production"}, + {"name": "Deprecated"}, + {"name": "Unauthorized"} + ] + }, + "serverInfo": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.McpServerInfo", + "description": "Information about the MCP server software", + "additionalProperties": false, + "properties": { + "serverName": { + "type": "string", + "description": "Name of the MCP server software" + }, + "serverVersion": { + "type": "string", + "description": "Version of the MCP server software" + }, + "vendor": { + "type": "string", + "description": "Vendor or organization that provides the server" + }, + "repositoryUrl": { + "type": "string", + "description": "URL to the server's source code repository" + }, + "documentationUrl": { + "type": "string", + "description": "URL to the server's documentation" + } + } + }, + "connectionConfig": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.McpConnectionConfig", + "description": "Connection configuration for the MCP server", + "additionalProperties": false, + "properties": { + "command": { + "type": "string", + "description": "Command to start the MCP server" + }, + "args": { + "type": "array", + "items": {"type": "string"}, + "description": "Arguments to pass to the server command" + }, + "env": { + "type": "object", + "additionalProperties": {"type": "string"}, + "description": "Environment variables for the server" + }, + "workingDirectory": { + "type": "string", + "description": "Working directory for the server process" + }, + "url": { + "type": "string", + "description": "URL for SSE or StreamableHTTP transport servers" + }, + "timeout": { + "type": "integer", + "description": 
"Connection timeout in milliseconds" + }, + "retryAttempts": { + "type": "integer", + "description": "Number of retry attempts on connection failure" + } + } + }, + "serverCapabilities": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.McpServerCapabilities", + "description": "Capabilities supported by the MCP server", + "additionalProperties": false, + "properties": { + "toolsSupported": { + "type": "boolean", + "description": "Whether the server supports tools" + }, + "resourcesSupported": { + "type": "boolean", + "description": "Whether the server supports resources" + }, + "promptsSupported": { + "type": "boolean", + "description": "Whether the server supports prompts" + }, + "loggingSupported": { + "type": "boolean", + "description": "Whether the server supports logging" + }, + "samplingSupported": { + "type": "boolean", + "description": "Whether the server supports sampling" + }, + "rootsSupported": { + "type": "boolean", + "description": "Whether the server supports roots" + } + } + }, + "governanceMetadata": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.McpGovernanceMetadata", + "description": "Governance metadata for compliance and risk management of the MCP server", + "additionalProperties": false, + "properties": { + "registrationStatus": { + "type": "string", + "description": "Registration status - used to track Shadow AI", + "enum": ["Registered", "Unregistered", "PendingApproval", "Approved", "Rejected"] + }, + "registeredBy": { + "type": "string", + "description": "User who registered the server" + }, + "registeredAt": { + "$ref": "../../type/basic.json#/definitions/timestamp", + "description": "Timestamp when the server was registered" + }, + "approvedBy": { + "type": "string", + "description": "User who approved the server" + }, + "approvedAt": { + "$ref": "../../type/basic.json#/definitions/timestamp", + "description": "Timestamp when the server was approved" + }, + "riskAssessment": { + "type": 
"object", + "description": "Risk assessment for this MCP server", + "properties": { + "riskLevel": { + "type": "string", + "enum": ["Low", "Medium", "High", "Critical"] + }, + "riskFactors": { + "type": "array", + "items": {"type": "string"}, + "description": "Identified risk factors" + }, + "mitigations": { + "type": "array", + "items": {"type": "string"}, + "description": "Risk mitigation measures in place" + }, + "assessedBy": { + "type": "string" + }, + "assessedAt": { + "$ref": "../../type/basic.json#/definitions/timestamp" + } + } + }, + "dataClassification": { + "type": "object", + "description": "Classification of data accessed by this server", + "properties": { + "accessesPII": { + "type": "boolean", + "description": "Whether this server accesses Personally Identifiable Information" + }, + "accessesSensitiveData": { + "type": "boolean", + "description": "Whether this server accesses sensitive business data" + }, + "dataCategories": { + "type": "array", + "items": {"type": "string"}, + "description": "Categories of data accessed" + }, + "dataRetentionPeriod": { + "type": "string", + "description": "Data retention period for server logs" + } + } + }, + "governancePolicies": { + "$ref": "../../type/entityReferenceList.json", + "description": "Governance policies applied to this server" + }, + "aiCompliance": { + "$ref": "../../type/aiCompliance.json", + "description": "AI compliance assessments for various regulatory frameworks (EU AI Act, NIST AI RMF, etc.)" + }, + "intakeNotes": { + "type": "string", + "description": "Notes from AI governance intake form or review process" + }, + "approvalComments": { + "type": "string", + "description": "Comments from governance council on approval/rejection decision" + } + } + }, + "dataAccessSummary": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.McpDataAccessSummary", + "description": "Summary of data access patterns for this MCP server", + "additionalProperties": false, + "properties": { + 
"dataSources": { + "$ref": "../../type/entityReferenceList.json", + "description": "Data sources accessed by this server" + }, + "accessPatterns": { + "type": "array", + "items": { + "type": "string", + "enum": ["Read", "Write", "Execute"] + }, + "description": "Types of access patterns" + }, + "sensitivityLevel": { + "type": "string", + "enum": ["Public", "Internal", "Confidential", "Restricted"], + "description": "Highest sensitivity level of data accessed" + }, + "piiAccess": { + "type": "boolean", + "description": "Whether this server accesses PII data" + }, + "externalApiAccess": { + "type": "boolean", + "description": "Whether this server accesses external APIs" + }, + "fileSystemAccess": { + "type": "boolean", + "description": "Whether this server accesses the file system" + }, + "networkAccess": { + "type": "boolean", + "description": "Whether this server requires network access" + }, + "databaseAccess": { + "type": "boolean", + "description": "Whether this server accesses databases" + } + } + }, + "usageMetrics": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.McpUsageMetrics", + "description": "Usage metrics for the MCP server", + "additionalProperties": false, + "properties": { + "totalInvocations": { + "type": "integer", + "description": "Total number of invocations" + }, + "successRate": { + "type": "number", + "minimum": 0, + "maximum": 1, + "description": "Success rate (0-1)" + }, + "averageLatencyMs": { + "type": "number", + "description": "Average latency in milliseconds" + }, + "p95LatencyMs": { + "type": "number", + "description": "95th percentile latency in milliseconds" + }, + "lastInvokedAt": { + "$ref": "../../type/basic.json#/definitions/timestamp", + "description": "Timestamp of last invocation" + }, + "uniqueUsers": { + "type": "integer", + "description": "Number of unique users" + }, + "dailyActiveUsers": { + "type": "integer", + "description": "Daily active users" + } + } + }, + "securityMetrics": { + "type": 
"object", + "javaType": "org.openmetadata.schema.entity.ai.McpSecurityMetrics", + "description": "Security metrics and settings for the MCP server", + "additionalProperties": false, + "properties": { + "authenticationRequired": { + "type": "boolean", + "description": "Whether authentication is required" + }, + "authorizationEnforced": { + "type": "boolean", + "description": "Whether authorization is enforced" + }, + "auditLoggingEnabled": { + "type": "boolean", + "description": "Whether audit logging is enabled" + }, + "encryptionInTransit": { + "type": "boolean", + "description": "Whether data is encrypted in transit" + }, + "sandboxed": { + "type": "boolean", + "description": "Whether the server runs in a sandboxed environment" + }, + "secretsManagement": { + "type": "string", + "description": "Method used for secrets management" + } + } + }, + "toolCategory": { + "javaType": "org.openmetadata.schema.entity.ai.McpToolCategory", + "description": "Category of the MCP tool based on its primary function", + "type": "string", + "enum": [ + "FileOperation", + "WebOperation", + "DataOperation", + "DatabaseOperation", + "SystemOperation", + "SecurityOperation", + "CommunicationOperation", + "CodeOperation", + "SearchOperation", + "Custom" + ], + "javaEnums": [ + {"name": "FileOperation"}, + {"name": "WebOperation"}, + {"name": "DataOperation"}, + {"name": "DatabaseOperation"}, + {"name": "SystemOperation"}, + {"name": "SecurityOperation"}, + {"name": "CommunicationOperation"}, + {"name": "CodeOperation"}, + {"name": "SearchOperation"}, + {"name": "Custom"} + ] + }, + "riskLevel": { + "javaType": "org.openmetadata.schema.entity.ai.McpRiskLevel", + "description": "Risk level based on capabilities and data access", + "type": "string", + "enum": ["Low", "Medium", "High", "Critical"], + "javaEnums": [ + {"name": "Low"}, + {"name": "Medium"}, + {"name": "High"}, + {"name": "Critical"} + ] + }, + "sensitivityLevel": { + "javaType": 
"org.openmetadata.schema.entity.ai.McpSensitivityLevel", + "description": "Sensitivity level of data", + "type": "string", + "enum": ["Public", "Internal", "Confidential", "Restricted"], + "javaEnums": [ + {"name": "Public"}, + {"name": "Internal"}, + {"name": "Confidential"}, + {"name": "Restricted"} + ] + }, + "mcpTool": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.McpTool", + "description": "MCP Tool - a capability exposed by the server that can perform operations", + "additionalProperties": false, + "properties": { + "name": { + "type": "string", + "description": "Name of the tool" + }, + "displayName": { + "type": "string", + "description": "Display name for the tool" + }, + "description": { + "type": "string", + "description": "Description of what the tool does" + }, + "toolCategory": { + "$ref": "#/definitions/toolCategory" + }, + "inputSchema": { + "type": "object", + "description": "JSON Schema for tool input parameters" + }, + "outputSchema": { + "type": "object", + "description": "JSON Schema for tool output" + }, + "riskLevel": { + "$ref": "#/definitions/riskLevel" + }, + "riskFactors": { + "type": "array", + "items": {"type": "string"}, + "description": "Identified risk factors" + }, + "dataAccess": { + "type": "object", + "description": "Data access patterns for the tool", + "properties": { + "readsData": {"type": "boolean"}, + "writesData": {"type": "boolean"}, + "deletesData": {"type": "boolean"}, + "sensitivityLevel": {"$ref": "#/definitions/sensitivityLevel"}, + "piiAccess": {"type": "boolean"}, + "dataTypes": { + "type": "array", + "items": {"type": "string"} + } + } + }, + "requiredPermissions": { + "type": "array", + "items": {"type": "string"}, + "description": "Permissions required to use this tool" + }, + "sideEffects": { + "type": "boolean", + "description": "Whether this tool causes side effects", + "default": false + }, + "idempotent": { + "type": "boolean", + "description": "Whether this tool is idempotent", + 
"default": false + }, + "reversible": { + "type": "boolean", + "description": "Whether this tool's action can be reversed", + "default": false + }, + "timeout": { + "type": "integer", + "description": "Maximum execution time in milliseconds" + }, + "rateLimitPerMinute": { + "type": "integer", + "description": "Rate limit for tool invocations per minute" + }, + "annotations": { + "type": "object", + "description": "MCP tool annotations", + "properties": { + "title": {"type": "string"}, + "readOnlyHint": {"type": "boolean"}, + "destructiveHint": {"type": "boolean"}, + "idempotentHint": {"type": "boolean"}, + "openWorldHint": {"type": "boolean"} + } + }, + "usageCount": { + "type": "integer", + "description": "Number of times this tool has been invoked" + }, + "lastUsedAt": { + "$ref": "../../type/basic.json#/definitions/timestamp" + }, + "tags": { + "type": "array", + "items": {"$ref": "../../type/tagLabel.json"}, + "default": null + } + }, + "required": ["name"] + }, + "resourceType": { + "javaType": "org.openmetadata.schema.entity.ai.McpResourceType", + "description": "Type of MCP resource", + "type": "string", + "enum": ["File", "Directory", "URL", "Database", "API", "Blob", "Stream", "Document", "Custom"], + "javaEnums": [ + {"name": "File"}, + {"name": "Directory"}, + {"name": "URL"}, + {"name": "Database"}, + {"name": "API"}, + {"name": "Blob"}, + {"name": "Stream"}, + {"name": "Document"}, + {"name": "Custom"} + ] + }, + "mcpResource": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.McpResource", + "description": "MCP Resource - data exposed by the server that can be read by AI applications", + "additionalProperties": false, + "properties": { + "name": { + "type": "string", + "description": "Name of the resource" + }, + "displayName": { + "type": "string", + "description": "Display name for the resource" + }, + "description": { + "type": "string", + "description": "Description of the resource" + }, + "resourceType": { + "$ref": 
"#/definitions/resourceType" + }, + "uri": { + "type": "string", + "description": "URI pattern for accessing this resource" + }, + "uriTemplate": { + "type": "string", + "description": "URI template if the resource supports dynamic URIs" + }, + "mimeType": { + "type": "string", + "description": "MIME type of the resource content" + }, + "dataClassification": { + "type": "object", + "description": "Data classification for the resource", + "properties": { + "sensitivityLevel": {"$ref": "#/definitions/sensitivityLevel"}, + "containsPII": {"type": "boolean"}, + "tags": { + "description": "Tags for data classification including PII types", + "type": "array", + "items": {"$ref": "../../type/tagLabel.json"} + }, + "dataCategories": { + "type": "array", + "items": {"type": "string"} + }, + "retentionPeriod": {"type": "string"}, + "complianceRequirements": { + "type": "array", + "items": {"type": "string"} + } + } + }, + "accessLevel": { + "type": "string", + "enum": ["ReadOnly", "ReadWrite", "Full"] + }, + "requiredPermissions": { + "type": "array", + "items": {"type": "string"} + }, + "sourceEntity": { + "$ref": "../../type/entityReference.json", + "description": "Reference to the underlying data entity if known" + }, + "annotations": { + "type": "object", + "properties": { + "audience": { + "type": "array", + "items": {"type": "string"} + }, + "priority": {"type": "number"} + } + }, + "size": { + "type": "integer", + "description": "Size in bytes" + }, + "lastModified": { + "$ref": "../../type/basic.json#/definitions/timestamp" + }, + "accessCount": { + "type": "integer", + "description": "Number of times accessed" + }, + "tags": { + "type": "array", + "items": {"$ref": "../../type/tagLabel.json"}, + "default": null + } + }, + "required": ["name", "uri"] + }, + "promptType": { + "javaType": "org.openmetadata.schema.entity.ai.McpPromptType", + "description": "Type of MCP prompt", + "type": "string", + "enum": ["Analysis", "Generation", "Transformation", "Validation", 
"Query", "Summarization", "Extraction", "Classification", "Custom"], + "javaEnums": [ + {"name": "Analysis"}, + {"name": "Generation"}, + {"name": "Transformation"}, + {"name": "Validation"}, + {"name": "Query"}, + {"name": "Summarization"}, + {"name": "Extraction"}, + {"name": "Classification"}, + {"name": "Custom"} + ] + }, + "promptArgument": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.McpPromptArgument", + "description": "Argument definition for a prompt", + "additionalProperties": false, + "properties": { + "name": { + "type": "string", + "description": "Name of the argument" + }, + "type": { + "type": "string", + "enum": ["String", "Number", "Boolean", "Array", "Object"] + }, + "description": { + "type": "string" + }, + "required": { + "type": "boolean", + "default": false + }, + "default": { + "description": "Default value" + }, + "enum": { + "type": "array", + "description": "Allowed values" + }, + "sensitive": { + "type": "boolean", + "default": false + } + }, + "required": ["name"] + }, + "promptMessage": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.McpPromptMessage", + "description": "A message in the prompt template", + "additionalProperties": false, + "properties": { + "role": { + "type": "string", + "enum": ["user", "assistant", "system"] + }, + "content": { + "type": "string" + } + }, + "required": ["role", "content"] + }, + "mcpPrompt": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.McpPrompt", + "description": "MCP Prompt - a reusable prompt template exposed by the server", + "additionalProperties": false, + "properties": { + "name": { + "type": "string", + "description": "Name of the prompt" + }, + "displayName": { + "type": "string", + "description": "Display name for the prompt" + }, + "description": { + "type": "string", + "description": "Description of the prompt" + }, + "promptType": { + "$ref": "#/definitions/promptType" + }, + "arguments": { + "type": "array", + 
"items": {"$ref": "#/definitions/promptArgument"}, + "description": "Arguments that can be passed to this prompt" + }, + "messages": { + "type": "array", + "items": {"$ref": "#/definitions/promptMessage"}, + "description": "Message templates" + }, + "examples": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "arguments": {"type": "object"}, + "expectedOutput": {"type": "string"} + } + } + }, + "dataAccessPatterns": { + "type": "array", + "items": {"type": "string"} + }, + "outputClassification": { + "type": "object", + "properties": { + "sensitivityLevel": {"$ref": "#/definitions/sensitivityLevel"}, + "mayContainPII": {"type": "boolean"}, + "outputCategories": { + "type": "array", + "items": {"type": "string"} + } + } + }, + "usageCount": { + "type": "integer" + }, + "lastUsedAt": { + "$ref": "../../type/basic.json#/definitions/timestamp" + }, + "tags": { + "type": "array", + "items": {"$ref": "../../type/tagLabel.json"}, + "default": null + } + }, + "required": ["name"] + } + }, + "properties": { + "id": { + "description": "Unique identifier of the MCP Server.", + "$ref": "../../type/basic.json#/definitions/uuid" + }, + "name": { + "description": "Name that identifies this MCP Server.", + "$ref": "../../type/basic.json#/definitions/entityName" + }, + "fullyQualifiedName": { + "description": "Fully qualified name of the MCP Server.", + "$ref": "../../type/basic.json#/definitions/fullyQualifiedEntityName" + }, + "displayName": { + "description": "Display name for the MCP Server.", + "type": "string" + }, + "description": { + "description": "Description of the MCP Server, its purpose, and capabilities.", + "$ref": "../../type/basic.json#/definitions/markdown" + }, + "service": { + "description": "Reference to the MCP Service that contains this MCP Server.", + "$ref": "../../type/entityReference.json" + }, + "serverType": { + "$ref": "#/definitions/serverType" + }, + "transportType": { + "$ref": 
"#/definitions/transportType" + }, + "protocolVersion": { + "type": "string", + "description": "MCP protocol version supported by this server" + }, + "developmentStage": { + "$ref": "#/definitions/developmentStage" + }, + "serverInfo": { + "$ref": "#/definitions/serverInfo" + }, + "connectionConfig": { + "$ref": "#/definitions/connectionConfig" + }, + "capabilities": { + "$ref": "#/definitions/serverCapabilities" + }, + "tools": { + "description": "Tools exposed by this MCP Server", + "type": "array", + "items": { + "$ref": "#/definitions/mcpTool" + }, + "default": null + }, + "resources": { + "description": "Resources exposed by this MCP Server", + "type": "array", + "items": { + "$ref": "#/definitions/mcpResource" + }, + "default": null + }, + "prompts": { + "description": "Prompt templates exposed by this MCP Server", + "type": "array", + "items": { + "$ref": "#/definitions/mcpPrompt" + }, + "default": null + }, + "governanceMetadata": { + "$ref": "#/definitions/governanceMetadata" + }, + "dataAccessSummary": { + "$ref": "#/definitions/dataAccessSummary" + }, + "usageMetrics": { + "$ref": "#/definitions/usageMetrics" + }, + "securityMetrics": { + "$ref": "#/definitions/securityMetrics" + }, + "usedByApplications": { + "description": "AI Applications that use this MCP Server", + "$ref": "../../type/entityReferenceList.json" + }, + "sourceCode": { + "description": "Link to source code repository", + "type": "string" + }, + "deploymentUrl": { + "description": "Deployment endpoint URL", + "type": "string" + }, + "documentation": { + "description": "Link to external documentation", + "type": "string" + }, + "owners": { + "description": "Owners of this MCP Server", + "$ref": "../../type/entityReferenceList.json" + }, + "followers": { + "description": "Followers of this MCP Server", + "$ref": "../../type/entityReferenceList.json" + }, + "domain": { + "description": "Domain the MCP Server belongs to", + "$ref": "../../type/entityReference.json" + }, + "dataProducts": { 
+ "description": "Data products this MCP Server is part of", + "$ref": "../../type/entityReferenceList.json" + }, + "tags": { + "description": "Tags for this MCP Server", + "type": "array", + "items": { + "$ref": "../../type/tagLabel.json" + }, + "default": null + }, + "version": { + "description": "Metadata version of the entity", + "$ref": "../../type/entityHistory.json#/definitions/entityVersion" + }, + "updatedAt": { + "description": "Last update time in Unix epoch milliseconds", + "$ref": "../../type/basic.json#/definitions/timestamp" + }, + "updatedBy": { + "description": "User who made the update", + "type": "string" + }, + "href": { + "description": "Link to this resource", + "$ref": "../../type/basic.json#/definitions/href" + }, + "changeDescription": { + "description": "Change that led to this version", + "$ref": "../../type/entityHistory.json#/definitions/changeDescription" + }, + "incrementalChangeDescription": { + "description": "Change that led to this version", + "$ref": "../../type/entityHistory.json#/definitions/changeDescription" + }, + "deleted": { + "description": "When true, indicates the entity has been soft deleted", + "type": "boolean", + "default": false + }, + "certification": { + "$ref": "../../type/assetCertification.json" + }, + "extension": { + "description": "Entity extension data with custom attributes", + "$ref": "../../type/basic.json#/definitions/entityExtension" + }, + "domains": { + "description": "Domains the MCP Server belongs to", + "$ref": "../../type/entityReferenceList.json" + }, + "votes": { + "description": "Votes on the entity", + "$ref": "../../type/votes.json" + }, + "lifeCycle": { + "description": "Life Cycle properties of the entity", + "$ref": "../../type/lifeCycle.json" + }, + "sourceHash": { + "description": "Source hash of the entity", + "type": "string", + "minLength": 1, + "maxLength": 32 + } + }, + "required": ["id", "name", "serverType"], + "additionalProperties": false +} diff --git 
a/schemas/entity/ai/promptTemplate.json b/schemas/entity/ai/promptTemplate.json new file mode 100644 index 0000000..66ae4c8 --- /dev/null +++ b/schemas/entity/ai/promptTemplate.json @@ -0,0 +1,235 @@ +{ + "$id": "https://open-metadata.org/schema/entity/ai/promptTemplate.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "PromptTemplate", + "$comment": "@om-entity-type", + "description": "Prompt Template entity representing a reusable prompt template for AI agents. Templates can include variables, system prompts, and examples for consistent AI behavior.", + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.PromptTemplate", + "javaInterfaces": ["org.openmetadata.schema.EntityInterface"], + "definitions": { + "templateVariable": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.TemplateVariable", + "description": "Variable definition in the prompt template", + "additionalProperties": false, + "properties": { + "name": { + "type": "string", + "description": "Variable name (e.g., 'user_query', 'context')" + }, + "description": { + "type": "string", + "description": "Description of what this variable represents" + }, + "dataType": { + "type": "string", + "enum": ["String", "Number", "Boolean", "Array", "Object"], + "description": "Expected data type for this variable" + }, + "required": { + "type": "boolean", + "default": false, + "description": "Whether this variable is required" + }, + "defaultValue": { + "type": "string", + "description": "Default value if not provided" + }, + "validationPattern": { + "type": "string", + "description": "Regex pattern for validation" + } + }, + "required": ["name"] + }, + "promptExample": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.PromptExample", + "description": "Example of how to use this prompt template", + "additionalProperties": false, + "properties": { + "name": { + "type": "string", + "description": "Example name" + }, + "description": { + 
"type": "string", + "description": "Description of this example" + }, + "variables": { + "type": "object", + "description": "Example variable values", + "additionalProperties": {"type": "string"} + }, + "expectedOutput": { + "type": "string", + "description": "Expected output for this example" + } + } + }, + "promptMetrics": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.ai.PromptMetrics", + "description": "Usage and performance metrics for the prompt template", + "additionalProperties": false, + "properties": { + "usageCount": { + "type": "integer", + "description": "Number of times this template has been used" + }, + "averageTokens": { + "type": "number", + "description": "Average tokens used when rendering this template" + }, + "averageLatencyMs": { + "type": "number", + "description": "Average latency in milliseconds" + }, + "successRate": { + "type": "number", + "description": "Success rate (0-1) of executions using this template" + }, + "lastUsedAt": { + "$ref": "../../type/basic.json#/definitions/timestamp" + } + } + } + }, + "properties": { + "id": { + "description": "Unique identifier of the Prompt Template", + "$ref": "../../type/basic.json#/definitions/uuid" + }, + "name": { + "description": "Name that identifies this Prompt Template", + "$ref": "../../type/basic.json#/definitions/entityName" + }, + "fullyQualifiedName": { + "description": "Fully qualified name of the Prompt Template", + "$ref": "../../type/basic.json#/definitions/fullyQualifiedEntityName" + }, + "displayName": { + "description": "Display name for the Prompt Template", + "type": "string" + }, + "description": { + "description": "Description of the Prompt Template and its purpose", + "$ref": "../../type/basic.json#/definitions/markdown" + }, + "templateContent": { + "description": "The actual prompt template content with variables in {{variable}} format", + "type": "string" + }, + "systemPrompt": { + "description": "System prompt to set the AI's behavior and context", 
+ "type": "string" + }, + "variables": { + "description": "Variables used in this template", + "type": "array", + "items": { + "$ref": "#/definitions/templateVariable" + } + }, + "examples": { + "description": "Examples demonstrating how to use this template", + "type": "array", + "items": { + "$ref": "#/definitions/promptExample" + } + }, + "templateType": { + "description": "Type of prompt template", + "type": "string", + "enum": ["ChatCompletion", "TextGeneration", "CodeGeneration", "Embedding", "Classification", "Extraction", "Custom"] + }, + "templateVersion": { + "description": "Template version for tracking changes", + "type": "string" + }, + "parentTemplate": { + "description": "Reference to parent template if this is a version/fork", + "$ref": "../../type/entityReference.json" + }, + "usedByAgents": { + "description": "AI Agents using this template", + "$ref": "../../type/entityReferenceList.json" + }, + "metrics": { + "$ref": "#/definitions/promptMetrics" + }, + "tags": { + "description": "Tags for this Prompt Template", + "type": "array", + "items": { + "$ref": "../../type/tagLabel.json" + } + }, + "version": { + "description": "Metadata version of the entity", + "$ref": "../../type/entityHistory.json#/definitions/entityVersion" + }, + "owners": { + "description": "Owners of this Prompt Template", + "$ref": "../../type/entityReferenceList.json" + }, + "followers": { + "description": "Followers of this Prompt Template", + "$ref": "../../type/entityReferenceList.json" + }, + "domain": { + "description": "Domain the Prompt Template belongs to", + "$ref": "../../type/entityReference.json" + }, + "updatedAt": { + "description": "Last update time in Unix epoch milliseconds", + "$ref": "../../type/basic.json#/definitions/timestamp" + }, + "updatedBy": { + "description": "User who made the update", + "type": "string" + }, + "href": { + "description": "Link to this resource", + "$ref": "../../type/basic.json#/definitions/href" + }, + "changeDescription": { + 
"description": "Change that led to this version", + "$ref": "../../type/entityHistory.json#/definitions/changeDescription" + }, + "incrementalChangeDescription": { + "description": "Change that led to this version", + "$ref": "../../type/entityHistory.json#/definitions/changeDescription" + }, + "deleted": { + "description": "When true, indicates the entity has been soft deleted", + "type": "boolean", + "default": false + }, + "extension": { + "description": "Entity extension data with custom attributes", + "$ref": "../../type/basic.json#/definitions/entityExtension" + }, + "domains": { + "description": "Domains the Prompt Template belongs to", + "$ref": "../../type/entityReferenceList.json" + }, + "votes": { + "description": "Votes on the entity", + "$ref": "../../type/votes.json" + }, + "lifeCycle": { + "description": "Life Cycle properties of the entity", + "$ref": "../../type/lifeCycle.json" + }, + "sourceHash": { + "description": "Source hash of the entity", + "type": "string", + "minLength": 1, + "maxLength": 32 + } + }, + "required": ["id", "name", "templateContent"], + "additionalProperties": false +} diff --git a/schemas/entity/applications/app.json b/schemas/entity/applications/app.json index 4f84dc0..78a027b 100644 --- a/schemas/entity/applications/app.json +++ b/schemas/entity/applications/app.json @@ -275,10 +275,10 @@ "description": "Application Private configuration loaded at runtime.", "$ref": "./configuration/applicationConfig.json#/definitions/privateConfig" }, - "preview": { + "enabled": { "type": "boolean", - "description": "Flag to enable/disable preview for the application. If the app is in preview mode, it can't be installed.", - "default": false + "description": "Flag to enable/disable the application. 
If the app is not enabled, it can't be installed.", + "default": true }, "pipelines": { "description": "References to pipelines deployed for this database service to extract metadata, usage, lineage etc..", diff --git a/schemas/entity/applications/configuration/external/automator/lineagePropagationAction.json b/schemas/entity/applications/configuration/external/automator/lineagePropagationAction.json index 7031d87..f928ce9 100644 --- a/schemas/entity/applications/configuration/external/automator/lineagePropagationAction.json +++ b/schemas/entity/applications/configuration/external/automator/lineagePropagationAction.json @@ -19,6 +19,13 @@ "$ref": "#/definitions/lineagePropagationActionType", "default": "LineagePropagationAction" }, + "propagationFilterMode": { + "title": "Propagation Filter Mode", + "description": "Determines how the filter selects entities. 'SOURCE' (default): filtered entities push their metadata downstream to all discovered entities via lineage. 'TARGET': filtered entities receive metadata from upstream lineage.", + "type": "string", + "enum": ["TARGET", "SOURCE"], + "default": "SOURCE" + }, "propagateDescription": { "title": "Propagate Description", "description": "Propagate description through lineage", @@ -79,6 +86,13 @@ "type": "integer", "default": null }, + "propagationDepthMode": { + "title": "Propagation Depth Mode", + "description": "Mode for calculating propagation depth. 'ROOT' calculates depth from root nodes (sources with no parents). 
'DATA_ASSET' calculates depth relative to each data asset being processed, ensuring each asset only receives metadata from nodes within the specified number of hops upstream.", + "type": "string", + "enum": ["ROOT", "DATA_ASSET"], + "default": "ROOT" + }, "propagationStopConfigs": { "title": "Propagation Stop Configurations", "description": "List of configurations to stop propagation based on conditions", @@ -87,6 +101,12 @@ "items": { "$ref": "propagationStopConfig.json" } + }, + "useOptimizedPropagation": { + "title": "Use Optimized Propagation", + "description": "Use the optimized propagation algorithm that reduces memory usage and API calls. Recommended for large lineage graphs. If set to false, uses the original propagation algorithm. Default is true.", + "type": "boolean", + "default": true } }, "required": ["type"], diff --git a/schemas/entity/applications/configuration/external/automatorAppConfig.json b/schemas/entity/applications/configuration/external/automatorAppConfig.json index ecae093..7b47eba 100644 --- a/schemas/entity/applications/configuration/external/automatorAppConfig.json +++ b/schemas/entity/applications/configuration/external/automatorAppConfig.json @@ -24,7 +24,7 @@ } }, "queryFilter": { - "description": "Query filter to be passed to ES. E.g., `{\"query\":{\"bool\":{\"must\":[{\"bool\":{\"should\":[{\"term\":{\"domain.displayName.keyword\":\"DG Anim\"}}]}}]}}}`. This is the same payload as in the Explore page.", + "description": "Query filter to be passed to ES. E.g., `{\"query\":{\"bool\":{\"must\":[{\"bool\":{\"should\":[{\"term\":{\"domains.displayName.keyword\":\"DG Anim\"}}]}}]}}}`. 
This is the same payload as in the Explore page.", "type": "string" }, "filterJsonTree": { diff --git a/schemas/entity/applications/configuration/external/collateAIAppConfig.json b/schemas/entity/applications/configuration/external/collateAIAppConfig.json index 24e9859..3e6886e 100644 --- a/schemas/entity/applications/configuration/external/collateAIAppConfig.json +++ b/schemas/entity/applications/configuration/external/collateAIAppConfig.json @@ -22,7 +22,7 @@ }, "filter": { "title": "Filter", - "description": "Query filter to be passed to ES. E.g., `{\"query\":{\"bool\":{\"must\":[{\"bool\":{\"should\":[{\"term\":{\"domain.displayName.keyword\":\"DG Anim\"}}]}}]}}}`. This is the same payload as in the Explore page.", + "description": "Query filter to be passed to ES. E.g., `{\"query\":{\"bool\":{\"must\":[{\"bool\":{\"should\":[{\"term\":{\"domains.displayName.keyword\":\"DG Anim\"}}]}}]}}}`. This is the same payload as in the Explore page.", "type": "string" }, "patchIfEmpty": { diff --git a/schemas/entity/applications/configuration/external/collateAIQualityAgentAppConfig.json b/schemas/entity/applications/configuration/external/collateAIQualityAgentAppConfig.json index 67f58a6..c8954fb 100644 --- a/schemas/entity/applications/configuration/external/collateAIQualityAgentAppConfig.json +++ b/schemas/entity/applications/configuration/external/collateAIQualityAgentAppConfig.json @@ -22,7 +22,7 @@ }, "filter": { "title": "Filter", - "description": "Query filter to be passed to ES. E.g., `{\"query\":{\"bool\":{\"must\":[{\"bool\":{\"should\":[{\"term\":{\"domain.displayName.keyword\":\"DG Anim\"}}]}}]}}}`. This is the same payload as in the Explore page.", + "description": "Query filter to be passed to ES. E.g., `{\"query\":{\"bool\":{\"must\":[{\"bool\":{\"should\":[{\"term\":{\"domains.displayName.keyword\":\"DG Anim\"}}]}}]}}}`. 
This is the same payload as in the Explore page.", "type": "string" }, "active": { diff --git a/schemas/entity/applications/configuration/external/collateAITierAgentAppConfig.json b/schemas/entity/applications/configuration/external/collateAITierAgentAppConfig.json index 6e56386..993eb9d 100644 --- a/schemas/entity/applications/configuration/external/collateAITierAgentAppConfig.json +++ b/schemas/entity/applications/configuration/external/collateAITierAgentAppConfig.json @@ -22,7 +22,7 @@ }, "filter": { "title": "Filter", - "description": "Query filter to be passed to ES. E.g., `{\"query\":{\"bool\":{\"must\":[{\"bool\":{\"should\":[{\"term\":{\"domain.displayName.keyword\":\"DG Anim\"}}]}}]}}}`. This is the same payload as in the Explore page.", + "description": "Query filter to be passed to ES. E.g., `{\"query\":{\"bool\":{\"must\":[{\"bool\":{\"should\":[{\"term\":{\"domains.displayName.keyword\":\"DG Anim\"}}]}}]}}}`. This is the same payload as in the Explore page.", "type": "string" }, "patchIfEmpty": { diff --git a/schemas/entity/applications/configuration/external/metadataExporterAppConfig.json b/schemas/entity/applications/configuration/external/metadataExporterAppConfig.json index e62b259..5b97a76 100644 --- a/schemas/entity/applications/configuration/external/metadataExporterAppConfig.json +++ b/schemas/entity/applications/configuration/external/metadataExporterAppConfig.json @@ -80,6 +80,9 @@ }, { "$ref": "metadataExporterConnectors/bigQueryConnection.json" + }, + { + "$ref": "metadataExporterConnectors/trinoConnection.json" } ] }, @@ -94,10 +97,11 @@ "type": "string", "enum": [ "PROFILE", - "TEST_CASE_RESULTS" + "TEST_CASE_RESULTS", + "ENTITY_HISTORY" ] }, - "default": ["PROFILE", "TEST_CASE_RESULTS"] + "default": ["PROFILE", "TEST_CASE_RESULTS", "ENTITY_HISTORY"] }, "backfill": { "description": "Enable backfill for the exporter to process historical data. 
This will only work on the very first run of the exporter.", diff --git a/schemas/entity/applications/configuration/external/metadataExporterConnectors/trinoConnection.json b/schemas/entity/applications/configuration/external/metadataExporterConnectors/trinoConnection.json new file mode 100644 index 0000000..392fd62 --- /dev/null +++ b/schemas/entity/applications/configuration/external/metadataExporterConnectors/trinoConnection.json @@ -0,0 +1,90 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/database/trinoConnection.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "TrinoConnection", + "description": "Trino Connection Config", + "type": "object", + "definitions": { + "trinoType": { + "description": "Service type.", + "type": "string", + "enum": [ + "Trino" + ], + "default": "Trino" + }, + "trinoScheme": { + "description": "SQLAlchemy driver scheme options.", + "type": "string", + "enum": [ + "trino" + ], + "default": "trino" + } + }, + "properties": { + "type": { + "title": "Service Type", + "description": "Service Type", + "$ref": "#/definitions/trinoType", + "default": "Trino" + }, + "scheme": { + "title": "Connection Scheme", + "description": "SQLAlchemy driver scheme options.", + "$ref": "#/definitions/trinoScheme", + "default": "trino" + }, + "username": { + "title": "Username", + "description": "Username to connect to Trino.", + "type": "string" + }, + "authType": { + "title": "Auth Configuration Type", + "description": "Choose Auth Config Type.", + "oneOf": [ + { + "$ref": "../../../../services/connections/database/common/basicAuth.json" + }, + { + "$ref": "../../../../services/connections/database/common/jwtAuth.json" + }, + { + "$ref": "../../../../services/connections/database/common/azureConfig.json" + }, + { + "$ref": "../../../../services/connections/database/common/noConfigAuthenticationTypes.json" + } + ] + }, + "hostPort": { + "title": "Host and Port", + "description": "Host and port of the 
Trino service.", + "type": "string" + }, + "catalog": { + "title": "Catalog", + "description": "Catalog of the data source.", + "type": "string" + }, + "databaseSchema": { + "title": "Database Schema", + "description": "Database Schema of the data source.", + "type": "string" + }, + "connectionOptions": { + "title": "Connection Options", + "$ref": "../../../../services/connections/connectionBasicType.json#/definitions/connectionOptions" + }, + "connectionArguments": { + "title": "Connection Arguments", + "$ref": "../../../../services/connections/connectionBasicType.json#/definitions/connectionArguments" + } + }, + "additionalProperties": false, + "required": [ + "hostPort", + "username" + ] +} diff --git a/schemas/entity/applications/configuration/internal/dataInsightsAppConfig.json b/schemas/entity/applications/configuration/internal/dataInsightsAppConfig.json index 1aa2174..a5055c3 100644 --- a/schemas/entity/applications/configuration/internal/dataInsightsAppConfig.json +++ b/schemas/entity/applications/configuration/internal/dataInsightsAppConfig.json @@ -56,7 +56,7 @@ "title": "Data Retention (Days)", "description": "Defines the number of days the Data Assets Insights information will be kept. 
After it they will be deleted.", "type": "integer", - "default": 7, + "default": 90, "minimum": 0 }, "serviceFilter": { diff --git a/schemas/entity/applications/configuration/internal/dataRetentionConfiguration.json b/schemas/entity/applications/configuration/internal/dataRetentionConfiguration.json index d68fdb9..fddfc5b 100644 --- a/schemas/entity/applications/configuration/internal/dataRetentionConfiguration.json +++ b/schemas/entity/applications/configuration/internal/dataRetentionConfiguration.json @@ -29,10 +29,17 @@ "description": "Enter the retention period for Profile Data in days (e.g., 30 for one month, 60 for two months).", "type": "integer", "default": 1440 + }, + "auditLogRetentionPeriod": { + "title": "Audit Log Retention Period (days)", + "description": "Enter the retention period for Audit Log entries in days (e.g., 90 for three months).", + "type": "integer", + "default": 90, + "minimum": 1 } }, "required": [ - "changeEventRetentionPeriod", "activityThreadsRetentionPeriod", "testCaseResultsRetentionPeriod", "profileDataRetentionPeriod" + "changeEventRetentionPeriod", "activityThreadsRetentionPeriod", "testCaseResultsRetentionPeriod", "profileDataRetentionPeriod", "auditLogRetentionPeriod" ], "additionalProperties": false } diff --git a/schemas/entity/applications/configuration/internal/rdfIndexingAppConfig.json b/schemas/entity/applications/configuration/internal/rdfIndexingAppConfig.json new file mode 100644 index 0000000..e5f2f4d --- /dev/null +++ b/schemas/entity/applications/configuration/internal/rdfIndexingAppConfig.json @@ -0,0 +1,153 @@ +{ + "$id": "https://open-metadata.org/schema/entity/applications/configuration/rdfIndexingApp.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "RdfIndexingApp", + "type": "object", + "description": "RDF indexing application configuration.", + "definitions": { + "rdfIndexingType": { + "description": "Application type.", + "type": "string", + "enum": ["RdfIndexing"], + "default": 
"RdfIndexing" + } + }, + "properties": { + "type": { + "title": "Application Type", + "description": "Application Type", + "$ref": "#/definitions/rdfIndexingType", + "default": "RdfIndexing" + }, + "entities": { + "title": "Entities", + "description": "List of entities that you need to reindex. Leave empty to index all supported entities.", + "type": "array", + "items": { + "type": "string", + "enum": [ + "aiApplication", + "aiGovernancePolicy", + "apiCollection", + "apiEndpoint", + "apiService", + "app", + "appMarketPlaceDefinition", + "bot", + "chart", + "classification", + "container", + "dashboard", + "dashboardDataModel", + "dashboardService", + "dataContract", + "dataInsightChart", + "dataInsightCustomChart", + "dataProduct", + "database", + "databaseSchema", + "databaseService", + "directory", + "document", + "domain", + "driveService", + "eventsubscription", + "file", + "glossary", + "glossaryTerm", + "ingestionPipeline", + "kpi", + "learningResource", + "llmModel", + "llmService", + "messagingService", + "metadataService", + "metric", + "mlmodel", + "mlmodelService", + "notificationTemplate", + "persona", + "pipeline", + "pipelineService", + "policy", + "promptTemplate", + "query", + "report", + "role", + "searchIndex", + "searchService", + "securityService", + "spreadsheet", + "storageService", + "storedProcedure", + "table", + "tag", + "team", + "testCase", + "testConnectionDefinition", + "testDefinition", + "testSuite", + "topic", + "type", + "user", + "webAnalyticEvent", + "workflow", + "workflowDefinition", + "worksheet" + ] + }, + "default": [], + "uiFieldType": "treeSelect", + "uniqueItems": true + }, + "recreateIndex": { + "title": "Recreate RDF Store", + "description": "Recreate the RDF store before indexing.", + "type": "boolean", + "default": false + }, + "batchSize": { + "title": "Batch Size", + "description": "Maximum number of entities processed in a batch.", + "type": "integer", + "default": 100, + "minimum": 1 + }, + "producerThreads": { + 
"title": "Number of Producer Threads", + "description": "Number of producer threads to use for non-distributed RDF reindexing.", + "type": "integer", + "default": 2, + "minimum": 1 + }, + "consumerThreads": { + "title": "Number of Consumer Threads", + "description": "Number of consumer threads to use for non-distributed RDF reindexing.", + "type": "integer", + "default": 3, + "minimum": 1 + }, + "queueSize": { + "title": "Queue Size", + "description": "Queue size to use internally for non-distributed RDF reindexing.", + "type": "integer", + "default": 5000, + "minimum": 1 + }, + "useDistributedIndexing": { + "title": "Use Distributed Indexing", + "description": "Enable distributed RDF indexing across multiple servers with partition coordination and recovery.", + "type": "boolean", + "default": true + }, + "partitionSize": { + "title": "Partition Size", + "description": "Number of entities per partition for distributed RDF indexing. Smaller values create more partitions for better distribution across servers.", + "type": "integer", + "default": 10000, + "minimum": 1000, + "maximum": 50000 + } + }, + "additionalProperties": false +} diff --git a/schemas/entity/applications/configuration/internal/searchIndexingAppConfig.json b/schemas/entity/applications/configuration/internal/searchIndexingAppConfig.json index 7ddaf42..1239a88 100644 --- a/schemas/entity/applications/configuration/internal/searchIndexingAppConfig.json +++ b/schemas/entity/applications/configuration/internal/searchIndexingAppConfig.json @@ -42,7 +42,7 @@ "description": "Maximum number of events sent in a batch (Default 100).", "type": "integer", "existingJavaType": "java.lang.Long", - "default": 104857600 + "default": 9437184 }, "producerThreads": { "title": "Number of Producer Threads", @@ -95,6 +95,35 @@ "description": "Enable automatic performance tuning based on cluster capabilities and database entity count", "type": "boolean", "default": false + }, + "useDistributedIndexing": { + "title": "Use 
Distributed Indexing", + "description": "Enable distributed indexing to scale reindexing across multiple servers with fault tolerance and parallel processing", + "type": "boolean", + "default": true + }, + "partitionSize": { + "title": "Partition Size", + "description": "Number of entities per partition for distributed indexing. Smaller values create more partitions for better distribution across servers. Range: 1000-50000.", + "type": "integer", + "default": 10000, + "minimum": 1000, + "maximum": 50000 + }, + "timeSeriesMaxDays": { + "title": "Time Series Max Days", + "description": "Maximum age in days for time series data during reindexing. Default 0 (index all data). Set to a positive value like 15 to limit to recent data only.", + "type": "integer", + "default": 0, + "minimum": -1 + }, + "timeSeriesEntityDays": { + "title": "Time Series Entity Days Override", + "description": "Per-entity-type override for time series max days. Keys are entity type names (e.g. testCaseResult, queryCostRecord), values are number of days. Entities not listed here use the default Time Series Max Days value.", + "type": "object", + "additionalProperties": { + "type": "integer" + } } }, "additionalProperties": false diff --git a/schemas/entity/applications/configuration/private/limits.json b/schemas/entity/applications/configuration/private/limits.json index 54c350b..e3d9ba3 100644 --- a/schemas/entity/applications/configuration/private/limits.json +++ b/schemas/entity/applications/configuration/private/limits.json @@ -12,7 +12,8 @@ "description": "The action and its limit.", "type": "object", "additionalProperties": { - "type": "integer", + "type": "number", + "existingJavaType": "java.math.BigDecimal", "description": "Limit for the named action." } } @@ -25,13 +26,12 @@ }, "billingCycleStart": { "title": "Cycle Start", - "description": "The start of this limit cycle.", + "description": "The start of this limit cycle. 
DEPRECATED: Use central billingCycleStart from LimitsConfiguration in openmetadata.yaml", "$ref": "../../../../type/basic.json#/definitions/date" } }, "additionalProperties": false, "required": [ - "actions", - "billingCycleStart" + "actions" ] } \ No newline at end of file diff --git a/schemas/entity/applications/createAppRequest.json b/schemas/entity/applications/createAppRequest.json index 11eb1da..cbb24ae 100644 --- a/schemas/entity/applications/createAppRequest.json +++ b/schemas/entity/applications/createAppRequest.json @@ -50,6 +50,11 @@ "ingestionRunner" : { "description": "The ingestion agent responsible for executing the ingestion pipeline. It will be defined at runtime based on the Ingestion Agent of the service.", "$ref": "../../type/entityReference.json" + }, + "allowBotImpersonation": { + "description": "When true, the bot created for this application will have allowImpersonation enabled, allowing it to act on behalf of users.", + "type": "boolean", + "default": false } }, "additionalProperties": false diff --git a/schemas/entity/applications/jobStatus.json b/schemas/entity/applications/jobStatus.json index f8ba929..8a0f40b 100644 --- a/schemas/entity/applications/jobStatus.json +++ b/schemas/entity/applications/jobStatus.json @@ -11,8 +11,11 @@ "oneOf": [ { "$ref": "configuration/internal/searchIndexingAppConfig.json" + }, + { + "$ref": "configuration/internal/rdfIndexingAppConfig.json" } ] } } -} \ No newline at end of file +} diff --git a/schemas/entity/applications/marketplace/appMarketPlaceDefinition.json b/schemas/entity/applications/marketplace/appMarketPlaceDefinition.json index 96c8091..10f2412 100644 --- a/schemas/entity/applications/marketplace/appMarketPlaceDefinition.json +++ b/schemas/entity/applications/marketplace/appMarketPlaceDefinition.json @@ -153,10 +153,10 @@ "type": "boolean", "default": false }, - "preview": { + "enabled": { "type": "boolean", - "description": "Flag to enable/disable preview for the application. 
If the app is in preview mode, it can't be installed.", - "default": false + "description": "Flag to enable/disable the application. If the app is not enabled, it can't be installed.", + "default": true }, "domains" : { "description": "Domains the asset belongs to. When not set, the asset inherits the domain from the parent it belongs to.", @@ -179,6 +179,11 @@ "description": "If the app support execution through the external runner.", "type": "boolean", "default": false + }, + "allowBotImpersonation": { + "description": "When true, the bot created for this application will have allowImpersonation enabled, allowing it to act on behalf of users.", + "type": "boolean", + "default": false } }, "additionalProperties": false, diff --git a/schemas/entity/applications/marketplace/createAppMarketPlaceDefinitionReq.json b/schemas/entity/applications/marketplace/createAppMarketPlaceDefinitionReq.json index 6e5653d..ba041db 100644 --- a/schemas/entity/applications/marketplace/createAppMarketPlaceDefinitionReq.json +++ b/schemas/entity/applications/marketplace/createAppMarketPlaceDefinitionReq.json @@ -141,6 +141,11 @@ "description": "If true, multiple instances of this app can run concurrently. 
This is useful for apps like QueryRunner that support parallel executions with different configurations.", "type": "boolean", "default": false + }, + "allowBotImpersonation": { + "description": "When true, the bot created for this application will have allowImpersonation enabled, allowing it to act on behalf of users.", + "type": "boolean", + "default": false } }, "additionalProperties": false, diff --git a/schemas/entity/automations/queryRunnerRequest.json b/schemas/entity/automations/queryRunnerRequest.json index a8574d1..c1ca182 100644 --- a/schemas/entity/automations/queryRunnerRequest.json +++ b/schemas/entity/automations/queryRunnerRequest.json @@ -23,8 +23,8 @@ "description": "Optional value to indicate if the query should be transpiled.", "type": "boolean" }, - "ingestionRunner": { - "description": "Optional value of the ingestion runner name responsible for running the test", + "workflowName": { + "description": "Optional value of the workflow name responsible for running the test", "type": "string" }, "userId": { @@ -53,6 +53,19 @@ "description": "Optional database schema to use for query execution (selected by user in QueryRunner Studio). Service-specific name (e.g., Snowflake schema). Named 'databaseSchema' instead of 'schema' to avoid conflicts with Pydantic's BaseModel.schema() method.", "type": "string", "default": null + }, + "maxResultSize": { + "description": "RUNTIME FIELD - Automatically injected by backend from admin QueryRunnerConfig.querySettings.maxResultSize. This is NOT user-configurable in the request. The backend fetches this value from the service's QueryRunnerConfig and injects it here for enforcement by the Python workflow. If query has LIMIT exceeding this value, an error is raised. If query has no LIMIT, one is automatically injected.", + "type": "integer", + "minimum": 1, + "maximum": 10000, + "default": null + }, + "credentialSourceType": { + "description": "Source type of the resolved credentials. 
Indicates whether the credentials come from a user-level config or a team-level config. Set by the backend during credential resolution.", + "type": "string", + "enum": ["user", "team"], + "default": "user" } }, "additionalProperties": false diff --git a/schemas/entity/classification/tag.json b/schemas/entity/classification/tag.json index 2731307..2e95d6a 100644 --- a/schemas/entity/classification/tag.json +++ b/schemas/entity/classification/tag.json @@ -109,6 +109,10 @@ "description": "List of data products this entity is part of.", "$ref" : "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" + }, "owners": { "description": "Owners of this glossary term.", "$ref": "../../type/entityReferenceList.json" diff --git a/schemas/entity/column/dashboardDataModelColumn.json b/schemas/entity/column/dashboardDataModelColumn.json new file mode 100644 index 0000000..a84de4e --- /dev/null +++ b/schemas/entity/column/dashboardDataModelColumn.json @@ -0,0 +1,9 @@ +{ + "$id": "https://open-metadata.org/schema/entity/column/dashboardDataModelColumn.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Dashboard Data Model Column Type", + "$comment": "@om-entity-type", + "description": "This schema defines the type for dashboard data model column context to support custom properties extension.", + "type": "object", + "properties": {} +} \ No newline at end of file diff --git a/schemas/entity/column/tableColumn.json b/schemas/entity/column/tableColumn.json new file mode 100644 index 0000000..9d6530d --- /dev/null +++ b/schemas/entity/column/tableColumn.json @@ -0,0 +1,9 @@ +{ + "$id": "https://open-metadata.org/schema/entity/column/tableColumn.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Table Column Type", + "$comment": "@om-entity-type", + "description": "This schema defines the type for table column context to 
support custom properties extension.", + "type": "object", + "properties": {} +} \ No newline at end of file diff --git a/schemas/entity/data/apiCollection.json b/schemas/entity/data/apiCollection.json index 9c33727..01568d7 100644 --- a/schemas/entity/data/apiCollection.json +++ b/schemas/entity/data/apiCollection.json @@ -105,6 +105,10 @@ "description": "List of data products this entity is part of.", "$ref": "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" + }, "votes": { "description": "Votes on the entity.", "$ref": "../../type/votes.json" @@ -127,6 +131,6 @@ "$ref": "../../type/status.json" } }, - "required": ["id", "name", "service", "endpointURL"], + "required": ["id", "name", "service"], "additionalProperties": false } diff --git a/schemas/entity/data/apiEndpoint.json b/schemas/entity/data/apiEndpoint.json index 64276a3..3e6e564 100644 --- a/schemas/entity/data/apiEndpoint.json +++ b/schemas/entity/data/apiEndpoint.json @@ -167,6 +167,10 @@ "description": "List of data products this entity is part of.", "$ref": "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" + }, "votes": { "description": "Votes on the entity.", "$ref": "../../type/votes.json" @@ -189,6 +193,6 @@ "$ref": "../../type/status.json" } }, - "required": ["id", "name", "service", "endpointURL"], + "required": ["id", "name", "service"], "additionalProperties": false } diff --git a/schemas/entity/data/chart.json b/schemas/entity/data/chart.json index 87ba23a..bf4cecc 100644 --- a/schemas/entity/data/chart.json +++ b/schemas/entity/data/chart.json @@ -180,6 +180,10 @@ "description": "List of data products this entity is part of.", "$ref": "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data contract for this 
entity.", + "$ref": "../../type/entityReference.json" + }, "votes": { "description": "Votes on the entity.", "$ref": "../../type/votes.json" diff --git a/schemas/entity/data/container.json b/schemas/entity/data/container.json index fe7ff2d..a837ad3 100644 --- a/schemas/entity/data/container.json +++ b/schemas/entity/data/container.json @@ -220,6 +220,10 @@ "description": "List of data products this entity is part of.", "$ref": "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" + }, "votes": { "description": "Votes on the entity.", "$ref": "../../type/votes.json" diff --git a/schemas/entity/data/dashboard.json b/schemas/entity/data/dashboard.json index 81b6045..de79034 100644 --- a/schemas/entity/data/dashboard.json +++ b/schemas/entity/data/dashboard.json @@ -143,6 +143,10 @@ "description": "List of data products this entity is part of.", "$ref": "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" + }, "votes": { "description": "Votes on the entity.", "$ref": "../../type/votes.json" diff --git a/schemas/entity/data/dashboardDataModel.json b/schemas/entity/data/dashboardDataModel.json index 9d12980..489506e 100644 --- a/schemas/entity/data/dashboardDataModel.json +++ b/schemas/entity/data/dashboardDataModel.json @@ -128,6 +128,10 @@ "description": "List of data products this entity is part of.", "$ref": "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" + }, "tags": { "description": "Tags for this data model.", "type": "array", diff --git a/schemas/entity/data/dataContract.json b/schemas/entity/data/dataContract.json index ee79050..935ea9f 100644 --- a/schemas/entity/data/dataContract.json +++ 
b/schemas/entity/data/dataContract.json @@ -106,6 +106,10 @@ "type": "object", "description": "Security and access policy expectations", "properties": { + "inherited": { + "description": "If the property is inherited from the Data Product", + "type": "boolean" + }, "dataClassification": { "type": "string", "title": "Data Classification", @@ -125,6 +129,10 @@ "type": "object", "description": "Service Level Agreement expectations (timeliness, availability, etc.)", "properties": { + "inherited": { + "description": "If the property is inherited from the Data Product", + "type": "boolean" + }, "refreshFrequency": { "type": "object", "title": "Refresh Frequency", @@ -210,6 +218,19 @@ "description": "Column that represents the refresh time of the data (if applicable)" } } + }, + "termsOfUse": { + "type": "object", + "description": "Terms of use for the data contract for both human and AI agents consumption.", + "properties": { + "content": { + "$ref": "../../type/basic.json#/definitions/markdown" + }, + "inherited": { + "description": "If the property is inherited from the Data Product", + "type": "boolean" + } + } } }, "properties": { @@ -293,7 +314,7 @@ }, "termsOfUse": { "description": "Terms of use for the data contract for both human and AI agents consumption.", - "$ref": "../../type/basic.json#/definitions/markdown", + "$ref": "#/definitions/termsOfUse", "default": null }, "security": { @@ -316,6 +337,14 @@ }, "default": null }, + "odcsQualityRules": { + "description": "ODCS quality rules stored during import for round-trip compatibility with ODCS export.", + "type": "array", + "items": { + "$ref": "../datacontract/odcs/odcsDataContract.json#/definitions/odcsQualityRule" + }, + "default": null + }, "contractUpdates": { "description": "History of updates to the data contract.", "type": "array", @@ -383,6 +412,11 @@ "extension": { "description": "Entity extension data with custom attributes added to the entity.", "$ref": 
"../../type/basic.json#/definitions/entityExtension" + }, + "inherited": { + "description": "Indicates whether this data contract is inherited from a parent entity.", + "type": "boolean", + "default": false } }, "required": ["id", "name", "entity"], diff --git a/schemas/entity/data/database.json b/schemas/entity/data/database.json index 437130a..755e313 100644 --- a/schemas/entity/data/database.json +++ b/schemas/entity/data/database.json @@ -34,6 +34,10 @@ "description": "List of data products this entity is part of.", "$ref" : "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" + }, "tags": { "description": "Tags for this Database.", "type": "array", @@ -171,7 +175,7 @@ "randomizedSample": { "description": "Whether to randomize the sample data or not.", "type": "boolean", - "default": true + "default": false } } }, diff --git a/schemas/entity/data/databaseSchema.json b/schemas/entity/data/databaseSchema.json index ad377f6..a9fd706 100644 --- a/schemas/entity/data/databaseSchema.json +++ b/schemas/entity/data/databaseSchema.json @@ -35,6 +35,10 @@ "description": "List of data products this entity is part of.", "$ref" : "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" + }, "version": { "description": "Metadata version of the entity.", "$ref": "../../type/entityHistory.json#/definitions/entityVersion" @@ -167,7 +171,7 @@ "randomizedSample": { "description": "Whether to randomize the sample data or not.", "type": "boolean", - "default": true + "default": false } } }, diff --git a/schemas/entity/data/directory.json b/schemas/entity/data/directory.json index 9c74ed1..91fdc1b 100644 --- a/schemas/entity/data/directory.json +++ b/schemas/entity/data/directory.json @@ -156,6 +156,10 @@ "description": "List of data products this 
entity is part of.", "$ref": "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" + }, "lifeCycle": { "description": "Life Cycle of the entity", "$ref": "../../type/lifeCycle.json" diff --git a/schemas/entity/data/file.json b/schemas/entity/data/file.json index 4426da7..660db0b 100644 --- a/schemas/entity/data/file.json +++ b/schemas/entity/data/file.json @@ -98,6 +98,18 @@ "description": "File size in bytes", "type": "integer" }, + "columns": { + "description": "Column definitions for structured data files (CSV, etc.)", + "type": "array", + "items": { + "$ref": "../data/table.json#/definitions/column" + }, + "default": null + }, + "sampleData": { + "description": "Sample data from the file", + "$ref": "../data/table.json#/definitions/tableData" + }, "checksum": { "description": "File checksum/hash", "type": "string" @@ -194,6 +206,10 @@ "description": "List of data products this entity is part of.", "$ref": "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" + }, "lifeCycle": { "description": "Life Cycle of the entity", "$ref": "../../type/lifeCycle.json" diff --git a/schemas/entity/data/glossary.json b/schemas/entity/data/glossary.json index a847087..bba3de3 100644 --- a/schemas/entity/data/glossary.json +++ b/schemas/entity/data/glossary.json @@ -109,6 +109,10 @@ "description": "List of data products this entity is part of.", "$ref" : "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" + }, "votes": { "description": "Votes on the entity.", "$ref": "../../type/votes.json" diff --git a/schemas/entity/data/glossaryTerm.json b/schemas/entity/data/glossaryTerm.json index 65fbd27..7052226 100644 --- 
a/schemas/entity/data/glossaryTerm.json +++ b/schemas/entity/data/glossaryTerm.json @@ -22,6 +22,44 @@ } }, "additionalProperties": false + }, + "conceptMappingType": { + "description": "Type of mapping used to align this term with an external concept.", + "type": "string", + "enum": [ + "EXACT_MATCH", + "CLOSE_MATCH", + "BROAD_MATCH", + "NARROW_MATCH", + "RELATED_MATCH", + "SAME_AS" + ] + }, + "conceptMapping": { + "description": "Mapping to an external concept (e.g., SKOS concept IRI).", + "type": "object", + "properties": { + "conceptIri": { + "description": "External concept IRI to map this glossary term to.", + "type": "string", + "format": "uri" + }, + "mappingType": { + "description": "Type of mapping used for the external concept alignment.", + "$ref": "#/definitions/conceptMappingType" + }, + "schemeIri": { + "description": "Optional external concept scheme IRI for the mapped concept.", + "type": "string", + "format": "uri" + }, + "source": { + "description": "Optional source label or catalog for the external concept.", + "type": "string" + } + }, + "required": ["conceptIri", "mappingType"], + "additionalProperties": false } }, "properties": { @@ -68,8 +106,12 @@ "$ref": "../../type/entityReferenceList.json" }, "relatedTerms": { - "description": "Other glossary terms that are related to this glossary term.", - "$ref": "../../type/entityReferenceList.json" + "description": "Other glossary terms that are related to this glossary term with typed semantic relations.", + "type": "array", + "items": { + "$ref": "../../type/termRelation.json" + }, + "default": [] }, "references": { "description": "Link to a reference from an external glossary.", @@ -78,6 +120,14 @@ "$ref": "../../entity/data/glossaryTerm.json#/definitions/termReference" } }, + "conceptMappings": { + "description": "Optional mappings to external concepts (e.g., SKOS alignments).", + "type": "array", + "items": { + "$ref": "../../entity/data/glossaryTerm.json#/definitions/conceptMapping" + }, + 
"default": [] + }, "version": { "description": "Metadata version of the entity.", "$ref": "../../type/entityHistory.json#/definitions/entityVersion" @@ -159,6 +209,10 @@ "description": "List of data products this entity is part of.", "$ref" : "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" + }, "votes" : { "description": "Votes on the entity.", "$ref": "../../type/votes.json" diff --git a/schemas/entity/data/metric.json b/schemas/entity/data/metric.json index 997b273..ddea32d 100644 --- a/schemas/entity/data/metric.json +++ b/schemas/entity/data/metric.json @@ -205,6 +205,10 @@ "description": "List of data products this entity is part of.", "$ref" : "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" + }, "votes": { "description": "Votes on the entity.", "$ref": "../../type/votes.json" diff --git a/schemas/entity/data/mlmodel.json b/schemas/entity/data/mlmodel.json index 6051fbb..e494256 100644 --- a/schemas/entity/data/mlmodel.json +++ b/schemas/entity/data/mlmodel.json @@ -285,6 +285,10 @@ "description": "List of data products this entity is part of.", "$ref" : "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" + }, "votes" : { "description": "Votes on the entity.", "$ref": "../../type/votes.json" diff --git a/schemas/entity/data/pipeline.json b/schemas/entity/data/pipeline.json index 89c6ab2..67106e4 100644 --- a/schemas/entity/data/pipeline.json +++ b/schemas/entity/data/pipeline.json @@ -389,6 +389,10 @@ "description": "List of data products this entity is part of.", "$ref" : "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data contract for this 
entity.", + "$ref": "../../type/entityReference.json" + }, "version": { "description": "Metadata version of the entity.", "$ref": "../../type/entityHistory.json#/definitions/entityVersion" diff --git a/schemas/entity/data/searchIndex.json b/schemas/entity/data/searchIndex.json index 4d0170f..d35d2e7 100644 --- a/schemas/entity/data/searchIndex.json +++ b/schemas/entity/data/searchIndex.json @@ -266,6 +266,10 @@ "description": "List of data products this entity is part of.", "$ref": "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" + }, "votes": { "description": "Votes on the entity.", "$ref": "../../type/votes.json" diff --git a/schemas/entity/data/spreadsheet.json b/schemas/entity/data/spreadsheet.json index 53e7fa6..89a6d49 100644 --- a/schemas/entity/data/spreadsheet.json +++ b/schemas/entity/data/spreadsheet.json @@ -167,6 +167,10 @@ "description": "List of data products this entity is part of.", "$ref": "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" + }, "lifeCycle": { "description": "Life Cycle of the entity", "$ref": "../../type/lifeCycle.json" diff --git a/schemas/entity/data/storedProcedure.json b/schemas/entity/data/storedProcedure.json index e1db8f7..20ff114 100644 --- a/schemas/entity/data/storedProcedure.json +++ b/schemas/entity/data/storedProcedure.json @@ -108,6 +108,10 @@ "description": "List of data products this entity is part of.", "$ref": "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" + }, "updatedAt": { "description": "Last update time corresponding to the new version of the entity in Unix epoch time milliseconds.", "$ref": "../../type/basic.json#/definitions/timestamp" diff 
--git a/schemas/entity/data/table.json b/schemas/entity/data/table.json index 892a65d..e4b3279 100644 --- a/schemas/entity/data/table.json +++ b/schemas/entity/data/table.json @@ -7,7 +7,8 @@ "type": "object", "javaType": "org.openmetadata.schema.entity.data.Table", "javaInterfaces": [ - "org.openmetadata.schema.EntityInterface", "org.openmetadata.schema.ColumnsEntityInterface" + "org.openmetadata.schema.EntityInterface", + "org.openmetadata.schema.ColumnsEntityInterface" ], "definitions": { "profileSampleType": { @@ -43,7 +44,8 @@ "Partitioned", "Foreign", "Transient", - "Stream" + "Stream", + "Stage" ], "javaEnums": [ { @@ -81,6 +83,9 @@ }, { "name": "Stream" + }, + { + "name": "Stage" } ] }, @@ -238,7 +243,6 @@ "description": "Local name (not fully qualified name) of the column. ColumnName is `-` when the column is not named in struct dataType. For example, BigQuery supports struct with unnamed fields.", "type": "string", "minLength": 1, - "maxLength": 256, "pattern": "^((?!::).)*$" }, "partitionIntervalTypes": { @@ -292,7 +296,9 @@ "column": { "type": "object", "javaType": "org.openmetadata.schema.type.Column", - "javaInterfaces": ["org.openmetadata.schema.FieldInterface"], + "javaInterfaces": [ + "org.openmetadata.schema.FieldInterface" + ], "description": "This schema defines the type for a column in a table.", "properties": { "name": { @@ -372,6 +378,10 @@ "$ref": "../../tests/customMetric.json" }, "default": null + }, + "extension": { + "description": "Entity extension data with custom attributes added to the entity.", + "$ref": "../../type/basic.json#/definitions/entityExtension" } }, "required": [ @@ -849,7 +859,7 @@ "randomizedSample": { "description": "Whether to randomize the sample data or not.", "type": "boolean", - "default": true + "default": false }, "profileQuery": { "description": "Users' raw SQL query to fetch sample data and profile the table", @@ -1034,7 +1044,24 @@ "fileFormat": { "description": "File format in case of file/datalake 
tables.", "type": "string", - "enum": ["csv", "csv.gz", "tsv", "avro", "parquet", "pq", "pqt", "parq", "parquet.snappy", "json", "json.gz", "json.zip", "jsonl", "jsonl.gz", "jsonl.zip"] + "enum": [ + "csv", + "csv.gz", + "tsv", + "avro", + "parquet", + "pq", + "pqt", + "parq", + "parquet.snappy", + "json", + "json.gz", + "json.zip", + "jsonl", + "jsonl.gz", + "jsonl.zip", + "MF4" + ] } }, "properties": { @@ -1199,7 +1226,7 @@ "type": "boolean", "default": false }, - "retentionPeriod" : { + "retentionPeriod": { "description": "Retention period of the data in the table. Period is expressed as duration in ISO 8601 format in UTC. Example - `P23DT23H`. When not set, the retention period is inherited from the parent database schema, if it exists.", "$ref": "../../type/basic.json#/definitions/duration" }, @@ -1221,7 +1248,11 @@ "compressionType": { "description": "Type of compression: AUTOMATIC (Snowflake/BigQuery), MANUAL (Redshift), POLICY_BASED (TimescaleDB)", "type": "string", - "enum": ["AUTOMATIC", "MANUAL", "POLICY_BASED"], + "enum": [ + "AUTOMATIC", + "MANUAL", + "POLICY_BASED" + ], "default": null }, "segmentColumns": { @@ -1259,19 +1290,23 @@ "description": "Source URL of table.", "$ref": "../../type/basic.json#/definitions/sourceUrl" }, - "domains" : { + "domains": { "description": "Domains the asset belongs to. 
When not set, the asset inherits the domain from the parent it belongs to.", "$ref": "../../type/entityReferenceList.json" }, - "dataProducts" : { + "dataProducts": { "description": "List of data products this entity is part of.", - "$ref" : "../../type/entityReferenceList.json" + "$ref": "../../type/entityReferenceList.json" + }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" }, "fileFormat": { "description": "File format in case of file/datalake tables.", - "$ref" : "#/definitions/fileFormat" + "$ref": "#/definitions/fileFormat" }, - "votes" : { + "votes": { "description": "Votes on the entity.", "$ref": "../../type/votes.json" }, diff --git a/schemas/entity/data/topic.json b/schemas/entity/data/topic.json index 6b6aef9..69124ae 100644 --- a/schemas/entity/data/topic.json +++ b/schemas/entity/data/topic.json @@ -174,6 +174,10 @@ "description": "List of data products this entity is part of.", "$ref" : "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" + }, "votes" : { "description": "Votes on the entity.", "$ref": "../../type/votes.json" diff --git a/schemas/entity/data/worksheet.json b/schemas/entity/data/worksheet.json index 8d35013..8ee2291 100644 --- a/schemas/entity/data/worksheet.json +++ b/schemas/entity/data/worksheet.json @@ -136,6 +136,10 @@ "description": "List of data products this entity is part of.", "$ref": "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" + }, "lifeCycle": { "description": "Life Cycle of the entity", "$ref": "../../type/lifeCycle.json" diff --git a/schemas/entity/datacontract/contractValidation.json b/schemas/entity/datacontract/contractValidation.json new file mode 100644 index 0000000..ebcf562 --- 
/dev/null +++ b/schemas/entity/datacontract/contractValidation.json @@ -0,0 +1,34 @@ +{ + "$id": "https://open-metadata.org/schema/entity/datacontract/contractValidation.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "ContractValidation", + "description": "Comprehensive validation result for data contract import operations.", + "type": "object", + "javaType": "org.openmetadata.schema.entity.datacontract.ContractValidation", + "properties": { + "valid": { + "description": "Whether the contract passes all validation checks.", + "type": "boolean", + "default": true + }, + "schemaValidation": { + "description": "Schema field validation results.", + "$ref": "schemaValidation.json" + }, + "entityErrors": { + "description": "List of entity-level validation errors (e.g., name too long, invalid pattern).", + "type": "array", + "items": { + "type": "string" + } + }, + "constraintErrors": { + "description": "List of entity-specific constraint violations (e.g., unsupported entity type, invalid configuration).", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false +} diff --git a/schemas/entity/datacontract/odcs/odcsDataContract.json b/schemas/entity/datacontract/odcs/odcsDataContract.json index d544c4a..203c759 100644 --- a/schemas/entity/datacontract/odcs/odcsDataContract.json +++ b/schemas/entity/datacontract/odcs/odcsDataContract.json @@ -2,15 +2,15 @@ "$id": "https://open-metadata.org/schema/entity/datacontract/odcs/odcsDataContract.json", "$schema": "http://json-schema.org/draft-07/schema#", "title": "ODCSDataContract", - "description": "Open Data Contract Standard (ODCS) v3.0.2 data contract representation for import/export.", + "description": "Open Data Contract Standard (ODCS) v3.1.0 data contract representation for import/export.", "type": "object", "javaType": "org.openmetadata.schema.entity.datacontract.odcs.ODCSDataContract", "definitions": { "odcsApiVersion": { "description": "ODCS API 
version.", "type": "string", - "enum": ["v3.0.2", "v3.0.1", "v3.0.0", "v2.2.2", "v2.2.1", "v2.2.0"], - "default": "v3.0.2" + "enum": ["v3.1.0", "v3.0.2", "v3.0.1", "v3.0.0", "v2.2.2", "v2.2.1", "v2.2.0"], + "default": "v3.1.0" }, "odcsKind": { "description": "Kind of ODCS document.", @@ -124,6 +124,15 @@ "maxProperties": { "description": "Maximum object properties.", "type": "integer" + }, + "timezone": { + "description": "Whether the timestamp/time defines the timezone or not.", + "type": "boolean" + }, + "defaultTimezone": { + "description": "Default timezone for timestamp/time types.", + "type": "string", + "default": "Etc/UTC" } }, "additionalProperties": true @@ -187,9 +196,9 @@ "default": -1 }, "logicalType": { - "description": "Logical data type.", + "description": "Logical data type per ODCS v3.1.0 spec.", "type": "string", - "enum": ["string", "date", "number", "integer", "object", "array", "boolean"] + "enum": ["string", "text", "date", "timestamp", "time", "number", "integer", "decimal", "float", "double", "long", "object", "array", "boolean", "bytes", "null"] }, "logicalTypeOptions": { "description": "Type-specific options.", @@ -215,9 +224,8 @@ "default": -1 }, "classification": { - "description": "Security level.", - "type": "string", - "enum": ["public", "internal", "restricted", "confidential", "sensitive"] + "description": "Data classification tag (e.g., PII, public, internal, restricted, confidential, sensitive).", + "type": "string" }, "encryptedName": { "description": "Encrypted column reference.", @@ -259,6 +267,13 @@ "items": { "description": "Array element schema.", "$ref": "#/definitions/odcsSchemaElement" + }, + "quality": { + "description": "Quality rules for this schema element.", + "type": "array", + "items": { + "$ref": "#/definitions/odcsQualityRule" + } } }, "required": ["name"], @@ -284,9 +299,13 @@ "type": "string" }, "rule": { - "description": "Library rule name.", + "description": "Library rule name. 
For type=library, can be one of the standard quality metrics.", "type": "string" }, + "metric": { + "description": "Standard quality metric from library (ODCS 3.1.0).", + "$ref": "#/definitions/odcsQualityMetric" + }, "column": { "description": "Column to apply the rule to.", "type": "string" @@ -351,7 +370,8 @@ "type": "number" }, "minItems": 2, - "maxItems": 2 + "maxItems": 2, + "default": null }, "mustNotBeBetween": { "description": "Value must not be between [min, max].", @@ -360,7 +380,8 @@ "type": "number" }, "minItems": 2, - "maxItems": 2 + "maxItems": 2, + "default": null }, "validValues": { "description": "Static value list.", @@ -399,6 +420,21 @@ }, "additionalProperties": true }, + "odcsQualityMetric": { + "description": "Standard quality metrics library supported by ODCS 3.1.0.", + "type": "string", + "enum": [ + "rowCount", + "nullValues", + "invalidValues", + "duplicateValues", + "missingValues", + "uniqueValues", + "distinctValues", + "completeness", + "freshness" + ] + }, "odcsSupportChannel": { "description": "Support and communication channel.", "type": "object", @@ -510,7 +546,7 @@ "access": { "description": "Access type.", "type": "string", - "enum": ["read", "write"] + "enum": ["read", "write", "readWrite"] }, "firstLevelApprovers": { "description": "Initial approval authority.", diff --git a/schemas/entity/datacontract/schemaValidation.json b/schemas/entity/datacontract/schemaValidation.json index 1306a22..694d651 100644 --- a/schemas/entity/datacontract/schemaValidation.json +++ b/schemas/entity/datacontract/schemaValidation.json @@ -19,7 +19,21 @@ "type": "integer" }, "failedFields": { - "description": "List of fields that failed validation.", + "description": "List of fields that do not exist in the entity.", + "type": "array", + "items": { + "type": "string" + } + }, + "duplicateFields": { + "description": "List of field names that appear more than once in the contract schema.", + "type": "array", + "items": { + "type": "string" + } + }, + 
"typeMismatchFields": { + "description": "List of fields with data type mismatches between contract and entity (format: 'fieldName: expected TYPE1, got TYPE2').", "type": "array", "items": { "type": "string" diff --git a/schemas/entity/domains/dataProduct.json b/schemas/entity/domains/dataProduct.json index 5b02cdf..265551f 100644 --- a/schemas/entity/domains/dataProduct.json +++ b/schemas/entity/domains/dataProduct.json @@ -65,7 +65,7 @@ "$ref": "../../type/entityReference.json" } }, - "required": ["name", "portType"], + "required": ["name", "portType", "dataAsset"], "additionalProperties": false }, "slaDefinition": { @@ -168,22 +168,6 @@ "deprecated": true, "$comment": "@deprecated Use GET /v1/dataProducts/{id}/assets API endpoint for paginated access to data product assets" }, - "inputPorts": { - "description": "Input ports for consuming data into this data product", - "type": "array", - "items": { - "$ref": "#/definitions/dataProductPort" - }, - "default": [] - }, - "outputPorts": { - "description": "Output ports for exposing data from this data product", - "type": "array", - "items": { - "$ref": "#/definitions/dataProductPort" - }, - "default": [] - }, "lifecycleStage": { "description": "Current lifecycle stage of the data product", "$ref": "#/definitions/lifecycleStage" @@ -194,13 +178,11 @@ }, "consumesFrom": { "description": "Other data products that this product consumes data from", - "$ref": "../../type/entityReferenceList.json", - "default": [] + "$ref": "../../type/entityReferenceList.json" }, "providesTo": { "description": "Other data products that consume data from this product", - "$ref": "../../type/entityReferenceList.json", - "default": [] + "$ref": "../../type/entityReferenceList.json" }, "tags": { "description": "Tags associated with the Data Product.", @@ -226,6 +208,13 @@ "description": "Followers of this entity.", "$ref": "../../type/entityReferenceList.json" }, + "votes": { + "description": "Votes on the entity.", + "$ref": 
"../../type/votes.json" + }, + "certification": { + "$ref": "../../type/assetCertification.json" + }, "entityStatus": { "description": "Status of the Data Product.", "$ref": "../../type/status.json" diff --git a/schemas/entity/domains/domain.json b/schemas/entity/domains/domain.json index 2e86209..aef2030 100644 --- a/schemas/entity/domains/domain.json +++ b/schemas/entity/domains/domain.json @@ -116,6 +116,17 @@ "followers": { "description": "Followers of this entity.", "$ref": "../../type/entityReferenceList.json" + }, + "votes": { + "description": "Votes on the entity.", + "$ref": "../../type/votes.json" + }, + "certification": { + "$ref": "../../type/assetCertification.json" + }, + "entityStatus": { + "description": "Status of the entity.", + "$ref": "../../type/status.json" } }, "required": ["id", "name", "description", "domainType"], diff --git a/schemas/entity/events/authentication/webhookBearerAuth.json b/schemas/entity/events/authentication/webhookBearerAuth.json new file mode 100644 index 0000000..c2c401d --- /dev/null +++ b/schemas/entity/events/authentication/webhookBearerAuth.json @@ -0,0 +1,21 @@ +{ + "$id": "https://open-metadata.org/schema/entity/events/authentication/webhookBearerAuth.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "BearerAuth", + "description": "Bearer token authentication for webhook endpoints.", + "type": "object", + "properties": { + "type": { + "description": "Authentication type discriminator.", + "type": "string", + "enum": ["bearer"], + "default": "bearer" + }, + "secretKey": { + "description": "Secret key used for computing HMAC SHA256 signature of webhook payload, sent in the X-OM-Signature header.", + "type": "string" + } + }, + "required": ["type", "secretKey"], + "additionalProperties": false +} diff --git a/schemas/entity/events/authentication/webhookNoAuth.json b/schemas/entity/events/authentication/webhookNoAuth.json new file mode 100644 index 0000000..54076aa --- /dev/null +++ 
b/schemas/entity/events/authentication/webhookNoAuth.json @@ -0,0 +1,17 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://open-metadata.org/schema/entity/events/authentication/webhookNoAuth.json", + "title": "WebhookNoAuth", + "description": "No authentication.", + "type": "object", + "properties": { + "type": { + "description": "Authentication type discriminator.", + "type": "string", + "enum": ["none"], + "default": "none" + } + }, + "required": ["type"], + "additionalProperties": false +} \ No newline at end of file diff --git a/schemas/entity/events/authentication/webhookOAuth2Config.json b/schemas/entity/events/authentication/webhookOAuth2Config.json new file mode 100644 index 0000000..be3e880 --- /dev/null +++ b/schemas/entity/events/authentication/webhookOAuth2Config.json @@ -0,0 +1,34 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://open-metadata.org/schema/entity/events/authentication/webhookOAuth2Config.json", + "title": "WebhookOAuth2Config", + "description": "OAuth2 Client Credentials configuration for webhook authentication.", + "type": "object", + "properties": { + "type": { + "description": "Authentication type discriminator.", + "type": "string", + "enum": ["oauth2"], + "default": "oauth2" + }, + "tokenUrl": { + "description": "Token endpoint URL to obtain access tokens.", + "type": "string", + "format": "uri" + }, + "clientId": { + "description": "OAuth2 client identifier. Stored encrypted via Fernet.", + "type": "string" + }, + "clientSecret": { + "description": "OAuth2 client secret. 
Stored encrypted via Fernet.", + "type": "string" + }, + "scope": { + "description": "Optional OAuth2 scopes to request (space-separated).", + "type": "string" + } + }, + "required": ["type", "tokenUrl", "clientId", "clientSecret"], + "additionalProperties": false +} \ No newline at end of file diff --git a/schemas/entity/events/notificationTemplate.json b/schemas/entity/events/notificationTemplate.json index 0165cd2..2923d51 100644 --- a/schemas/entity/events/notificationTemplate.json +++ b/schemas/entity/events/notificationTemplate.json @@ -60,7 +60,7 @@ "description": "Handlebars HTML template body with placeholders.", "type": "string", "minLength": 1, - "maxLength": 10240 + "maxLength": 65536 }, "provider": { "description": "Provider of the template. System templates are pre-loaded and cannot be deleted. User templates are created by users and can be deleted.", diff --git a/schemas/entity/events/webhook.json b/schemas/entity/events/webhook.json index beb73d8..c1bea92 100644 --- a/schemas/entity/events/webhook.json +++ b/schemas/entity/events/webhook.json @@ -31,10 +31,6 @@ "type": "string", "format": "uri" }, - "secretKey": { - "description": "Secret set by the webhook client used for computing HMAC SHA256 signature of webhook payload and sent in `X-OM-Signature` header in POST requests to publish the events.", - "type": "string" - }, "headers": { "description": "Custom headers to be sent with the webhook request.", "type": "object", @@ -61,6 +57,22 @@ "type": "boolean", "default": false }, + "authType": { + "description": "Authentication configuration for the webhook. 
If not specified, the webhook will be sent without authentication.", + "title": "Authentication Configuration Type", + "mask": true, + "oneOf": [ + { + "$ref": "./authentication/webhookNoAuth.json" + }, + { + "$ref": "./authentication/webhookBearerAuth.json" + }, + { + "$ref": "./authentication/webhookOAuth2Config.json" + } + ] + }, "sendToFollowers": { "description": "Send the Event to Followers", "type": "boolean", diff --git a/schemas/entity/feed/thread.json b/schemas/entity/feed/thread.json index bfddd0b..fd43988 100644 --- a/schemas/entity/feed/thread.json +++ b/schemas/entity/feed/thread.json @@ -16,6 +16,7 @@ "UpdateTag", "RequestApproval", "RequestTestCaseFailureResolution", + "RecognizerFeedbackApproval", "Generic" ], "javaEnums": [ @@ -37,6 +38,9 @@ { "name": "RequestTestCaseFailureResolution" }, + { + "name": "RecognizerFeedbackApproval" + }, { "name": "Generic" } @@ -84,6 +88,14 @@ "testCaseResolutionStatusId": { "description": "The test case resolution status id for which the task is created.", "$ref": "../../type/basic.json#/definitions/uuid" + }, + "feedback": { + "description": "The recognizer feedback under review for the tag that this task points to.", + "$ref": "../../type/recognizerFeedback.json" + }, + "recognizer": { + "description": "Metadata about the recognizer that applied the tag being reviewed", + "$ref": "../../type/tagLabelRecognizerMetadata.json" } }, "required": ["id", "assignees", "type"], diff --git a/schemas/entity/learning/learningResource.json b/schemas/entity/learning/learningResource.json new file mode 100644 index 0000000..d4e24d7 --- /dev/null +++ b/schemas/entity/learning/learningResource.json @@ -0,0 +1,195 @@ +{ + "$id": "https://open-metadata.org/schema/entity/learning/learningResource.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "LearningResource", + "description": "A learning resource such as an in-product tutorial, Storylane walkthrough, or expert video
contextualized for product surfaces.", + "$comment": "@om-entity-type", + "type": "object", + "javaType": "org.openmetadata.schema.entity.learning.LearningResource", + "javaInterfaces": ["org.openmetadata.schema.EntityInterface"], + "definitions": { + "resourceType": { + "description": "Kind of learning asset represented.", + "type": "string", + "enum": [ + "Storylane", + "Video", + "Article" + ] + }, + "resourceCategory": { + "description": "Primary topic grouping for the resource.", + "type": "string", + "enum": [ + "Discovery", + "Administration", + "DataGovernance", + "DataQuality", + "Observability", + "AI" + ] + }, + "resourceDifficulty": { + "description": "Suggested proficiency tier for the resource.", + "type": "string", + "enum": [ + "Intro", + "Intermediate", + "Advanced" + ] + }, + "resourceSource": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.learning.LearningResourceSource", + "description": "Embedding configuration for rendering the resource inline.", + "properties": { + "provider": { + "description": "Origin system that hosts the resource (Storylane, YouTube, etc.).", + "type": "string" + }, + "url": { + "description": "Canonical URL.", + "type": "string", + "format": "uri" + }, + "embedConfig": { + "description": "Provider-specific configuration passed to the UI.", + "type": "object", + "additionalProperties": true + } + }, + "required": ["url"], + "additionalProperties": false + }, + "resourceContext": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.learning.LearningResourceContext", + "description": "UI placement hints for scoping the resource.", + "properties": { + "pageId": { + "description": "Stable identifier for the product page (e.g., glossary, domains).", + "type": "string" + }, + "componentId": { + "description": "Optional component-level anchor within the page.", + "type": "string" + }, + "priority": { + "description": "Relative ordering weight when multiple resources match a context.", + 
"type": "integer", + "minimum": 0 + } + }, + "required": ["pageId"], + "additionalProperties": false + } + }, + "properties": { + "id": { + "$ref": "../../type/basic.json#/definitions/uuid" + }, + "name": { + "$ref": "../../type/basic.json#/definitions/entityName" + }, + "fullyQualifiedName": { + "$ref": "../../type/basic.json#/definitions/fullyQualifiedEntityName" + }, + "displayName": { + "description": "Display label presented to users.", + "type": "string", + "maxLength": 120 + }, + "description": { + "$ref": "../../type/basic.json#/definitions/markdown" + }, + "resourceType": { + "$ref": "#/definitions/resourceType" + }, + "categories": { + "description": "Topic categories the resource belongs to.", + "type": "array", + "items": { + "$ref": "#/definitions/resourceCategory" + }, + "minItems": 1 + }, + "difficulty": { + "$ref": "#/definitions/resourceDifficulty" + }, + "source": { + "$ref": "#/definitions/resourceSource" + }, + "estimatedDuration": { + "description": "Approximate viewing time in seconds.", + "type": "integer", + "minimum": 0 + }, + "contexts": { + "description": "List of contexts where this resource should surface.", + "type": "array", + "items": { + "$ref": "#/definitions/resourceContext" + }, + "minItems": 1 + }, + "status": { + "description": "Lifecycle state for controllable publishing.", + "type": "string", + "enum": [ + "Draft", + "Active", + "Deprecated" + ], + "default": "Active" + }, + "tags": { + "description": "Tags attached to the learning resource.", + "type": "array", + "items": { + "$ref": "../../type/tagLabel.json" + }, + "default": [] + }, + "owners": { + "description": "Owners accountable for maintaining this resource.", + "$ref": "../../type/entityReferenceList.json" + }, + "reviewers": { + "$ref": "../../type/entityReferenceList.json" + }, + "followers": { + "$ref": "../../type/entityReferenceList.json" + }, + "changeDescription": { + "$ref": "../../type/entityHistory.json#/definitions/changeDescription" + }, + 
"incrementalChangeDescription": { + "$ref": "../../type/entityHistory.json#/definitions/changeDescription" + }, + "deleted": { + "description": "Soft delete marker.", + "type": "boolean", + "default": false + }, + "version": { + "$ref": "../../type/entityHistory.json#/definitions/entityVersion" + }, + "updatedAt": { + "$ref": "../../type/basic.json#/definitions/timestamp" + }, + "updatedBy": { + "type": "string" + }, + "impersonatedBy": { + "$ref": "../../type/basic.json#/definitions/impersonatedBy" + }, + "href": { + "$ref": "../../type/basic.json#/definitions/href" + }, + "extension": { + "$ref": "../../type/basic.json#/definitions/entityExtension" + } + }, + "required": ["id", "name", "resourceType", "source", "contexts", "categories"], + "additionalProperties": false +} diff --git a/schemas/entity/policies/accessControl/resourceDescriptor.json b/schemas/entity/policies/accessControl/resourceDescriptor.json index 942b051..a61cafa 100644 --- a/schemas/entity/policies/accessControl/resourceDescriptor.json +++ b/schemas/entity/policies/accessControl/resourceDescriptor.json @@ -64,7 +64,10 @@ "CreateScim", "DeleteScim", "ViewScim", - "Impersonate" + "Impersonate", + "AuditLogs", + "ViewTestDefinitionLibrary", + "EditTestDefinitionLibrary" ] } }, diff --git a/schemas/entity/services/apiService.json b/schemas/entity/services/apiService.json index caedbc5..0e8a721 100644 --- a/schemas/entity/services/apiService.json +++ b/schemas/entity/services/apiService.json @@ -134,10 +134,18 @@ "type": "boolean", "default": false }, + "entityStatus": { + "description": "Status of the entity.", + "$ref": "../../type/status.json" + }, "dataProducts" : { "description": "List of data products this entity is part of.", "$ref" : "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" + }, "domains" : { "description": "Domains the API service belongs to.", "$ref": 
"../../type/entityReferenceList.json" diff --git a/schemas/entity/services/connections/api/openAPISchemaFilePath.json b/schemas/entity/services/connections/api/openAPISchemaFilePath.json new file mode 100644 index 0000000..d7553fc --- /dev/null +++ b/schemas/entity/services/connections/api/openAPISchemaFilePath.json @@ -0,0 +1,18 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/api/openAPISchemaFilePath.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "OpenAPISchemaFilePath", + "description": "Open API Schema File Path Connection Config", + "type": "object", + "javaType": "org.openmetadata.schema.services.connections.api.OpenAPISchemaFilePath", + "properties": { + "openAPISchemaFilePath": { + "expose": true, + "title": "OpenAPI Schema File Path", + "description": "Path to a local OpenAPI schema file.", + "type": "string" + } + }, + "required": ["openAPISchemaFilePath"], + "additionalProperties": false +} diff --git a/schemas/entity/services/connections/api/openAPISchemaS3.json b/schemas/entity/services/connections/api/openAPISchemaS3.json new file mode 100644 index 0000000..d36a714 --- /dev/null +++ b/schemas/entity/services/connections/api/openAPISchemaS3.json @@ -0,0 +1,24 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/api/openAPISchemaS3.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "OpenAPISchemaS3", + "description": "Open API Schema S3 Connection Config", + "type": "object", + "javaType": "org.openmetadata.schema.services.connections.api.OpenAPISchemaS3", + "properties": { + "openAPISchemaS3URL": { + "expose": true, + "title": "OpenAPI Schema S3 URL", + "description": "S3 URL of the OpenAPI schema file (JSON or YAML). 
Example: https://bucket-name.s3.amazonaws.com/path/to/openapi_schema.json", + "type": "string", + "format": "uri" + }, + "awsCredentials": { + "title": "AWS Credentials", + "description": "AWS credentials required to access the S3 file.", + "$ref": "../../../../security/credentials/awsCredentials.json" + } + }, + "required": ["openAPISchemaS3URL", "awsCredentials"], + "additionalProperties": false +} diff --git a/schemas/entity/services/connections/api/openAPISchemaURL.json b/schemas/entity/services/connections/api/openAPISchemaURL.json new file mode 100644 index 0000000..2570cc8 --- /dev/null +++ b/schemas/entity/services/connections/api/openAPISchemaURL.json @@ -0,0 +1,19 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/api/openAPISchemaURL.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "OpenAPISchemaURL", + "description": "Open API Schema URL Connection Config", + "type": "object", + "javaType": "org.openmetadata.schema.services.connections.api.OpenAPISchemaURL", + "properties": { + "openAPISchemaURL": { + "expose": true, + "title": "OpenAPI Schema URL", + "description": "Open API Schema URL.", + "type": "string", + "format": "uri" + } + }, + "required": ["openAPISchemaURL"], + "additionalProperties": false +} diff --git a/schemas/entity/services/connections/api/restConnection.json b/schemas/entity/services/connections/api/restConnection.json index 9506121..5acad16 100644 --- a/schemas/entity/services/connections/api/restConnection.json +++ b/schemas/entity/services/connections/api/restConnection.json @@ -20,12 +20,20 @@ "$ref": "#/definitions/restType", "default": "Rest" }, - "openAPISchemaURL": { - "expose": true, - "title": "OpenAPISchemaURL", - "description": "Open API Schema URL.", - "type": "string", - "format": "uri" + "openAPISchemaConnection": { + "title": "OpenAPI Schema Connection", + "description": "OpenAPI Schema source config. 
Exactly one of a URL, a local file path, or an S3 location must be provided.", + "oneOf": [ + { + "$ref": "openAPISchemaURL.json" + }, + { + "$ref": "openAPISchemaFilePath.json" + }, + { + "$ref": "openAPISchemaS3.json" + } + ] }, "token": { "title": "Token", @@ -45,6 +53,19 @@ "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern", "title": "Default API Collection Filter Pattern" }, + "apiEndpointFilterPattern": { + "description": "Regex to only fetch api endpoints with names matching the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern", + "title": "Default API Endpoint Filter Pattern" + }, + "verifySSL": { + "description": "Client SSL verification. Make sure to configure the SSLConfig if enabled.", + "$ref": "../../../../security/ssl/verifySSLConfig.json#/definitions/verifySSL", + "default": "no-ssl" + }, + "sslConfig": { + "$ref": "../../../../security/ssl/verifySSLConfig.json#/definitions/sslConfig" + }, "supportsMetadataExtraction": { "title": "Supports Metadata Extraction", "description": "Supports Metadata Extraction.", @@ -53,5 +74,5 @@ } }, "additionalProperties": false, - "required": ["openAPISchemaURL"] + "required": ["openAPISchemaConnection"] } diff --git a/schemas/entity/services/connections/connectionBasicType.json b/schemas/entity/services/connections/connectionBasicType.json index 7710041..5384565 100644 --- a/schemas/entity/services/connections/connectionBasicType.json +++ b/schemas/entity/services/connections/connectionBasicType.json @@ -140,14 +140,14 @@ "properties": { "config": { "oneOf": [ - { - "title": "Sample Data Storage Config", - "$ref": "#/definitions/dataStorageConfig" - }, { "title": "No Sample Data Storage Config", "type": "object", "additionalProperties": false + }, + { + "title": "Sample Data Storage Config", + "$ref": "#/definitions/dataStorageConfig" } ] } diff --git a/schemas/entity/services/connections/dashboard/ssrsConnection.json b/schemas/entity/services/connections/dashboard/ssrsConnection.json new
file mode 100644 index 0000000..9c51867 --- /dev/null +++ b/schemas/entity/services/connections/dashboard/ssrsConnection.json @@ -0,0 +1,80 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/dashboard/ssrsConnection.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "SsrsConnection", + "description": "SQL Server Reporting Services (SSRS) provides a set of on-premises tools and services to create, deploy, and manage paginated reports", + "type": "object", + "javaType": "org.openmetadata.schema.services.connections.dashboard.SsrsConnection", + "definitions": { + "ssrsType": { + "description": "Service type.", + "type": "string", + "enum": [ + "Ssrs" + ], + "default": "Ssrs" + } + }, + "properties": { + "type": { + "title": "Service Type", + "description": "Service Type", + "$ref": "#/definitions/ssrsType", + "default": "Ssrs" + }, + "hostPort": { + "expose": true, + "title": "Host and Port", + "description": "Host and Port of the Ssrs instance.", + "type": "string", + "format": "uri" + }, + "username": { + "title": "Username", + "description": "Username to connect to Ssrs.", + "type": "string" + }, + "password": { + "title": "Password", + "description": "Password to connect to Ssrs.", + "type": "string", + "format": "password" + }, + "dashboardFilterPattern": { + "description": "Regex to exclude or include dashboards that matches the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern", + "title": "Default Dashboard Filter Pattern" + }, + "chartFilterPattern": { + "description": "Regex to exclude or include charts that matches the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern", + "title": "Default Chart Filter Pattern" + }, + "projectFilterPattern": { + "description": "Regex to exclude or include projects that matches the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern", + "title": "Default Project Filter 
Pattern" + }, + "verifySSL": { + "title": "Verify SSL", + "description": "Client SSL verification.", + "$ref": "../../../../security/ssl/verifySSLConfig.json#/definitions/verifySSL", + "default": "no-ssl" + }, + "sslConfig": { + "title": "SSL Configuration", + "description": "SSL Configuration details.", + "$ref": "../../../../security/ssl/verifySSLConfig.json#/definitions/sslConfig" + }, + "supportsMetadataExtraction": { + "title": "Supports Metadata Extraction", + "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" + } + }, + "additionalProperties": false, + "required": [ + "hostPort", + "username", + "password" + ] +} diff --git a/schemas/entity/services/connections/database/athenaConnection.json b/schemas/entity/services/connections/database/athenaConnection.json index 6b16f67..3020f00 100644 --- a/schemas/entity/services/connections/database/athenaConnection.json +++ b/schemas/entity/services/connections/database/athenaConnection.json @@ -48,6 +48,11 @@ "description": "Athena workgroup.", "type": "string" }, + "catalogId": { + "title": "Catalog ID", + "description": "Catalog ID for Athena. For S3 Tables, use the format 's3tablescatalog/'. For cross-account Glue catalogs, use the AWS account ID. If not provided, defaults to the caller's AWS account.", + "type": "string" + }, "databaseName": { "title": "Database Name", "description": "Optional name to give to the database in OpenMetadata. 
If left blank, we will use default as the database name.", diff --git a/schemas/entity/services/connections/database/bigQueryConnection.json b/schemas/entity/services/connections/database/bigQueryConnection.json index 2a95d4a..347cf33 100644 --- a/schemas/entity/services/connections/database/bigQueryConnection.json +++ b/schemas/entity/services/connections/database/bigQueryConnection.json @@ -103,6 +103,11 @@ "description": "Regex to only include/exclude databases that matches the pattern.", "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" }, + "storedProcedureFilterPattern": { + "title": "Default Stored Procedure Filter Pattern", + "description": "Regex to only include/exclude stored procedures that matches the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" + }, "supportsMetadataExtraction": { "title": "Supports Metadata Extraction", "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" diff --git a/schemas/entity/services/connections/database/burstIQConnection.json b/schemas/entity/services/connections/database/burstIQConnection.json new file mode 100644 index 0000000..c6bd8fa --- /dev/null +++ b/schemas/entity/services/connections/database/burstIQConnection.json @@ -0,0 +1,74 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/database/burstIQConnection.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "BurstIQConnection", + "description": "BurstIQ LifeGraph Database Connection Config", + "type": "object", + "javaType": "org.openmetadata.schema.services.connections.database.BurstIQConnection", + "definitions": { + "burstIQType": { + "description": "Service type.", + "type": "string", + "enum": ["BurstIQ"], + "default": "BurstIQ" + } + }, + "properties": { + "type": { + "title": "Service Type", + "description": "Service Type", + "$ref": "#/definitions/burstIQType", + "default": "BurstIQ" + }, + "username": { + "title": 
"Username", + "description": "Username to connect to BurstIQ. This user should have privileges to read all the metadata in BurstIQ LifeGraph.", + "type": "string" + }, + "password": { + "title": "Password", + "description": "Password to connect to BurstIQ.", + "type": "string", + "format": "password" + }, + "realmName": { + "title": "Realm Name", + "description": "BurstIQ Keycloak realm name (e.g., 'ems' from https://auth.burstiq.com/realms/ems).", + "type": "string" + }, + "biqSdzName": { + "title": "BurstIQ SDZ Name", + "description": "BurstIQ Secure Data Zone (SDZ) name for API requests.", + "type": "string" + }, + "biqCustomerName": { + "title": "BurstIQ Customer Name", + "description": "BurstIQ customer name for API requests.", + "type": "string" + }, + "biqSystemWalletId": { + "title": "BurstIQ System Wallet ID", + "description": "BurstIQ system wallet ID sent as the biq_system_wallet_id header. Required for profiler data access.", + "type": "string" + }, + "tableFilterPattern": { + "title": "Table Filter Pattern", + "description": "Regex to only include/exclude dictionaries (tables) that matches the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" + }, + "supportsMetadataExtraction": { + "title": "Supports Metadata Extraction", + "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" + }, + "supportsLineageExtraction": { + "title": "Supports Lineage Extraction", + "$ref": "../connectionBasicType.json#/definitions/supportsLineageExtraction" + }, + "supportsProfiler": { + "title": "Supports Profiler", + "$ref": "../connectionBasicType.json#/definitions/supportsProfiler" + } + }, + "additionalProperties": false, + "required": ["username", "password", "realmName", "biqSdzName", "biqCustomerName"] +} diff --git a/schemas/entity/services/connections/database/common/gcpCloudSqlConfig.json b/schemas/entity/services/connections/database/common/gcpCloudSqlConfig.json new file mode 100644 index 
0000000..cc30859 --- /dev/null +++ b/schemas/entity/services/connections/database/common/gcpCloudSqlConfig.json @@ -0,0 +1,28 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/database/common/gcpCloudSqlConfig.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "GCP CloudSQL Configuration Source", + "description": "GCP CloudSQL Database Connection Config. Uses the Google Cloud SQL Python Connector.", + "type": "object", + "javaType": "org.openmetadata.schema.services.connections.database.common.GcpCloudSqlConfig", + "properties": { + "password": { + "title": "Database Password", + "description": "Database user password. Leave empty if using IAM database authentication.", + "type": "string", + "format": "password" + }, + "enableIamAuth": { + "title": "Enable IAM Authentication", + "description": "Use GCP IAM for database authentication instead of a password.", + "type": "boolean", + "default": false + }, + "gcpConfig": { + "title": "GCP Credentials Configuration", + "description": "GCP credentials to use. If not provided, Application Default Credentials will be used.", + "$ref": "../../../../../security/credentials/gcpCredentials.json" + } + }, + "additionalProperties": false +} diff --git a/schemas/entity/services/connections/database/datalakeConnection.json b/schemas/entity/services/connections/database/datalakeConnection.json index 0247ad5..c551775 100644 --- a/schemas/entity/services/connections/database/datalakeConnection.json +++ b/schemas/entity/services/connections/database/datalakeConnection.json @@ -97,6 +97,12 @@ "sampleDataStorageConfig": { "title": "Storage Config for Sample Data", "$ref": "../connectionBasicType.json#/definitions/sampleDataStorageConfig" + }, + "skipColdStorage": { + "title": "Skip Cold Storage", + "description": "Skip files in cold storage tiers (e.g., S3 Glacier, Azure Archive/Cool/Cold, GCS Coldline/Archive). 
When enabled, only files in hot/standard storage tiers will be processed.", + "type": "boolean", + "default": false } }, "additionalProperties": false, diff --git a/schemas/entity/services/connections/database/db2Connection.json b/schemas/entity/services/connections/database/db2Connection.json index 6c26521..4340763 100644 --- a/schemas/entity/services/connections/database/db2Connection.json +++ b/schemas/entity/services/connections/database/db2Connection.json @@ -68,6 +68,14 @@ "description": "CLI Driver version to connect to DB2. If not provided, the latest version will be used.", "type": "string" }, + "sslMode": { + "$ref": "../../../../security/ssl/verifySSLConfig.json#/definitions/sslMode" + }, + "sslConfig": { + "title": "SSL Configuration", + "description": "SSL Configuration details for DB2 connection. Provide CA certificate for server validation, and optionally client certificate and key for mutual TLS authentication.", + "$ref": "../../../../security/ssl/verifySSLConfig.json#/definitions/sslConfig" + }, "connectionOptions": { "title": "Connection Options", "$ref": "../connectionBasicType.json#/definitions/connectionOptions" diff --git a/schemas/entity/services/connections/database/dorisConnection.json b/schemas/entity/services/connections/database/dorisConnection.json index e665040..9abebf1 100644 --- a/schemas/entity/services/connections/database/dorisConnection.json +++ b/schemas/entity/services/connections/database/dorisConnection.json @@ -104,6 +104,10 @@ "supportsQueryComment": { "title": "Supports Query Comment", "$ref": "../connectionBasicType.json#/definitions/supportsQueryComment" + }, + "supportsViewLineageExtraction": { + "title": "Supports View Lineage", + "$ref": "../connectionBasicType.json#/definitions/supportsViewLineageExtraction" } }, "additionalProperties": false, @@ -111,4 +115,4 @@ "hostPort", "username" ] -} \ No newline at end of file +} diff --git a/schemas/entity/services/connections/database/dremio/cloudAuth.json 
b/schemas/entity/services/connections/database/dremio/cloudAuth.json new file mode 100644 index 0000000..0ae9432 --- /dev/null +++ b/schemas/entity/services/connections/database/dremio/cloudAuth.json @@ -0,0 +1,30 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/database/dremio/cloudAuth.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Dremio Cloud Authentication", + "description": "Authentication configuration for Dremio Cloud using Personal Access Token (PAT). Dremio Cloud is a fully managed SaaS platform.", + "javaType": "org.openmetadata.schema.services.connections.database.dremio.CloudAuth", + "type": "object", + "properties": { + "region": { + "title": "Cloud Region", + "description": "Dremio Cloud region where your organization is hosted. Choose 'US' for United States region or 'EU' for European region.", + "type": "string", + "enum": ["US", "EU"], + "default": "US" + }, + "personalAccessToken": { + "title": "Personal Access Token (PAT)", + "description": "Personal Access Token for authenticating with Dremio Cloud. Generate this token from your Dremio Cloud account settings under Settings -> Personal Access Tokens.", + "type": "string", + "format": "password" + }, + "projectId": { + "title": "Project ID", + "description": "Dremio Cloud Project ID (required). 
This unique identifier can be found in your Dremio Cloud project URL or project settings.", + "type": "string" + } + }, + "additionalProperties": false, + "required": ["region", "personalAccessToken", "projectId"] +} diff --git a/schemas/entity/services/connections/database/dremio/softwareAuth.json b/schemas/entity/services/connections/database/dremio/softwareAuth.json new file mode 100644 index 0000000..b4c616b --- /dev/null +++ b/schemas/entity/services/connections/database/dremio/softwareAuth.json @@ -0,0 +1,28 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/database/dremio/softwareAuth.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Dremio Software Authentication", + "description": "Authentication configuration for self-hosted Dremio Software using username and password. Dremio Software is deployed on-premises or in your own cloud infrastructure.", + "javaType": "org.openmetadata.schema.services.connections.database.dremio.SoftwareAuth", + "type": "object", + "properties": { + "hostPort": { + "title": "Host and Port", + "description": "URL to your self-hosted Dremio Software instance, including protocol and port (e.g., http://localhost:9047 or https://dremio.example.com:9047).", + "type": "string" + }, + "username": { + "title": "Username", + "description": "Username for authenticating with Dremio Software. 
This user should have appropriate permissions to access metadata.", + "type": "string" + }, + "password": { + "title": "Password", + "description": "Password for the Dremio Software user account.", + "type": "string", + "format": "password" + } + }, + "additionalProperties": false, + "required": ["hostPort", "username", "password"] +} diff --git a/schemas/entity/services/connections/database/dremioConnection.json b/schemas/entity/services/connections/database/dremioConnection.json new file mode 100644 index 0000000..e8ed1fe --- /dev/null +++ b/schemas/entity/services/connections/database/dremioConnection.json @@ -0,0 +1,74 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/database/dremioConnection.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "DremioConnection", + "description": "Dremio Connection Config supporting both Dremio Cloud (SaaS) and Dremio Software (self-hosted)", + "type": "object", + "javaType": "org.openmetadata.schema.services.connections.database.DremioConnection", + "definitions": { + "dremioType": { + "description": "Service type.", + "type": "string", + "enum": ["Dremio"], + "default": "Dremio" + } + }, + "properties": { + "type": { + "title": "Service Type", + "description": "Service Type", + "$ref": "#/definitions/dremioType", + "default": "Dremio" + }, + "authType": { + "title": "Authentication Type", + "description": "Choose between Dremio Cloud (SaaS) or Dremio Software (self-hosted) authentication.", + "oneOf": [ + { + "title": "Dremio Cloud", + "$ref": "./dremio/cloudAuth.json" + }, + { + "title": "Dremio Software", + "$ref": "./dremio/softwareAuth.json" + } + ] + }, + "database": { + "title": "Namespace", + "description": "Optional: Restrict metadata ingestion to a specific namespace (source/space). 
When left blank, all namespaces will be ingested.", + "type": "string" + }, + "schemaFilterPattern": { + "title": "Folder Filter Pattern", + "description": "Regex to only include/exclude folders that match the pattern. In Dremio Cloud, folders are mapped as schemas.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" + }, + "tableFilterPattern": { + "title": "Table Filter Pattern", + "description": "Regex to only include/exclude tables that match the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" + }, + "databaseFilterPattern": { + "title": "Namespace Filter Pattern", + "description": "Regex to only include/exclude namespaces (sources/spaces) that match the pattern. In Dremio Cloud, namespaces are mapped as databases.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" + }, + "supportsMetadataExtraction": { + "title": "Supports Metadata Extraction", + "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" + }, + "supportsUsageExtraction": { + "$ref": "../connectionBasicType.json#/definitions/supportsUsageExtraction" + }, + "supportsLineageExtraction": { + "$ref": "../connectionBasicType.json#/definitions/supportsLineageExtraction" + }, + "supportsDatabase": { + "title": "Supports Database", + "$ref": "../connectionBasicType.json#/definitions/supportsDatabase" + } + }, + "additionalProperties": false, + "required": ["authType"] + } \ No newline at end of file diff --git a/schemas/entity/services/connections/database/iceberg/dynamoDbCatalogConnection.json b/schemas/entity/services/connections/database/iceberg/dynamoDbCatalogConnection.json deleted file mode 100644 index 4a4244e..0000000 --- a/schemas/entity/services/connections/database/iceberg/dynamoDbCatalogConnection.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "$id": "https://open-metadata.org/schema/entity/services/connections/database/iceberg/dynamoDbCatalogConnection.json", - "$schema": 
"http://json-schema.org/draft-07/schema#", - "title": "DynamoDbCatalogConnection", - "description": "Iceberg DynamoDB Catalog configuration.", - "type": "object", - "javaType": "org.openmetadata.schema.services.connections.database.iceberg.DynamoDbCatalogConnection", - "properties": { - "tableName": { - "title": "Table Name", - "description": "DynamoDB table name.", - "type": "string", - "default": "iceberg" - }, - "awsConfig": { - "$ref": "../../../../../security/credentials/awsCredentials.json" - } - }, - "additionalProperties": false, - "required": [ - "tableName", - "awsConfig" - ] -} diff --git a/schemas/entity/services/connections/database/iceberg/glueCatalogConnection.json b/schemas/entity/services/connections/database/iceberg/glueCatalogConnection.json deleted file mode 100644 index 2726938..0000000 --- a/schemas/entity/services/connections/database/iceberg/glueCatalogConnection.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "$id": "https://open-metadata.org/schema/entity/services/connections/database/iceberg/glueCatalogConnection.json", - "$schema": "http://json-schema.org/draft-07/schema#", - "title": "GlueCatalogConnection", - "description": "Iceberg Glue Catalog configuration.", - "type": "object", - "javaType": "org.openmetadata.schema.services.connections.database.iceberg.GlueCatalogConnection", - "properties": { - "awsConfig": { - "$ref": "../../../../../security/credentials/awsCredentials.json" - } - }, - "additionalProperties": false, - "required": [ - "awsConfig" - ] -} diff --git a/schemas/entity/services/connections/database/iceberg/hiveCatalogConnection.json b/schemas/entity/services/connections/database/iceberg/hiveCatalogConnection.json deleted file mode 100644 index 98af7ba..0000000 --- a/schemas/entity/services/connections/database/iceberg/hiveCatalogConnection.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "$id": "https://open-metadata.org/schema/entity/services/connections/database/iceberg/hiveCatalogConnection.json", - "$schema": 
"http://json-schema.org/draft-07/schema#", - "title": "HiveCatalogConnection", - "description": "Iceberg Hive Catalog configuration.", - "type": "object", - "javaType": "org.openmetadata.schema.services.connections.database.iceberg.HiveCatalogConnection", - "properties": { - "uri": { - "title": "URI", - "description": "Uri to the Hive Metastore. Example: 'thrift://localhost:9083'", - "type": "string", - "format": "uri" - }, - "fileSystem": { - "$ref": "./icebergFileSystem.json" - } - }, - "additionalProperties": false, - "required": [ - "uri" - ] -} diff --git a/schemas/entity/services/connections/database/iceberg/icebergCatalog.json b/schemas/entity/services/connections/database/iceberg/icebergCatalog.json deleted file mode 100644 index bd23b79..0000000 --- a/schemas/entity/services/connections/database/iceberg/icebergCatalog.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "$id": "https://open-metadata.org/schema/entity/services/connections/database/iceberg/icebergCatalog.json", - "$schema": "http://json-schema.org/draft-07/schema#", - "title": "IcebergCatalog", - "description": "Iceberg Catalog configuration.", - "type": "object", - "javaType": "org.openmetadata.schema.services.connections.database.iceberg.IcebergCatalog", - "properties": { - "name": { - "title": "Name", - "description": "Catalog Name.", - "type": "string" - }, - "connection": { - "title": "Connection", - "description": "Catalog connection configuration, depending on your catalog type.", - "oneOf": [ - { - "$ref": "./hiveCatalogConnection.json" - }, - { - "$ref": "./restCatalogConnection.json" - }, - { - "$ref": "./glueCatalogConnection.json" - }, - { - "$ref": "./dynamoDbCatalogConnection.json" - } - ] - }, - "databaseName": { - "title": "Database Name", - "description": "Custom Database Name for your Iceberg Service. If not set it will be 'default'.", - "type": "string" - }, - "warehouseLocation": { - "title": "Warehouse Location", - "description": "Warehouse Location. 
Used to specify a custom warehouse location if needed.", - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "name", - "connection" - ] -} diff --git a/schemas/entity/services/connections/database/iceberg/icebergFileSystem.json b/schemas/entity/services/connections/database/iceberg/icebergFileSystem.json deleted file mode 100644 index 7b947d9..0000000 --- a/schemas/entity/services/connections/database/iceberg/icebergFileSystem.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "$id": "https://open-metadata.org/schema/entity/services/connections/database/iceberg/icebergFileSystem.json", - "$schema": "http://json-schema.org/draft-07/schema#", - "title": "IcebergFileSystem", - "description": "Iceberg File System configuration, based on where the Iceberg Warehouse is located.", - "javaType": "org.openmetadata.schema.services.connections.database.iceberg.IcebergFileSystem", - "type": "object", - "properties": { - "type": { - "oneOf": [ - { - "title": "Local", - "description": "Local File System.", - "type": "null" - }, - { - "title": "AWS", - "$ref": "../../../../../security/credentials/awsCredentials.json" - }, - { - "title": "Azure", - "$ref": "../../../../../security/credentials/azureCredentials.json" - } - ] - } - }, - "additionalProperties": false -} diff --git a/schemas/entity/services/connections/database/iceberg/restCatalogConnection.json b/schemas/entity/services/connections/database/iceberg/restCatalogConnection.json deleted file mode 100644 index 45a6df7..0000000 --- a/schemas/entity/services/connections/database/iceberg/restCatalogConnection.json +++ /dev/null @@ -1,71 +0,0 @@ -{ - "$id": "https://open-metadata.org/schema/entity/services/connections/database/iceberg/restCatalogConnection.json", - "$schema": "http://json-schema.org/draft-07/schema#", - "title": "RestCatalogConnection", - "description": "Iceberg REST Catalog configuration.", - "type": "object", - "javaType": 
"org.openmetadata.schema.services.connections.database.iceberg.RestCatalogConnection", - "properties": { - "uri": { - "title": "URI", - "description": "Uri to the REST catalog. Example: 'http://rest-catalog/ws/'", - "type": "string", - "format": "uri" - }, - "credential": { - "title": "OAuth2 Credential", - "description": "OAuth2 credential to use when initializing the catalog.", - "type": "object", - "properties": { - "clientId": { - "title": "Client ID", - "description": "OAuth2 Client ID.", - "type": "string", - "format": "password" - }, - "clientSecret": { - "title": "Client Secret", - "description": "OAuth2 Client Secret", - "type": "string", - "format": "password" - } - }, - "additionalProperties": false - }, - "token": { - "title": "Token", - "description": "Berarer token to use for the 'Authorization' header.", - "type": "string", - "format": "password" - }, - "ssl": { - "title": "SSL", - "description": "SSL Configuration details.", - "$ref": "../../common/sslCertPaths.json" - }, - "sigv4": { - "title": "Sigv4", - "description": "Sign requests to the REST Server using AWS SigV4 protocol.", - "type": "object", - "properties": { - "signingRegion": { - "title": "Signing AWS Region", - "description": "AWS Region to use when SigV4 signs a request.", - "type": "string" - }, - "signingName": { - "title": "Signing Name", - "description": "The service signing name to use when SigV4 signs a request.", - "type": "string" - } - } - }, - "fileSystem": { - "$ref": "./icebergFileSystem.json" - } - }, - "additionalProperties": false, - "required": [ - "uri" - ] -} diff --git a/schemas/entity/services/connections/database/icebergConnection.json b/schemas/entity/services/connections/database/icebergConnection.json deleted file mode 100644 index 0b6212c..0000000 --- a/schemas/entity/services/connections/database/icebergConnection.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "$id": "https://open-metadata.org/schema/entity/services/connections/database/icebergConnection.json", - 
"$schema": "http://json-schema.org/draft-07/schema#", - "title": "IcebergConnection", - "description": "Iceberg Catalog Connection Config", - "type": "object", - "javaType": "org.openmetadata.schema.services.connections.database.IcebergConnection", - "definitions": { - "icebergType": { - "description": "Service type.", - "type": "string", - "enum": [ - "Iceberg" - ], - "default": "Iceberg" - } - }, - "properties": { - "type": { - "title": "Service Type", - "description": "Service Type", - "$ref": "#/definitions/icebergType", - "default": "Iceberg" - }, - "catalog": { - "$ref": "./iceberg/icebergCatalog.json" - }, - "ownershipProperty": { - "title": "Ownership Property", - "description": "Table property to look for the Owner.", - "type": "string", - "default": "owner" - }, - "schemaFilterPattern":{ - "title": "Default Schema Filter Pattern", - "description": "Regex to only include/exclude schemas that matches the pattern.", - "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" - }, - "tableFilterPattern": { - "title": "Default Table Filter Pattern", - "description": "Regex to only include/exclude tables that matches the pattern.", - "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" - }, - "databaseFilterPattern": { - "title": "Default Database Filter Pattern", - "description": "Regex to only include/exclude databases that matches the pattern.", - "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" - }, - "supportsMetadataExtraction": { - "title": "Supports Metadata Extraction", - "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" - } - }, - "additionalProperties": false, - "required": [ - "catalog" - ] -} diff --git a/schemas/entity/services/connections/database/informixConnection.json b/schemas/entity/services/connections/database/informixConnection.json new file mode 100644 index 0000000..f23ae8b --- /dev/null +++ 
b/schemas/entity/services/connections/database/informixConnection.json @@ -0,0 +1,162 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/database/informixConnection.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "InformixConnection", + "description": "IBM Informix Database Connection Config", + "type": "object", + "javaType": "org.openmetadata.schema.services.connections.database.InformixConnection", + "definitions": { + "informixType": { + "description": "Service type.", + "type": "string", + "enum": [ + "Informix" + ], + "default": "Informix" + }, + "informixScheme": { + "description": "SQLAlchemy driver scheme options.", + "type": "string", + "enum": [ + "informix" + ], + "default": "informix" + } + }, + "properties": { + "type": { + "title": "Service Type", + "description": "Service Type", + "$ref": "#/definitions/informixType", + "default": "Informix" + }, + "scheme": { + "title": "Connection Scheme", + "description": "SQLAlchemy driver scheme options.", + "$ref": "#/definitions/informixScheme", + "default": "informix" + }, + "username": { + "title": "Username", + "description": "Username to connect to Informix. 
This user should have privileges to read all the metadata in Informix.", + "type": "string" + }, + "password": { + "title": "Password", + "description": "Password to connect to Informix.", + "type": "string", + "format": "password" + }, + "hostPort": { + "title": "Host and Port", + "description": "Host and port of the Informix service.", + "type": "string", + "default": "localhost:9088" + }, + "database": { + "title": "Database", + "description": "Database of the data source.", + "type": "string" + }, + "serverName": { + "title": "Server Name (INFORMIXSERVER)", + "description": "Informix server name as defined in the sqlhosts file or INFORMIXSERVER environment variable.", + "type": "string", + "default": "informix" + }, + "ingestAllDatabases": { + "title": "Ingest All Databases", + "description": "Ingest data from all databases in Informix. You can use databaseFilterPattern on top of this.", + "type": "boolean", + "default": false + }, + "sslMode": { + "title": "SSL Mode", + "description": "SSL Mode to connect to Informix. Use 'disable' for no SSL, 'require' for encrypted SSL without certificate verification, or 'verify-ca' to validate the server certificate against the provided CA certificate.", + "$ref": "../../../../security/ssl/verifySSLConfig.json#/definitions/sslMode" + }, + "sslConfig": { + "title": "SSL Configuration", + "description": "SSL Configuration details. Provide the CA certificate to validate the Informix server certificate. 
Paste the PEM content directly or upload the certificate file.", + "$ref": "../../../../security/ssl/verifySSLConfig.json#/definitions/sslConfig" + }, + "connectionOptions": { + "title": "Connection Options", + "$ref": "../connectionBasicType.json#/definitions/connectionOptions" + }, + "connectionArguments": { + "title": "Connection Arguments", + "$ref": "../connectionBasicType.json#/definitions/connectionArguments" + }, + "schemaFilterPattern": { + "title": "Default Schema Filter Pattern", + "description": "Regex to only include/exclude schemas that matches the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern", + "default": { + "includes": [], + "excludes": [ + "^sysmaster$", + "^sysutils$", + "^sysuser$", + "^sysadmin$" + ] + } + }, + "tableFilterPattern": { + "title": "Default Table Filter Pattern", + "description": "Regex to only include/exclude tables that matches the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" + }, + "databaseFilterPattern": { + "title": "Default Database Filter Pattern", + "description": "Regex to only include/exclude databases that matches the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern", + "default": { + "includes": [], + "excludes": [ + "^sysmaster$", + "^sysutils$", + "^sysuser$", + "^sysadmin$" + ] + } + }, + "storedProcedureFilterPattern": { + "title": "Default Stored Procedure Filter Pattern", + "description": "Regex to only include/exclude stored procedures that matches the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" + }, + "supportsMetadataExtraction": { + "title": "Supports Metadata Extraction", + "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" + }, + "supportsProfiler": { + "title": "Supports Profiler", + "$ref": "../connectionBasicType.json#/definitions/supportsProfiler" + }, + "supportsDatabase": { + "title": "Supports Database", + "$ref": 
"../connectionBasicType.json#/definitions/supportsDatabase" + }, + "supportsQueryComment": { + "title": "Supports Query Comment", + "$ref": "../connectionBasicType.json#/definitions/supportsQueryComment" + }, + "supportsLineageExtraction": { + "title": "Supports Lineage Extraction", + "$ref": "../connectionBasicType.json#/definitions/supportsLineageExtraction" + }, + "supportsUsageExtraction": { + "title": "Supports Usage Extraction", + "$ref": "../connectionBasicType.json#/definitions/supportsUsageExtraction" + } + }, + "additionalProperties": false, + "required": [ + "hostPort", + "database", + "username", + "password", + "serverName" + ] +} diff --git a/schemas/entity/services/connections/database/iometeConnection.json b/schemas/entity/services/connections/database/iometeConnection.json new file mode 100644 index 0000000..e29dd75 --- /dev/null +++ b/schemas/entity/services/connections/database/iometeConnection.json @@ -0,0 +1,90 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/database/iometeConnection.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "IometeConnection", + "description": "IOMETE Connection Config", + "type": "object", + "javaType": "org.openmetadata.schema.services.connections.database.IometeConnection", + "definitions": { + "iometeType": { + "description": "Service type.", + "type": "string", + "enum": ["Iomete"], + "default": "Iomete" + } + }, + "properties": { + "type": { + "title": "Service Type", + "description": "Service Type", + "$ref": "#/definitions/iometeType", + "default": "Iomete" + }, + "hostPort": { + "title": "Host and Port", + "description": "Host and port of the IOMETE service, e.g. 
dev.iomete.cloud:443", + "type": "string" + }, + "username": { + "title": "Username", + "description": "Username to connect to IOMETE.", + "type": "string" + }, + "password": { + "title": "Password", + "description": "Password to connect to IOMETE.", + "type": "string", + "format": "password" + }, + "cluster": { + "title": "Cluster", + "description": "IOMETE lakehouse cluster name to connect to.", + "type": "string" + }, + "dataPlane": { + "title": "Data Plane", + "description": "IOMETE data plane name.", + "type": "string" + }, + "catalog": { + "title": "Catalog", + "description": "Catalog of the data source (e.g. spark_catalog). This is an optional parameter; if left blank, OpenMetadata uses the default catalog.", + "type": "string" + }, + "databaseSchema": { + "title": "Database Schema", + "description": "IOMETE database to restrict metadata ingestion to (e.g. default, finance_db). This is an optional parameter; if left blank, OpenMetadata attempts to scan all databases in the catalog.", + "type": "string" + }, + "schemaFilterPattern": { + "title": "Default Schema Filter Pattern", + "description": "Regex to only include/exclude IOMETE databases (e.g. 'default', 'finance_db') that match the pattern. 
In IOMETE, a database corresponds to an OpenMetadata schema.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" + }, + "tableFilterPattern": { + "title": "Default Table Filter Pattern", + "description": "Regex to only include/exclude tables that match the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" + }, + "supportsMetadataExtraction": { + "title": "Supports Metadata Extraction", + "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" + }, + "supportsDBTExtraction": { + "$ref": "../connectionBasicType.json#/definitions/supportsDBTExtraction" + }, + "supportsProfiler": { + "title": "Supports Profiler", + "$ref": "../connectionBasicType.json#/definitions/supportsProfiler" + }, + "sampleDataStorageConfig": { + "title": "Storage Config for Sample Data", + "$ref": "../connectionBasicType.json#/definitions/sampleDataStorageConfig" + }, + "supportsViewLineageExtraction": { + "$ref": "../connectionBasicType.json#/definitions/supportsViewLineageExtraction" + } + }, + "additionalProperties": false, + "required": ["hostPort", "username", "password", "cluster", "dataPlane"] +} diff --git a/schemas/entity/services/connections/database/mariaDBConnection.json b/schemas/entity/services/connections/database/mariaDBConnection.json index b131bdc..423d167 100644 --- a/schemas/entity/services/connections/database/mariaDBConnection.json +++ b/schemas/entity/services/connections/database/mariaDBConnection.json @@ -81,6 +81,11 @@ "description": "Regex to only include/exclude databases that matches the pattern.", "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" }, + "storedProcedureFilterPattern": { + "title": "Default Stored Procedure Filter Pattern", + "description": "Regex to only include/exclude stored procedures that matches the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" + }, "supportsMetadataExtraction": { "title": "Supports 
Metadata Extraction", "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" diff --git a/schemas/entity/services/connections/database/microsoftAccessConnection.json b/schemas/entity/services/connections/database/microsoftAccessConnection.json new file mode 100644 index 0000000..b0abac8 --- /dev/null +++ b/schemas/entity/services/connections/database/microsoftAccessConnection.json @@ -0,0 +1,65 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/database/microsoftAccessConnection.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "MicrosoftAccessConnection", + "description": "Microsoft Access Database Connection Config", + "type": "object", + "javaType": "org.openmetadata.schema.services.connections.database.MicrosoftAccessConnection", + "definitions": { + "microsoftAccessType": { + "description": "Service type.", + "type": "string", + "enum": ["MicrosoftAccess"], + "default": "MicrosoftAccess" + }, + "localFilePath": { + "title": "Local File Path", + "description": "Local filesystem path to a single Access database file or a directory containing Access files.", + "type": "object", + "properties": { + "localFilePath": { + "type": "string", + "title": "Local File Path", + "description": "Absolute path to the .accdb or .mdb file, or a directory. Supports ~ expansion (e.g. ~/data/sales.accdb). All .accdb and .mdb files found recursively in a directory will be ingested." 
+ } + }, + "required": ["localFilePath"], + "additionalProperties": false + } + }, + "properties": { + "type": { + "title": "Service Type", + "description": "Service Type", + "$ref": "#/definitions/microsoftAccessType", + "default": "MicrosoftAccess" + }, + "connection": { + "title": "Access Database Location (Local Path or S3)", + "description": "Choose between local file system path (object) or S3 bucket location (object) for Access database files.", + "oneOf": [ + { + "$ref": "#/definitions/localFilePath" + }, + { + "$ref": "../storage/s3Connection.json" + } + ] + }, + "tableFilterPattern": { + "title": "Table Filter Pattern", + "description": "Regex to only include/exclude tables that matches the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" + }, + "supportsMetadataExtraction": { + "title": "Supports Metadata Extraction", + "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" + }, + "supportsViewLineageExtraction": { + "title": "Supports View Lineage Extraction", + "$ref": "../connectionBasicType.json#/definitions/supportsViewLineageExtraction" + } + }, + "additionalProperties": false, + "required": ["connection"] +} diff --git a/schemas/entity/services/connections/database/microsoftFabricConnection.json b/schemas/entity/services/connections/database/microsoftFabricConnection.json new file mode 100644 index 0000000..4181455 --- /dev/null +++ b/schemas/entity/services/connections/database/microsoftFabricConnection.json @@ -0,0 +1,124 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/database/microsoftFabricConnection.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "MicrosoftFabricConnection", + "description": "Microsoft Fabric Warehouse and Lakehouse Connection Config", + "type": "object", + "javaType": "org.openmetadata.schema.services.connections.database.MicrosoftFabricConnection", + "definitions": { + "microsoftFabricType": { + "description": 
"Service type.", + "type": "string", + "enum": ["MicrosoftFabric"], + "default": "MicrosoftFabric" + }, + "microsoftFabricScheme": { + "description": "SQLAlchemy driver scheme options.", + "type": "string", + "enum": ["mssql+pyodbc"], + "default": "mssql+pyodbc" + } + }, + "properties": { + "type": { + "title": "Service Type", + "description": "Service Type", + "$ref": "#/definitions/microsoftFabricType", + "default": "MicrosoftFabric" + }, + "scheme": { + "title": "Connection Scheme", + "description": "SQLAlchemy driver scheme options.", + "$ref": "#/definitions/microsoftFabricScheme", + "default": "mssql+pyodbc" + }, + "hostPort": { + "title": "Host and Port", + "description": "Host and port of the Microsoft Fabric SQL endpoint (e.g., your-workspace.datawarehouse.fabric.microsoft.com:1433).", + "type": "string" + }, + "database": { + "title": "Database", + "description": "Database of the data source. This is the name of your Fabric Warehouse or Lakehouse. This is an optional parameter; set it if you would like to restrict the metadata reading to a single database. When left blank, OpenMetadata Ingestion attempts to scan all the databases.", + "type": "string" + }, + "clientId": { + "title": "Client ID", + "description": "Azure Application (client) ID for Service Principal authentication.", + "type": "string" + }, + "clientSecret": { + "title": "Client Secret", + "description": "Azure Application client secret for Service Principal authentication.", + "type": "string", + "format": "password" + }, + "tenantId": { + "title": "Tenant ID", + "description": "Azure Directory (tenant) ID for Service Principal authentication.", + "type": "string" + }, + "driver": { + "title": "Driver", + "description": "ODBC driver version in case of pyodbc connection.", + "type": "string", + "default": "ODBC Driver 18 for SQL Server" + }, + "ingestAllDatabases": { + "title": "Ingest All Databases", + "description": "Ingest data from all databases (Warehouses and Lakehouses) in Microsoft Fabric. 
You can use databaseFilterPattern on top of this.", + "type": "boolean", + "default": false + }, + "connectionArguments": { + "title": "Connection Arguments", + "$ref": "../connectionBasicType.json#/definitions/connectionArguments" + }, + "schemaFilterPattern": { + "title": "Default Schema Filter Pattern", + "description": "Regex to only include/exclude schemas that matches the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern", + "default": { + "includes": [], + "excludes": ["^db_.*", "^guest$", "^information_schema$", "^sys$"] + } + }, + "tableFilterPattern": { + "title": "Default Table Filter Pattern", + "description": "Regex to only include/exclude tables that matches the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" + }, + "databaseFilterPattern": { + "title": "Default Database Filter Pattern", + "description": "Regex to only include/exclude databases that matches the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" + }, + "supportsMetadataExtraction": { + "title": "Supports Metadata Extraction", + "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" + }, + "supportsUsageExtraction": { + "$ref": "../connectionBasicType.json#/definitions/supportsUsageExtraction" + }, + "supportsLineageExtraction": { + "$ref": "../connectionBasicType.json#/definitions/supportsLineageExtraction" + }, + "supportsDBTExtraction": { + "$ref": "../connectionBasicType.json#/definitions/supportsDBTExtraction" + }, + "supportsProfiler": { + "title": "Supports Profiler", + "$ref": "../connectionBasicType.json#/definitions/supportsProfiler" + }, + "supportsDatabase": { + "title": "Supports Database", + "$ref": "../connectionBasicType.json#/definitions/supportsDatabase" + }, + "sampleDataStorageConfig": { + "title": "Storage Config for Sample Data", + "$ref": "../connectionBasicType.json#/definitions/sampleDataStorageConfig" + } + }, + 
"additionalProperties": false, + "required": ["hostPort", "clientId", "clientSecret", "tenantId"] +} diff --git a/schemas/entity/services/connections/database/mongoDBConnection.json b/schemas/entity/services/connections/database/mongoDBConnection.json index e0ffe1b..858c688 100644 --- a/schemas/entity/services/connections/database/mongoDBConnection.json +++ b/schemas/entity/services/connections/database/mongoDBConnection.json @@ -63,6 +63,11 @@ "description": "Optional name to give to the database in OpenMetadata. If left blank, we will use default as the database name.", "type": "string" }, + "databaseSchema": { + "title": "Database Schema", + "description": "Database Schema of the data source. This is optional parameter, if you would like to restrict the metadata reading to a single schema. When left blank, OpenMetadata Ingestion attempts to scan all the schemas.", + "type": "string" + }, "schemaFilterPattern":{ "title": "Default Schema Filter Pattern", "description": "Regex to only include/exclude schemas that matches the pattern.", diff --git a/schemas/entity/services/connections/database/mssqlConnection.json b/schemas/entity/services/connections/database/mssqlConnection.json index c84e969..3781194 100644 --- a/schemas/entity/services/connections/database/mssqlConnection.json +++ b/schemas/entity/services/connections/database/mssqlConnection.json @@ -9,13 +9,19 @@ "mssqlType": { "description": "Service type.", "type": "string", - "enum": ["Mssql"], + "enum": [ + "Mssql" + ], "default": "Mssql" }, "mssqlScheme": { "description": "SQLAlchemy driver scheme options.", "type": "string", - "enum": ["mssql+pyodbc", "mssql+pytds", "mssql+pymssql"], + "enum": [ + "mssql+pyodbc", + "mssql+pytds", + "mssql+pymssql" + ], "default": "mssql+pytds" } }, @@ -88,7 +94,12 @@ "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern", "default": { "includes": [], - "excludes": ["^db_.*", "^guest$", "^information_schema$", "^sys$"] + "excludes": [ + "^db_.*", + 
"^guest$", + "^information_schema$", + "^sys$" + ] } }, "tableFilterPattern": { @@ -105,6 +116,15 @@ "excludes": ["^msdb$", "^model$", "^tempdb$"] } }, + "storedProcedureFilterPattern": { + "title": "Default Stored Procedure Filter Pattern", + "description": "Regex to only include/exclude stored procedures that matches the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern", + "default": { + "includes": [], + "excludes": ["^sp_", "^xp_", "^ms_"] + } + }, "connectionOptions": { "title": "Connection Options", "$ref": "../connectionBasicType.json#/definitions/connectionOptions" @@ -148,5 +168,7 @@ } }, "additionalProperties": false, - "required": ["database"] -} + "required": [ + "database" + ] +} \ No newline at end of file diff --git a/schemas/entity/services/connections/database/myDbConnection.json b/schemas/entity/services/connections/database/myDbConnection.json new file mode 100644 index 0000000..fd4f338 --- /dev/null +++ b/schemas/entity/services/connections/database/myDbConnection.json @@ -0,0 +1,110 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/database/myDbConnection.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "MyDbConnection", + "description": "MyDb Connection Config", + "type": "object", + "javaType": "org.openmetadata.schema.services.connections.database.MyDbConnection", + "definitions": { + "myDbType": { + "description": "Service type.", + "type": "string", + "enum": [ + "MyDb" + ], + "default": "MyDb" + }, + "myDbScheme": { + "description": "SQLAlchemy driver scheme options.", + "type": "string", + "enum": [ + "mydb+pymydb" + ], + "default": "mydb+pymydb" + } + }, + "properties": { + "type": { + "title": "Service Type", + "description": "Service Type", + "$ref": "#/definitions/myDbType", + "default": "MyDb" + }, + "scheme": { + "title": "Connection Scheme", + "description": "SQLAlchemy driver scheme options.", + "$ref": "#/definitions/myDbScheme", + "default": 
"mydb+pymydb" + }, + "username": { + "title": "Username", + "description": "Username to connect to MyDb.", + "type": "string" + }, + "authType": { + "title": "Auth Configuration Type", + "description": "Choose Auth Config Type.", + "mask": true, + "oneOf": [ + { + "$ref": "./common/basicAuth.json" + } + ] + }, + "hostPort": { + "title": "Host and Port", + "description": "Host and port of the MyDb service.", + "type": "string" + }, + "databaseName": { + "title": "Database Name", + "description": "Optional name to give to the database in OpenMetadata. If left blank, we will use default as the database name.", + "type": "string" + }, + "databaseSchema": { + "title": "Database Schema", + "description": "Database Schema of the data source. This is optional parameter, if you would like to restrict the metadata reading to a single schema.", + "type": "string" + }, + "sslConfig": { + "title": "SSL", + "description": "SSL Configuration details.", + "$ref": "../../../../security/ssl/verifySSLConfig.json#/definitions/sslConfig" + }, + "connectionOptions": { + "title": "Connection Options", + "$ref": "../connectionBasicType.json#/definitions/connectionOptions" + }, + "connectionArguments": { + "title": "Connection Arguments", + "$ref": "../connectionBasicType.json#/definitions/connectionArguments" + }, + "schemaFilterPattern": { + "title": "Default Schema Filter Pattern", + "description": "Regex to only include/exclude schemas that matches the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" + }, + "tableFilterPattern": { + "title": "Default Table Filter Pattern", + "description": "Regex to only include/exclude tables that matches the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" + }, + "databaseFilterPattern": { + "title": "Default Database Filter Pattern", + "description": "Regex to only include/exclude databases that matches the pattern.", + "$ref": 
"../../../../type/filterPattern.json#/definitions/filterPattern" + }, + "supportsMetadataExtraction": { + "title": "Supports Metadata Extraction", + "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" + }, + "supportsDBTExtraction": { + "$ref": "../connectionBasicType.json#/definitions/supportsDBTExtraction" + } + }, + "additionalProperties": false, + "required": [ + "username", + "hostPort" + ] +} diff --git a/schemas/entity/services/connections/database/mysqlConnection.json b/schemas/entity/services/connections/database/mysqlConnection.json index 5209514..5fa3794 100644 --- a/schemas/entity/services/connections/database/mysqlConnection.json +++ b/schemas/entity/services/connections/database/mysqlConnection.json @@ -54,12 +54,15 @@ }, { "$ref": "./common/azureConfig.json" + }, + { + "$ref": "./common/gcpCloudSqlConfig.json" } ] }, "hostPort": { "title": "Host and Port", - "description": "Host and port of the MySQL service.", + "description": "Host and port of the MySQL service. 
For GCP CloudSQL, use the instance connection name in the format 'project_id:region:instance_name'.", "type": "string" }, "databaseName": { @@ -104,6 +107,11 @@ "description": "Regex to only include/exclude databases that matches the pattern.", "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" }, + "storedProcedureFilterPattern": { + "title": "Default Stored Procedure Filter Pattern", + "description": "Regex to only include/exclude stored procedures that matches the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" + }, "supportsMetadataExtraction": { "title": "Supports Metadata Extraction", "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" diff --git a/schemas/entity/services/connections/database/oracleConnection.json b/schemas/entity/services/connections/database/oracleConnection.json index b4bb9e6..307b153 100644 --- a/schemas/entity/services/connections/database/oracleConnection.json +++ b/schemas/entity/services/connections/database/oracleConnection.json @@ -9,18 +9,22 @@ "oracleType": { "description": "Service type.", "type": "string", - "enum": ["Oracle"], + "enum": [ + "Oracle" + ], "default": "Oracle" }, "oracleScheme": { "description": "SQLAlchemy driver scheme options.", "type": "string", - "enum": ["oracle+cx_oracle"], + "enum": [ + "oracle+cx_oracle" + ], "default": "oracle+cx_oracle" }, "OracleDatabaseSchema": { "title": "Database Schema", - "type":"object", + "type": "object", "properties": { "databaseSchema": { "title": "DatabaseSchema", @@ -28,11 +32,13 @@ "type": "string" } }, - "required": ["databaseSchema"] + "required": [ + "databaseSchema" + ] }, "OracleServiceName": { "title": "Oracle Service Name", - "type":"object", + "type": "object", "properties": { "oracleServiceName": { "title": "Oracle Service Name", @@ -40,11 +46,13 @@ "type": "string" } }, - "required": ["oracleServiceName"] + "required": [ + "oracleServiceName" + ] }, "OracleTNSConnection": { 
"title": "Oracle TNS Connection", - "type":"object", + "type": "object", "properties": { "oracleTNSConnection": { "title": "Oracle TNS Connection String", @@ -52,7 +60,9 @@ "type": "string" } }, - "required": ["oracleTNSConnection"] + "required": [ + "oracleTNSConnection" + ] } }, "properties": { @@ -111,6 +121,18 @@ "description": "Optional name to give to the database in OpenMetadata. If left blank, we will use default as the database name.", "type": "string" }, + "preserveIdentifierCase": { + "title": "Preserve Identifier Case", + "type": "boolean", + "description": "Controls how Oracle identifier names (tables, columns, schemas) are stored in OpenMetadata. When disabled (default), Oracle's UPPERCASE unquoted identifiers (e.g. EMPLOYEES) are not guaranteed to be stored as-is — identifiers with the same letters but different case (e.g. unquoted EMPLOYEES and quoted 'employees') will collide into the same name. When enabled, names are stored exactly as Oracle persists them, which solves same-name collisions between quoted and unquoted identifiers. WARNING: enabling this after data has already been ingested with the default setting will change the stored names of all existing tables, columns, schemas, and constraints — breaking attached tags, descriptions, lineage, data quality tests, and any other metadata associated with those entities. If you must switch, soft-delete all previously ingested entities before re-ingesting.", + "default": false + }, + "useDBATable": { + "title": "Use DBA Tables", + "type": "boolean", + "description": "Use Oracle DBA_* tables instead of ALL_* tables for metadata ingestion. 
Requires DBA privileges.", + "default": true + }, "connectionOptions": { "title": "Connection Options", "$ref": "../connectionBasicType.json#/definitions/connectionOptions" @@ -119,13 +141,18 @@ "title": "Connection Arguments", "$ref": "../connectionBasicType.json#/definitions/connectionArguments" }, - "schemaFilterPattern":{ + "schemaFilterPattern": { "title": "Default Schema Filter Pattern", "description": "Regex to only include/exclude schemas that matches the pattern.", "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern", "default": { "includes": [], - "excludes": ["^sys$", "^ctxsys$", "^dbsnmp$", "^outln$"] + "excludes": [ + "^sys$", + "^ctxsys$", + "^dbsnmp$", + "^outln$" + ] } }, "tableFilterPattern": { @@ -138,6 +165,11 @@ "description": "Regex to only include/exclude databases that matches the pattern.", "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" }, + "storedProcedureFilterPattern": { + "title": "Default Stored Procedure Filter Pattern", + "description": "Regex to only include/exclude stored procedures that matches the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" + }, "supportsMetadataExtraction": { "title": "Supports Metadata Extraction", "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" @@ -160,8 +192,8 @@ "$ref": "../connectionBasicType.json#/definitions/supportsQueryComment" }, "supportsDataDiff": { - "title": "Supports Data Diff Extraction.", - "$ref": "../connectionBasicType.json#/definitions/supportsDataDiff" + "title": "Supports Data Diff Extraction.", + "$ref": "../connectionBasicType.json#/definitions/supportsDataDiff" }, "sampleDataStorageConfig": { "title": "Storage Config for Sample Data", @@ -169,5 +201,8 @@ } }, "additionalProperties": false, - "required": ["username", "oracleConnectionType"] -} + "required": [ + "username", + "oracleConnectionType" + ] +} \ No newline at end of file diff --git 
a/schemas/entity/services/connections/database/postgresConnection.json b/schemas/entity/services/connections/database/postgresConnection.json index 7278a2e..2d0403c 100644 --- a/schemas/entity/services/connections/database/postgresConnection.json +++ b/schemas/entity/services/connections/database/postgresConnection.json @@ -97,6 +97,11 @@ "excludes": ["^template1$", "^template0$"] } }, + "storedProcedureFilterPattern": { + "title": "Default Stored Procedure Filter Pattern", + "description": "Regex to only include/exclude stored procedures that matches the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" + }, "sslMode": { "$ref": "../../../../security/ssl/verifySSLConfig.json#/definitions/sslMode" }, @@ -109,6 +114,12 @@ "type": "string", "default": "PostgresPolicyTags" }, + "queryStatementSource": { + "title": "Query Statement Source", + "description": "Fully qualified name of the view or table to use for query logs. If not provided, defaults to pg_stat_statements. Use this to configure a custom view (e.g., my_schema.custom_pg_stat_statements) when direct access to pg_stat_statements is restricted.", + "type": "string", + "default": "pg_stat_statements" + }, "connectionOptions": { "title": "Connection Options", "$ref": "../connectionBasicType.json#/definitions/connectionOptions" diff --git a/schemas/entity/services/connections/database/redshiftConnection.json b/schemas/entity/services/connections/database/redshiftConnection.json index c784541..1b1b717 100644 --- a/schemas/entity/services/connections/database/redshiftConnection.json +++ b/schemas/entity/services/connections/database/redshiftConnection.json @@ -41,11 +41,18 @@ "description": "Username to connect to Redshift. 
This user should have privileges to read all the metadata in Redshift.", "type": "string" }, - "password": { - "title": "Password", - "description": "Password to connect to Redshift.", - "type": "string", - "format": "password" + "authType": { + "title": "Auth Configuration Type", + "description": "Choose Auth Configuration Type.", + "mask": true, + "oneOf": [ + { + "$ref": "./common/basicAuth.json" + }, + { + "$ref": "./common/iamAuthConfig.json" + } + ] }, "hostPort": { "title": "Host and Port", @@ -86,6 +93,11 @@ "excludes": ["^template1$"] } }, + "storedProcedureFilterPattern": { + "title": "Default Stored Procedure Filter Pattern", + "description": "Regex to only include/exclude stored procedures that matches the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" + }, "sslMode": { "$ref": "../../../../security/ssl/verifySSLConfig.json#/definitions/sslMode" }, diff --git a/schemas/entity/services/connections/database/salesforceConnection.json b/schemas/entity/services/connections/database/salesforceConnection.json index 367c6b5..4b374b1 100644 --- a/schemas/entity/services/connections/database/salesforceConnection.json +++ b/schemas/entity/services/connections/database/salesforceConnection.json @@ -9,7 +9,9 @@ "salesforceType": { "description": "Service type.", "type": "string", - "enum": ["Salesforce"], + "enum": [ + "Salesforce" + ], "default": "Salesforce" } }, @@ -53,10 +55,14 @@ "description": "Salesforce Organization ID is the unique identifier for your Salesforce identity", "type": "string" }, - "sobjectName": { - "title": "Object Name", - "description": "Salesforce Object Name.", - "type": "string" + "sobjectNames": { + "title": "Object Names", + "description": "List of Salesforce Object Names to ingest. If specified, only these objects will be fetched. 
Leave empty to fetch all objects (subject to tableFilterPattern).", + "type": "array", + "items": { + "type": "string" + }, + "default": null }, "databaseName": { "title": "Database Name", @@ -88,7 +94,7 @@ "title": "Connection Arguments", "$ref": "../connectionBasicType.json#/definitions/connectionArguments" }, - "schemaFilterPattern":{ + "schemaFilterPattern": { "title": "Default Schema Filter Pattern", "description": "Regex to only include/exclude schemas that matches the pattern.", "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" @@ -109,4 +115,4 @@ } }, "additionalProperties": false -} +} \ No newline at end of file diff --git a/schemas/entity/services/connections/database/snowflakeConnection.json b/schemas/entity/services/connections/database/snowflakeConnection.json index 43387ba..482fab9 100644 --- a/schemas/entity/services/connections/database/snowflakeConnection.json +++ b/schemas/entity/services/connections/database/snowflakeConnection.json @@ -98,6 +98,12 @@ "type": "boolean", "default": false }, + "includeStages": { + "title": "Include Stages", + "description": "Optional configuration for ingestion of Snowflake stages (internal and external). 
By default, stages are not ingested.", + "type": "boolean", + "default": false + }, "clientSessionKeepAlive": { "title": "Client Session Keep Alive", "description": "Optional configuration for ingestion to keep the client session active in case the ingestion process runs for longer durations.", @@ -135,6 +141,11 @@ "excludes": ["^snowflake$"] } }, + "storedProcedureFilterPattern": { + "title": "Default Stored Procedure Filter Pattern", + "description": "Regex to only include/exclude stored procedures that matches the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" + }, "supportsMetadataExtraction": { "title": "Supports Metadata Extraction", "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" diff --git a/schemas/entity/services/connections/database/starrocksConnection.json b/schemas/entity/services/connections/database/starrocksConnection.json new file mode 100644 index 0000000..6b5c49f --- /dev/null +++ b/schemas/entity/services/connections/database/starrocksConnection.json @@ -0,0 +1,124 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/database/starrocksConnection.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "StarRocksConnection", + "description": "StarRocks Database Connection Config", + "type": "object", + "javaType": "org.openmetadata.schema.services.connections.database.StarRocksConnection", + "definitions": { + "starrocksType": { + "description": "Service type.", + "type": "string", + "enum": [ + "StarRocks" + ], + "default": "StarRocks" + }, + "starrocksScheme": { + "description": "SQLAlchemy driver scheme options.", + "type": "string", + "enum": [ + "mysql+pymysql" + ], + "default": "mysql+pymysql" + } + }, + "properties": { + "type": { + "title": "Service Type", + "description": "Service Type", + "$ref": "#/definitions/starrocksType", + "default": "StarRocks" + }, + "scheme": { + "title": "Connection Scheme", + "description": 
"SQLAlchemy driver scheme options.", + "$ref": "#/definitions/starrocksScheme", + "default": "mysql+pymysql" + }, + "username": { + "title": "Username", + "description": "Username to connect to StarRocks. This user should have privileges to read all the metadata in StarRocks.", + "type": "string" + }, + "password": { + "title": "Password", + "description": "Password to connect to StarRocks.", + "type": "string", + "format": "password" + }, + "hostPort": { + "title": "Host and Port", + "description": "Host and port of the StarRocks service.", + "type": "string" + }, + "databaseName": { + "title": "Database Name", + "description": "Optional name to give to the database in OpenMetadata. If left blank, we will use default as the database name.", + "type": "string" + }, + "databaseSchema": { + "title": "Database Schema", + "description": "Database Schema of the data source. This is optional parameter, if you would like to restrict the metadata reading to a single schema. When left blank, OpenMetadata Ingestion attempts to scan all the schemas.", + "type": "string" + }, + "sslConfig": { + "title": "SSL", + "description": "SSL Configuration details.", + "$ref": "../../../../security/ssl/verifySSLConfig.json#/definitions/sslConfig" + }, + "connectionOptions": { + "title": "Connection Options", + "$ref": "../connectionBasicType.json#/definitions/connectionOptions" + }, + "connectionArguments": { + "title": "Connection Arguments", + "$ref": "../connectionBasicType.json#/definitions/connectionArguments" + }, + "schemaFilterPattern":{ + "title": "Schema Filter Pattern", + "description": "Regex to only include/exclude schemas that matches the pattern. 
System schemas (information_schema, _statistics_, sys) are excluded by default.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" + }, + "tableFilterPattern": { + "title": "Default Table Filter Pattern", + "description": "Regex to only include/exclude tables that matches the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" + }, + "databaseFilterPattern": { + "title": "Default Database Filter Pattern", + "description": "Regex to only include/exclude databases that matches the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" + }, + "supportsMetadataExtraction": { + "title": "Supports Metadata Extraction", + "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" + }, + "supportsDBTExtraction": { + "$ref": "../connectionBasicType.json#/definitions/supportsDBTExtraction" + }, + "supportsProfiler": { + "title": "Supports Profiler", + "$ref": "../connectionBasicType.json#/definitions/supportsProfiler" + }, + "supportsQueryComment": { + "title": "Supports Query Comment", + "$ref": "../connectionBasicType.json#/definitions/supportsQueryComment" + }, + "supportsUsageExtraction": { + "$ref": "../connectionBasicType.json#/definitions/supportsUsageExtraction" + }, + "supportsLineageExtraction": { + "$ref": "../connectionBasicType.json#/definitions/supportsLineageExtraction" + }, + "supportsViewLineageExtraction": { + "title": "Supports View Lineage", + "$ref": "../connectionBasicType.json#/definitions/supportsViewLineageExtraction" + } + }, + "additionalProperties": false, + "required": [ + "hostPort", + "username" + ] +} diff --git a/schemas/entity/services/connections/database/synapseConnection.json b/schemas/entity/services/connections/database/synapseConnection.json index 8b06b0e..da3ea49 100644 --- a/schemas/entity/services/connections/database/synapseConnection.json +++ b/schemas/entity/services/connections/database/synapseConnection.json @@ -136,6 
+136,11 @@ "description": "Regex to only include/exclude databases that matches the pattern.", "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" }, + "storedProcedureFilterPattern": { + "title": "Default Stored Procedure Filter Pattern", + "description": "Regex to only include/exclude stored procedures that matches the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" + }, "supportsMetadataExtraction": { "title": "Supports Metadata Extraction", "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" diff --git a/schemas/entity/services/connections/database/teradataConnection.json b/schemas/entity/services/connections/database/teradataConnection.json index 8fd0564..70b4ce3 100644 --- a/schemas/entity/services/connections/database/teradataConnection.json +++ b/schemas/entity/services/connections/database/teradataConnection.json @@ -99,6 +99,11 @@ "description": "Regex to only include/exclude databases that matches the pattern.", "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" }, + "storedProcedureFilterPattern": { + "title": "Default Stored Procedure Filter Pattern", + "description": "Regex to only include/exclude stored procedures that matches the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" + }, "supportsMetadataExtraction": { "title": "Supports Metadata Extraction", "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" diff --git a/schemas/entity/services/connections/database/timescaleConnection.json b/schemas/entity/services/connections/database/timescaleConnection.json index 08acdff..675b937 100644 --- a/schemas/entity/services/connections/database/timescaleConnection.json +++ b/schemas/entity/services/connections/database/timescaleConnection.json @@ -108,6 +108,12 @@ "type": "string", "default": "TimescalePolicyTags" }, + "queryStatementSource": { + "title": "Query Statement Source", + 
"description": "Fully qualified name of the view or table to use for query logs. If not provided, defaults to pg_stat_statements. Use this to configure a custom view (e.g., my_schema.custom_pg_stat_statements) when direct access to pg_stat_statements is restricted.", + "type": "string", + "default": "pg_stat_statements" + }, "connectionOptions": { "title": "Connection Options", "$ref": "../connectionBasicType.json#/definitions/connectionOptions" diff --git a/schemas/entity/services/connections/drive/sftpConnection.json b/schemas/entity/services/connections/drive/sftpConnection.json new file mode 100644 index 0000000..f47935a --- /dev/null +++ b/schemas/entity/services/connections/drive/sftpConnection.json @@ -0,0 +1,138 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/drive/sftpConnection.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "SftpConnection", + "description": "SFTP Connection Config for secure file transfer protocol servers.", + "type": "object", + "javaType": "org.openmetadata.schema.services.connections.drive.SftpConnection", + "definitions": { + "sftpType": { + "description": "SFTP service type", + "type": "string", + "enum": ["Sftp"], + "default": "Sftp" + }, + "basicAuth": { + "title": "Username/Password Authentication", + "description": "Username and password authentication for SFTP", + "type": "object", + "properties": { + "username": { + "title": "Username", + "description": "SFTP username", + "type": "string" + }, + "password": { + "title": "Password", + "description": "SFTP password", + "type": "string", + "format": "password" + } + }, + "required": ["username", "password"], + "additionalProperties": false + }, + "keyAuth": { + "title": "Private Key Authentication", + "description": "SSH private key authentication for SFTP", + "type": "object", + "properties": { + "username": { + "title": "Username", + "description": "SFTP username", + "type": "string" + }, + "privateKey": { + "title": 
"Private Key", + "description": "SSH private key content in PEM format. Supports RSA, Ed25519, ECDSA, and DSS keys.", + "type": "string", + "format": "password" + }, + "privateKeyPassphrase": { + "title": "Private Key Passphrase", + "description": "Passphrase for the private key (if encrypted)", + "type": "string", + "format": "password" + } + }, + "required": ["username", "privateKey"], + "additionalProperties": false + } + }, + "properties": { + "type": { + "title": "Service Type", + "description": "Service Type", + "$ref": "#/definitions/sftpType", + "default": "Sftp" + }, + "host": { + "title": "Host", + "description": "SFTP server hostname or IP address", + "type": "string" + }, + "port": { + "title": "Port", + "description": "SFTP server port number", + "type": "integer", + "default": 22 + }, + "authType": { + "title": "Authentication Type", + "description": "Authentication method: username/password or SSH private key", + "oneOf": [ + { + "$ref": "#/definitions/basicAuth" + }, + { + "$ref": "#/definitions/keyAuth" + } + ] + }, + "rootDirectories": { + "title": "Root Directories", + "description": "List of root directories to scan for files and subdirectories. 
If not specified, defaults to the root directory (/).", + "type": "array", + "items": { + "type": "string" + }, + "default": ["/"] + }, + "connectionOptions": { + "title": "Connection Options", + "$ref": "../connectionBasicType.json#/definitions/connectionOptions" + }, + "connectionArguments": { + "title": "Connection Arguments", + "$ref": "../connectionBasicType.json#/definitions/connectionArguments" + }, + "directoryFilterPattern": { + "title": "Directory Filter Pattern", + "description": "Regex to only include/exclude directories that match the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" + }, + "fileFilterPattern": { + "title": "File Filter Pattern", + "description": "Regex to only include/exclude files that match the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern" + }, + "structuredDataFilesOnly": { + "title": "Structured Data Files Only", + "description": "When enabled, only catalog structured data files (CSV, TSV) that can have schema extracted. Non-structured files like images, PDFs, videos, etc. will be skipped.", + "type": "boolean", + "default": false + }, + "extractSampleData": { + "title": "Extract Sample Data", + "description": "When enabled, extract sample data from structured files (CSV, TSV). 
This is disabled by default to avoid performance overhead.", + "type": "boolean", + "default": false + }, + "supportsMetadataExtraction": { + "title": "Supports Metadata Extraction", + "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" + } + }, + "required": ["host", "authType"], + "additionalProperties": false +} diff --git a/schemas/entity/services/connections/llm/anthropicConnection.json b/schemas/entity/services/connections/llm/anthropicConnection.json new file mode 100644 index 0000000..3800d2a --- /dev/null +++ b/schemas/entity/services/connections/llm/anthropicConnection.json @@ -0,0 +1,59 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/llm/anthropicConnection.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "AnthropicConnection", + "description": "Anthropic (Claude) LLM Service Connection Config", + "type": "object", + "javaType": "org.openmetadata.schema.services.connections.llm.AnthropicConnection", + "definitions": { + "anthropicType": { + "description": "Service type", + "type": "string", + "enum": ["Anthropic"], + "default": "Anthropic" + } + }, + "properties": { + "type": { + "title": "Service Type", + "description": "Service Type", + "$ref": "#/definitions/anthropicType", + "default": "Anthropic" + }, + "apiKey": { + "title": "API Key", + "description": "Anthropic API Key", + "type": "string", + "format": "password" + }, + "baseURL": { + "title": "Base URL", + "description": "Optional custom base URL for Anthropic API", + "type": "string", + "default": "https://api.anthropic.com" + }, + "timeout": { + "title": "Timeout", + "description": "Request timeout in seconds", + "type": "integer", + "default": 60 + }, + "maxRetries": { + "title": "Max Retries", + "description": "Maximum number of retries for failed requests", + "type": "integer", + "default": 3 + }, + "modelFilterPattern": { + "description": "Regex to only fetch models with names matching the pattern", + "$ref": 
"../../../../type/filterPattern.json#/definitions/filterPattern", + "title": "Model Filter Pattern" + }, + "supportsMetadataExtraction": { + "title": "Supports Metadata Extraction", + "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" + } + }, + "additionalProperties": false, + "required": ["apiKey"] +} diff --git a/schemas/entity/services/connections/llm/azureOpenAIConnection.json b/schemas/entity/services/connections/llm/azureOpenAIConnection.json new file mode 100644 index 0000000..a845e7d --- /dev/null +++ b/schemas/entity/services/connections/llm/azureOpenAIConnection.json @@ -0,0 +1,69 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/llm/azureOpenAIConnection.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "AzureOpenAIConnection", + "description": "Azure OpenAI Service Connection Config", + "type": "object", + "javaType": "org.openmetadata.schema.services.connections.llm.AzureOpenAIConnection", + "definitions": { + "azureOpenAIType": { + "description": "Service type", + "type": "string", + "enum": ["AzureOpenAI"], + "default": "AzureOpenAI" + } + }, + "properties": { + "type": { + "title": "Service Type", + "description": "Service Type", + "$ref": "#/definitions/azureOpenAIType", + "default": "AzureOpenAI" + }, + "apiKey": { + "title": "API Key", + "description": "Azure OpenAI API Key", + "type": "string", + "format": "password" + }, + "endpoint": { + "title": "Endpoint", + "description": "Azure OpenAI endpoint URL (e.g., https://your-resource-name.openai.azure.com/)", + "type": "string" + }, + "apiVersion": { + "title": "API Version", + "description": "Azure OpenAI API version", + "type": "string", + "default": "2024-02-15-preview" + }, + "deployment": { + "title": "Deployment Name", + "description": "Default deployment name to use", + "type": "string" + }, + "timeout": { + "title": "Timeout", + "description": "Request timeout in seconds", + "type": "integer", + "default": 60 + 
}, + "maxRetries": { + "title": "Max Retries", + "description": "Maximum number of retries for failed requests", + "type": "integer", + "default": 3 + }, + "modelFilterPattern": { + "description": "Regex to only fetch models with names matching the pattern", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern", + "title": "Model Filter Pattern" + }, + "supportsMetadataExtraction": { + "title": "Supports Metadata Extraction", + "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" + } + }, + "additionalProperties": false, + "required": ["apiKey", "endpoint"] +} diff --git a/schemas/entity/services/connections/llm/bedrockConnection.json b/schemas/entity/services/connections/llm/bedrockConnection.json new file mode 100644 index 0000000..3b7cd99 --- /dev/null +++ b/schemas/entity/services/connections/llm/bedrockConnection.json @@ -0,0 +1,75 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/llm/bedrockConnection.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "BedrockConnection", + "description": "AWS Bedrock LLM Service Connection Config", + "type": "object", + "javaType": "org.openmetadata.schema.services.connections.llm.BedrockConnection", + "definitions": { + "bedrockType": { + "description": "Service type", + "type": "string", + "enum": ["Bedrock"], + "default": "Bedrock" + } + }, + "properties": { + "type": { + "title": "Service Type", + "description": "Service Type", + "$ref": "#/definitions/bedrockType", + "default": "Bedrock" + }, + "awsAccessKeyId": { + "title": "AWS Access Key ID", + "description": "AWS Access Key ID for authentication", + "type": "string", + "format": "password" + }, + "awsSecretAccessKey": { + "title": "AWS Secret Access Key", + "description": "AWS Secret Access Key for authentication", + "type": "string", + "format": "password" + }, + "awsSessionToken": { + "title": "AWS Session Token", + "description": "Optional AWS Session Token for 
temporary credentials", + "type": "string", + "format": "password" + }, + "region": { + "title": "AWS Region", + "description": "AWS region where Bedrock is deployed (e.g., us-east-1)", + "type": "string" + }, + "assumeRoleArn": { + "title": "Assume Role ARN", + "description": "Optional ARN of IAM role to assume", + "type": "string" + }, + "timeout": { + "title": "Timeout", + "description": "Request timeout in seconds", + "type": "integer", + "default": 60 + }, + "maxRetries": { + "title": "Max Retries", + "description": "Maximum number of retries for failed requests", + "type": "integer", + "default": 3 + }, + "modelFilterPattern": { + "description": "Regex to only fetch models with names matching the pattern", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern", + "title": "Model Filter Pattern" + }, + "supportsMetadataExtraction": { + "title": "Supports Metadata Extraction", + "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" + } + }, + "additionalProperties": false, + "required": ["region"] +} diff --git a/schemas/entity/services/connections/llm/customLLMConnection.json b/schemas/entity/services/connections/llm/customLLMConnection.json new file mode 100644 index 0000000..1d0fb52 --- /dev/null +++ b/schemas/entity/services/connections/llm/customLLMConnection.json @@ -0,0 +1,77 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/llm/customLLMConnection.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "CustomLLMConnection", + "description": "Custom LLM Service Connection Config for self-hosted or custom LLM providers", + "type": "object", + "javaType": "org.openmetadata.schema.services.connections.llm.CustomLLMConnection", + "definitions": { + "customLLMType": { + "description": "Service type", + "type": "string", + "enum": ["CustomLLM"], + "default": "CustomLLM" + } + }, + "properties": { + "type": { + "title": "Service Type", + "description": "Service Type", + 
"$ref": "#/definitions/customLLMType", + "default": "CustomLLM" + }, + "baseURL": { + "title": "Base URL", + "description": "Base URL for the custom LLM API endpoint", + "type": "string" + }, + "apiKey": { + "title": "API Key", + "description": "Optional API Key for authentication", + "type": "string", + "format": "password" + }, + "authType": { + "title": "Authentication Type", + "description": "Type of authentication", + "type": "string", + "enum": ["None", "APIKey", "Bearer", "BasicAuth", "Custom"], + "default": "APIKey" + }, + "headers": { + "title": "Custom Headers", + "description": "Additional custom headers for API requests", + "type": "object", + "additionalProperties": {"type": "string"} + }, + "timeout": { + "title": "Timeout", + "description": "Request timeout in seconds", + "type": "integer", + "default": 60 + }, + "maxRetries": { + "title": "Max Retries", + "description": "Maximum number of retries for failed requests", + "type": "integer", + "default": 3 + }, + "verifySsl": { + "title": "Verify SSL", + "description": "Whether to verify SSL certificates", + "type": "boolean", + "default": true + }, + "modelFilterPattern": { + "description": "Regex to only fetch models with names matching the pattern", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern", + "title": "Model Filter Pattern" + }, + "supportsMetadataExtraction": { + "title": "Supports Metadata Extraction", + "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" + } + }, + "additionalProperties": false, + "required": ["baseURL"] +} diff --git a/schemas/entity/services/connections/llm/huggingFaceConnection.json b/schemas/entity/services/connections/llm/huggingFaceConnection.json new file mode 100644 index 0000000..1901f19 --- /dev/null +++ b/schemas/entity/services/connections/llm/huggingFaceConnection.json @@ -0,0 +1,59 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/llm/huggingFaceConnection.json", + "$schema": 
"http://json-schema.org/draft-07/schema#", + "title": "HuggingFaceConnection", + "description": "HuggingFace LLM Service Connection Config", + "type": "object", + "javaType": "org.openmetadata.schema.services.connections.llm.HuggingFaceConnection", + "definitions": { + "huggingFaceType": { + "description": "Service type", + "type": "string", + "enum": ["HuggingFace"], + "default": "HuggingFace" + } + }, + "properties": { + "type": { + "title": "Service Type", + "description": "Service Type", + "$ref": "#/definitions/huggingFaceType", + "default": "HuggingFace" + }, + "apiKey": { + "title": "API Token", + "description": "HuggingFace API Token", + "type": "string", + "format": "password" + }, + "baseURL": { + "title": "Base URL", + "description": "Optional custom base URL (for HuggingFace Inference Endpoints)", + "type": "string", + "default": "https://api-inference.huggingface.co" + }, + "timeout": { + "title": "Timeout", + "description": "Request timeout in seconds", + "type": "integer", + "default": 60 + }, + "maxRetries": { + "title": "Max Retries", + "description": "Maximum number of retries for failed requests", + "type": "integer", + "default": 3 + }, + "modelFilterPattern": { + "description": "Regex to only fetch models with names matching the pattern", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern", + "title": "Model Filter Pattern" + }, + "supportsMetadataExtraction": { + "title": "Supports Metadata Extraction", + "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" + } + }, + "additionalProperties": false, + "required": ["apiKey"] +} diff --git a/schemas/entity/services/connections/llm/ollamaConnection.json b/schemas/entity/services/connections/llm/ollamaConnection.json new file mode 100644 index 0000000..7905cd0 --- /dev/null +++ b/schemas/entity/services/connections/llm/ollamaConnection.json @@ -0,0 +1,53 @@ +{ + "$id": 
"https://open-metadata.org/schema/entity/services/connections/llm/ollamaConnection.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "OllamaConnection", + "description": "Ollama (Local LLM) Service Connection Config", + "type": "object", + "javaType": "org.openmetadata.schema.services.connections.llm.OllamaConnection", + "definitions": { + "ollamaType": { + "description": "Service type", + "type": "string", + "enum": ["Ollama"], + "default": "Ollama" + } + }, + "properties": { + "type": { + "title": "Service Type", + "description": "Service Type", + "$ref": "#/definitions/ollamaType", + "default": "Ollama" + }, + "baseURL": { + "title": "Base URL", + "description": "Ollama server URL (e.g., http://localhost:11434)", + "type": "string", + "default": "http://localhost:11434" + }, + "timeout": { + "title": "Timeout", + "description": "Request timeout in seconds", + "type": "integer", + "default": 60 + }, + "maxRetries": { + "title": "Max Retries", + "description": "Maximum number of retries for failed requests", + "type": "integer", + "default": 3 + }, + "modelFilterPattern": { + "description": "Regex to only fetch models with names matching the pattern", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern", + "title": "Model Filter Pattern" + }, + "supportsMetadataExtraction": { + "title": "Supports Metadata Extraction", + "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" + } + }, + "additionalProperties": false, + "required": ["baseURL"] +} diff --git a/schemas/entity/services/connections/llm/openAIConnection.json b/schemas/entity/services/connections/llm/openAIConnection.json new file mode 100644 index 0000000..961d7af --- /dev/null +++ b/schemas/entity/services/connections/llm/openAIConnection.json @@ -0,0 +1,64 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/llm/openAIConnection.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": 
"OpenAIConnection", + "description": "OpenAI LLM Service Connection Config", + "type": "object", + "javaType": "org.openmetadata.schema.services.connections.llm.OpenAIConnection", + "definitions": { + "openAIType": { + "description": "Service type", + "type": "string", + "enum": ["OpenAI"], + "default": "OpenAI" + } + }, + "properties": { + "type": { + "title": "Service Type", + "description": "Service Type", + "$ref": "#/definitions/openAIType", + "default": "OpenAI" + }, + "apiKey": { + "title": "API Key", + "description": "OpenAI API Key", + "type": "string", + "format": "password" + }, + "organization": { + "title": "Organization ID", + "description": "Optional OpenAI Organization ID", + "type": "string" + }, + "baseURL": { + "title": "Base URL", + "description": "Optional custom base URL for OpenAI API (for compatible services)", + "type": "string", + "default": "https://api.openai.com/v1" + }, + "timeout": { + "title": "Timeout", + "description": "Request timeout in seconds", + "type": "integer", + "default": 60 + }, + "maxRetries": { + "title": "Max Retries", + "description": "Maximum number of retries for failed requests", + "type": "integer", + "default": 3 + }, + "modelFilterPattern": { + "description": "Regex to only fetch models with names matching the pattern", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern", + "title": "Model Filter Pattern" + }, + "supportsMetadataExtraction": { + "title": "Supports Metadata Extraction", + "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" + } + }, + "additionalProperties": false, + "required": ["apiKey"] +} diff --git a/schemas/entity/services/connections/llm/vertexAIConnection.json b/schemas/entity/services/connections/llm/vertexAIConnection.json new file mode 100644 index 0000000..a7e5c18 --- /dev/null +++ b/schemas/entity/services/connections/llm/vertexAIConnection.json @@ -0,0 +1,64 @@ +{ + "$id": 
"https://open-metadata.org/schema/entity/services/connections/llm/vertexAIConnection.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "VertexAIConnection", + "description": "Google Cloud Vertex AI LLM Service Connection Config", + "type": "object", + "javaType": "org.openmetadata.schema.services.connections.llm.VertexAIConnection", + "definitions": { + "vertexAIType": { + "description": "Service type", + "type": "string", + "enum": ["VertexAI"], + "default": "VertexAI" + } + }, + "properties": { + "type": { + "title": "Service Type", + "description": "Service Type", + "$ref": "#/definitions/vertexAIType", + "default": "VertexAI" + }, + "projectId": { + "title": "Project ID", + "description": "GCP Project ID", + "type": "string" + }, + "location": { + "title": "Location", + "description": "GCP region/location (e.g., us-central1)", + "type": "string", + "default": "us-central1" + }, + "credentials": { + "title": "GCP Credentials", + "description": "GCP service account credentials JSON", + "type": "string", + "format": "password" + }, + "timeout": { + "title": "Timeout", + "description": "Request timeout in seconds", + "type": "integer", + "default": 60 + }, + "maxRetries": { + "title": "Max Retries", + "description": "Maximum number of retries for failed requests", + "type": "integer", + "default": 3 + }, + "modelFilterPattern": { + "description": "Regex to only fetch models with names matching the pattern", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern", + "title": "Model Filter Pattern" + }, + "supportsMetadataExtraction": { + "title": "Supports Metadata Extraction", + "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" + } + }, + "additionalProperties": false, + "required": ["projectId", "credentials"] +} diff --git a/schemas/entity/services/connections/mcp/mcpConnection.json b/schemas/entity/services/connections/mcp/mcpConnection.json new file mode 100644 index 0000000..0c2e4d1 --- 
/dev/null +++ b/schemas/entity/services/connections/mcp/mcpConnection.json @@ -0,0 +1,147 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/mcp/mcpConnection.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "McpConnection", + "description": "MCP (Model Context Protocol) Service Connection for discovering and cataloging MCP servers, their tools, resources, and prompts.", + "type": "object", + "javaType": "org.openmetadata.schema.services.connections.mcp.McpConnection", + "definitions": { + "mcpType": { + "description": "Service type", + "type": "string", + "enum": ["Mcp"], + "default": "Mcp" + }, + "discoveryMethod": { + "description": "Method to discover MCP servers", + "type": "string", + "enum": ["ConfigFile", "DirectConnection", "Registry"], + "javaEnums": [ + {"name": "ConfigFile", "description": "Discover servers from configuration files (e.g., claude_desktop_config.json)"}, + {"name": "DirectConnection", "description": "Connect directly to a specific MCP server"}, + {"name": "Registry", "description": "Discover servers from an MCP registry"} + ] + }, + "transportType": { + "description": "MCP transport protocol type", + "type": "string", + "enum": ["Stdio", "SSE", "StreamableHTTP"], + "javaEnums": [ + {"name": "Stdio", "description": "Standard input/output transport - spawns a subprocess"}, + {"name": "SSE", "description": "Server-Sent Events transport over HTTP"}, + {"name": "StreamableHTTP", "description": "Streamable HTTP transport"} + ] + }, + "mcpServerConfig": { + "type": "object", + "description": "Configuration for a single MCP server to connect to directly", + "properties": { + "name": { + "type": "string", + "description": "Name to assign to this MCP server" + }, + "transport": { + "$ref": "#/definitions/transportType", + "default": "Stdio" + }, + "command": { + "type": "string", + "description": "Command to execute for Stdio transport (e.g., 'npx', 'uvx', 'python')" + }, + "args": { + "type": 
"array", + "items": {"type": "string"}, + "description": "Arguments to pass to the command" + }, + "env": { + "type": "object", + "additionalProperties": {"type": "string"}, + "description": "Environment variables for the server process" + }, + "url": { + "type": "string", + "description": "URL for SSE or StreamableHTTP transport" + }, + "apiKey": { + "type": "string", + "format": "password", + "description": "API key for authenticated MCP servers" + } + }, + "required": ["name"] + } + }, + "properties": { + "type": { + "title": "Service Type", + "description": "Service Type", + "$ref": "#/definitions/mcpType", + "default": "Mcp" + }, + "discoveryMethod": { + "title": "Discovery Method", + "description": "How to discover MCP servers", + "$ref": "#/definitions/discoveryMethod", + "default": "ConfigFile" + }, + "configFilePaths": { + "title": "Config File Paths", + "description": "Paths to MCP configuration files to scan for server definitions. Supports Claude Desktop config, VS Code settings, etc.", + "type": "array", + "items": {"type": "string"}, + "default": [] + }, + "servers": { + "title": "Direct Server Connections", + "description": "List of MCP servers to connect to directly (when discoveryMethod is DirectConnection)", + "type": "array", + "items": {"$ref": "#/definitions/mcpServerConfig"}, + "default": [] + }, + "registryUrl": { + "title": "Registry URL", + "description": "URL of MCP registry to query for server discovery (when discoveryMethod is Registry)", + "type": "string" + }, + "connectionTimeout": { + "title": "Connection Timeout", + "description": "Timeout in seconds for connecting to MCP servers", + "type": "integer", + "default": 30 + }, + "initializationTimeout": { + "title": "Initialization Timeout", + "description": "Timeout in seconds for MCP server initialization handshake", + "type": "integer", + "default": 60 + }, + "fetchTools": { + "title": "Fetch Tools", + "description": "Whether to fetch and catalog tools from MCP servers", + "type": 
"boolean", + "default": true + }, + "fetchResources": { + "title": "Fetch Resources", + "description": "Whether to fetch and catalog resources from MCP servers", + "type": "boolean", + "default": true + }, + "fetchPrompts": { + "title": "Fetch Prompts", + "description": "Whether to fetch and catalog prompts from MCP servers", + "type": "boolean", + "default": true + }, + "serverFilterPattern": { + "description": "Regex to only fetch servers with names matching the pattern", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern", + "title": "Server Filter Pattern" + }, + "supportsMetadataExtraction": { + "title": "Supports Metadata Extraction", + "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" + } + }, + "additionalProperties": false +} diff --git a/schemas/entity/services/connections/messaging/pubSubConnection.json b/schemas/entity/services/connections/messaging/pubSubConnection.json new file mode 100644 index 0000000..0c81912 --- /dev/null +++ b/schemas/entity/services/connections/messaging/pubSubConnection.json @@ -0,0 +1,74 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/messaging/pubSubConnection.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "PubSubConnection", + "description": "Google Cloud Pub/Sub Connection Config", + "type": "object", + "javaType": "org.openmetadata.schema.services.connections.messaging.PubSubConnection", + "definitions": { + "pubSubType": { + "description": "Service type.", + "type": "string", + "enum": ["PubSub"], + "default": "PubSub" + } + }, + "properties": { + "type": { + "title": "Service Type", + "description": "Service Type", + "$ref": "#/definitions/pubSubType", + "default": "PubSub" + }, + "gcpConfig": { + "title": "GCP Credentials Configuration", + "description": "GCP credentials configuration for authenticating with Pub/Sub.", + "$ref": "../../../../security/credentials/gcpCredentials.json" + }, + "projectId": { + "title": 
"GCP Project ID", + "description": "GCP Project ID where Pub/Sub topics are located. If not specified, will be read from credentials.", + "type": "string" + }, + "hostPort": { + "title": "Host and Port", + "description": "Pub/Sub APIs URL. For local testing with the emulator, use http://localhost:8085.", + "type": "string", + "default": "pubsub.googleapis.com" + }, + "useEmulator": { + "title": "Use Emulator", + "description": "Connect to a Pub/Sub emulator rather than the production service.", + "type": "boolean", + "default": false + }, + "schemaRegistryEnabled": { + "title": "Enable Schema Registry", + "description": "Enable fetching schemas from Pub/Sub Schema Registry.", + "type": "boolean", + "default": true + }, + "includeSubscriptions": { + "title": "Include Subscriptions", + "description": "Include subscription metadata for each topic.", + "type": "boolean", + "default": true + }, + "includeDeadLetterTopics": { + "title": "Include Dead Letter Topics", + "description": "Include dead letter topics in metadata extraction.", + "type": "boolean", + "default": false + }, + "topicFilterPattern": { + "description": "Regex to only fetch topics that match the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern", + "title": "Topic Filter Pattern" + }, + "supportsMetadataExtraction": { + "title": "Supports Metadata Extraction", + "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" + } + }, + "additionalProperties": false +} diff --git a/schemas/entity/services/connections/pipeline/airflowConnection.json b/schemas/entity/services/connections/pipeline/airflowConnection.json index 9b1d79b..6f72f1e 100644 --- a/schemas/entity/services/connections/pipeline/airflowConnection.json +++ b/schemas/entity/services/connections/pipeline/airflowConnection.json @@ -33,9 +33,12 @@ "default": "10" }, "connection": { - "title": "Metadata Database Connection", - "description": "Underlying database connection. 
See https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for supported backends.", + "title": "Airflow Connection", + "description": "Choose between database connection or REST API connection to fetch metadata from Airflow.", "oneOf": [ + { + "$ref": "../../../utils/airflowRestApiConnection.json" + }, { "$ref": "backendConnection.json" }, diff --git a/schemas/entity/services/connections/pipeline/dagsterConnection.json b/schemas/entity/services/connections/pipeline/dagsterConnection.json index a4ddae0..fe8d2f1 100644 --- a/schemas/entity/services/connections/pipeline/dagsterConnection.json +++ b/schemas/entity/services/connections/pipeline/dagsterConnection.json @@ -38,6 +38,13 @@ "type": "integer", "default": "1000" }, + "stripAssetKeyPrefixLength": { + "title": "Strip Asset Key Prefix Length", + "description": "Number of leading segments to remove from asset key paths before resolving to tables. For example, if your asset keys follow the pattern 'project/environment/schema/table' but you only need 'schema/table', set this to 2.", + "type": "integer", + "default": 0, + "minimum": 0 + }, "pipelineFilterPattern": { "description": "Regex exclude pipelines.", "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern", diff --git a/schemas/entity/services/connections/pipeline/dbtCloudConnection.json b/schemas/entity/services/connections/pipeline/dbtCloudConnection.json index 2cf13d1..5fbf7dc 100644 --- a/schemas/entity/services/connections/pipeline/dbtCloudConnection.json +++ b/schemas/entity/services/connections/pipeline/dbtCloudConnection.json @@ -55,6 +55,14 @@ "type": "string" } }, + "environmentIds": { + "title": "Environment Ids", + "description": "List of IDs of your DBT cloud environments separated by comma `,`", + "type": "array", + "items": { + "type": "string" + } + }, "numberOfRuns": { "title": "Number of Runs", "description": "Number of runs to fetch from DBT cloud", diff --git 
a/schemas/entity/services/connections/pipeline/matillion/matillionDPC.json b/schemas/entity/services/connections/pipeline/matillion/matillionDPC.json new file mode 100644 index 0000000..1a2f640 --- /dev/null +++ b/schemas/entity/services/connections/pipeline/matillion/matillionDPC.json @@ -0,0 +1,46 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/pipeline/matillion/matillionDPC.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Matillion DPC Auth Config", + "description": "Matillion Data Productivity Cloud Auth Config.", + "javaType": "org.openmetadata.schema.services.connections.pipeline.matillion.MatillionDPCAuth", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "MatillionDPC" + ], + "default": "MatillionDPC" + }, + "clientId": { + "title": "Client ID", + "description": "OAuth2 Client ID for Matillion DPC authentication.", + "type": "string" + }, + "clientSecret": { + "title": "Client Secret", + "description": "OAuth2 Client Secret for Matillion DPC authentication.", + "type": "string", + "format": "password" + }, + "region": { + "title": "Region", + "description": "Matillion DPC region. Determines the API base URL.", + "type": "string", + "enum": [ + "us1", + "eu1" + ], + "default": "us1" + }, + "personalAccessToken": { + "title": "Personal Access Token", + "description": "Personal Access Token for Matillion DPC. 
Alternative to OAuth2 Client Credentials.", + "type": "string", + "format": "password" + } + }, + "required": [], + "additionalProperties": false +} diff --git a/schemas/entity/services/connections/pipeline/matillionConnection.json b/schemas/entity/services/connections/pipeline/matillionConnection.json index b2eaa8b..c283df8 100644 --- a/schemas/entity/services/connections/pipeline/matillionConnection.json +++ b/schemas/entity/services/connections/pipeline/matillionConnection.json @@ -28,6 +28,9 @@ "oneOf": [ { "$ref": "matillion/matillionETL.json" + }, + { + "$ref": "matillion/matillionDPC.json" } ] }, @@ -36,10 +39,18 @@ "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern", "title": "Default Pipeline Filter Pattern" }, + "lineageLookbackDays": { + "title": "Lineage Lookback Days", + "description": "Number of days to look back when fetching lineage events from Matillion DPC OpenLineage API.", + "type": "integer", + "default": 30, + "minimum": 1, + "maximum": 365 + }, "supportsMetadataExtraction": { "title": "Supports Metadata Extraction", "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" } }, "additionalProperties": false -} \ No newline at end of file +} diff --git a/schemas/entity/services/connections/pipeline/microsoftFabricPipelineConnection.json b/schemas/entity/services/connections/pipeline/microsoftFabricPipelineConnection.json new file mode 100644 index 0000000..84a2fb6 --- /dev/null +++ b/schemas/entity/services/connections/pipeline/microsoftFabricPipelineConnection.json @@ -0,0 +1,62 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/pipeline/microsoftFabricPipelineConnection.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "MicrosoftFabricPipelineConnection", + "description": "Microsoft Fabric Data Factory Pipeline Connection Config", + "type": "object", + "javaType": 
"org.openmetadata.schema.services.connections.pipeline.MicrosoftFabricPipelineConnection", + "definitions": { + "MicrosoftFabricPipelineType": { + "description": "Service type.", + "type": "string", + "enum": ["MicrosoftFabricPipeline"], + "default": "MicrosoftFabricPipeline" + } + }, + "properties": { + "type": { + "title": "Service Type", + "description": "Service Type", + "$ref": "#/definitions/MicrosoftFabricPipelineType", + "default": "MicrosoftFabricPipeline" + }, + "workspaceId": { + "title": "Workspace ID", + "description": "The Microsoft Fabric workspace ID where the pipelines are located.", + "type": "string" + }, + "clientId": { + "title": "Client ID", + "description": "Azure Application (client) ID for Service Principal authentication.", + "type": "string" + }, + "clientSecret": { + "title": "Client Secret", + "description": "Azure Application client secret for Service Principal authentication.", + "type": "string", + "format": "password" + }, + "tenantId": { + "title": "Tenant ID", + "description": "Azure Directory (tenant) ID for Service Principal authentication.", + "type": "string" + }, + "authorityUri": { + "title": "Authority URI", + "description": "Azure Active Directory authority URI. 
Defaults to https://login.microsoftonline.com/", + "type": "string", + "default": "https://login.microsoftonline.com/" + }, + "pipelineFilterPattern": { + "description": "Regex to only include/exclude pipelines that match the pattern.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern", + "title": "Default Pipeline Filter Pattern" + }, + "supportsMetadataExtraction": { + "title": "Supports Metadata Extraction", + "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" + } + }, + "additionalProperties": false, + "required": ["workspaceId", "clientId", "clientSecret", "tenantId"] +} diff --git a/schemas/entity/services/connections/pipeline/mulesoftConnection.json b/schemas/entity/services/connections/pipeline/mulesoftConnection.json new file mode 100644 index 0000000..ed8ff91 --- /dev/null +++ b/schemas/entity/services/connections/pipeline/mulesoftConnection.json @@ -0,0 +1,67 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/pipeline/mulesoftConnection.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "MulesoftConnection", + "description": "MuleSoft Anypoint Platform Connection Config", + "type": "object", + "javaType": "org.openmetadata.schema.services.connections.pipeline.MulesoftConnection", + "definitions": { + "MulesoftType": { + "description": "Service type.", + "type": "string", + "enum": ["Mulesoft"], + "default": "Mulesoft" + } + }, + "properties": { + "type": { + "title": "Service Type", + "description": "Service Type", + "$ref": "#/definitions/MulesoftType", + "default": "Mulesoft" + }, + "hostPort": { + "expose": true, + "title": "Host And Port", + "description": "MuleSoft Anypoint Platform URL. 
Use https://anypoint.mulesoft.com for US cloud, https://eu1.anypoint.mulesoft.com for EU cloud, or your on-premises URL.", + "type": "string", + "format": "uri", + "default": "https://anypoint.mulesoft.com" + }, + "authentication": { + "title": "Authentication", + "description": "Choose between Connected App (OAuth 2.0) or Basic Authentication.", + "oneOf": [ + { + "title": "Basic Authentication", + "$ref": "../../../../security/credentials/basicAuth.json" + }, + { + "title": "OAuth 2.0 Client Credentials (Connected App)", + "$ref": "./airbyte/oauthClientAuth.json" + } + ] + }, + "organizationId": { + "title": "Organization ID", + "description": "Anypoint Platform Organization ID. If not provided, the connector will use the user's default organization.", + "type": "string" + }, + "environmentId": { + "title": "Environment ID", + "description": "Anypoint Platform Environment ID. If not provided, the connector will discover all accessible environments.", + "type": "string" + }, + "pipelineFilterPattern": { + "description": "Regex to filter MuleSoft applications by name.", + "$ref": "../../../../type/filterPattern.json#/definitions/filterPattern", + "title": "Application Filter Pattern" + }, + "supportsMetadataExtraction": { + "title": "Supports Metadata Extraction", + "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" + } + }, + "additionalProperties": false, + "required": ["authentication"] +} diff --git a/schemas/entity/services/connections/pipeline/nifi/clientCertificateAuth.json b/schemas/entity/services/connections/pipeline/nifi/clientCertificateAuth.json index a540edd..f8223b1 100644 --- a/schemas/entity/services/connections/pipeline/nifi/clientCertificateAuth.json +++ b/schemas/entity/services/connections/pipeline/nifi/clientCertificateAuth.json @@ -7,12 +7,12 @@ "type": "object", "properties": { "certificateAuthorityPath": { - "title": "Certificat Authority Path", + "title": "Certificate Authority Path", "description": "Path to the 
root CA certificate", "type": "string" }, "clientCertificatePath": { - "title": "Client Certificat", + "title": "Client Certificate", "description": "Path to the client certificate", "type": "string" }, diff --git a/schemas/entity/services/connections/pipeline/nifiConnection.json b/schemas/entity/services/connections/pipeline/nifiConnection.json index 86b0960..1e18db2 100644 --- a/schemas/entity/services/connections/pipeline/nifiConnection.json +++ b/schemas/entity/services/connections/pipeline/nifiConnection.json @@ -45,12 +45,12 @@ "javaType": "org.openmetadata.schema.services.connections.pipeline.NifiClientAuth", "properties": { "certificateAuthorityPath":{ - "title":"Certificat Authority Path", + "title":"Certificate Authority Path", "description": "Path to the root CA certificate", "type": "string" }, "clientCertificatePath":{ - "title":"Client Certificat", + "title":"Client Certificate", "description": "Path to the client certificate", "type": "string" }, diff --git a/schemas/entity/services/connections/pipeline/openLineageConnection.json b/schemas/entity/services/connections/pipeline/openLineageConnection.json index 459ebb1..e9bf9ba 100644 --- a/schemas/entity/services/connections/pipeline/openLineageConnection.json +++ b/schemas/entity/services/connections/pipeline/openLineageConnection.json @@ -13,6 +13,151 @@ "OpenLineage" ], "default": "OpenLineage" + }, + "kafkaBrokerConfig": { + "title": "Kafka", + "description": "Kafka broker configuration for OpenLineage events.", + "type": "object", + "additionalProperties": false, + "properties": { + "brokersUrl": { + "title": "Kafka Brokers List", + "description": "Kafka bootstrap servers URL.", + "type": "string" + }, + "topicName": { + "title": "Topic Name", + "description": "Topic from where OpenLineage events will be pulled.", + "type": "string" + }, + "consumerGroupName": { + "title": "Consumer Group", + "description": "Kafka consumer group name.", + "type": "string" + }, + "consumerOffsets": { + "title": 
"Initial Consumer Offsets", + "description": "Initial Kafka consumer offset.", + "default": "earliest", + "type": "string", + "enum": [ + "earliest", + "latest" + ], + "javaEnums": [ + { + "name": "earliest" + }, + { + "name": "latest" + } + ] + }, + "poolTimeout": { + "title": "Single Pool Call Timeout", + "description": "Max allowed wait time.", + "type": "number", + "default": 1.0 + }, + "sessionTimeout": { + "title": "Broker Inactive Session Timeout", + "description": "Max allowed inactivity time.", + "type": "integer", + "default": 30 + }, + "securityProtocol": { + "title": "Kafka Security Protocol", + "description": "Kafka security protocol config.", + "default": "PLAINTEXT", + "type": "string", + "enum": [ + "PLAINTEXT", + "SASL_PLAINTEXT", + "SSL", + "SASL_SSL" + ], + "javaEnums": [ + { + "name": "PLAINTEXT" + }, + { + "name": "SASL_PLAINTEXT" + }, + { + "name": "SSL" + }, + { + "name": "SASL_SSL" + } + ] + }, + "sslConfig": { + "title": "SSL", + "description": "SSL Configuration details.", + "$ref": "../../../../security/ssl/verifySSLConfig.json#/definitions/sslConfig" + }, + "saslConfig": { + "title": "SASL", + "description": "SASL Configuration details.", + "$ref": "../../../../security/sasl/saslClientConfig.json" + } + }, + "required": [ + "brokersUrl", + "topicName" + ] + }, + "kinesisBrokerConfig": { + "title": "Kinesis", + "description": "AWS Kinesis Data Streams configuration for OpenLineage events.", + "type": "object", + "additionalProperties": false, + "properties": { + "streamName": { + "title": "Stream Name", + "description": "Kinesis Data Stream name.", + "type": "string" + }, + "awsConfig": { + "title": "AWS Credentials Configuration", + "description": "AWS credentials configuration.", + "$ref": "../../../../security/credentials/awsCredentials.json" + }, + "consumerOffsets": { + "title": "Initial Consumer Offsets", + "description": "Initial Kinesis shard iterator type.", + "default": "TRIM_HORIZON", + "type": "string", + "enum": [ + 
"TRIM_HORIZON", + "LATEST" + ], + "javaEnums": [ + { + "name": "TRIM_HORIZON" + }, + { + "name": "LATEST" + } + ] + }, + "poolTimeout": { + "title": "Poll Interval", + "description": "Poll interval in seconds.", + "type": "number", + "default": 1.0 + }, + "sessionTimeout": { + "title": "Session Timeout", + "description": "Max inactivity timeout in seconds.", + "type": "integer", + "default": 30 + } + }, + "required": [ + "streamName", + "awsConfig" + ] } }, "properties": { @@ -21,82 +166,25 @@ "$ref": "#/definitions/OpenLineageType", "default": "OpenLineage" }, - "brokersUrl": { - "title": "Kafka brokers list", - "description": "service type of the messaging source", - "type": "string" - }, - "topicName": { - "title": "Topic Name", - "description": "topic from where Open lineage events will be pulled ", - "type": "string" - }, - "consumerGroupName": { - "title": "Consumer Group", - "description": "consumer group name ", - "type": "string" - }, - "consumerOffsets": { - "title": "Initial consumer offsets", - "description": "initial Kafka consumer offset", - "default": "earliest", - "type": "string", - "enum": [ - "earliest", - "latest" - ], - "javaEnums": [ + "brokerConfig": { + "title": "Broker Configuration", + "description": "Event broker configuration. 
Choose between Kafka and Kinesis.", + "oneOf": [ { - "name": "earliest" + "$ref": "#/definitions/kafkaBrokerConfig" }, { - "name": "latest" + "$ref": "#/definitions/kinesisBrokerConfig" } ] }, - "poolTimeout": { - "title": "Single pool call timeout", - "description": "max allowed wait time", - "type": "number", - "default": 1.0 - }, - "sessionTimeout": { - "title": "Broker inactive session timeout", - "description": "max allowed inactivity time", - "type": "integer", - "default": 30 - }, - "securityProtocol": { - "title": "Kafka security protocol", - "description": "Kafka security protocol config", - "default": "PLAINTEXT", - "type": "string", - "enum": [ - "PLAINTEXT", - "SSL", - "SASL_SSL" - ], - "javaEnums": [ - { - "name": "PLAINTEXT" - }, - { - "name": "SSL" - }, - { - "name": "SASL_SSL" - } - ] - }, - "sslConfig": { - "title": "SSL", - "description": "SSL Configuration details.", - "$ref": "../../../../security/ssl/verifySSLConfig.json#/definitions/sslConfig" - }, - "saslConfig": { - "title": "SASL", - "description": "SASL Configuration details.", - "$ref": "../../../../security/sasl/saslClientConfig.json" + "namespaceToServiceMapping": { + "title": "Namespace to Service Mapping", + "description": "Map OpenLineage dataset namespaces (or prefixes) to OpenMetadata database service names. Used when multiple services of the same type exist. 
Example: 'mysql://cluster-a:3306' -> 'mysql-cluster-a'.", + "type": "object", + "additionalProperties": { + "type": "string" + } }, "pipelineFilterPattern": { "description": "Regex exclude pipelines.", @@ -107,5 +195,6 @@ "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" } }, + "required": ["brokerConfig"], "additionalProperties": false } \ No newline at end of file diff --git a/schemas/entity/services/connections/serviceConnection.json b/schemas/entity/services/connections/serviceConnection.json index ec28fa8..0197982 100644 --- a/schemas/entity/services/connections/serviceConnection.json +++ b/schemas/entity/services/connections/serviceConnection.json @@ -40,6 +40,9 @@ }, { "$ref": "../driveService.json#/definitions/driveConnection" + }, + { + "$ref": "../mcpService.json#/definitions/mcpConnection" } ] } diff --git a/schemas/entity/services/dashboardService.json b/schemas/entity/services/dashboardService.json index 04cdced..9204237 100644 --- a/schemas/entity/services/dashboardService.json +++ b/schemas/entity/services/dashboardService.json @@ -33,7 +33,8 @@ "Sigma", "ThoughtSpot", "Grafana", - "Hex" + "Hex", + "Ssrs" ], "javaEnums": [ { @@ -92,6 +93,9 @@ }, { "name": "Hex" + }, + { + "name": "Ssrs" } ] }, @@ -162,6 +166,9 @@ }, { "$ref": "./connections/dashboard/hexConnection.json" + }, + { + "$ref": "./connections/dashboard/ssrsConnection.json" } ] } @@ -250,10 +257,18 @@ "type": "boolean", "default": false }, + "entityStatus": { + "description": "Status of the entity.", + "$ref": "../../type/status.json" + }, "dataProducts" : { "description": "List of data products this entity is part of.", "$ref" : "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" + }, "domains" : { "description": "Domains the Dashboard service belongs to.", "$ref": "../../type/entityReferenceList.json" diff --git 
a/schemas/entity/services/databaseService.json b/schemas/entity/services/databaseService.json index 087440b..8cef874 100644 --- a/schemas/entity/services/databaseService.json +++ b/schemas/entity/services/databaseService.json @@ -23,6 +23,7 @@ "Postgres", "Timescale", "Mssql", + "MicrosoftAccess", "Oracle", "Athena", "Hive", @@ -54,9 +55,9 @@ "Couchbase", "Greenplum", "Doris", + "StarRocks", "UnityCatalog", "SAS", - "Iceberg", "Teradata", "SapErp", "Synapse", @@ -64,7 +65,12 @@ "Cockroach", "SSAS", "Epic", - "ServiceNow" + "ServiceNow", + "Dremio", + "MicrosoftFabric", + "BurstIQ", + "Informix", + "Iomete" ], "javaEnums": [ { @@ -91,6 +97,9 @@ { "name": "Mssql" }, + { + "name": "MicrosoftAccess" + }, { "name": "Oracle" }, @@ -185,13 +194,13 @@ "name": "Doris" }, { - "name": "UnityCatalog" + "name": "StarRocks" }, { - "name": "SAS" + "name": "UnityCatalog" }, { - "name": "Iceberg" + "name": "SAS" }, { "name": "Teradata" @@ -216,6 +225,21 @@ }, { "name": "ServiceNow" + }, + { + "name": "Dremio" + }, + { + "name": "MicrosoftFabric" + }, + { + "name": "BurstIQ" + }, + { + "name": "Informix" + }, + { + "name": "Iomete" } ] }, @@ -274,6 +298,9 @@ { "$ref": "./connections/database/mssqlConnection.json" }, + { + "$ref": "./connections/database/microsoftAccessConnection.json" + }, { "$ref": "./connections/database/mysqlConnection.json" }, @@ -341,13 +368,13 @@ "$ref": "./connections/database/dorisConnection.json" }, { - "$ref": "./connections/database/unityCatalogConnection.json" + "$ref": "./connections/database/starrocksConnection.json" }, { - "$ref": "./connections/database/sasConnection.json" + "$ref": "./connections/database/unityCatalogConnection.json" }, { - "$ref": "./connections/database/icebergConnection.json" + "$ref": "./connections/database/sasConnection.json" }, { "$ref": "./connections/database/teradataConnection.json" @@ -372,6 +399,21 @@ }, { "$ref": "./connections/database/serviceNowConnection.json" + }, + { + "$ref": 
"./connections/database/dremioConnection.json" + }, + { + "$ref": "./connections/database/microsoftFabricConnection.json" + }, + { + "$ref": "./connections/database/burstIQConnection.json" + }, + { + "$ref": "./connections/database/informixConnection.json" + }, + { + "$ref": "./connections/database/iometeConnection.json" } ] } @@ -464,10 +506,18 @@ "type": "boolean", "default": false }, + "entityStatus": { + "description": "Status of the entity.", + "$ref": "../../type/status.json" + }, "dataProducts" : { "description": "List of data products this entity is part of.", "$ref" : "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" + }, "domains" : { "description": "Domains the Database service belongs to.", "$ref": "../../type/entityReferenceList.json" diff --git a/schemas/entity/services/driveService.json b/schemas/entity/services/driveService.json index f7584dc..cca785b 100644 --- a/schemas/entity/services/driveService.json +++ b/schemas/entity/services/driveService.json @@ -19,6 +19,7 @@ "enum": [ "GoogleDrive", "SharePoint", + "Sftp", "CustomDrive" ], "javaEnums": [ @@ -28,6 +29,9 @@ { "name": "SharePoint" }, + { + "name": "Sftp" + }, { "name": "CustomDrive" } @@ -50,6 +54,9 @@ { "$ref": "connections/drive/sharePointConnection.json" }, + { + "$ref": "connections/drive/sftpConnection.json" + }, { "$ref": "connections/drive/customDriveConnection.json" } @@ -144,6 +151,10 @@ "description": "List of data products this entity is part of.", "$ref": "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" + }, "followers": { "description": "Followers of this entity.", "$ref": "../../type/entityReferenceList.json" diff --git a/schemas/entity/services/ingestionPipelines/ingestionPipeline.json 
b/schemas/entity/services/ingestionPipelines/ingestionPipeline.json index 835bebf..6008e28 100644 --- a/schemas/entity/services/ingestionPipelines/ingestionPipeline.json +++ b/schemas/entity/services/ingestionPipelines/ingestionPipeline.json @@ -26,7 +26,7 @@ "description": "Pipeline status denotes if its failed or succeeded.", "type": "string", "javaType": "org.openmetadata.schema.entity.services.ingestionPipelines.PipelineStatusType", - "enum": ["queued","success","failed","running","partialSuccess"] + "enum": ["queued","success","failed","running","partialSuccess","stopped"] }, "startDate": { "description": "startDate of the pipeline run for this particular execution.", diff --git a/schemas/entity/services/ingestionPipelines/operationMetrics.json b/schemas/entity/services/ingestionPipelines/operationMetrics.json new file mode 100644 index 0000000..0cb5ac1 --- /dev/null +++ b/schemas/entity/services/ingestionPipelines/operationMetrics.json @@ -0,0 +1,162 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/ingestionPipelines/operationMetrics.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "OperationMetrics", + "description": "Unified schema for tracking operation metrics during ingestion pipelines. 
Uses free-form strings for flexible categorization across all connector types.", + "definitions": { + "operationSummary": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.services.ingestionPipelines.OperationSummary", + "description": "Aggregated summary of operations", + "properties": { + "count": { + "description": "Total number of operations", + "type": "integer", + "default": 0 + }, + "totalTimeMs": { + "description": "Total time spent on operations in milliseconds", + "type": "number" + }, + "avgTimeMs": { + "description": "Average time per operation in milliseconds", + "type": "number" + }, + "minTimeMs": { + "description": "Minimum operation time in milliseconds", + "type": "number" + }, + "maxTimeMs": { + "description": "Maximum operation time in milliseconds", + "type": "number" + } + }, + "additionalProperties": false + }, + "operationMetric": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.services.ingestionPipelines.OperationMetric", + "description": "Single operation metric with flexible categorization", + "properties": { + "category": { + "description": "High-level category: db_queries, api_calls, entity_operations", + "type": "string" + }, + "operation": { + "description": "Operation name - free form string like 'SELECT', 'GET:/dashboards', 'yield_columns'", + "type": "string" + }, + "entityType": { + "description": "Entity type being operated on (Table, Dashboard, Pipeline, etc.) 
- free form string", + "type": "string" + }, + "entityFqn": { + "description": "Fully qualified name of the entity if applicable", + "type": "string" + }, + "timestamp": { + "description": "When the operation occurred", + "$ref": "../../../type/basic.json#/definitions/timestamp" + }, + "durationMs": { + "description": "Duration of the operation in milliseconds", + "type": "integer" + }, + "success": { + "description": "Whether the operation succeeded", + "type": "boolean", + "default": true + }, + "errorMessage": { + "description": "Error message if operation failed", + "type": "string" + }, + "metadata": { + "description": "Additional context (query text snippet, response size, etc.)", + "$ref": "../../../type/basic.json#/definitions/map" + } + }, + "required": ["category", "operation", "timestamp"], + "additionalProperties": false + }, + "operationMetricsBatch": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.services.ingestionPipelines.OperationMetricsBatch", + "description": "Batch of operation metrics for efficient transmission", + "properties": { + "runId": { + "description": "Pipeline run ID", + "type": "string" + }, + "stepName": { + "description": "Name of the ingestion step", + "type": "string" + }, + "batchTimestamp": { + "description": "When this batch was created", + "$ref": "../../../type/basic.json#/definitions/timestamp" + }, + "metrics": { + "description": "List of operation metrics", + "type": "array", + "items": { + "$ref": "#/definitions/operationMetric" + } + } + }, + "required": ["runId", "stepName", "metrics"], + "additionalProperties": false + }, + "operationMetricsAggregation": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.services.ingestionPipelines.OperationMetricsAggregation", + "description": "Aggregated operation metrics stored at pipeline completion", + "properties": { + "runId": { + "description": "Pipeline run ID", + "type": "string" + }, + "timestamp": { + "description": "When this 
aggregation was created", + "$ref": "../../../type/basic.json#/definitions/timestamp" + }, + "byCategory": { + "description": "Metrics aggregated by category -> operation -> entityType", + "type": "object", + "additionalProperties": { + "type": "object", + "additionalProperties": { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/operationSummary" + } + } + } + }, + "totalOperations": { + "description": "Total number of operations across all categories", + "type": "integer" + }, + "totalSuccessful": { + "description": "Total successful operations", + "type": "integer" + }, + "totalFailed": { + "description": "Total failed operations", + "type": "integer" + }, + "sourceTimeMs": { + "description": "Total time spent fetching data from source systems (milliseconds)", + "type": "number" + }, + "sinkTimeMs": { + "description": "Total time spent processing and sinking data to OpenMetadata (milliseconds)", + "type": "number" + } + }, + "required": ["runId", "timestamp"], + "additionalProperties": false + } + }, + "additionalProperties": false +} diff --git a/schemas/entity/services/ingestionPipelines/operationMetricsBatch.json b/schemas/entity/services/ingestionPipelines/operationMetricsBatch.json new file mode 100644 index 0000000..b5737a7 --- /dev/null +++ b/schemas/entity/services/ingestionPipelines/operationMetricsBatch.json @@ -0,0 +1,108 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/ingestionPipelines/operationMetricsBatch.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "OperationMetricsBatch", + "description": "Batch of operation metrics for efficient transmission during ingestion pipeline execution", + "type": "object", + "javaType": "org.openmetadata.schema.entity.services.ingestionPipelines.OperationMetricsBatch", + "definitions": { + "operationSummary": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.services.ingestionPipelines.OperationSummary", + "description": "Aggregated 
summary of operations", + "properties": { + "count": { + "description": "Total number of operations", + "type": "integer", + "default": 0 + }, + "totalTimeMs": { + "description": "Total time spent on operations in milliseconds", + "type": "number" + }, + "avgTimeMs": { + "description": "Average time per operation in milliseconds", + "type": "number" + }, + "minTimeMs": { + "description": "Minimum operation time in milliseconds", + "type": "number" + }, + "maxTimeMs": { + "description": "Maximum operation time in milliseconds", + "type": "number" + } + }, + "additionalProperties": false + }, + "operationMetric": { + "type": "object", + "javaType": "org.openmetadata.schema.entity.services.ingestionPipelines.OperationMetric", + "description": "Single operation metric with flexible categorization", + "properties": { + "category": { + "description": "High-level category: db_queries, api_calls, entity_operations", + "type": "string" + }, + "operation": { + "description": "Operation name - free form string like 'SELECT', 'GET:/dashboards', 'yield_columns'", + "type": "string" + }, + "entityType": { + "description": "Entity type being operated on (Table, Dashboard, Pipeline, etc.) 
- free form string", + "type": "string" + }, + "entityFqn": { + "description": "Fully qualified name of the entity if applicable", + "type": "string" + }, + "timestamp": { + "description": "When the operation occurred", + "$ref": "../../../type/basic.json#/definitions/timestamp" + }, + "durationMs": { + "description": "Duration of the operation in milliseconds", + "type": "integer" + }, + "success": { + "description": "Whether the operation succeeded", + "type": "boolean", + "default": true + }, + "errorMessage": { + "description": "Error message if operation failed", + "type": "string" + }, + "metadata": { + "description": "Additional context (query text snippet, response size, etc.)", + "$ref": "../../../type/basic.json#/definitions/map" + } + }, + "required": ["category", "operation", "timestamp"], + "additionalProperties": false + } + }, + "properties": { + "runId": { + "description": "Pipeline run ID", + "type": "string" + }, + "stepName": { + "description": "Name of the ingestion step", + "type": "string" + }, + "batchTimestamp": { + "description": "When this batch was created", + "$ref": "../../../type/basic.json#/definitions/timestamp" + }, + "metrics": { + "description": "List of operation metrics", + "type": "array", + "items": { + "$ref": "#/definitions/operationMetric" + } + } + }, + "required": ["runId", "stepName", "metrics"], + "additionalProperties": false +} diff --git a/schemas/entity/services/ingestionPipelines/progressUpdate.json b/schemas/entity/services/ingestionPipelines/progressUpdate.json new file mode 100644 index 0000000..dfabaa2 --- /dev/null +++ b/schemas/entity/services/ingestionPipelines/progressUpdate.json @@ -0,0 +1,48 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/ingestionPipelines/progressUpdate.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "ProgressUpdate", + "description": "Real-time progress update for SSE streaming during ingestion pipeline execution", + "type": "object", + 
"javaType": "org.openmetadata.schema.entity.services.ingestionPipelines.ProgressUpdate", + "definitions": { + "progressUpdateType": { + "type": "string", + "javaType": "org.openmetadata.schema.entity.services.ingestionPipelines.ProgressUpdateType", + "description": "Type of progress update", + "enum": ["DISCOVERY", "PROCESSING", "STEP_COMPLETE", "PIPELINE_COMPLETE", "ERROR"] + } + }, + "properties": { + "runId": { + "description": "Pipeline run ID", + "type": "string" + }, + "timestamp": { + "description": "When this update was created", + "$ref": "../../../type/basic.json#/definitions/timestamp" + }, + "updateType": { + "description": "Type of progress update", + "$ref": "#/definitions/progressUpdateType" + }, + "stepName": { + "description": "Name of the current step", + "type": "string" + }, + "progress": { + "description": "Progress by entity type (e.g., Database, DatabaseSchema, Table). Keys are entity types, values contain total, processed, and estimatedRemainingSeconds.", + "existingJavaType": "java.util.Map" + }, + "currentEntity": { + "description": "FQN of the entity currently being processed", + "type": "string" + }, + "message": { + "description": "Human-readable status message", + "type": "string" + } + }, + "required": ["runId", "timestamp", "updateType"], + "additionalProperties": false +} diff --git a/schemas/entity/services/ingestionPipelines/status.json b/schemas/entity/services/ingestionPipelines/status.json index 6e01186..e26b546 100644 --- a/schemas/entity/services/ingestionPipelines/status.json +++ b/schemas/entity/services/ingestionPipelines/status.json @@ -88,6 +88,50 @@ } } } + }, + "operationMetrics": { + "description": "Operation metrics by category (db_queries, api_calls) -> operation -> entityType -> summary", + "type": "object", + "additionalProperties": { + "type": "object", + "additionalProperties": { + "type": "object", + "additionalProperties": { + "type": "object", + "properties": { + "count": { + "description": "Total number of 
operations", + "type": "integer", + "default": 0 + }, + "totalTimeMs": { + "description": "Total time spent in milliseconds", + "type": "number" + }, + "avgTimeMs": { + "description": "Average time per operation in milliseconds", + "type": "number" + }, + "minTimeMs": { + "description": "Minimum operation time in milliseconds", + "type": "number" + }, + "maxTimeMs": { + "description": "Maximum operation time in milliseconds", + "type": "number" + } + } + } + } + } + }, + "sourceTimeMs": { + "description": "Total time spent fetching data from source systems (milliseconds)", + "type": "number" + }, + "sinkTimeMs": { + "description": "Total time spent processing and sinking data to OpenMetadata (milliseconds)", + "type": "number" } }, "additionalProperties": false, diff --git a/schemas/entity/services/llmService.json b/schemas/entity/services/llmService.json new file mode 100644 index 0000000..07fbd79 --- /dev/null +++ b/schemas/entity/services/llmService.json @@ -0,0 +1,174 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/llmService.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "LLMService", + "description": "LLM Service Entity for managing Large Language Model providers such as OpenAI, Anthropic, Azure OpenAI, AWS Bedrock, etc.", + "type": "object", + "javaType": "org.openmetadata.schema.entity.services.LLMService", + "javaInterfaces": [ + "org.openmetadata.schema.EntityInterface", + "org.openmetadata.schema.ServiceEntityInterface" + ], + "definitions": { + "llmServiceType": { + "description": "Type of LLM service provider", + "type": "string", + "javaInterfaces": ["org.openmetadata.schema.EnumInterface"], + "enum": ["OpenAI", "Anthropic", "AzureOpenAI", "Bedrock", "VertexAI", "Ollama", "HuggingFace", "CustomLLM"], + "javaEnums": [ + {"name": "OpenAI"}, + {"name": "Anthropic"}, + {"name": "AzureOpenAI"}, + {"name": "Bedrock"}, + {"name": "VertexAI"}, + {"name": "Ollama"}, + {"name": "HuggingFace"}, + {"name": "CustomLLM"} 
+ ] + }, + "llmConnection": { + "type": "object", + "javaType": "org.openmetadata.schema.type.LLMConnection", + "description": "LLM Service Connection.", + "javaInterfaces": [ + "org.openmetadata.schema.ServiceConnectionEntityInterface" + ], + "properties": { + "config": { + "mask": true, + "oneOf": [ + { + "$ref": "./connections/llm/openAIConnection.json" + }, + { + "$ref": "./connections/llm/anthropicConnection.json" + }, + { + "$ref": "./connections/llm/azureOpenAIConnection.json" + }, + { + "$ref": "./connections/llm/bedrockConnection.json" + }, + { + "$ref": "./connections/llm/vertexAIConnection.json" + }, + { + "$ref": "./connections/llm/ollamaConnection.json" + }, + { + "$ref": "./connections/llm/huggingFaceConnection.json" + }, + { + "$ref": "./connections/llm/customLLMConnection.json" + } + ] + } + }, + "additionalProperties": false + } + }, + "properties": { + "id": { + "description": "Unique identifier of this LLM service instance", + "$ref": "../../type/basic.json#/definitions/uuid" + }, + "name": { + "description": "Name that identifies this LLM service", + "$ref": "../../type/basic.json#/definitions/entityName" + }, + "fullyQualifiedName": { + "description": "FullyQualifiedName same as `name`", + "$ref": "../../type/basic.json#/definitions/fullyQualifiedEntityName" + }, + "serviceType": { + "description": "Type of LLM service provider", + "$ref": "#/definitions/llmServiceType" + }, + "description": { + "description": "Description of this LLM service instance", + "type": "string" + }, + "displayName": { + "description": "Display Name that identifies this LLM service", + "type": "string" + }, + "version": { + "description": "Metadata version of the entity", + "$ref": "../../type/entityHistory.json#/definitions/entityVersion" + }, + "updatedAt": { + "description": "Last update time in Unix epoch milliseconds", + "$ref": "../../type/basic.json#/definitions/timestamp" + }, + "updatedBy": { + "description": "User who made the update", + "type": "string" + 
}, + "models": { + "description": "LLM Models registered with this service", + "$ref": "../../type/entityReferenceList.json" + }, + "pipelines": { + "description": "References to pipelines deployed for this LLM service", + "$ref": "../../type/entityReferenceList.json" + }, + "connection": { + "$ref": "#/definitions/llmConnection" + }, + "testConnectionResult": { + "description": "Last test connection results for this service", + "$ref": "connections/testConnectionResult.json" + }, + "tags": { + "description": "Tags for this LLM Service", + "type": "array", + "items": { + "$ref": "../../type/tagLabel.json" + }, + "default": [] + }, + "owners": { + "description": "Owners of this LLM service", + "$ref": "../../type/entityReferenceList.json" + }, + "href": { + "description": "Link to the resource corresponding to this LLM service", + "$ref": "../../type/basic.json#/definitions/href" + }, + "changeDescription": { + "description": "Change that led to this version of the entity", + "$ref": "../../type/entityHistory.json#/definitions/changeDescription" + }, + "incrementalChangeDescription": { + "description": "Change that led to this version of the entity", + "$ref": "../../type/entityHistory.json#/definitions/changeDescription" + }, + "deleted": { + "description": "When `true` indicates the entity has been soft deleted", + "type": "boolean", + "default": false + }, + "dataProducts": { + "description": "List of data products this entity is part of", + "$ref": "../../type/entityReferenceList.json" + }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" + }, + "followers": { + "description": "Followers of this entity", + "$ref": "../../type/entityReferenceList.json" + }, + "domains": { + "description": "Domains the LLM service belongs to", + "$ref": "../../type/entityReferenceList.json" + }, + "ingestionRunner": { + "description": "The ingestion agent responsible for executing the ingestion 
pipeline", + "$ref": "../../type/entityReference.json" + } + }, + "required": ["id", "name", "serviceType"], + "additionalProperties": false +} diff --git a/schemas/entity/services/mcpService.json b/schemas/entity/services/mcpService.json new file mode 100644 index 0000000..a3803c1 --- /dev/null +++ b/schemas/entity/services/mcpService.json @@ -0,0 +1,138 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/mcpService.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "McpService", + "description": "MCP (Model Context Protocol) Service for discovering and managing MCP servers. This service connects to MCP server configurations, registries, or individual servers to catalog tools, resources, and prompts for AI governance.", + "type": "object", + "javaType": "org.openmetadata.schema.entity.services.McpService", + "javaInterfaces": [ + "org.openmetadata.schema.EntityInterface", + "org.openmetadata.schema.ServiceEntityInterface" + ], + "definitions": { + "mcpServiceType": { + "description": "Type of MCP service - currently only Mcp is supported", + "type": "string", + "javaInterfaces": ["org.openmetadata.schema.EnumInterface"], + "enum": ["Mcp"], + "javaEnums": [ + {"name": "Mcp"} + ] + }, + "mcpConnection": { + "type": "object", + "javaType": "org.openmetadata.schema.type.McpConnection", + "description": "MCP Service Connection.", + "javaInterfaces": [ + "org.openmetadata.schema.ServiceConnectionEntityInterface" + ], + "properties": { + "config": { + "mask": true, + "oneOf": [ + { + "$ref": "./connections/mcp/mcpConnection.json" + } + ] + } + }, + "additionalProperties": false + } + }, + "properties": { + "id": { + "description": "Unique identifier of this MCP service instance", + "$ref": "../../type/basic.json#/definitions/uuid" + }, + "name": { + "description": "Name that identifies this MCP service", + "$ref": "../../type/basic.json#/definitions/entityName" + }, + "fullyQualifiedName": { + "description": "FullyQualifiedName same as 
`name`", + "$ref": "../../type/basic.json#/definitions/fullyQualifiedEntityName" + }, + "serviceType": { + "description": "Type of MCP service", + "$ref": "#/definitions/mcpServiceType" + }, + "description": { + "description": "Description of the MCP service", + "$ref": "../../type/basic.json#/definitions/markdown" + }, + "displayName": { + "description": "Display Name that identifies this MCP service", + "type": "string" + }, + "version": { + "description": "Metadata version of the entity", + "$ref": "../../type/entityHistory.json#/definitions/entityVersion" + }, + "updatedAt": { + "description": "Last update time corresponding to the new version of the entity in Unix epoch time milliseconds", + "$ref": "../../type/basic.json#/definitions/timestamp" + }, + "updatedBy": { + "description": "User who made the update", + "type": "string" + }, + "pipelines": { + "description": "References to pipelines deployed for this MCP service to extract metadata", + "$ref": "../../type/entityReferenceList.json" + }, + "connection": { + "$ref": "#/definitions/mcpConnection" + }, + "testConnectionResult": { + "description": "Result of the last test connection performed on this service", + "$ref": "connections/testConnectionResult.json" + }, + "tags": { + "description": "Tags for this MCP Service.", + "type": "array", + "items": { + "$ref": "../../type/tagLabel.json" + }, + "default": null + }, + "owners": { + "description": "Owners of this MCP service", + "$ref": "../../type/entityReferenceList.json" + }, + "href": { + "description": "Link to the resource corresponding to this entity", + "$ref": "../../type/basic.json#/definitions/href" + }, + "changeDescription": { + "description": "Change that led to this version of the entity", + "$ref": "../../type/entityHistory.json#/definitions/changeDescription" + }, + "incrementalChangeDescription": { + "description": "Change that led to this version of the entity", + "$ref": "../../type/entityHistory.json#/definitions/changeDescription" +
}, + "deleted": { + "description": "When `true` indicates the entity has been soft deleted", + "type": "boolean", + "default": false + }, + "dataProducts": { + "description": "List of data products this entity is part of", + "$ref": "../../type/entityReferenceList.json" + }, + "followers": { + "description": "Followers of this entity", + "$ref": "../../type/entityReferenceList.json" + }, + "domains": { + "description": "Domains the MCP service belongs to", + "$ref": "../../type/entityReferenceList.json" + }, + "ingestionRunner": { + "description": "The ingestion agent responsible for executing the ingestion pipeline", + "$ref": "../../type/entityReference.json" + } + }, + "required": ["id", "name", "serviceType"], + "additionalProperties": false +} diff --git a/schemas/entity/services/messagingService.json b/schemas/entity/services/messagingService.json index 3d3fda0..1100a85 100644 --- a/schemas/entity/services/messagingService.json +++ b/schemas/entity/services/messagingService.json @@ -14,7 +14,7 @@ "description": "Type of messaging service - Kafka or Pulsar.", "type": "string", "javaInterfaces": ["org.openmetadata.schema.EnumInterface"], - "enum": ["Kafka", "Redpanda", "Kinesis", "CustomMessaging"], + "enum": ["Kafka", "Redpanda", "Kinesis", "PubSub", "CustomMessaging"], "javaEnums": [ { "name": "Kafka" @@ -25,6 +25,9 @@ { "name": "Kinesis" }, + { + "name": "PubSub" + }, { "name": "CustomMessaging" } @@ -58,6 +61,9 @@ { "$ref": "./connections/messaging/kinesisConnection.json" }, + { + "$ref": "./connections/messaging/pubSubConnection.json" + }, { "$ref": "connections/messaging/customMessagingConnection.json" } @@ -148,10 +154,18 @@ "type": "boolean", "default": false }, + "entityStatus": { + "description": "Status of the entity.", + "$ref": "../../type/status.json" + }, "dataProducts" : { "description": "List of data products this entity is part of.", "$ref" : "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data 
contract for this entity.", + "$ref": "../../type/entityReference.json" + }, "domains" : { "description": "Domains the Messaging service belongs to.", "$ref": "../../type/entityReferenceList.json" diff --git a/schemas/entity/services/metadataService.json b/schemas/entity/services/metadataService.json index 1797556..77daad3 100644 --- a/schemas/entity/services/metadataService.json +++ b/schemas/entity/services/metadataService.json @@ -158,6 +158,10 @@ "type": "boolean", "default": false }, + "entityStatus": { + "description": "Status of the entity.", + "$ref": "../../type/status.json" + }, "provider" : { "$ref": "../../type/basic.json#/definitions/providerType" }, diff --git a/schemas/entity/services/mlmodelService.json b/schemas/entity/services/mlmodelService.json index 1a08ae0..10ec94f 100644 --- a/schemas/entity/services/mlmodelService.json +++ b/schemas/entity/services/mlmodelService.json @@ -146,10 +146,18 @@ "type": "boolean", "default": false }, + "entityStatus": { + "description": "Status of the entity.", + "$ref": "../../type/status.json" + }, "dataProducts" : { "description": "List of data products this entity is part of.", "$ref" : "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" + }, "followers": { "description": "Followers of this entity.", "$ref": "../../type/entityReferenceList.json" diff --git a/schemas/entity/services/pipelineService.json b/schemas/entity/services/pipelineService.json index a3e22ed..649839f 100644 --- a/schemas/entity/services/pipelineService.json +++ b/schemas/entity/services/pipelineService.json @@ -38,7 +38,9 @@ "DataFactory", "Wherescape", "SSIS", - "Snowplow" + "Snowplow", + "Mulesoft", + "MicrosoftFabricPipeline" ], "javaEnums": [ { @@ -106,6 +108,12 @@ }, { "name": "Snowplow" + }, + { + "name": "Mulesoft" + }, + { + "name": "MicrosoftFabricPipeline" } ] }, @@ -185,6 +193,12 @@ }, { "$ref": 
"./connections/pipeline/snowplowConnection.json" + }, + { + "$ref": "./connections/pipeline/mulesoftConnection.json" + }, + { + "$ref": "./connections/pipeline/microsoftFabricPipelineConnection.json" } ] } @@ -273,10 +287,18 @@ "type": "boolean", "default": false }, + "entityStatus": { + "description": "Status of the entity.", + "$ref": "../../type/status.json" + }, "dataProducts" : { "description": "List of data products this entity is part of.", "$ref" : "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" + }, "followers": { "description": "Followers of this entity.", "$ref": "../../type/entityReferenceList.json" diff --git a/schemas/entity/services/searchService.json b/schemas/entity/services/searchService.json index 02c0bad..ef10341 100644 --- a/schemas/entity/services/searchService.json +++ b/schemas/entity/services/searchService.json @@ -140,10 +140,18 @@ "type": "boolean", "default": false }, + "entityStatus": { + "description": "Status of the entity.", + "$ref": "../../type/status.json" + }, "dataProducts" : { "description": "List of data products this entity is part of.", "$ref" : "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" + }, "followers": { "description": "Followers of this entity.", "$ref": "../../type/entityReferenceList.json" diff --git a/schemas/entity/services/securityService.json b/schemas/entity/services/securityService.json index 8616f47..5d5a0f9 100644 --- a/schemas/entity/services/securityService.json +++ b/schemas/entity/services/securityService.json @@ -131,6 +131,10 @@ "description": "List of data products this entity is part of.", "$ref": "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": 
"../../type/entityReference.json" + }, "followers": { "description": "Followers of this entity.", "$ref": "../../type/entityReferenceList.json" diff --git a/schemas/entity/services/serviceType.json b/schemas/entity/services/serviceType.json index 81199f9..e8f8b85 100644 --- a/schemas/entity/services/serviceType.json +++ b/schemas/entity/services/serviceType.json @@ -16,7 +16,9 @@ "Storage", "Search", "Api", - "Security" + "Security", + "LLM", + "Mcp" ], "additionalProperties": false } \ No newline at end of file diff --git a/schemas/entity/services/storageService.json b/schemas/entity/services/storageService.json index c824c0c..160abb5 100644 --- a/schemas/entity/services/storageService.json +++ b/schemas/entity/services/storageService.json @@ -147,10 +147,18 @@ "type": "boolean", "default": false }, + "entityStatus": { + "description": "Status of the entity.", + "$ref": "../../type/status.json" + }, "dataProducts" : { "description": "List of data products this entity is part of.", "$ref" : "../../type/entityReferenceList.json" }, + "dataContract": { + "description": "Reference to the data contract for this entity.", + "$ref": "../../type/entityReference.json" + }, "followers": { "description": "Followers of this entity.", "$ref": "../../type/entityReferenceList.json" diff --git a/schemas/entity/teams/team.json b/schemas/entity/teams/team.json index b146004..82f9156 100644 --- a/schemas/entity/teams/team.json +++ b/schemas/entity/teams/team.json @@ -132,6 +132,10 @@ "description": "Roles that a team is inheriting through membership in teams that have set team default roles.", "$ref": "../../type/entityReferenceList.json" }, + "defaultPersona": { + "description": "Default persona for all users in this team. 
Only applicable to Group type teams.", + "$ref": "../../type/entityReference.json" + }, "policies": { "description": "Policies that is attached to this team.", "$ref": "../../type/entityReferenceList.json" diff --git a/schemas/entity/teams/user.json b/schemas/entity/teams/user.json index 25f4472..774cc4d 100644 --- a/schemas/entity/teams/user.json +++ b/schemas/entity/teams/user.json @@ -153,6 +153,10 @@ "description": "Roles that a user is inheriting through membership in teams that have set team default roles.", "$ref": "../../type/entityReferenceList.json" }, + "inheritedPersonas": { + "description": "Personas inherited through membership in teams that have set a team default persona.", + "$ref": "../../type/entityReferenceList.json" + }, "isEmailVerified": { "description": "If the User has verified the mail", "type": "boolean" diff --git a/schemas/entity/utils/airflowRestApiConnection.json b/schemas/entity/utils/airflowRestApiConnection.json new file mode 100644 index 0000000..23138b4 --- /dev/null +++ b/schemas/entity/utils/airflowRestApiConnection.json @@ -0,0 +1,57 @@ +{ + "$id": "https://open-metadata.org/schema/entity/utils/airflowRestApiConnection.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "AirflowRestApiConnection", + "description": "Airflow REST API Connection Config for connecting via REST API.", + "type": "object", + "javaType": "org.openmetadata.schema.services.connections.pipeline.AirflowRestApiConnection", + "definitions": { + "ApiVersion": { + "description": "Airflow REST API version. Use v1 for Airflow 2.x and v2 for Airflow 3.x. 
Auto will detect the version automatically.", + "type": "string", + "enum": ["v1", "v2", "auto"], + "default": "auto" + } + }, + "properties": { + "type": { + "title": "Service Type", + "description": "Service Type", + "type": "string", + "enum": ["RestAPI"], + "default": "RestAPI" + }, + "authConfig": { + "title": "Authentication Configuration", + "description": "Choose an authentication method: Basic Auth (username/password), Access Token, GCP Service Account (for Cloud Composer), or AWS Credentials (for MWAA).", + "oneOf": [ + { + "$ref": "./common/basicAuthConfig.json" + }, + { + "$ref": "./common/accessTokenConfig.json" + }, + { + "$ref": "./common/gcpCredentialsConfig.json" + }, + { + "$ref": "./common/mwaaAuthConfig.json" + } + ] + }, + "apiVersion": { + "title": "API Version", + "description": "Airflow REST API version.", + "$ref": "#/definitions/ApiVersion", + "default": "auto" + }, + "verifySSL": { + "title": "Verify SSL", + "description": "Whether to verify SSL certificates when connecting to the Airflow API.", + "type": "boolean", + "default": true + } + }, + "required": ["authConfig"], + "additionalProperties": false +} diff --git a/schemas/entity/utils/common/accessTokenConfig.json b/schemas/entity/utils/common/accessTokenConfig.json new file mode 100644 index 0000000..3af3ae2 --- /dev/null +++ b/schemas/entity/utils/common/accessTokenConfig.json @@ -0,0 +1,18 @@ +{ + "$id": "https://open-metadata.org/schema/entity/utils/common/accessTokenConfig.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Access Token", + "description": "Static access token for Airflow API authentication.", + "type": "object", + "javaType": "org.openmetadata.schema.entity.utils.common.AccessTokenConfig", + "properties": { + "token": { + "title": "Token", + "description": "Static access token for Airflow API authentication.", + "type": "string", + "format": "password" + } + }, + "required": ["token"], + "additionalProperties": false +} diff --git 
a/schemas/entity/utils/common/basicAuthConfig.json b/schemas/entity/utils/common/basicAuthConfig.json new file mode 100644 index 0000000..c8346df --- /dev/null +++ b/schemas/entity/utils/common/basicAuthConfig.json @@ -0,0 +1,23 @@ +{ + "$id": "https://open-metadata.org/schema/entity/utils/common/basicAuthConfig.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Basic Auth", + "description": "Username and password for Airflow API authentication.", + "type": "object", + "javaType": "org.openmetadata.schema.entity.utils.common.BasicAuthConfig", + "properties": { + "username": { + "title": "Username", + "description": "Username for basic authentication to the Airflow API.", + "type": "string" + }, + "password": { + "title": "Password", + "description": "Password for basic authentication to the Airflow API.", + "type": "string", + "format": "password" + } + }, + "required": ["username", "password"], + "additionalProperties": false +} diff --git a/schemas/entity/utils/common/gcpCredentialsConfig.json b/schemas/entity/utils/common/gcpCredentialsConfig.json new file mode 100644 index 0000000..2a2c766 --- /dev/null +++ b/schemas/entity/utils/common/gcpCredentialsConfig.json @@ -0,0 +1,17 @@ +{ + "$id": "https://open-metadata.org/schema/entity/utils/common/gcpCredentialsConfig.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "GCP Service Account", + "description": "GCP credentials for Google Cloud Composer. Supports service account values, credentials path, workload identity (external account), and ADC. 
Tokens are auto-refreshed at runtime.", + "type": "object", + "javaType": "org.openmetadata.schema.entity.utils.common.GcpCredentialsConfig", + "properties": { + "credentials": { + "title": "GCP Credentials", + "description": "GCP credentials configuration.", + "$ref": "../../../security/credentials/gcpCredentials.json" + } + }, + "required": ["credentials"], + "additionalProperties": false +} diff --git a/schemas/entity/utils/common/mwaaAuthConfig.json b/schemas/entity/utils/common/mwaaAuthConfig.json new file mode 100644 index 0000000..ba9bb28 --- /dev/null +++ b/schemas/entity/utils/common/mwaaAuthConfig.json @@ -0,0 +1,37 @@ +{ + "$id": "https://open-metadata.org/schema/entity/utils/common/mwaaAuthConfig.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "MWAA Authentication", + "description": "AWS MWAA (Managed Workflows for Apache Airflow) authentication configuration.", + "type": "object", + "javaType": "org.openmetadata.schema.entity.utils.common.MWAAAuthConfig", + "properties": { + "mwaaConfig": { + "title": "MWAA Configuration", + "description": "MWAA credentials and environment configuration.", + "type": "object", + "javaType": "org.openmetadata.schema.entity.utils.common.MWAAConfig", + "properties": { + "mwaaEnvironmentName": { + "title": "MWAA Environment Name", + "description": "The name of your MWAA environment.", + "type": "string" + }, + "awsConfig": { + "title": "AWS Configuration", + "description": "AWS credentials for generating MWAA CLI token.", + "$ref": "../../../security/credentials/awsCredentials.json" + } + }, + "required": [ + "mwaaEnvironmentName", + "awsConfig" + ], + "additionalProperties": false + } + }, + "required": [ + "mwaaConfig" + ], + "additionalProperties": false +} diff --git a/schemas/governance/workflows/elements/nodeSubType.json b/schemas/governance/workflows/elements/nodeSubType.json index 4c91d3a..b3242b6 100644 --- a/schemas/governance/workflows/elements/nodeSubType.json +++ 
b/schemas/governance/workflows/elements/nodeSubType.json @@ -17,6 +17,11 @@ "runAppTask", "rollbackEntityTask", "dataCompletenessTask", - "parallelGateway" + "parallelGateway", + "sinkTask", + "createRecognizerFeedbackApprovalTask", + "applyRecognizerFeedbackTask", + "rejectRecognizerFeedbackTask", + "checkChangeDescriptionTask" ] } diff --git a/schemas/governance/workflows/elements/nodes/automatedTask/applyRecognizerFeedbackTask.json b/schemas/governance/workflows/elements/nodes/automatedTask/applyRecognizerFeedbackTask.json new file mode 100644 index 0000000..d9585c1 --- /dev/null +++ b/schemas/governance/workflows/elements/nodes/automatedTask/applyRecognizerFeedbackTask.json @@ -0,0 +1,80 @@ +{ + "$id": "https://open-metadata.org/schema/governance/workflows/elements/nodes/automatedTask/applyRecognizerFeedbackTask.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "ApplyRecognizerFeedbackTaskDefinition", + "description": "Applies RecognizerFeedback by removing the tag from the entity and adding it to the recognizer's exception list.", + "javaInterfaces": [ + "org.openmetadata.schema.governance.workflows.elements.WorkflowNodeDefinitionInterface" + ], + "javaType": "org.openmetadata.schema.governance.workflows.elements.nodes.automatedTask.ApplyRecognizerFeedbackTaskDefinition", + "type": "object", + "properties": { + "type": { + "type": "string", + "default": "automatedTask" + }, + "subType": { + "type": "string", + "default": "applyRecognizerFeedbackTask" + }, + "name": { + "title": "Name", + "description": "Name that identifies this Node.", + "$ref": "../../../../../type/basic.json#/definitions/entityName" + }, + "displayName": { + "title": "Display Name", + "description": "Display Name that identifies this Node.", + "type": "string" + }, + "description": { + "title": "Description", + "description": "Description of the Node.", + "$ref": "../../../../../type/basic.json#/definitions/markdown" + }, + "config": { + "type": "object", + 
"properties": {}, + "additionalProperties": false + }, + "input": { + "type": "array", + "items": { "type": "string" }, + "default": ["relatedEntity", "updatedBy", "triggeringObjectId", "recognizerFeedback"], + "additionalItems": false, + "minItems": 1, + "maxItems": 4 + }, + "inputNamespaceMap": { + "type": "object", + "properties": { + "relatedEntity": { + "type": "string", + "default": "global" + }, + "updatedBy": { + "type": "string", + "default": "global" + }, + "triggeringObjectId": { + "type": "string", + "default": "global" + }, + "recognizerFeedback": { + "type": "string", + "default": "global" + } + }, + "additionalProperties": false, + "required": ["relatedEntity", "updatedBy", "triggeringObjectId", "recognizerFeedback"] + }, + "output": { + "type": "array", + "items": { "type": "string" }, + "default": [], + "additionalItems": false, + "minItems": 0, + "maxItems": 0 + } + } +} diff --git a/schemas/governance/workflows/elements/nodes/automatedTask/checkChangeDescriptionTask.json b/schemas/governance/workflows/elements/nodes/automatedTask/checkChangeDescriptionTask.json new file mode 100644 index 0000000..5f84498 --- /dev/null +++ b/schemas/governance/workflows/elements/nodes/automatedTask/checkChangeDescriptionTask.json @@ -0,0 +1,90 @@ +{ + "$id": "https://open-metadata.org/schema/governance/workflows/elements/nodes/automatedTask/checkChangeDescriptionTask.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "CheckChangeDescriptionTaskDefinition", + "description": "Validates if entity's change description matches specified field patterns.", + "javaInterfaces": [ + "org.openmetadata.schema.governance.workflows.elements.WorkflowNodeDefinitionInterface" + ], + "javaType": "org.openmetadata.schema.governance.workflows.elements.nodes.automatedTask.CheckChangeDescriptionTaskDefinition", + "type": "object", + "properties": { + "type": { + "type": "string", + "default": "automatedTask" + }, + "subType": { + "type": "string", + "default": 
"checkChangeDescriptionTask" + }, + "name": { + "title": "Name", + "description": "Name that identifies this Node.", + "$ref": "../../../../../type/basic.json#/definitions/entityName" + }, + "displayName": { + "title": "Display Name", + "description": "Display Name that identifies this Node.", + "type": "string" + }, + "description": { + "title": "Description", + "description": "Description of the Node.", + "$ref": "../../../../../type/basic.json#/definitions/markdown" + }, + "config": { + "title": "Node Configuration", + "type": "object", + "properties": { + "condition": { + "title": "Logical Condition", + "description": "Logical operator to combine multiple field checks (AND requires all fields to match, OR requires at least one).", + "type": "string", + "enum": ["AND", "OR"], + "default": "OR" + }, + "rules": { + "title": "Rules", + "description": "Map of fields to their required values/patterns. Checks fieldsAdded, fieldsUpdated, and fieldsDeleted.", + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "type": "string", + "description": "Field value pattern (substring matching) to check in change description" + } + } + } + }, + "additionalProperties": false, + "required": ["rules"] + }, + "input": { + "type": "array", + "items": { "type": "string" }, + "default": ["relatedEntity"], + "additionalItems": false, + "minItems": 1, + "maxItems": 1 + }, + "inputNamespaceMap": { + "type": "object", + "properties": { + "relatedEntity": { + "type": "string", + "default": "global" + } + }, + "additionalProperties": false, + "required": ["relatedEntity"] + }, + "branches": { + "type": "array", + "items": { "type": "string" }, + "default": ["true", "false"], + "additionalItems": false, + "minItems": 2, + "maxItems": 2 + } + } +} \ No newline at end of file diff --git a/schemas/governance/workflows/elements/nodes/automatedTask/rejectRecognizerFeedbackTask.json 
b/schemas/governance/workflows/elements/nodes/automatedTask/rejectRecognizerFeedbackTask.json new file mode 100644 index 0000000..510ef14 --- /dev/null +++ b/schemas/governance/workflows/elements/nodes/automatedTask/rejectRecognizerFeedbackTask.json @@ -0,0 +1,80 @@ +{ + "$id": "https://open-metadata.org/schema/governance/workflows/elements/nodes/automatedTask/rejectRecognizerFeedbackTask.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "RejectRecognizerFeedbackTaskDefinition", + "description": "Rejects RecognizerFeedback, updating its status to REJECTED without modifying the tag or entity.", + "javaInterfaces": [ + "org.openmetadata.schema.governance.workflows.elements.WorkflowNodeDefinitionInterface" + ], + "javaType": "org.openmetadata.schema.governance.workflows.elements.nodes.automatedTask.RejectRecognizerFeedbackTaskDefinition", + "type": "object", + "properties": { + "type": { + "type": "string", + "default": "automatedTask" + }, + "subType": { + "type": "string", + "default": "rejectRecognizerFeedbackTask" + }, + "name": { + "title": "Name", + "description": "Name that identifies this Node.", + "$ref": "../../../../../type/basic.json#/definitions/entityName" + }, + "displayName": { + "title": "Display Name", + "description": "Display Name that identifies this Node.", + "type": "string" + }, + "description": { + "title": "Description", + "description": "Description of the Node.", + "$ref": "../../../../../type/basic.json#/definitions/markdown" + }, + "config": { + "type": "object", + "properties": {}, + "additionalProperties": false + }, + "input": { + "type": "array", + "items": { "type": "string" }, + "default": ["relatedEntity", "updatedBy", "triggeringObjectId", "recognizerFeedback"], + "additionalItems": false, + "minItems": 1, + "maxItems": 4 + }, + "inputNamespaceMap": { + "type": "object", + "properties": { + "relatedEntity": { + "type": "string", + "default": "global" + }, + "updatedBy": { + "type": "string", + "default": 
"global" + }, + "triggeringObjectId": { + "type": "string", + "default": "global" + }, + "recognizerFeedback": { + "type": "string", + "default": "global" + } + }, + "additionalProperties": false, + "required": ["relatedEntity", "updatedBy", "triggeringObjectId", "recognizerFeedback"] + }, + "output": { + "type": "array", + "items": { "type": "string" }, + "default": [], + "additionalItems": false, + "minItems": 0, + "maxItems": 0 + } + } +} diff --git a/schemas/governance/workflows/elements/nodes/automatedTask/sinkConfig/baseSinkConfig.json b/schemas/governance/workflows/elements/nodes/automatedTask/sinkConfig/baseSinkConfig.json new file mode 100644 index 0000000..093306c --- /dev/null +++ b/schemas/governance/workflows/elements/nodes/automatedTask/sinkConfig/baseSinkConfig.json @@ -0,0 +1,43 @@ +{ + "$id": "https://open-metadata.org/schema/governance/workflows/elements/nodes/automatedTask/sinkConfig/baseSinkConfig.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "BaseSinkConfig", + "description": "Base configuration shared by all sink types.", + "javaType": "org.openmetadata.schema.governance.workflows.elements.nodes.automatedTask.sinkConfig.BaseSinkConfig", + "type": "object", + "properties": { + "retryConfig": { + "title": "Retry Configuration", + "description": "Configuration for retry behavior on failure.", + "type": "object", + "properties": { + "maxRetries": { + "title": "Max Retries", + "description": "Maximum number of retry attempts.", + "type": "integer", + "default": 3, + "minimum": 0, + "maximum": 10 + }, + "retryDelaySeconds": { + "title": "Retry Delay Seconds", + "description": "Delay between retry attempts in seconds.", + "type": "integer", + "default": 5, + "minimum": 1, + "maximum": 300 + } + }, + "additionalProperties": false + }, + "timeout": { + "title": "Timeout", + "description": "Timeout in seconds for sink operations.", + "type": "integer", + "default": 300, + "minimum": 30, + "maximum": 3600 + } + }, + 
"additionalProperties": false +} diff --git a/schemas/governance/workflows/elements/nodes/automatedTask/sinkConfig/gitSinkConfig.json b/schemas/governance/workflows/elements/nodes/automatedTask/sinkConfig/gitSinkConfig.json new file mode 100644 index 0000000..2286415 --- /dev/null +++ b/schemas/governance/workflows/elements/nodes/automatedTask/sinkConfig/gitSinkConfig.json @@ -0,0 +1,149 @@ +{ + "$id": "https://open-metadata.org/schema/governance/workflows/elements/nodes/automatedTask/sinkConfig/gitSinkConfig.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "GitSinkConfig", + "description": "Configuration for Git-based sinks using REST/GraphQL APIs. Supports GitHub (GraphQL), GitLab (REST), and Bitbucket (REST).", + "javaType": "org.openmetadata.schema.governance.workflows.elements.nodes.automatedTask.sinkConfig.GitSinkConfig", + "type": "object", + "definitions": { + "gitCredentialsType": { + "title": "Credentials Type", + "description": "Type of authentication for Git API operations.", + "type": "string", + "enum": ["token"], + "default": "token" + }, + "conflictResolution": { + "title": "Conflict Resolution", + "description": "How to handle files that were modified directly in git.", + "type": "string", + "enum": ["overwriteExternal", "preserveExternal", "fail"], + "default": "overwriteExternal" + } + }, + "properties": { + "repositoryUrl": { + "title": "Repository URL", + "description": "Git repository URL (HTTPS format). 
Example: https://github.com/org/repo.git", + "type": "string", + "examples": ["https://github.com/org/repo.git", "https://github.mycompany.com/org/repo.git"] + }, + "branch": { + "title": "Branch", + "description": "Target branch for commits.", + "type": "string", + "default": "main" + }, + "basePath": { + "title": "Base Path", + "description": "Base directory path in the repository for metadata files.", + "type": "string", + "default": "metadata" + }, + "apiBaseUrl": { + "title": "API Base URL", + "description": "Custom API base URL for GitHub Enterprise. Leave empty for github.com. Example: https://github.mycompany.com/api", + "type": "string" + }, + "credentials": { + "title": "Git Credentials", + "description": "Authentication credentials for Git API operations.", + "type": "object", + "properties": { + "type": { + "$ref": "#/definitions/gitCredentialsType" + }, + "token": { + "title": "Access Token", + "description": "Personal access token or GitHub App token for authentication. Supports secret references (e.g., secret:/path/to/token).", + "type": "string", + "format": "password" + } + }, + "required": ["type", "token"], + "additionalProperties": false + }, + "commitConfig": { + "title": "Commit Configuration", + "description": "Configuration for Git commits.", + "type": "object", + "properties": { + "messageTemplate": { + "title": "Message Template", + "description": "Template for commit messages. 
Variables: {entityType}, {entityName}, {action}, {count}", + "type": "string", + "default": "Sync {entityType}: {entityName}" + }, + "authorName": { + "title": "Author Name", + "description": "Git commit author name.", + "type": "string", + "default": "OpenMetadata Bot" + }, + "authorEmail": { + "title": "Author Email", + "description": "Git commit author email.", + "type": "string", + "format": "email", + "default": "bot@openmetadata.org" + } + }, + "additionalProperties": false + }, + "conflictResolution": { + "$ref": "#/definitions/conflictResolution" + }, + "syncMetadata": { + "title": "Sync Metadata Configuration", + "description": "Configuration for embedding sync metadata in output files.", + "type": "object", + "properties": { + "embed": { + "title": "Embed Metadata", + "description": "Embed _syncMetadata block in each YAML/JSON file for conflict detection and audit.", + "type": "boolean", + "default": true + } + }, + "additionalProperties": false + }, + "retryConfig": { + "title": "Retry Configuration", + "description": "Configuration for retrying failed API calls.", + "type": "object", + "properties": { + "maxRetries": { + "title": "Max Retries", + "description": "Maximum number of retry attempts for transient failures.", + "type": "integer", + "default": 3, + "minimum": 0, + "maximum": 10 + }, + "retryDelaySeconds": { + "title": "Initial Retry Delay", + "description": "Initial delay in seconds before first retry (doubles with each attempt).", + "type": "integer", + "default": 5, + "minimum": 1 + }, + "maxRetryDelaySeconds": { + "title": "Max Retry Delay", + "description": "Maximum delay in seconds between retries (caps exponential backoff).", + "type": "integer", + "default": 60, + "minimum": 1 + } + }, + "additionalProperties": false + }, + "timeout": { + "title": "Timeout", + "description": "Timeout in seconds for API operations.", + "type": "integer", + "default": 300 + } + }, + "required": ["repositoryUrl", "credentials"], + "additionalProperties": 
false +} diff --git a/schemas/governance/workflows/elements/nodes/automatedTask/sinkConfig/webhookSinkConfig.json b/schemas/governance/workflows/elements/nodes/automatedTask/sinkConfig/webhookSinkConfig.json new file mode 100644 index 0000000..616ce02 --- /dev/null +++ b/schemas/governance/workflows/elements/nodes/automatedTask/sinkConfig/webhookSinkConfig.json @@ -0,0 +1,148 @@ +{ + "$id": "https://open-metadata.org/schema/governance/workflows/elements/nodes/automatedTask/sinkConfig/webhookSinkConfig.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "WebhookSinkConfig", + "description": "Configuration for webhook-based sinks.", + "javaType": "org.openmetadata.schema.governance.workflows.elements.nodes.automatedTask.sinkConfig.WebhookSinkConfig", + "type": "object", + "definitions": { + "httpMethod": { + "title": "HTTP Method", + "description": "HTTP method to use for the webhook request.", + "type": "string", + "enum": ["POST", "PUT", "PATCH"], + "default": "POST" + }, + "authType": { + "title": "Authentication Type", + "description": "Type of authentication for webhook requests.", + "type": "string", + "enum": ["none", "bearer", "basic", "apiKey"] + } + }, + "properties": { + "endpoint": { + "title": "Webhook Endpoint", + "description": "HTTP endpoint URL for the webhook.", + "type": "string", + "format": "uri" + }, + "httpMethod": { + "$ref": "#/definitions/httpMethod" + }, + "headers": { + "title": "Custom Headers", + "description": "Additional HTTP headers to include in requests.", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "queryParams": { + "title": "Query Parameters", + "description": "Query parameters to append to the webhook URL.", + "type": "array", + "items": { + "type": "object", + "properties": { + "key": { + "type": "string" + }, + "value": { + "type": "string" + } + }, + "required": ["key", "value"], + "additionalProperties": false + } + }, + "authentication": { + "title": "Authentication", 
+ "description": "Authentication configuration for webhook requests.", + "type": "object", + "properties": { + "type": { + "$ref": "#/definitions/authType" + }, + "token": { + "title": "Bearer Token", + "description": "Bearer token for authentication.", + "type": "string", + "format": "password" + }, + "username": { + "title": "Username", + "description": "Username for basic authentication.", + "type": "string" + }, + "password": { + "title": "Password", + "description": "Password for basic authentication.", + "type": "string", + "format": "password" + }, + "headerName": { + "title": "API Key Header Name", + "description": "Header name for API key authentication.", + "type": "string", + "default": "X-API-Key" + }, + "apiKey": { + "title": "API Key", + "description": "API key value.", + "type": "string", + "format": "password" + } + }, + "additionalProperties": false + }, + "batchConfig": { + "title": "Batch Configuration", + "description": "Configuration for batching multiple entities in a single request.", + "type": "object", + "properties": { + "enabled": { + "title": "Enable Batching", + "description": "Send multiple entities in a single request.", + "type": "boolean", + "default": false + }, + "batchSize": { + "title": "Batch Size", + "description": "Maximum number of entities per request.", + "type": "integer", + "default": 100, + "minimum": 1, + "maximum": 1000 + } + }, + "additionalProperties": false + }, + "retryConfig": { + "title": "Retry Configuration", + "type": "object", + "properties": { + "maxRetries": { + "type": "integer", + "default": 3, + "minimum": 0, + "maximum": 10 + }, + "retryDelaySeconds": { + "type": "integer", + "default": 5, + "minimum": 1 + } + }, + "additionalProperties": false + }, + "timeout": { + "title": "Timeout", + "description": "Timeout in seconds for HTTP requests.", + "type": "integer", + "default": 30 + } + }, + "required": ["endpoint"], + "additionalProperties": false +} diff --git 
a/schemas/governance/workflows/elements/nodes/automatedTask/sinkTask.json b/schemas/governance/workflows/elements/nodes/automatedTask/sinkTask.json new file mode 100644 index 0000000..ced047d --- /dev/null +++ b/schemas/governance/workflows/elements/nodes/automatedTask/sinkTask.json @@ -0,0 +1,184 @@ +{ + "$id": "https://open-metadata.org/schema/governance/workflows/elements/nodes/automatedTask/sinkTask.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "SinkTaskDefinition", + "description": "Pushes entity data to an external sink destination such as Git repositories, webhooks, or HTTP endpoints.", + "javaInterfaces": [ + "org.openmetadata.schema.governance.workflows.elements.WorkflowNodeDefinitionInterface" + ], + "javaType": "org.openmetadata.schema.governance.workflows.elements.nodes.automatedTask.SinkTaskDefinition", + "type": "object", + "definitions": { + "sinkType": { + "title": "Sink Type", + "description": "Type of sink destination.", + "type": "string", + "enum": ["git", "webhook", "httpEndpoint"] + }, + "syncMode": { + "title": "Sync Mode", + "description": "How entities should be synchronized to the sink.", + "type": "string", + "enum": ["append", "overwrite", "merge"], + "default": "overwrite" + }, + "outputFormat": { + "title": "Output Format", + "description": "Format for serialized entity output.", + "type": "string", + "enum": ["yaml", "json"], + "default": "yaml" + }, + "entityFilter": { + "title": "Entity Filter", + "description": "Optional filter to select specific entities or entity types.", + "type": "object", + "properties": { + "entityTypes": { + "title": "Entity Types", + "description": "Filter entities by type (e.g., table, dashboard, glossaryTerm).", + "type": "array", + "items": { "type": "string" } + }, + "domains": { + "title": "Domains", + "description": "Filter entities by domain.", + "type": "array", + "items": { "type": "string" } + }, + "tags": { + "title": "Tags", + "description": "Filter entities by tag.", 
+ "type": "array", + "items": { "type": "string" } + } + }, + "additionalProperties": false + }, + "hierarchyConfig": { + "title": "Hierarchy Configuration", + "description": "Configuration for maintaining asset hierarchy in output.", + "type": "object", + "properties": { + "preserveHierarchy": { + "title": "Preserve Hierarchy", + "description": "Maintain entity hierarchy in directory structure.", + "type": "boolean", + "default": true + }, + "rootPath": { + "title": "Root Path", + "description": "Base directory path for output files.", + "type": "string", + "default": "metadata" + } + }, + "additionalProperties": false + } + }, + "properties": { + "type": { + "type": "string", + "default": "automatedTask" + }, + "subType": { + "type": "string", + "default": "sinkTask" + }, + "name": { + "title": "Name", + "description": "Name that identifies this Node.", + "$ref": "../../../../../type/basic.json#/definitions/entityName" + }, + "displayName": { + "title": "Display Name", + "description": "Display Name that identifies this Node.", + "type": "string" + }, + "description": { + "title": "Description", + "description": "Description of the Node.", + "$ref": "../../../../../type/basic.json#/definitions/markdown" + }, + "config": { + "title": "Sink Task Configuration", + "type": "object", + "properties": { + "sinkType": { + "$ref": "#/definitions/sinkType" + }, + "sinkConfig": { + "title": "Sink Configuration", + "description": "Inline sink-specific configuration. Schema depends on sinkType.", + "type": "object" + }, + "syncMode": { + "$ref": "#/definitions/syncMode" + }, + "outputFormat": { + "$ref": "#/definitions/outputFormat" + }, + "entityFilter": { + "$ref": "#/definitions/entityFilter" + }, + "hierarchyConfig": { + "$ref": "#/definitions/hierarchyConfig" + }, + "batchMode": { + "title": "Batch Mode", + "description": "If true, collect all entities and write in single operation (single git commit). 
If false, write each entity individually.", + "type": "boolean", + "default": true + }, + "timeoutSeconds": { + "title": "Timeout Seconds", + "description": "Timeout in seconds for sink operations.", + "type": "integer", + "default": 300 + } + }, + "required": ["sinkType"], + "additionalProperties": false + }, + "input": { + "type": "array", + "items": { "type": "string" }, + "default": ["relatedEntity", "updatedBy"], + "additionalItems": false, + "minItems": 1 + }, + "inputNamespaceMap": { + "type": "object", + "properties": { + "relatedEntity": { + "type": "string", + "default": "global" + }, + "updatedBy": { + "type": "string", + "default": "global" + } + }, + "additionalProperties": false, + "required": ["relatedEntity"] + }, + "output": { + "title": "Output Variables", + "description": "Variables this node outputs for use in subsequent nodes", + "type": "array", + "items": { "type": "string" }, + "default": ["syncResult", "syncedCount", "failedCount", "result"], + "additionalItems": false + }, + "branches": { + "type": "array", + "items": { "type": "string" }, + "default": ["success", "failure"], + "additionalItems": false, + "minItems": 2, + "maxItems": 2 + } + }, + "required": ["name", "config"], + "additionalProperties": false +} diff --git a/schemas/governance/workflows/elements/nodes/userTask/createRecognizerFeedbackApprovalTask.json b/schemas/governance/workflows/elements/nodes/userTask/createRecognizerFeedbackApprovalTask.json new file mode 100644 index 0000000..f3c8ea1 --- /dev/null +++ b/schemas/governance/workflows/elements/nodes/userTask/createRecognizerFeedbackApprovalTask.json @@ -0,0 +1,131 @@ +{ + "$id": "https://open-metadata.org/schema/governance/workflows/elements/nodes/userTask/createRecognizerFeedbackApprovalTask.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "CreateRecognizerFeedbackApprovalTask", + "description": "Defines a Task to approve recognizer feedback on a Tag.", + "javaInterfaces": [ + 
"org.openmetadata.schema.governance.workflows.elements.WorkflowNodeDefinitionInterface" + ], + "javaType": "org.openmetadata.schema.governance.workflows.elements.nodes.userTask.CreateRecognizerFeedbackApprovalTaskDefinition", + "type": "object", + "properties": { + "type": { + "type": "string", + "default": "userTask" + }, + "subType": { + "type": "string", + "default": "createRecognizerFeedbackApprovalTask" + }, + "name": { + "title": "Name", + "description": "Name that identifies this Node.", + "$ref": "../../../../../type/basic.json#/definitions/entityName" + }, + "displayName": { + "title": "Display Name", + "description": "Display Name that identifies this Node.", + "type": "string" + }, + "description": { + "title": "Description", + "description": "Description of the Node.", + "$ref": "../../../../../type/basic.json#/definitions/markdown" + }, + "config": { + "title": "Node Configuration", + "type": "object", + "properties": { + "assignees": { + "title": "Assignees", + "description": "People/Teams assigned to the Task.", + "type": "object", + "properties": { + "addReviewers": { + "description": "Add the Reviewers to the assignees List.", + "type": "boolean", + "default": true + }, + "addOwners": { + "description": "Add the Owners to the assignees List.", + "type": "boolean", + "default": false + }, + "candidates": { + "description": "List of specific candidates (users or teams) assigned to this task.", + "type": "array", + "items": { + "$ref": "../../../../../type/entityReference.json" + }, + "default": [] + } + }, + "additionalProperties": false + }, + "approvalThreshold": { + "title": "Approval Threshold", + "description": "Number of reviewers that must approve for the task to be completed. Default is 1 (any single reviewer can approve).", + "type": "integer", + "minimum": 1, + "default": 1 + }, + "rejectionThreshold": { + "title": "Rejection Threshold", + "description": "Number of reviewers that must reject for the task to be rejected. 
Default is 1 (any single reviewer can reject). This allows for scenarios where you want multiple approvals but a single rejection can veto.", + "type": "integer", + "minimum": 1, + "default": 1 + } + }, + "required": ["assignees"], + "additionalProperties": false + }, + "input": { + "type": "array", + "items": { "type": "string" }, + "default": ["relatedEntity", "updatedBy", "triggeringObjectId", "recognizerFeedback"], + "additionalItems": false, + "minItems": 1, + "maxItems": 4 + }, + "inputNamespaceMap": { + "type": "object", + "properties": { + "relatedEntity": { + "type": "string", + "default": "global" + }, + "updatedBy": { + "type": "string", + "default": "global" + }, + "triggeringObjectId": { + "type": "string", + "default": "global" + }, + "recognizerFeedback": { + "type": "string", + "default": "global" + } + }, + "additionalProperties": false, + "required": ["relatedEntity", "updatedBy", "triggeringObjectId", "recognizerFeedback"] + }, + "output": { + "type": "array", + "items": { "type": "string" }, + "default": ["updatedBy"], + "additionalItems": false, + "minItems": 1, + "maxItems": 1 + }, + "branches": { + "type": "array", + "items": { "type": "string" }, + "default": ["true", "false"], + "additionalItems": false, + "minItems": 2, + "maxItems": 2 + } + } +} diff --git a/schemas/governance/workflows/elements/nodes/userTask/userApprovalTask.json b/schemas/governance/workflows/elements/nodes/userTask/userApprovalTask.json index a39f765..63f6ab2 100644 --- a/schemas/governance/workflows/elements/nodes/userTask/userApprovalTask.json +++ b/schemas/governance/workflows/elements/nodes/userTask/userApprovalTask.json @@ -44,7 +44,20 @@ "addReviewers": { "description": "Add the Reviewers to the assignees List.", "type": "boolean", + "default": true + }, + "addOwners": { + "description": "Add the Owners to the assignees List.", + "type": "boolean", "default": false + }, + "candidates": { + "description": "List of specific candidates (users or teams) assigned to 
this task.", + "type": "array", + "items": { + "$ref": "../../../../../type/entityReference.json" + }, + "default": [] } }, "additionalProperties": false diff --git a/schemas/governance/workflows/elements/triggers/eventBasedEntityTrigger.json b/schemas/governance/workflows/elements/triggers/eventBasedEntityTrigger.json index 4242358..166c915 100644 --- a/schemas/governance/workflows/elements/triggers/eventBasedEntityTrigger.json +++ b/schemas/governance/workflows/elements/triggers/eventBasedEntityTrigger.json @@ -52,6 +52,15 @@ "type": "string" } }, + "include": { + "title": "Include Fields", + "description": "Array of field names that must be present in the change description to trigger the workflow. Takes priority over exclude fields.", + "type": "array", + "items": { + "type": "string", + "description": "Field name that must be present in the change description for the workflow to trigger" + } + }, "filter": { "title": "Filter Condition", "description": "JSON Logic expression to determine if the workflow should be triggered. Can be a string (applied to all entity types) or an object mapping entity types to their specific filters.", diff --git a/schemas/metadataIngestion/apiServiceMetadataPipeline.json b/schemas/metadataIngestion/apiServiceMetadataPipeline.json index 2a3b45a..fdc5faa 100644 --- a/schemas/metadataIngestion/apiServiceMetadataPipeline.json +++ b/schemas/metadataIngestion/apiServiceMetadataPipeline.json @@ -23,6 +23,11 @@ "$ref": "../type/filterPattern.json#/definitions/filterPattern", "title": "API Collection Filter Pattern" }, + "apiEndpointFilterPattern": { + "description": "Regex to only fetch api endpoints with names matching the pattern.", + "$ref": "../type/filterPattern.json#/definitions/filterPattern", + "title": "API Endpoint Filter Pattern" + }, "markDeletedApiCollections": { "description": "Optional configuration to soft delete api collections in OpenMetadata if the source collections are deleted. 
Also, if the collection is deleted, all the associated entities like endpoints, etc., with that collection will be deleted", "type": "boolean", diff --git a/schemas/metadataIngestion/application.json b/schemas/metadataIngestion/application.json index 882f33a..dff9a92 100644 --- a/schemas/metadataIngestion/application.json +++ b/schemas/metadataIngestion/application.json @@ -34,6 +34,10 @@ "description": "Enable streaming logs to a remote log storage via the OpenMetadata Server", "type": "boolean", "default": false + }, + "ingestionRunnerName": { + "description": "Name of the ingestion runner executing this workflow. Set at dispatch time by the execution environment.", + "type": "string" } }, "required": ["workflowConfig"], diff --git a/schemas/metadataIngestion/dashboardServiceMetadataPipeline.json b/schemas/metadataIngestion/dashboardServiceMetadataPipeline.json index bb97c48..546dfae 100644 --- a/schemas/metadataIngestion/dashboardServiceMetadataPipeline.json +++ b/schemas/metadataIngestion/dashboardServiceMetadataPipeline.json @@ -55,6 +55,11 @@ "$ref": "../type/filterPattern.json#/definitions/filterPattern", "title": "Project Filter Pattern" }, + "queryParserConfig": { + "title": "Query Parser Configuration", + "description": "Configuration for SQL query parser selection for lineage extraction.", + "$ref": "./parserconfig/queryParserConfig.json" + }, "includeOwners": { "title": "Include Current Owners", "description": "Enabling a flag will replace the current owner with a new owner from the source during metadata ingestion, if the current owner is null. It is recommended to keep the flag enabled to obtain the owner information during the first metadata ingestion.", @@ -91,6 +96,12 @@ "type": "boolean", "default": true }, + "includeUsage": { + "title": "Include Usage", + "description": "Optional configuration to toggle the ingestion of usage metadata for dashboards. 
When enabled, usage statistics will be collected and ingested.", + "type": "boolean", + "default": true + }, "overrideMetadata": { "title": "Override Metadata", "description": "Set the 'Override Metadata' toggle to control whether to override the existing metadata in the OpenMetadata server with the metadata fetched from the source. If the toggle is set to true, the metadata fetched from the source will override the existing metadata in the OpenMetadata server. If the toggle is set to false, the metadata fetched from the source will not override the existing metadata in the OpenMetadata server. This is applicable for fields like description, tags, owner and displayName", diff --git a/schemas/metadataIngestion/databaseServiceAutoClassificationPipeline.json b/schemas/metadataIngestion/databaseServiceAutoClassificationPipeline.json index afc9ea8..5943d94 100644 --- a/schemas/metadataIngestion/databaseServiceAutoClassificationPipeline.json +++ b/schemas/metadataIngestion/databaseServiceAutoClassificationPipeline.json @@ -73,6 +73,12 @@ "type": "integer", "default": 50, "title": "Sample Data Rows Count" + }, + "classificationLanguage": { + "description": "Language to use for auto classification recognizers. Use 'any' to run all recognizers regardless of their configured language. 
For specific languages, only recognizers that support that language will be used.", + "$ref": "../type/classificationLanguages.json", + "default": "en", + "title": "Classification Language" } }, "additionalProperties": false diff --git a/schemas/metadataIngestion/databaseServiceMetadataPipeline.json b/schemas/metadataIngestion/databaseServiceMetadataPipeline.json index 584365e..b042885 100644 --- a/schemas/metadataIngestion/databaseServiceMetadataPipeline.json +++ b/schemas/metadataIngestion/databaseServiceMetadataPipeline.json @@ -152,6 +152,11 @@ "$ref": "../type/filterPattern.json#/definitions/filterPattern", "title": "Database Filter Pattern" }, + "storedProcedureFilterPattern": { + "description": "Regex to only include/exclude stored procedures that match the pattern.", + "$ref": "../type/filterPattern.json#/definitions/filterPattern", + "title": "Stored Procedure Filter Pattern" + }, "threads": { "description": "Number of Threads to use in order to parallelize Table ingestion.", "type": "integer", @@ -162,6 +167,20 @@ "title": "Incremental Metadata Extraction Configuration", "description": "Use incremental Metadata extraction after the first execution. This is commonly done by getting the changes from Audit tables on the supporting databases.", "$ref": "#/definitions/incremental" + }, + "extractJsonSchema": { + "title": "Extract JSON Schema", + "description": "Extract JSON schema from JSON columns by sampling data. This requires SELECT permission on the tables. If disabled or if SELECT fails, JSON columns will be ingested without schema information.", + "type": "boolean", + "default": false + }, + "jsonSchemaSampleSize": { + "title": "JSON Schema Sample Size", + "description": "Number of rows to sample for inferring JSON schema. 
A larger sample size provides more accurate schema inference but increases query time.", + "type": "integer", + "default": 10, + "minimum": 1, + "maximum": 1000 } }, "additionalProperties": false diff --git a/schemas/metadataIngestion/databaseServiceProfilerPipeline.json b/schemas/metadataIngestion/databaseServiceProfilerPipeline.json index 576753a..df269e1 100644 --- a/schemas/metadataIngestion/databaseServiceProfilerPipeline.json +++ b/schemas/metadataIngestion/databaseServiceProfilerPipeline.json @@ -68,12 +68,6 @@ "default": false, "title": "Use FQN For Filtering" }, - "computeMetrics": { - "description": "Option to turn on/off computing profiler metrics.", - "type": "boolean", - "default": true, - "title": "Compute Metrics" - }, "computeTableMetrics": { "description": "Option to turn on/off table metric computation. If enabled, profiler will compute table level metrics.", "type": "boolean", @@ -87,10 +81,10 @@ "title": "Compute Column Metrics" }, "useStatistics": { - "description": "Use system tables to extract metrics. Metrics that cannot be gathered from system tables will use the default methods. Using system tables can be faster but requires gathering statistics before running (for example using the ANALYZE procedure). More information can be found in the documentation: https://docs.openmetadata.org/latest/profler", + "description": "Use system tables to extract table metrics. Metrics that cannot be gathered from system tables will use the default methods. Using system tables can be faster but requires gathering statistics before running (for example using the ANALYZE procedure). 
More information can be found in the documentation: https://docs.openmetadata.org/latest/profler", "type": "boolean", "default": false, - "title": "Use Gathered Statistics" + "title": "Use System Table Statistics" }, "profileSampleType": { "$ref": "../entity/data/table.json#/definitions/profileSampleType", @@ -109,12 +103,12 @@ "randomizedSample": { "description": "Whether to randomize the sample data or not.", "type": "boolean", - "default": true + "default": false }, "threadCount": { "description": "Number of threads to use during metric computations", - "type": "number", - "default": 5, + "type": ["integer", "null"], + "default": null, "title": "Thread Count" }, "timeoutSeconds": { @@ -122,6 +116,35 @@ "type": "integer", "default": 43200, "title": "Timeout (in sec.)" + }, + "metrics": { + "description": "List of metrics to compute. If empty, then all metrics will be computed", + "type": "array", + "items": { + "$ref": "../configuration/profilerConfiguration.json#/definitions/metricType" + }, + "default": [ + "mean", + "valuesCount", + "columnCount", + "distinctCount", + "distinctProportion", + "max", + "min", + "nullCount", + "rowCount", + "stddev", + "sum", + "uniqueCount", + "uniqueProportion", + "columnNames", + "nullProportion", + "median", + "firstQuartile", + "thirdQuartile", + "interQuartileRange", + "nonParametricSkew" + ] } }, "additionalProperties": false diff --git a/schemas/metadataIngestion/databaseServiceQueryLineagePipeline.json b/schemas/metadataIngestion/databaseServiceQueryLineagePipeline.json index f7091f9..b58c89b 100644 --- a/schemas/metadataIngestion/databaseServiceQueryLineagePipeline.json +++ b/schemas/metadataIngestion/databaseServiceQueryLineagePipeline.json @@ -41,6 +41,11 @@ "default": 300, "title": "Parsing Timeout Limit" }, + "queryParserConfig": { + "title": "Query Parser Configuration", + "description": "Configuration for SQL query parser selection for lineage extraction.", + "$ref": "./parserconfig/queryParserConfig.json" + }, 
"filterCondition": { "description": "Configuration the condition to filter the query history.", "type": "string", diff --git a/schemas/metadataIngestion/dbtPipeline.json b/schemas/metadataIngestion/dbtPipeline.json index b19cecc..e947ea4 100644 --- a/schemas/metadataIngestion/dbtPipeline.json +++ b/schemas/metadataIngestion/dbtPipeline.json @@ -69,6 +69,12 @@ "type": "boolean", "default": true }, + "overrideLineage":{ + "title": "Override Lineage", + "description": "Set the 'Override Lineage' toggle to control whether to override the existing lineage.", + "type": "boolean", + "default": false + }, "dbtClassificationName": { "title": "DBT Classification Name", "description": "Custom OpenMetadata Classification name for dbt tags.", diff --git a/schemas/metadataIngestion/dbtconfig/dbtCloudConfig.json b/schemas/metadataIngestion/dbtconfig/dbtCloudConfig.json index 2017dd6..f245dbf 100644 --- a/schemas/metadataIngestion/dbtconfig/dbtCloudConfig.json +++ b/schemas/metadataIngestion/dbtconfig/dbtCloudConfig.json @@ -37,8 +37,7 @@ "title": "dbt Cloud URL", "description": "URL to connect to your dbt cloud instance. 
E.g., https://cloud.getdbt.com or https://emea.dbt.com/", "type": "string", - "format": "uri", - "default": "https://cloud.getdbt.com" + "format": "uri" } }, "additionalProperties": false, diff --git a/schemas/metadataIngestion/mcpServiceMetadataPipeline.json b/schemas/metadataIngestion/mcpServiceMetadataPipeline.json new file mode 100644 index 0000000..97a9a05 --- /dev/null +++ b/schemas/metadataIngestion/mcpServiceMetadataPipeline.json @@ -0,0 +1,34 @@ +{ + "$id": "https://open-metadata.org/schema/metadataIngestion/mcpServiceMetadataPipeline.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "McpServiceMetadataPipeline", + "description": "McpService Metadata Pipeline Configuration.", + "type": "object", + "definitions": { + "McpMetadataConfigType": { + "description": "MCP Source Config Metadata Pipeline type", + "type": "string", + "enum": ["McpMetadata"], + "default": "McpMetadata" + } + }, + "properties": { + "type": { + "description": "Pipeline type", + "$ref": "#/definitions/McpMetadataConfigType", + "default": "McpMetadata" + }, + "serverFilterPattern": { + "description": "Regex to only fetch MCP servers with names matching the pattern.", + "$ref": "../type/filterPattern.json#/definitions/filterPattern", + "title": "Server Filter Pattern" + }, + "overrideMetadata": { + "title": "Override Metadata", + "description": "Set the 'Override Metadata' toggle to control whether to override the existing metadata in the OpenMetadata server with the metadata fetched from the source.", + "type": "boolean", + "default": false + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/schemas/metadataIngestion/parserconfig/queryParserConfig.json b/schemas/metadataIngestion/parserconfig/queryParserConfig.json new file mode 100644 index 0000000..34a5f48 --- /dev/null +++ b/schemas/metadataIngestion/parserconfig/queryParserConfig.json @@ -0,0 +1,29 @@ +{ + "$id": 
"https://open-metadata.org/schema/metadataIngestion/parserconfig/queryParserConfig.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Query Parser Config", + "description": "Configuration for SQL query parser selection for lineage and usage extraction.", + "type": "object", + "javaType": "org.openmetadata.schema.metadataIngestion.parserconfig.QueryParserConfig", + "definitions": { + "QueryParserType": { + "description": "Type of SQL query parser to use for lineage and usage extraction. Auto mode is recommended for best results.", + "type": "string", + "enum": [ + "Auto", + "SqlGlot", + "SqlFluff" + ], + "default": "Auto" + } + }, + "properties": { + "type": { + "title": "Query Parser Type", + "description": "Choose the SQL parser for lineage extraction:\n• Auto (default): Automatically tries SqlGlot first, falls back to SqlFluff, then SqlParse. Recommended for best results.\n• SqlGlot: High-performance parser with excellent dialect support. Falls back to SqlParse on failure.\n• SqlFluff: Comprehensive parser with strong dialect support. Falls back to SqlParse on failure.", + "$ref": "#/definitions/QueryParserType", + "default": "Auto" + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/schemas/metadataIngestion/pipelineServiceMetadataPipeline.json b/schemas/metadataIngestion/pipelineServiceMetadataPipeline.json index 4cf921d..ea6900d 100644 --- a/schemas/metadataIngestion/pipelineServiceMetadataPipeline.json +++ b/schemas/metadataIngestion/pipelineServiceMetadataPipeline.json @@ -82,6 +82,12 @@ "default": true, "title": "Include UnDeployed Pipelines" }, + "statusLookbackDays": { + "description": "Number of days of pipeline run status history to ingest. 
Only runs within the last N days will be fetched.", + "type": "integer", + "default": 1, + "title": "Status Lookback Days" + }, "overrideMetadata":{ "title": "Override Metadata", "description": "Set the 'Override Metadata' toggle to control whether to override the existing metadata in the OpenMetadata server with the metadata fetched from the source. If the toggle is set to true, the metadata fetched from the source will override the existing metadata in the OpenMetadata server. If the toggle is set to false, the metadata fetched from the source will not override the existing metadata in the OpenMetadata server. This is applicable for fields like description, tags, owner and displayName", diff --git a/schemas/metadataIngestion/storage/containerMetadataConfig.json b/schemas/metadataIngestion/storage/containerMetadataConfig.json index 0cd65ce..eb19ed2 100644 --- a/schemas/metadataIngestion/storage/containerMetadataConfig.json +++ b/schemas/metadataIngestion/storage/containerMetadataConfig.json @@ -12,19 +12,25 @@ "type": "object", "properties": { "dataPath": { - "title": "Data path", - "description": "The path where the data resides in the container, excluding the bucket name", + "title": "Data Path", + "description": "Literal path relative to the bucket root, or a glob-style pattern. Use a single-star wildcard for one path level, a double-star wildcard for any depth, and a question mark for a single character.", "type": "string" }, "structureFormat": { - "title": "Schema format", - "description": "What's the schema format for the container, eg. avro, parquet, csv.", + "title": "Structure Format", + "description": "Expected file format for schema inference. Leave blank to auto-detect from the file extension. 
Ignored when Unstructured Data is enabled.", "type": "string", "default": null }, + "unstructuredData": { + "title": "Unstructured Data", + "description": "When true, files matching the glob dataPath are cataloged as individual containers without schema extraction. Use for images, documents, and other non-tabular files.", + "type": "boolean", + "default": false + }, "unstructuredFormats": { - "title": "Unstructured format", - "description": "What the unstructured formats you want to ingest, eg. png, pdf, jpg.", + "title": "Unstructured Formats", + "description": "Legacy option for literal dataPath entries. List of file extensions (e.g. png, pdf, jpg) to catalog as unstructured. Prefer the unstructuredData flag with a glob dataPath for new configurations.", "type": "array", "items": { "type": "string" @@ -49,12 +55,59 @@ "type": "boolean", "default": false }, + "autoPartitionDetection": { + "title": "Auto Partition Detection", + "description": "When true and dataPath is a glob, automatically detect Hive-style partition columns from matched paths (e.g. year=2024/month=01). Ignored for literal paths.", + "type": "boolean", + "default": false + }, + "excludePaths": { + "title": "Exclude Path Segments", + "description": "Path segments to skip during glob discovery. Any file whose path contains one of these segments is ignored. Common defaults applied when unset: _delta_log, _temporary, _spark_metadata, .tmp, _SUCCESS.", + "type": "array", + "items": { + "type": "string" + }, + "default": null + }, + "excludePatterns": { + "title": "Exclude Patterns", + "description": "Glob patterns to exclude during glob discovery. Any file matching one of these patterns is skipped.", + "type": "array", + "items": { + "type": "string" + }, + "default": null + }, "partitionColumns": { "title": "Partition Columns", - "description": "What are the partition columns in case the container's data is partitioned", + "description": "Explicit partition column definitions. 
Overrides auto-detection when provided.", "type": "array", "items": { - "$ref": "../../entity/data/table.json#/definitions/column" + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Partition column name.", + "type": "string" + }, + "dataType": { + "title": "Data Type", + "description": "Partition column data type.", + "$ref": "../../entity/data/table.json#/definitions/dataType" + }, + "dataTypeDisplay": { + "title": "Data Type Display", + "description": "Display name for the data type (optional).", + "type": "string" + }, + "description": { + "title": "Description", + "description": "Description of the partition column (optional).", + "type": "string" + } + }, + "required": ["name", "dataType"] }, "default": null } diff --git a/schemas/metadataIngestion/storage/manifestMetadataConfig.json b/schemas/metadataIngestion/storage/manifestMetadataConfig.json index e9418c5..bb8e5ca 100644 --- a/schemas/metadataIngestion/storage/manifestMetadataConfig.json +++ b/schemas/metadataIngestion/storage/manifestMetadataConfig.json @@ -16,19 +16,25 @@ "type": "string" }, "dataPath": { - "title": "Data path", - "description": "The path where the data resides in the container, excluding the bucket name", + "title": "Data Path", + "description": "Literal path relative to the container, or a glob-style pattern. Use a single-star wildcard for one path level, a double-star wildcard for any depth, and a question mark for a single character.", "type": "string" }, "structureFormat": { - "title": "Schema format", - "description": "What's the schema format for the container, eg. avro, parquet, csv.", + "title": "Structure Format", + "description": "Expected file format for schema inference. Leave blank to auto-detect from the file extension. 
Ignored when Unstructured Data is enabled.", "type": "string", "default": null }, + "unstructuredData": { + "title": "Unstructured Data", + "description": "When true, files matching the glob dataPath are cataloged as individual containers without schema extraction. Use for images, documents, and other non-tabular files.", + "type": "boolean", + "default": false + }, "unstructuredFormats": { - "title": "Unstructured Schema Formats", - "description": "What's the schema formats for the container, eg. avro, parquet, csv.", + "title": "Unstructured Formats", + "description": "Legacy option for literal dataPath entries. List of file extensions (e.g. png, pdf, jpg) to catalog as unstructured. Prefer the unstructuredData flag with a glob dataPath for new configurations.", "type": "array", "items": { "type": "string" @@ -47,12 +53,59 @@ "type": "boolean", "default": false }, + "autoPartitionDetection": { + "title": "Auto Partition Detection", + "description": "When true and dataPath is a glob, automatically detect Hive-style partition columns from matched paths (e.g. year=2024/month=01). Ignored for literal paths.", + "type": "boolean", + "default": false + }, + "excludePaths": { + "title": "Exclude Path Segments", + "description": "Path segments to skip during glob discovery. Any file whose path contains one of these segments is ignored. Common defaults applied when unset: _delta_log, _temporary, _spark_metadata, .tmp, _SUCCESS.", + "type": "array", + "items": { + "type": "string" + }, + "default": null + }, + "excludePatterns": { + "title": "Exclude Patterns", + "description": "Glob patterns to exclude during glob discovery. Any file matching one of these patterns is skipped.", + "type": "array", + "items": { + "type": "string" + }, + "default": null + }, "partitionColumns": { "title": "Partition Columns", - "description": "What are the partition columns in case the container's data is partitioned", + "description": "Explicit partition column definitions. 
Overrides auto-detection when provided.", "type": "array", "items": { - "$ref": "../../entity/data/table.json#/definitions/column" + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Partition column name.", + "type": "string" + }, + "dataType": { + "title": "Data Type", + "description": "Partition column data type.", + "$ref": "../../entity/data/table.json#/definitions/dataType" + }, + "dataTypeDisplay": { + "title": "Data Type Display", + "description": "Display name for the data type (optional).", + "type": "string" + }, + "description": { + "title": "Description", + "description": "Description of the partition column (optional).", + "type": "string" + } + }, + "required": ["name", "dataType"] }, "default": null }, diff --git a/schemas/metadataIngestion/storageServiceMetadataPipeline.json b/schemas/metadataIngestion/storageServiceMetadataPipeline.json index dbac66d..3d0927e 100644 --- a/schemas/metadataIngestion/storageServiceMetadataPipeline.json +++ b/schemas/metadataIngestion/storageServiceMetadataPipeline.json @@ -31,6 +31,7 @@ "storageMetadataConfigSource": { "mask": true, "title": "Storage Metadata Configuration Source", + "description": "Global manifest source. When configured, entries here take precedence over any bucket-level openmetadata.json and over defaultManifest for buckets whose containerName matches.", "oneOf": [ { "$ref": "#/definitions/noMetadataConfigurationSource" @@ -69,6 +70,14 @@ "type": "boolean", "default": false, "title": "Include Tags" + }, + "defaultManifest": { + "title": "Default Manifest (JSON)", + "description": "Fallback manifest applied to any bucket that does not have its own openmetadata.json file. If a bucket has a manifest file, that file takes precedence and this value is ignored for that bucket. 
Paste the same JSON you would place in a bucket's openmetadata.json file — entries accept literal paths or glob-style dataPath patterns.", + "type": "string", + "uiFieldType": "code", + "format": "json", + "default": null } }, "additionalProperties": false diff --git a/schemas/metadataIngestion/workflow.json b/schemas/metadataIngestion/workflow.json index cb5c9f2..1b6e7f6 100644 --- a/schemas/metadataIngestion/workflow.json +++ b/schemas/metadataIngestion/workflow.json @@ -68,6 +68,9 @@ }, { "$ref": "reverseIngestionPipeline.json" + }, + { + "$ref": "mcpServiceMetadataPipeline.json" } ] } @@ -239,6 +242,10 @@ "description": "Enable streaming logs to a remote log storage via the OpenMetadata Server", "type": "boolean", "default": false + }, + "ingestionRunnerName": { + "description": "Name of the ingestion runner executing this workflow. Set at dispatch time by the execution environment.", + "type": "string" } }, "required": ["source", "workflowConfig"], diff --git a/schemas/search/aggregationRequest.json b/schemas/search/aggregationRequest.json index eeba01e..456f52a 100644 --- a/schemas/search/aggregationRequest.json +++ b/schemas/search/aggregationRequest.json @@ -43,6 +43,10 @@ "type": "string" } }, + "queryText": { + "description": "Free-text search query used to scope aggregation results to matching documents.", + "type": "string" + }, "topHits": { "description": "Optional top_hits sub-aggregation to fetch selected source fields per bucket.", "type": "object", diff --git a/schemas/search/searchRequest.json b/schemas/search/searchRequest.json index fa68208..470503f 100644 --- a/schemas/search/searchRequest.json +++ b/schemas/search/searchRequest.json @@ -105,11 +105,6 @@ "description": "Field Value in case of Aggregations.", "type": "string" }, - "semanticSearch": { - "description": "Enable semantic search using embeddings and RDF context. 
When true, combines vector similarity with traditional BM25 scoring.", - "type": "boolean", - "default": false - }, "includeAggregations": { "description": "Include aggregations in the search response. Defaults to true. Set to false to skip aggregations for faster response times when only search results are needed.", "type": "boolean", diff --git a/schemas/security/client/samlSSOClientConfig.json b/schemas/security/client/samlSSOClientConfig.json index d38a74a..70185a3 100644 --- a/schemas/security/client/samlSSOClientConfig.json +++ b/schemas/security/client/samlSSOClientConfig.json @@ -19,17 +19,18 @@ "description": "SSO Login URL.", "type": "string" }, + "authorityUrl": { + "description": "Authority URL (deprecated, use entityId instead).", + "type": "string", + "deprecated": true + }, "idpX509Certificate": { "description": "X509 Certificate ", "type": "string", "format": "password" }, - "authorityUrl": { - "description": "Authority URL to redirect the users on Sign In page", - "type": "string" - }, "nameId": { - "description": "Authority URL to redirect the users on Sign In page", + "description": "Name ID format for SAML assertions", "type": "string", "default": "urn:oasis:names:tc:SAML:2.0:nameid-format:emailAddress" } @@ -149,6 +150,13 @@ "description": "Get logs from the Library in debug mode", "type": "boolean", "default": false + }, + "samlDisplayNameAttributes": { + "description": "Ordered list of SAML attribute names to check for display name. First available attribute wins. 
Defaults to common OIDC/SAML attribute names.", + "type": "array", + "items": { + "type": "string" + } } }, "additionalProperties": false, diff --git a/schemas/security/credentials/awsBaseConfig.json b/schemas/security/credentials/awsBaseConfig.json new file mode 100644 index 0000000..bed3264 --- /dev/null +++ b/schemas/security/credentials/awsBaseConfig.json @@ -0,0 +1,47 @@ +{ + "$id": "https://open-metadata.org/schema/security/credentials/awsBaseConfig.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "AWSBaseConfig", + "description": "Base AWS configuration for authentication. Supports static credentials, IAM roles, and default credential provider chain.", + "type": "object", + "javaType": "org.openmetadata.schema.security.credentials.AWSBaseConfig", + "properties": { + "enabled": { + "description": "Enable AWS IAM authentication. When enabled, uses the default credential provider chain (environment variables, instance profile, etc.). Defaults to false for backward compatibility.", + "type": "boolean", + "default": false + }, + "region": { + "description": "AWS Region (e.g., us-east-1). Required when AWS authentication is enabled.", + "type": "string" + }, + "accessKeyId": { + "description": "AWS Access Key ID. Falls back to default credential provider chain if not set.", + "type": "string" + }, + "secretAccessKey": { + "description": "AWS Secret Access Key. 
Falls back to default credential provider chain if not set.", + "type": "string", + "format": "password" + }, + "sessionToken": { + "description": "AWS Session Token for temporary credentials.", + "type": "string" + }, + "assumeRoleArn": { + "description": "ARN of IAM role to assume for cross-account access.", + "type": "string" + }, + "assumeRoleSessionName": { + "description": "Session name for assumed role.", + "type": "string", + "default": "OpenMetadataSession" + }, + "endpointUrl": { + "description": "Custom endpoint URL for AWS-compatible services (MinIO, LocalStack).", + "type": "string", + "format": "uri" + } + }, + "additionalProperties": false +} diff --git a/schemas/security/credentials/awsCredentials.json b/schemas/security/credentials/awsCredentials.json index 83858ce..9a6661b 100644 --- a/schemas/security/credentials/awsCredentials.json +++ b/schemas/security/credentials/awsCredentials.json @@ -6,6 +6,12 @@ "type": "object", "javaType": "org.openmetadata.schema.security.credentials.AWSCredentials", "properties": { + "enabled": { + "title": "Enable IAM Auth", + "description": "Enable AWS IAM authentication. When enabled, uses the default credential provider chain (environment variables, instance profile, etc.). 
Defaults to false for backward compatibility.", + "type": "boolean", + "default": false + }, "awsAccessKeyId": { "title": "AWS Access Key ID", "description": "AWS Access key ID.", diff --git a/schemas/security/credentials/fabricCredentials.json b/schemas/security/credentials/fabricCredentials.json new file mode 100644 index 0000000..66208f6 --- /dev/null +++ b/schemas/security/credentials/fabricCredentials.json @@ -0,0 +1,62 @@ +{ + "$id": "https://open-metadata.org/schema/security/credentials/fabricCredentials.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "FabricCredentials", + "description": "Microsoft Fabric credentials configuration for authentication across Fabric services (Database, Pipeline, Dashboard).", + "type": "object", + "javaType": "org.openmetadata.schema.security.credentials.FabricCredentials", + "definitions": { + "servicePrincipal": { + "title": "Service Principal Authentication", + "description": "Authenticate using Azure Active Directory Service Principal (Application)", + "type": "object", + "properties": { + "type": { + "title": "Credentials Type", + "description": "Azure Service Principal Authentication", + "type": "string", + "default": "ServicePrincipal", + "const": "ServicePrincipal" + }, + "clientId": { + "title": "Client ID", + "description": "Azure Active Directory Application (Client) ID", + "type": "string" + }, + "clientSecret": { + "title": "Client Secret", + "description": "Azure Active Directory Application Client Secret", + "type": "string", + "format": "password" + }, + "tenantId": { + "title": "Tenant ID", + "description": "Azure Active Directory Tenant ID", + "type": "string" + }, + "authorityUri": { + "title": "Authority URI", + "description": "Azure AD Authority URI. 
Defaults to https://login.microsoftonline.com/", + "type": "string", + "default": "https://login.microsoftonline.com/", + "format": "uri" + } + }, + "required": ["clientId", "clientSecret", "tenantId"], + "additionalProperties": false + } + }, + "properties": { + "credentials": { + "title": "Fabric Credentials", + "description": "Microsoft Fabric authentication credentials", + "oneOf": [ + { + "$ref": "#/definitions/servicePrincipal" + } + ] + } + }, + "additionalProperties": false, + "required": ["credentials"] +} diff --git a/schemas/settings/settings.json b/schemas/settings/settings.json index d748063..4daacd6 100644 --- a/schemas/settings/settings.json +++ b/schemas/settings/settings.json @@ -30,6 +30,7 @@ "slackBot", "slackInstaller", "slackState", + "teamsAppConfiguration", "profilerConfiguration", "searchSettings", "assetCertificationSettings", @@ -38,7 +39,9 @@ "scimConfiguration", "securityConfiguration", "entityRulesSettings", - "openLineageSettings" + "openLineageSettings", + "mcpConfiguration", + "glossaryTermRelationSettings" ] } }, @@ -82,6 +85,9 @@ { "$ref": "../configuration/slackAppConfiguration.json" }, + { + "$ref": "../configuration/teamsAppConfiguration.json" + }, { "$ref": "../configuration/profilerConfiguration.json" }, @@ -105,6 +111,12 @@ }, { "$ref": "../configuration/openLineageSettings.json" + }, + { + "$ref": "../configuration/mcpConfiguration.json" + }, + { + "$ref": "../configuration/glossaryTermRelationSettings.json" } ] } diff --git a/schemas/system/eventPublisherJob.json b/schemas/system/eventPublisherJob.json index 16a8910..347c1be 100644 --- a/schemas/system/eventPublisherJob.json +++ b/schemas/system/eventPublisherJob.json @@ -29,6 +29,24 @@ "type": "integer", "default": 0, "minimum": 0 + }, + "warningRecords": { + "description": "Count of Records with Warnings (e.g., stale references that were skipped)", + "type": "integer", + "default": 0, + "minimum": 0 + }, + "vectorSuccessRecords": { + "description": "Count of records with 
successful vector embeddings", + "type": "integer", + "default": 0, + "minimum": 0 + }, + "vectorFailedRecords": { + "description": "Count of records with failed vector embeddings", + "type": "integer", + "default": 0, + "minimum": 0 } } }, @@ -44,10 +62,18 @@ "description": "Stats for the reader step (reading from database)", "$ref": "#/definitions/stepStats" }, + "processStats": { + "description": "Stats for the process step (building search index documents)", + "$ref": "#/definitions/stepStats" + }, "sinkStats": { "description": "Stats for the sink step (writing to search index)", "$ref": "#/definitions/stepStats" }, + "vectorStats": { + "description": "Stats for the vector step (generating and indexing vector embeddings)", + "$ref": "#/definitions/stepStats" + }, "entityStats": { "javaType": "org.openmetadata.schema.system.EntityStats", "description": "Stats for different entities. Keys should match entity types", @@ -128,7 +154,7 @@ "description": "Payload size in bytes depending on config.", "type": "integer", "existingJavaType": "java.lang.Long", - "default": 104857600, + "default": 9437184, "minimum": 1 }, "producerThreads": { @@ -205,6 +231,35 @@ "slackChannel": { "description": "Slack channel ID or name (required when using bot token, e.g., 'C1234567890' or '#general')", "type": "string" + }, + "useDistributedIndexing": { + "description": "Enable distributed indexing across multiple servers. When enabled, reindexing work is partitioned and can be processed by multiple servers concurrently with crash recovery support.", + "type": "boolean", + "default": true + }, + "partitionSize": { + "title": "Partition Size", + "description": "Number of entities per partition for distributed indexing. Smaller values create more partitions for better distribution across servers. 
Range: 1000-50000.", + "type": "integer", + "default": 10000, + "minimum": 1000, + "maximum": 50000 + }, + "timeSeriesMaxDays": { + "title": "Time Series Max Days", + "description": "Maximum age in days for time series data during reindexing. Only records from the last N days will be indexed. Default 0 (index all data). Set to a positive value like 15 to limit to recent data.", + "type": "integer", + "default": 0, + "minimum": -1 + }, + "timeSeriesEntityDays": { + "title": "Time Series Entity Days Override", + "description": "Per-entity-type override for time series max days. Keys are entity type names, values are number of days. Entities not in this map use timeSeriesMaxDays as default.", + "type": "object", + "existingJavaType": "java.util.Map", + "additionalProperties": { + "type": "integer" + } } }, "additionalProperties": false diff --git a/schemas/system/ui/page.json b/schemas/system/ui/page.json index a5c5dc1..e833e76 100644 --- a/schemas/system/ui/page.json +++ b/schemas/system/ui/page.json @@ -20,6 +20,7 @@ "Pipeline", "Dashboard", "DashboardDataModel", + "DataMarketplace", "Container", "SearchIndex", "Glossary", diff --git a/schemas/system/validationResponse.json b/schemas/system/validationResponse.json index a105a93..f726110 100644 --- a/schemas/system/validationResponse.json +++ b/schemas/system/validationResponse.json @@ -53,5 +53,5 @@ "$ref": "#/definitions/stepValidation" } }, - "additionalProperties": false + "additionalProperties": true } \ No newline at end of file diff --git a/schemas/tests/testCase.json b/schemas/tests/testCase.json index 1f0ae55..518cec9 100644 --- a/schemas/tests/testCase.json +++ b/schemas/tests/testCase.json @@ -139,6 +139,10 @@ "description": "Domains the test case belongs to. When not set, the test case inherits the domain from the table it belongs to.", "$ref": "../type/entityReferenceList.json" }, + "dataProducts": { + "description": "List of data products this test case is part of. 
When not set, the test case inherits the data products from the table it belongs to.", + "$ref": "../type/entityReferenceList.json" + }, "followers": { "description": "Followers of this test case. When not set, the test case inherits the followers from the table it belongs to.", "$ref": "../type/entityReferenceList.json" @@ -164,6 +168,13 @@ }, "default": [] }, + "topDimensions": { + "description": "Number of top dimension values to show before grouping the rest as Others. Controls the cardinality of dimensional test results. Defaults to 5 when not specified.", + "type": "integer", + "minimum": 1, + "maximum": 50, + "default": 5 + }, "entityStatus": { "description": "Current status of the test case.", "$ref": "../type/status.json", diff --git a/schemas/tests/testDefinition.json b/schemas/tests/testDefinition.json index befa4c6..3de4101 100644 --- a/schemas/tests/testDefinition.json +++ b/schemas/tests/testDefinition.json @@ -116,7 +116,8 @@ "Validity", "Uniqueness", "Integrity", - "SQL" + "SQL", + "NoDimension" ] } }, @@ -216,6 +217,11 @@ "type": "boolean", "default": false }, + "enabled": { + "description": "When `true` indicates the test definition is available for creating test cases. System test definitions can only be disabled by users with appropriate permissions.", + "type": "boolean", + "default": true + }, "supportedServices": { "description": "List of services that this test definition supports. When empty, it implies all services are supported.", "type": "array", @@ -223,8 +229,16 @@ "type": "string" }, "default": [] + }, + "sqlExpression": { + "description": "SQL expression template for custom SQL-based test definitions. Supports substitution variables: {table} and {column} for runtime entity references, and {{paramName}} for user-defined parameters. 
This field is only applicable for test definitions with testPlatforms set to 'OpenMetadata' and is used to execute custom SQL queries for data quality validation.", + "$ref": "../type/basic.json#/definitions/sqlQuery" + }, + "validatorClass": { + "description": "Class name of the validator to use for this test definition. This field is used to map the test definition to its corresponding validation logic in the data quality framework.", + "type": "string" } }, - "required": ["name", "description", "testPlatforms"], + "required": ["name", "testPlatforms"], "additionalProperties": false } diff --git a/schemas/type/aiCompliance.json b/schemas/type/aiCompliance.json new file mode 100644 index 0000000..d2f7ebe --- /dev/null +++ b/schemas/type/aiCompliance.json @@ -0,0 +1,332 @@ +{ + "$id": "https://open-metadata.org/schema/type/aiCompliance.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "AICompliance", + "description": "Reusable AI compliance and regulatory framework assessments. 
Can be applied to AI Applications, LLM Models, MCP Servers, and other AI entities.", + "type": "object", + "javaType": "org.openmetadata.schema.type.AICompliance", + "definitions": { + "complianceFramework": { + "type": "string", + "javaType": "org.openmetadata.schema.type.ComplianceFramework", + "description": "Type of AI compliance framework", + "enum": [ + "EU_AI_Act", + "Singapore_Model_AI_Governance", + "Canada_AIDA", + "US_AI_Bill_of_Rights", + "NIST_AI_RMF", + "ISO_IEC_42001", + "UK_AI_Regulation", + "China_AI_Regulations", + "Custom" + ], + "javaEnums": [ + {"name": "EU_AI_Act"}, + {"name": "Singapore_Model_AI_Governance"}, + {"name": "Canada_AIDA"}, + {"name": "US_AI_Bill_of_Rights"}, + {"name": "NIST_AI_RMF"}, + {"name": "ISO_IEC_42001"}, + {"name": "UK_AI_Regulation"}, + {"name": "China_AI_Regulations"}, + {"name": "Custom"} + ] + }, + "euAIActCompliance": { + "type": "object", + "javaType": "org.openmetadata.schema.type.EUAIActCompliance", + "description": "EU AI Act compliance assessment (Regulation EU 2024/1689)", + "additionalProperties": false, + "properties": { + "riskClassification": { + "type": "string", + "description": "Risk classification under EU AI Act", + "enum": ["Minimal", "Limited", "High", "Unacceptable"] + }, + "riskRationale": { + "type": "string", + "description": "Rationale for the risk classification" + }, + "prohibitedPractices": { + "type": "object", + "description": "Article 5 prohibited AI practices assessment", + "properties": { + "subliminalManipulativeTechniques": { + "type": "boolean", + "description": "Art 5(1)(a): Subliminal techniques beyond person's consciousness" + }, + "exploitationOfVulnerabilities": { + "type": "boolean", + "description": "Art 5(1)(b): Exploitation of vulnerabilities due to age, disability, or social/economic situation" + }, + "socialScoringSystem": { + "type": "boolean", + "description": "Art 5(1)(c): Social scoring by public authorities" + }, + "riskAssessmentCriminalOffences": { + "type": 
"boolean", + "description": "Art 5(1)(d): Risk assessment based solely on profiling for predicting criminal offences" + }, + "facialRecognitionDatabaseCreation": { + "type": "boolean", + "description": "Art 5(1)(e): Untargeted scraping of facial images for facial recognition databases" + }, + "emotionInferenceWorkplaceEducation": { + "type": "boolean", + "description": "Art 5(1)(f): Emotion recognition in workplace and education" + }, + "biometricCategorisation": { + "type": "boolean", + "description": "Art 5(1)(g): Biometric categorisation inferring sensitive attributes" + }, + "realTimeBiometricIdentification": { + "type": "boolean", + "description": "Art 5(1)(h): Real-time remote biometric identification in public spaces by law enforcement" + } + } + }, + "highRiskSystems": { + "type": "object", + "description": "Article 6 high-risk AI systems assessment", + "properties": { + "criticalInfrastructure": { + "type": "boolean", + "description": "Annex III(1): Critical infrastructure (transport, water, gas, electricity, etc.)" + }, + "educationVocationalTraining": { + "type": "boolean", + "description": "Annex III(3): Education and vocational training" + }, + "employment": { + "type": "boolean", + "description": "Annex III(4): Employment, workers management, and access to self-employment" + }, + "essentialPrivateServices": { + "type": "boolean", + "description": "Annex III(5): Access to essential private services (credit, insurance, etc.)" + }, + "essentialPublicServices": { + "type": "boolean", + "description": "Annex III(6): Law enforcement" + }, + "lawEnforcement": { + "type": "boolean", + "description": "Annex III(6): Law enforcement purposes" + }, + "migrationAsylumBorderControl": { + "type": "boolean", + "description": "Annex III(7): Migration, asylum, and border control management" + }, + "administrationOfJustice": { + "type": "boolean", + "description": "Annex III(8): Administration of justice and democratic processes" + } + } + }, + "conformityAssessment": { 
+ "type": "object", + "description": "Conformity assessment status", + "properties": { + "assessmentRequired": { + "type": "boolean", + "description": "Whether conformity assessment is required" + }, + "assessmentType": { + "type": "string", + "description": "Type of conformity assessment", + "enum": ["Internal", "ThirdParty", "NotRequired"] + }, + "assessmentBody": { + "type": "string", + "description": "Name of notified body performing assessment" + }, + "certificateNumber": { + "type": "string", + "description": "Certificate number if issued" + }, + "validUntil": { + "$ref": "basic.json#/definitions/timestamp", + "description": "Certificate validity date" + } + } + }, + "transparencyObligations": { + "type": "object", + "description": "Article 50 transparency obligations", + "properties": { + "usersInformed": { + "type": "boolean", + "description": "Users are informed they are interacting with AI" + }, + "deepfakeLabeling": { + "type": "boolean", + "description": "AI-generated content is appropriately labeled" + }, + "emotionRecognitionDisclosure": { + "type": "boolean", + "description": "Emotion recognition or biometric categorization disclosed" + } + } + } + } + }, + "ethicalAIAssessment": { + "type": "object", + "javaType": "org.openmetadata.schema.type.EthicalAIAssessment", + "description": "Ethical AI framework assessment covering privacy, fairness, transparency, accountability, and environmental impact", + "additionalProperties": false, + "properties": { + "privacyLevel": { + "type": "string", + "description": "Level of privacy-sensitive data accessed", + "enum": ["Public", "Sensitive", "PersonalData"] + }, + "fairnessRisk": { + "type": "string", + "description": "Risk level for fairness and discrimination", + "enum": ["Low", "Medium", "High"] + }, + "biasMitigationCoverage": { + "type": "string", + "description": "Coverage of bias mitigation measures", + "enum": ["None", "Partial", "Full"] + }, + "reliabilitySafetyRisk": { + "type": "string", + 
"description": "Risk level for reliability and safety", + "enum": ["Low", "Moderate", "High"] + }, + "transparencyLevel": { + "type": "string", + "description": "Level of transparency in AI operations", + "enum": ["None", "Partial", "FullDisclosure"] + }, + "accountabilityMeasures": { + "type": "object", + "description": "Accountability measures in place", + "properties": { + "hasOwner": { + "type": "boolean", + "description": "Has designated owner responsible for AI system" + }, + "subjectToHumanOversight": { + "type": "boolean", + "description": "Subject to human oversight and intervention" + }, + "auditTrailEnabled": { + "type": "boolean", + "description": "Comprehensive audit trail enabled" + } + } + }, + "environmentalConsciousness": { + "type": "string", + "description": "Environmental impact risk level (carbon footprint, energy consumption)", + "enum": ["LowRisk", "MediumRisk", "HighRisk"] + } + } + }, + "aiComplianceRecord": { + "type": "object", + "javaType": "org.openmetadata.schema.type.AIComplianceRecord", + "description": "Single compliance record for a specific framework", + "additionalProperties": false, + "properties": { + "framework": { + "$ref": "#/definitions/complianceFramework" + }, + "assessedBy": { + "type": "string", + "description": "Person or team who performed the assessment" + }, + "assessedAt": { + "$ref": "basic.json#/definitions/timestamp", + "description": "When the assessment was performed" + }, + "nextReviewDate": { + "$ref": "basic.json#/definitions/timestamp", + "description": "When the next compliance review is due" + }, + "status": { + "type": "string", + "description": "Compliance status", + "enum": ["Compliant", "PartiallyCompliant", "NonCompliant", "UnderReview", "NotApplicable"] + }, + "euAIAct": { + "$ref": "#/definitions/euAIActCompliance", + "description": "EU AI Act specific assessment (only when framework is EU_AI_Act)" + }, + "ethicalAssessment": { + "$ref": "#/definitions/ethicalAIAssessment", + "description": 
"Ethical AI assessment applicable to most frameworks" + }, + "scopeAndDeployment": { + "type": "object", + "description": "Deployment scope relevant to compliance jurisdiction", + "properties": { + "scope": { + "type": "string", + "description": "Scope of AI usage", + "enum": ["Internal", "External", "Both"] + }, + "deploymentRegions": { + "type": "array", + "description": "Geographic regions where deployed (relevant for jurisdiction)", + "items": {"type": "string"} + }, + "affectedUserCount": { + "type": "integer", + "description": "Estimated number of affected users" + } + } + }, + "verification": { + "type": "object", + "description": "Verification and certification status", + "properties": { + "isVerified": { + "type": "boolean", + "description": "Whether compliance has been verified" + }, + "verifiedBy": { + "type": "string", + "description": "Verifier (internal auditor, external body, etc.)" + }, + "verifiedAt": { + "$ref": "basic.json#/definitions/timestamp", + "description": "Timestamp of verification" + }, + "verificationNotes": { + "type": "string", + "description": "Notes from verification process" + }, + "certificateUrl": { + "type": "string", + "description": "URL to certificate or compliance documentation" + } + } + }, + "notes": { + "type": "string", + "description": "Additional notes and findings from compliance assessment" + }, + "remediationRequired": { + "type": "array", + "description": "List of remediation actions required for compliance", + "items": {"type": "string"} + } + }, + "required": ["framework", "status"] + } + }, + "properties": { + "complianceRecords": { + "description": "List of compliance assessments for different frameworks", + "type": "array", + "items": { + "$ref": "#/definitions/aiComplianceRecord" + } + } + } +} diff --git a/schemas/type/basic.json b/schemas/type/basic.json index 2b8d1ff..5eeafb8 100644 --- a/schemas/type/basic.json +++ b/schemas/type/basic.json @@ -282,6 +282,12 @@ }, "provider": { "$ref": 
"#/definitions/providerType" + }, + "inherited": { + "title": "Inherited", + "description": "Whether this rule was inherited from a Data Product.", + "type": "boolean", + "default": false } }, "required": [ diff --git a/schemas/type/changeEventType.json b/schemas/type/changeEventType.json index ae0bd94..9a3415d 100644 --- a/schemas/type/changeEventType.json +++ b/schemas/type/changeEventType.json @@ -24,6 +24,8 @@ "suggestionUpdated", "suggestionAccepted", "suggestionRejected", - "suggestionDeleted" + "suggestionDeleted", + "userLogin", + "userLogout" ] } diff --git a/schemas/type/classificationLanguages.json b/schemas/type/classificationLanguages.json new file mode 100644 index 0000000..18c324c --- /dev/null +++ b/schemas/type/classificationLanguages.json @@ -0,0 +1,93 @@ +{ + "$id": "https://open-metadata.org/schema/type/classificationLanguages.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "ClassificationLanguage", + "description": "Supported languages for auto classification recognizers (ISO 639-1 codes). 
Use 'any' to apply all recognizers regardless of their configured language.", + "type": "string", + "javaType": "org.openmetadata.schema.type.ClassificationLanguage", + "enum": [ + "any", + "af", + "sq", + "am", + "ar", + "hy", + "az", + "eu", + "be", + "bn", + "bs", + "bg", + "ca", + "zh", + "hr", + "cs", + "da", + "nl", + "en", + "et", + "fi", + "fr", + "gl", + "ka", + "de", + "el", + "gu", + "ht", + "he", + "hi", + "hu", + "is", + "id", + "ga", + "it", + "ja", + "kn", + "kk", + "km", + "ko", + "ku", + "ky", + "lo", + "lv", + "lt", + "mk", + "ms", + "ml", + "mt", + "mi", + "mr", + "mn", + "my", + "ne", + "no", + "ps", + "fa", + "pl", + "pt", + "pa", + "ro", + "ru", + "sr", + "si", + "sk", + "sl", + "so", + "es", + "sw", + "sv", + "tl", + "ta", + "te", + "th", + "tr", + "uk", + "ur", + "uz", + "vi", + "cy", + "yi", + "zu" + ], + "default": "en" +} \ No newline at end of file diff --git a/schemas/type/contextRecognizer.json b/schemas/type/contextRecognizer.json index 9c75730..ecc7d61 100644 --- a/schemas/type/contextRecognizer.json +++ b/schemas/type/contextRecognizer.json @@ -17,15 +17,10 @@ }, "minItems": 1 }, - "supportedEntity": { - "description": "The entity type this recognizer detects", - "$ref": "piiEntity.json" - }, "supportedLanguage": { - "description": "Language supported by this recognizer (ISO 639-1 code)", - "type": "string", - "default": "en", - "pattern": "^[a-z]{2}(-[A-Z]{2})?$" + "description": "Language supported by this recognizer", + "$ref": "classificationLanguages.json", + "default": "en" }, "minScore": { "description": "Minimum confidence score", @@ -47,6 +42,6 @@ "default": 0.05 } }, - "required": ["type", "contextWords", "supportedEntity", "supportedLanguage"], + "required": ["type", "contextWords", "supportedLanguage"], "additionalProperties": false } \ No newline at end of file diff --git a/schemas/type/customProperties/complexTypes.json b/schemas/type/customProperties/complexTypes.json index 6bb1023..db71dea 100644 --- 
a/schemas/type/customProperties/complexTypes.json +++ b/schemas/type/customProperties/complexTypes.json @@ -132,11 +132,34 @@ }, "required": ["columns"], "additionalProperties": false + }, + "hyperlink-cp": { + "$comment": "@om-field-type", + "title": "Hyperlink", + "description": "A hyperlink custom property containing a URL and optional display text. When display text is provided, it renders as a clickable hyperlink with the text label.", + "type": "object", + "javaType": "org.openmetadata.schema.type.customProperties.Hyperlink", + "properties": { + "url": { + "type": "string", + "description": "The URL that the hyperlink points to.", + "format": "uri" + }, + "displayText": { + "type": "string", + "description": "Optional display text for the hyperlink. If not provided, the URL will be displayed." + } + }, + "required": ["url"], + "additionalProperties": false } }, "properties": { "table-cp": { "$ref": "#/definitions/table-cp" + }, + "hyperlink-cp": { + "$ref": "#/definitions/hyperlink-cp" } } } \ No newline at end of file diff --git a/schemas/type/customRecognizer.json b/schemas/type/customRecognizer.json index 8279b62..8dccea1 100644 --- a/schemas/type/customRecognizer.json +++ b/schemas/type/customRecognizer.json @@ -18,17 +18,12 @@ "type": "object", "additionalProperties": true }, - "supportedEntity": { - "description": "The entity type this recognizer detects", - "$ref": "piiEntity.json" - }, "supportedLanguage": { - "description": "Language supported by this recognizer (ISO 639-1 code)", - "type": "string", - "default": "en", - "pattern": "^[a-z]{2}(-[A-Z]{2})?$" + "description": "Language supported by this recognizer", + "$ref": "classificationLanguages.json", + "default": "en" } }, - "required": ["type", "validatorFunction", "supportedEntity", "supportedLanguage"], + "required": ["type", "validatorFunction", "supportedLanguage"], "additionalProperties": false } \ No newline at end of file diff --git a/schemas/type/denyListRecognizer.json 
b/schemas/type/denyListRecognizer.json deleted file mode 100644 index a527e26..0000000 --- a/schemas/type/denyListRecognizer.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "$id": "https://open-metadata.org/schema/type/denyListRecognizer.json", - "$schema": "http://json-schema.org/draft-07/schema#", - "title": "DenyListRecognizer", - "description": "Deny list recognizer that matches against a list of specific values", - "type": "object", - "javaType": "org.openmetadata.schema.type.DenyListRecognizer", - "properties": { - "type": { - "const": "deny_list" - }, - "denyList": { - "description": "List of values to match against", - "type": "array", - "items": { - "type": "string" - }, - "minItems": 1 - }, - "supportedEntity": { - "description": "The entity type this recognizer detects", - "$ref": "piiEntity.json" - }, - "supportedLanguage": { - "description": "Language supported by this recognizer (ISO 639-1 code)", - "type": "string", - "default": "en", - "pattern": "^[a-z]{2}(-[A-Z]{2})?$" - }, - "regexFlags": { - "$ref": "recognizers/regexFlags.json" - } - }, - "required": ["type", "denyList", "supportedEntity", "supportedLanguage", "regexFlags"], - "additionalProperties": false -} \ No newline at end of file diff --git a/schemas/type/entityLineage.json b/schemas/type/entityLineage.json index ccf22ea..e7e51c5 100644 --- a/schemas/type/entityLineage.json +++ b/schemas/type/entityLineage.json @@ -6,6 +6,21 @@ "type": "object", "javaType": "org.openmetadata.schema.type.EntityLineage", "definitions": { + "tempLineageTable": { + "description": "A single hop in a temporary table lineage path.", + "type": "object", + "properties": { + "fromEntity": { + "description": "Source entity or table name for this hop.", + "type": "string" + }, + "toEntity": { + "description": "Target entity or table name for this hop.", + "type": "string" + } + }, + "required": ["fromEntity", "toEntity"] + }, "columnLineage": { "type" : "object", "properties": { @@ -75,6 +90,14 @@ "description": "Asset 
count in case of child assets lineage.", "type": "integer", "default": null + }, + "tempLineageTables": { + "description": "Lineage path through temporary/intermediate tables. Each element represents a hop with fromEntity and toEntity fields.", + "type": "array", + "items": { + "$ref": "#/definitions/tempLineageTable" + }, + "default": null } } }, diff --git a/schemas/type/entityRelationship.json b/schemas/type/entityRelationship.json index e27c088..f659312 100644 --- a/schemas/type/entityRelationship.json +++ b/schemas/type/entityRelationship.json @@ -34,7 +34,9 @@ "expert", "editedBy", "defaultsTo", - "relatesTo" + "relatesTo", + "inputPort", + "outputPort" ], "javaEnums": [ { "name": "CONTAINS" }, @@ -59,7 +61,9 @@ { "name": "EXPERT" }, { "name": "EDITED_BY" }, { "name": "DEFAULTS_TO" }, - { "name": "RELATES_TO" } + { "name": "RELATES_TO" }, + { "name": "INPUT_PORT" }, + { "name": "OUTPUT_PORT" } ] } }, diff --git a/schemas/type/exactTermsRecognizer.json b/schemas/type/exactTermsRecognizer.json new file mode 100644 index 0000000..6f63299 --- /dev/null +++ b/schemas/type/exactTermsRecognizer.json @@ -0,0 +1,31 @@ +{ + "$id": "https://open-metadata.org/schema/type/exactTermsRecognizer.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "ExactTermsRecognizer", + "description": "Exact terms recognizer that matches against a list of specific values", + "type": "object", + "javaType": "org.openmetadata.schema.type.ExactTermsRecognizer", + "properties": { + "type": { + "const": "exact_terms" + }, + "exactTerms": { + "description": "List of values to match against", + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1 + }, + "supportedLanguage": { + "description": "Language supported by this recognizer", + "$ref": "classificationLanguages.json", + "default": "en" + }, + "regexFlags": { + "$ref": "recognizers/regexFlags.json" + } + }, + "required": ["type", "exactTerms", "supportedLanguage", "regexFlags"], + 
"additionalProperties": false +} \ No newline at end of file diff --git a/schemas/type/patternRecognizer.json b/schemas/type/patternRecognizer.json index d7ef57d..74e4ca0 100644 --- a/schemas/type/patternRecognizer.json +++ b/schemas/type/patternRecognizer.json @@ -28,17 +28,12 @@ }, "default": [] }, - "supportedEntity": { - "description": "The entity type this recognizer detects", - "$ref": "piiEntity.json" - }, "supportedLanguage": { - "description": "Language supported by this recognizer (ISO 639-1 code)", - "type": "string", - "default": "en", - "pattern": "^[a-z]{2}(-[A-Z]{2})?$" + "description": "Language supported by this recognizer", + "$ref": "classificationLanguages.json", + "default": "en" } }, - "required": ["type", "patterns", "supportedEntity", "supportedLanguage", "regexFlags", "context"], + "required": ["type", "patterns", "supportedLanguage", "regexFlags"], "additionalProperties": false } \ No newline at end of file diff --git a/schemas/type/pipelineObservability.json b/schemas/type/pipelineObservability.json index e912bbc..da1fca2 100644 --- a/schemas/type/pipelineObservability.json +++ b/schemas/type/pipelineObservability.json @@ -35,11 +35,15 @@ "type": "string", "enum": [ "Successful", - "Failed", + "Failed", "Pending", "Skipped", "Running" ] + }, + "averageRunTime": { + "description": "Average runtime of the pipeline in milliseconds.", + "type": "number" } }, "required": ["pipeline"], diff --git a/schemas/type/predefinedRecognizer.json b/schemas/type/predefinedRecognizer.json index de94747..11b8096 100644 --- a/schemas/type/predefinedRecognizer.json +++ b/schemas/type/predefinedRecognizer.json @@ -58,10 +58,9 @@ "type": "string" }, "supportedLanguage": { - "description": "Language supported by this recognizer (ISO 639-1 code)", - "type": "string", - "default": "en", - "pattern": "^[a-z]{2}(-[A-Z]{2})?$" + "description": "Language supported by this recognizer", + "$ref": "classificationLanguages.json", + "default": "en" }, "context": { 
"description": "List of context words that can help boost confidence score", diff --git a/schemas/type/recognizer.json b/schemas/type/recognizer.json index c1d8db5..cd89801 100644 --- a/schemas/type/recognizer.json +++ b/schemas/type/recognizer.json @@ -37,14 +37,14 @@ "recognizerType": { "description": "Type of recognizer to use for detection", "type": "string", - "enum": ["pattern", "deny_list", "context", "predefined", "custom"], + "enum": ["pattern", "exact_terms", "context", "predefined", "custom"], "default": "pattern" }, "recognizerConfig": { "description": "Complete recognizer configuration", "oneOf": [ {"$ref": "patternRecognizer.json"}, - {"$ref": "denyListRecognizer.json"}, + {"$ref": "exactTermsRecognizer.json"}, {"$ref": "contextRecognizer.json"}, {"$ref": "customRecognizer.json"}, {"$ref": "predefinedRecognizer.json"} @@ -53,8 +53,8 @@ "patternRecognizerType": { "$ref": "patternRecognizer.json" }, - "denyListRecognizerType": { - "$ref": "denyListRecognizer.json" + "exactTermsRecognizerType": { + "$ref": "exactTermsRecognizer.json" }, "contextRecognizerType": { "$ref": "contextRecognizer.json" diff --git a/schemas/type/recognizerMetadata.json b/schemas/type/recognizerMetadata.json new file mode 100644 index 0000000..334c97f --- /dev/null +++ b/schemas/type/recognizerMetadata.json @@ -0,0 +1,62 @@ +{ + "$id": "https://open-metadata.org/schema/type/recognizerMetadata.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "RecognizerMetadata", + "description": "Metadata about the recognizer that applied a tag, including scoring and pattern information.", + "type": "object", + "javaType": "org.openmetadata.schema.type.RecognizerMetadata", + "properties": { + "recognizerId": { + "description": "Unique identifier of the recognizer that applied this tag", + "$ref": "basic.json#/definitions/uuid" + }, + "recognizerName": { + "description": "Human-readable name of the recognizer", + "type": "string" + }, + "score": { + "description": 
"Confidence score assigned by the recognizer (0.0 to 1.0)", + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + "target": { + "description": "What the recognizer analyzed to apply this tag", + "type": "string", + "enum": ["content", "column_name"] + }, + "patterns": { + "description": "Details of patterns that matched during recognition", + "type": "array", + "items": { + "$ref": "#/definitions/patternMatch" + } + } + }, + "required": ["recognizerId", "recognizerName", "score"], + "additionalProperties": false, + "definitions": { + "patternMatch": { + "type": "object", + "description": "Information about a pattern that matched during recognition", + "properties": { + "name": { + "description": "Name of the pattern that matched", + "type": "string" + }, + "regex": { + "description": "Regular expression or pattern definition", + "type": "string" + }, + "score": { + "description": "Confidence score for this specific pattern match", + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + } + }, + "required": ["name", "score"], + "additionalProperties": false + } + } +} diff --git a/schemas/type/regexMode.json b/schemas/type/regexMode.json new file mode 100644 index 0000000..13b0ed8 --- /dev/null +++ b/schemas/type/regexMode.json @@ -0,0 +1,17 @@ +{ + "$id": "https://open-metadata.org/schema/type/regexMode.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "RegexMode", + "description": "Controls how regex filters are applied when listing entities. 
'include' returns entities matching the pattern, 'exclude' returns entities that do not match.", + "type": "string", + "javaType": "org.openmetadata.schema.type.RegexMode", + "enum": ["include", "exclude"], + "javaEnums": [ + { + "name": "INCLUDE" + }, + { + "name": "EXCLUDE" + } + ] +} diff --git a/schemas/type/status.json b/schemas/type/status.json index 923c542..c699ce7 100644 --- a/schemas/type/status.json +++ b/schemas/type/status.json @@ -9,6 +9,7 @@ "Draft", "In Review", "Approved", + "Archived", "Deprecated", "Rejected", "Unprocessed" diff --git a/schemas/type/tagLabel.json b/schemas/type/tagLabel.json index b850342..79aab5d 100644 --- a/schemas/type/tagLabel.json +++ b/schemas/type/tagLabel.json @@ -57,6 +57,19 @@ "reason": { "type": "string", "description": "An explanation of why this tag was proposed, specially for autoclassification tags" + }, + "appliedAt": { + "type": "string", + "format": "date-time", + "description": "Timestamp when this tag was applied in ISO 8601 format" + }, + "appliedBy": { + "type": "string", + "description": "Who applied this tag (e.g., a bot, an AI, or a human)" + }, + "metadata": { + "description": "Additional metadata associated with this tag label, such as recognizer information for automatically applied tags.", + "$ref": "tagLabelMetadata.json" } }, "required": ["tagFQN", "source", "labelType", "state"], diff --git a/schemas/type/tagLabelMetadata.json b/schemas/type/tagLabelMetadata.json new file mode 100644 index 0000000..00dc5c6 --- /dev/null +++ b/schemas/type/tagLabelMetadata.json @@ -0,0 +1,20 @@ +{ + "$id": "https://open-metadata.org/schema/type/tagLabelMetadata.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "TagLabelMetadata", + "description": "Additional metadata associated with a tag label, including information about how the tag was applied.", + "type": "object", + "javaType": "org.openmetadata.schema.type.TagLabelMetadata", + "properties": { + "recognizer": { + "description": 
"Metadata about the recognizer that automatically applied this tag", + "$ref": "tagLabelRecognizerMetadata.json" + }, + "expiryDate": { + "description": "Epoch time in milliseconds when the certification tag expires", + "type": "integer", + "format": "utc-millisec" + } + }, + "additionalProperties": false +} diff --git a/schemas/type/tagLabelRecognizerMetadata.json b/schemas/type/tagLabelRecognizerMetadata.json new file mode 100644 index 0000000..ec3e4bb --- /dev/null +++ b/schemas/type/tagLabelRecognizerMetadata.json @@ -0,0 +1,62 @@ +{ + "$id": "https://open-metadata.org/schema/type/tagLabelRecognizerMetadata.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "TagLabelRecognizerMetadata", + "description": "Metadata about the recognizer that applied a tag, including scoring and pattern information.", + "type": "object", + "javaType": "org.openmetadata.schema.type.TagLabelRecognizerMetadata", + "properties": { + "recognizerId": { + "description": "Unique identifier of the recognizer that applied this tag", + "$ref": "basic.json#/definitions/uuid" + }, + "recognizerName": { + "description": "Human-readable name of the recognizer", + "type": "string" + }, + "score": { + "description": "Confidence score assigned by the recognizer (0.0 to 1.0)", + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + }, + "target": { + "description": "What the recognizer analyzed to apply this tag", + "type": "string", + "enum": ["content", "column_name"] + }, + "patterns": { + "description": "Details of patterns that matched during recognition", + "type": "array", + "items": { + "$ref": "#/definitions/patternMatch" + } + } + }, + "required": ["recognizerId", "recognizerName", "score"], + "additionalProperties": false, + "definitions": { + "patternMatch": { + "type": "object", + "description": "Information about a pattern that matched during recognition", + "properties": { + "name": { + "description": "Name of the pattern that matched", + "type": "string" + }, + 
"regex": { + "description": "Regular expression or pattern definition", + "type": "string" + }, + "score": { + "description": "Confidence score for this specific pattern match", + "type": "number", + "minimum": 0.0, + "maximum": 1.0 + } + }, + "required": ["name", "score"], + "additionalProperties": false + } + } +} diff --git a/schemas/type/termRelation.json b/schemas/type/termRelation.json new file mode 100644 index 0000000..052f9d3 --- /dev/null +++ b/schemas/type/termRelation.json @@ -0,0 +1,22 @@ +{ + "$id": "https://open-metadata.org/schema/type/termRelation.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "TermRelation", + "description": "This schema defines the TermRelation type used for establishing typed semantic relationships between glossary terms.", + "type": "object", + "javaType": "org.openmetadata.schema.type.TermRelation", + "properties": { + "relationType": { + "description": "Type of the relation (e.g., 'broader', 'narrower', 'synonym', 'relatedTo'). 
Defaults to 'relatedTo' for backward compatibility.", + "type": "string", + "pattern": "^[a-zA-Z][a-zA-Z0-9]*$", + "default": "relatedTo" + }, + "term": { + "description": "Reference to the related glossary term.", + "$ref": "entityReference.json" + } + }, + "required": ["term"], + "additionalProperties": false +} diff --git a/schemas/type/workflowTriggerFields.json b/schemas/type/workflowTriggerFields.json new file mode 100644 index 0000000..aef7126 --- /dev/null +++ b/schemas/type/workflowTriggerFields.json @@ -0,0 +1,42 @@ +{ + "$id": "https://open-metadata.org/schema/type/workflowTriggerFields.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "WorkflowTriggerFields", + "description": "Fields that can trigger workflow execution when changed", + "type": "string", + "javaType": "org.openmetadata.schema.type.WorkflowTriggerFields", + "enum": [ + "name", + "displayName", + "fullyQualifiedName", + "description", + "owners", + "reviewers", + "tags", + "certification", + "domains", + "dataProducts", + "extension", + "deleted", + "synonyms", + "relatedTerms", + "references", + "glossary", + "parent", + "children", + "experts", + "style", + "lifeCycle", + "schema", + "semantics", + "qualityExpectations", + "termsOfUse", + "security", + "sla", + "testSuite", + "latestResult", + "consumesFrom", + "providesTo", + "lifecycleStage" + ] +} \ No newline at end of file