diff --git a/backend/Dockerfile b/backend/Dockerfile
index 6b0c816..356123f 100644
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -36,7 +36,7 @@ RUN pip install --no-cache-dir uv
# Create a non-root user and group
RUN groupadd -r appgroup && useradd --no-log-init -r -g appgroup -d /app -s /sbin/nologin appuser
-
+RUN mkdir -p /data && chown -R appuser:appgroup /data
WORKDIR /app
# Copy dependency definition files and your submodule source code
diff --git a/backend/pyproject.toml b/backend/pyproject.toml
index d1b24aa..92a959d 100644
--- a/backend/pyproject.toml
+++ b/backend/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "vqlforge"
-version = "0.2"
+version = "0.25"
description = "VQLForge Backend"
readme = "README.md"
requires-python = ">=3.12"
@@ -18,7 +18,6 @@ dependencies = [
[project.optional-dependencies]
dev = [
"pytest>=8.0.0",
- "pytest-fastapi-client>=0.6.0",
"pytest-mock>=3.12.0",
]
@@ -28,4 +27,7 @@ build-backend = "hatchling.build"
[tool.hatch.build.targets.wheel]
packages = ["src"]
[tool.hatch.metadata]
-allow-direct-references = true
\ No newline at end of file
+allow-direct-references = true
+
+[tool.hatch.paths]
+sqlglot = "src/sqlglot-vql"
\ No newline at end of file
diff --git a/backend/src/api/forge.py b/backend/src/api/forge.py
index 78a23b9..5e98eec 100644
--- a/backend/src/api/forge.py
+++ b/backend/src/api/forge.py
@@ -1,11 +1,22 @@
+"""
+API routes for the VQL Forge agentic process.
+
+This module provides a FastAPI endpoint for converting SQL to VQL in an
+iterative, agent-like process. It uses Server-Sent Events (SSE) to stream
+progress back to the client.
+"""
+
import logging
import json
-from fastapi import APIRouter, HTTPException, Request
+from fastapi import APIRouter
from fastapi.responses import StreamingResponse
+from src.schemas.validation import VqlValidateRequest, VqlValidationApiResponse
from src.schemas.agent import AgenticModeRequest, AgenticModeResponse, AgentStep
from src.services.translation_service import run_translation
from src.services.validation_service import run_validation
+from src.utils.ai_analyzer import explain_vql_differences
from src.config import settings
logger = logging.getLogger(__name__)
@@ -13,18 +24,70 @@
async def format_sse(data: dict, event: str | None = None) -> str:
- """Formats a dictionary into an SSE message string."""
+ """Format a dictionary into a Server-Sent Event (SSE) message string.
+
+ Args:
+ data: The dictionary payload to send.
+ event: An optional event name for the SSE message.
+
+ Returns:
+ A string formatted as a compliant SSE message.
+ """
payload = json.dumps(data)
if event:
return f"event: {event}\ndata: {payload}\n\n"
return f"data: {payload}\n\n"
-@router.post("/forge", tags=["VQL Forge"])
-async def agentic_sql_to_vql_forge_stream(request: AgenticModeRequest):
+def format_explanation_as_markdown(explanation: str) -> str:
+ """Format the AI explanation into better structured markdown.
+
+ This function enhances a plain text explanation by attempting to structure
+ it as markdown. If markdown markers like '##' or '-' are already present,
+ it returns the original string to avoid double formatting.
+
+ Args:
+ explanation: The raw explanation string from the AI.
+
+ Returns:
+ A markdown-formatted explanation string.
"""
- Handles the agentic SQL-to-VQL process using a streaming response (SSE)
- to provide real-time updates of the agent's progress with a limited number of correction loops.
+ if not explanation:
+ return explanation
+
+ # If the explanation is already well-formatted, return as-is
+ if any(marker in explanation for marker in ['##', '- ', '* ', '\n- ', '\n* ']):
+ return explanation
+
+    # Otherwise, normalize whitespace line by line; stripping each line also
+    # reduces blank lines to empty strings, so a single comprehension suffices.
+    lines = [line.strip() for line in explanation.split('\n')]
+    return '\n'.join(lines)
+
+
+@router.post("/forge", tags=["VQL Forge"])
+async def agentic_sql_to_vql_forge_stream(request: AgenticModeRequest) -> StreamingResponse:
+ """Handle the agentic SQL-to-VQL process via a streaming response.
+
+ This endpoint uses Server-Sent Events (SSE) to provide real-time updates
+ of the agent's progress. The process involves an initial translation,
+ followed by a series of validation and correction loops until the VQL is
+ valid or the maximum number of attempts is reached.
+
+ Args:
+        request: The request containing the SQL query, its dialect, the VDB, and any starting VQL.
+
+ Returns:
+ A StreamingResponse that sends SSE events to the client.
"""
async def event_generator():
process_log: list[AgentStep] = []
@@ -59,17 +122,43 @@ async def event_generator():
loop_count = i + 1
# Validation Step
- validation_step_name = "Validate" if i == 0 else f"Re-Validate (Attempt {loop_count})"
+ validation_step_name = "Validate" if i == 0 else f"Re-Validate (Step {loop_count})"
validation_step = AgentStep(step_name=validation_step_name,
- details=f"Validating VQL (Attempt {loop_count})...", success=True)
+ details=f"Validating VQL (Step {loop_count})...", success=True)
process_log.append(validation_step)
yield await format_sse(validation_step.model_dump(), event="step")
- validation_result = await run_validation(current_vql)
+ validation_request: VqlValidateRequest = VqlValidateRequest(
+ sql=request.sql, vql=current_vql, vdb=request.vdb, dialect=request.dialect)
+ validation_result: VqlValidationApiResponse = await run_validation(validation_request)
if validation_result.validated:
validation_step.details = "Validation successful."
yield await format_sse(validation_step.model_dump(), event="step")
+
+ # Explain Differences
+ explain_step = AgentStep(
+ step_name="Explain",
+ details="Analyzing differences between source SQL and final VQL...",
+ success=True
+ )
+ process_log.append(explain_step)
+ yield await format_sse(explain_step.model_dump(), event="step")
+
+ raw_explanation = await explain_vql_differences(
+ source_sql=request.sql,
+ source_dialect=request.dialect,
+ final_vql=current_vql
+ )
+
+ # Format the explanation with better structure
+ formatted_explanation = format_explanation_as_markdown(raw_explanation)
+
+ final_explanation = f"## Key Differences Between Source SQL and Final VQL\n\n{formatted_explanation}"
+
+ explain_step.details = final_explanation
+ yield await format_sse(explain_step.model_dump(), event="step")
+
final_success_result = AgenticModeResponse(
final_vql=current_vql, is_valid=True, process_log=process_log,
final_message="Agentic process complete. The VQL is valid."
@@ -106,16 +195,23 @@ async def event_generator():
# AI Analysis & Correction Step
analysis_step = AgentStep(
- step_name=f"Analyze (Attempt {loop_count})", details="AI is analyzing the error to find a correction...", success=True)
+ step_name=f"Analyze (Step {loop_count})",
+ details="AI is analyzing the error to find a correction...",
+ success=True
+ )
process_log.append(analysis_step)
yield await format_sse(analysis_step.model_dump(), event="step")
correction_step = AgentStep(
- step_name=f"Correct (Attempt {loop_count})", details="AI provided a corrected VQL.", success=True, output=error_analysis.sql_suggestion)
+ step_name=f"Correct (Step {loop_count})",
+ details="AI provided a corrected VQL.",
+ success=True,
+ output=error_analysis.sql_suggestion
+ )
process_log.append(correction_step)
yield await format_sse(correction_step.model_dump(), event="step")
- current_vql = error_analysis.sql_suggestion # Update VQL for the next loop
+ current_vql = error_analysis.sql_suggestion
except Exception as e:
logger.error(f"Error during agentic stream: {e}", exc_info=True)
diff --git a/backend/src/api/log_recorder.py b/backend/src/api/log_recorder.py
new file mode 100644
index 0000000..d5948fc
--- /dev/null
+++ b/backend/src/api/log_recorder.py
@@ -0,0 +1,153 @@
+"""
+API endpoints for logging and retrieving validated SQL-to-VQL query pairs.
+
+This module provides routes to log accepted queries and to retrieve those logs
+based on various filtering criteria, such as the tables they reference.
+"""
+
+import json
+import logging
+from typing import Any, List
+
+from fastapi import APIRouter, Depends, HTTPException, Query
+from sqlalchemy import select, func, column, String
+from sqlalchemy.orm import Session
+from sqlglot import exp, parse_one
+
+from src.schemas.db_log import (
+ AcceptedQuery,
+ AcceptedQueryLogListResponse,
+ AcceptedQueryLogRequest,
+)
+from src.db.sqlite_session import get_sqlite_session
+logger = logging.getLogger(__name__)
+router = APIRouter()
+
+
+@router.post("/log/accepted", status_code=201, tags=["Logging"])
+async def log_accepted_query(request: AcceptedQueryLogRequest,
+ db: Session = Depends(get_sqlite_session)) -> dict[str, Any]:
+ """Log a successfully validated and accepted SQL-to-VQL pair.
+
+ This endpoint uses sqlglot to parse the source SQL, extracts the table
+ names, and stores the entire entry in the database.
+
+ Args:
+ request: The request body containing the source SQL, dialect, and target VQL.
+ db: The SQLAlchemy database session, injected as a dependency.
+
+ Raises:
+ HTTPException: A 500 error if the database write operation fails.
+
+ Returns:
+ A confirmation message and the ID of the newly created log entry.
+ """
+ try:
+        # Parse with the source dialect so table names are extracted correctly.
+        source_tables = json.dumps(
+            [table.name for table in parse_one(request.source_sql, read=request.source_dialect).find_all(exp.Table)]
+        )
+
+ db_log_entry = AcceptedQuery(
+ source_sql=request.source_sql,
+ source_dialect=request.source_dialect,
+ target_vql=request.target_vql,
+ tables=source_tables
+ )
+ db.add(db_log_entry)
+ db.commit()
+ db.refresh(db_log_entry)
+ logger.info(f"Successfully logged accepted query ID: {db_log_entry.id}")
+ return {"message": "Log entry created successfully.", "id": db_log_entry.id}
+ except Exception as e:
+ logger.error(f"Failed to log accepted query: {e}", exc_info=True)
+ db.rollback()
+        raise HTTPException(status_code=500, detail=f"Failed to write log to database: {str(e)}")
+    finally:
+        db.close()
+
+
+@router.get("/log/all", response_model=AcceptedQueryLogListResponse, tags=["Logging"])
+async def get_all_logs(db: Session = Depends(get_sqlite_session)) -> AcceptedQueryLogListResponse:
+ """Retrieve all accepted query logs from the database.
+ The logs are ordered by timestamp, with the most recent entries first.
+
+ Args:
+ db: The SQLAlchemy database session, injected as a dependency.
+
+ Raises:
+ HTTPException: A 500 error if the database read operation fails.
+
+ Returns:
+ A response object containing a list of all log entries.
+ """
+ try:
+ logs: List[AcceptedQuery] = db.query(AcceptedQuery).order_by(AcceptedQuery.timestamp.desc()).all()
+ return AcceptedQueryLogListResponse(results=logs)
+ except Exception as e:
+ logger.error(f"Failed to fetch all logs: {e}", exc_info=True)
+ raise HTTPException(status_code=500, detail="Failed to retrieve logs from the database.")
+ finally:
+ db.close()
+
+
+@router.get("/log/filter", response_model=AcceptedQueryLogListResponse, tags=["Logging"])
+async def get_logs_by_tables(
+ tables: List[str] = Query(
+ ...,
+ description="A list of table names to filter by.",
+ example=["customers", "orders"],
+ ),
+ db: Session = Depends(get_sqlite_session),
+) -> AcceptedQueryLogListResponse:
+ """Retrieve logs by tables using a correctly formed lateral subquery.
+
+ This function filters records where the 'tables' JSON array contains any of
+ the specified table names. It works by creating a LATERAL subquery that
+ unnests the JSON array for each row and checks for a match. This is the
+ idiomatic SQLAlchemy approach for this type of query against SQLite.
+
+ Args:
+ tables: A list of table names provided as query parameters.
+ db: The SQLAlchemy database session.
+
+ Raises:
+ HTTPException: 400 if 'tables' is empty.
+ HTTPException: 500 if the database query fails.
+
+ Returns:
+        A response object with up to 10 matching log entries, most recent first.
+ """
+ if not tables:
+ raise HTTPException(
+ status_code=400, detail="The 'tables' query parameter cannot be empty."
+ )
+
+ try:
+ # 1. Explicitly define the output columns of the `json_each` function.
+ # We only need the 'value' column, which contains the table names.
+ json_table_def = func.json_each(AcceptedQuery.tables).table_valued(
+ column("value", String),
+ name="json_table"
+ )
+
+ # 2. Create the correlated EXISTS subquery.
+ # Because we defined the columns, accessing `.c.value` now works
+ # correctly and returns a Column object with the `.in_()` method.
+ subquery = (
+ select(1)
+ .where(json_table_def.c.value.in_(tables))
+ ).exists()
+
+ # 3. Apply the filter to the main query.
+ query = (
+ db.query(AcceptedQuery)
+ .filter(subquery)
+ .order_by(AcceptedQuery.timestamp.desc())
+ .limit(10)
+ )
+
+ logs: List[AcceptedQuery] = query.all()
+ return AcceptedQueryLogListResponse(results=logs)
+ except Exception as e:
+ logger.error(f"Failed to fetch logs by tables: {e}", exc_info=True)
+ raise HTTPException(
+ status_code=500, detail="Failed to retrieve logs from the database."
+        )
+    finally:
+        db.close()
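To sanity-check the json_each() filter, the statement can be compiled to its SQLite SQL (a sketch in 2.0-style `select()`; the rendered output varies with the installed SQLAlchemy version):

```python
from sqlalchemy import String, column, func, select
from sqlalchemy.dialects import sqlite

from src.schemas.db_log import AcceptedQuery

json_table = func.json_each(AcceptedQuery.tables).table_valued(
    column("value", String), name="json_table"
)
stmt = (
    select(AcceptedQuery)
    .where(select(1).where(json_table.c.value.in_(["customers", "orders"])).exists())
    .order_by(AcceptedQuery.timestamp.desc())
    .limit(10)
)
# Renders roughly as:
#   SELECT ... FROM accepted_queries
#   WHERE EXISTS (SELECT 1 FROM json_each(accepted_queries.tables) AS json_table
#                 WHERE json_table."value" IN (?, ?))
#   ORDER BY accepted_queries.timestamp DESC LIMIT ? OFFSET ?
print(stmt.compile(dialect=sqlite.dialect()))
```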
diff --git a/backend/src/api/router.py b/backend/src/api/router.py
index eb3a39e..71def33 100644
--- a/backend/src/api/router.py
+++ b/backend/src/api/router.py
@@ -1,5 +1,5 @@
from fastapi import APIRouter
-from src.api import health, translate, validate, vdb_list, forge
+from src.api import health, translate, validate, vdb_list, forge, log_recorder
api_router = APIRouter()
api_router.include_router(health.router) # /health
@@ -7,3 +7,4 @@
api_router.include_router(validate.router) # /validate
api_router.include_router(vdb_list.router) # /vdbs
api_router.include_router(forge.router) # /forge
+api_router.include_router(log_recorder.router) # /log
diff --git a/backend/src/api/translate.py b/backend/src/api/translate.py
index 8a69c1b..cab4ca2 100644
--- a/backend/src/api/translate.py
+++ b/backend/src/api/translate.py
@@ -1,5 +1,13 @@
+"""
+API endpoint for translating SQL queries to VQL.
+
+This module provides a FastAPI route that accepts a source SQL query
+and its dialect, and utilizes a backend translation service to convert it
+into the equivalent VQL.
+"""
+
import logging
-from fastapi import APIRouter
+from fastapi import APIRouter, HTTPException
from src.schemas.translation import SqlQueryRequest, TranslateApiResponse
from src.services.translation_service import run_translation
@@ -9,11 +17,44 @@
@router.post("/translate", response_model=TranslateApiResponse, tags=["VQL Forge"])
async def translate_sql_to_vql(request: SqlQueryRequest) -> TranslateApiResponse:
+ """Translate a source SQL string to VQL.
+
+ This endpoint serves as a proxy to the centralized translation service,
+ handling request validation, calling the service, and managing potential
+ errors.
+
+ Args:
+ request: An object containing the source SQL string, the dialect
+ (e.g., "BigQuery", "Snowflake"), and the target VDB.
+
+ Raises:
+ HTTPException: A 500 Internal Server Error if the translation service
+ fails unexpectedly.
+
+ Returns:
+ A `TranslateApiResponse` object containing the resulting VQL or
+ detailed error analysis if the translation was unsuccessful but
+ handled gracefully by the service.
"""
- Translates a source SQL string to VQL using the centralized translation service.
- """
- return await run_translation(
- source_sql=request.sql,
- dialect=request.dialect,
- vdb=request.vdb
- )
+ try:
+ logger.info(
+ f"Received translation request for dialect: {request.dialect}"
+ )
+ # The core translation logic is delegated to the service layer
+ translation_result: TranslateApiResponse = await run_translation(
+ source_sql=request.sql,
+ dialect=request.dialect,
+ vdb=request.vdb
+ )
+ return translation_result
+ except Exception as e:
+ # Log the full exception for debugging purposes
+ logger.error(
+ f"An unexpected error occurred during translation: {e}",
+ exc_info=True
+ )
+ # Raise an HTTPException to return a structured error response to the client
+ raise HTTPException(
+ status_code=500,
+ detail="An internal error occurred while translating the SQL query."
+ )
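A quick smoke test of the endpoint (field names follow `SqlQueryRequest` as used above; host and port are assumptions):

```python
import httpx  # assumed client

response = httpx.post(
    "http://localhost:8000/translate",  # hypothetical base URL
    json={"sql": "SELECT 1 FROM dual", "dialect": "oracle", "vdb": "demo_vdb"},
)
response.raise_for_status()
print(response.json())  # TranslateApiResponse: either VQL or error_analysis details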
diff --git a/backend/src/api/validate.py b/backend/src/api/validate.py
index 755e964..9be4724 100644
--- a/backend/src/api/validate.py
+++ b/backend/src/api/validate.py
@@ -1,5 +1,13 @@
+"""
+API endpoint for validating VQL queries.
+
+This module exposes a FastAPI route that allows clients to submit a VQL query,
+along with its original source SQL context, for validation against a
+centralized service.
+"""
+
import logging
-from fastapi import APIRouter
+from fastapi import APIRouter, HTTPException
from src.schemas.validation import VqlValidateRequest, VqlValidationApiResponse
from src.services.validation_service import run_validation
@@ -8,9 +16,42 @@
@router.post("/validate", response_model=VqlValidationApiResponse, tags=["VQL Forge"])
-async def validate_vql_query_endpoint(request: VqlValidateRequest) -> VqlValidationApiResponse:
- """
- Validates a VQL query using the centralized validation service.
+async def validate_vql_query(request: VqlValidateRequest) -> VqlValidationApiResponse:
+ """Validate a VQL query against its original SQL context.
+
+ This endpoint delegates the validation logic to the centralized `run_validation`
+ service. It handles the API request/response cycle and manages any
+ unexpected errors that may occur during the validation process.
+
+ Args:
+ request: A `VqlValidateRequest` object containing the VQL to validate,
+ as well as the original source SQL, dialect, and VDB for
+ contextual validation.
+
+ Raises:
+ HTTPException: A 500 Internal Server Error if the validation service
+ encounters an unexpected failure.
+
+ Returns:
+ A `VqlValidationApiResponse` object indicating whether the VQL is
+ valid and providing detailed error analysis if it is not.
"""
- # The original request.sql is not needed here as we only validate the VQL.
- return await run_validation(vql_to_validate=request.vql)
+
+ try:
+ logger.info(f"Received VQL validation request for dialect '{request.dialect}'.")
+
+ # The entire request object is passed to the service, which may use
+ # the source SQL and other context for a more comprehensive validation.
+ validation_result: VqlValidationApiResponse = await run_validation(request=request)
+ return validation_result
+ except Exception as e:
+ # Log the full exception details for debugging purposes
+ logger.error(
+ f"An unexpected error occurred during VQL validation: {e}",
+ exc_info=True
+ )
+ # Return a standardized error response to the client
+ raise HTTPException(
+ status_code=500,
+ detail="An internal error occurred while validating the VQL query."
+ )
diff --git a/backend/src/api/vdb_list.py b/backend/src/api/vdb_list.py
index bf803e1..6bfade4 100644
--- a/backend/src/api/vdb_list.py
+++ b/backend/src/api/vdb_list.py
@@ -1,39 +1,50 @@
+"""
+API endpoint for retrieving the list of available Virtual Databases (VDBs).
+
+This module reads a static configuration file to provide clients with a
+list of VDBs they can use in other parts of the application.
+"""
+
import logging
-from fastapi import APIRouter, HTTPException
-from src.schemas.common import VDBResponse, VDBConfigFile, VDBResponseItem
-from src.config import settings
import os
+from functools import lru_cache
+from typing import List
+
import yaml
-from typing import List, Dict, Any
-router = APIRouter()
+from fastapi import APIRouter, Depends, HTTPException
-logger = logging.getLogger(__name__)
+from src.config import settings
+from src.schemas.common import VDBConfigFile, VDBResponse, VDBResponseItem
+logger: logging.Logger = logging.getLogger(__name__)
+router: APIRouter = APIRouter()
-def load_vdb_config_from_file() -> VDBConfigFile:
- """
- Loads and parses the VDB configuration from the YAML file.
- Returns:
- VDBConfigFile: A Pydantic model representing the loaded configuration.
+@lru_cache(maxsize=1)
+def get_vdb_config() -> VDBConfigFile:
+ """Load, parse, and validate the VDB configuration from its YAML file.
+
+ This function is designed to be used as a cached FastAPI dependency.
+ The `@lru_cache(maxsize=1)` decorator ensures that the file is read from
+    disk and processed only once, on the first request. Subsequent requests
+    receive the cached configuration object from memory.
Raises:
- HTTPException: If the file is not found, cannot be parsed, or is invalid.
+ HTTPException: A 500 error if the config file is not found, cannot be
+ parsed, or fails Pydantic validation.
+
+ Returns:
+ A validated VDBConfigFile object.
"""
- config_path = settings.APP_VDB_CONF
+ config_path: str = settings.APP_VDB_CONF
logger.info(f"Attempting to load VDB configuration from: {config_path}")
if not os.path.exists(config_path):
logger.error(f"VDB configuration file not found at: {config_path}")
raise HTTPException(
status_code=500,
- detail=f"Server configuration error: VDB config file not found at {config_path}"
- )
- if not os.path.isfile(config_path):
- logger.error(f"VDB configuration path is not a file: {config_path}")
- raise HTTPException(
- status_code=500,
- detail=f"Server configuration error: VDB config path is not a file at {config_path}"
+ detail="Server configuration error: VDB config file not found."
)
try:
@@ -41,72 +52,56 @@ def load_vdb_config_from_file() -> VDBConfigFile:
raw_config = yaml.safe_load(f)
# Validate the loaded YAML against the Pydantic model
vdb_config = VDBConfigFile.model_validate(raw_config)
- logger.info("Successfully loaded and validated VDB configuration.")
+ logger.info("Successfully loaded and cached VDB configuration.")
return vdb_config
except yaml.YAMLError as e:
logger.error(f"Error parsing vdb_conf.yaml: {e}", exc_info=True)
raise HTTPException(
status_code=500,
- detail=f"Server configuration error: Invalid YAML format in {config_path}. Details: {e}"
+ detail="Server configuration error: Invalid YAML format in VDB config file."
)
except Exception as e:
+ # This can catch Pydantic validation errors or other unexpected issues.
logger.error(f"Unexpected error loading VDB config: {e}", exc_info=True)
raise HTTPException(
status_code=500,
- detail=f"Server configuration error: Failed to load VDB config from {config_path}. Details: {e}"
+ detail="Server configuration error: Failed to load or validate VDB config."
)
-def transform_vdb_strings_to_response_items(string_list: List[str]) -> List[VDBResponseItem]:
- """
- Transforms a list of VDB names (strings) into a list of VDBResponseItem models.
+def transform_vdb_strings_to_response_items(
+ string_list: List[str],
+) -> List[VDBResponseItem]:
+ """Transform a list of VDB names into a list of VDBResponseItem models.
Args:
string_list: A list of strings, where each string is a VDB name.
Returns:
- A list of VDBResponseItem models, each with 'value' and 'label' set to the VDB name.
+ A list of VDBResponseItem models, each with 'value' and 'label'
+ set to the VDB name.
"""
return [VDBResponseItem(value=item, label=item) for item in string_list]
@router.get("/vdbs", response_model=VDBResponse, tags=["VQL Forge"])
-async def get_vdb_list() -> VDBResponse:
- """
- Retrieves a list of VDBs from the configuration file.
- """
- try:
- # Load and validate the config using the Pydantic model
- vdb_config = load_vdb_config_from_file()
+async def get_vdb_list(
+ config: VDBConfigFile = Depends(get_vdb_config),
+) -> VDBResponse:
+ """Retrieve a list of available VDBs from the server configuration.
- # Transform the list of strings from the config into the desired response format
- transformed_vdbs = transform_vdb_strings_to_response_items(vdb_config.vdbs)
-
- return VDBResponse(results=transformed_vdbs)
- except HTTPException as http_exc:
- # Re-raise HTTPExceptions as they are already formatted for FastAPI
- raise http_exc
- except Exception as e:
- logger.error(f"Unhandled error in get_vdb_list: {e}", exc_info=True)
- raise HTTPException(status_code=500, detail="An unexpected error occurred while fetching VDB list.")
+ This endpoint reads from a static configuration file on the server. The
+ configuration is cached after the first request for high performance.
+ Args:
+ config: The VDB configuration, injected as a dependency.
-@router.get("/vdbs", response_model=VDBResponse, tags=["VQL Forge"])
-async def get_vdb_list() -> VDBResponse:
- """
- Retrieves a list of VDBs from the configuration file.
+ Returns:
+ A VDBResponse object containing the list of available VDBs.
"""
- try:
- # Load and validate the config using the Pydantic model
- vdb_config = load_vdb_config_from_file()
-
- # Transform the list of strings from the config into the desired response format
- transformed_vdbs = transform_vdb_strings_to_response_items(vdb_config.vdbs)
-
- return VDBResponse(results=transformed_vdbs)
- except HTTPException as http_exc:
- # Re-raise HTTPExceptions as they are already formatted for FastAPI
- raise http_exc
- except Exception as e:
- logger.error(f"Unhandled error in get_vdb_list: {e}", exc_info=True)
- raise HTTPException(status_code=500, detail="An unexpected error occurred while fetching VDB list.")
+    # The 'get_vdb_config' dependency handles all loading, validation, and error
+    # cases, so the endpoint only needs to shape the response.
+ transformed_vdbs: List[VDBResponseItem] = transform_vdb_strings_to_response_items(
+ config.vdbs
+ )
+ return VDBResponse(results=transformed_vdbs)
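Because `get_vdb_config` is a plain cached callable, it is easy to fake in tests via `dependency_overrides` (a sketch; the `VDBConfigFile(vdbs=[...])` shape is inferred from its usage above):

```python
from fastapi.testclient import TestClient

from src.api.vdb_list import get_vdb_config
from src.main import app
from src.schemas.common import VDBConfigFile

# Replace the cached file-backed dependency with an in-memory config.
app.dependency_overrides[get_vdb_config] = lambda: VDBConfigFile(vdbs=["demo_vdb"])

client = TestClient(app)
assert client.get("/vdbs").json() == {
    "results": [{"value": "demo_vdb", "label": "demo_vdb"}]
}

# When the real file changes at runtime, the lru_cache can be invalidated:
get_vdb_config.cache_clear()
```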
diff --git a/backend/src/config.py b/backend/src/config.py
index 0a0cc2e..57e1dd8 100644
--- a/backend/src/config.py
+++ b/backend/src/config.py
@@ -1,8 +1,7 @@
-import os
from pydantic_settings import BaseSettings, SettingsConfigDict
from dotenv import load_dotenv
-load_dotenv() # Loads variables from .env file
+load_dotenv()
class Settings(BaseSettings):
@@ -16,12 +15,15 @@ class Settings(BaseSettings):
AZURE_OPENAI_ENDPOINT: str
AI_MODEL_NAME: str
- DATABASE_URL: str | None = None # Will be constructed
+ DATABASE_URL: str | None = None
APP_VDB_CONF: str
- # New setting for the agentic loop limit
+ # agentic loop limit
AGENTIC_MAX_LOOPS: int = 3
+ # SQLite logging database
+ SQLITE_DB_PATH: str = "/data/vqlforge_log.db"
+
model_config = SettingsConfigDict(env_file=".env", extra="ignore")
def __init__(self, **values):
diff --git a/backend/src/db/session.py b/backend/src/db/session.py
index ab55529..1d1614e 100644
--- a/backend/src/db/session.py
+++ b/backend/src/db/session.py
@@ -1,22 +1,44 @@
+"""
+Database engine management for the application.
+
+This module is responsible for creating, managing, and providing access to the
+global SQLAlchemy engine that connects to the Denodo database. It includes
+functions for initialization and retrieval of the engine instance.
+"""
+
import logging
import sqlalchemy as db
-from sqlalchemy.exc import SQLAlchemyError, OperationalError
-from sqlalchemy.engine import Engine # For type hinting
+from sqlalchemy.exc import SQLAlchemyError
+from sqlalchemy.engine import Engine
-from src.config import settings # Import the settings
+from src.config import settings
logger = logging.getLogger(__name__)
engine: Engine | None = None
def init_db_engine() -> Engine | None:
+ """Initialize the global SQLAlchemy database engine.
+
+ This function attempts to create a SQLAlchemy engine using the DATABASE_URL
+ from the application settings. It sets a global `engine` variable upon
+ successful connection and a test query. It is intended to be called once
+ at application startup.
+
+ If the DATABASE_URL is not configured, or if a connection cannot be
+ established due to a missing driver or other database errors, the
+ function will log a fatal error and set the global engine to None.
+
+ Returns:
+ The created SQLAlchemy Engine instance on success, or None on failure.
+ """
global engine
if settings.DATABASE_URL is None:
logger.fatal("DATABASE_URL is not configured.")
return None
try:
engine = db.create_engine(settings.DATABASE_URL)
- with engine.connect() as connection:
+ with engine.connect():
logger.info("Successfully connected to Denodo.")
return engine
except ImportError as e:
@@ -29,13 +51,20 @@ def init_db_engine() -> Engine | None:
return None
-# Initialize the engine when this module is imported
-# You might want to delay this if you have conditional DB usage
-engine = init_db_engine()
+def get_engine() -> Engine:
+ """Retrieve the initialized SQLAlchemy database engine.
+
+ This function provides access to the global engine instance. It is designed
+ to be called from other parts of the application (e.g., to create sessions)
+ after `init_db_engine` has been successfully executed.
+ Raises:
+ ConnectionError: If the database engine has not been initialized yet
+ (i.e., `init_db_engine` has not been called or has failed).
-def get_engine() -> Engine:
+ Returns:
+ The active SQLAlchemy Engine instance.
+ """
if engine is None:
- # This situation should ideally be handled at app startup
raise ConnectionError("Database engine is not initialized.")
return engine
diff --git a/backend/src/db/sqlite_session.py b/backend/src/db/sqlite_session.py
new file mode 100644
index 0000000..a44f409
--- /dev/null
+++ b/backend/src/db/sqlite_session.py
@@ -0,0 +1,81 @@
+"""
+Manages the connection and session for the application's SQLite database.
+
+This module is responsible for initializing the SQLite database engine, ensuring
+the database file and necessary tables exist, and providing a mechanism for
+FastAPI endpoints to acquire a database session for logging purposes.
+"""
+
+import logging
+import os
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker, Session
+from sqlalchemy.engine import Engine
+from src.config import settings
+from src.schemas.db_log import Base
+
+logger = logging.getLogger(__name__)
+
+sqlite_engine: Engine | None = None
+
+
+def init_sqlite_db() -> None:
+ """Initialize the global SQLite database engine and create tables.
+
+ This function performs the following setup actions:
+ 1. Reads the database file path from the application settings.
+ 2. Ensures the directory for the database file exists, creating it if necessary.
+ 3. Creates a global SQLAlchemy engine for the SQLite database.
+ 4. Sets `check_same_thread` to False, which is required for using SQLite in a
+ multi-threaded environment like FastAPI.
+ 5. Creates all tables defined in the SQLAlchemy declarative `Base` metadata.
+
+ This function should be called once at application startup to prepare the
+ database for use. If it fails, it logs a fatal error, and the engine will
+ remain `None`.
+ """
+ global sqlite_engine
+ try:
+ # ensure the directory exists
+ db_path: str = settings.SQLITE_DB_PATH
+ db_directory: str = os.path.dirname(db_path)
+ if db_directory: # Ensure there is a directory part to the path
+ os.makedirs(db_directory, exist_ok=True)
+ logger.info(f"Ensured database directory exists at: {db_directory}")
+
+ sqlite_engine = create_engine(
+ f"sqlite:///{db_path}",
+ connect_args={"check_same_thread": False}, # Required for SQLite with FastAPI
+ )
+ # Create tables
+ Base.metadata.create_all(bind=sqlite_engine)
+ logger.info(f"Successfully connected to SQLite DB at {db_path} and ensured tables exist.")
+ except Exception as e:
+ logger.fatal(f"Could not connect to or initialize SQLite database: {e}", exc_info=True)
+ sqlite_engine = None
+
+
+def get_sqlite_session() -> Session:
+ """Create and return a new SQLAlchemy session for the SQLite database.
+
+ This function is intended to be used as a FastAPI dependency to provide
+ a database session to an API endpoint. It relies on the global `sqlite_engine`
+ having been successfully initialized by `init_sqlite_db`.
+
+ Note: This function creates and returns a new session. In a typical FastAPI
+ pattern, the endpoint that receives this session is responsible for closing it
+ (e.g., within a `finally` block) to prevent resource leaks. A generator-based
+ dependency (`yield session`) is often preferred for automatic session cleanup.
+
+ Raises:
+ ConnectionError: If the SQLite database engine has not been
+ initialized before this function is called.
+
+ Returns:
+ A new SQLAlchemy `Session` instance connected to the SQLite database.
+ """
+ if sqlite_engine is None:
+ raise ConnectionError("SQLite database engine is not initialized.")
+ SessionLocal: sessionmaker[Session] = sessionmaker(autocommit=False, autoflush=False, bind=sqlite_engine)
+ return SessionLocal()
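The generator-based variant mentioned in the docstring might look like this (a sketch reusing the module's imports; FastAPI then closes the session automatically after each request):

```python
from collections.abc import Iterator


def get_sqlite_session_scoped() -> Iterator[Session]:
    """Yield a session and guarantee it is closed when the request finishes."""
    if sqlite_engine is None:
        raise ConnectionError("SQLite database engine is not initialized.")
    session = sessionmaker(autocommit=False, autoflush=False, bind=sqlite_engine)()
    try:
        yield session
    finally:
        session.close()
```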
diff --git a/backend/src/main.py b/backend/src/main.py
index b83f273..3a5ef9d 100644
--- a/backend/src/main.py
+++ b/backend/src/main.py
@@ -1,37 +1,54 @@
# src/main.py
+
import logging
+from contextlib import asynccontextmanager
from fastapi import FastAPI
-from src.api.router import api_router # Import the main router
-from src.config import settings # To access settings if needed for app config
-from src.db.session import engine as db_engine, init_db_engine # To ensure DB is up
-from src.utils.logging_config import setup_logging # Import the new logging setup
+from src.api.router import api_router
+from src.db.session import get_engine, init_db_engine
+from src.utils.logging_config import setup_logging
+from src.db.sqlite_session import init_sqlite_db
-# Configure logging using the new setup function
+# Configure logging first
setup_logging()
logger = logging.getLogger(__name__)
-# Initialize services
-if not db_engine:
- logger.warning("Database engine not initialized on import. Attempting explicit init.")
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+ # --- Startup Logic ---
+ logger.info("Application startup...")
+
+ # Initialize SQLite DB and tables
+ init_sqlite_db()
+
+ # Initialize the Denodo database engine
if init_db_engine() is None:
- logger.fatal("Application startup failed: Could not connect to the database.")
- exit(1)
+ logger.fatal("Application startup failed: Could not connect to the Denodo database.")
+ # The application will start but endpoints requiring the DB will fail.
+ # This prevents a hard crash on startup.
+ else:
+ logger.info("Denodo DB engine initialized successfully.")
+
+ yield
+
+ # --- Shutdown Logic ---
+ logger.info("Application shutdown...")
+    try:
+        # get_engine() reads the live engine; the imported name would be a stale None.
+        get_engine().dispose()
+        logger.info("Denodo DB engine disposed.")
+    except ConnectionError:
+        pass
+
app = FastAPI(
title="VQLForge Backend",
description="The backend to transpile and validate SQL to VQL",
- version="0.2",
+ version="0.25",
+    lifespan=lifespan,
)
-# CORS is now handled by the NGINX reverse proxy.
-
# Include the main API router
app.include_router(api_router)
-# A simple root endpoint can remain here or be moved to its own router
-
@app.get("/", tags=["Default"])
def read_root():
diff --git a/backend/src/schemas/agent.py b/backend/src/schemas/agent.py
index 9531667..b8b6a72 100644
--- a/backend/src/schemas/agent.py
+++ b/backend/src/schemas/agent.py
@@ -7,6 +7,7 @@ class AgenticModeRequest(BaseModel):
sql: str = Field(..., example="SELECT count(*) AS total FROM some_view")
dialect: str
vdb: str
+ vql: str
class AgentStep(BaseModel):
diff --git a/backend/src/schemas/db_log.py b/backend/src/schemas/db_log.py
new file mode 100644
index 0000000..88af9f9
--- /dev/null
+++ b/backend/src/schemas/db_log.py
@@ -0,0 +1,44 @@
+from pydantic import BaseModel, Field, ConfigDict
+from sqlalchemy import Column, Integer, String, Text, DateTime
+from sqlalchemy.orm import declarative_base
+import datetime
+from typing import List
+
+# SQLAlchemy ORM Model
+Base = declarative_base()
+
+
+class AcceptedQuery(Base):
+ __tablename__ = "accepted_queries"
+
+    # Plain Column assignments: `Column[...]` annotations are not valid runtime
+    # types and are rejected by SQLAlchemy 2.0's annotated declarative mapping.
+    id = Column(Integer, primary_key=True, index=True)
+    timestamp = Column(DateTime, default=datetime.datetime.utcnow)
+    source_dialect = Column(String, index=True)
+    source_sql = Column(Text)
+    target_vql = Column(Text)
+    tables = Column(Text)  # JSON-encoded list of table names
+
+# Pydantic Model for API request
+
+
+class AcceptedQueryLogRequest(BaseModel):
+ source_sql: str = Field(..., example="SELECT * FROM my_table")
+ source_dialect: str = Field(..., example="oracle")
+ target_vql: str = Field(..., example="SELECT * FROM my_table")
+
+# Pydantic model for a single log entry in an API response
+
+
+class AcceptedQueryLogResponse(BaseModel):
+ id: int
+ timestamp: datetime.datetime
+ source_dialect: str
+ source_sql: str
+ target_vql: str
+ tables: str # The 'tables' field is a JSON string
+ model_config = ConfigDict(from_attributes=True)
+
+
+# Pydantic model for a list of log entries
+class AcceptedQueryLogListResponse(BaseModel):
+ results: List[AcceptedQueryLogResponse]
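Round-tripping a row through the Pydantic response model (illustrative; `from_attributes=True` is what lets `model_validate` accept the ORM object):

```python
import datetime
import json

row = AcceptedQuery(
    id=1,
    timestamp=datetime.datetime(2024, 1, 1),
    source_dialect="oracle",
    source_sql="SELECT 1 FROM dual",
    target_vql="SELECT 1",
    tables='["dual"]',  # stored as a JSON string
)
entry = AcceptedQueryLogResponse.model_validate(row)
assert json.loads(entry.tables) == ["dual"]  # decode the JSON string back to a list
```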
diff --git a/backend/src/schemas/validation.py b/backend/src/schemas/validation.py
index 1a03e13..3c2113b 100644
--- a/backend/src/schemas/validation.py
+++ b/backend/src/schemas/validation.py
@@ -6,6 +6,8 @@
class VqlValidateRequest(BaseModel):
sql: str # The original SQL, for context if needed
vql: str # The VQL to validate
+ vdb: str
+ dialect: str
class VqlValidationApiResponse(BaseModel):
diff --git a/backend/src/services/translation_service.py b/backend/src/services/translation_service.py
index ab5c698..7d1aa17 100644
--- a/backend/src/services/translation_service.py
+++ b/backend/src/services/translation_service.py
@@ -1,3 +1,12 @@
+"""
+Core translation service for converting SQL to VQL.
+
+This module contains the primary logic for the SQL-to-VQL translation process,
+leveraging the `sqlglot` library for parsing and transformation. It also
+integrates with an AI service to analyze and provide suggestions for
+parsing errors.
+"""
+
import logging
from sqlglot import parse_one
from sqlglot.errors import ParseError
@@ -12,8 +21,34 @@
async def run_translation(source_sql: str, dialect: str, vdb: str) -> TranslateApiResponse:
- """
- Translates a source SQL string to VQL, handling errors and transformations.
+ """Translate a source SQL string to VQL, handling errors and transformations.
+
+ This function orchestrates the end-to-end translation process. It first
+ attempts to parse the source SQL using `sqlglot` according to the specified
+ dialect. If successful, it applies a series of transformations:
+ 1. Qualifies table names with the provided VDB, if any.
+ 2. Applies Oracle-specific transformations for `DUAL` functions.
+
+ If a `ParseError` occurs, it invokes an AI service to analyze the error
+ and the source SQL, aiming to provide a meaningful explanation and a
+ suggested fix.
+
+ Args:
+ source_sql: The raw SQL string to be translated.
+ dialect: The dialect of the source SQL (e.g., "oracle", "bigquery").
+ vdb: The name of the Virtual Database (VDB) to qualify table names with.
+ If empty, no VDB transformation is applied.
+
+ Returns:
+ An object containing either the successfully generated VQL string or
+ an `error_analysis` object with details from the AI service if parsing
+ failed. In case of a catastrophic failure, a simple error message is
+ returned.
+
+ Raises:
+ HTTPException: Re-raises any `HTTPException` that might occur during the
+ AI analysis sub-process, allowing it to be propagated to
+ the API layer.
"""
logger.debug(f"Running translation: dialect='{dialect}', vdb='{vdb}', SQL='{source_sql[:100]}...'")
diff --git a/backend/src/services/validation_service.py b/backend/src/services/validation_service.py
index e14218b..bb5798b 100644
--- a/backend/src/services/validation_service.py
+++ b/backend/src/services/validation_service.py
@@ -1,20 +1,34 @@
+"""Validates VQL queries against a Denodo server."""
+
import logging
import asyncio
from fastapi import HTTPException
+import re
from sqlalchemy import text
from sqlalchemy.exc import OperationalError, ProgrammingError, SQLAlchemyError
-from src.schemas.validation import VqlValidationApiResponse
-from src.schemas.translation import AIAnalysis # Import the unified error model
+from src.schemas.validation import VqlValidationApiResponse, VqlValidateRequest
+from src.schemas.translation import AIAnalysis
from src.utils.ai_analyzer import analyze_vql_validation_error
from src.db.session import get_engine
logger = logging.getLogger(__name__)
-async def run_validation(vql_to_validate: str) -> VqlValidationApiResponse:
- """
- Validates a VQL query against the Denodo database.
+async def run_validation(request: VqlValidateRequest) -> VqlValidationApiResponse:
+ """Validates a VQL query using a `DESC QUERYPLAN` statement.
+
+ This check is run in a separate thread to avoid blocking. If validation
+ fails, an AI service is called to analyze the error.
+
+ Args:
+ request: The VQL and its original SQL context.
+
+ Returns:
+ A validation response, with AI analysis on failure.
+
+ Raises:
+ HTTPException: If the database connection is unavailable.
"""
engine = get_engine()
if engine is None:
@@ -22,10 +36,16 @@ async def run_validation(vql_to_validate: str) -> VqlValidationApiResponse:
status_code=503,
detail="Database connection is not available. Check server logs.",
)
+    # DESC QUERYPLAN throws a syntax error when the query has a trailing LIMIT,
+    # so strip the LIMIT clause (case-insensitively) before wrapping the query.
+    limit_match = re.search(r"LIMIT\s+\d+", request.vql, re.IGNORECASE)
+ if limit_match:
+ vql_without_limit = request.vql[:limit_match.start()]
+ desc_query_plan_vql: str = f"DESC QUERYPLAN {vql_without_limit}"
+ else:
+ desc_query_plan_vql: str = f"DESC QUERYPLAN {request.vql}"
+ logger.info(f"Attempting to validate VQL (via DESC QUERYPLAN): {request.vql[:100]}...")
- desc_query_plan_vql: str = f"DESC QUERYPLAN {vql_to_validate}"
- logger.info(f"Attempting to validate VQL (via DESC QUERYPLAN): {vql_to_validate[:100]}...")
-
+ # This synchronous function is executed in a separate thread to prevent blocking.
def db_call():
try:
with engine.connect() as connection:
@@ -47,15 +67,13 @@ def db_call():
message="VQL syntax check successful!",
)
- # Re-raise the exception caught in the thread
raise result
except (OperationalError, ProgrammingError) as e:
db_error_message = str(getattr(e, "orig", e))
logger.warning(f"Denodo VQL validation failed: {db_error_message}")
try:
- # The result is now correctly typed as AIAnalysis
- ai_analysis_result: AIAnalysis = await analyze_vql_validation_error(db_error_message, vql_to_validate)
+ ai_analysis_result: AIAnalysis = await analyze_vql_validation_error(db_error_message, request)
return VqlValidationApiResponse(
validated=False, error_analysis=ai_analysis_result
)
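The LIMIT-stripping behaviour above, isolated for a quick check (illustrative):

```python
import re


def strip_trailing_limit(vql: str) -> str:
    """Drop everything from the first LIMIT clause onward, as the service does."""
    match = re.search(r"LIMIT\s+\d+", vql, re.IGNORECASE)
    return vql[: match.start()] if match else vql


assert strip_trailing_limit("SELECT * FROM v LIMIT 10").rstrip() == "SELECT * FROM v"
assert strip_trailing_limit("select * from v limit 5").rstrip() == "select * from v"
assert strip_trailing_limit("SELECT * FROM v") == "SELECT * FROM v"
```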
diff --git a/backend/src/utils/ai_analyzer.py b/backend/src/utils/ai_analyzer.py
index 7fafe7e..de677ff 100644
--- a/backend/src/utils/ai_analyzer.py
+++ b/backend/src/utils/ai_analyzer.py
@@ -1,21 +1,42 @@
+# src/utils/ai_analyzer.py
+
import logging
-from typing import Type
+from typing import Type, Set, List
+
+from sqlalchemy.orm.query import Query
+from sqlalchemy.sql.elements import BinaryExpression
from sqlglot import exp, parse_one
from fastapi import HTTPException
from pydantic_ai import Agent, RunContext, Tool
from pydantic_ai.models.google import GoogleModel
from pydantic_ai.providers.google import GoogleProvider
-from pydantic_ai.models.fallback import FallbackModel
from pydantic_ai.models.openai import OpenAIModel
from pydantic_ai.providers.azure import AzureProvider
+from dataclasses import dataclass
+from sqlalchemy.orm import Session
+from sqlalchemy import or_
+
+from src.db.sqlite_session import get_sqlite_session
+from src.schemas.db_log import AcceptedQuery
+
from src.config import settings
from src.schemas.translation import AIAnalysis
+from src.schemas.validation import VqlValidateRequest
from src.utils.denodo_client import get_available_views_from_denodo, get_denodo_functions_list, get_vdb_names_list, get_view_cols
logger = logging.getLogger(__name__)
+@dataclass
+class Deps:
+ tables: set[str]
+ vdb: str
+ sql: str
+ dialect: str
+
+
def _initialize_ai_agent(system_prompt: str, output_type: Type, tools: list[Tool] = []) -> Agent:
if settings.OPENAI_API_KEY:
logger.info("Using OpenAI model.")
@@ -41,20 +62,59 @@ def _initialize_ai_agent(system_prompt: str, output_type: Type, tools: list[Tool
model,
system_prompt=system_prompt,
output_type=output_type,
- deps_type=set[str],
+ deps_type=Deps,
tools=tools
)
+async def get_history_query_list(sql: str, tables: Set[str], dialect: str) -> List[dict[str, str]]:
+    """Retrieve historical SQL-to-VQL pairs that reference the given tables.
+
+    Queries the SQLite log database directly and returns up to 10 of the most
+    recent matching source/target pairs.
+    """
+ if not tables:
+ return []
+
+ # Manually obtain and manage a database session for this function
+ db: Session = get_sqlite_session()
+ try:
+ conditions: List[BinaryExpression[bool]] = [AcceptedQuery.tables.like(f'%"{table}"%') for table in tables]
+
+ # Combine conditions with OR logic and execute the query
+ query: Query[AcceptedQuery] = db.query(AcceptedQuery).filter(or_(*conditions))
+
+ if dialect:
+ query = query.filter(AcceptedQuery.source_dialect == dialect)
+
+ query = query.order_by(AcceptedQuery.timestamp.desc()).limit(10)
+
+        history_vqls: List[dict[str, str]] = [
+ {"source_sql": log.source_sql, "target_vql": log.target_vql}
+ for log in query.all()
+ if log.source_sql and log.target_vql]
+ return history_vqls
+ except Exception as e:
+ logger.error(f"Failed to retrieve history queries from DB for tables {tables}: {e}", exc_info=True)
+ return []
+ finally:
+ db.close()
+
+
+async def _get_history(ctx: RunContext[Deps]) -> list[dict[str, str]]:
+    """Retrieves previously successful SQL-to-VQL translation pairs for the referenced tables. Always use this tool first to check for an existing translation."""
+ logger.info("Executing _get_history_query_list tool")
+ return await get_history_query_list(ctx.deps.sql, ctx.deps.tables, ctx.deps.dialect)
+
+
async def _get_functions() -> list[str]:
"""Retrieves a list of available Denodo functions. Use this tool when an error indicates a function was not found or has incorrect arity."""
logger.info("Executing _get_functions tool")
return await get_denodo_functions_list()
-async def _get_views() -> list[str]:
+async def _get_views(ctx: RunContext[Deps]) -> list[str]:
"""Retrieves a list of available Denodo views. Use this tool when an error suggests a table or view is missing or misspelled."""
- return await get_available_views_from_denodo()
+ return await get_available_views_from_denodo(ctx.deps.vdb)
async def _get_vdbs() -> list[str]:
@@ -62,9 +122,9 @@ async def _get_vdbs() -> list[str]:
return await get_vdb_names_list()
-async def _get_view_metadata(ctx: RunContext[set[str]]) -> list[dict[str, str]]:
+async def _get_view_metadata(ctx: RunContext[Deps]) -> list[dict[str, str]]:
"""Retrieves a list of columns for the views. Use this tool when an error refers to field not found in view error."""
- return await get_view_cols(ctx.deps)
+ return await get_view_cols(ctx.deps.tables)
def _extract_tables(input_vql: str) -> set[str]:
@@ -78,10 +138,10 @@ def _extract_tables(input_vql: str) -> set[str]:
return tables
-async def analyze_vql_validation_error(error: str, input_vql: str) -> AIAnalysis:
+async def analyze_vql_validation_error(error: str, request: VqlValidateRequest) -> AIAnalysis:
agent = _initialize_ai_agent(
"You are an SQL Validation assistant for Denodo VQL", AIAnalysis, tools=[
- Tool(_get_functions), Tool(_get_views), Tool(_get_vdbs), Tool(_get_view_metadata)]
+ Tool(_get_functions), Tool(_get_views), Tool(_get_vdbs), Tool(_get_view_metadata), Tool(_get_history)]
)
prompt: str = f"""You are an expert Denodo VQL Assistant. Your task is to analyze Denodo VQL validation errors.
@@ -90,7 +150,7 @@ async def analyze_vql_validation_error(error: str, input_vql: str) -> AIAnalysis
3. Provide an accurate, corrected VQL suggestion in the `sql_suggestion` field.
Do not explain what you are doing in the explanation, just provide the direct cause of the error.
-
+    Always check the _get_history tool first to see whether the same or a similar query has already been translated and validated successfully.
If a table/view is missing, use the _get_views tool to find available views and suggest a likely replacement.
If a function is not found, use the _get_functions tool to check for available Denodo functions.
If a database name (VDB) is invalid, use _get_vdbs tool to check for valid database names.
@@ -100,11 +160,14 @@ async def analyze_vql_validation_error(error: str, input_vql: str) -> AIAnalysis
**Input VQL:**
```vql
- {input_vql}```"""
- vql_tables: set[str] = _extract_tables(input_vql)
+ {request.vql}```"""
+ vql_tables: set[str] = _extract_tables(request.vql)
+ deps = Deps(tables=vql_tables, vdb=request.vdb, sql=request.sql, dialect=request.dialect)
try:
- response = await agent.run(prompt, deps=vql_tables)
+ response = await agent.run(prompt, deps=deps)
if response and response.output:
+            try:
+                response.output.sql_suggestion = parse_one(response.output.sql_suggestion).sql(pretty=True)
+            except Exception:
+                logger.warning("AI sql_suggestion is not parsable; returning it as-is.")
logger.info(f"AI Validation Analysis Category: {response.output.error_category}")
logger.info(f"AI Validation Analysis Explanation: {response.output.explanation}")
logger.info(f"AI Validation Analysis Suggestion: {response.output.sql_suggestion}")
@@ -130,7 +193,7 @@ async def analyze_sql_translation_error(exception_message: str, input_sql: str)
1. Categorize the error as "Translation Syntax Error". Set this in the `error_category` field.
2. Explain concisely in the `explanation` field why the `Input SQL` failed based on the `Error`.
3. Provide a corrected `Valid SQL Suggestion` in the `sql_suggestion` field that would be parsable by the original dialect or a hint for VQL.
-
+
Do not use ```sql markdown for the corrected SQL response. Do not explain what you are doing, just provide the explanation and the suggestion directly.
**ERROR:**
@@ -155,3 +218,45 @@ async def analyze_sql_translation_error(exception_message: str, input_sql: str)
raise HTTPException(
status_code=503, detail=f"AI service for translation unavailable or failed: {agent_error}"
)
+
+
+async def explain_vql_differences(source_sql: str, source_dialect: str, final_vql: str) -> str:
+ """
+ Uses an AI agent to analyze and explain the differences between a source SQL query
+ and its translated VQL counterpart.
+ """
+ agent = _initialize_ai_agent(
+ "You are an expert in SQL dialects and Denodo VQL. Your task is to explain the differences between a source SQL query and its translated VQL counterpart.",
+ AIAnalysis
+ )
+
+ prompt = f"""Analyze the differences between the source SQL and the final VQL.
+ 1. In the `explanation` field, provide a concise, Markdown-formatted bulleted list (using '-') explaining the key transformations that were applied.
+ 2. Focus on syntax changes, function replacements, and structural modifications (like adding a database name to a table).
+ 3. Keep the explanation clear and easy for a developer to understand.
+ 4. If there are no significant changes, state that the VQL is a direct equivalent.
+ 5. Do not populate the `sql_suggestion` or `error_category` fields.
+
+ **Source SQL ({source_dialect}):**
+ ```sql
+ {source_sql}
+ ```
+
+ **Final VQL:**
+ ```vql
+ {final_vql}
+ ```
+ """
+ try:
+ response = await agent.run(prompt)
+ if response and response.output and response.output.explanation:
+ explanation_text = response.output.explanation
+ logger.info(f"AI VQL Diff Explanation generated: {explanation_text[:150]}...")
+ return explanation_text
+ else:
+ logger.error(f"AI agent returned unexpected response for VQL diff explanation: {response}")
+ return "AI analysis of the VQL differences failed to produce an explanation."
+ except Exception as agent_error:
+ logger.error(f"Error calling AI Agent for VQL diff explanation: {agent_error}", exc_info=True)
+ # Return a user-friendly error message, not the raw exception
+ return "An error occurred while generating the explanation of VQL differences."
diff --git a/backend/src/utils/denodo_client.py b/backend/src/utils/denodo_client.py
index 3617363..898f5a8 100644
--- a/backend/src/utils/denodo_client.py
+++ b/backend/src/utils/denodo_client.py
@@ -2,14 +2,14 @@
import asyncio
from fastapi import HTTPException
from sqlalchemy import Engine, text
-from src.db.session import get_engine # Use the centralized engine
+from src.db.session import get_engine
logger = logging.getLogger(__name__)
async def get_available_views_from_denodo(vdb_name: str | None = None) -> list[str]:
engine: Engine = get_engine()
- vql = "SELECT database_name, name FROM get_views()"
+ vql = f"SELECT database_name, name FROM get_views() where input_database_name = '{vdb_name}'"
def db_call():
try:
@@ -29,10 +29,10 @@ def db_call():
async def get_denodo_functions_list() -> list[str]:
- engine = get_engine()
+ engine: Engine = get_engine()
vql = "LIST FUNCTIONS"
- def db_call():
+ def db_call() -> list[str]:
try:
with engine.connect() as connection:
result = connection.execute(text(vql))
@@ -50,7 +50,7 @@ def db_call():
async def get_vdb_names_list() -> list[str]:
- engine = get_engine()
+ engine: Engine = get_engine()
vql = "SELECT db_name FROM GET_DATABASES()"
def db_call():
@@ -75,16 +75,16 @@ async def get_view_cols(tables: list[str]) -> list[dict[str, str]]:
logger.info("No tables provided to get_view_cols, returning empty list.")
return []
- engine = get_engine()
+ engine: Engine = get_engine()
tables_in_clause: str = ",".join(f"'{s}'" for s in tables)
vql: str = f"select view_name, column_name, column_sql_type from GET_view_columns() where view_name in ({tables_in_clause})"
- def db_call():
+ def db_call() -> list[dict[str, str]]:
try:
with engine.connect() as connection:
result = connection.execute(text(vql))
column_details: list[dict[str, str]] = [dict(row._mapping) for row in result]
- logger.info(f"Successfully retrieved view cols")
+ logger.info("Successfully retrieved view cols")
return column_details
except Exception as e:
logger.error(f"Error executing VQL query '{vql}' to get view columns: {e}", exc_info=True)
diff --git a/backend/src/utils/logging_config.py b/backend/src/utils/logging_config.py
index 037d0c9..5fe5cd9 100644
--- a/backend/src/utils/logging_config.py
+++ b/backend/src/utils/logging_config.py
@@ -2,7 +2,6 @@
import logging
import logging.config
import os
-import sys
import structlog
from structlog.types import Processor
diff --git a/docker-compose.yml b/docker-compose.yml
index 4c83d26..a2fb413 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -36,7 +36,11 @@ services:
user: appuser
volumes:
- ${HOST_PROJECT_PATH}/backend/vdb_conf.yaml:/opt/vdb_conf.yaml
+ - vqlforge-data:/data
networks:
- - app_network
\ No newline at end of file
+ - app_network
+
+volumes:
+ vqlforge-data:
\ No newline at end of file
diff --git a/frontend/Dockerfile b/frontend/Dockerfile
index 26f29f8..41a98a6 100644
--- a/frontend/Dockerfile
+++ b/frontend/Dockerfile
@@ -31,6 +31,5 @@ COPY nginx.conf /etc/nginx/conf.d/default.conf
# Expose port 4999 for Nginx
EXPOSE 4999
-# The wrapper script is no longer needed.
# The original Nginx command is used directly.
CMD ["nginx", "-g", "daemon off;"]
\ No newline at end of file
diff --git a/frontend/package.json b/frontend/package.json
index 88fa605..5535c62 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -1,6 +1,6 @@
{
"name": "vqlforge-frontend",
- "version": "0.2.0",
+ "version": "0.25.0",
"private": true,
"dependencies": {
"@codemirror/lang-sql": "^6.8.0",
diff --git a/frontend/postcss.config.js b/frontend/postcss.config.js
new file mode 100644
index 0000000..33ad091
--- /dev/null
+++ b/frontend/postcss.config.js
@@ -0,0 +1,6 @@
+module.exports = {
+ plugins: {
+ tailwindcss: {},
+ autoprefixer: {},
+ },
+}
diff --git a/frontend/src/App.js b/frontend/src/App.js
index 3bd8063..9a4fcaa 100644
--- a/frontend/src/App.js
+++ b/frontend/src/App.js
@@ -1,4 +1,3 @@
-// --- At the top of App.js ---
import React, { useState, useCallback, useEffect } from 'react';
import {
CssBaseline, AppBar, Toolbar, Typography, Container, Box,
@@ -16,20 +15,21 @@ import { sql } from '@codemirror/lang-sql';
import { purple, blueGrey } from '@mui/material/colors';
// Import API Service
-import { fetchVdbs, translateSql, validateSql, forgeSql } from './apiService.js';
+import { fetchVdbs, translateSql, validateSql, forgeSql, logAcceptedQuery } from './apiService.js';
// Import Custom Components
import CodeEditor from './components/Editors/CodeEditor.js';
import VqlForgeLogo from './Logo.js';
import AgenticStatusDisplay from './components/AgenticStatusDisplay.js';
import AgenticLogDisplay from './components/AgenticLogDisplay.js';
+import { useToast, ToastContainer } from './components/Toast/Toast.js';
-// --- Import Alert Components ---
+// Import Alert Components
import AiErrorAnalysis from './components/Alerts/AiErrorAnalysis.js';
import AiValidationErrorAnalysis from './components/Alerts/AiValidationErrorAnalysis.js';
-// --- Configuration ---
+// Configuration
const availableDialects = [{ value: 'athena', label: 'Athena' }, { value: 'bigquery', label: 'BigQuery' }, { value: 'clickhouse', label: 'ClickHouse' }, { value: 'databricks', label: 'Databricks' }, { value: 'doris', label: 'Doris' }, { value: 'drill', label: 'Drill' }, { value: 'druid', label: 'Druid' }, { value: 'duckdb', label: 'DuckDB' }, { value: 'dune', label: 'Dune' }, { value: 'hive', label: 'Hive' }, { value: 'materialize', label: 'Materialize' }, { value: 'mysql', label: 'MySQL' }, { value: 'oracle', label: 'Oracle' }, { value: 'postgres', label: 'PostgreSQL' }, { value: 'presto', label: 'Presto' }, { value: 'prql', label: 'PRQL' }, { value: 'redshift', label: 'Redshift' }, { value: 'risingwave', label: 'RisingWave' }, { value: 'snowflake', label: 'Snowflake' }, { value: 'spark', label: 'Spark SQL' }, { value: 'spark2', label: 'Spark SQL 2' }, { value: 'sqlite', label: 'SQLite' }, { value: 'starrocks', label: 'StarRocks' }, { value: 'tableau', label: 'Tableau' }, { value: 'teradata', label: 'Teradata' }, { value: 'trino', label: 'Trino' }];
const editorExtensions = [sql()];
const initialTargetSqlPlaceholder = '-- Target SQL will appear here after conversion...';
@@ -37,9 +37,10 @@ const conversionErrorPlaceholder = '-- Conversion Error --';
function App() {
const theme = useTheme();
+ const toast = useToast();
const [sourceDialect, setSourceDialect] = useState(availableDialects[0]);
- // --- VDB State ---
+ // VDB State
const [actualAvailableVDBs, setActualAvailableVDBs] = useState([]);
const [selectedVDB, setSelectedVDB] = useState(null);
const [vdbsLoading, setVdbsLoading] = useState(false);
@@ -51,7 +52,7 @@ function App() {
const [error, setError] = useState(null);
const [isValidating, setIsValidating] = useState(false);
const [validationResult, setValidationResult] = useState(null);
- // --- Agentic Mode State ---
+ // Agentic Mode State
const [isAgenticModeActive, setIsAgenticModeActive] = useState(false);
const [agenticStatusMessages, setAgenticStatusMessages] = useState([]);
const [currentAgenticStep, setCurrentAgenticStep] = useState(null);
@@ -68,7 +69,7 @@ function App() {
setShowFinalAgenticLog(false);
};
- // --- Fetch VDBs ---
+ // Fetch VDBs
useEffect(() => {
setVdbsLoading(true);
setVdbsError(null);
@@ -198,13 +199,48 @@ function App() {
}
try {
- const validationData = await validateSql(sourceSql, vqlWithoutLineBreaks);
+ const validationData = await validateSql(
+ sourceSql,
+ vqlWithoutLineBreaks,
+ selectedVDB.value,
+ sourceDialect.value
+ );
if (validationData.validated) {
- setValidationResult({
- status: 'success',
- message: validationData.message || `VQL syntax check successful!`
- });
+ const message = validationData.message || `VQL syntax check successful!`;
+
+ const actions = [
+ {
+ label: 'Accept',
+ onClick: async () => {
+ try {
+ const logData = {
+ source_sql: sourceSql,
+ source_dialect: sourceDialect.value,
+ target_vql: targetSql
+ };
+ await logAcceptedQuery(logData);
+ toast.info('Query pair saved successfully!', 'Saved');
+ } catch (err) {
+ console.error("Failed to log accepted query:", err);
+ toast.error(err.message || 'Could not save the query to the log.', 'Logging Error');
+ }
+ clearValidationState();
+ },
+ primary: true
+ },
+ {
+ label: 'Refuse',
+ onClick: () => {
+ clearValidationState();
+ setTargetSql(initialTargetSqlPlaceholder);
+ console.log('Validation refused by user');
+ },
+ primary: false
+ }
+ ];
+
+ toast.success(message, 'Validation Successful', 12000, actions);
} else {
if (validationData.error_analysis) {
setValidationResult({
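[Note: the call sites above imply a toast API of the shape toast.success(message, title, durationMs, actions). Toast.js itself is outside this diff, so the following is only a sketch of the contract those calls assume; the names and the auto-dismiss semantics are guesses.]

    // Assumed contract of the useToast() hook consumed in App.js (not in this diff).
    // Each action renders as a button on the toast: { label, onClick, primary }.
    const toast = useToast();
    toast.success(
      'VQL syntax check successful!',   // message body
      'Validation Successful',          // toast title
      12000,                            // assumed: auto-dismiss after 12 s
      [{ label: 'Accept', onClick: () => {}, primary: true }]
    );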
@@ -233,6 +269,7 @@ function App() {
}
};
+
const handleAgenticForge = () => {
setIsAgenticModeActive(true);
clearErrorState();
@@ -241,11 +278,15 @@ function App() {
setCurrentAgenticStep(null);
setShowFinalAgenticLog(false);
setTargetSql(initialTargetSqlPlaceholder);
+ const initialVqlForForge = (targetSql === initialTargetSqlPlaceholder || targetSql === conversionErrorPlaceholder)
+ ? ""
+ : targetSql;
const requestBody = {
sql: sourceSql,
dialect: sourceDialect.value,
vdb: selectedVDB.value,
+ vql: initialVqlForForge,
};
const onMessage = ({ event, data }) => {
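[Note: logAcceptedQuery is imported from apiService.js but defined outside this diff. A minimal sketch of what such a helper could look like; the endpoint path is a placeholder, not the project's actual route.]

    // apiService.js (hypothetical sketch; '/api/log/accepted' is a placeholder endpoint)
    export async function logAcceptedQuery(logData) {
      const response = await fetch('/api/log/accepted', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(logData), // { source_sql, source_dialect, target_vql }
      });
      if (!response.ok) {
        // Surface a readable message for the error toast in the caller
        throw new Error(`Logging failed with status ${response.status}`);
      }
      return response.json();
    }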
@@ -269,7 +310,25 @@ function App() {
}
if (is_valid) {
- setValidationResult({ status: 'success', message: final_message });
+ const actions = [
+ {
+ label: 'Accept',
+ onClick: () => {
+ console.log('Auto-Forge result accepted');
+ },
+ primary: true
+ },
+ {
+ label: 'Retry',
+ onClick: () => {
+ setTargetSql(initialTargetSqlPlaceholder);
+ console.log('Auto-Forge result rejected, ready for retry');
+ },
+ primary: false
+ }
+ ];
+
+ toast.success(final_message, 'Auto-Forge Successful', 12000, actions);
} else {
if (error_analysis) {
if (process_log.some(step => step.step_name === "Translate" && !step.success)) {
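[Note: forgeSql streams Server-Sent Events and hands each one to onMessage as { event, data }. Its implementation is outside this hunk; below is a sketch of one way to parse an SSE body with fetch and a ReadableStream. The '/api/forge' path and framing details are assumptions.]

    // Hypothetical SSE reader; the real forgeSql in apiService.js may differ.
    async function streamForge(requestBody, onMessage) {
      const response = await fetch('/api/forge', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(requestBody),
      });
      const reader = response.body.getReader();
      const decoder = new TextDecoder();
      let buffer = '';
      for (;;) {
        const { done, value } = await reader.read();
        if (done) break;
        buffer += decoder.decode(value, { stream: true });
        let sep;
        // SSE messages are separated by a blank line
        while ((sep = buffer.indexOf('\n\n')) !== -1) {
          const lines = buffer.slice(0, sep).split('\n');
          buffer = buffer.slice(sep + 2);
          const eventLine = lines.find((l) => l.startsWith('event: '));
          const dataLine = lines.find((l) => l.startsWith('data: '));
          if (dataLine) {
            onMessage({
              event: eventLine ? eventLine.slice(7) : null,
              data: JSON.parse(dataLine.slice(6)),
            });
          }
        }
      }
    }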
@@ -308,13 +367,7 @@ function App() {
if (!validationResult) return null;
const status = validationResult.status;
- if (status === 'success') {
- return {
- severity: 'success',
- icon: ,
- title: 'Validation Successful'
- };
- }
+
if (status === 'info') {
return {
severity: 'info',
@@ -335,218 +388,224 @@ function App() {
const validationAlertProps = getValidationAlertProps();
return (
[Note: the remainder of this hunk re-indents the App render tree; its JSX tags were stripped during text extraction and cannot be reconstructed verbatim. What the residue shows: the old top-level wrapper block is removed and a new one added, the header still renders the VqlForgeLogo and the "VQLForge" title, the dialect and VDB Autocomplete pickers and the Convert / "Validate VQL" / Auto-Forge buttons keep their labels and loading spinners, the success-path Alert is gone (superseded by the toast actions above), the error / VDB Load Issue / agentic status and log displays are re-nested under the new layout, and the footer version string is bumped:]
-            VQLForge 0.2 -
-            MIT License
+            VQLForge 0.25 -
+            MIT License
);
}
diff --git a/frontend/src/Logo.js b/frontend/src/Logo.js
index fbb7591..bef9a42 100644
--- a/frontend/src/Logo.js
+++ b/frontend/src/Logo.js
@@ -1,8 +1,8 @@
import React from 'react';
-function Logo() {
+function Logo(props) {
return (
-
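[Note: the change above adds a props parameter so callers can size and style the SVG. A minimal sketch of the pattern; the component's real SVG markup is not reproduced here.]

    // Logo.js pattern (hypothetical body; the actual vector paths are elided)
    function Logo(props) {
      // Spreading props forwards width, height, className, etc. to the <svg> root
      return (
        <svg viewBox="0 0 100 100" xmlns="http://www.w3.org/2000/svg" {...props}>
          <title>VQLForge</title>
        </svg>
      );
    }
    export default Logo;
    // Usage: <Logo width={32} height={32} />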