diff --git a/CLAUDE.md b/CLAUDE.md index 25f4585..f908334 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -6,663 +6,214 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co GoSQLX is a **production-ready**, **race-free**, high-performance SQL parsing SDK for Go that provides lexing, parsing, and AST generation with zero-copy optimizations. The library is designed for enterprise use with comprehensive object pooling for memory efficiency. -**Requirements**: Go 1.24+ +**Requirements**: Go 1.24+ (toolchain go1.25.0) - -### **Production Status**: ✅ **VALIDATED FOR PRODUCTION DEPLOYMENT** (v1.6.0+) -- **Thread Safety**: Confirmed race-free through comprehensive concurrent testing -- **Performance**: 1.38M+ operations/second sustained, up to 1.5M peak with memory-efficient object pooling -- **International**: Full Unicode support for global SQL processing -- **Reliability**: 95%+ success rate on real-world SQL queries -- **Standards**: Multi-dialect SQL compatibility (PostgreSQL, MySQL, SQL Server, Oracle, SQLite) -- **SQL Compliance**: ~80-85% SQL-99 compliance (includes window functions, CTEs, set operations) -- **Test Coverage**: AST package 73.4%, Models package 100% coverage +**Production Status**: ✅ Validated for production deployment (v1.6.0+) +- Thread-safe with zero race conditions (20,000+ concurrent operations tested) +- 1.38M+ ops/sec sustained, 1.5M peak with memory-efficient object pooling +- ~80-85% SQL-99 compliance (window functions, CTEs, set operations, MERGE, etc.) +- Multi-dialect support: PostgreSQL, MySQL, SQL Server, Oracle, SQLite ## Architecture ### Core Components -- **Tokenizer** (`pkg/sql/tokenizer/`): Zero-copy SQL lexer that converts SQL text into tokens -- **Parser** (`pkg/sql/parser/`): Recursive descent parser that builds AST from tokens -- **AST** (`pkg/sql/ast/`): Abstract Syntax Tree nodes with comprehensive SQL statement support -- **Keywords** (`pkg/sql/keywords/`): Categorized SQL keyword definitions across dialects -- **Models** (`pkg/models/`): Core data structures (tokens, spans, locations, errors) - 100% test coverage -- **Errors** (`pkg/errors/`): Structured error handling system with error codes and position tracking -- **Metrics** (`pkg/metrics/`): Production performance monitoring and observability -- **Security** (`pkg/sql/security/`): SQL injection detection with pattern scanning and severity classification -- **Linter** (`pkg/linter/`): SQL linting engine with 10 built-in rules (L001-L010) for style enforcement -- **CLI** (`cmd/gosqlx/`): Production-ready command-line tool for SQL validation, formatting, and analysis -- **LSP** (`pkg/lsp/`): Language Server Protocol server for IDE integration (diagnostics, hover, completion, formatting) - -### Object Pooling Architecture - -The codebase uses extensive object pooling for performance optimization: -- **AST Pool**: `ast.NewAST()` / `ast.ReleaseAST()` - Main AST container management -- **Tokenizer Pool**: `tokenizer.GetTokenizer()` / `tokenizer.PutTokenizer()` - Tokenizer instance reuse -- **Statement Pools**: Individual pools for SELECT, INSERT, UPDATE, DELETE statements -- **Expression Pools**: Pools for identifiers, binary expressions, literal values -- **Buffer Pool**: Internal buffer reuse in tokenizer operations - -### Token Processing Flow - -1. **Input**: Raw SQL bytes → `tokenizer.Tokenize()` → `[]models.TokenWithSpan` -2. **Conversion**: Token conversion → `parser.ConvertTokensForParser()` → `[]token.Token` -3. 
**Parsing**: Parser consumption → `parser.Parse()` → `*ast.AST` -4. **Cleanup**: Release pooled objects back to pools when done +- **Tokenizer** (`pkg/sql/tokenizer/`): Zero-copy SQL lexer with full UTF-8 support +- **Parser** (`pkg/sql/parser/`): Recursive descent parser with one-token lookahead +- **AST** (`pkg/sql/ast/`): Abstract Syntax Tree nodes with visitor pattern support +- **Keywords** (`pkg/sql/keywords/`): Multi-dialect SQL keyword definitions +- **Models** (`pkg/models/`): Core data structures (tokens, spans, locations) +- **Errors** (`pkg/errors/`): Structured error handling with position tracking +- **Metrics** (`pkg/metrics/`): Production performance monitoring +- **Security** (`pkg/sql/security/`): SQL injection detection with severity classification +- **Linter** (`pkg/linter/`): SQL linting engine with 10 built-in rules (L001-L010) +- **LSP** (`pkg/lsp/`): Language Server Protocol for IDE integration +- **GoSQLX** (`pkg/gosqlx/`): High-level simple API (recommended for most users) +- **Compatibility** (`pkg/compatibility/`): API stability testing -## Development Commands +### Token Processing Pipeline -This project uses [Task](https://taskfile.dev) as the task runner. Install with: -```bash -go install github.com/go-task/task/v3/cmd/task@latest -# Or: brew install go-task (macOS) ``` - -### Building and Testing -```bash -# Show all available tasks -task - -# Build all packages -task build - -# Build the CLI binary -task build:cli - -# Build CLI for all platforms -task build:cli:all - -# Install CLI globally -task install - -# Run all tests -task test - -# Run tests with race detection (CRITICAL) -task test:race - -# Run tests for specific package -task test:pkg PKG=./pkg/sql/parser - -# Run tests in short mode -task test:short - -# Run tests with coverage report -task coverage - -# Show coverage by function -task coverage:func - -# Run benchmarks -task bench - -# Run benchmarks with CPU profiling -task bench:cpu - -# Run fuzz tests -task fuzz +Raw SQL bytes → tokenizer.Tokenize() → []models.TokenWithSpan + → parser.ConvertTokensForParser() → []token.Token + → parser.Parse() → *ast.AST ``` -### Code Quality -```bash -# Format code -task fmt - -# Check formatting (fails if not formatted) -task fmt:check - -# Run go vet -task vet - -# Run golangci-lint -task lint - -# Run golangci-lint with auto-fix -task lint:fix - -# Run staticcheck -task staticcheck +### Object Pooling (Critical for Performance) -# Run all quality checks (fmt, vet, lint) -task quality +The codebase uses extensive sync.Pool for all major data structures: +- `ast.NewAST()` / `ast.ReleaseAST()` - AST container +- `tokenizer.GetTokenizer()` / `tokenizer.PutTokenizer()` - Tokenizer instances +- Individual pools for SELECT, INSERT, UPDATE, DELETE statements +- Expression pools for identifiers, binary expressions, literals -# Full check suite (format, vet, lint, test:race) -task check +### Module Dependencies -# CRITICAL: Always run race detection during development -task test:race +Clean hierarchy with minimal coupling: +``` +models → (no deps) +errors → models +keywords → models +tokenizer → models, keywords, metrics +ast → token +parser → tokenizer, ast, token, errors +gosqlx → all (high-level wrapper) ``` -### Pre-commit Hooks -The repository has pre-commit hooks that automatically run on every commit: -1. `go fmt` - Code formatting check -2. `go vet` - Static analysis -3. `go test -short` - Short test suite - -If a commit fails pre-commit checks, fix the issues and retry the commit. 
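A minimal end-to-end sketch of the pipeline described above, assuming the exported `tokenizer.GetTokenizer`, `parser.ConvertTokensForParser`, and `parser.NewParser` entry points used throughout the package tests (error handling kept terse for brevity):

```go
package main

import (
	"fmt"

	"github.com/ajitpratap0/GoSQLX/pkg/sql/parser"
	"github.com/ajitpratap0/GoSQLX/pkg/sql/tokenizer"
)

func main() {
	// Tokenize raw SQL bytes with a pooled, zero-copy tokenizer.
	tkz := tokenizer.GetTokenizer()
	defer tokenizer.PutTokenizer(tkz) // MANDATORY: return the tokenizer to its pool

	tokens, err := tkz.Tokenize([]byte("SELECT id, name FROM users WHERE active = true"))
	if err != nil {
		panic(err)
	}

	// Convert tokenizer output into the parser's token representation.
	converted, err := parser.ConvertTokensForParser(tokens)
	if err != nil {
		panic(err)
	}

	// Parse into an AST; Release returns parser-owned objects to their pools.
	p := parser.NewParser()
	defer p.Release()

	result, err := p.Parse(converted)
	if err != nil {
		panic(err)
	}
	fmt.Printf("parsed %d statement(s)\n", len(result.Statements))
}
```

The same flow backs the higher-level `gosqlx.Parse` wrapper; use the low-level form only when you need direct control over pooling or token conversion.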
+## Development Commands -### Security +This project uses [Task](https://taskfile.dev) as the task runner: ```bash -# Run security vulnerability scan -task security:scan - -# Validate security setup -task security:validate +go install github.com/go-task/task/v3/cmd/task@latest +# Or: brew install go-task (macOS) ``` -### CI/CD +### Essential Commands ```bash -# Run full CI pipeline -task ci - -# Quick CI check (no race detection) -task ci:quick -``` - -### Running Examples +task # Show all available tasks +task build # Build all packages +task build:cli # Build CLI binary +task install # Install CLI globally +task test # Run all tests +task test:race # Run tests with race detection (CRITICAL) +task test:pkg PKG=./pkg/sql/parser # Test specific package +task bench # Run benchmarks with memory tracking +task coverage # Generate coverage report +task quality # Run fmt, vet, lint +task check # Full suite: format, vet, lint, test:race +task ci # Full CI pipeline +``` + +### Running a Single Test ```bash -# Run basic example -task examples - -# Run example tests -task examples:test - -# Or run directly: -go run ./examples/cmd/example.go +go test -v -run TestSpecificName ./pkg/sql/parser/ +go test -v -run "TestParser_Window.*" ./pkg/sql/parser/ ``` -### CLI Tool Usage (v1.4.0+) +### CLI Tool ```bash -# Validate SQL syntax -./gosqlx validate "SELECT * FROM users WHERE active = true" - -# Format SQL files with intelligent indentation +./gosqlx validate "SELECT * FROM users" ./gosqlx format -i query.sql - -# Analyze SQL structure and complexity ./gosqlx analyze "SELECT COUNT(*) FROM orders GROUP BY status" - -# Parse SQL to AST representation (JSON format) -./gosqlx parse -f json complex_query.sql - -# Start LSP server for IDE integration -./gosqlx lsp -./gosqlx lsp --log /tmp/lsp.log # With debug logging - -# Install globally -go install github.com/ajitpratap0/GoSQLX/cmd/gosqlx@latest +./gosqlx parse -f json query.sql +./gosqlx lsp # Start LSP server +./gosqlx lint query.sql # Run linter ``` -### Additional Documentation -- `docs/GETTING_STARTED.md` - Quick start guide for new users -- `docs/USAGE_GUIDE.md` - Comprehensive usage guide -- `docs/LSP_GUIDE.md` - Complete LSP server documentation and IDE integration -- `docs/LINTING_RULES.md` - All 10 linting rules (L001-L010) reference -- `docs/CONFIGURATION.md` - Configuration file (.gosqlx.yml) guide -- `docs/SQL_COMPATIBILITY.md` - SQL dialect compatibility matrix - -## Key Implementation Details +## Key Implementation Patterns -### Memory Management (CRITICAL FOR PERFORMANCE) -**Always use `defer` with pool return functions** - prevents resource leaks and maintains performance: +### Memory Management (MANDATORY) +Always use `defer` with pool return functions: ```go -// CORRECT usage pattern for tokenizer +// High-level API (recommended for most use cases) +ast, err := gosqlx.Parse("SELECT * FROM users") +// No cleanup needed - handled automatically + +// Low-level API (for fine-grained control) tkz := tokenizer.GetTokenizer() defer tokenizer.PutTokenizer(tkz) // MANDATORY -// CORRECT usage pattern for AST astObj := ast.NewAST() defer ast.ReleaseAST(astObj) // MANDATORY - -// Use objects -tokens, err := tkz.Tokenize(sqlBytes) -result, err := parser.Parse(tokens) ``` -- **Performance Impact**: Object pooling provides 60-80% memory reduction -- **Thread Safety**: All pool operations are race-condition free (validated) -- **Pool Efficiency**: 95%+ hit rate in production workloads - ### Parser Architecture -- **Type**: Recursive descent parser with 
one-token lookahead -- **Location**: `pkg/sql/parser/parser.go` -- **Statement Support**: DDL (CREATE, ALTER, DROP) and DML (SELECT, INSERT, UPDATE, DELETE) -- **Phase 2.5 Window Functions**: Complete SQL-99 window function support: - - `parseFunctionCall()` - Function calls with OVER clause detection - - `parseWindowSpec()` - PARTITION BY, ORDER BY, frame clause parsing - - `parseWindowFrame()` - ROWS/RANGE frame specifications - - `parseFrameBound()` - Individual frame bound parsing with expressions -- **Phase 2 Advanced Features**: CTEs (WITH clause), recursive CTEs, set operations (UNION/EXCEPT/INTERSECT) -- **Phase 1 JOIN Support**: All JOIN types with proper left-associative tree logic - -### AST Node Hierarchy -- **Base Interface**: All nodes implement `Node` interface (TokenLiteral, Children methods) -- **Statement Interface**: `Statement` extends `Node` for SQL statements -- **Expression Interface**: `Expression` extends `Node` for SQL expressions -- **Visitor Pattern**: Support in `pkg/sql/ast/visitor.go` for tree traversal -- **Pool Integration**: All major node types have dedicated pool management - -### Tokenizer Features -- **Zero-Copy Operations**: Direct byte slice operations without string allocation -- **Position Tracking**: Line/column information for error reporting -- **Token Types**: String literals, numbers, operators, keywords with proper categorization -- **Unicode Support**: Full UTF-8 support for international SQL queries -- **Dialect Support**: Multi-database keyword handling (PostgreSQL, MySQL, etc.) - -### Error Handling System (`pkg/errors/`) -- **Structured Errors**: Error codes with categorization (syntax, semantic, etc.) -- **Position Information**: Precise line/column tracking for error location -- **Context Preservation**: Error messages include relevant SQL context -- **Error Recovery**: Parser can recover from certain errors and continue parsing -- **Usage Pattern**: Always check errors returned from tokenizer and parser operations - -### Performance Monitoring Integration -- **Package**: `pkg/metrics/` provides production monitoring capabilities -- **Atomic Counters**: Lock-free performance tracking across components -- **Pool Metrics**: Tracks pool hit rates, gets/puts, memory efficiency -- **Query Metrics**: Size tracking, operation counts, error categorization - -## Production Readiness Status - -### ✅ **FULLY VALIDATED FOR PRODUCTION USE** -GoSQLX has passed comprehensive enterprise-grade testing: - -- **Race Detection**: ✅ ZERO race conditions (20,000+ concurrent operations tested) -- **Performance**: ✅ 1.5M ops/sec peak, 1.38M+ sustained, memory efficient with pooling -- **Unicode Support**: ✅ Full international compliance (8 languages tested) -- **SQL Compatibility**: ✅ Multi-dialect support with 115+ real-world queries validated -- **Memory Management**: ✅ Zero leaks detected, stable under extended load -- **Error Handling**: ✅ Robust error recovery with position information - -### Quality Metrics -- **Thread Safety**: ⭐⭐⭐⭐⭐ Race-free codebase confirmed -- **Performance**: ⭐⭐⭐⭐⭐ 1.38M+ ops/sec sustained, 1.5M peak, 8M tokens/sec -- **Reliability**: ⭐⭐⭐⭐⭐ 95%+ success rate on real-world SQL -- **Memory Efficiency**: ⭐⭐⭐⭐⭐ 60-80% reduction with pooling -- **Latency**: ⭐⭐⭐⭐⭐ <1μs for complex queries with window functions - -## Testing Methodology - -### **Always Use Race Detection** -Race detection is mandatory during development and CI/CD: - +- Recursive descent with one-token lookahead +- Main file: `pkg/sql/parser/parser.go` +- Window 
functions: `parseFunctionCall()`, `parseWindowSpec()`, `parseWindowFrame()` +- CTEs: WITH clause with RECURSIVE support +- Set operations: UNION/EXCEPT/INTERSECT with left-associative parsing +- JOINs: All types with proper left-associative tree logic + +### Error Handling +- Always check errors from tokenizer and parser +- Errors include position information (`models.Location`) +- Error codes: E1001-E3004 for tokenizer, parser, semantic errors +- Use `pkg/errors/` for structured error creation + +## Testing Requirements + +### Race Detection is Mandatory ```bash -# MANDATORY: Always run tests with race detection -go test -race ./... -go test -race -timeout 30s ./pkg/... -go test -race -timeout 60s -v ./... +task test:race # Primary method +go test -race -timeout 60s ./... # Direct command ``` -### Testing Structure -Tests are organized with comprehensive coverage (30+ test files, 6 benchmark files): - -- **Unit Tests**: `*_test.go` files for component testing -- **Integration Tests**: Real-world SQL query validation in examples -- **Performance Tests**: `*_bench_test.go` files with memory allocation tracking -- **Race Detection**: Concurrent usage validation across all components -- **Memory Tests**: Pool efficiency and leak detection -- **Scalability Tests**: Load testing with sustained throughput validation - -### Coverage Status by Package -- **pkg/models/**: 100% coverage - All core data structures fully tested -- **pkg/sql/ast/**: 73.4% coverage - AST nodes with comprehensive edge case testing -- **pkg/sql/tokenizer/**: High coverage - Zero-copy operations validated -- **pkg/sql/parser/**: High coverage - All SQL features tested including window functions -- **pkg/sql/keywords/**: High coverage - Multi-dialect keyword recognition -- **pkg/metrics/**: High coverage - Concurrent metric tracking validated - -### Component-Specific Testing -```bash -# Run a single test by name -go test -v -run TestSpecificTestName ./pkg/sql/parser/ - -# Run tests matching a pattern -go test -v -run "TestParser_Window.*" ./pkg/sql/parser/ - -# Core library testing with race detection -go test -race ./pkg/sql/tokenizer/ -v -go test -race ./pkg/sql/parser/ -v -go test -race ./pkg/sql/ast/ -v -go test -race ./pkg/sql/keywords/ -v -go test -race ./pkg/models/ -v -go test -race ./pkg/errors/ -v -go test -race ./pkg/metrics/ -v - -# Performance benchmarking with memory tracking -go test -bench=. -benchmem ./pkg/... - -# Window functions specific testing (Phase 2.5) -go test -v -run TestParser_.*Window.* ./pkg/sql/parser/ - -# Test coverage for specific packages -go test -coverprofile=coverage.out ./pkg/models/ && go tool cover -func=coverage.out -go test -coverprofile=coverage.out ./pkg/sql/ast/ && go tool cover -func=coverage.out +### Coverage by Package +- `pkg/models/`: 100% - All core data structures +- `pkg/sql/ast/`: 73.4% - AST nodes +- `pkg/sql/tokenizer/`: 76.1% - Zero-copy operations +- `pkg/sql/parser/`: 76.1% - All SQL features +- `pkg/errors/`: 95.6% - Error handling -# Comprehensive validation -go test -race -timeout 60s ./... +### Benchmarking +```bash +task bench # All benchmarks +go test -bench=BenchmarkName -benchmem ./pkg/sql/parser/ # Specific benchmark +go test -bench=. -benchmem -cpuprofile=cpu.prof ./pkg/... # With profiling ``` -### Production Deployment Requirements -1. **Race Detection**: Always run with race detection during development and CI/CD -2. **Memory Monitoring**: Object pools should maintain stable memory usage -3. 
**Load Testing**: Validate with realistic SQL workloads matching application usage -4. **Unicode Validation**: Test international character handling if applicable -5. **Concurrent Patterns**: Test access patterns matching production usage - -## Common Development Workflows +## Common Workflows ### Adding a New SQL Feature -1. **Update Token Types** (if needed): Add new tokens to `pkg/models/token.go` -2. **Update Keywords** (if needed): Add keywords to `pkg/sql/keywords/` -3. **Extend AST Nodes**: Add new node types to `pkg/sql/ast/` -4. **Update Parser**: Add parsing logic to `pkg/sql/parser/parser.go` -5. **Add Tests**: Create comprehensive tests covering edge cases -6. **Run Validation**: `go test -race ./... && go test -bench=. -benchmem ./...` -7. **Update Documentation**: Update CHANGELOG.md and relevant docs +1. Update tokens in `pkg/models/token.go` (if needed) +2. Add keywords to `pkg/sql/keywords/` (if needed) +3. Extend AST nodes in `pkg/sql/ast/` +4. Add parsing logic in `pkg/sql/parser/parser.go` +5. Write comprehensive tests +6. Run: `task test:race && task bench` +7. Update CHANGELOG.md ### Debugging Parsing Issues ```bash -# Enable verbose output for tokenizer go test -v -run TestTokenizer_YourTest ./pkg/sql/tokenizer/ - -# Debug parser with specific SQL go test -v -run TestParser_YourTest ./pkg/sql/parser/ - -# Check token generation -# Write a small test in pkg/sql/tokenizer/ to print tokens - -# Verify AST structure -# Use the visitor pattern in pkg/sql/ast/visitor.go to traverse and inspect -``` - -### Performance Testing New Features -```bash -# Benchmark specific feature -go test -bench=BenchmarkYourFeature -benchmem -cpuprofile=cpu.prof ./pkg/sql/parser/ - -# Analyze profile -go tool pprof cpu.prof - -# Memory profiling -go test -bench=BenchmarkYourFeature -benchmem -memprofile=mem.prof ./pkg/sql/parser/ -go tool pprof mem.prof - -# Race detection during benchmark -go test -race -bench=BenchmarkYourFeature ./pkg/sql/parser/ ``` -## High-Level Architecture - -### Cross-Component Interactions - -The architecture follows a pipeline design with well-defined interfaces: +Use the visitor pattern in `pkg/sql/ast/visitor.go` to traverse and inspect AST. -1. **Input Processing Pipeline**: - - Raw SQL bytes → `tokenizer.Tokenize()` → `[]models.TokenWithSpan` - - Token conversion → `parser.convertTokens()` → `[]token.Token` - - Parser processing → `parser.Parse()` → `*ast.AST` +## Release Workflow -2. **Object Pooling Strategy**: - - **Tokenizer Pool**: `tokenizerPool` manages reusable tokenizer instances - - **AST Pool**: `astPool` manages AST container objects - - **Statement Pools**: Individual pools for each statement type (SELECT, INSERT, etc.) - - **Expression Pools**: Pools for identifiers, binary expressions, literals - - **Buffer Pool**: Internal byte buffer reuse for tokenization operations - -3. **Error Propagation**: - - Tokenizer errors include detailed position information (`models.Location`) - - Parser errors maintain token context for debugging - - All errors bubble up with context preservation for troubleshooting - -4. **Performance Monitoring**: - - `pkg/metrics` package tracks atomic metrics across all components - - Pool hit rates, operation counts, error categorization - - Race-free metric collection with `MetricsSnapshot` - -### Critical Design Patterns - -1. **Zero-Copy Operations**: Tokenizer operates on byte slices without string allocation -2. **Object Pooling**: Extensive use of sync.Pool for all major data structures -3. 
**Visitor Pattern**: AST nodes support traversal via `ast.Visitor` interface -4. **Recursive Descent**: Parser uses predictive parsing with one-token lookahead -5. **Token Categorization**: Keywords module provides dialect-specific classification - -### Module Dependencies - -Clean dependency hierarchy with minimal coupling: -- `models` → Core types (no dependencies, 100% test coverage) -- `errors` → Structured error handling (depends on `models`) -- `keywords` → Depends on `models` only -- `tokenizer` → Depends on `models`, `keywords`, `metrics` -- `parser` → Depends on `tokenizer`, `ast`, `token`, `errors` -- `ast` → Depends on `token` only (minimal coupling, 73.4% test coverage) -- `metrics` → Standalone monitoring (no dependencies) -- `cmd/gosqlx` → CLI tool (depends on all packages) - -## Release Workflow (CRITICAL - Follow This Process) - -### **CORRECT Release Process** -Based on lessons learned from previous releases - main branch is protected: +**CRITICAL**: Main branch is protected. Never create tags in feature branches. ```bash -# 1. Feature development in PR branch -git checkout feature/branch-name - -# 2. Update documentation in PR branch (mark as [Unreleased]) -# - Update CHANGELOG.md with comprehensive feature documentation -# - Update README.md with performance highlights and new features -# - DO NOT create version tags yet - this is done post-merge -git add CHANGELOG.md README.md -git commit -m "feat: implement major features (mark as unreleased)" - -# 3. Push PR branch and request review +# 1. Develop in feature branch +git checkout -b feature/branch-name +# ... make changes, update CHANGELOG.md as [Unreleased] ... git push origin feature/branch-name -# Create PR via GitHub interface or gh cli - -# 4. After PR is merged, create release from main branch -git checkout main && git pull origin main - -# 5. Create documentation PR for release finalization -git checkout -b docs/vX.Y.Z-release-updates -# Update CHANGELOG.md to mark as released version with date -git add CHANGELOG.md -git commit -m "docs: finalize vX.Y.Z release documentation" -git push origin docs/vX.Y.Z-release-updates -# Create PR for documentation updates - -# 6. After docs PR merged, create release tag -git checkout main && git pull origin main -git tag vX.Y.Z -a -m "vX.Y.Z: Release Title with detailed notes" -git push origin vX.Y.Z -# 7. Create GitHub release from tag -gh release create vX.Y.Z --title "vX.Y.Z: Release Title" --notes "..." -``` +# 2. Create PR and get it merged -**CRITICAL**: Never create version tags in feature PR branches - only after successful merge to main. 
- -### **❌ WRONG Process (Don't Do This)** -These mistakes have been made before - avoid them: -- Creating version tags in PR branches before merge -- Pushing tags before PR is approved and merged -- Direct commits to main for documentation (main branch is protected) -- Creating releases before proper testing and validation - -### **Benefits of Correct Process** -- ✅ All feature changes reviewed together in PR before any release actions -- ✅ Version tags only created on stable, merged, tested code in main branch -- ✅ Clean git history with proper separation of development and release -- ✅ Respects protected main branch rules (enforced by GitHub) -- ✅ Allows for comprehensive testing and validation before tagging -- ✅ Enables rollback if critical issues are found before release - -## Current SQL Feature Support (v1.6.0) - -### GROUPING SETS, ROLLUP, CUBE (SQL-99 T431) - Complete ✅ -```sql --- GROUPING SETS - explicit grouping combinations -SELECT region, product, SUM(sales) -FROM orders -GROUP BY GROUPING SETS ((region), (product), (region, product), ()); - --- ROLLUP - hierarchical subtotals -SELECT year, quarter, month, SUM(revenue) -FROM sales -GROUP BY ROLLUP (year, quarter, month); - --- CUBE - all possible combinations -SELECT region, product, SUM(amount) -FROM sales -GROUP BY CUBE (region, product); -``` - -### MERGE Statements (SQL:2003 F312) - Complete ✅ -```sql -MERGE INTO target_table t -USING source_table s ON t.id = s.id -WHEN MATCHED THEN - UPDATE SET t.name = s.name, t.value = s.value -WHEN NOT MATCHED THEN - INSERT (id, name, value) VALUES (s.id, s.name, s.value); -``` +# 3. After merge, create docs PR for release finalization +git checkout main && git pull +git checkout -b docs/vX.Y.Z-release +# Update CHANGELOG.md with version and date +git push origin docs/vX.Y.Z-release -### Materialized Views - Complete ✅ -```sql -CREATE MATERIALIZED VIEW sales_summary AS -SELECT region, SUM(amount) as total FROM sales GROUP BY region; - -REFRESH MATERIALIZED VIEW CONCURRENTLY sales_summary; - -DROP MATERIALIZED VIEW IF EXISTS sales_summary; -``` - -### Expression Operators (BETWEEN, IN, LIKE, IS NULL) - Complete ✅ -```sql --- BETWEEN with expressions -SELECT * FROM orders WHERE amount BETWEEN 100 AND 500; - --- IN with subquery -SELECT * FROM users WHERE id IN (SELECT user_id FROM admins); - --- LIKE with pattern matching -SELECT * FROM products WHERE name LIKE '%widget%'; - --- IS NULL / IS NOT NULL -SELECT * FROM users WHERE deleted_at IS NULL; +# 4. After docs PR merged, create tag +git checkout main && git pull +git tag vX.Y.Z -a -m "vX.Y.Z: Release notes" +git push origin vX.Y.Z --- NULLS FIRST/LAST ordering (SQL-99 F851) -SELECT * FROM users ORDER BY last_login DESC NULLS LAST; +# 5. Create GitHub release +gh release create vX.Y.Z --title "vX.Y.Z: Title" --notes "..." 
``` -### Window Functions (Phase 2.5) - Complete ✅ -```sql --- Ranking functions -SELECT name, salary, ROW_NUMBER() OVER (ORDER BY salary DESC) as rank FROM employees; -SELECT dept, name, RANK() OVER (PARTITION BY dept ORDER BY salary DESC) FROM employees; -SELECT name, DENSE_RANK() OVER (ORDER BY score), NTILE(4) OVER (ORDER BY score) FROM tests; - --- Analytic functions with offsets -SELECT name, salary, LAG(salary, 1) OVER (ORDER BY hire_date) as prev_salary FROM employees; -SELECT date, amount, LEAD(amount, 2, 0) OVER (ORDER BY date) as future_amount FROM transactions; - --- Window frames -SELECT date, amount, - SUM(amount) OVER (ORDER BY date ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) as rolling_sum, - AVG(amount) OVER (ORDER BY date RANGE UNBOUNDED PRECEDING) as running_avg -FROM transactions; - --- Complex window specifications -SELECT dept, name, salary, - FIRST_VALUE(salary) OVER (PARTITION BY dept ORDER BY salary DESC) as dept_max, - LAST_VALUE(salary) OVER (PARTITION BY dept ORDER BY salary RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) as dept_min -FROM employees; -``` +## Pre-commit Hooks -### CTEs and Set Operations (Phase 2) - Complete ✅ -```sql --- Recursive CTE with proper termination -WITH RECURSIVE employee_hierarchy AS ( - SELECT id, name, manager_id, 1 as level FROM employees WHERE manager_id IS NULL - UNION ALL - SELECT e.id, e.name, e.manager_id, eh.level + 1 - FROM employees e JOIN employee_hierarchy eh ON e.manager_id = eh.id - WHERE eh.level < 10 -- Prevent infinite recursion -) -SELECT * FROM employee_hierarchy ORDER BY level, name; - --- Complex set operations with proper precedence -SELECT product FROM inventory -UNION SELECT product FROM orders -EXCEPT SELECT product FROM discontinued -INTERSECT SELECT product FROM active_catalog; - --- CTE with set operations -WITH active_products AS ( - SELECT product_id, product_name FROM products WHERE active = true -), -recent_orders AS ( - SELECT product_id, COUNT(*) as order_count FROM orders - WHERE order_date > '2023-01-01' GROUP BY product_id -) -SELECT ap.product_name, ro.order_count -FROM active_products ap -LEFT JOIN recent_orders ro ON ap.product_id = ro.product_id; -``` +The repository has pre-commit hooks that run: +1. `go fmt` - Code formatting +2. `go vet` - Static analysis +3. `go test -short` - Short test suite -### JOINs (Phase 1) - Complete ✅ -```sql --- Complex JOIN combinations with proper left-associative parsing -SELECT u.name, o.order_date, p.product_name, c.category_name -FROM users u -LEFT JOIN orders o ON u.id = o.user_id -INNER JOIN products p ON o.product_id = p.id -RIGHT JOIN categories c ON p.category_id = c.id -NATURAL JOIN user_preferences up -WHERE u.active = true AND o.order_date > '2023-01-01' -ORDER BY o.order_date DESC; - --- JOIN with USING clause -SELECT u.name, p.title FROM users u -JOIN posts p USING (user_id) -WHERE p.published = true; -``` +Install with: `task hooks:install` -### PostgreSQL Extensions (v1.6.0) - Complete ✅ -```sql --- LATERAL JOIN - correlated subqueries in FROM clause -SELECT u.name, r.order_date FROM users u, -LATERAL (SELECT * FROM orders WHERE user_id = u.id ORDER BY order_date DESC LIMIT 3) r; - --- JSON/JSONB Operators (->/->>/#>/#>>/@>/<@/?/?|/?&/#-) -SELECT data->>'name' AS name, data->'address'->>'city' AS city FROM users; -SELECT * FROM products WHERE attributes @> '{"color": "red"}'; -SELECT * FROM users WHERE profile ? 
'email'; - --- DISTINCT ON - PostgreSQL-specific row selection -SELECT DISTINCT ON (dept_id) dept_id, name, salary -FROM employees ORDER BY dept_id, salary DESC; - --- FILTER Clause - conditional aggregation (SQL:2003) -SELECT COUNT(*) FILTER (WHERE status = 'active') AS active_count, - SUM(amount) FILTER (WHERE type = 'credit') AS total_credits -FROM transactions; - --- RETURNING Clause - return modified rows -INSERT INTO users (name, email) VALUES ('John', 'john@example.com') RETURNING id, created_at; -UPDATE products SET price = price * 1.1 WHERE category = 'Electronics' RETURNING id, price; -DELETE FROM sessions WHERE expired_at < NOW() RETURNING user_id; -``` +## Additional Documentation -### DDL and DML Operations - Complete ✅ -```sql --- Table operations -CREATE TABLE users (id INT PRIMARY KEY, name VARCHAR(100), email VARCHAR(255)); -ALTER TABLE users ADD COLUMN created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP; -DROP TABLE temp_data; - --- Data manipulation with comprehensive expression support -INSERT INTO users (name, email) VALUES ('John Doe', 'john@example.com'); -UPDATE users SET email = 'newemail@example.com' WHERE id = 1; -DELETE FROM users WHERE created_at < '2023-01-01'; -``` \ No newline at end of file +- `docs/GETTING_STARTED.md` - Quick start guide +- `docs/USAGE_GUIDE.md` - Comprehensive usage patterns +- `docs/LSP_GUIDE.md` - LSP server and IDE integration +- `docs/LINTING_RULES.md` - All 10 linting rules reference +- `docs/SQL_COMPATIBILITY.md` - SQL dialect compatibility matrix +- `docs/ARCHITECTURE.md` - Detailed system design diff --git a/pkg/sql/parser/dml.go b/pkg/sql/parser/dml.go index 2461545..daf11b0 100644 --- a/pkg/sql/parser/dml.go +++ b/pkg/sql/parser/dml.go @@ -87,6 +87,21 @@ func (p *Parser) parseInsertStatement() (ast.Statement, error) { p.advance() // Consume ) } + // Parse ON CONFLICT clause if present (PostgreSQL UPSERT) + var onConflict *ast.OnConflict + if p.isType(models.TokenTypeOn) { + // Peek ahead to check for CONFLICT + if p.peekToken().Literal == "CONFLICT" { + p.advance() // Consume ON + p.advance() // Consume CONFLICT + var err error + onConflict, err = p.parseOnConflictClause() + if err != nil { + return nil, err + } + } + } + // Parse RETURNING clause if present (PostgreSQL) var returning []ast.Expression if p.isType(models.TokenTypeReturning) || p.currentToken.Literal == "RETURNING" { @@ -100,10 +115,11 @@ func (p *Parser) parseInsertStatement() (ast.Statement, error) { // Create INSERT statement return &ast.InsertStatement{ - TableName: tableName, - Columns: columns, - Values: values, - Returning: returning, + TableName: tableName, + Columns: columns, + Values: values, + OnConflict: onConflict, + Returning: returning, }, nil } @@ -558,5 +574,111 @@ func (p *Parser) parseReturningColumns() ([]ast.Expression, error) { return columns, nil } +// parseOnConflictClause parses the ON CONFLICT clause (PostgreSQL UPSERT) +// Syntax: ON CONFLICT [(columns)] | ON CONSTRAINT name DO NOTHING | DO UPDATE SET ... 
+func (p *Parser) parseOnConflictClause() (*ast.OnConflict, error) { + onConflict := &ast.OnConflict{} + + // Parse optional conflict target: (column_list) or ON CONSTRAINT constraint_name + if p.isType(models.TokenTypeLParen) { + p.advance() // Consume ( + var targets []ast.Expression + + for { + if !p.isType(models.TokenTypeIdentifier) { + return nil, p.expectedError("column name in ON CONFLICT target") + } + targets = append(targets, &ast.Identifier{Name: p.currentToken.Literal}) + p.advance() + + if !p.isType(models.TokenTypeComma) { + break + } + p.advance() // Consume comma + } + + if !p.isType(models.TokenTypeRParen) { + return nil, p.expectedError(")") + } + p.advance() // Consume ) + onConflict.Target = targets + } else if p.isType(models.TokenTypeOn) && p.peekToken().Literal == "CONSTRAINT" { + // ON CONSTRAINT constraint_name + p.advance() // Consume ON + p.advance() // Consume CONSTRAINT + if !p.isType(models.TokenTypeIdentifier) { + return nil, p.expectedError("constraint name") + } + onConflict.Constraint = p.currentToken.Literal + p.advance() + } + + // Parse DO keyword + if p.currentToken.Literal != "DO" { + return nil, p.expectedError("DO") + } + p.advance() // Consume DO + + // Parse action: NOTHING or UPDATE + if p.currentToken.Literal == "NOTHING" { + onConflict.Action = ast.OnConflictAction{DoNothing: true} + p.advance() // Consume NOTHING + } else if p.isType(models.TokenTypeUpdate) { + p.advance() // Consume UPDATE + + // Parse SET keyword + if !p.isType(models.TokenTypeSet) { + return nil, p.expectedError("SET") + } + p.advance() // Consume SET + + // Parse update assignments + var updates []ast.UpdateExpression + for { + if !p.isType(models.TokenTypeIdentifier) { + return nil, p.expectedError("column name") + } + columnName := p.currentToken.Literal + p.advance() + + if !p.isType(models.TokenTypeEq) { + return nil, p.expectedError("=") + } + p.advance() // Consume = + + // Parse value expression (supports EXCLUDED.column references) + value, err := p.parseExpression() + if err != nil { + return nil, fmt.Errorf("failed to parse ON CONFLICT UPDATE value: %w", err) + } + + updates = append(updates, ast.UpdateExpression{ + Column: &ast.Identifier{Name: columnName}, + Value: value, + }) + + if !p.isType(models.TokenTypeComma) { + break + } + p.advance() // Consume comma + } + onConflict.Action.DoUpdate = updates + + // Parse optional WHERE clause + if p.isType(models.TokenTypeWhere) { + p.advance() // Consume WHERE + where, err := p.parseExpression() + if err != nil { + return nil, fmt.Errorf("failed to parse ON CONFLICT WHERE clause: %w", err) + } + onConflict.Action.Where = where + } + } else { + return nil, p.expectedError("NOTHING or UPDATE") + } + + return onConflict, nil +} + // parseTableReference parses a simple table reference (table name) // Returns a TableReference with the Name field populated diff --git a/pkg/sql/parser/expressions.go b/pkg/sql/parser/expressions.go index aa21395..0d28061 100644 --- a/pkg/sql/parser/expressions.go +++ b/pkg/sql/parser/expressions.go @@ -5,6 +5,7 @@ package parser import ( "fmt" + "strings" goerrors "github.com/ajitpratap0/GoSQLX/pkg/errors" "github.com/ajitpratap0/GoSQLX/pkg/models" @@ -419,8 +420,8 @@ func (p *Parser) parseMultiplicativeExpression() (ast.Expression, error) { return left, nil } -// parseJSONExpression parses JSON/JSONB operators (PostgreSQL) -// Handles: ->, ->>, #>, #>>, @>, <@, ?, ?|, ?&, #- +// parseJSONExpression parses JSON/JSONB operators (PostgreSQL) and type casting +// Handles: ->, ->>, #>, #>>, 
@>, <@, ?, ?|, ?&, #-, :: func (p *Parser) parseJSONExpression() (ast.Expression, error) { // Parse the left side using primary expression left, err := p.parsePrimaryExpression() @@ -428,6 +429,23 @@ func (p *Parser) parseJSONExpression() (ast.Expression, error) { return nil, err } + // Handle type casting (::) with highest precedence + // PostgreSQL: expr::type (e.g., '123'::integer, column::text) + for p.isType(models.TokenTypeDoubleColon) { + p.advance() // Consume :: + + // Parse the target data type + dataType, err := p.parseDataType() + if err != nil { + return nil, err + } + + left = &ast.CastExpression{ + Expr: left, + Type: dataType, + } + } + // Handle JSON operators (left-associative for chaining like data->'a'->'b') for p.isJSONOperator() { operator := p.currentToken.Literal @@ -448,11 +466,111 @@ func (p *Parser) parseJSONExpression() (ast.Expression, error) { // Store operator type for semantic analysis if needed _ = operatorType + + // Check for type casting after JSON operations + for p.isType(models.TokenTypeDoubleColon) { + p.advance() // Consume :: + + dataType, err := p.parseDataType() + if err != nil { + return nil, err + } + + left = &ast.CastExpression{ + Expr: left, + Type: dataType, + } + } } return left, nil } +// parseDataType parses a SQL data type for CAST or :: expressions +// Handles: simple types (INTEGER, TEXT), parameterized types (VARCHAR(100), NUMERIC(10,2)) +func (p *Parser) parseDataType() (string, error) { + // Data type can be an identifier or a keyword like INT, VARCHAR, etc. + if !p.isIdentifier() && !p.isDataTypeKeyword() { + return "", p.expectedError("data type") + } + + // Use strings.Builder for efficient string concatenation + var sb strings.Builder + sb.WriteString(p.currentToken.Literal) + p.advance() // Consume type name + + // Check for type parameters (e.g., VARCHAR(100), DECIMAL(10,2)) + if p.isType(models.TokenTypeLParen) { + p.advance() // Consume ( + sb.WriteByte('(') + + paramCount := 0 + for !p.isType(models.TokenTypeRParen) { + if paramCount > 0 { + if !p.isType(models.TokenTypeComma) { + return "", p.expectedError(", or )") + } + sb.WriteString(p.currentToken.Literal) + p.advance() // Consume comma + } + + // Parse parameter (should be a number or identifier) + // Use token type constants for consistency + if !p.isType(models.TokenTypeNumber) && !p.isType(models.TokenTypeIdentifier) && !p.isNumericLiteral() { + return "", goerrors.InvalidSyntaxError( + fmt.Sprintf("expected numeric type parameter, got '%s'", p.currentToken.Literal), + p.currentLocation(), + "Use TYPE(precision[, scale]) syntax", + ) + } + + sb.WriteString(p.currentToken.Literal) + p.advance() + paramCount++ + } + + sb.WriteByte(')') + + if !p.isType(models.TokenTypeRParen) { + return "", p.expectedError(")") + } + p.advance() // Consume ) + } + + // Check for array type suffix (e.g., INTEGER[], TEXT[]) + if p.isType(models.TokenTypeLBracket) { + p.advance() // Consume [ + if !p.isType(models.TokenTypeRBracket) { + return "", p.expectedError("]") + } + p.advance() // Consume ] + sb.WriteString("[]") + } + + return sb.String(), nil +} + +// isNumericLiteral checks if current token is a numeric literal (handles INT/NUMBER token types) +func (p *Parser) isNumericLiteral() bool { + // Check for various numeric token type representations + switch p.currentToken.Type { + case "INT", "NUMBER", "FLOAT": + return true + } + return p.isType(models.TokenTypeNumber) +} + +// isDataTypeKeyword checks if current token is a SQL data type keyword +func (p *Parser) 
isDataTypeKeyword() bool { + switch p.currentToken.Type { + case "INT", "INTEGER", "BIGINT", "SMALLINT", "FLOAT", "DOUBLE", "DECIMAL", + "NUMERIC", "VARCHAR", "CHAR", "TEXT", "BOOLEAN", "DATE", "TIME", + "TIMESTAMP", "INTERVAL", "BLOB", "CLOB", "JSON", "UUID": + return true + } + return false +} + // isJSONOperator checks if current token is a JSON/JSONB operator func (p *Parser) isJSONOperator() bool { switch p.currentToken.Type { diff --git a/pkg/sql/parser/on_conflict_test.go b/pkg/sql/parser/on_conflict_test.go new file mode 100644 index 0000000..120067c --- /dev/null +++ b/pkg/sql/parser/on_conflict_test.go @@ -0,0 +1,319 @@ +// Package parser - on_conflict_test.go +// Tests for INSERT ON CONFLICT (UPSERT) parsing (PostgreSQL) + +package parser + +import ( + "testing" + + "github.com/ajitpratap0/GoSQLX/pkg/sql/ast" + "github.com/ajitpratap0/GoSQLX/pkg/sql/tokenizer" +) + +func TestParser_InsertOnConflict(t *testing.T) { + tests := []struct { + name string + input string + wantTableName string + wantDoNothing bool + wantTargetCols int + wantConstraint string + wantUpdateCount int + wantHasWhere bool + wantErr bool + }{ + { + name: "ON CONFLICT DO NOTHING", + input: "INSERT INTO users (id, name) VALUES (1, 'test') ON CONFLICT DO NOTHING", + wantTableName: "users", + wantDoNothing: true, + }, + { + name: "ON CONFLICT (column) DO NOTHING", + input: "INSERT INTO users (id, name) VALUES (1, 'test') ON CONFLICT (id) DO NOTHING", + wantTableName: "users", + wantDoNothing: true, + wantTargetCols: 1, + }, + { + name: "ON CONFLICT (multiple columns) DO NOTHING", + input: "INSERT INTO users (id, email, name) VALUES (1, 'test@test.com', 'test') ON CONFLICT (id, email) DO NOTHING", + wantTableName: "users", + wantDoNothing: true, + wantTargetCols: 2, + }, + { + name: "ON CONFLICT DO UPDATE SET single column", + input: "INSERT INTO users (id, name) VALUES (1, 'test') ON CONFLICT (id) DO UPDATE SET name = 'updated'", + wantTableName: "users", + wantTargetCols: 1, + wantUpdateCount: 1, + }, + { + name: "ON CONFLICT DO UPDATE SET multiple columns", + input: "INSERT INTO users (id, name, email) VALUES (1, 'test', 'test@test.com') ON CONFLICT (id) DO UPDATE SET name = 'updated', email = 'new@test.com'", + wantTableName: "users", + wantTargetCols: 1, + wantUpdateCount: 2, + }, + { + name: "ON CONFLICT DO UPDATE with EXCLUDED reference", + input: "INSERT INTO users (id, name) VALUES (1, 'test') ON CONFLICT (id) DO UPDATE SET name = EXCLUDED.name", + wantTableName: "users", + wantTargetCols: 1, + wantUpdateCount: 1, + }, + { + name: "ON CONFLICT DO UPDATE with WHERE clause", + input: "INSERT INTO users (id, name, active) VALUES (1, 'test', true) ON CONFLICT (id) DO UPDATE SET name = EXCLUDED.name WHERE users.active = true", + wantTableName: "users", + wantTargetCols: 1, + wantUpdateCount: 1, + wantHasWhere: true, + }, + { + name: "ON CONFLICT ON CONSTRAINT", + input: "INSERT INTO users (id, name) VALUES (1, 'test') ON CONFLICT ON CONSTRAINT users_pkey DO NOTHING", + wantTableName: "users", + wantDoNothing: true, + wantConstraint: "users_pkey", + }, + { + name: "ON CONFLICT ON CONSTRAINT DO UPDATE", + input: "INSERT INTO users (id, name) VALUES (1, 'test') ON CONFLICT ON CONSTRAINT users_pkey DO UPDATE SET name = 'updated'", + wantTableName: "users", + wantConstraint: "users_pkey", + wantUpdateCount: 1, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tkz := tokenizer.GetTokenizer() + defer tokenizer.PutTokenizer(tkz) + + tokens, err := tkz.Tokenize([]byte(tt.input)) + if err 
!= nil { + t.Fatalf("Tokenize() error = %v", err) + } + + convertedTokens, err := ConvertTokensForParser(tokens) + if err != nil { + t.Fatalf("ConvertTokensForParser() error = %v", err) + } + + p := NewParser() + defer p.Release() + result, err := p.Parse(convertedTokens) + + if (err != nil) != tt.wantErr { + t.Fatalf("Parse() error = %v, wantErr %v", err, tt.wantErr) + } + + if tt.wantErr { + return + } + + if len(result.Statements) != 1 { + t.Fatalf("Expected 1 statement, got %d", len(result.Statements)) + } + + insertStmt, ok := result.Statements[0].(*ast.InsertStatement) + if !ok { + t.Fatalf("Expected InsertStatement, got %T", result.Statements[0]) + } + + if insertStmt.TableName != tt.wantTableName { + t.Errorf("TableName = %v, want %v", insertStmt.TableName, tt.wantTableName) + } + + if insertStmt.OnConflict == nil { + t.Fatal("OnConflict is nil, expected non-nil") + } + + oc := insertStmt.OnConflict + + if oc.Action.DoNothing != tt.wantDoNothing { + t.Errorf("DoNothing = %v, want %v", oc.Action.DoNothing, tt.wantDoNothing) + } + + if len(oc.Target) != tt.wantTargetCols { + t.Errorf("Target columns count = %d, want %d", len(oc.Target), tt.wantTargetCols) + } + + if oc.Constraint != tt.wantConstraint { + t.Errorf("Constraint = %v, want %v", oc.Constraint, tt.wantConstraint) + } + + if len(oc.Action.DoUpdate) != tt.wantUpdateCount { + t.Errorf("Update expressions count = %d, want %d", len(oc.Action.DoUpdate), tt.wantUpdateCount) + } + + if (oc.Action.Where != nil) != tt.wantHasWhere { + t.Errorf("Has WHERE = %v, want %v", oc.Action.Where != nil, tt.wantHasWhere) + } + }) + } +} + +func TestParser_InsertOnConflictWithReturning(t *testing.T) { + input := "INSERT INTO users (id, name) VALUES (1, 'test') ON CONFLICT (id) DO UPDATE SET name = EXCLUDED.name RETURNING id, name" + + tkz := tokenizer.GetTokenizer() + defer tokenizer.PutTokenizer(tkz) + + tokens, err := tkz.Tokenize([]byte(input)) + if err != nil { + t.Fatalf("Tokenize() error = %v", err) + } + + convertedTokens, err := ConvertTokensForParser(tokens) + if err != nil { + t.Fatalf("ConvertTokensForParser() error = %v", err) + } + + p := NewParser() + defer p.Release() + result, err := p.Parse(convertedTokens) + if err != nil { + t.Fatalf("Parse() error = %v", err) + } + + if len(result.Statements) != 1 { + t.Fatalf("Expected 1 statement, got %d", len(result.Statements)) + } + + insertStmt, ok := result.Statements[0].(*ast.InsertStatement) + if !ok { + t.Fatalf("Expected InsertStatement, got %T", result.Statements[0]) + } + + if insertStmt.OnConflict == nil { + t.Fatal("OnConflict is nil") + } + + if insertStmt.OnConflict.Action.DoNothing { + t.Error("Expected DoUpdate, got DoNothing") + } + + if len(insertStmt.OnConflict.Action.DoUpdate) != 1 { + t.Errorf("Expected 1 update expression, got %d", len(insertStmt.OnConflict.Action.DoUpdate)) + } + + if len(insertStmt.Returning) != 2 { + t.Errorf("Expected 2 RETURNING columns, got %d", len(insertStmt.Returning)) + } +} + +func TestParser_InsertOnConflictErrors(t *testing.T) { + tests := []struct { + name string + input string + }{ + { + name: "Missing DO keyword", + input: "INSERT INTO users (id) VALUES (1) ON CONFLICT (id) NOTHING", + }, + { + name: "Invalid action", + input: "INSERT INTO users (id) VALUES (1) ON CONFLICT (id) DO DELETE", + }, + { + name: "Missing SET after DO UPDATE", + input: "INSERT INTO users (id) VALUES (1) ON CONFLICT (id) DO UPDATE name = 'test'", + }, + { + name: "Missing = in SET clause", + input: "INSERT INTO users (id) VALUES (1) ON CONFLICT (id) DO UPDATE 
SET name 'test'", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tkz := tokenizer.GetTokenizer() + defer tokenizer.PutTokenizer(tkz) + + tokens, err := tkz.Tokenize([]byte(tt.input)) + if err != nil { + t.Fatalf("Tokenize() error = %v", err) + } + + convertedTokens, err := ConvertTokensForParser(tokens) + if err != nil { + t.Fatalf("ConvertTokensForParser() error = %v", err) + } + + p := NewParser() + defer p.Release() + _, err = p.Parse(convertedTokens) + if err == nil { + t.Error("Parse() expected error, got nil") + } + }) + } +} + +func TestParser_InsertOnConflictComplexExpressions(t *testing.T) { + tests := []struct { + name string + input string + }{ + { + name: "Concat with EXCLUDED", + input: "INSERT INTO users (id, name) VALUES (1, 'test') ON CONFLICT (id) DO UPDATE SET name = EXCLUDED.name || ' (updated)'", + }, + { + name: "COALESCE with EXCLUDED", + input: "INSERT INTO users (id, name) VALUES (1, 'test') ON CONFLICT (id) DO UPDATE SET name = COALESCE(EXCLUDED.name, users.name)", + }, + { + name: "Arithmetic expression", + input: "INSERT INTO products (id, quantity) VALUES (1, 10) ON CONFLICT (id) DO UPDATE SET quantity = products.quantity + EXCLUDED.quantity", + }, + { + name: "CASE expression in update", + input: "INSERT INTO users (id, status) VALUES (1, 'active') ON CONFLICT (id) DO UPDATE SET status = CASE WHEN EXCLUDED.status = 'active' THEN 'updated' ELSE users.status END", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tkz := tokenizer.GetTokenizer() + defer tokenizer.PutTokenizer(tkz) + + tokens, err := tkz.Tokenize([]byte(tt.input)) + if err != nil { + t.Fatalf("Tokenize() error = %v", err) + } + + convertedTokens, err := ConvertTokensForParser(tokens) + if err != nil { + t.Fatalf("ConvertTokensForParser() error = %v", err) + } + + p := NewParser() + defer p.Release() + result, err := p.Parse(convertedTokens) + if err != nil { + t.Fatalf("Parse() error = %v", err) + } + + if len(result.Statements) != 1 { + t.Fatalf("Expected 1 statement, got %d", len(result.Statements)) + } + + insertStmt, ok := result.Statements[0].(*ast.InsertStatement) + if !ok { + t.Fatalf("Expected InsertStatement, got %T", result.Statements[0]) + } + + if insertStmt.OnConflict == nil { + t.Fatal("OnConflict is nil") + } + + if len(insertStmt.OnConflict.Action.DoUpdate) == 0 { + t.Error("Expected update expressions") + } + }) + } +} diff --git a/pkg/sql/parser/type_casting_test.go b/pkg/sql/parser/type_casting_test.go new file mode 100644 index 0000000..49cab66 --- /dev/null +++ b/pkg/sql/parser/type_casting_test.go @@ -0,0 +1,248 @@ +// Package parser - type_casting_test.go +// Tests for PostgreSQL type casting (::) operator parsing + +package parser + +import ( + "testing" + + "github.com/ajitpratap0/GoSQLX/pkg/sql/ast" + "github.com/ajitpratap0/GoSQLX/pkg/sql/tokenizer" +) + +func TestParser_TypeCasting(t *testing.T) { + tests := []struct { + name string + input string + wantErr bool + }{ + { + name: "Simple integer cast", + input: "SELECT '123'::INTEGER", + }, + { + name: "Cast to VARCHAR with length", + input: "SELECT name::VARCHAR(100) FROM users", + }, + { + name: "Cast in WHERE clause", + input: "SELECT * FROM orders WHERE amount::INTEGER > 100", + }, + { + name: "Cast to TEXT", + input: "SELECT id::TEXT FROM users", + }, + { + name: "Cast to NUMERIC with precision", + input: "SELECT price::NUMERIC(10,2) FROM products", + }, + { + name: "Cast to BOOLEAN", + input: "SELECT active::BOOLEAN FROM users", + }, + { + name: "Cast to 
TIMESTAMP", + input: "SELECT created_at::TIMESTAMP FROM events", + }, + { + name: "Cast to DATE", + input: "SELECT birth_date::DATE FROM users", + }, + { + name: "Chained casts", + input: "SELECT value::TEXT::VARCHAR(50) FROM data", + }, + { + name: "Cast with expression", + input: "SELECT (amount * 100)::INTEGER FROM orders", + }, + { + name: "Cast array type", + input: "SELECT tags::TEXT[] FROM posts", + }, + { + name: "Cast in function argument", + input: "SELECT LENGTH(name::TEXT) FROM users", + }, + { + name: "Cast NULL", + input: "SELECT NULL::INTEGER", + }, + { + name: "Cast column in ORDER BY", + input: "SELECT * FROM users ORDER BY id::TEXT", + }, + { + name: "Cast in CASE expression", + input: "SELECT CASE WHEN status = 'active' THEN 1::TEXT ELSE 0::TEXT END FROM users", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tkz := tokenizer.GetTokenizer() + defer tokenizer.PutTokenizer(tkz) + + tokens, err := tkz.Tokenize([]byte(tt.input)) + if err != nil { + t.Fatalf("Tokenize() error = %v", err) + } + + convertedTokens, err := ConvertTokensForParser(tokens) + if err != nil { + t.Fatalf("ConvertTokensForParser() error = %v", err) + } + + p := NewParser() + defer p.Release() + result, err := p.Parse(convertedTokens) + + if (err != nil) != tt.wantErr { + t.Fatalf("Parse() error = %v, wantErr %v", err, tt.wantErr) + } + + if tt.wantErr { + return + } + + if len(result.Statements) != 1 { + t.Fatalf("Expected 1 statement, got %d", len(result.Statements)) + } + }) + } +} + +func TestParser_TypeCastingAST(t *testing.T) { + input := "SELECT value::INTEGER FROM data" + + tkz := tokenizer.GetTokenizer() + defer tokenizer.PutTokenizer(tkz) + + tokens, err := tkz.Tokenize([]byte(input)) + if err != nil { + t.Fatalf("Tokenize() error = %v", err) + } + + convertedTokens, err := ConvertTokensForParser(tokens) + if err != nil { + t.Fatalf("ConvertTokensForParser() error = %v", err) + } + + p := NewParser() + defer p.Release() + result, err := p.Parse(convertedTokens) + if err != nil { + t.Fatalf("Parse() error = %v", err) + } + + selectStmt, ok := result.Statements[0].(*ast.SelectStatement) + if !ok { + t.Fatalf("Expected SelectStatement, got %T", result.Statements[0]) + } + + if len(selectStmt.Columns) != 1 { + t.Fatalf("Expected 1 column, got %d", len(selectStmt.Columns)) + } + + // The column should be a CastExpression directly + castExpr, ok := selectStmt.Columns[0].(*ast.CastExpression) + if !ok { + t.Fatalf("Expected CastExpression, got %T", selectStmt.Columns[0]) + } + + if castExpr.Type != "INTEGER" { + t.Errorf("Expected cast type INTEGER, got %s", castExpr.Type) + } + + // Check that the inner expression is an identifier + ident, ok := castExpr.Expr.(*ast.Identifier) + if !ok { + t.Fatalf("Expected Identifier in cast expression, got %T", castExpr.Expr) + } + + if ident.Name != "value" { + t.Errorf("Expected identifier 'value', got %s", ident.Name) + } +} + +func TestParser_TypeCastingWithJSON(t *testing.T) { + // Test that type casting works with JSON operators + tests := []struct { + name string + input string + }{ + { + name: "Cast JSON field to text", + input: "SELECT (data->>'name')::TEXT FROM users", + }, + { + name: "Cast JSON to integer", + input: "SELECT (data->>'age')::INTEGER FROM users", + }, + { + name: "Cast with JSON containment", + input: "SELECT * FROM users WHERE (data->>'score')::INTEGER > 100", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tkz := tokenizer.GetTokenizer() + defer tokenizer.PutTokenizer(tkz) 
+ + tokens, err := tkz.Tokenize([]byte(tt.input)) + if err != nil { + t.Fatalf("Tokenize() error = %v", err) + } + + convertedTokens, err := ConvertTokensForParser(tokens) + if err != nil { + t.Fatalf("ConvertTokensForParser() error = %v", err) + } + + p := NewParser() + defer p.Release() + _, err = p.Parse(convertedTokens) + if err != nil { + t.Fatalf("Parse() error = %v", err) + } + }) + } +} + +func TestParser_TypeCastingErrors(t *testing.T) { + tests := []struct { + name string + input string + }{ + { + name: "Missing type after cast", + input: "SELECT value:: FROM data", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tkz := tokenizer.GetTokenizer() + defer tokenizer.PutTokenizer(tkz) + + tokens, err := tkz.Tokenize([]byte(tt.input)) + if err != nil { + // Tokenizer error is acceptable + return + } + + convertedTokens, err := ConvertTokensForParser(tokens) + if err != nil { + return + } + + p := NewParser() + defer p.Release() + _, err = p.Parse(convertedTokens) + if err == nil { + t.Error("Parse() expected error, got nil") + } + }) + } +} diff --git a/pkg/sql/tokenizer/positional_params_test.go b/pkg/sql/tokenizer/positional_params_test.go new file mode 100644 index 0000000..b0c63aa --- /dev/null +++ b/pkg/sql/tokenizer/positional_params_test.go @@ -0,0 +1,197 @@ +// Package tokenizer - positional_params_test.go +// Tests for PostgreSQL positional parameter ($1, $2, etc.) tokenization + +package tokenizer + +import ( + "testing" + + "github.com/ajitpratap0/GoSQLX/pkg/models" +) + +func TestTokenizer_PositionalParameters(t *testing.T) { + tests := []struct { + name string + input string + expected []struct { + tokenType models.TokenType + value string + } + }{ + { + name: "Single positional parameter", + input: "SELECT * FROM users WHERE id = $1", + expected: []struct { + tokenType models.TokenType + value string + }{ + {models.TokenTypeSelect, "SELECT"}, + {models.TokenTypeMul, "*"}, + {models.TokenTypeFrom, "FROM"}, + {models.TokenTypeIdentifier, "users"}, + {models.TokenTypeWhere, "WHERE"}, + {models.TokenTypeIdentifier, "id"}, + {models.TokenTypeEq, "="}, + {models.TokenTypePlaceholder, "$1"}, + }, + }, + { + name: "Multiple positional parameters", + input: "INSERT INTO users (name, email) VALUES ($1, $2)", + expected: []struct { + tokenType models.TokenType + value string + }{ + {models.TokenTypeInsert, "INSERT"}, + {models.TokenTypeInto, "INTO"}, + {models.TokenTypeIdentifier, "users"}, + {models.TokenTypeLParen, "("}, + {models.TokenTypeIdentifier, "name"}, + {models.TokenTypeComma, ","}, + {models.TokenTypeIdentifier, "email"}, + {models.TokenTypeRParen, ")"}, + {models.TokenTypeValues, "VALUES"}, + {models.TokenTypeLParen, "("}, + {models.TokenTypePlaceholder, "$1"}, + {models.TokenTypeComma, ","}, + {models.TokenTypePlaceholder, "$2"}, + {models.TokenTypeRParen, ")"}, + }, + }, + { + name: "Double digit positional parameter", + input: "SELECT $10, $11, $12", + expected: []struct { + tokenType models.TokenType + value string + }{ + {models.TokenTypeSelect, "SELECT"}, + {models.TokenTypePlaceholder, "$10"}, + {models.TokenTypeComma, ","}, + {models.TokenTypePlaceholder, "$11"}, + {models.TokenTypeComma, ","}, + {models.TokenTypePlaceholder, "$12"}, + }, + }, + { + name: "Positional parameter in comparison", + input: "SELECT * FROM orders WHERE amount > $1 AND status = $2", + expected: []struct { + tokenType models.TokenType + value string + }{ + {models.TokenTypeSelect, "SELECT"}, + {models.TokenTypeMul, "*"}, + {models.TokenTypeFrom, "FROM"}, + 
{models.TokenTypeIdentifier, "orders"}, + {models.TokenTypeWhere, "WHERE"}, + {models.TokenTypeIdentifier, "amount"}, + {models.TokenTypeGt, ">"}, + {models.TokenTypePlaceholder, "$1"}, + {models.TokenTypeAnd, "AND"}, + {models.TokenTypeIdentifier, "status"}, + {models.TokenTypeEq, "="}, + {models.TokenTypePlaceholder, "$2"}, + }, + }, + { + name: "Positional parameter without space", + input: "SELECT name FROM users WHERE id=$1", + expected: []struct { + tokenType models.TokenType + value string + }{ + {models.TokenTypeSelect, "SELECT"}, + {models.TokenTypeIdentifier, "name"}, + {models.TokenTypeFrom, "FROM"}, + {models.TokenTypeIdentifier, "users"}, + {models.TokenTypeWhere, "WHERE"}, + {models.TokenTypeIdentifier, "id"}, + {models.TokenTypeEq, "="}, + {models.TokenTypePlaceholder, "$1"}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tkz := GetTokenizer() + defer PutTokenizer(tkz) + + tokens, err := tkz.Tokenize([]byte(tt.input)) + if err != nil { + t.Fatalf("Tokenize() error = %v", err) + } + + // Remove EOF token + tokens = tokens[:len(tokens)-1] + + if len(tokens) != len(tt.expected) { + t.Fatalf("Expected %d tokens, got %d", len(tt.expected), len(tokens)) + } + + for i, exp := range tt.expected { + if tokens[i].Token.Type != exp.tokenType { + t.Errorf("Token %d: expected type %s, got %s (value: %s)", + i, exp.tokenType.String(), tokens[i].Token.Type.String(), tokens[i].Token.Value) + } + if tokens[i].Token.Value != exp.value { + t.Errorf("Token %d: expected value %q, got %q", + i, exp.value, tokens[i].Token.Value) + } + } + }) + } +} + +func TestTokenizer_PositionalParametersEdgeCases(t *testing.T) { + tests := []struct { + name string + input string + wantPlaceholders int + }{ + { + name: "Parameter at start", + input: "$1", + wantPlaceholders: 1, + }, + { + name: "Parameter with leading zero", + input: "SELECT $01", + wantPlaceholders: 1, + }, + { + name: "Large parameter number", + input: "SELECT $999", + wantPlaceholders: 1, + }, + { + name: "Parameters in array", + input: "SELECT * FROM t WHERE id IN ($1, $2, $3)", + wantPlaceholders: 3, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tkz := GetTokenizer() + defer PutTokenizer(tkz) + + tokens, err := tkz.Tokenize([]byte(tt.input)) + if err != nil { + t.Fatalf("Tokenize() error = %v", err) + } + + placeholderCount := 0 + for _, tok := range tokens { + if tok.Token.Type == models.TokenTypePlaceholder { + placeholderCount++ + } + } + + if placeholderCount != tt.wantPlaceholders { + t.Errorf("Expected %d placeholders, got %d", tt.wantPlaceholders, placeholderCount) + } + }) + } +} diff --git a/pkg/sql/tokenizer/tokenizer.go b/pkg/sql/tokenizer/tokenizer.go index de3d958..a0337aa 100644 --- a/pkg/sql/tokenizer/tokenizer.go +++ b/pkg/sql/tokenizer/tokenizer.go @@ -1430,6 +1430,32 @@ func (t *Tokenizer) readPunctuation() (models.Token, error) { } // Just a standalone ? symbol (used for single key existence check) return models.Token{Type: models.TokenTypeQuestion, Value: "?"}, nil + case '$': + // Handle PostgreSQL positional parameters ($1, $2, etc.) 
+ t.pos.AdvanceRune(r, size) + if t.pos.Index < len(t.input) { + nextR, _ := utf8.DecodeRune(t.input[t.pos.Index:]) + // Check if followed by a digit (positional parameter) + if nextR >= '0' && nextR <= '9' { + // Read the number part + start := t.pos.Index + for t.pos.Index < len(t.input) { + digitR, digitSize := utf8.DecodeRune(t.input[t.pos.Index:]) + if digitR < '0' || digitR > '9' { + break + } + t.pos.AdvanceRune(digitR, digitSize) + } + paramNum := string(t.input[start:t.pos.Index]) + return models.Token{Type: models.TokenTypePlaceholder, Value: "$" + paramNum}, nil + } + } + // TODO(#189): PostgreSQL dollar-quoted strings ($tag$...$tag$) are not yet supported. + // Dollar-quoted strings allow arbitrary string content without escaping quotes. + // Example: $body$SELECT * FROM users WHERE name = 'John'$body$ + // For now, standalone $ is treated as a placeholder token. + // Future implementation should check for $identifier$ pattern and read until closing tag. + return models.Token{Type: models.TokenTypePlaceholder, Value: "$"}, nil } if isIdentifierStart(r) {