diff --git a/.github/ACTION_INTEGRATION_GUIDE.md b/.github/ACTION_INTEGRATION_GUIDE.md index 629a10b..98a09bb 100644 --- a/.github/ACTION_INTEGRATION_GUIDE.md +++ b/.github/ACTION_INTEGRATION_GUIDE.md @@ -51,7 +51,7 @@ jobs: - **Errors**: ${{ steps.validate.outputs.invalid-files }} - **Time**: ${{ steps.validate.outputs.validation-time }}ms - ${{ steps.validate.outputs.invalid-files == '0' ? '✅ All SQL files valid!' : '❌ Please fix SQL errors' }}`; + ${{ steps.validate.outputs.invalid-files == '0' && 'All SQL files valid!' || 'Please fix SQL errors' }}`; github.rest.issues.createComment({ issue_number: context.issue.number, @@ -96,7 +96,7 @@ jobs: "type": "section", "text": { "type": "mrkdwn", - "text": "❌ SQL validation failed\n*Repository:* ${{ github.repository }}\n*Branch:* ${{ github.ref_name }}" + "text": "SQL validation failed\n*Repository:* ${{ github.repository }}\n*Branch:* ${{ github.ref_name }}" } } ] @@ -120,7 +120,7 @@ jobs: "type": "section", "text": { "type": "mrkdwn", - "text": "*SQL Validation Results*\n\n• Files: ${{ steps.validate.outputs.validated-files }}\n• Errors: ${{ steps.validate.outputs.invalid-files }}\n• Time: ${{ steps.validate.outputs.validation-time }}ms\n• Status: ${{ steps.validate.outputs.invalid-files == '0' && '✅ Passed' || '❌ Failed' }}" + "text": "*SQL Validation Results*\n\n• Files: ${{ steps.validate.outputs.validated-files }}\n• Errors: ${{ steps.validate.outputs.invalid-files }}\n• Time: ${{ steps.validate.outputs.validation-time }}ms\n• Status: ${{ steps.validate.outputs.invalid-files == '0' && 'Passed' || 'Failed' }}" } }, { @@ -691,7 +691,7 @@ jobs: issue_number: context.issue.number, owner: context.repo.owner, repo: context.repo.repo, - body: `### SQL Validation\n\n✓ Files: ${validated}\n✗ Errors: ${invalid}` + body: `### SQL Validation\n\nFiles: ${validated}\nErrors: ${invalid}` }); # Slack notification on failure diff --git a/.github/ACTION_QUICK_REFERENCE.md b/.github/ACTION_QUICK_REFERENCE.md index de856c9..3506b0f 
100644 --- a/.github/ACTION_QUICK_REFERENCE.md +++ b/.github/ACTION_QUICK_REFERENCE.md @@ -185,7 +185,7 @@ jobs: ## Links -- [Full Documentation](../ACTION_README.md) -- [Testing Guide](../ACTION_TESTING_GUIDE.md) -- [Publishing Guide](../MARKETPLACE_PUBLISHING.md) -- [Example Workflows](../workflows/examples/) +- [Integration Guide](ACTION_INTEGRATION_GUIDE.md) +- [Testing Guide](ACTION_TESTING_GUIDE.md) +- [Publishing Guide](MARKETPLACE_PUBLISHING.md) +- [Example Workflows](./workflows/examples/) diff --git a/.github/ACTION_TESTING_GUIDE.md b/.github/ACTION_TESTING_GUIDE.md index 6b56314..ee23d89 100644 --- a/.github/ACTION_TESTING_GUIDE.md +++ b/.github/ACTION_TESTING_GUIDE.md @@ -232,57 +232,50 @@ EOF ## Manual Testing Checklist -Before publishing, test these scenarios: - -### ✅ Basic Functionality - -- [ ] Action installs GoSQLX successfully -- [ ] Validates valid SQL files without errors -- [ ] Detects and reports invalid SQL files -- [ ] Properly fails when `fail-on-error: true` -- [ ] Continues when `fail-on-error: false` - -### ✅ File Pattern Matching - -- [ ] `**/*.sql` finds all SQL files recursively -- [ ] `*.sql` finds only root-level SQL files -- [ ] Custom patterns work correctly -- [ ] Empty pattern results are handled gracefully - -### ✅ Configuration Options - -- [ ] `dialect` parameter changes validation behavior -- [ ] `strict` mode enables stricter validation -- [ ] `show-stats` displays performance metrics -- [ ] `config` file is loaded and applied -- [ ] `working-directory` changes context correctly - -### ✅ Outputs - -- [ ] `validated-files` count is accurate -- [ ] `invalid-files` count matches errors -- [ ] `validation-time` is reported -- [ ] `formatted-files` count works with format-check - -### ✅ Error Handling - -- [ ] Missing GoSQLX installation is detected -- [ ] No SQL files found is handled gracefully -- [ ] Invalid config file is reported -- [ ] File read errors are caught - -### ✅ Performance - -- [ ] Completes quickly (<2 
minutes for 100 files) -- [ ] Binary caching works across runs -- [ ] Memory usage is reasonable - -### ✅ Integration - -- [ ] Works with matrix strategy -- [ ] Compatible with other actions -- [ ] PR comments work correctly -- [ ] Artifacts upload successfully +Before publishing, verify the following scenarios: + +### Basic Functionality +- Action installs GoSQLX successfully +- Validates valid SQL files without errors +- Detects and reports invalid SQL files +- Properly fails when `fail-on-error: true` +- Continues when `fail-on-error: false` + +### File Pattern Matching +- `**/*.sql` finds all SQL files recursively +- `*.sql` finds only root-level SQL files +- Custom patterns work correctly +- Empty pattern results are handled gracefully + +### Configuration Options +- `dialect` parameter changes validation behavior +- `strict` mode enables stricter validation +- `show-stats` displays performance metrics +- `config` file is loaded and applied +- `working-directory` changes context correctly + +### Outputs +- `validated-files` count is accurate +- `invalid-files` count matches errors +- `validation-time` is reported +- `formatted-files` count works with format-check + +### Error Handling +- Missing GoSQLX installation is detected +- No SQL files found is handled gracefully +- Invalid config file is reported +- File read errors are caught + +### Performance +- Completes quickly (under 2 minutes for 100 files) +- Binary caching works across runs +- Memory usage is reasonable + +### Integration +- Works with matrix strategy +- Compatible with other actions +- PR comments work correctly +- Artifacts upload successfully ## Automated Testing @@ -532,9 +525,9 @@ jobs: After successful testing: -1. ✅ All tests pass -2. ✅ Performance meets targets -3. ✅ Documentation is complete -4. ✅ Ready for publishing +1. All tests pass +2. Performance meets targets +3. Documentation is complete +4. 
Ready for publishing See [MARKETPLACE_PUBLISHING.md](MARKETPLACE_PUBLISHING.md) for publishing instructions. diff --git a/.github/MARKETPLACE_PUBLISHING.md b/.github/MARKETPLACE_PUBLISHING.md index 9f60771..c2338cc 100644 --- a/.github/MARKETPLACE_PUBLISHING.md +++ b/.github/MARKETPLACE_PUBLISHING.md @@ -65,7 +65,7 @@ Release title: v1.0.0: GoSQLX GitHub Action - Ultra-Fast SQL Validation Description: ## GoSQLX GitHub Action v1.0.0 -### 🚀 Features +### Features - **Ultra-Fast Validation**: 100-1000x faster than SQLFluff - **Multi-Dialect Support**: PostgreSQL, MySQL, SQL Server, Oracle, SQLite @@ -73,17 +73,17 @@ Description: - **Comprehensive Analysis**: Security and performance checks - **Zero Configuration**: Works out of the box -### 📊 Performance +### Performance - **Throughput**: 1.38M+ operations/second - **Validation Speed**: <10ms for typical queries - **Batch Processing**: 100+ files/second -### 📖 Documentation +### Documentation -See [ACTION_README.md](ACTION_README.md) for complete documentation and examples. +See [ACTION_QUICK_REFERENCE.md](ACTION_QUICK_REFERENCE.md) for complete documentation and examples. -### 🎯 Quick Start +### Quick Start ```yaml - uses: ajitpratap0/GoSQLX@v1 @@ -92,17 +92,17 @@ See [ACTION_README.md](ACTION_README.md) for complete documentation and examples validate: true ``` -### 🔗 Links +### Links - [Documentation](https://github.com/ajitpratap0/GoSQLX#readme) -- [Examples](.github/workflows/examples/) -- [Testing Guide](.github/ACTION_TESTING_GUIDE.md) +- [Examples](./workflows/examples/) +- [Testing Guide](ACTION_TESTING_GUIDE.md) -### 🐛 Known Issues +### Known Issues None at this time. -### 🙏 Acknowledgments +### Acknowledgments Built with GitHub Actions and Go. ``` @@ -138,7 +138,7 @@ branding: - Secondary category: Code quality 3. 
**Marketplace README**: - - The `ACTION_README.md` content should be the main documentation + - The `ACTION_QUICK_REFERENCE.md` content should be the main documentation - Consider copying it to root README or having a marketplace-specific version ### 5. Version Management Strategy @@ -259,11 +259,11 @@ jobs: Add relevant badges to increase trust: ```markdown -[![GitHub Marketplace](https://img.shields.io/badge/Marketplace-GoSQLX-blue.svg)](...) -[![GitHub Release](https://img.shields.io/github/release/ajitpratap0/GoSQLX.svg)](...) -[![GitHub Stars](https://img.shields.io/github/stars/ajitpratap0/GoSQLX.svg)](...) +[![GitHub Marketplace](https://img.shields.io/badge/Marketplace-GoSQLX-blue.svg)](https://github.com/marketplace/actions/gosqlx-sql-validator) +[![GitHub Release](https://img.shields.io/github/release/ajitpratap0/GoSQLX.svg)](https://github.com/ajitpratap0/GoSQLX/releases) +[![GitHub Stars](https://img.shields.io/github/stars/ajitpratap0/GoSQLX.svg)](https://github.com/ajitpratap0/GoSQLX) [![License: AGPL-3.0](https://img.shields.io/badge/License-AGPL--3.0-blue.svg)](https://www.gnu.org/licenses/agpl-3.0) -[![Go Report Card](https://goreportcard.com/badge/github.com/ajitpratap0/GoSQLX)](...) +[![Go Report Card](https://goreportcard.com/badge/github.com/ajitpratap0/GoSQLX)](https://goreportcard.com/report/github.com/ajitpratap0/GoSQLX) ``` ## Updating the Action @@ -477,4 +477,4 @@ git push -f origin v1 - [ ] Post-release testing completed - [ ] Announcement prepared -Ready to publish! 🚀 +Ready to publish! diff --git a/.github/SECURITY_CHECKLIST.md b/.github/SECURITY_CHECKLIST.md index 3800ae0..e04a917 100644 --- a/.github/SECURITY_CHECKLIST.md +++ b/.github/SECURITY_CHECKLIST.md @@ -1,174 +1,182 @@ -# Security Feature Activation Checklist +# Security Feature Activation Guide -Use this checklist to enable all security features for GoSQLX after merging the security workflow PR. 
+This guide provides steps to enable security features for GoSQLX after merging the security workflow. ## Prerequisites -- [ ] Security workflow PR merged to main branch -- [ ] Repository administrator access -- [ ] GitHub Advanced Security enabled (for private repos) + +- Security workflow PR merged to main branch +- Repository administrator access +- GitHub Advanced Security enabled (for private repos) ## GitHub Security Settings -### Step 1: Enable Security Features +### Enable Security Features + Navigate to: **Settings** → **Security & analysis** -- [ ] Enable **Dependency graph** (usually enabled by default) -- [ ] Enable **Dependabot alerts** -- [ ] Enable **Dependabot security updates** -- [ ] Enable **Grouped security updates** (new feature) -- [ ] Enable **Code scanning** (CodeQL) -- [ ] Enable **Secret scanning** -- [ ] Enable **Secret scanning push protection** +Enable the following features: +- Dependency graph (usually enabled by default) +- Dependabot alerts +- Dependabot security updates +- Grouped security updates +- Code scanning (CodeQL) +- Secret scanning +- Secret scanning push protection + +### Configure Branch Protection -### Step 2: Configure Branch Protection Navigate to: **Settings** → **Branches** → **Branch protection rules** -#### For `main` branch: -- [ ] Require status checks before merging - - [ ] Require branches to be up to date - - [ ] Select required status checks: - - [ ] `GoSec Security Scanner` - - [ ] `Trivy Repository Scan` - - [ ] `Trivy Config Scan` - - [ ] `Go Vulnerability Check` -- [ ] Require pull request reviews (1 approval minimum) -- [ ] Require conversation resolution before merging -- [ ] Require signed commits (recommended) -- [ ] Include administrators (recommended) - -### Step 3: Configure Notifications +For the `main` branch, configure: +- Require status checks before merging + - Require branches to be up to date + - Required status checks: `GoSec Security Scanner`, `Trivy Repository Scan`, `Trivy Config 
Scan`, `Go Vulnerability Check` +- Require pull request reviews (minimum 1 approval) +- Require conversation resolution before merging +- Require signed commits (recommended) +- Include administrators (recommended) + +### Configure Notifications + Navigate to: **Settings** → **Notifications** → **Security alerts** -- [ ] Email notifications for Dependabot alerts -- [ ] Email notifications for code scanning alerts -- [ ] Email notifications for secret scanning alerts -- [ ] Web notifications for all security events +Enable the following notifications: +- Email notifications for Dependabot alerts +- Email notifications for code scanning alerts +- Email notifications for secret scanning alerts +- Web notifications for all security events + +### Initial Workflow Run -### Step 4: Initial Workflow Run Navigate to: **Actions** → **Security Scanning** -- [ ] Run workflow manually (click "Run workflow") -- [ ] Wait for all jobs to complete -- [ ] Review security summary -- [ ] Address any critical/high findings before enabling required checks +1. Run workflow manually (click "Run workflow") +2. Wait for all jobs to complete +3. Review security summary +4. 
Address any critical/high findings before enabling required checks + +### Review Security Tab -### Step 5: Review Security Tab Navigate to: **Security** tab -- [ ] Check **Overview** for security posture summary -- [ ] Review **Code scanning alerts** (should be 0 initially) -- [ ] Review **Dependabot alerts** (if any) -- [ ] Review **Secret scanning alerts** (should be 0) +Review the following sections: +- **Overview** for security posture summary +- **Code scanning alerts** (should be 0 initially) +- **Dependabot alerts** (if any) +- **Secret scanning alerts** (should be 0) ## Dependabot Configuration -### Step 6: Configure Auto-Merge (Optional) +### Configure Auto-Merge (Optional) + Navigate to: **Settings** → **General** → **Pull Requests** -- [ ] Enable "Allow auto-merge" -- [ ] Set up auto-merge rules in repository settings -- [ ] Configure required status checks for auto-merge +- Enable "Allow auto-merge" +- Set up auto-merge rules in repository settings +- Configure required status checks for auto-merge + +### Review Dependabot Settings -### Step 7: Review Dependabot Settings Navigate to: **Insights** → **Dependency graph** → **Dependabot** -- [ ] Verify Go modules monitoring enabled -- [ ] Verify GitHub Actions monitoring enabled -- [ ] Check update schedule (daily for Go, weekly for Actions) -- [ ] Verify reviewer assignment working +Verify: +- Go modules monitoring enabled +- GitHub Actions monitoring enabled +- Update schedule (daily for Go, weekly for Actions) +- Reviewer assignment working ## Testing and Validation -### Step 8: Test Security Scanning -- [ ] Create test branch with intentional vulnerability -- [ ] Push branch and create PR -- [ ] Verify security scans run automatically -- [ ] Verify scans detect the test vulnerability -- [ ] Close/delete test PR -- [ ] Delete test branch - -### Step 9: Test Dependabot -- [ ] Wait for first Dependabot PR (may take 24 hours) -- [ ] Review Dependabot PR format -- [ ] Verify labels applied correctly -- [ ] 
Verify reviewer assigned -- [ ] Test merge process -- [ ] Verify workflow runs on merged PR - -### Step 10: Monitor Weekly Scans -- [ ] Note next Sunday's scan schedule -- [ ] Review Monday scan results -- [ ] Set up recurring calendar reminder for Monday review +### Test Security Scanning + +1. Create test branch with intentional vulnerability +2. Push branch and create PR +3. Verify security scans run automatically +4. Verify scans detect the test vulnerability +5. Close/delete test PR and branch + +### Test Dependabot + +1. Wait for first Dependabot PR (may take 24 hours) +2. Review Dependabot PR format +3. Verify labels applied correctly +4. Verify reviewer assigned +5. Test merge process +6. Verify workflow runs on merged PR + +### Monitor Weekly Scans + +- Note next Sunday's scan schedule +- Review Monday scan results +- Set up recurring calendar reminder for Monday review ## Documentation -### Step 11: Update Repository README -- [ ] Add security badges to README.md: - ```markdown - [![Security Scanning](https://github.com/ajitpratap0/GoSQLX/actions/workflows/security.yml/badge.svg)](https://github.com/ajitpratap0/GoSQLX/actions/workflows/security.yml) - [![Dependabot Status](https://img.shields.io/badge/Dependabot-enabled-success)](https://github.com/ajitpratap0/GoSQLX/security/dependabot) - ``` -- [ ] Add link to SECURITY.md in README -- [ ] Update contributing guidelines with security requirements - -### Step 12: Team Communication -- [ ] Notify team about new security features -- [ ] Share SECURITY_SETUP.md with maintainers -- [ ] Schedule security training/review session -- [ ] Document security incident response process +### Update Repository README + +Add security badges to README.md: + +```markdown +[![Security Scanning](https://github.com/ajitpratap0/GoSQLX/actions/workflows/security.yml/badge.svg)](https://github.com/ajitpratap0/GoSQLX/actions/workflows/security.yml) +[![Dependabot 
Status](https://img.shields.io/badge/Dependabot-enabled-success)](https://github.com/ajitpratap0/GoSQLX/security/dependabot) +``` + +Additional documentation updates: +- Add link to SECURITY.md in README +- Update contributing guidelines with security requirements + +### Team Communication + +- Notify team about new security features +- Share docs/SECURITY_SETUP.md with maintainers +- Schedule security training/review session +- Document security incident response process ## Ongoing Maintenance ### Weekly Tasks -- [ ] Review Sunday security scan results (every Monday) -- [ ] Check for new Dependabot PRs -- [ ] Triage any new security alerts -### Monthly Tasks -- [ ] Review security metrics and trends -- [ ] Update security documentation if needed -- [ ] Audit dismissed security alerts -- [ ] Review dependency update patterns +- Review Sunday security scan results (every Monday) +- Check for new Dependabot PRs +- Triage any new security alerts -### Quarterly Tasks -- [ ] Comprehensive security audit -- [ ] Review and update security policies -- [ ] Test incident response procedures -- [ ] Update security training materials +### Monthly Tasks -## Rollback Plan (If Issues Occur) +- Review security metrics and trends +- Update security documentation if needed +- Audit dismissed security alerts +- Review dependency update patterns -If you need to temporarily disable security features: +### Quarterly Tasks -1. **Disable Required Checks**: - - Settings → Branches → Edit rule → Uncheck security checks +- Comprehensive security audit +- Review and update security policies +- Test incident response procedures +- Update security training materials -2. **Disable Workflow**: - - Edit `.github/workflows/security.yml` - - Change triggers to only `workflow_dispatch` +## Rollback Plan -3. **Pause Dependabot**: - - Rename `.github/dependabot.yml` to `.github/dependabot.yml.disabled` +If you need to temporarily disable security features: -4. 
**Document Issues**: - - Create issue tracking the problem - - Document why features were disabled - - Set deadline for re-enabling +1. **Disable Required Checks**: Settings → Branches → Edit rule → Uncheck security checks +2. **Disable Workflow**: Edit `.github/workflows/security.yml` and change triggers to only `workflow_dispatch` +3. **Pause Dependabot**: Rename `.github/dependabot.yml` to `.github/dependabot.yml.disabled` +4. **Document Issues**: Create issue tracking the problem, document why features were disabled, and set deadline for re-enabling ## Success Criteria Security implementation is successful when: -- [ ] All GitHub security features enabled -- [ ] Weekly scans running successfully -- [ ] Dependabot creating PRs regularly -- [ ] No critical/high vulnerabilities in codebase -- [ ] Team trained on security processes -- [ ] Security metrics being tracked -- [ ] Zero security alert backlog +- All GitHub security features enabled +- Weekly scans running successfully +- Dependabot creating PRs regularly +- No critical/high vulnerabilities in codebase +- Team trained on security processes +- Security metrics being tracked +- Zero security alert backlog ## Support Resources -- **Documentation**: See `SECURITY_SETUP.md` for detailed instructions +- **Documentation**: See `docs/SECURITY_SETUP.md` for detailed instructions - **Security Policy**: See `SECURITY.md` for reporting procedures - **GitHub Docs**: https://docs.github.com/en/code-security - **GoSec Docs**: https://github.com/securego/gosec diff --git a/CHANGELOG.md b/CHANGELOG.md index 11cce13..04558c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -47,11 +47,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [1.5.1] - 2025-11-15 - Phases 2-3 Test Coverage Completion -### 🎯 Phase 3 Complete: Token and Tokenizer Coverage Enhancement +### Phase 3 Complete: Token and Tokenizer Coverage Enhancement **Released - PR #88** -#### ✅ Test Coverage Enhancement - Phase 3 
(Token, Tokenizer) +#### Test Coverage Enhancement - Phase 3 (Token, Tokenizer) - **Comprehensive Test Suite**: Added 2 new test files with 378 lines of test code - **Perfect Token Coverage Achieved**: Token package reaches 100% coverage ⭐ - **Coverage Achievements**: @@ -59,7 +59,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Tokenizer Package: 69.1% → **76.1%** (+7.0%) - **Target Exceeded!** - **Zero Race Conditions**: All tests pass with race detection enabled -#### 🧪 New Test Files Created - Phase 3 +#### New Test Files Created - Phase 3 - **pkg/sql/token/coverage_enhancement_test.go** (332 lines) - IsKeyword(), IsOperator(), IsLiteral() - all classification methods - 95+ subtests covering all token types (25 keywords, 7 operators, 6 literals) @@ -73,7 +73,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Tokenizer operations (NewWithKeywords, Reset) - 25+ subtests with comprehensive edge case coverage -#### 📊 Combined Phase 1 + Phase 2 + Phase 3 Impact +#### Combined Phase 1 + Phase 2 + Phase 3 Impact - **8 packages** with comprehensive coverage improvements - **4,823 lines** of production-grade test code - **3 packages at perfect 100% coverage**: Models, Keywords, Token @@ -82,21 +82,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 --- -### 🎯 Phase 2 Complete: Keywords, Errors, and AST Coverage Enhancement +### Phase 2 Complete: Keywords, Errors, and AST Coverage Enhancement **Released - PR #87** -#### ✅ Test Coverage Enhancement - Phase 2 (Keywords, Errors, AST) +#### Test Coverage Enhancement - Phase 2 (Keywords, Errors, AST) - **Comprehensive Test Suite**: Added 3 new test files with 1,351 lines of test code -- **Perfect Coverage Achieved**: Keywords package reaches 100% coverage ⭐ +- **Perfect Coverage Achieved**: Keywords package reaches 100% coverage - **Coverage Achievements**: - - Keywords Package: 92.8% → **100.0%** (+7.2%) - **Perfect 
Coverage!** - - Errors Package: 83.8% → **95.6%** (+11.8%) - **Exceeded Target!** + - Keywords Package: 92.8% → **100.0%** (+7.2%) - Perfect Coverage + - Errors Package: 83.8% → **95.6%** (+11.8%) - Exceeded Target - AST Package: 73.7% → **74.4%** (+0.7%) - Marker functions covered - **Documentation Cleanup**: Removed 2,538 lines of obsolete/redundant documentation - **Archived Historical Docs**: Moved outdated architecture docs to archive with explanation -#### 🧪 New Test Files Created - Phase 2 +#### New Test Files Created - Phase 2 - **pkg/sql/keywords/coverage_enhancement_test.go** (405 lines) - All 5 SQL dialects tested (Generic, MySQL, PostgreSQL, SQLite, Unknown) - Case-sensitive and case-insensitive mode coverage @@ -113,13 +113,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Integration and edge case validation - 4 test suites with 50+ subtests -#### 📚 Documentation Improvements +#### Documentation Improvements - Removed 5 obsolete LLM-generated session summaries (2,538 lines) - Archived 2 outdated architecture/performance docs - Created `PHASE2_COVERAGE_SUMMARY.md` with comprehensive documentation - Added `archive/historical-architecture-docs/README.md` to explain historical context -#### 📊 Combined Phase 1 + Phase 2 Impact +#### Combined Phase 1 + Phase 2 Impact - **6 packages** with comprehensive coverage improvements - **4,445 lines** of production-grade test code - **Zero race conditions** across entire codebase @@ -127,11 +127,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [1.5.0] - 2025-11-15 - Phase 1 Test Coverage Achievement -### 🎯 Phase 1 Complete: Comprehensive Test Coverage Across CLI, Parser, and Tokenizer +### Phase 1 Complete: Comprehensive Test Coverage Across CLI, Parser, and Tokenizer **This release marks a major milestone** in GoSQLX quality assurance with comprehensive test coverage improvements across three critical packages. 
All Phase 1 coverage targets have been met or exceeded, establishing GoSQLX as production-grade software with extensive test validation. -### ✅ Test Coverage Enhancement - Phase 1 Complete (CLI, Parser, Tokenizer) +### Test Coverage Enhancement - Phase 1 Complete (CLI, Parser, Tokenizer) - **Comprehensive Test Suite**: Added 7 new test files with 3,094 lines of test code - **Triple Coverage Achievement**: Met or exceeded all three coverage targets - CLI Package: 63.3% coverage (exceeded 60% target by 3.3%) @@ -141,7 +141,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **Real-World Integration Testing**: 115+ production SQL queries validated across multiple dialects - **Quality Assurance**: All tests pass with race detection enabled, zero race conditions detected -### 🧪 New Test Files Created - Parser Package +### New Test Files Created - Parser Package - **parser_additional_coverage_test.go** (420 lines): Additional statement coverage (CTEs, window functions) - **parser_edge_cases_test.go** (450 lines): Edge cases and boundary conditions - **parser_error_recovery_test.go** (380 lines): Error recovery and handling @@ -150,7 +150,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **error_recovery_test.go** (61 lines): Error recovery integration tests - **integration_test.go** (311 lines): Real-world SQL query validation framework -### 🧪 New Test Files Created - Tokenizer Package +### New Test Files Created - Tokenizer Package - **tokenizer_coverage_test.go** (712 lines): Comprehensive tokenizer feature testing - Backtick identifiers (MySQL-style) - Triple-quoted strings (Python-style) @@ -161,14 +161,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Custom keyword support - Debug logger functionality -### 🧪 Enhanced CLI Testing +### Enhanced CLI Testing - **sql_analyzer_test.go** (318 lines): Comprehensive CLI command testing - Analyze, validate, 
format, parse command coverage - Edge case testing: empty files, large files, invalid SQL, UTF-8 - Error handling validation across all commands - Input detection testing (file vs SQL string) -### 📊 Integration Test Infrastructure +### Integration Test Infrastructure - **testdata/postgresql/queries.sql**: PostgreSQL-specific queries - **testdata/mysql/queries.sql**: MySQL-specific queries - **testdata/real_world/ecommerce.sql**: Complex e-commerce queries @@ -176,14 +176,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Multi-dialect support validation - Success rate tracking and failure analysis -### 🎯 Coverage Progression +### Coverage Progression | Component | Initial | Target | Achieved | Status | |-----------|---------|--------|----------|--------| | CLI | ~50% | 60% | **63.3%** | ✅ Exceeded by 3.3% | | Parser | 57.4% | 75% | **75.0%** | ✅ Met exactly | | Tokenizer | 60.0% | 70% | **76.5%** | ✅ Exceeded by 6.5% | -### 📈 Function-Level Improvements - Tokenizer +### Function-Level Improvements - Tokenizer | Function | Initial | Final | Improvement | |----------|---------|-------|-------------| | handleEscapeSequence | 0.0% | **85.7%** | +85.7% | @@ -195,7 +195,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | readNumber | 77.6% | **85.7%** | +8.1% | | TokenizeContext | 81.1% | **84.9%** | +3.8% | -### 🔧 CLI Code Refactoring +### CLI Code Refactoring - **analyze.go**: Improved error handling consistency - **config.go**: Enhanced configuration management - **format.go**: Better error messages and UTF-8 handling @@ -204,7 +204,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **validate.go**: Enhanced validation error reporting - **Net Impact**: -529 lines with improved maintainability -### 🔍 Testing Approach +### Testing Approach - Table-driven test design with comprehensive subtests - Short mode support for fast pre-commit hooks - Integration tests 
document parser limitations for future improvements @@ -212,7 +212,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Edge case validation across all components - Race detection validated confirming thread safety -### ✅ Quality Metrics +### Quality Metrics - ✅ All tests pass with race detection enabled (go test -race) - ✅ Pre-commit hook integration with short mode support - ✅ Code formatted with go fmt @@ -220,7 +220,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - ✅ Thread-safe operation confirmed across all test scenarios - ✅ Real-world SQL validation: 95%+ success rate on production queries -### 🎯 Impact +### Impact **Production Confidence**: This Phase 1 test coverage enhancement establishes GoSQLX as enterprise-grade software with: - **Comprehensive Validation**: 3,094 lines of new tests covering real-world usage patterns @@ -232,7 +232,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 This release positions GoSQLX as the most thoroughly tested Go SQL parser library, with comprehensive test coverage exceeding industry standards across all critical components. 
-### 🔗 Related Pull Request +### Related Pull Request **PR #85**: [Phase 1 Test Coverage Achievement - CLI, Parser, and Tokenizer](https://github.com/ajitpratap0/GoSQLX/pull/85) - 81 files changed, 25,883 insertions, 1,735 deletions @@ -240,13 +240,13 @@ This release positions GoSQLX as the most thoroughly tested Go SQL parser librar - All 16 CI checks passing (tests across 3 platforms × 3 Go versions, linting, security, benchmarks) - Comprehensive review and validation -### 📚 Documentation Created +### Documentation Created - **CLI_REFACTORING_SUMMARY.md**: CLI coverage and refactoring details - **PARSER_COVERAGE_SUMMARY.md**: Parser test coverage breakdown - **TOKENIZER_COVERAGE_SUMMARY.md**: Tokenizer coverage achievement details - **SESSION_PROGRESS_SUMMARY.md**: Overall session progress tracking -### 🚀 Key Improvements +### Key Improvements **Production Readiness Enhancements:** - **Battle-Tested Reliability**: 3,094 lines of new test code across 7 comprehensive test files @@ -266,7 +266,7 @@ This release positions GoSQLX as the most thoroughly tested Go SQL parser librar - **Edge Case Coverage**: Comprehensive testing of boundary conditions, empty inputs, and invalid syntax - **Resource Management**: Proper object pooling validated in all test scenarios -### 📊 Complete Coverage Breakdown +### Complete Coverage Breakdown **Before Phase 1:** - CLI Package: ~50% coverage @@ -278,13 +278,13 @@ This release positions GoSQLX as the most thoroughly tested Go SQL parser librar - **Parser Package**: 75.0% coverage ⬆️ **+17.6%** (met 75% target exactly) - **Tokenizer Package**: 76.5% coverage ⬆️ **+16.5%** (exceeded 70% target) -### ✅ Previous Test Coverage Enhancement - AST Package (v1.4.0) +### Previous Test Coverage Enhancement - AST Package (v1.4.0) - **Comprehensive Test Suite**: Added 10 new test files with ~1,800 lines of tests - **Coverage Improvement**: Increased AST package coverage from 59.6% to 73.4% (+13.8 percentage points) - **Production Confidence**: 
Exceeded 70% coverage target, validating production readiness - **Quality Assurance**: All tests pass with race detection enabled, zero race conditions detected -### 🧪 New Test Files Created +### New Test Files Created - **data_loading_test.go** (~250 lines): Cloud data loading features (StageParamsObject, DataLoadingOptions, helpers) - **pool_test.go** (~180 lines): Object pooling infrastructure (Insert/Update/Delete statement pools, reuse validation) - **span_test.go** (~450 lines): Source location tracking (SpannedNode, UnionSpans, all Span() methods) @@ -295,11 +295,11 @@ This release positions GoSQLX as the most thoroughly tested Go SQL parser librar - **operator_test.go**: Operator types and operations tests - **types_test.go**: AST type definition tests -### 📈 Enhanced Existing Tests +### Enhanced Existing Tests - **value_test.go** (+180 lines): Added comprehensive Value.String() tests for 11 missing types (byte strings, raw strings, national/hex/double-quoted strings), plus all 40+ DateTimeField variants - **trigger_test.go**: Applied go fmt formatting for code consistency -### 🎯 Coverage Progression +### Coverage Progression | Stage | Coverage | Gain | Test File | |-------|----------|------|-----------| | Initial | 59.6% | - | Baseline | @@ -309,21 +309,21 @@ This release positions GoSQLX as the most thoroughly tested Go SQL parser librar | Value enhancements | 73.4% | +1.1% | value_test.go | | **Final** | **73.4%** | **+13.8%** | **Total** | -### 🔍 Testing Approach +### Testing Approach - Table-driven test design with subtests for comprehensive coverage - Edge case validation across all AST node types - Race detection validated (go test -race) confirming thread safety - Memory-efficient pool testing with reuse verification - Source location tracking validation for error reporting -### ✅ Quality Metrics +### Quality Metrics - ✅ All tests pass with race detection enabled - ✅ Code formatted with go fmt - ✅ No issues reported by go vet - ✅ Thread-safe 
operation confirmed across all test scenarios - ✅ Production-ready reliability validated for enterprise SQL parsing -### 🎯 Impact +### Impact This substantial test coverage increase provides strong confidence in the AST package's correctness, thread safety, and production readiness. The comprehensive test suite validates complex SQL parsing scenarios including JOINs, CTEs, window functions, and advanced DML/DDL operations. ## [1.4.0] - 2025-09-07 - CLI Release and Code Quality @@ -657,10 +657,15 @@ For questions about upgrading or changelog entries: - Open an issue: https://github.com/ajitpratap0/GoSQLX/issues - Join discussions: https://github.com/ajitpratap0/GoSQLX/discussions +[Unreleased]: https://github.com/ajitpratap0/GoSQLX/compare/v1.5.1...HEAD +[1.5.1]: https://github.com/ajitpratap0/GoSQLX/compare/v1.5.0...v1.5.1 [1.5.0]: https://github.com/ajitpratap0/GoSQLX/compare/v1.4.0...v1.5.0 [1.4.0]: https://github.com/ajitpratap0/GoSQLX/compare/v1.3.0...v1.4.0 -[Unreleased]: https://github.com/ajitpratap0/GoSQLX/compare/v1.5.0...HEAD +[1.3.0]: https://github.com/ajitpratap0/GoSQLX/compare/v1.2.0...v1.3.0 +[1.2.0]: https://github.com/ajitpratap0/GoSQLX/compare/v1.1.0...v1.2.0 +[1.1.0]: https://github.com/ajitpratap0/GoSQLX/compare/v1.0.2...v1.1.0 [1.0.2]: https://github.com/ajitpratap0/GoSQLX/compare/v1.0.1...v1.0.2 [1.0.1]: https://github.com/ajitpratap0/GoSQLX/compare/v1.0.0...v1.0.1 [1.0.0]: https://github.com/ajitpratap0/GoSQLX/compare/v0.9.0...v1.0.0 -[0.9.0]: https://github.com/ajitpratap0/GoSQLX/releases/tag/v0.9.0 \ No newline at end of file +[0.9.0]: https://github.com/ajitpratap0/GoSQLX/compare/v0.8.0...v0.9.0 +[0.8.0]: https://github.com/ajitpratap0/GoSQLX/releases/tag/v0.8.0 \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 75fab90..510e2dc 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,11 +2,11 @@ Thank you for your interest in contributing to GoSQLX! 
This document provides comprehensive guidelines for contributing to the project. -## 🎯 Project Mission +## Project Mission GoSQLX aims to be the **fastest, most reliable, and most comprehensive SQL parsing library for Go**, suitable for production use in enterprise environments. -## 🤝 Ways to Contribute +## Ways to Contribute ### 1. Code Contributions - **Bug fixes**: Resolve issues in tokenization, parsing, or performance @@ -33,7 +33,7 @@ GoSQLX aims to be the **fastest, most reliable, and most comprehensive SQL parsi --- -## 🛠️ Development Setup +## Development Setup ### Prerequisites - **Go 1.24+** (latest stable version recommended) @@ -114,11 +114,11 @@ git push origin feature/your-feature-name --- -## 📋 Contribution Guidelines +## Contribution Guidelines ### Code Quality Standards -#### 🔍 Testing Requirements +#### Testing Requirements - **100% test coverage** for new code (use `go test -cover`) - **Race detection** must pass: `go test -race ./...` - **Performance tests** for optimization changes @@ -153,7 +153,7 @@ func BenchmarkNewFeature(b *testing.B) { } ``` -#### 📝 Code Style +#### Code Style - **Go fmt**: All code must be formatted with `go fmt` - **Go vet**: Must pass `go vet` without warnings - **Golint**: Follow Go naming conventions @@ -181,7 +181,7 @@ func TokenizeSQL(sql []byte) ([]Token, error) { } ``` -#### 🔒 Security Guidelines +#### Security Guidelines - **Input validation**: Always validate external input - **Memory safety**: Use Go's memory safety features correctly - **Resource limits**: Implement bounds checking for large inputs @@ -210,7 +210,7 @@ func ProcessSQL(sql []byte) error { ### Performance Requirements -#### ⚡ Performance Standards +#### Performance Standards - **No performance regression**: New features must not slow down existing functionality - **Memory efficiency**: Minimize allocations in hot paths - **Concurrency safety**: All public APIs must be thread-safe @@ -230,7 +230,7 @@ func OptimizedFunction() { ### Git Commit 
Guidelines -#### 📝 Commit Message Format +#### Commit Message Format ``` (): @@ -270,7 +270,7 @@ Breaking change: ErrorLocation.Column now uses 1-based indexing --- -## 🧪 Testing Guidelines +## Testing Guidelines ### Test Organization ``` @@ -347,7 +347,7 @@ func FuzzTokenizer(f *testing.F) { --- -## 🚀 Feature Development Process +## Feature Development Process ### 1. RFC (Request for Comments) For significant features, create an RFC: @@ -390,7 +390,7 @@ Many applications use PostgreSQL's JSON features extensively... --- -## 📋 Pull Request Checklist +## Pull Request Checklist ### Before Submitting - [ ] **Git Hooks**: Pre-commit hooks installed and passing (`task hooks:install`) @@ -439,7 +439,7 @@ Why is this change needed? --- -## 🐛 Bug Reports +## Bug Reports ### Issue Template ```markdown @@ -485,7 +485,7 @@ Any other relevant information --- -## 🏗️ Architecture Guidelines +## Architecture Guidelines ### Project Structure ``` @@ -553,7 +553,7 @@ func BenchmarkOptimizationImproved(b *testing.B) { --- -## 📈 Performance Contribution Guidelines +## Performance Contribution Guidelines ### Optimization Principles - **Measure first**: Always benchmark before optimizing @@ -609,7 +609,7 @@ func TestMemoryLeak(t *testing.T) { --- -## 🌟 Recognition +## Recognition ### Contributors All contributors are recognized in: @@ -625,7 +625,7 @@ All contributors are recognized in: --- -## 📞 Getting Help +## Getting Help ### Communication Channels - **GitHub Issues**: Bug reports and feature requests @@ -646,7 +646,7 @@ Core maintainers hold virtual office hours: --- -## 🎯 Contribution Goals +## Contribution Goals ### Short-term (3 months) - **50+ contributors**: Grow the contributor base @@ -665,7 +665,7 @@ Core maintainers hold virtual office hours: --- -## 📄 Legal +## Legal ### License By contributing to GoSQLX, you agree that your contributions will be licensed under the same license as the project. 
@@ -674,15 +674,11 @@ By contributing to GoSQLX, you agree that your contributions will be licensed un Contributors retain copyright of their contributions while granting the project rights to use and distribute the code. ### Code of Conduct -All contributors must follow our [Code of Conduct](CODE_OF_CONDUCT.md), which promotes: -- **Respectful communication** -- **Inclusive environment** -- **Professional behavior** -- **Constructive feedback** +All contributors are expected to communicate respectfully, promote an inclusive environment, behave professionally, and provide constructive feedback. --- -**Thank you for contributing to GoSQLX!** 🚀 +**Thank you for contributing to GoSQLX!** Together, we're building the future of high-performance SQL parsing in Go. diff --git a/GITHUB_ACTION_IMPLEMENTATION.md b/GITHUB_ACTION_IMPLEMENTATION.md deleted file mode 100644 index e1feb2b..0000000 --- a/GITHUB_ACTION_IMPLEMENTATION.md +++ /dev/null @@ -1,519 +0,0 @@ -# GitHub Action Implementation Summary - -## Overview - -This document summarizes the complete implementation of the official GoSQLX GitHub Action (Issue #73 / INT-003). - -**Implementation Date**: 2025-11-16 -**Version**: v1.0.0 (ready for publishing) -**Type**: Composite Action -**Status**: ✅ Complete and ready for testing/publishing - -## Files Created - -### Core Action Files - -1. **`action.yml`** (Repository Root) - - Main action metadata and implementation - - Composite action using Bash scripts - - 11 inputs, 4 outputs - - Complete with branding and caching - -2. **`ACTION_README.md`** (Repository Root) - - Comprehensive user documentation - - 50+ usage examples - - Performance metrics and comparisons - - Troubleshooting guide - -### Documentation Files - -3. **`.github/ACTION_TESTING_GUIDE.md`** - - Local testing with `act` - - Integration testing strategies - - Automated test suite examples - - Debugging tips - -4.
**`.github/MARKETPLACE_PUBLISHING.md`** - - Complete publishing workflow - - Version management strategy - - SEO and discoverability tips - - Post-publishing checklist - -5. **`.github/ACTION_QUICK_REFERENCE.md`** - - Quick reference for all features - - Common patterns and recipes - - Troubleshooting quick fixes - - Exit code reference - -6. **`.github/ACTION_INTEGRATION_GUIDE.md`** - - Integration with other GitHub Actions - - PR comments, Slack notifications - - Matrix builds, artifact handling - - Complete CI/CD examples - -### Example Workflows - -7. **`.github/workflows/examples/sql-validation-basic.yml`** - - Simple validation example - - Minimal configuration - - Good starting point - -8. **`.github/workflows/examples/sql-validation-advanced.yml`** - - Comprehensive validation - - PR comments with results - - Multiple validation steps - - Artifact uploads - -9. **`.github/workflows/examples/sql-validation-multi-dialect.yml`** - - Matrix strategy for dialects - - Parallel validation jobs - - Summary job aggregation - -10. **`.github/workflows/examples/sql-validation-changed-files.yml`** - - Optimized for PRs - - Only validates changed files - - Fast feedback loop - -11. **`.github/workflows/examples/sql-validation-scheduled.yml`** - - Weekly SQL audit - - Comprehensive analysis - - Issue creation on problems - - Report archiving - -12. **`.github/workflows/examples/.gosqlx-example.yml`** - - Example configuration file - - All supported options - - Comments explaining each setting - -### Testing Files - -13. 
**`.github/workflows/test-github-action.yml`** - - Comprehensive action testing - - 7 test scenarios - - Multi-OS testing (Ubuntu, macOS) - - Performance validation - - Automated summary - -## Action Features - -### Inputs (11 Parameters) - -| Input | Type | Default | Description | -|-------|------|---------|-------------| -| `files` | string | `**/*.sql` | Glob pattern for SQL files | -| `validate` | boolean | `true` | Enable validation | -| `lint` | boolean | `false` | Enable linting (Phase 4) | -| `format-check` | boolean | `false` | Check formatting | -| `fail-on-error` | boolean | `true` | Fail on errors | -| `config` | string | `` | Config file path | -| `dialect` | string | `` | SQL dialect | -| `strict` | boolean | `false` | Strict mode | -| `show-stats` | boolean | `false` | Show statistics | -| `gosqlx-version` | string | `latest` | Version to install | -| `working-directory` | string | `.` | Working directory | - -### Outputs (4 Values) - -| Output | Description | -|--------|-------------| -| `validated-files` | Number of files validated | -| `invalid-files` | Number of files with errors | -| `formatted-files` | Files needing formatting | -| `validation-time` | Total time in milliseconds | - -### Key Capabilities - -1. **Ultra-Fast Performance**: 100-1000x faster than SQLFluff -2. **Multi-Dialect Support**: PostgreSQL, MySQL, SQL Server, Oracle, SQLite -3. **Intelligent File Discovery**: Glob pattern matching with multiple formats -4. **Comprehensive Validation**: Syntax checking with detailed error reporting -5. **Format Checking**: CI/CD mode for ensuring consistency -6. **Binary Caching**: Automatic caching for faster subsequent runs -7. **Detailed Logging**: Verbose output with GitHub annotations -8. **Job Summaries**: Automatic GitHub job summary generation -9. **Error Annotations**: File-level error annotations in PRs -10. 
**Performance Metrics**: Throughput and timing statistics - -## Implementation Details - -### Technology Stack - -- **Type**: Composite Action -- **Shell**: Bash (cross-platform compatible) -- **Go Version**: 1.24+ -- **Dependencies**: - - `actions/setup-go@v5` - - `actions/cache@v4` - -### Architecture - -``` -┌─────────────────────────────────────┐ -│ GitHub Workflow │ -└──────────────┬──────────────────────┘ - │ - ▼ -┌─────────────────────────────────────┐ -│ GoSQLX Action (action.yml) │ -├─────────────────────────────────────┤ -│ 1. Setup Go environment │ -│ 2. Cache/Install GoSQLX binary │ -│ 3. Find SQL files (glob pattern) │ -│ 4. Validate SQL files │ -│ 5. Check formatting (optional) │ -│ 6. Run linting (optional) │ -│ 7. Generate outputs & summaries │ -└──────────────┬──────────────────────┘ - │ - ▼ -┌─────────────────────────────────────┐ -│ gosqlx CLI (Go binary) │ -├─────────────────────────────────────┤ -│ • validate command │ -│ • format --check command │ -│ • analyze command │ -└─────────────────────────────────────┘ -``` - -### Workflow Steps - -1. **Setup Go**: Install Go 1.25 using `actions/setup-go@v5` -2. **Cache Binary**: Cache GoSQLX binary by version and OS -3. **Install GoSQLX**: Install from source using `go install` -4. **Find Files**: Use `find` command with glob patterns -5. **Validate**: Run `gosqlx validate` on each file -6. **Format Check**: Run `gosqlx format --check` if enabled -7. **Lint**: Run `gosqlx analyze` if enabled -8. **Generate Outputs**: Set GitHub outputs for downstream jobs -9. **Create Summary**: Generate GitHub job summary table -10. 
**Cleanup**: Remove temporary files - -### Error Handling - -- ✅ Graceful handling of no files found -- ✅ Proper exit codes (0 = success, 1 = errors) -- ✅ File-level error annotations -- ✅ Configurable failure behavior -- ✅ Continue-on-error support - -### Performance Optimizations - -- ✅ Binary caching (95%+ cache hit rate expected) -- ✅ Parallel file processing where possible -- ✅ Minimal overhead (<2 seconds for setup) -- ✅ Efficient file discovery -- ✅ Zero-copy SQL parsing (from core library) - -## Usage Examples - -### Minimal Configuration - -```yaml -- uses: ajitpratap0/GoSQLX@v1 - with: - files: '**/*.sql' -``` - -### Production Configuration - -```yaml -- uses: ajitpratap0/GoSQLX@v1 - id: validate - with: - files: '**/*.sql' - validate: true - format-check: true - strict: true - dialect: 'postgresql' - show-stats: true - fail-on-error: true - config: '.gosqlx.yml' - -- name: Use outputs - run: | - echo "Validated: ${{ steps.validate.outputs.validated-files }}" - echo "Errors: ${{ steps.validate.outputs.invalid-files }}" -``` - -### Multi-Dialect Matrix - -```yaml -strategy: - matrix: - dialect: [postgresql, mysql, sqlite] - -steps: - - uses: ajitpratap0/GoSQLX@v1 - with: - files: 'sql/${{ matrix.dialect }}/**/*.sql' - dialect: ${{ matrix.dialect }} - strict: true -``` - -## Testing Strategy - -### Test Coverage - -The action includes 7 comprehensive test scenarios: - -1. **Valid SQL Test**: Verifies correct validation of valid SQL -2. **Invalid SQL Test**: Ensures errors are detected -3. **Format Check Test**: Tests formatting validation -4. **Dialect Test**: Multi-dialect compatibility -5. **No Files Test**: Graceful handling of empty results -6. **Performance Test**: Validates throughput targets -7. 
**Strict Mode Test**: Strict validation behavior - -### Testing Workflow - -Automated testing via `.github/workflows/test-github-action.yml`: -- Runs on Ubuntu and macOS -- Tests all input combinations -- Verifies outputs are correct -- Checks performance targets -- Generates test summary - -### Manual Testing - -See `.github/ACTION_TESTING_GUIDE.md` for: -- Local testing with `act` -- Integration testing in forks -- Manual test checklist -- Debugging procedures - -## Publishing Workflow - -### Pre-Publishing Checklist - -- [ ] All tests passing (run test-github-action.yml) -- [ ] Documentation reviewed and complete -- [ ] Examples tested and working -- [ ] Version tag prepared (v1.0.0) -- [ ] Release notes written -- [ ] Security considerations addressed - -### Publishing Steps - -1. **Create Version Tag** - ```bash - git tag -a v1.0.0 -m "v1.0.0: Initial GoSQLX GitHub Action" - git push origin v1.0.0 - git tag -fa v1 -m "v1: Latest v1.x.x" - git push -f origin v1 - ``` - -2. **Create GitHub Release** - - Go to Releases → Draft new release - - Select tag v1.0.0 - - Check "Publish to GitHub Marketplace" - - Select categories: CI/CD, Code Quality - - Publish release - -3. **Post-Publishing** - - Verify Marketplace listing - - Test installation from Marketplace - - Update main README with badge - - Announce release - -See `.github/MARKETPLACE_PUBLISHING.md` for complete details. 
- -## Performance Targets - -### Expected Performance - -| Metric | Target | Actual (GoSQLX CLI) | -|--------|--------|---------------------| -| Setup Time | <5s | ~2-3s (cached) | -| Validation Speed | <10ms/file | <10ms (typical) | -| Throughput | >50 files/s | 100+ files/s | -| Total Time (100 files) | <5s | ~1-2s | - -### Comparison vs SQLFluff - -| Operation | GoSQLX | SQLFluff | Speedup | -|-----------|--------|----------|---------| -| 10 files | <1s | ~10-30s | 10-30x | -| 100 files | ~1-2s | ~100-300s | 50-150x | -| 1000 files | ~10-20s | ~1000-3000s | 50-150x | - -## Security Considerations - -### Action Security - -- ✅ No secrets in action code -- ✅ Minimal permissions required -- ✅ No data sent to external services -- ✅ Open source and auditable -- ✅ Pinned action dependencies - -### Required Permissions - -```yaml -permissions: - contents: read # For checkout (always required) - pull-requests: write # Optional, for PR comments -``` - -### Security Best Practices - -1. Pin action versions: `@v1.0.0` instead of `@v1` -2. Use dependabot for action updates -3. Review action logs for sensitive data -4. Use secrets for configuration if needed -5. Enable security scanning - -## Maintenance Plan - -### Version Strategy - -- **v1.0.0**: Initial release -- **v1.x.x**: Bug fixes and minor features (backwards compatible) -- **v2.0.0**: Breaking changes (when needed) - -### Update Process - -1. Fix/feature implementation -2. Update tests -3. Update documentation -4. Create new version tag -5. Update v1 tracking tag -6. Create GitHub release -7. 
Announce update - -### Support Channels - -- GitHub Issues: Bug reports and feature requests -- GitHub Discussions: Questions and community support -- Documentation: Comprehensive guides and examples - -## Integration Points - -The action integrates with: - -- ✅ Pull Request workflows -- ✅ Push workflows -- ✅ Scheduled workflows -- ✅ Manual workflows (workflow_dispatch) -- ✅ Matrix strategies -- ✅ Reusable workflows -- ✅ Other GitHub Actions (checkout, cache, etc.) - -See `.github/ACTION_INTEGRATION_GUIDE.md` for detailed integration examples. - -## Known Limitations - -1. **Linting Features**: Advanced linting is Phase 4 (basic analysis available) -2. **File Pattern Matching**: Limited to `find` command capabilities -3. **Windows Support**: Currently tested on Ubuntu/macOS (Windows should work) -4. **Large Repositories**: May need optimization for 10,000+ SQL files - -## Future Enhancements - -### Phase 1 (v1.1.0) - -- [ ] Windows runner support and testing -- [ ] Custom output formats (SARIF, JUnit XML) -- [ ] More granular error reporting -- [ ] Performance optimizations for large repos - -### Phase 2 (v1.2.0) - -- [ ] Advanced linting integration -- [ ] Security scanning results -- [ ] Fix suggestions in PR comments -- [ ] Auto-formatting option - -### Phase 3 (v2.0.0) - -- [ ] Docker action option -- [ ] Multiple file pattern support -- [ ] Configuration profiles -- [ ] Custom rule definitions - -## Resources - -### Documentation - -- [ACTION_README.md](ACTION_README.md) - User documentation -- [ACTION_TESTING_GUIDE.md](.github/ACTION_TESTING_GUIDE.md) - Testing guide -- [MARKETPLACE_PUBLISHING.md](.github/MARKETPLACE_PUBLISHING.md) - Publishing guide -- [ACTION_QUICK_REFERENCE.md](.github/ACTION_QUICK_REFERENCE.md) - Quick reference -- [ACTION_INTEGRATION_GUIDE.md](.github/ACTION_INTEGRATION_GUIDE.md) - Integration guide - -### Example Workflows - -- Basic validation -- Advanced validation with PR comments -- Multi-dialect matrix -- Changed files only -- 
Scheduled audits -- Configuration example - -### Testing - -- Automated test workflow -- Manual testing checklist -- Performance benchmarks -- Integration tests - -## Success Criteria - -All requirements from Issue #73 / INT-003 met: - -- ✅ GitHub Action structure created -- ✅ Action metadata complete (action.yml) -- ✅ All required inputs implemented (11 inputs) -- ✅ All outputs implemented (4 outputs) -- ✅ Composite action implementation working -- ✅ Comprehensive README with examples -- ✅ Example workflows created (5 examples) -- ✅ Testing guide complete -- ✅ Publishing instructions complete -- ✅ Integration examples provided - -## Next Steps - -1. **Test the Action** - - Run `.github/workflows/test-github-action.yml` - - Test manually in a fork - - Verify all examples work - -2. **Review Documentation** - - Read through all documentation files - - Verify examples are accurate - - Check for any gaps - -3. **Prepare for Publishing** - - Create release notes - - Update main README - - Prepare announcement - -4. **Publish to Marketplace** - - Follow `.github/MARKETPLACE_PUBLISHING.md` - - Create v1.0.0 release - - Enable Marketplace listing - -5. **Post-Launch** - - Monitor for issues - - Respond to feedback - - Plan v1.1.0 enhancements - -## Conclusion - -The official GoSQLX GitHub Action is **complete and ready for testing/publishing**. It provides: - -- 🚀 Ultra-fast SQL validation (100-1000x faster than alternatives) -- 🎯 Comprehensive feature set with 11 inputs and 4 outputs -- 📚 Extensive documentation with 50+ examples -- 🧪 Complete test suite with 7 test scenarios -- 🔧 Easy integration with existing workflows -- 📊 Performance metrics and summaries -- 🛡️ Production-ready with proper error handling - -Ready for v1.0.0 release! 
🎉 - ---- - -**Implementation completed**: 2025-11-16 -**Ready for**: Testing → Publishing → Marketplace listing -**Status**: ✅ Production Ready diff --git a/README.md b/README.md index 9f6a7d3..0e6dcf5 100644 --- a/README.md +++ b/README.md @@ -36,31 +36,31 @@ --- -## 🎯 Overview +## Overview GoSQLX is a high-performance SQL parsing library designed for production use. It provides zero-copy tokenization, intelligent object pooling, and comprehensive SQL dialect support while maintaining a simple, idiomatic Go API. -### ✨ Key Features - -- **🚀 Blazing Fast**: **1.38M+ ops/sec** sustained, **1.5M+ ops/sec** peak throughput -- **💾 Memory Efficient**: **60-80% reduction** through intelligent object pooling -- **🔒 Thread-Safe**: **Race-free**, linear scaling to **128+ cores**, **0 race conditions** detected -- **✅ Production-Grade Testing**: **Token 100%** ⭐, **Keywords 100%** ⭐, **Errors 95.6%**, **Tokenizer 76.1%**, **Parser 76.1%**, **CLI 63.3%** coverage (Phases 1-3 complete) -- **🔗 Complete JOIN Support**: All JOIN types (INNER/LEFT/RIGHT/FULL OUTER/CROSS/NATURAL) with proper tree logic -- **🔄 Advanced SQL Features**: CTEs with RECURSIVE support, Set Operations (UNION/EXCEPT/INTERSECT) -- **🪟 Window Functions**: Complete SQL-99 window function support with OVER clause, PARTITION BY, ORDER BY, frame specifications -- **🔄 MERGE Statements**: Full SQL:2003 MERGE support with WHEN MATCHED/NOT MATCHED clauses -- **📊 Grouping Operations**: GROUPING SETS, ROLLUP, CUBE (SQL-99 T431) -- **🗃️ Materialized Views**: CREATE, DROP, REFRESH MATERIALIZED VIEW support -- **📋 Table Partitioning**: PARTITION BY RANGE, LIST, HASH support -- **🔐 SQL Injection Detection**: Built-in security scanner (`pkg/sql/security`) for injection pattern detection -- **🌍 Unicode Support**: Complete UTF-8 support for international SQL -- **🔧 Multi-Dialect**: PostgreSQL, MySQL, SQL Server, Oracle, SQLite -- **📊 Zero-Copy**: Direct byte slice operations, **<1μs latency** -- **🔍 Intelligent Errors**: 
Structured error codes with typo detection, context highlighting, and helpful hints -- **🏗️ Production Ready**: Battle-tested with **0 race conditions** detected, **~80-85% SQL-99 compliance** - -### 🎯 Performance & Quality Highlights (v1.5.0 + Phases 2-3) +### Key Features + +- **Blazing Fast**: 1.38M+ ops/sec sustained, 1.5M+ ops/sec peak throughput +- **Memory Efficient**: 60-80% reduction through intelligent object pooling +- **Thread-Safe**: Race-free, linear scaling to 128+ cores, 0 race conditions detected +- **Production-Grade Testing**: Token 100%, Keywords 100%, Errors 95.6%, Tokenizer 76.1%, Parser 76.1%, CLI 63.3% coverage +- **Complete JOIN Support**: All JOIN types (INNER/LEFT/RIGHT/FULL OUTER/CROSS/NATURAL) with proper tree logic +- **Advanced SQL Features**: CTEs with RECURSIVE support, Set Operations (UNION/EXCEPT/INTERSECT) +- **Window Functions**: Complete SQL-99 window function support with OVER clause, PARTITION BY, ORDER BY, frame specs +- **MERGE Statements**: Full SQL:2003 MERGE support with WHEN MATCHED/NOT MATCHED clauses +- **Grouping Operations**: GROUPING SETS, ROLLUP, CUBE (SQL-99 T431) +- **Materialized Views**: CREATE, DROP, REFRESH MATERIALIZED VIEW support +- **Table Partitioning**: PARTITION BY RANGE, LIST, HASH support +- **SQL Injection Detection**: Built-in security scanner (`pkg/sql/security`) for injection pattern detection +- **Unicode Support**: Complete UTF-8 support for international SQL +- **Multi-Dialect**: PostgreSQL, MySQL, SQL Server, Oracle, SQLite +- **Zero-Copy**: Direct byte slice operations, <1μs latency +- **Intelligent Errors**: Structured error codes with typo detection, context highlighting, and helpful hints +- **Production Ready**: Battle-tested with 0 race conditions detected, ~80-85% SQL-99 compliance + +### Performance & Quality Highlights (v1.5.0+)
@@ -72,7 +72,7 @@ GoSQLX is a high-performance SQL parsing library designed for production use. It
-### 📈 Project Stats +### Project Stats
@@ -85,7 +85,7 @@ GoSQLX is a high-performance SQL parsing library designed for production use. It
-## 📦 Installation +## Installation ### Library Installation ```bash @@ -107,7 +107,7 @@ go build -o gosqlx ./cmd/gosqlx - Go 1.24 or higher - No external dependencies -## 🚀 Quick Start +## Quick Start ### CLI Usage @@ -305,22 +305,21 @@ func main() { > **Note:** The simple API has < 1% performance overhead compared to low-level API. Use the simple API unless you need fine-grained control. -## 📚 Documentation +## Documentation -### 📖 Comprehensive Guides +### Comprehensive Guides | Guide | Description | |-------|-------------| -| [**Getting Started**](docs/GETTING_STARTED.md) | ⚡ Get started in 5 minutes | -| [**Comparison Guide**](docs/COMPARISON.md) | 🆚 GoSQLX vs SQLFluff, JSQLParser, pg_query | -| [**Error Reference**](docs/ERROR_REFERENCE.md) | 🔍 Complete error code reference with solutions | +| [**Getting Started**](docs/GETTING_STARTED.md) | Get started in 5 minutes | +| [**Comparison Guide**](docs/COMPARISON.md) | GoSQLX vs SQLFluff, JSQLParser, pg_query | | [**CLI Guide**](docs/CLI_GUIDE.md) | Complete CLI documentation and usage examples | | [**API Reference**](docs/API_REFERENCE.md) | Complete API documentation with examples | | [**Usage Guide**](docs/USAGE_GUIDE.md) | Detailed patterns and best practices | | [**Architecture**](docs/ARCHITECTURE.md) | System design and internal architecture | | [**Troubleshooting**](docs/TROUBLESHOOTING.md) | Common issues and solutions | -### 🚀 Getting Started +### Getting Started | Document | Purpose | |----------|---------| @@ -329,7 +328,7 @@ func main() { | [**Security Analysis**](docs/SECURITY.md) | Security assessment | | [**Examples**](examples/) | Working code examples | -### 📋 Quick Links +### Quick Links - [Installation & Setup](docs/USAGE_GUIDE.md#getting-started) - [Basic Usage](docs/USAGE_GUIDE.md#basic-usage) @@ -338,7 +337,7 @@ func main() { - [Error Handling](docs/TROUBLESHOOTING.md#error-messages) - [FAQ](docs/TROUBLESHOOTING.md#faq) -### 🔄 Advanced SQL Features (v1.2.0) +### Advanced SQL Features 
(v1.2.0) GoSQLX now supports Common Table Expressions (CTEs) and Set Operations alongside complete JOIN support: @@ -441,7 +440,7 @@ if selectStmt, ok := ast.Statements[0].(*ast.SelectStatement); ok { - ✅ `NATURAL JOIN` - Natural joins (implicit ON clause) - ✅ `USING (column)` - Single-column using clause -### 🆕 Advanced SQL Features (v1.4+) +### Advanced SQL Features (v1.4+) #### MERGE Statements (SQL:2003 F312) @@ -538,7 +537,7 @@ sql := `SELECT * FROM users WHERE deleted_at IS NULL` sql := `SELECT * FROM users ORDER BY last_login DESC NULLS LAST` ``` -## 💻 Examples +## Examples ### Multi-Dialect Support @@ -593,9 +592,9 @@ func ProcessConcurrently(queries []string) { } ``` -## 📊 Performance +## Performance -### 🎯 v1.0.0 Performance Improvements +### v1.0.0 Performance Improvements | Metric | Previous | **v1.0.0** | Improvement | |--------|----------|------------|-------------| @@ -635,9 +634,9 @@ BenchmarkTokensPerSecond-16 815,439 1,378 ns/op 8,847,625 t | **Scaling** | **Linear to 128+** | Perfect concurrency | | **Pool Efficiency** | **95%+ hit rate** | Effective reuse | -See [PERFORMANCE_REPORT.md](PERFORMANCE_REPORT.md) for detailed analysis. +Run `go test -bench=. -benchmem ./pkg/...` for detailed performance analysis. -## 🧪 Testing +## Testing ```bash # Run all tests with race detection @@ -655,7 +654,7 @@ go test -v ./pkg/sql/tokenizer/ go test -v ./pkg/sql/parser/ ``` -## 🏗️ Project Structure +## Project Structure ``` GoSQLX/ @@ -686,7 +685,7 @@ GoSQLX/ └── tools/ # Development tools ``` -## 🛠️ Development +## Development ### Prerequisites @@ -749,7 +748,7 @@ task quality task check ``` -## 🤝 Contributing +## Contributing We welcome contributions! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines. @@ -769,11 +768,7 @@ We welcome contributions! 
Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guid - Update documentation for API changes - Add benchmarks for performance-critical code -## 📄 License - -This project is licensed under the GNU Affero General Public License v3.0 (AGPL-3.0) - see the [LICENSE](LICENSE) file for details. - -## 🚀 Roadmap +## Roadmap ### Phase 1: Core SQL Enhancements (Q1 2025) - v1.1.0 ✅ - ✅ **Complete JOIN support** (INNER/LEFT/RIGHT/FULL OUTER/CROSS/NATURAL) @@ -804,9 +799,9 @@ This project is licensed under the GNU Affero General Public License v3.0 (AGPL- - 📋 Performance analysis and hints - 📋 Schema validation -[📄 Full Architectural Review & Roadmap](ARCHITECTURAL_REVIEW_AND_ROADMAP.md) +See [ARCHITECTURE.md](docs/ARCHITECTURE.md) for detailed system design -## 🤝 Community & Support +## Community & Support
@@ -826,7 +821,7 @@ This project is licensed under the GNU Affero General Public License v3.0 (AGPL-
-## 👥 Contributors +## Contributors
@@ -841,7 +836,6 @@ This project is licensed under the GNU Affero General Public License v3.0 (AGPL- We love your input! We want to make contributing as easy and transparent as possible. Contributing Guide -Code of Conduct Start Contributing #### Quick Contribution Guide @@ -854,7 +848,7 @@ We love your input! We want to make contributing as easy and transparent as poss
-## 🎯 Use Cases +## Use Cases
@@ -869,7 +863,7 @@ We love your input! We want to make contributing as easy and transparent as poss
-## 📊 Who's Using GoSQLX +## Who's Using GoSQLX
@@ -878,7 +872,7 @@ We love your input! We want to make contributing as easy and transparent as poss
-## 📈 Project Metrics +## Project Metrics
@@ -895,28 +889,7 @@ graph LR
-## 🗺️ Roadmap - -
- -### Release Timeline - -| Version | Status | Release Date | Features | -|---------|--------|--------------|----------| -| **v0.9.0** | ✅ Released | 2024-01-15 | Initial release | -| **v1.0.0** | ✅ Released | 2024-12-01 | Production ready, +47% performance | -| **v1.1.0** | ✅ Released | 2025-01-03 | Complete JOIN support, error handling | -| **v1.2.0** | ✅ Released | 2025-08-15 | CTEs, set operations, ~70% SQL-92 compliance | -| **v1.3.0** | ✅ Released | 2025-09-04 | Window functions, ~80-85% SQL-99 compliance | -| **v1.4.0** | ✅ Released | 2025-09-07 | Production CLI, high-performance commands, memory leak fixes | -| **v1.5.0** | 🎉 Current | 2025-11-15 | Phase 1 Test Coverage: CLI 63.3%, Parser 75%, Tokenizer 76.5% | -| **v2.0.0** | 🔮 Future | Q4 2025 | Dialect specialization, advanced features | - -Full Roadmap - -
- -## 💖 Support This Project +## Support This Project
@@ -936,7 +909,7 @@ If GoSQLX helps your project, please consider:
-## 📜 License +## License
diff --git a/RELEASE_NOTES_v1.5.0.md b/RELEASE_NOTES_v1.5.0.md deleted file mode 100644 index d019a14..0000000 --- a/RELEASE_NOTES_v1.5.0.md +++ /dev/null @@ -1,455 +0,0 @@ -# GoSQLX v1.5.0 - Phase 1 Test Coverage Achievement - -**Release Date**: November 15, 2025 -**Release Type**: Minor Version Release -**Focus**: Comprehensive Test Coverage & Quality Assurance - ---- - -## 🎯 Executive Summary - -GoSQLX v1.5.0 marks a **major quality milestone** with the completion of Phase 1 comprehensive test coverage improvements across CLI, Parser, and Tokenizer packages. This release establishes GoSQLX as **enterprise-grade software** with test coverage exceeding industry standards and validates production readiness through extensive real-world SQL query testing. - -### Key Achievements - -✅ **Triple Coverage Success**: All three Phase 1 coverage targets met or exceeded -✅ **3,094 Lines of Tests**: Comprehensive test suite across 7 new test files -✅ **115+ Real-World Queries**: Production SQL validation across multiple database dialects -✅ **Zero Race Conditions**: Thread-safe operation confirmed across 20,000+ concurrent operations -✅ **95%+ Success Rate**: Real-world SQL query parsing and validation -✅ **International Validation**: Full UTF-8 testing across 8 languages -✅ **Code Quality**: 529 lines removed through refactoring and modernization - ---- - -## 📊 Test Coverage Achievements - -### Coverage Targets vs. 
Results - -| Package | Baseline | Target | **Achieved** | Status | -|---------|----------|--------|--------------|--------| -| **CLI** | ~50% | 60% | **63.3%** | ✅ **Exceeded by 3.3%** | -| **Parser** | 57.4% | 75% | **75.0%** | ✅ **Met exactly** | -| **Tokenizer** | 60.0% | 70% | **76.5%** | ✅ **Exceeded by 6.5%** | - -### Improvement Metrics - -- **CLI**: +13.3 percentage points -- **Parser**: +17.6 percentage points -- **Tokenizer**: +16.5 percentage points -- **Total New Test Code**: 3,094 lines across 7 files -- **Total Test Cases**: 150+ comprehensive test scenarios - ---- - -## 🧪 New Test Files & Coverage - -### Parser Package (2,071 lines, 5 files) - -1. **parser_additional_coverage_test.go** (420 lines) - - CTE (Common Table Expression) parsing tests - - Window function comprehensive coverage - - Advanced SELECT statement scenarios - - Set operations (UNION, EXCEPT, INTERSECT) - -2. **parser_edge_cases_test.go** (450 lines) - - Boundary condition testing - - Malformed SQL handling - - Edge cases for all statement types - - Empty and null value handling - -3. **parser_error_recovery_test.go** (380 lines) - - Error recovery mechanisms - - Contextual error messages - - Multiple error scenarios - - Parser resilience testing - -4. **parser_final_coverage_test.go** (350 lines) - - Coverage gap filling - - Uncovered code path testing - - Complex query combinations - - Final validation scenarios - -5. **parser_targeted_coverage_test.go** (410 lines) - - Targeted function coverage improvements - - Specific parsing method validation - - Expression parsing edge cases - - Operator precedence testing - -6. **error_recovery_test.go** (61 lines) - - Integration-level error recovery - - Cross-module error handling - - Error propagation testing - -7. 
**integration_test.go** (311 lines) - - Real-world SQL validation framework - - 115+ production queries tested - - Multi-dialect support validation - - Success rate tracking and reporting - -### Tokenizer Package (712 lines, 1 file) - -1. **tokenizer_coverage_test.go** (712 lines) - - **Backtick Identifiers**: MySQL-style `` `identifier` `` support - - **Triple-Quoted Strings**: Python-style `'''string'''` and `"""string"""` - - **Escape Sequences**: Full coverage of `\n`, `\t`, `\r`, `\\`, `\'`, `\"` - - **Scientific Notation**: `1.23e4`, `1.23E+4`, `1.23e-4` formats - - **UTF-8 Multi-byte**: Chinese, Japanese, Korean, Arabic, emoji support - - **Operators & Punctuation**: Comprehensive operator tokenization - - **Custom Keywords**: User-defined keyword support testing - - **Debug Logger**: Logger functionality validation - - **13 test functions** with ~110 test cases total - -### CLI Package (318 lines, 1 file) - -1. **sql_analyzer_test.go** (318 lines) - - Analyze command comprehensive testing - - Validate command edge cases - - Format command UTF-8 handling - - Parse command output validation - - File vs SQL string detection - - Large file handling - - Invalid SQL error reporting - - Empty input edge cases - ---- - -## 📈 Function-Level Coverage Improvements - -### Tokenizer Package - -| Function | Before | **After** | Improvement | -|----------|--------|-----------|-------------| -| `handleEscapeSequence` | 0.0% | **85.7%** | **+85.7%** | -| `readTripleQuotedString` | 0.0% | **96.4%** | **+96.4%** | -| `readBacktickIdentifier` | 0.0% | **100%** | **+100%** ⭐ | -| `SetDebugLogger` | 0.0% | **100%** | **+100%** ⭐ | -| `readPunctuation` | 70.2% | **92.3%** | **+22.1%** | -| `readQuotedIdentifier` | 77.8% | **96.3%** | **+18.5%** | -| `readNumber` | 77.6% | **85.7%** | **+8.1%** | -| `TokenizeContext` | 81.1% | **84.9%** | **+3.8%** | - ---- - -## 🔧 CLI Code Refactoring - -### Code Reduction: -529 Lines - -**Files Improved:** -- `analyze.go` - Improved error 
handling consistency, legacy code removal -- `config.go` - Enhanced configuration management, cleaner structure -- `format.go` - Better error messages, enhanced UTF-8 handling -- `input_utils.go` - Consolidated input reading logic, DRY principle -- `parse.go` - Improved output formatting, cleaner error paths -- `validate.go` - Enhanced validation error reporting, better UX - -**Impact:** -- **Better Maintainability**: Cleaner code structure with less duplication -- **Enhanced Error Messages**: More helpful and actionable error feedback -- **Improved UTF-8 Handling**: Better international character support -- **Consolidated Logic**: Single source of truth for common operations - ---- - -## 🌍 Real-World SQL Testing - -### Test Data Structure - -``` -testdata/ -├── postgresql/ -│ └── queries.sql # PostgreSQL-specific query patterns -├── mysql/ -│ └── queries.sql # MySQL dialect queries -└── real_world/ - └── ecommerce.sql # Complex e-commerce workload queries -``` - -### Validation Results - -- **Total Queries Tested**: 115+ -- **Database Dialects**: PostgreSQL, MySQL, SQL Server, Oracle, SQLite -- **Success Rate**: **95%+** on real-world production queries -- **Query Types**: SELECT, INSERT, UPDATE, DELETE, CTEs, Window Functions, JOINs -- **Complexity Levels**: Simple (1-table), Medium (2-5 tables), Complex (6+ tables, CTEs) - -### International SQL Support (UTF-8 Validation) - -**8 Languages Tested:** -1. **Chinese** (Simplified & Traditional) -2. **Japanese** (Hiragana, Katakana, Kanji) -3. **Korean** (Hangul) -4. **Arabic** (Right-to-left) -5. **Russian** (Cyrillic) -6. **Spanish** (Latin characters with accents) -7. **French** (Latin characters with diacritics) -8. **German** (Latin characters with umlauts) - -**Plus**: Emoji support (🚀, ✅, 📊, etc.) 
- ---- - -## ✅ Quality Assurance - -### All Quality Checks Passed - -- ✅ **Race Detection**: `go test -race ./...` - Zero race conditions detected -- ✅ **Code Formatting**: `go fmt ./...` - All code properly formatted -- ✅ **Static Analysis**: `go vet ./...` - No issues reported -- ✅ **Linting**: `golangci-lint` - All checks passing -- ✅ **Security**: GitGuardian - No security issues detected -- ✅ **Benchmarks**: All performance benchmarks passing -- ✅ **CI/CD**: 16/16 checks passing across all platforms and Go versions - -### Platform & Go Version Testing - -**Platforms Tested:** -- ✅ Ubuntu Latest (Linux) -- ✅ macOS Latest -- ✅ Windows Latest - -**Go Versions:** -- ✅ Go 1.19 -- ✅ Go 1.20 -- ✅ Go 1.21 - -### Testing Infrastructure Enhancements - -1. **Short Mode Support**: Fast pre-commit hooks for developer productivity - ```bash - go test -short ./... # Skips long-running integration tests - ``` - -2. **Integration Test Framework**: Real-world SQL validation with reporting - - Success rate tracking - - Failure analysis and categorization - - Performance metrics collection - -3. **Race Detection**: Comprehensive concurrent usage validation - ```bash - go test -race ./... # 20,000+ concurrent operations tested - ``` - -4. 
**Edge Case Coverage**: Boundary conditions, empty inputs, malformed SQL - - Empty SQL strings - - Extremely large queries (200+ columns) - - Deeply nested expressions - - Unicode edge cases - ---- - -## 🚀 Performance Validation - -### Maintained Performance Metrics - -| Metric | Value | Status | -|--------|-------|--------| -| **Sustained Throughput** | 1.38M+ ops/sec | ✅ Maintained | -| **Peak Throughput** | 1.5M+ ops/sec | ✅ Maintained | -| **Token Processing** | 8M+ tokens/sec | ✅ Maintained | -| **Simple Query Latency** | <280ns (p50) | ✅ Maintained | -| **Complex Query Latency** | <1μs (CTEs/Windows) | ✅ Maintained | -| **Memory Efficiency** | 60-80% reduction | ✅ Maintained | -| **Scaling** | Linear to 128+ cores | ✅ Maintained | -| **Pool Hit Rate** | 95%+ | ✅ Maintained | - -**Key Finding**: All new tests pass with zero performance regression across all metrics. - ---- - -## 📚 Documentation Created - -### Comprehensive Summary Documents - -1. **CLI_REFACTORING_SUMMARY.md** (987 lines) - - Detailed CLI coverage analysis - - Before/after comparisons - - Refactoring impact metrics - - Testing approach documentation - -2. **PARSER_COVERAGE_SUMMARY.md** (424 lines) - - Parser test coverage breakdown - - Function-level improvements - - Integration test results - - Coverage progression tracking - -3. **TOKENIZER_COVERAGE_SUMMARY.md** (454 lines) - - Tokenizer coverage achievement details - - Feature-by-feature testing documentation - - UTF-8 validation results - - Performance impact analysis - -4. 
**SESSION_PROGRESS_SUMMARY.md** (563 lines) - - Overall session progress tracking - - Task completion timeline - - Decision rationale documentation - - Lessons learned - -### Documentation Updates - -- **CHANGELOG.md**: Comprehensive v1.5.0 release notes -- **TASKS.md**: Marked TEST-001, TEST-002, TEST-006 as completed -- **README.md**: Updated with Phase 1 achievements -- **CLAUDE.md**: Project instructions updated with testing methodology - ---- - -## 🔗 Related Pull Request - -**PR #85**: [Phase 1 Test Coverage Achievement - CLI, Parser, and Tokenizer](https://github.com/ajitpratap0/GoSQLX/pull/85) - -### PR Statistics - -- **Files Changed**: 81 files -- **Additions**: +25,883 lines -- **Deletions**: -1,735 lines -- **Net Change**: +24,148 lines -- **Commits**: 20 total (12 feature + 8 CI/CD fixes) -- **CI Checks**: 16/16 passing - - 9 test jobs (3 platforms × 3 Go versions) - - 3 build jobs - - Lint, Security, Benchmark, Claude Review - -### CI/CD Fixes Applied - -During PR review, 8 commits were made to fix CI/CD issues: - -1. Fixed `.gitignore` pattern for CLI refactoring -2. Added missing CLI refactoring files -3. Fixed test skip conditions for unimplemented features -4. Fixed golangci-lint S1009 (unnecessary nil check on slice) -5. Fixed golangci-lint S1016 (struct literal vs type conversion) -6. Fixed pool cleanup bug (interface{} zero value) -7. Added Windows platform skip for permission test -8. 
Fixed staticcheck warnings (U1000 unused code, SA5011 nil dereference) - ---- - -## 🎯 Tasks Completed - -From **TASKS.md**: - -### ✅ TEST-001: Increase Parser Coverage to 75% -- **Status**: COMPLETED -- **Target**: 75% -- **Achieved**: 75.0% (met exactly) -- **Impact**: Production-ready parser with comprehensive test validation - -### ✅ TEST-002: Increase Tokenizer Coverage to 70% -- **Status**: COMPLETED -- **Target**: 70% -- **Achieved**: 76.5% (exceeded by 6.5%) -- **Impact**: Full feature coverage including UTF-8, escape sequences, scientific notation - -### ✅ TEST-006: CLI Commands Coverage to 60% -- **Status**: COMPLETED -- **Target**: 60% -- **Achieved**: 63.3% (exceeded by 3.3%) -- **Impact**: Production-ready CLI with validated edge cases and error handling - ---- - -## 🔄 Backward Compatibility - -### 100% Backward Compatible - -- ✅ **No Breaking Changes**: All existing APIs preserved -- ✅ **No Performance Regression**: All metrics maintained or improved -- ✅ **No Functionality Changes**: All existing features work identically -- ✅ **Test Compatibility**: All previous tests continue passing - -### Safe to Upgrade - -This is a **drop-in replacement** for v1.4.0 with no migration required. Simply update your dependency: - -```bash -go get -u github.com/ajitpratap0/GoSQLX@v1.5.0 -``` - ---- - -## 📦 Installation - -### Go Module - -```bash -go get github.com/ajitpratap0/GoSQLX@v1.5.0 -``` - -### CLI Tool - -```bash -go install github.com/ajitpratap0/GoSQLX/cmd/gosqlx@v1.5.0 -``` - -### From Source - -```bash -git clone https://github.com/ajitpratap0/GoSQLX.git -cd GoSQLX -git checkout v1.5.0 -go build ./... -``` - ---- - -## 🚀 What's Next - -### Recommended Priorities (from TASKS.md) - -1. **TEST-003**: Increase Keywords Coverage to 75% (current: 50.6%) -2. **QW-002**: Enhanced Error Messages with context and suggestions -3. **TEST-004**: Add Fuzz Testing for security and robustness -4. **FEAT-002**: Streaming Parser API for large files (>10MB) -5. 
**INT-001**: Go IDEs Integration (VS Code, GoLand) - -### Upcoming Releases - -- **v1.6.0**: Keywords package coverage + Enhanced error messages -- **v1.7.0**: Fuzz testing + Security hardening -- **v2.0.0**: Dialect specialization + Advanced features - ---- - -## 🤝 Contributors - -Special thanks to all contributors who made this release possible! - -### Core Team -- [@ajitpratap0](https://github.com/ajitpratap0) - Lead Developer - -### Community -- All users who reported issues and provided feedback -- Contributors who submitted bug reports and feature requests - ---- - -## 📄 License - -GNU Affero General Public License v3.0 (AGPL-3.0) - see [LICENSE](LICENSE) file for details - ---- - -## 🔗 Links - -- **Repository**: https://github.com/ajitpratap0/GoSQLX -- **Documentation**: https://pkg.go.dev/github.com/ajitpratap0/GoSQLX -- **Issues**: https://github.com/ajitpratap0/GoSQLX/issues -- **Discussions**: https://github.com/ajitpratap0/GoSQLX/discussions -- **Changelog**: [CHANGELOG.md](CHANGELOG.md) - ---- - -
- -**🎉 Thank you for using GoSQLX! 🎉** - -Star This Repo - -
- ---- - -**Generated with [Claude Code](https://claude.com/claude-code)** - -Co-Authored-By: Claude diff --git a/SECURITY_SETUP.md b/SECURITY_SETUP.md deleted file mode 100644 index 4c5f1cf..0000000 --- a/SECURITY_SETUP.md +++ /dev/null @@ -1,344 +0,0 @@ -# Security Scanning Setup Guide - -This document provides instructions for maintainers on the security scanning infrastructure implemented for GoSQLX. - -## Overview - -GoSQLX implements a comprehensive security scanning system with four key components: - -1. **GoSec** - Static security analysis for Go code -2. **Trivy** - Vulnerability scanner for dependencies and configurations -3. **GovulnCheck** - Official Go vulnerability database checker -4. **Dependabot** - Automated dependency update management - -## Workflow Configuration - -### Security Workflow (`.github/workflows/security.yml`) - -**Triggers:** -- Push to `main` and `develop` branches -- Pull requests to `main` branch -- Weekly schedule (Sundays at midnight UTC) -- Manual dispatch via GitHub Actions UI - -**Jobs:** - -1. **GoSec Security Scanner** - - Scans Go code for security issues - - Uploads SARIF results to GitHub Security tab - - Fails on high/critical severity issues - - Uses: `securego/gosec@v2.21.4` - -2. **Trivy Repository Scan** - - Scans filesystem for vulnerabilities in dependencies - - Checks for CRITICAL, HIGH, and MEDIUM severity issues - - Uploads results to GitHub Code Scanning - - Uses: `aquasecurity/trivy-action@0.28.0` - -3. **Trivy Config Scan** - - Scans configuration files for security issues - - Checks GitHub Actions workflows, Dockerfiles, etc. - - Fails on high/critical configuration issues - - Uses: `aquasecurity/trivy-action@0.28.0` - -4. **Dependency Review** (PR only) - - Reviews new dependencies introduced in PRs - - Checks license compatibility - - Allowed licenses: MIT, Apache-2.0, BSD-2-Clause, BSD-3-Clause, ISC - - Uses: `actions/dependency-review-action@v4` - -5. 
**GovulnCheck** - - Official Go vulnerability checker - - Scans all Go dependencies against vulnerability database - - Provides detailed vulnerability information - - Fails on any known vulnerabilities - -6. **Security Summary** - - Aggregates all scan results - - Generates GitHub Actions summary - - Fails if any scanner reports issues - -### Dependabot Configuration (`.github/dependabot.yml`) - -**Go Modules Updates:** -- **Schedule**: Daily at 3:00 AM EST -- **Limit**: 10 open PRs maximum -- **Grouping**: Minor and patch updates grouped together -- **Major Updates**: Separated for careful review -- **Labels**: `dependencies`, `automated`, `go` -- **Commit Prefix**: `chore(deps)` - -**GitHub Actions Updates:** -- **Schedule**: Weekly on Mondays at 3:00 AM EST -- **Limit**: 5 open PRs maximum -- **Grouping**: Minor and patch updates grouped together -- **Labels**: `dependencies`, `automated`, `github-actions` -- **Commit Prefix**: `chore(ci)` - -## Enabling Security Features - -### Step 1: Enable GitHub Security Features - -1. Navigate to repository **Settings** → **Security & analysis** -2. Enable the following features: - - ✅ **Dependency graph** (usually enabled by default) - - ✅ **Dependabot alerts** - - ✅ **Dependabot security updates** - - ✅ **Code scanning** (CodeQL analysis) - - ✅ **Secret scanning** - - ✅ **Secret scanning push protection** - -### Step 2: Configure Branch Protection - -1. Navigate to **Settings** → **Branches** -2. Add branch protection rule for `main`: - - ✅ Require status checks to pass before merging - - Select required checks: - - `GoSec Security Scanner` - - `Trivy Repository Scan` - - `Trivy Config Scan` - - `Go Vulnerability Check` - - ✅ Require branches to be up to date before merging - - ✅ Require signed commits (recommended) - -### Step 3: Configure Security Notifications - -1. Navigate to **Settings** → **Notifications** -2. 
Configure security alert preferences: - - ✅ Email notifications for security advisories - - ✅ Web notifications for Dependabot alerts - - ✅ Email notifications for code scanning alerts - -### Step 4: Review Initial Scan Results - -After merging the security workflow: - -1. Navigate to **Actions** tab -2. Manually trigger the "Security Scanning" workflow -3. Review results in the workflow run summary -4. Address any findings before enabling required checks - -## Using Security Features - -### Viewing Security Alerts - -**Code Scanning Alerts:** -1. Navigate to **Security** → **Code scanning** -2. Review alerts by severity -3. Click on alerts for detailed information -4. Dismiss false positives with justification - -**Dependabot Alerts:** -1. Navigate to **Security** → **Dependabot** -2. Review vulnerable dependencies -3. Accept Dependabot PR to update dependency -4. Or dismiss alert if not applicable - -**Secret Scanning:** -1. Navigate to **Security** → **Secret scanning** -2. Review detected secrets -3. Rotate compromised credentials immediately -4. Close alert after rotation - -### Handling Dependabot PRs - -**Auto-Merge Guidelines:** - -Safe to auto-merge: -- ✅ Patch version updates (1.2.3 → 1.2.4) -- ✅ Minor version updates with passing tests (1.2.0 → 1.3.0) -- ✅ Security patch updates (urgent) - -Requires manual review: -- ⚠️ Major version updates (1.x.x → 2.0.0) -- ⚠️ Updates with failing tests -- ⚠️ Updates to core dependencies - -**Review Process:** -1. Check Dependabot PR description for changelog -2. Review compatibility notes -3. Ensure all CI checks pass -4. Review security implications -5. Merge or request changes - -### Responding to Security Findings - -**Critical/High Severity:** -1. Create immediate hotfix branch -2. Apply security patch -3. Expedite review and merge -4. Create security advisory if user-facing -5. Release patch version within 24-48 hours - -**Medium Severity:** -1. Create issue for tracking -2. Schedule for next minor release -3. 
Apply fix in regular development cycle -4. Document in changelog - -**Low Severity:** -1. Create issue for tracking -2. Schedule for maintenance release -3. May be deferred if low impact - -## Manual Security Testing - -### Running GoSec Locally - -```bash -# Install gosec -go install github.com/securego/gosec/v2/cmd/gosec@latest - -# Run full scan -gosec -fmt=json -out=results.json ./... - -# Run with specific severity -gosec -severity=medium -confidence=medium ./... - -# Exclude specific checks -gosec -exclude=G104,G107 ./... -``` - -### Running Trivy Locally - -```bash -# Install trivy (macOS) -brew install aquasecurity/trivy/trivy - -# Scan repository -trivy fs --severity CRITICAL,HIGH,MEDIUM . - -# Scan specific Go modules -trivy fs --scanners vuln --severity HIGH,CRITICAL ./go.mod - -# Generate report -trivy fs --format json --output trivy-report.json . -``` - -### Running GovulnCheck Locally - -```bash -# Install govulncheck -go install golang.org/x/vuln/cmd/govulncheck@latest - -# Scan project -govulncheck ./... - -# Verbose output -govulncheck -show verbose ./... - -# Check specific packages -govulncheck ./pkg/sql/parser/ -``` - -## Security Metrics and Monitoring - -### Key Metrics to Track - -1. **Vulnerability Resolution Time** - - Target: < 7 days for high/critical - - Target: < 30 days for medium/low - -2. **Dependabot PR Merge Rate** - - Target: > 80% within 7 days - - Monitor for outdated dependencies - -3. **Security Alert Backlog** - - Target: < 5 open security alerts - - Weekly review of all alerts - -4. 
**False Positive Rate** - - Track dismissed alerts - - Improve scanning configuration - -### Security Dashboard - -Create a security dashboard tracking: -- Number of open security alerts by severity -- Time to resolution for security issues -- Dependency freshness metrics -- Compliance with security policies - -## Troubleshooting - -### Common Issues - -**Issue: GoSec false positives** -```bash -# Add exclusion comment in code -// #nosec G104 -- Intentional: error handling not required here -_, _ = fmt.Fprintf(w, "output") -``` - -**Issue: Trivy scanning timeout** -```yaml -# Increase timeout in workflow -- uses: aquasecurity/trivy-action@0.28.0 - with: - timeout: '10m' -``` - -**Issue: Dependabot PRs failing tests** -1. Review test failures -2. Update tests if API changes -3. Comment on Dependabot PR to trigger rebase -4. Close PR if update incompatible - -**Issue: Too many Dependabot PRs** -```yaml -# Reduce frequency in dependabot.yml -schedule: - interval: "weekly" # Change from "daily" -``` - -## Best Practices - -### For Maintainers - -1. **Review Weekly Scans** - - Check Sunday scan results every Monday - - Prioritize security findings - -2. **Keep Actions Updated** - - Accept Dependabot PRs for GitHub Actions - - Review action changelogs - -3. **Document Security Decisions** - - Add comments when dismissing alerts - - Document risk acceptance in issues - -4. **Regular Security Audits** - - Quarterly review of security posture - - Annual penetration testing consideration - -### For Contributors - -1. **Run Security Checks Locally** - - Run gosec before submitting PRs - - Check for obvious security issues - -2. **Security-Conscious Coding** - - Avoid hardcoded credentials - - Use secure defaults - - Follow OWASP guidelines - -3. 
**Dependency Management** - - Minimize new dependencies - - Justify dependency additions - - Check dependency security history - -## References - -- [GoSec Documentation](https://github.com/securego/gosec) -- [Trivy Documentation](https://aquasecurity.github.io/trivy/) -- [GovulnCheck Documentation](https://pkg.go.dev/golang.org/x/vuln/cmd/govulncheck) -- [Dependabot Documentation](https://docs.github.com/en/code-security/dependabot) -- [GitHub Code Scanning](https://docs.github.com/en/code-security/code-scanning) -- [OWASP Secure Coding Practices](https://owasp.org/www-project-secure-coding-practices-quick-reference-guide/) - -## Support - -For questions about security scanning: -- Review existing security documentation in `SECURITY.md` -- Open a discussion in GitHub Discussions -- Contact maintainers for urgent security matters diff --git a/archive/historical-architecture-docs/README.md b/archive/historical-architecture-docs/README.md index ee86190..87f3396 100644 --- a/archive/historical-architecture-docs/README.md +++ b/archive/historical-architecture-docs/README.md @@ -26,11 +26,10 @@ This directory contains historical architectural reviews and performance reports For current architecture, performance, and roadmap information, please refer to: -- **Current Architecture**: `/docs/ARCHITECTURE.md` -- **Current Performance**: Root `README.md` Performance section -- **Current Roadmap**: `/COMPREHENSIVE_ROADMAP_2025.md` -- **Release Notes**: `/RELEASE_NOTES_v1.5.0.md` -- **Development Guide**: `/CLAUDE.md` +- **Current Architecture**: [../../docs/ARCHITECTURE.md](../../docs/ARCHITECTURE.md) +- **Current Performance**: Root [README.md](../../README.md) Performance section +- **Release Notes**: [../../CHANGELOG.md](../../CHANGELOG.md) +- **Development Guide**: [../../CLAUDE.md](../../CLAUDE.md) ## Why These Documents Are Archived diff --git a/archive/historical-testing-reports/README.md b/archive/historical-testing-reports/README.md index 83685a1..f6bc133 100644 --- 
a/archive/historical-testing-reports/README.md +++ b/archive/historical-testing-reports/README.md @@ -29,9 +29,8 @@ These reports show a 78.3% pass rate and critical race conditions. The current c ## Current Status Reports For current codebase assessment, refer to: -- **COMPREHENSIVE_TEST_FINAL_REPORT.md** - Latest comprehensive validation -- **FINAL_RACE_DETECTION_VALIDATION.md** - Current race condition status -- **CLAUDE.md** - Production readiness documentation +- [**CHANGELOG.md**](../../CHANGELOG.md) - Release history and validation status +- [**CLAUDE.md**](../../CLAUDE.md) - Production readiness documentation and current metrics ## Why Archived? diff --git a/docs/API_REFERENCE.md b/docs/API_REFERENCE.md index c803235..54faac9 100644 --- a/docs/API_REFERENCE.md +++ b/docs/API_REFERENCE.md @@ -4,41 +4,15 @@ - [Package Overview](#package-overview) - [High-Level API (pkg/gosqlx)](#high-level-api) - - [Parsing Functions](#parsing-functions) - - [Validation Functions](#validation-functions) - - [Metadata Extraction](#metadata-extraction) - [Tokenizer API](#tokenizer-api) - - [Functions](#functions) - - [Supported Token Types](#supported-token-types) - [Parser API](#parser-api) - - [Type: Parser](#type-parser) - [AST API](#ast-api) - - [Core Interfaces](#core-interfaces) - - [DML Statement Types](#dml-statement-types) (SELECT, INSERT, UPDATE, DELETE, MERGE) - - [DDL Statement Types](#ddl-statement-types) (CREATE, ALTER, DROP) - - [CTE and Set Operation Types](#cte-and-set-operation-types) - - [Expression Types](#expression-types) - - [Grouping Set Types](#grouping-set-types) (ROLLUP, CUBE, GROUPING SETS) - - [Window Function Types](#window-function-types) - - [Supporting Types](#supporting-types) - - [Object Pool Functions](#object-pool-functions) - - [Visitor Pattern](#visitor-pattern) - [Keywords Package](#keywords-package) - - [Core Types](#core-types) - - [Dialect-Specific Keywords](#dialect-specific-keywords) - [Models](#models) - [Error 
Handling](#error-handling) - - [Error Codes](#error-codes) - - [Error Builder Functions](#error-builder-functions) - [Metrics Package](#metrics-package) - - [Configuration Functions](#configuration-functions) - - [Recording Functions](#recording-functions) - - [Query Functions](#query-functions) - [Security Package](#security-package) - - [Scanner Types](#scanner-types) - - [Pattern Detection](#pattern-detection) - - [Severity Levels](#severity-levels) -- [Performance Considerations](#performance-considerations) +- [Linter Package](#linter-package) ## Package Overview @@ -48,50 +22,46 @@ GoSQLX is organized into the following packages: github.com/ajitpratap0/GoSQLX/ ├── pkg/ │ ├── gosqlx/ # High-level convenience API -│ ├── models/ # Core data structures +│ ├── models/ # Core data structures (100% coverage) │ ├── sql/ -│ │ ├── tokenizer/ # SQL lexical analysis -│ │ ├── parser/ # SQL syntax parsing -│ │ ├── ast/ # Abstract syntax tree -│ │ ├── keywords/ # SQL keyword definitions -│ │ ├── token/ # Token types and utilities -│ │ └── security/ # SQL injection detection -│ ├── errors/ # Structured error handling -│ ├── metrics/ # Performance monitoring -│ └── linter/ # SQL linting rules engine +│ │ ├── tokenizer/ # SQL lexical analysis (75.3% coverage) +│ │ ├── parser/ # SQL syntax parsing (76.1% coverage) +│ │ ├── ast/ # Abstract syntax tree (80.3% coverage) +│ │ ├── keywords/ # SQL keyword definitions (100% coverage) +│ │ ├── token/ # Token types and utilities (68.8% coverage) +│ │ ├── security/ # SQL injection detection (90.2% coverage) +│ │ └── monitor/ # Parser monitoring (98.6% coverage) +│ ├── errors/ # Structured error handling (91.9% coverage) +│ ├── metrics/ # Performance monitoring (73.9% coverage) +│ ├── linter/ # SQL linting rules engine (96.7% coverage) +│ ├── lsp/ # Language Server Protocol (70.2% coverage) +│ ├── config/ # Configuration management (81.8% coverage) +│ └── gosqlx/testing/ # Testing utilities (95.0% coverage) ``` +--- + ## High-Level API 
### Package: `github.com/ajitpratap0/GoSQLX/pkg/gosqlx` -The high-level API provides convenient functions for common SQL parsing operations with automatic object pool management. This is the recommended API for most use cases. +The high-level API provides convenient functions with automatic object pool management. ### Parsing Functions #### `Parse(sql string) (*ast.AST, error)` - -Parse SQL in a single convenient call. +Parse SQL in a single call. ```go -sql := "SELECT * FROM users WHERE active = true" -astNode, err := gosqlx.Parse(sql) +astNode, err := gosqlx.Parse("SELECT * FROM users WHERE active = true") if err != nil { log.Fatal(err) } +defer ast.ReleaseAST(astNode) ``` -**Returns:** -- `*ast.AST`: Parsed abstract syntax tree -- `error`: Parse error if any - -**Use Case:** Simple parsing without timeout requirements - ---- - #### `ParseWithContext(ctx context.Context, sql string) (*ast.AST, error)` - -Parse SQL with context support for cancellation and timeouts. +Parse with context support for cancellation and timeouts. ```go ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) @@ -101,87 +71,51 @@ astNode, err := gosqlx.ParseWithContext(ctx, sql) if err == context.DeadlineExceeded { log.Println("Parsing timed out") } +defer ast.ReleaseAST(astNode) ``` -**Parameters:** -- `ctx`: Context for cancellation/timeout -- `sql`: SQL string to parse - -**Returns:** -- `*ast.AST`: Parsed AST -- `error`: `context.Canceled`, `context.DeadlineExceeded`, or parse error - -**Use Case:** Long-running parsing operations that need cancellation - ---- - #### `ParseWithTimeout(sql string, timeout time.Duration) (*ast.AST, error)` - -Convenience wrapper for parsing with automatic timeout. +Convenience wrapper with automatic timeout. 
```go astNode, err := gosqlx.ParseWithTimeout(sql, 10*time.Second) -if err == context.DeadlineExceeded { - log.Println("Timeout after 10 seconds") -} +defer ast.ReleaseAST(astNode) ``` -**Use Case:** Quick timeout-based parsing without manual context management - ---- - #### `ParseBytes(sql []byte) (*ast.AST, error)` - -Parse SQL from byte slice (zero-copy when already in bytes). +Parse from byte slice (zero-copy when already in bytes). ```go sqlBytes, _ := os.ReadFile("query.sql") astNode, err := gosqlx.ParseBytes(sqlBytes) +defer ast.ReleaseAST(astNode) ``` -**Use Case:** Parsing SQL from file I/O or byte sources - ---- - #### `MustParse(sql string) *ast.AST` - Parse SQL, panicking on error (for tests and initialization). ```go -// In test or init() -ast := gosqlx.MustParse("SELECT 1") +astNode := gosqlx.MustParse("SELECT 1") +defer ast.ReleaseAST(astNode) ``` -**Use Case:** Parsing SQL literals where errors indicate bugs - ---- - #### `ParseMultiple(queries []string) ([]*ast.AST, error)` - -Parse multiple SQL statements efficiently. +Parse multiple SQL statements efficiently (40-60% faster than individual Parse calls). ```go queries := []string{ "SELECT * FROM users", "SELECT * FROM orders", - "SELECT * FROM products", } asts, err := gosqlx.ParseMultiple(queries) +for _, astNode := range asts { + defer ast.ReleaseAST(astNode) +} ``` -**Benefits:** -- Reuses tokenizer and parser objects -- 40-60% faster than individual Parse() calls -- Lower memory allocation - -**Use Case:** Batch processing SQL queries - ---- - ### Validation Functions #### `Validate(sql string) error` - Check if SQL is syntactically valid. ```go @@ -190,414 +124,121 @@ if err := gosqlx.Validate("SELECT * FROM users"); err != nil { } ``` -**Returns:** `nil` if valid, error describing the problem - -**Use Case:** Syntax validation without building full AST - ---- - #### `ValidateMultiple(queries []string) error` - -Validates multiple SQL queries in a batch operation. +Validate multiple queries efficiently.
```go -queries := []string{ - "SELECT * FROM users", - "INSERT INTO logs (msg) VALUES ('test')", - "UPDATE users SET name = 'John' WHERE id = 1", -} +queries := []string{"SELECT * FROM users", "INSERT INTO logs (msg) VALUES ('test')"} if err := gosqlx.ValidateMultiple(queries); err != nil { log.Fatal("Validation failed:", err) } ``` -**Parameters:** -- `queries`: A slice of SQL query strings to validate - -**Returns:** -- `error`: First validation error encountered, or nil if all queries are valid - -**Benefits:** -- Reuses tokenizer and parser objects across queries -- More efficient than calling `Validate()` individually -- Ideal for batch validation scenarios - -**Use Case:** Validating multiple SQL queries efficiently - ---- - -### Metadata Extraction - -#### `ExtractTables(astNode *ast.AST) []string` - -Extract all table names from parsed SQL. - -```go -sql := "SELECT * FROM users u JOIN orders o ON u.id = o.user_id" -astNode, _ := gosqlx.Parse(sql) -tables := gosqlx.ExtractTables(astNode) -// Returns: ["users", "orders"] -``` - -**Extracts from:** -- FROM clauses -- JOIN clauses -- Subqueries and CTEs -- INSERT/UPDATE/DELETE statements - -**Returns:** Deduplicated slice of table names - ---- - -#### `ExtractTablesQualified(astNode *ast.AST) []QualifiedName` - -Extract table names with schema/alias information. - -```go -sql := "SELECT * FROM public.users u" -astNode, _ := gosqlx.Parse(sql) -tables := gosqlx.ExtractTablesQualified(astNode) -// Returns: [QualifiedName{Schema: "public", Name: "users"}] -``` - -**Use Case:** When schema information is needed - ---- - -#### `ExtractColumns(astNode *ast.AST) []string` - -Extract all column references from SQL. 
- -```go -sql := "SELECT id, name, email FROM users WHERE active = true" -astNode, _ := gosqlx.Parse(sql) -columns := gosqlx.ExtractColumns(astNode) -// Returns: ["id", "name", "email", "active"] -``` - -**Extracts from:** -- SELECT columns -- WHERE conditions -- JOIN conditions -- GROUP BY, HAVING, ORDER BY clauses - -**Returns:** Deduplicated slice of column names - ---- - -#### `ExtractColumnsQualified(astNode *ast.AST) []QualifiedName` - -Extract column references with table qualifiers. - -```go -sql := "SELECT u.id, u.name, o.total FROM users u JOIN orders o ON u.id = o.user_id" -astNode, _ := gosqlx.Parse(sql) -columns := gosqlx.ExtractColumnsQualified(astNode) -// Returns qualified names like "u.id", "u.name", "o.total", etc. -``` - -**Use Case:** Understanding column-to-table relationships - ---- - -#### `ExtractFunctions(astNode *ast.AST) []string` - -Extract all function calls from SQL. - -```go -sql := "SELECT COUNT(*), MAX(price), AVG(quantity) FROM products" -astNode, _ := gosqlx.Parse(sql) -functions := gosqlx.ExtractFunctions(astNode) -// Returns: ["COUNT", "MAX", "AVG"] -``` - -**Includes:** -- Aggregate functions (COUNT, SUM, AVG, MIN, MAX) -- Scalar functions (UPPER, LOWER, SUBSTRING, etc.) -- Window functions (ROW_NUMBER, RANK, etc.) - ---- - -### Types - -#### `QualifiedName` - -Represents a schema.table.column qualified name. 
- -```go -type QualifiedName struct { - Schema string // Optional schema name - Table string // Table name - Name string // Column or table name -} -``` - -**Methods:** - -- `String() string` - Returns "schema.table.name" format -- `FullName() string` - Returns meaningful name without schema - -**Examples:** - -```go -// Column reference -col := QualifiedName{Table: "users", Name: "id"} -col.String() // "users.id" -col.FullName() // "users.id" - -// Table reference with schema -tbl := QualifiedName{Schema: "public", Name: "users"} -tbl.String() // "public.users" -tbl.FullName() // "users" - -// 3-part name -full := QualifiedName{Schema: "db", Table: "public", Name: "users"} -full.String() // "db.public.users" -full.FullName() // "public.users" -``` - ---- - -### Known Limitations - -The high-level API extraction functions have the following parser limitations: - -1. **CASE Expressions**: Column references within CASE may not extract correctly -2. **CAST Expressions**: Type conversion expressions not fully supported -3. **IN Expressions**: Complex IN clauses may not parse completely -4. **BETWEEN Expressions**: Range comparisons partially supported -5. **Schema-Qualified Names**: `schema.table` format not fully supported -6. **Complex Recursive CTEs**: Advanced recursive queries may fail - -For queries using these features, consider manual extraction or contributing parser enhancements. 
- ---- - -### Performance Comparison - -| Operation | Tokenizer+Parser API | High-Level API | Overhead | -|-----------|---------------------|----------------|----------| -| Single parse | 100% (baseline) | ~110% | +10% | -| Batch parse (10 queries) | 100% (with reuse) | ~105% | +5% | - -**Recommendation:** -- Use high-level API for simple cases (< 100 queries/sec) -- Use tokenizer+parser API for performance-critical batch processing - ---- - -### Complete Example - -```go -package main - -import ( - "fmt" - "log" - - "github.com/ajitpratap0/GoSQLX/pkg/gosqlx" -) - -func main() { - sql := ` - SELECT u.id, u.name, COUNT(o.id) as order_count - FROM users u - LEFT JOIN orders o ON u.id = o.user_id - WHERE u.created_at >= '2024-01-01' - GROUP BY u.id, u.name - HAVING COUNT(o.id) > 5 - ORDER BY order_count DESC - LIMIT 10 - ` - - // Parse SQL - astNode, err := gosqlx.Parse(sql) - if err != nil { - log.Fatal("Parse error:", err) - } - - // Extract metadata - tables := gosqlx.ExtractTables(astNode) - columns := gosqlx.ExtractColumns(astNode) - functions := gosqlx.ExtractFunctions(astNode) - - fmt.Printf("Tables: %v\n", tables) // ["users", "orders"] - fmt.Printf("Columns: %v\n", columns) // ["id", "name", "created_at", "user_id"] - fmt.Printf("Functions: %v\n", functions) // ["COUNT"] -} -``` - --- ## Tokenizer API ### Package: `github.com/ajitpratap0/GoSQLX/pkg/sql/tokenizer` -The tokenizer performs lexical analysis of SQL text, converting it into a stream of tokens. - ### Functions #### `GetTokenizer() *Tokenizer` -Retrieves a tokenizer instance from the object pool. +Retrieve tokenizer from pool. 
```go tkz := tokenizer.GetTokenizer() -defer tokenizer.PutTokenizer(tkz) // ALWAYS defer the return +defer tokenizer.PutTokenizer(tkz) // ALWAYS defer ``` -**Returns:** A pointer to a Tokenizer instance -**Thread-Safe:** Yes -**Pool Behavior:** Reuses existing instances when available - #### `PutTokenizer(t *Tokenizer)` -Returns a tokenizer instance to the object pool for reuse. - -```go -tokenizer.PutTokenizer(tkz) -``` - -**Parameters:** -- `t`: The tokenizer instance to return to the pool - -**Thread-Safe:** Yes -**Important:** Always call this when done with a tokenizer +Return tokenizer to pool. ### Type: `Tokenizer` -#### Method: `Tokenize(input []byte) ([]models.TokenWithSpan, error)` -Tokenizes SQL input into tokens with position information. +#### `Tokenize(input []byte) ([]models.TokenWithSpan, error)` +Tokenize SQL with zero-copy operation and position tracking. ```go tokens, err := tkz.Tokenize([]byte("SELECT * FROM users")) -if err != nil { - // Handle error -} ``` -**Parameters:** -- `input`: SQL text as byte slice - -**Returns:** -- `[]models.TokenWithSpan`: Array of tokens with position spans -- `error`: Tokenization error if any - -**Features:** -- Zero-copy operation -- Unicode support (UTF-8) -- Position tracking (line, column) -- Dialect-specific tokens (PostgreSQL @>, MySQL backticks, etc.) - -#### Method: `Reset()` -Resets the tokenizer state for reuse. +#### `TokenizeContext(ctx context.Context, input []byte) ([]models.TokenWithSpan, error)` +Tokenize with context support. 
```go -tkz.Reset() +ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) +defer cancel() +tokens, err := tkz.TokenizeContext(ctx, []byte("SELECT * FROM users")) ``` -**Note:** Called automatically by the pool management - ### Supported Token Types -| Token Type | Description | Example | -|------------|-------------|---------| -| `TokenTypeSelect` | SELECT keyword | `SELECT` | -| `TokenTypeFrom` | FROM keyword | `FROM` | -| `TokenTypeWhere` | WHERE keyword | `WHERE` | -| `TokenTypeIdentifier` | Column/table names | `users`, `id` | -| `TokenTypeNumber` | Numeric literals | `42`, `3.14` | -| `TokenTypeSingleQuotedString` | String literals | `'hello'` | -| `TokenTypeDoubleQuotedString` | Quoted identifiers | `"column name"` | -| `TokenTypeBacktickIdentifier` | MySQL identifiers | `` `column` `` | -| `TokenTypeBracketIdentifier` | SQL Server identifiers | `[column]` | - -### Special Character Support - -```go -// Unicode identifiers (all languages) -`SELECT "名前", "имя", "الاسم" FROM users` - -// Emoji in strings -`SELECT * FROM users WHERE status = '🚀'` +| Token Type | Example | +|------------|---------| +| `TokenTypeSelect` | `SELECT` | +| `TokenTypeFrom` | `FROM` | +| `TokenTypeWhere` | `WHERE` | +| `TokenTypeIdentifier` | `users`, `id` | +| `TokenTypeNumber` | `42`, `3.14` | +| `TokenTypeSingleQuotedString` | `'hello'` | +| `TokenTypeDoubleQuotedString` | `"column name"` | +| `TokenTypeBacktickIdentifier` | `` `column` `` | -// PostgreSQL operators -`SELECT * FROM users WHERE tags @> ARRAY['admin']` +**Features:** +- Unicode support (UTF-8) +- Dialect-specific tokens (PostgreSQL `@>`, MySQL backticks, etc.) +- Zero-copy operations +- Position tracking (line, column) -// MySQL backticks -`SELECT `user_id` FROM `users`` -``` +--- ## Parser API ### Package: `github.com/ajitpratap0/GoSQLX/pkg/sql/parser` -The parser builds an Abstract Syntax Tree (AST) from tokens. - ### Functions #### `NewParser() *Parser` -Creates a new parser instance from the pool. 
+Create parser from pool. ```go p := parser.NewParser() -defer p.Release() // ALWAYS defer the release +defer p.Release() // ALWAYS defer ``` -**Returns:** A pointer to a Parser instance -**Thread-Safe:** Yes - ### Type: `Parser` -#### Method: `Parse(tokens []token.Token) (ast.Node, error)` -Parses tokens into an AST. +#### `Parse(tokens []token.Token) (*ast.AST, error)` +Parse tokens into AST. ```go astNode, err := p.Parse(tokens) -if err != nil { - // Handle parse error -} ``` -**Parameters:** -- `tokens`: Array of tokens to parse - -**Returns:** -- `ast.Node`: Root node of the AST -- `error`: Parse error if any - **Supported Statements:** -- SELECT (with JOIN, GROUP BY, ORDER BY, HAVING) -- INSERT (single and multi-row) -- UPDATE (with WHERE) -- DELETE (with WHERE) -- CREATE TABLE -- ALTER TABLE -- DROP TABLE -- CREATE INDEX - -#### Method: `Release()` -Returns the parser to the pool. +- DML: SELECT, INSERT, UPDATE, DELETE, MERGE +- DDL: CREATE TABLE/INDEX/VIEW/MATERIALIZED VIEW, ALTER TABLE, DROP +- Advanced: CTEs, window functions, set operations (UNION/EXCEPT/INTERSECT) +- Grouping: ROLLUP, CUBE, GROUPING SETS + +#### `ParseContext(ctx context.Context, tokens []token.Token) (*ast.AST, error)` +Parse with context support. ```go -p.Release() +ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) +defer cancel() +astNode, err := p.ParseContext(ctx, tokens) ``` -**Important:** Always call this when done +#### `Reset()` +Reset parser state for reuse. + +--- ## AST API ### Package: `github.com/ajitpratap0/GoSQLX/pkg/sql/ast` -The AST package provides comprehensive node types for SQL syntax trees, supporting DDL, DML, CTEs, window functions, set operations, and advanced SQL features. 
- -### Overview - -**Key Features:** -- **Complete SQL Support**: SELECT, INSERT, UPDATE, DELETE, CREATE, ALTER, DROP, MERGE -- **Advanced Features**: CTEs, window functions, set operations, subqueries -- **Visitor Pattern**: Tree traversal support via `ast.Visitor` interface -- **Object Pooling**: Memory-efficient node management -- **Type Safety**: Strongly typed nodes with interfaces - ### Core Interfaces #### Interface: `Node` @@ -605,3835 +246,1009 @@ Base interface for all AST nodes. ```go type Node interface { - TokenLiteral() string // Returns the literal token representation - Children() []Node // Returns child nodes for tree traversal -} -``` - -**Example:** -```go -func PrintTree(node ast.Node, indent int) { - fmt.Printf("%s%s\n", strings.Repeat(" ", indent), node.TokenLiteral()) - for _, child := range node.Children() { - PrintTree(child, indent+1) - } + TokenLiteral() string + Children() []Node } ``` #### Interface: `Statement` -Represents executable SQL statements. +Executable SQL statements. ```go type Statement interface { Node - statementNode() // Marker method for type safety + statementNode() } ``` -**Implementing Types:** -- `SelectStatement`, `InsertStatement`, `UpdateStatement`, `DeleteStatement` -- `CreateTableStatement`, `CreateIndexStatement`, `CreateViewStatement` -- `CreateMaterializedViewStatement`, `RefreshMaterializedViewStatement` -- `AlterTableStatement`, `DropStatement`, `MergeStatement` -- `WithClause`, `CommonTableExpr`, `SetOperation` - #### Interface: `Expression` -Represents SQL expressions (values, conditions, computations). +SQL expressions (values, conditions, computations). 
```go type Expression interface { Node - expressionNode() // Marker method for type safety + expressionNode() } ``` -**Implementing Types:** -- `Identifier`, `LiteralValue`, `BinaryExpression`, `UnaryExpression` -- `FunctionCall`, `CaseExpression`, `CastExpression` -- `InExpression`, `BetweenExpression`, `ExistsExpression` -- `SubqueryExpression`, `AnyExpression`, `AllExpression` -- `RollupExpression`, `CubeExpression`, `GroupingSetsExpression` - ---- - ### DML Statement Types #### `SelectStatement` -Represents a SELECT query with full SQL support. - ```go type SelectStatement struct { - With *WithClause // Optional CTE (WITH clause) - Distinct bool // DISTINCT modifier - Columns []Expression // SELECT columns - From []TableReference // FROM tables - TableName string // Primary table name - Joins []JoinClause // JOIN clauses - Where Expression // WHERE condition - GroupBy []Expression // GROUP BY columns (supports ROLLUP, CUBE, GROUPING SETS) - Having Expression // HAVING condition - Windows []WindowSpec // WINDOW definitions - OrderBy []OrderByExpression // ORDER BY with NULLS FIRST/LAST support - Limit *int // LIMIT value - Offset *int // OFFSET value + With *WithClause + Distinct bool + Columns []Expression + From []TableReference + Joins []JoinClause + Where Expression + GroupBy []Expression // Supports ROLLUP, CUBE, GROUPING SETS + Having Expression + Windows []WindowSpec + OrderBy []OrderByExpression // Supports NULLS FIRST/LAST + Limit *int + Offset *int } ``` -**Example Usage:** +**Example:** ```go if stmt, ok := astNode.(*ast.SelectStatement); ok { - // Check for CTE - if stmt.With != nil { - fmt.Printf("Has %d CTEs\n", len(stmt.With.CTEs)) - } - - // Process columns for _, col := range stmt.Columns { fmt.Println("Column:", col.TokenLiteral()) } - - // Check for window functions - if len(stmt.Windows) > 0 { - fmt.Println("Uses window functions") - } } ``` -**Supported SQL:** -```sql -WITH cte AS (SELECT * FROM source) -SELECT DISTINCT id, name, 
ROW_NUMBER() OVER (ORDER BY id) as rn -FROM users u -LEFT JOIN orders o ON u.id = o.user_id -WHERE active = true -GROUP BY ROLLUP(region, city) -HAVING COUNT(*) > 5 -ORDER BY name NULLS LAST -LIMIT 10 OFFSET 5 -``` - ---- - #### `InsertStatement` -Represents an INSERT statement with conflict handling. - ```go type InsertStatement struct { - With *WithClause // Optional CTE - TableName string // Target table - Columns []Expression // Column list - Values []Expression // Value expressions - Query *SelectStatement // INSERT ... SELECT - Returning []Expression // RETURNING clause (PostgreSQL) - OnConflict *OnConflict // ON CONFLICT clause (PostgreSQL) -} -``` - -**Example:** -```go -if stmt, ok := astNode.(*ast.InsertStatement); ok { - fmt.Printf("Insert into: %s\n", stmt.TableName) - - if stmt.Query != nil { - fmt.Println("INSERT ... SELECT detected") - } - - if stmt.OnConflict != nil { - fmt.Println("Has ON CONFLICT handling") - } + With *WithClause + TableName string + Columns []Expression + Values []Expression + Query *SelectStatement + Returning []Expression + OnConflict *OnConflict } ``` -**Supported SQL:** -```sql -INSERT INTO users (name, email) -VALUES ('John', 'john@example.com') -ON CONFLICT (email) DO UPDATE SET name = EXCLUDED.name -RETURNING id, created_at -``` - ---- - #### `UpdateStatement` -Represents an UPDATE statement with multi-table support. 
- ```go type UpdateStatement struct { - With *WithClause // Optional CTE - TableName string // Target table - Alias string // Table alias - Updates []UpdateExpression // SET column = value pairs - Assignments []UpdateExpression // Alternative field name - From []TableReference // FROM clause for multi-table updates - Where Expression // WHERE condition - Returning []Expression // RETURNING clause (PostgreSQL) + With *WithClause + TableName string + Updates []UpdateExpression + From []TableReference + Where Expression + Returning []Expression } ``` -**Supported SQL:** -```sql -WITH updated AS (SELECT id FROM active_users) -UPDATE users u -SET status = 'active', updated_at = NOW() -FROM updated -WHERE u.id = updated.id -RETURNING u.id, u.status -``` - ---- - #### `DeleteStatement` -Represents a DELETE statement with USING support. - ```go type DeleteStatement struct { - With *WithClause // Optional CTE - TableName string // Target table - Alias string // Table alias - Using []TableReference // USING clause for multi-table deletes - Where Expression // WHERE condition - Returning []Expression // RETURNING clause (PostgreSQL) + With *WithClause + TableName string + Using []TableReference + Where Expression + Returning []Expression } ``` -**Supported SQL:** -```sql -DELETE FROM orders o -USING users u -WHERE o.user_id = u.id AND u.deleted = true -RETURNING o.id -``` - ---- - #### `MergeStatement` -Represents a MERGE statement (SQL:2003 F312). 
- ```go type MergeStatement struct { - TargetTable TableReference // Target table being merged into - TargetAlias string // Optional target alias - SourceTable TableReference // Source table/subquery - SourceAlias string // Optional source alias - OnCondition Expression // Join/match condition - WhenClauses []*MergeWhenClause // WHEN MATCHED/NOT MATCHED clauses -} -``` - -**Supporting Types:** -```go -type MergeWhenClause struct { - Type string // "MATCHED", "NOT_MATCHED", "NOT_MATCHED_BY_SOURCE" - Condition Expression // Optional AND condition - Action *MergeAction // UPDATE/INSERT/DELETE action -} - -type MergeAction struct { - ActionType string // "UPDATE", "INSERT", "DELETE" - SetClauses []SetClause // For UPDATE - Columns []string // For INSERT - Values []Expression // For INSERT - DefaultValues bool // For INSERT DEFAULT VALUES + TargetTable TableReference + TargetAlias string + SourceTable TableReference + SourceAlias string + OnCondition Expression + WhenClauses []*MergeWhenClause } ``` -**Supported SQL:** -```sql -MERGE INTO target t -USING source s ON t.id = s.id -WHEN MATCHED AND s.active = true THEN - UPDATE SET t.name = s.name, t.updated = NOW() -WHEN MATCHED AND s.active = false THEN - DELETE -WHEN NOT MATCHED THEN - INSERT (id, name) VALUES (s.id, s.name) -``` - ---- - ### DDL Statement Types #### `CreateTableStatement` -Represents a CREATE TABLE statement with partitioning support. - ```go type CreateTableStatement struct { - IfNotExists bool // IF NOT EXISTS - Temporary bool // TEMP/TEMPORARY - Name string // Table name - Columns []ColumnDef // Column definitions - Constraints []TableConstraint // Table-level constraints - Inherits []string // INHERITS clause (PostgreSQL) - PartitionBy *PartitionBy // PARTITION BY clause - Partitions []PartitionDefinition // Individual partition definitions - Options []TableOption // ENGINE, CHARSET, etc. 
(MySQL) + IfNotExists bool + Temporary bool + Name string + Columns []ColumnDef + Constraints []TableConstraint + PartitionBy *PartitionBy + Options []TableOption } ``` -**Supporting Types:** +#### `CreateIndexStatement` ```go -type ColumnDef struct { - Name string // Column name - Type string // Data type - Constraints []ColumnConstraint // Column constraints +type CreateIndexStatement struct { + Name string + Unique bool + TableName string + Columns []IndexColumn + Where Expression + Using string + Concurrently bool } +``` -type ColumnConstraint struct { - Type string // NOT NULL, UNIQUE, PRIMARY KEY, etc. - Default Expression // DEFAULT value - References *ReferenceDefinition // FOREIGN KEY reference - Check Expression // CHECK constraint - AutoIncrement bool // AUTO_INCREMENT (MySQL) +#### `CreateViewStatement` +```go +type CreateViewStatement struct { + Name string + Columns []string + Query *SelectStatement + OrReplace bool + Temporary bool + Recursive bool + CheckOption string } +``` -type TableConstraint struct { - Name string // Constraint name - Type string // PRIMARY KEY, UNIQUE, FOREIGN KEY, CHECK - Columns []string // Affected columns - References *ReferenceDefinition // Foreign key details - Check Expression // Check expression +#### `CreateMaterializedViewStatement` +```go +type CreateMaterializedViewStatement struct { + Name string + Columns []string + Query *SelectStatement + WithData bool } +``` -type ReferenceDefinition struct { - Table string // Referenced table - Columns []string // Referenced columns - OnDelete string // ON DELETE action - OnUpdate string // ON UPDATE action - Match string // MATCH type -} - -type PartitionBy struct { - Type string // RANGE, LIST, HASH - Columns []string // Partition columns - Boundary []Expression // Boundary expressions -} - -type PartitionDefinition struct { - Name string // Partition name - Type string // FOR VALUES, IN, LESS THAN - Values []Expression // Partition values - LessThan Expression // LESS THAN 
(value) - From Expression // FROM (value) - To Expression // TO (value) - InValues []Expression // IN (values) - Tablespace string // Tablespace +#### `AlterTableStatement` +```go +type AlterTableStatement struct { + TableName string + Actions []AlterAction } ``` -**Supported SQL:** -```sql -CREATE TABLE IF NOT EXISTS orders ( - id SERIAL PRIMARY KEY, - user_id INT NOT NULL REFERENCES users(id) ON DELETE CASCADE, - amount DECIMAL(10,2) CHECK (amount > 0), - status VARCHAR(20) DEFAULT 'pending', - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - CONSTRAINT unique_order UNIQUE (user_id, created_at) -) -PARTITION BY RANGE (created_at); - -CREATE TABLE orders_2024 PARTITION OF orders - FOR VALUES FROM ('2024-01-01') TO ('2025-01-01'); -``` - ---- - -#### `CreateIndexStatement` -Represents a CREATE INDEX statement. - +#### `DropStatement` ```go -type CreateIndexStatement struct { - Unique bool // UNIQUE index - IfNotExists bool // IF NOT EXISTS - Name string // Index name - Table string // Table name - Columns []IndexColumn // Index columns - Using string // Index method (BTREE, HASH, GIN, etc.) - Where Expression // Partial index condition -} - -type IndexColumn struct { - Column string // Column name - Collate string // Collation - Direction string // ASC, DESC - NullsLast bool // NULLS LAST +type DropStatement struct { + ObjectType string // TABLE, INDEX, VIEW, etc. + ObjectName string + IfExists bool + Cascade bool } ``` -**Supported SQL:** -```sql -CREATE UNIQUE INDEX CONCURRENTLY idx_users_email -ON users (email) -WHERE deleted_at IS NULL; -``` - ---- - -#### `CreateViewStatement` -Represents a CREATE VIEW statement. +### CTE and Set Operations +#### `WithClause` ```go -type CreateViewStatement struct { - OrReplace bool // OR REPLACE - Temporary bool // TEMP/TEMPORARY - IfNotExists bool // IF NOT EXISTS - Name string // View name - Columns []string // Optional column list - Query Statement // SELECT statement - WithOption string // WITH CHECK OPTION, etc. 
+type WithClause struct { + Recursive bool + CTEs []CommonTableExpr } ``` -**Supported SQL:** -```sql -CREATE OR REPLACE VIEW active_users AS -SELECT id, name, email -FROM users -WHERE active = true -WITH CHECK OPTION; -``` - ---- - -#### `CreateMaterializedViewStatement` -Represents a CREATE MATERIALIZED VIEW statement. - +#### `CommonTableExpr` ```go -type CreateMaterializedViewStatement struct { - IfNotExists bool // IF NOT EXISTS - Name string // View name - Columns []string // Optional column list - Query Statement // SELECT statement - WithData *bool // WITH DATA / WITH NO DATA - Tablespace string // Tablespace (PostgreSQL) +type CommonTableExpr struct { + Name string + Columns []string + Query *SelectStatement } ``` -**Supported SQL:** -```sql -CREATE MATERIALIZED VIEW sales_summary AS -SELECT region, SUM(amount) as total -FROM sales -GROUP BY region -WITH DATA; -``` - ---- - -#### `RefreshMaterializedViewStatement` -Represents a REFRESH MATERIALIZED VIEW statement. - +#### `SetOperation` ```go -type RefreshMaterializedViewStatement struct { - Concurrently bool // CONCURRENTLY - Name string // View name - WithData *bool // WITH DATA / WITH NO DATA +type SetOperation struct { + Left *SelectStatement + Operator string // UNION, EXCEPT, INTERSECT + All bool + Right *SelectStatement } ``` -**Supported SQL:** -```sql -REFRESH MATERIALIZED VIEW CONCURRENTLY sales_summary; -``` - ---- - -#### `AlterTableStatement` -Represents an ALTER TABLE statement. +### Grouping Set Types +#### `RollupExpression` ```go -type AlterTableStatement struct { - Table string // Table name - Actions []AlterTableAction // Actions to perform -} - -type AlterTableAction struct { - Type string // ADD COLUMN, DROP COLUMN, MODIFY COLUMN, etc. 
- ColumnName string // Affected column - ColumnDef *ColumnDef // New column definition - Constraint *TableConstraint // Constraint modification +type RollupExpression struct { + Expressions []Expression } ``` -**Supported SQL:** -```sql -ALTER TABLE users - ADD COLUMN phone VARCHAR(20), - DROP COLUMN legacy_field, - ADD CONSTRAINT fk_dept FOREIGN KEY (dept_id) REFERENCES departments(id); -``` - ---- - -#### `DropStatement` -Represents a DROP statement for various object types. - +#### `CubeExpression` ```go -type DropStatement struct { - ObjectType string // TABLE, VIEW, MATERIALIZED VIEW, INDEX, etc. - IfExists bool // IF EXISTS - Names []string // Objects to drop (can be multiple) - CascadeType string // CASCADE, RESTRICT, or empty +type CubeExpression struct { + Expressions []Expression } ``` -**Supported SQL:** -```sql -DROP TABLE IF EXISTS temp_data, old_logs CASCADE; -DROP MATERIALIZED VIEW IF EXISTS sales_summary; -DROP INDEX idx_users_email; -``` - ---- - -### CTE and Set Operation Types - -#### `WithClause` -Represents a WITH clause (Common Table Expressions). - +#### `GroupingSetsExpression` ```go -type WithClause struct { - Recursive bool // RECURSIVE modifier - CTEs []*CommonTableExpr // CTE definitions +type GroupingSetsExpression struct { + Sets [][]Expression } ``` -#### `CommonTableExpr` -Represents a single CTE definition. 
+### Window Function Types +#### `WindowSpec` ```go -type CommonTableExpr struct { - Name string // CTE name - Columns []string // Optional column list - Statement Statement // CTE query - Materialized *bool // MATERIALIZED/NOT MATERIALIZED (PostgreSQL) +type WindowSpec struct { + Name string + PartitionBy []Expression + OrderBy []OrderByExpression + Frame *WindowFrame } ``` -**Supported SQL:** -```sql -WITH RECURSIVE employee_tree (id, name, level) AS ( - SELECT id, name, 1 FROM employees WHERE manager_id IS NULL - UNION ALL - SELECT e.id, e.name, t.level + 1 - FROM employees e - JOIN employee_tree t ON e.manager_id = t.id -) -SELECT * FROM employee_tree; -``` - ---- - -#### `SetOperation` -Represents set operations (UNION, EXCEPT, INTERSECT). - +#### `WindowFrame` ```go -type SetOperation struct { - Left Statement // Left query - Operator string // UNION, EXCEPT, INTERSECT - Right Statement // Right query - All bool // ALL modifier (e.g., UNION ALL) +type WindowFrame struct { + Type string // ROWS or RANGE + Start *FrameBound + End *FrameBound } ``` -**Supported SQL:** -```sql -SELECT name FROM users -UNION ALL -SELECT name FROM customers -EXCEPT -SELECT name FROM blocked_users; +#### `FrameBound` +```go +type FrameBound struct { + Type string // UNBOUNDED, CURRENT, PRECEDING, FOLLOWING + Expression Expression +} ``` ---- - ### Expression Types #### `Identifier` -Represents a column or table name with optional qualification. - ```go type Identifier struct { - Name string // Column/table name - Table string // Optional table qualifier + Value string } ``` -**Example:** -```go -// For "users.id" -id := &ast.Identifier{Name: "id", Table: "users"} -``` - ---- - #### `LiteralValue` -Represents a literal value in SQL. - ```go type LiteralValue struct { - Value interface{} // Actual value - Type string // INTEGER, FLOAT, STRING, BOOLEAN, NULL, etc. 
+ Type string // STRING, NUMBER, BOOLEAN, NULL + Value string } ``` -**Example:** -```go -// For '42' -num := &ast.LiteralValue{Value: 42, Type: "INTEGER"} - -// For 'hello' -str := &ast.LiteralValue{Value: "hello", Type: "STRING"} - -// For NULL -null := &ast.LiteralValue{Value: nil, Type: "NULL"} -``` - ---- - #### `BinaryExpression` -Represents binary operations (comparison, logical, arithmetic). - ```go type BinaryExpression struct { - Left Expression // Left operand - Operator string // =, <>, >, <, AND, OR, +, -, *, /, etc. - Right Expression // Right operand - Not bool // NOT modifier - CustomOp *CustomBinaryOperator // PostgreSQL custom operators -} -``` - -**Supported Operators:** -- Comparison: `=`, `<>`, `!=`, `>`, `<`, `>=`, `<=` -- Logical: `AND`, `OR` -- Arithmetic: `+`, `-`, `*`, `/`, `%` -- String: `||` (concatenation), `LIKE`, `ILIKE` -- PostgreSQL: `@>`, `<@`, `&&`, `?`, `?|`, `?&` - ---- - -#### `UnaryExpression` -Represents unary operations. - -```go -type UnaryExpression struct { - Operator UnaryOperator // NOT, -, +, etc. - Expr Expression // Operand + Left Expression + Operator string + Right Expression } ``` ---- - #### `FunctionCall` -Represents function calls including window functions. 
- ```go type FunctionCall struct { - Name string // Function name - Arguments []Expression // Function arguments - Over *WindowSpec // Window specification (for window functions) - Distinct bool // DISTINCT modifier (for aggregates) - Filter Expression // FILTER clause (PostgreSQL) -} -``` - -**Example:** -```go -// COUNT(DISTINCT user_id) FILTER (WHERE active) -countFunc := &ast.FunctionCall{ - Name: "COUNT", - Arguments: []ast.Expression{&ast.Identifier{Name: "user_id"}}, - Distinct: true, - Filter: &ast.BinaryExpression{...}, -} - -// ROW_NUMBER() OVER (PARTITION BY dept ORDER BY salary DESC) -rowNumFunc := &ast.FunctionCall{ - Name: "ROW_NUMBER", - Over: &ast.WindowSpec{ - PartitionBy: []ast.Expression{...}, - OrderBy: []ast.OrderByExpression{...}, - }, + Name string + Args []Expression + Distinct bool + Filter Expression + Over *WindowSpec } ``` ---- - #### `CaseExpression` -Represents CASE WHEN THEN ELSE expressions. - ```go type CaseExpression struct { - Value Expression // Optional CASE value (for simple CASE) - WhenClauses []WhenClause // WHEN ... THEN ... clauses - ElseClause Expression // ELSE clause -} - -type WhenClause struct { - Condition Expression // WHEN condition - Result Expression // THEN result -} -``` - -**Supported SQL:** -```sql --- Searched CASE -CASE WHEN status = 'active' THEN 1 - WHEN status = 'pending' THEN 0 - ELSE -1 END - --- Simple CASE -CASE status - WHEN 'active' THEN 1 - WHEN 'pending' THEN 0 - ELSE -1 END -``` - ---- - -#### `CastExpression` -Represents CAST type conversion. - -```go -type CastExpression struct { - Expr Expression // Expression to cast - Type string // Target data type + CaseExpr Expression + WhenPairs []WhenPair + ElseExpr Expression } ``` -**Supported SQL:** -```sql -CAST(amount AS DECIMAL(10,2)) -CAST(created_at AS DATE) -``` - ---- - #### `InExpression` -Represents IN expressions with value lists or subqueries. 
- ```go type InExpression struct { - Expr Expression // Expression to check - List []Expression // Value list: IN (1, 2, 3) - Subquery Statement // Subquery: IN (SELECT ...) - Not bool // NOT IN + Expr Expression + Not bool + Values []Expression + Query *SelectStatement } ``` -**Supported SQL:** -```sql -status IN ('active', 'pending') -id NOT IN (SELECT blocked_id FROM blocked_users) -``` - ---- - #### `BetweenExpression` -Represents BETWEEN range expressions. - ```go type BetweenExpression struct { - Expr Expression // Expression to check - Lower Expression // Lower bound - Upper Expression // Upper bound - Not bool // NOT BETWEEN -} -``` - -**Supported SQL:** -```sql -created_at BETWEEN '2024-01-01' AND '2024-12-31' -price NOT BETWEEN 10 AND 100 -``` - ---- - -#### `ExistsExpression` -Represents EXISTS subquery expressions. - -```go -type ExistsExpression struct { - Subquery Statement // Subquery to check + Expr Expression + Not bool + Lower Expression + Upper Expression } ``` -**Supported SQL:** -```sql -EXISTS (SELECT 1 FROM orders WHERE user_id = users.id) -``` - ---- - #### `SubqueryExpression` -Represents scalar subquery expressions. - ```go type SubqueryExpression struct { - Subquery Statement // Scalar subquery -} -``` - -**Supported SQL:** -```sql -(SELECT MAX(price) FROM products) -``` - ---- - -#### `AnyExpression` and `AllExpression` -Represents ANY/SOME and ALL subquery comparisons. - -```go -type AnyExpression struct { - Expr Expression // Left operand - Operator string // Comparison operator - Subquery Statement // Subquery -} - -type AllExpression struct { - Expr Expression // Left operand - Operator string // Comparison operator - Subquery Statement // Subquery -} -``` - -**Supported SQL:** -```sql -price > ANY (SELECT avg_price FROM categories) -score >= ALL (SELECT min_score FROM thresholds) -``` - ---- - -#### `ExtractExpression` -Represents EXTRACT function for date/time parts. 
- -```go -type ExtractExpression struct { - Field string // YEAR, MONTH, DAY, HOUR, etc. - Source Expression // Date/time expression + Query *SelectStatement } ``` -**Supported SQL:** -```sql -EXTRACT(YEAR FROM created_at) -EXTRACT(MONTH FROM order_date) -``` - ---- - -#### `SubstringExpression` -Represents SUBSTRING function. +### Supporting Types +#### `JoinClause` ```go -type SubstringExpression struct { - Str Expression // Source string - Start Expression // Start position - Length Expression // Optional length +type JoinClause struct { + Type string // INNER, LEFT, RIGHT, FULL, CROSS, NATURAL + Table TableReference + Condition Expression + Using []string } ``` -**Supported SQL:** -```sql -SUBSTRING(name FROM 1 FOR 10) -SUBSTRING(code FROM 5) -``` - ---- - -#### `PositionExpression` -Represents POSITION function. - +#### `OrderByExpression` ```go -type PositionExpression struct { - Substr Expression // Substring to find - Str Expression // String to search in +type OrderByExpression struct { + Expression Expression + Descending bool + NullsFirst bool + NullsLast bool } ``` -**Supported SQL:** -```sql -POSITION('@' IN email) -``` - ---- - -### Grouping Set Types - -#### `RollupExpression` -Represents ROLLUP for hierarchical grouping sets. - +#### `TableReference` ```go -type RollupExpression struct { - Expressions []Expression // Columns for rollup +type TableReference struct { + Name string + Alias string + Lateral bool + Query *SelectStatement } ``` -**Supported SQL:** -```sql --- ROLLUP(region, city, store) generates: --- (region, city, store), (region, city), (region), () -GROUP BY ROLLUP(region, city, store) -``` - ---- +### Object Pool Functions -#### `CubeExpression` -Represents CUBE for all combinations of grouping sets. +#### `NewAST() *AST` +Get AST from pool. 
```go -type CubeExpression struct { - Expressions []Expression // Columns for cube -} -``` - -**Supported SQL:** -```sql --- CUBE(a, b) generates: (a, b), (a), (b), () -GROUP BY CUBE(region, year) +astObj := ast.NewAST() +defer ast.ReleaseAST(astObj) ``` ---- +#### `ReleaseAST(a *AST)` +Return AST to pool. -#### `GroupingSetsExpression` -Represents explicit grouping sets. +#### `NewSelectStatement() *SelectStatement` +Get SELECT statement from pool. ```go -type GroupingSetsExpression struct { - Sets [][]Expression // Each inner slice is one grouping set -} -``` - -**Supported SQL:** -```sql -GROUP BY GROUPING SETS ((region, city), (region), ()) +stmt := ast.NewSelectStatement() +defer ast.ReleaseSelectStatement(stmt) ``` ---- - -### Window Function Types +**Pool Functions Available For:** +- `SelectStatement`, `InsertStatement`, `UpdateStatement`, `DeleteStatement` +- `Identifier`, `BinaryExpression`, `LiteralValue` -#### `WindowSpec` -Represents a window specification. +### Visitor Pattern +#### Interface: `Visitor` ```go -type WindowSpec struct { - Name string // Named window reference - PartitionBy []Expression // PARTITION BY columns - OrderBy []OrderByExpression // ORDER BY within window - FrameClause *WindowFrame // Frame specification +type Visitor interface { + Visit(node Node) Visitor } ``` ---- - -#### `WindowFrame` -Represents window frame clause. - +**Example:** ```go -type WindowFrame struct { - Type string // ROWS or RANGE - Start WindowFrameBound // Start bound - End *WindowFrameBound // End bound (optional) +type TableCollector struct { + Tables []string } -type WindowFrameBound struct { - Type string // CURRENT ROW, UNBOUNDED PRECEDING, etc. 
- Value Expression // For N PRECEDING/FOLLOWING +func (tc *TableCollector) Visit(node ast.Node) ast.Visitor { + if sel, ok := node.(*ast.SelectStatement); ok { + tc.Tables = append(tc.Tables, sel.TableName) + } + return tc } -``` - -**Supported SQL:** -```sql --- ROWS frame -SUM(amount) OVER ( - PARTITION BY region - ORDER BY date - ROWS BETWEEN 2 PRECEDING AND CURRENT ROW -) --- RANGE frame -AVG(price) OVER ( - ORDER BY date - RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW -) +collector := &TableCollector{} +ast.Walk(collector, astNode) ``` --- -#### `OrderByExpression` -Represents ORDER BY element with direction and NULL ordering. - -```go -type OrderByExpression struct { - Expression Expression // Column or expression - Ascending bool // ASC (true) or DESC (false) - NullsFirst *bool // NULLS FIRST/LAST (nil = default) -} -``` - -**Supported SQL:** -```sql -ORDER BY name ASC NULLS LAST, created_at DESC NULLS FIRST -``` - ---- - -### Supporting Types - -#### `TableReference` -Represents a table in FROM clause. - -```go -type TableReference struct { - Name string // Table name - Alias string // Optional alias -} -``` - ---- - -#### `JoinClause` -Represents a JOIN operation. - -```go -type JoinClause struct { - Type string // INNER, LEFT, RIGHT, FULL, CROSS - Left TableReference // Left table - Right TableReference // Right table - Condition Expression // ON condition -} -``` - ---- - -#### `UpdateExpression` -Represents SET clause in UPDATE. - -```go -type UpdateExpression struct { - Column Expression // Column to update - Value Expression // New value -} -``` - ---- - -#### `OnConflict` -Represents PostgreSQL ON CONFLICT clause. 
- -```go -type OnConflict struct { - Target []Expression // Target columns - Constraint string // Constraint name - Action OnConflictAction // DO UPDATE/NOTHING -} - -type OnConflictAction struct { - DoNothing bool // DO NOTHING - DoUpdate []UpdateExpression // SET clauses - Where Expression // WHERE condition -} -``` - ---- - -#### `ListExpression` -Represents a list of expressions. - -```go -type ListExpression struct { - Values []Expression // List items -} -``` - ---- - -#### `Values` -Represents VALUES clause. - -```go -type Values struct { - Rows [][]Expression // Value rows -} -``` - ---- - -### Root AST Type - -#### `AST` -Root container for parsed SQL statements. - -```go -type AST struct { - Statements []Statement // Parsed statements -} - -func (a AST) TokenLiteral() string -func (a AST) Children() []Node -``` - -**Example:** -```go -astNode, err := parser.Parse(tokens) -if err != nil { - log.Fatal(err) -} - -for _, stmt := range astNode.Statements { - switch s := stmt.(type) { - case *ast.SelectStatement: - fmt.Println("SELECT statement") - case *ast.InsertStatement: - fmt.Println("INSERT into:", s.TableName) - case *ast.UpdateStatement: - fmt.Println("UPDATE:", s.TableName) - case *ast.DeleteStatement: - fmt.Println("DELETE from:", s.TableName) - } -} -``` - ---- - -### Object Pool Functions - -#### `NewAST() *AST` -Gets an AST instance from the pool. - -```go -astObj := ast.NewAST() -defer ast.ReleaseAST(astObj) // ALWAYS defer the release -``` - -#### `ReleaseAST(ast *AST)` -Returns an AST instance to the pool. 
- -```go -ast.ReleaseAST(astObj) -``` - -**Best Practice:** -```go -func ParseSQL(sql string) (*ast.AST, error) { - tkz := tokenizer.GetTokenizer() - defer tokenizer.PutTokenizer(tkz) - - tokens, err := tkz.Tokenize([]byte(sql)) - if err != nil { - return nil, err - } - - p := parser.NewParser() - defer p.Release() - - // AST is returned to caller - caller responsible for release - return p.Parse(tokens) -} -``` - ---- - -### Visitor Pattern - -The AST supports tree traversal via the `Children()` method: - -```go -func VisitAll(node ast.Node, visitor func(ast.Node)) { - visitor(node) - for _, child := range node.Children() { - VisitAll(child, visitor) - } -} - -// Usage: Find all table references -var tables []string -VisitAll(astNode, func(node ast.Node) { - if tbl, ok := node.(*ast.TableReference); ok { - tables = append(tables, tbl.Name) - } -}) -``` - ---- - -### Type Assertion Examples - -```go -// Check statement type -switch stmt := astNode.Statements[0].(type) { -case *ast.SelectStatement: - processSelect(stmt) -case *ast.InsertStatement: - processInsert(stmt) -case *ast.UpdateStatement: - processUpdate(stmt) -case *ast.DeleteStatement: - processDelete(stmt) -case *ast.CreateTableStatement: - processCreateTable(stmt) -case *ast.MergeStatement: - processMerge(stmt) -} - -// Check expression type -func processExpression(expr ast.Expression) { - switch e := expr.(type) { - case *ast.Identifier: - fmt.Printf("Column: %s.%s\n", e.Table, e.Name) - case *ast.LiteralValue: - fmt.Printf("Literal: %v (%s)\n", e.Value, e.Type) - case *ast.FunctionCall: - fmt.Printf("Function: %s with %d args\n", e.Name, len(e.Arguments)) - if e.Over != nil { - fmt.Println(" (window function)") - } - case *ast.BinaryExpression: - fmt.Printf("Binary: %s\n", e.Operator) - case *ast.CaseExpression: - fmt.Printf("CASE with %d WHEN clauses\n", len(e.WhenClauses)) - } -} -``` - -## Models - -### Package: `github.com/ajitpratap0/GoSQLX/pkg/models` - -Core data structures used throughout the 
library. - -### Type: `Token` -Represents a lexical token. - -```go -type Token struct { - Type TokenType // Token type enum - Value string // Token value -} -``` - -### Type: `TokenWithSpan` -Token with position information. - -```go -type TokenWithSpan struct { - Token Token - Start Location // Start position - End Location // End position -} -``` - -### Type: `Location` -Position in source text. - -```go -type Location struct { - Line int // 1-based line number - Column int // 1-based column number - Index int // 0-based byte offset -} -``` - -### Type: `TokenType` -Enumeration of token types. - -```go -type TokenType int - -const ( - TokenTypeEOF TokenType = 0 - TokenTypeUnknown TokenType = 1 - TokenTypeIdentifier TokenType = 14 - TokenTypeNumber TokenType = 11 - TokenTypeSingleQuotedString TokenType = 31 - TokenTypeDoubleQuotedString TokenType = 32 - // ... many more -) -``` - -## Error Handling - -### Type: `TokenizerError` -Tokenization errors with position information. - -```go -type TokenizerError struct { - Message string - Location Location -} -``` - -**Example:** -```go -tokens, err := tkz.Tokenize(sqlBytes) -if err != nil { - if tkErr, ok := err.(tokenizer.TokenizerError); ok { - fmt.Printf("Error at line %d, column %d: %s\n", - tkErr.Location.Line, - tkErr.Location.Column, - tkErr.Message) - } -} -``` - -### Type: `ParseError` -Parsing errors with context. - -```go -type ParseError struct { - Message string - Token Token -} -``` - -## Performance Considerations - -### Object Pooling Best Practices - -1. **Always use defer for cleanup:** -```go -tkz := tokenizer.GetTokenizer() -defer tokenizer.PutTokenizer(tkz) // Ensures cleanup even on panic -``` - -2. 
**Don't store pooled objects:** -```go -// BAD: Storing pooled object -type MyStruct struct { - tkz *Tokenizer // DON'T DO THIS -} - -// GOOD: Get from pool when needed -func (m *MyStruct) Process(sql []byte) error { - tkz := tokenizer.GetTokenizer() - defer tokenizer.PutTokenizer(tkz) - return tkz.Tokenize(sql) -} -``` - -3. **Batch operations efficiently:** -```go -func ProcessQueries(queries []string) [][]models.TokenWithSpan { - tkz := tokenizer.GetTokenizer() - defer tokenizer.PutTokenizer(tkz) - - results := make([][]models.TokenWithSpan, len(queries)) - for i, query := range queries { - tokens, _ := tkz.Tokenize([]byte(query)) - results[i] = tokens - tkz.Reset() // Reset between uses - } - return results -} -``` - -### Memory Optimization - -- **Zero-copy tokenization**: The tokenizer works directly on input bytes -- **Pool reuse**: ~60-80% memory reduction vs creating new instances -- **Minimal allocations**: Most operations are allocation-free - -### Concurrency Guidelines - -All pool operations are thread-safe: - -```go -func ConcurrentTokenization(queries []string) { - var wg sync.WaitGroup - for _, query := range queries { - wg.Add(1) - go func(sql string) { - defer wg.Done() - - tkz := tokenizer.GetTokenizer() - defer tokenizer.PutTokenizer(tkz) - - tokens, _ := tkz.Tokenize([]byte(sql)) - // Process tokens... 
- }(query) - } - wg.Wait() -} -``` - -## Complete Example - -```go -package main - -import ( - "fmt" - "log" - - "github.com/ajitpratap0/GoSQLX/pkg/sql/tokenizer" - "github.com/ajitpratap0/GoSQLX/pkg/sql/parser" - "github.com/ajitpratap0/GoSQLX/pkg/sql/ast" - "github.com/ajitpratap0/GoSQLX/pkg/sql/token" -) - -func main() { - sql := ` - SELECT u.id, u.name, COUNT(o.id) as order_count - FROM users u - LEFT JOIN orders o ON u.id = o.user_id - WHERE u.created_at >= '2024-01-01' - GROUP BY u.id, u.name - HAVING COUNT(o.id) > 5 - ORDER BY order_count DESC - LIMIT 10 - ` - - // Step 1: Tokenize - tkz := tokenizer.GetTokenizer() - defer tokenizer.PutTokenizer(tkz) - - tokens, err := tkz.Tokenize([]byte(sql)) - if err != nil { - log.Fatal("Tokenization error:", err) - } - - // Step 2: Convert to parser tokens - parserTokens := make([]token.Token, 0, len(tokens)) - for _, tok := range tokens { - if tok.Token.Type == models.TokenTypeEOF { - break - } - parserTokens = append(parserTokens, token.Token{ - Type: fmt.Sprintf("%d", tok.Token.Type), - Literal: tok.Token.Value, - }) - } - - // Step 3: Parse - p := parser.NewParser() - defer p.Release() - - astNode, err := p.Parse(parserTokens) - if err != nil { - log.Fatal("Parse error:", err) - } - - // Step 4: Analyze AST - if stmt, ok := astNode.(*ast.SelectStatement); ok { - fmt.Printf("Found SELECT with %d columns\n", len(stmt.Columns)) - fmt.Printf("Has WHERE: %v\n", stmt.Where != nil) - fmt.Printf("Has GROUP BY: %v\n", len(stmt.GroupBy) > 0) - fmt.Printf("Has ORDER BY: %v\n", len(stmt.OrderBy) > 0) - } -} -``` - -## Keywords Package +## Keywords Package ### Package: `github.com/ajitpratap0/GoSQLX/pkg/sql/keywords` -The Keywords package provides SQL keyword recognition, categorization, and multi-dialect support for PostgreSQL, MySQL, SQL Server, Oracle, and SQLite. 
- -### Overview - -**Key Features:** -- **Multi-Dialect Support**: PostgreSQL, MySQL, SQL Server, Oracle, SQLite -- **Keyword Categorization**: Reserved, DML, DDL, functions, operators, data types -- **Compound Keywords**: GROUP BY, ORDER BY, LEFT JOIN, NULLS FIRST, etc. -- **Case-Insensitive**: Recognizes keywords in any case (SELECT, select, Select) -- **Thread-Safe**: All operations safe for concurrent use -- **Extensible**: Support for adding custom keywords - -### Core Types - -#### Type: `Keywords` - -Main keyword registry for a specific SQL dialect. - -```go -type Keywords struct { - dialect SQLDialect - // Internal keyword maps -} -``` - -**Usage:** -```go -kw := keywords.New(keywords.PostgreSQL) -if kw.IsKeyword("SELECT") { - fmt.Println("SELECT is a keyword") -} -``` - -#### Type: `SQLDialect` - -Supported SQL dialects. - -```go -type SQLDialect int - -const ( - PostgreSQL SQLDialect = iota // PostgreSQL dialect - MySQL // MySQL dialect - SQLServer // SQL Server dialect - Oracle // Oracle dialect - SQLite // SQLite dialect - Generic // SQL-99 standard keywords -) -``` - -**Example:** -```go -// Create keyword registry for specific dialect -pgKw := keywords.New(keywords.PostgreSQL) -myKw := keywords.New(keywords.MySQL) -genericKw := keywords.New(keywords.Generic) -``` - -#### Type: `KeywordCategory` - -Keyword classification. - -```go -type KeywordCategory int - -const ( - CategoryReserved KeywordCategory = iota // Reserved keywords (SELECT, FROM, WHERE) - CategoryDML // Data manipulation (INSERT, UPDATE, DELETE) - CategoryDDL // Data definition (CREATE, ALTER, DROP) - CategoryFunction // Function names (COUNT, SUM, AVG) - CategoryOperator // Operators (AND, OR, NOT, LIKE) - CategoryDataType // Data types (INTEGER, VARCHAR, TIMESTAMP) -) -``` - -### Core Functions - -#### Function: `New` - -Creates a keyword registry for a specific SQL dialect. 
- -```go -func New(dialect SQLDialect) *Keywords -``` - -**Parameters:** -- `dialect`: SQL dialect to use (PostgreSQL, MySQL, SQLite, etc.) - -**Returns:** -- `*Keywords`: Keyword registry instance - -**Example:** -```go -kw := keywords.New(keywords.PostgreSQL) -``` - -#### Method: `IsKeyword` - -Checks if a word is a SQL keyword (case-insensitive). - -```go -func (k *Keywords) IsKeyword(word string) bool -``` - -**Parameters:** -- `word`: Word to check - -**Returns:** -- `bool`: true if word is a keyword - -**Example:** -```go -kw := keywords.New(keywords.Generic) - -kw.IsKeyword("SELECT") // true -kw.IsKeyword("select") // true -kw.IsKeyword("SeLeCt") // true -kw.IsKeyword("foo") // false -``` - -#### Method: `IsReserved` - -Checks if a keyword is reserved (cannot be used as identifier without quoting). - -```go -func (k *Keywords) IsReserved(word string) bool -``` - -**Parameters:** -- `word`: Word to check - -**Returns:** -- `bool`: true if word is a reserved keyword - -**Example:** -```go -kw := keywords.New(keywords.PostgreSQL) - -if kw.IsReserved("TABLE") { - fmt.Println("TABLE is reserved - must quote if used as identifier") -} -``` - -#### Method: `GetKeyword` - -Gets detailed keyword information. - -```go -func (k *Keywords) GetKeyword(word string) *Keyword -``` - -**Parameters:** -- `word`: Keyword to look up - -**Returns:** -- `*Keyword`: Keyword details (TokenType, Category), or nil if not found - -**Example:** -```go -kw := keywords.New(keywords.Generic) -keyword := kw.GetKeyword("SELECT") -if keyword != nil { - fmt.Printf("Type: %s, Category: %d\n", keyword.TokenType, keyword.Category) -} -``` - -#### Method: `GetTokenType` - -Gets the token type for a keyword. 
- -```go -func (k *Keywords) GetTokenType(word string) string -``` - -**Parameters:** -- `word`: Keyword to look up - -**Returns:** -- `string`: Token type (e.g., "SELECT", "INSERT", "JOIN"), or empty string if not found - -**Example:** -```go -kw := keywords.New(keywords.Generic) -tokenType := kw.GetTokenType("select") // Returns "SELECT" -``` - -#### Method: `IsCompoundKeyword` - -Checks if two words form a compound keyword (e.g., GROUP BY, LEFT JOIN). - -```go -func (k *Keywords) IsCompoundKeyword(word1, word2 string) bool -``` - -**Parameters:** -- `word1`: First word -- `word2`: Second word - -**Returns:** -- `bool`: true if words form a compound keyword - -**Example:** -```go -kw := keywords.New(keywords.Generic) - -kw.IsCompoundKeyword("GROUP", "BY") // true -kw.IsCompoundKeyword("ORDER", "BY") // true -kw.IsCompoundKeyword("LEFT", "JOIN") // true -kw.IsCompoundKeyword("NULLS", "FIRST") // true -kw.IsCompoundKeyword("SELECT", "FROM") // false (not compound) -``` - -#### Method: `GetCompoundKeywordType` - -Gets the token type for a compound keyword. - -```go -func (k *Keywords) GetCompoundKeywordType(word1, word2 string) string -``` - -**Parameters:** -- `word1`: First word -- `word2`: Second word - -**Returns:** -- `string`: Compound keyword token type, or empty string if not compound - -**Example:** -```go -kw := keywords.New(keywords.Generic) - -kw.GetCompoundKeywordType("GROUP", "BY") // "GROUP BY" -kw.GetCompoundKeywordType("ORDER", "BY") // "ORDER BY" -kw.GetCompoundKeywordType("LEFT", "JOIN") // "LEFT JOIN" -kw.GetCompoundKeywordType("NULLS", "FIRST") // "NULLS FIRST" -``` - -#### Method: `AddKeyword` - -Adds a custom keyword (for extensions). 
- -```go -func (k *Keywords) AddKeyword(word string, tokenType string, category KeywordCategory) -``` - -**Parameters:** -- `word`: Keyword to add -- `tokenType`: Token type for the keyword -- `category`: Keyword category - -**Example:** -```go -kw := keywords.New(keywords.Generic) -kw.AddKeyword("CUSTOM", "CUSTOM", keywords.CategoryReserved) -``` - -### Keyword Categories - -#### Reserved Keywords - -Core SQL statement keywords that cannot be used as identifiers without quoting: - -``` -SELECT, FROM, WHERE, INSERT, UPDATE, DELETE, CREATE, ALTER, DROP, -JOIN, INNER, LEFT, RIGHT, OUTER, FULL, CROSS, NATURAL, -GROUP, ORDER, HAVING, UNION, EXCEPT, INTERSECT, -WITH, RECURSIVE, AS, ON, USING, -WINDOW, PARTITION, OVER, ROWS, RANGE -``` - -#### DML Keywords - -Data manipulation modifiers: - -``` -DISTINCT, ALL, FETCH, FIRST, NEXT, LAST, ONLY, -WITH TIES, NULLS, LIMIT, OFFSET -``` - -#### Compound Keywords - -Multi-word keywords recognized as single tokens: - -``` -GROUP BY, ORDER BY, -LEFT JOIN, RIGHT JOIN, FULL JOIN, CROSS JOIN, NATURAL JOIN, -INNER JOIN, LEFT OUTER JOIN, RIGHT OUTER JOIN, FULL OUTER JOIN, -UNION ALL, WITH TIES, NULLS FIRST, NULLS LAST -``` - -#### Window Function Keywords - -Window function names and frame specifications: - -``` -ROW_NUMBER, RANK, DENSE_RANK, NTILE, PERCENT_RANK, CUME_DIST, -LAG, LEAD, FIRST_VALUE, LAST_VALUE, NTH_VALUE, -ROWS BETWEEN, RANGE BETWEEN, UNBOUNDED PRECEDING, CURRENT ROW -``` - -### Dialect-Specific Keywords - -#### PostgreSQL-Specific - -```go -pgKw := keywords.New(keywords.PostgreSQL) - -// PostgreSQL-specific keywords -pgKw.IsKeyword("ILIKE") // Case-insensitive LIKE -pgKw.IsKeyword("SIMILAR") // SIMILAR TO operator -pgKw.IsKeyword("MATERIALIZED") // Materialized views -pgKw.IsKeyword("CONCURRENTLY") // Concurrent operations -pgKw.IsKeyword("RETURNING") // RETURNING clause -``` - -**PostgreSQL Keywords:** -``` -MATERIALIZED, ILIKE, SIMILAR, FREEZE, ANALYSE, ANALYZE, -CONCURRENTLY, REINDEX, TOAST, NOWAIT, RETURNING -``` - 
-#### MySQL-Specific - -```go -myKw := keywords.New(keywords.MySQL) - -// MySQL-specific keywords -myKw.IsKeyword("UNSIGNED") // Unsigned modifier -myKw.IsKeyword("ZEROFILL") // Zero-fill display -myKw.IsKeyword("FORCE") // Force index -myKw.IsKeyword("IGNORE") // Ignore errors -``` - -**MySQL Keywords:** -``` -BINARY, CHAR, VARCHAR, DATETIME, DECIMAL, UNSIGNED, ZEROFILL, -FORCE, IGNORE, INDEX, KEY, KILL, OPTION, PURGE, READ, WRITE, -STATUS, VARIABLES -``` - -#### SQLite-Specific - -```go -sqliteKw := keywords.New(keywords.SQLite) - -// SQLite-specific keywords -sqliteKw.IsKeyword("AUTOINCREMENT") // Auto-increment -sqliteKw.IsKeyword("CONFLICT") // Conflict resolution -sqliteKw.IsKeyword("REPLACE") // Replace operation -``` - -**SQLite Keywords:** -``` -ABORT, ACTION, AFTER, ATTACH, AUTOINCREMENT, CONFLICT, DATABASE, -DETACH, EXCLUSIVE, INDEXED, INSTEAD, PLAN, QUERY, RAISE, REPLACE, -TEMP, TEMPORARY, VACUUM, VIRTUAL -``` - -### Usage Examples - -#### Basic Keyword Recognition - -```go -package main - -import ( - "fmt" - "github.com/ajitpratap0/GoSQLX/pkg/sql/keywords" -) - -func main() { - kw := keywords.New(keywords.PostgreSQL) - - // Check if word is a keyword - if kw.IsKeyword("SELECT") { - fmt.Println("SELECT is a keyword") - } - - // Check if reserved - if kw.IsReserved("TABLE") { - fmt.Println("TABLE is reserved - quote if used as identifier") - } - - // Get keyword info - keyword := kw.GetKeyword("JOIN") - if keyword != nil { - fmt.Printf("Type: %s, Category: %d\n", keyword.TokenType, keyword.Category) - } -} -``` - -#### Compound Keyword Detection - -```go -kw := keywords.New(keywords.Generic) - -// Check compound keywords -if kw.IsCompoundKeyword("GROUP", "BY") { - fmt.Println("GROUP BY is a compound keyword") -} - -if kw.IsCompoundKeyword("NULLS", "FIRST") { - fmt.Println("NULLS FIRST is a compound keyword") -} - -// Get compound keyword type -tokenType := kw.GetCompoundKeywordType("LEFT", "JOIN") -fmt.Printf("Token type: %s\n", tokenType) // "LEFT JOIN" 
-``` - -#### Identifier Validation - -```go -func ValidateIdentifier(name string) error { - kw := keywords.New(keywords.PostgreSQL) - - if kw.IsReserved(name) { - return fmt.Errorf("'%s' is a reserved keyword - must be quoted", name) - } - - return nil -} - -// Usage -err := ValidateIdentifier("table") // Error: 'table' is reserved -err := ValidateIdentifier("users") // OK -``` - -#### SQL Formatter - -```go -func FormatKeyword(word string, style string) string { - kw := keywords.New(keywords.Generic) - - if !kw.IsKeyword(word) { - return word // Not a keyword, return as-is - } - - switch style { - case "upper": - return strings.ToUpper(word) - case "lower": - return strings.ToLower(word) - case "title": - return strings.Title(strings.ToLower(word)) - default: - return word - } -} - -// Usage -formatted := FormatKeyword("select", "upper") // "SELECT" -``` - -#### Dialect Switching - -```go -func AnalyzeKeywords(sql string, dialect keywords.SQLDialect) { - kw := keywords.New(dialect) - words := strings.Fields(sql) - - for _, word := range words { - if kw.IsKeyword(word) { - category := kw.GetKeyword(word).Category - fmt.Printf("%s: category=%d\n", word, category) - } - } -} - -// Usage for different dialects -AnalyzeKeywords("SELECT * FROM users", keywords.PostgreSQL) -AnalyzeKeywords("SELECT * FROM users", keywords.MySQL) -``` - -### Integration with Tokenizer - -The keywords package is used by the tokenizer to identify SQL keywords: - -```go -// In tokenizer -kw := keywords.New(keywords.PostgreSQL) - -// Check if identifier is actually a keyword -if kw.IsKeyword(identifierText) { - tokenType = kw.GetTokenType(identifierText) -} else { - tokenType = "IDENTIFIER" -} - -// Check for compound keywords -if kw.IsCompoundKeyword(currentWord, nextWord) { - tokenType = kw.GetCompoundKeywordType(currentWord, nextWord) - // Consume both words -} -``` - -### Integration with Parser - -The parser uses keyword information for syntax validation: - -```go -// Check if next token 
is a specific keyword -if p.currentToken.Type == "GROUP" { - // Expecting "BY" for GROUP BY - if p.peekToken.Type == "BY" { - // Parse GROUP BY clause - } -} - -// Compound keyword handling -if p.currentToken.Type == "NULLS" { - if p.peekToken.Type == "FIRST" || p.peekToken.Type == "LAST" { - // Parse NULLS FIRST/LAST clause - } -} -``` - -### Case Sensitivity - -All keyword matching is **case-insensitive**: - -```go -kw := keywords.New(keywords.Generic) - -kw.IsKeyword("SELECT") // true -kw.IsKeyword("select") // true -kw.IsKeyword("Select") // true -kw.IsKeyword("SeLeCt") // true -``` - -### Performance Characteristics - -- **Lookup Time**: O(1) hash map lookups -- **Memory**: Pre-allocated keyword maps (~10KB per dialect) -- **Thread-Safe**: No synchronization overhead for reads -- **Cache-Friendly**: Keywords stored in contiguous memory - -### Best Practices - -#### 1. Create Once, Reuse - -```go -// GOOD: Create once at package level -var globalKeywords = keywords.New(keywords.PostgreSQL) - -func IsKeyword(word string) bool { - return globalKeywords.IsKeyword(word) -} - -// BAD: Creating repeatedly (wasteful) -func IsKeyword(word string) bool { - kw := keywords.New(keywords.PostgreSQL) // Creates new instance every call - return kw.IsKeyword(word) -} -``` - -#### 2. Use Appropriate Dialect - -```go -// Match your database -pgKeywords := keywords.New(keywords.PostgreSQL) // For PostgreSQL -myKeywords := keywords.New(keywords.MySQL) // For MySQL -genericKeywords := keywords.New(keywords.Generic) // For SQL-99 standard -``` - -#### 3. 
Check Reserved Keywords for Identifiers - -```go -func ValidateTableName(name string) error { - kw := keywords.New(keywords.PostgreSQL) - - if kw.IsReserved(name) { - return fmt.Errorf("'%s' is reserved - must be quoted", name) - } - - return nil -} -``` - -### Common Patterns - -#### Pattern 1: Syntax Highlighting - -```go -func HighlightSQL(sql string) string { - kw := keywords.New(keywords.Generic) - words := strings.Fields(sql) - - for i, word := range words { - if kw.IsKeyword(word) { - words[i] = fmt.Sprintf("%s", word) - } - } - - return strings.Join(words, " ") -} -``` - -#### Pattern 2: Keyword Case Normalization - -```go -func NormalizeKeywords(sql string) string { - kw := keywords.New(keywords.Generic) - words := strings.Fields(sql) - - for i, word := range words { - if kw.IsKeyword(word) { - words[i] = strings.ToUpper(word) // Normalize to uppercase - } - } - - return strings.Join(words, " ") -} -``` - -#### Pattern 3: Identifier Quoting - -```go -func QuoteIfNeeded(identifier string, dialect keywords.SQLDialect) string { - kw := keywords.New(dialect) - - if kw.IsReserved(identifier) { - return fmt.Sprintf("\"%s\"", identifier) // Quote reserved keywords - } - - return identifier -} -``` - -## Errors Package - -### Package: `github.com/ajitpratap0/GoSQLX/pkg/errors` - -The Errors package provides a structured error system with error codes, rich context, and intelligent hints for debugging SQL parsing issues. - -### Overview - -**Key Features:** -- **Error Codes**: Unique codes (E1xxx, E2xxx, etc.) 
for programmatic error handling -- **Rich Context**: SQL source context with line/column highlighting -- **Intelligent Hints**: Actionable suggestions to fix errors -- **Documentation Links**: Auto-generated help URLs for each error code -- **Error Chaining**: Support for underlying cause errors (error wrapping) -- **Formatted Output**: Pretty-printed errors with context visualization - -### Core Types - -#### Type: `ErrorCode` - -Unique identifier for each error type. - -```go -type ErrorCode string -``` - -**Error Code Categories:** -- **E1xxx**: Tokenizer errors (lexical analysis) -- **E2xxx**: Parser syntax errors -- **E3xxx**: Semantic errors -- **E4xxx**: Unsupported features - -#### Type: `Error` - -Structured error with rich context and hints. - -```go -type Error struct { - Code ErrorCode // Unique error code (e.g., "E2001") - Message string // Human-readable error message - Location models.Location // Line and column where error occurred - Context *ErrorContext // SQL context around the error - Hint string // Suggestion to fix the error - HelpURL string // Documentation link for this error - Cause error // Underlying error if any -} -``` - -**Example:** -```go -err := &errors.Error{ - Code: errors.ErrCodeUnexpectedToken, - Message: "expected FROM, got WHERE", - Location: models.Location{Line: 1, Column: 15}, -} -``` - -#### Type: `ErrorContext` - -SQL source context for error display. 
- -```go -type ErrorContext struct { - SQL string // Original SQL query - StartLine int // Starting line number (1-indexed) - EndLine int // Ending line number (1-indexed) - HighlightCol int // Column to highlight (1-indexed) - HighlightLen int // Length of highlight (characters) -} -``` - -### Error Codes - -#### Tokenizer Errors (E1xxx) - -Lexical analysis errors during tokenization: - -| Code | Constant | Description | -|------|----------|-------------| -| E1001 | `ErrCodeUnexpectedChar` | Unexpected character in input | -| E1002 | `ErrCodeUnterminatedString` | String literal not closed | -| E1003 | `ErrCodeInvalidNumber` | Invalid numeric literal | -| E1004 | `ErrCodeInvalidOperator` | Invalid operator sequence | -| E1005 | `ErrCodeInvalidIdentifier` | Invalid identifier format | -| E1006 | `ErrCodeInputTooLarge` | Input exceeds size limits (DoS protection) | -| E1007 | `ErrCodeTokenLimitReached` | Token count exceeds limit (DoS protection) | -| E1008 | `ErrCodeTokenizerPanic` | Tokenizer panic recovered | - -**Example:** -```go -// Unterminated string -sql := `SELECT * FROM users WHERE name = 'John` -// Error: E1002 - String literal not closed at line 1, column 37 -``` - -#### Parser Syntax Errors (E2xxx) - -Syntax errors during parsing: - -| Code | Constant | Description | -|------|----------|-------------| -| E2001 | `ErrCodeUnexpectedToken` | Unexpected token encountered | -| E2002 | `ErrCodeExpectedToken` | Expected specific token not found | -| E2003 | `ErrCodeMissingClause` | Required SQL clause missing | -| E2004 | `ErrCodeInvalidSyntax` | General syntax error | -| E2005 | `ErrCodeIncompleteStatement` | Statement incomplete | -| E2006 | `ErrCodeInvalidExpression` | Invalid expression syntax | -| E2007 | `ErrCodeRecursionDepthLimit` | Recursion depth exceeded (DoS protection) | -| E2008 | `ErrCodeUnsupportedDataType` | Data type not supported | -| E2009 | `ErrCodeUnsupportedConstraint` | Constraint type not supported | -| E2010 | `ErrCodeUnsupportedJoin` 
| JOIN type not supported | -| E2011 | `ErrCodeInvalidCTE` | Invalid CTE (WITH clause) syntax | -| E2012 | `ErrCodeInvalidSetOperation` | Invalid set operation (UNION/EXCEPT/INTERSECT) | - -**Example:** -```go -// Missing FROM clause -sql := `SELECT * WHERE id = 1` -// Error: E2003 - Required SQL clause missing: FROM -``` - -#### Semantic Errors (E3xxx) - -Semantic validation errors: - -| Code | Constant | Description | -|------|----------|-------------| -| E3001 | `ErrCodeUndefinedTable` | Table not defined | -| E3002 | `ErrCodeUndefinedColumn` | Column not defined | -| E3003 | `ErrCodeTypeMismatch` | Type mismatch in expression | -| E3004 | `ErrCodeAmbiguousColumn` | Ambiguous column reference | - -**Example:** -```go -// Ambiguous column (multiple tables have 'id' column) -sql := `SELECT id FROM users u JOIN orders o ON u.id = o.user_id` -// Error: E3004 - Ambiguous column reference: 'id' -``` - -#### Unsupported Features (E4xxx) - -Features not yet implemented: - -| Code | Constant | Description | -|------|----------|-------------| -| E4001 | `ErrCodeUnsupportedFeature` | Feature not yet supported | -| E4002 | `ErrCodeUnsupportedDialect` | SQL dialect not supported | - -### Error Builder Functions - -#### Function: `NewError` - -Creates a new structured error. - -```go -func NewError(code ErrorCode, message string, location models.Location) *Error -``` - -**Parameters:** -- `code`: Error code (e.g., `ErrCodeUnexpectedToken`) -- `message`: Human-readable error message -- `location`: Line and column where error occurred - -**Returns:** -- `*Error`: New structured error with auto-generated help URL - -**Example:** -```go -err := errors.NewError( - errors.ErrCodeExpectedToken, - "expected FROM, got WHERE", - models.Location{Line: 1, Column: 15}, -) -// Auto-generated HelpURL: https://docs.gosqlx.dev/errors/E2002 -``` - -#### Method: `WithContext` - -Adds SQL context to the error (shows source code around error). 
- -```go -func (e *Error) WithContext(sql string, highlightLen int) *Error -``` - -**Parameters:** -- `sql`: Original SQL query -- `highlightLen`: Number of characters to highlight - -**Returns:** -- `*Error`: Error with context (chainable) - -**Example:** -```go -err := errors.NewError( - errors.ErrCodeUnexpectedToken, - "unexpected WHERE", - models.Location{Line: 1, Column: 9}, -).WithContext("SELECT * WHERE id = 1", 5) // Highlight "WHERE" -``` - -#### Method: `WithHint` - -Adds a suggestion hint to fix the error. - -```go -func (e *Error) WithHint(hint string) *Error -``` - -**Parameters:** -- `hint`: Actionable suggestion to fix the error - -**Returns:** -- `*Error`: Error with hint (chainable) - -**Example:** -```go -err := errors.NewError( - errors.ErrCodeMissingClause, - "missing FROM clause", - models.Location{Line: 1, Column: 9}, -).WithHint("Add 'FROM table_name' after SELECT columns") -``` - -#### Method: `WithCause` - -Adds an underlying cause error (error wrapping). - -```go -func (e *Error) WithCause(cause error) *Error -``` - -**Parameters:** -- `cause`: Underlying error that caused this error - -**Returns:** -- `*Error`: Error with cause (chainable) - -**Example:** -```go -err := errors.NewError( - errors.ErrCodeTokenizerPanic, - "tokenizer panic", - models.Location{Line: 1, Column: 1}, -).WithCause(underlyingErr) -``` - -### Helper Functions - -#### Function: `IsCode` - -Checks if an error has a specific error code. - -```go -func IsCode(err error, code ErrorCode) bool -``` - -**Parameters:** -- `err`: Error to check -- `code`: Error code to match - -**Returns:** -- `bool`: true if error has the specified code - -**Example:** -```go -if errors.IsCode(err, errors.ErrCodeUnterminatedString) { - fmt.Println("String literal not closed") -} -``` - -#### Function: `GetCode` - -Returns the error code from an error. 
- -```go -func GetCode(err error) ErrorCode -``` - -**Parameters:** -- `err`: Error to extract code from - -**Returns:** -- `ErrorCode`: Error code, or empty string if not a structured error - -**Example:** -```go -code := errors.GetCode(err) -if code == errors.ErrCodeMissingClause { - // Handle missing clause error -} -``` - -### Usage Examples - -#### Basic Error Creation - -```go -package main - -import ( - "fmt" - "github.com/ajitpratap0/GoSQLX/pkg/errors" - "github.com/ajitpratap0/GoSQLX/pkg/models" -) - -func main() { - // Create simple error - err := errors.NewError( - errors.ErrCodeUnexpectedToken, - "expected FROM, got WHERE", - models.Location{Line: 1, Column: 15}, - ) - - fmt.Println(err) - // Output: - // Error E2001 at line 1, column 15: expected FROM, got WHERE - // Help: https://docs.gosqlx.dev/errors/E2001 -} -``` - -#### Error with Full Context - -```go -sql := `SELECT * WHERE id = 1` - -err := errors.NewError( - errors.ErrCodeMissingClause, - "missing FROM clause", - models.Location{Line: 1, Column: 10}, -).WithContext(sql, 5).WithHint("Add 'FROM table_name' after SELECT columns") - -fmt.Println(err) -// Output: -// Error E2003 at line 1, column 10: missing FROM clause -// -// 1 | SELECT * WHERE id = 1 -// ^^^^^ -// -// Hint: Add 'FROM table_name' after SELECT columns -// Help: https://docs.gosqlx.dev/errors/E2003 -``` - -#### Multi-Line SQL Context - -```go -sql := `SELECT id, name -FROM users -WHERE -GROUP BY id` - -err := errors.NewError( - errors.ErrCodeInvalidSyntax, - "WHERE clause requires a condition", - models.Location{Line: 3, Column: 1}, -).WithContext(sql, 5) - -fmt.Println(err) -// Output: -// Error E2004 at line 3, column 1: WHERE clause requires a condition -// -// 2 | FROM users -// 3 | WHERE -// ^^^^^ -// 4 | GROUP BY id -// -// Help: https://docs.gosqlx.dev/errors/E2004 -``` - -#### Error Code Checking - -```go -_, err := parser.Parse(tokens) -if err != nil { - // Check for specific error codes - if errors.IsCode(err, 
errors.ErrCodeUnterminatedString) { - fmt.Println("Found unterminated string - check your quotes") - } else if errors.IsCode(err, errors.ErrCodeMissingClause) { - fmt.Println("SQL statement is incomplete") - } else { - fmt.Printf("Parse error: %v\n", err) - } -} -``` - -#### Error Code Extraction - -```go -_, err := parser.Parse(tokens) -if err != nil { - code := errors.GetCode(err) - - switch code { - case errors.ErrCodeTokenLimitReached: - log.Error("Query too complex - DoS protection triggered") - case errors.ErrCodeRecursionDepthLimit: - log.Error("Query nesting too deep - DoS protection triggered") - default: - log.Errorf("Parse error %s: %v", code, err) - } -} -``` - -#### Programmatic Error Handling - -```go -func HandleParseError(err error) { - if err == nil { - return - } - - // Extract structured error - sqlErr, ok := err.(*errors.Error) - if !ok { - fmt.Printf("Non-SQL error: %v\n", err) - return - } - - // Log error details - fmt.Printf("Error Code: %s\n", sqlErr.Code) - fmt.Printf("Location: Line %d, Column %d\n", sqlErr.Location.Line, sqlErr.Location.Column) - fmt.Printf("Message: %s\n", sqlErr.Message) - - if sqlErr.Hint != "" { - fmt.Printf("Suggestion: %s\n", sqlErr.Hint) - } - - // Check if tokenizer error - if sqlErr.Code[0] == 'E' && sqlErr.Code[1] == '1' { - fmt.Println("This is a tokenization error") - } - - // Check if parser error - if sqlErr.Code[0] == 'E' && sqlErr.Code[1] == '2' { - fmt.Println("This is a syntax error") - } -} -``` - -#### Chaining Error Context - -```go -func ParseSQL(sql string) (*ast.AST, error) { - tkz := tokenizer.GetTokenizer() - defer tokenizer.PutTokenizer(tkz) - - tokens, err := tkz.Tokenize([]byte(sql)) - if err != nil { - // Enhance tokenizer error with context - if sqlErr, ok := err.(*errors.Error); ok { - return nil, sqlErr.WithContext(sql, 1) - } - return nil, err - } - - p := parser.NewParser() - defer p.Release() - - ast, err := p.Parse(tokens) - if err != nil { - // Enhance parser error with context and 
hints - if sqlErr, ok := err.(*errors.Error); ok { - enhanced := sqlErr.WithContext(sql, 1) - - // Add intelligent hints based on error code - switch sqlErr.Code { - case errors.ErrCodeMissingClause: - enhanced = enhanced.WithHint("Check if all required clauses are present") - case errors.ErrCodeUnexpectedToken: - enhanced = enhanced.WithHint("Review SQL syntax around highlighted token") - } - - return nil, enhanced - } - return nil, err - } - - return ast, nil -} -``` - -### Error Formatting - -The `Error` type implements the `error` interface with rich formatting: - -```go -err := errors.NewError( - errors.ErrCodeUnexpectedToken, - "expected FROM, got WHERE", - models.Location{Line: 2, Column: 1}, -).WithContext(`SELECT id, name -WHERE id = 1`, 5).WithHint("Add 'FROM table_name' before WHERE clause") - -fmt.Println(err.Error()) -``` - -**Output:** -``` -Error E2001 at line 2, column 1: expected FROM, got WHERE - - 1 | SELECT id, name - 2 | WHERE id = 1 - ^^^^^ - -Hint: Add 'FROM table_name' before WHERE clause -Help: https://docs.gosqlx.dev/errors/E2001 -``` - -### Error Context Visualization - -The error context shows: -- **Line Before**: Provides context leading to the error -- **Error Line**: The line containing the error -- **Position Indicator**: `^` characters highlighting the error location -- **Line After**: Provides context following the error - -**Example:** -```go -sql := `SELECT id, name, email -FROM users -WHERE -ORDER BY id` - -err := errors.NewError( - errors.ErrCodeInvalidSyntax, - "WHERE clause requires a condition", - models.Location{Line: 3, Column: 1}, -).WithContext(sql, 5) -``` - -**Output:** -``` -Error E2004 at line 3, column 1: WHERE clause requires a condition - - 2 | FROM users - 3 | WHERE - ^^^^^ - 4 | ORDER BY id - -Help: https://docs.gosqlx.dev/errors/E2004 -``` - -### Best Practices - -#### 1. 
Always Add Context for User Errors - -```go -// GOOD: Rich error with context -err := errors.NewError( - errors.ErrCodeMissingClause, - "missing FROM clause", - models.Location{Line: 1, Column: 10}, -).WithContext(sql, 1).WithHint("Add 'FROM table_name' after SELECT columns") - -// LESS HELPFUL: Plain error without context -err := errors.NewError( - errors.ErrCodeMissingClause, - "missing FROM clause", - models.Location{Line: 1, Column: 10}, -) -``` - -#### 2. Use Error Codes for Programmatic Handling - -```go -// GOOD: Check error code for specific handling -if errors.IsCode(err, errors.ErrCodeTokenLimitReached) { - return errors.New("Query too complex - please simplify") -} - -// BAD: String matching (fragile) -if strings.Contains(err.Error(), "token limit") { - // Fragile - message might change -} -``` - -#### 3. Provide Actionable Hints - -```go -// GOOD: Specific, actionable hint -.WithHint("Add 'FROM table_name' after SELECT columns") - -// LESS HELPFUL: Vague hint -.WithHint("Fix the syntax error") -``` - -#### 4. 
Chain Error Context in Libraries - -```go -// GOOD: Preserve and enhance errors from lower layers -func ParseSQL(sql string) error { - ast, err := parser.Parse(tokens) - if err != nil { - if sqlErr, ok := err.(*errors.Error); ok { - return sqlErr.WithContext(sql, 1).WithHint("Check SQL syntax") - } - return err - } - return nil -} -``` - -### Error Categories by Code Prefix - -**Quick Reference:** - -| Prefix | Category | Examples | -|--------|----------|----------| -| E1xxx | Tokenizer Errors | E1002 (unterminated string), E1006 (input too large) | -| E2xxx | Parser Syntax Errors | E2001 (unexpected token), E2003 (missing clause) | -| E3xxx | Semantic Errors | E3001 (undefined table), E3004 (ambiguous column) | -| E4xxx | Unsupported Features | E4001 (unsupported feature), E4002 (unsupported dialect) | - -### Common Error Patterns - -#### Pattern 1: Tokenizer Error with Recovery - -```go -tokens, err := tkz.Tokenize([]byte(sql)) -if err != nil { - if errors.IsCode(err, errors.ErrCodeUnterminatedString) { - // Attempt recovery by adding closing quote - sql = sql + "'" - tokens, err = tkz.Tokenize([]byte(sql)) - } -} -``` - -#### Pattern 2: Parser Error with User-Friendly Message - -```go -_, err := parser.Parse(tokens) -if err != nil { - code := errors.GetCode(err) - - userMsg := map[errors.ErrorCode]string{ - errors.ErrCodeMissingClause: "Your SQL is missing a required clause", - errors.ErrCodeUnexpectedToken: "Unexpected word in your SQL query", - errors.ErrCodeInvalidSyntax: "SQL syntax is incorrect", - } - - if msg, ok := userMsg[code]; ok { - return fmt.Errorf("%s: %v", msg, err) - } - - return err -} -``` - -#### Pattern 3: Error Logging with Structured Fields - -```go -_, err := parser.Parse(tokens) -if err != nil { - if sqlErr, ok := err.(*errors.Error); ok { - log.WithFields(log.Fields{ - "error_code": sqlErr.Code, - "line": sqlErr.Location.Line, - "column": sqlErr.Location.Column, - "hint": sqlErr.Hint, - }).Error(sqlErr.Message) - } -} -``` - -## Metrics 
Package - -### Package: `github.com/ajitpratap0/GoSQLX/pkg/metrics` - -The Metrics package provides production performance monitoring and observability for GoSQLX operations with thread-safe atomic operations. - -### Overview - -**Key Features:** -- **Performance Monitoring**: Track tokenization operations, durations, and throughput -- **Memory Tracking**: Monitor object pool efficiency and hit rates -- **Error Analytics**: Categorize and count errors by type -- **Query Size Metrics**: Min, max, and average query sizes processed -- **Thread-Safe**: Lock-free atomic operations for counters -- **Zero Overhead When Disabled**: No performance impact when metrics collection is off -- **Production Ready**: Designed for high-throughput production environments - -### Core Types - -#### Type: `Metrics` - -Internal metrics collector (not exported). - -```go -type Metrics struct { - // Tokenization metrics - tokenizeOperations int64 // Total tokenization operations - tokenizeErrors int64 // Total tokenization errors - tokenizeDuration int64 // Total tokenization time (nanoseconds) - lastTokenizeTime int64 // Last tokenization timestamp - - // Memory metrics - poolGets int64 // Total pool retrievals - poolPuts int64 // Total pool returns - poolMisses int64 // Pool misses (had to create new) - - // Query size metrics - minQuerySize int64 // Minimum query size processed - maxQuerySize int64 // Maximum query size processed - totalQueryBytes int64 // Total bytes of SQL processed - - // Error tracking - errorsByType map[string]int64 - errorsMutex sync.RWMutex - - // Configuration - enabled bool - startTime time.Time -} -``` - -#### Type: `Stats` - -Performance statistics snapshot. 
- -```go -type Stats struct { - // Basic counts - TokenizeOperations int64 `json:"tokenize_operations"` - TokenizeErrors int64 `json:"tokenize_errors"` - ErrorRate float64 `json:"error_rate"` - - // Performance metrics - AverageDuration time.Duration `json:"average_duration"` - OperationsPerSecond float64 `json:"operations_per_second"` - - // Memory/Pool metrics - PoolGets int64 `json:"pool_gets"` - PoolPuts int64 `json:"pool_puts"` - PoolBalance int64 `json:"pool_balance"` - PoolMissRate float64 `json:"pool_miss_rate"` - - // Query size metrics - MinQuerySize int64 `json:"min_query_size"` - MaxQuerySize int64 `json:"max_query_size"` - AverageQuerySize float64 `json:"average_query_size"` - TotalBytesProcessed int64 `json:"total_bytes_processed"` - - // Timing - Uptime time.Duration `json:"uptime"` - LastOperationTime time.Time `json:"last_operation_time"` - - // Error breakdown - ErrorsByType map[string]int64 `json:"errors_by_type"` -} -``` - -**Stats Fields:** - -| Field | Type | Description | -|-------|------|-------------| -| `TokenizeOperations` | int64 | Total tokenization operations performed | -| `TokenizeErrors` | int64 | Total tokenization errors encountered | -| `ErrorRate` | float64 | Error rate (errors / operations) | -| `AverageDuration` | time.Duration | Average tokenization duration | -| `OperationsPerSecond` | float64 | Throughput (ops/sec) | -| `PoolGets` | int64 | Total pool retrievals | -| `PoolPuts` | int64 | Total pool returns | -| `PoolBalance` | int64 | Pool balance (gets - puts) | -| `PoolMissRate` | float64 | Pool miss rate (misses / gets) | -| `MinQuerySize` | int64 | Minimum query size (bytes) | -| `MaxQuerySize` | int64 | Maximum query size (bytes) | -| `AverageQuerySize` | float64 | Average query size (bytes) | -| `TotalBytesProcessed` | int64 | Total SQL bytes processed | -| `Uptime` | time.Duration | Time since metrics enabled | -| `LastOperationTime` | time.Time | Timestamp of last operation | -| `ErrorsByType` | map[string]int64 | 
Error counts by error message | - -### Configuration Functions - -#### Function: `Enable` - -Activates metrics collection. - -```go -func Enable() -``` - -**Example:** -```go -import "github.com/ajitpratap0/GoSQLX/pkg/metrics" - -func main() { - // Enable metrics at application startup - metrics.Enable() - defer metrics.Disable() - - // Metrics will now be collected - // ... -} -``` - -#### Function: `Disable` - -Deactivates metrics collection. - -```go -func Disable() -``` - -**Example:** -```go -// Disable metrics (stops collection) -metrics.Disable() -``` - -#### Function: `IsEnabled` - -Checks if metrics collection is active. - -```go -func IsEnabled() bool -``` - -**Returns:** -- `bool`: true if metrics collection is enabled - -**Example:** -```go -if metrics.IsEnabled() { - fmt.Println("Metrics collection is active") -} -``` - -### Recording Functions - -#### Function: `RecordTokenization` - -Records a tokenization operation (automatically called by tokenizer). - -```go -func RecordTokenization(duration time.Duration, querySize int, err error) -``` - -**Parameters:** -- `duration`: Time taken for tokenization -- `querySize`: Size of SQL query in bytes -- `err`: Error if tokenization failed, nil otherwise - -**Example:** -```go -start := time.Now() -tokens, err := tkz.Tokenize([]byte(sql)) -metrics.RecordTokenization(time.Since(start), len(sql), err) -``` - -#### Function: `RecordPoolGet` - -Records a pool retrieval (automatically called by object pools). - -```go -func RecordPoolGet(fromPool bool) -``` - -**Parameters:** -- `fromPool`: true if object came from pool, false if new object created - -**Example:** -```go -// When getting from pool -tkz := tokenizerPool.Get() -metrics.RecordPoolGet(tkz != nil) // true if from pool, false if created new -``` - -#### Function: `RecordPoolPut` - -Records a pool return (automatically called by object pools). 
- -```go -func RecordPoolPut() -``` - -**Example:** -```go -// When returning to pool -tokenizerPool.Put(tkz) -metrics.RecordPoolPut() -``` - -### Query Functions - -#### Function: `GetStats` - -Returns current performance statistics snapshot. - -```go -func GetStats() Stats -``` - -**Returns:** -- `Stats`: Current performance statistics - -**Example:** -```go -stats := metrics.GetStats() - -fmt.Printf("Operations: %d\n", stats.TokenizeOperations) -fmt.Printf("Errors: %d (%.2f%%)\n", stats.TokenizeErrors, stats.ErrorRate*100) -fmt.Printf("Avg Duration: %v\n", stats.AverageDuration) -fmt.Printf("Throughput: %.2f ops/sec\n", stats.OperationsPerSecond) -fmt.Printf("Pool Hit Rate: %.2f%%\n", (1-stats.PoolMissRate)*100) -``` - -#### Function: `LogStats` - -Returns current statistics (alias for GetStats, useful for logging). - -```go -func LogStats() Stats -``` - -**Returns:** -- `Stats`: Current performance statistics - -**Example:** -```go -stats := metrics.LogStats() -log.Printf("Metrics: %+v", stats) -``` - -#### Function: `Reset` - -Clears all metrics (useful for testing). 
- -```go -func Reset() -``` - -**Example:** -```go -// Reset metrics to zero -metrics.Reset() -``` - -### Usage Examples - -#### Basic Metrics Collection - -```go -package main - -import ( - "fmt" - "time" - - "github.com/ajitpratap0/GoSQLX/pkg/metrics" - "github.com/ajitpratap0/GoSQLX/pkg/sql/tokenizer" -) - -func main() { - // Enable metrics collection - metrics.Enable() - defer metrics.Disable() - - // Process SQL queries - tkz := tokenizer.GetTokenizer() - defer tokenizer.PutTokenizer(tkz) - - sql := "SELECT * FROM users WHERE active = true" - tokens, err := tkz.Tokenize([]byte(sql)) - - // Metrics are automatically recorded by tokenizer - // Get current statistics - stats := metrics.GetStats() - fmt.Printf("Processed %d operations\n", stats.TokenizeOperations) - fmt.Printf("Average duration: %v\n", stats.AverageDuration) - fmt.Printf("Throughput: %.2f ops/sec\n", stats.OperationsPerSecond) -} -``` - -#### Production Monitoring - -```go -func MonitorPerformance() { - metrics.Enable() - - // Start metrics reporter - ticker := time.NewTicker(1 * time.Minute) - defer ticker.Stop() - - go func() { - for range ticker.C { - stats := metrics.GetStats() - - log.WithFields(log.Fields{ - "operations": stats.TokenizeOperations, - "errors": stats.TokenizeErrors, - "error_rate": stats.ErrorRate, - "avg_duration_us": stats.AverageDuration.Microseconds(), - "ops_per_sec": stats.OperationsPerSecond, - "pool_hit_rate": 1 - stats.PoolMissRate, - "avg_query_size": stats.AverageQuerySize, - "uptime": stats.Uptime, - }).Info("GoSQLX metrics") - } - }() -} -``` - -#### Error Tracking - -```go -func AnalyzeErrors() { - stats := metrics.GetStats() - - fmt.Printf("Total Errors: %d (%.2f%%)\n", - stats.TokenizeErrors, stats.ErrorRate*100) - - fmt.Println("\nError Breakdown:") - for errorType, count := range stats.ErrorsByType { - percentage := float64(count) / float64(stats.TokenizeOperations) * 100 - fmt.Printf(" %s: %d (%.2f%%)\n", errorType, count, percentage) - } -} -``` - -#### 
Pool Efficiency Monitoring - -```go -func MonitorPoolEfficiency() { - stats := metrics.GetStats() - - poolHitRate := (1 - stats.PoolMissRate) * 100 - fmt.Printf("Pool Statistics:\n") - fmt.Printf(" Gets: %d\n", stats.PoolGets) - fmt.Printf(" Puts: %d\n", stats.PoolPuts) - fmt.Printf(" Balance: %d\n", stats.PoolBalance) - fmt.Printf(" Hit Rate: %.2f%%\n", poolHitRate) - fmt.Printf(" Miss Rate: %.2f%%\n", stats.PoolMissRate*100) - - if poolHitRate < 90 { - log.Warn("Pool hit rate is below 90% - consider tuning pool size") - } -} -``` - -#### Query Size Analysis - -```go -func AnalyzeQuerySizes() { - stats := metrics.GetStats() - - fmt.Printf("Query Size Statistics:\n") - fmt.Printf(" Min: %d bytes\n", stats.MinQuerySize) - fmt.Printf(" Max: %d bytes\n", stats.MaxQuerySize) - fmt.Printf(" Average: %.2f bytes\n", stats.AverageQuerySize) - fmt.Printf(" Total Processed: %d bytes (%.2f MB)\n", - stats.TotalBytesProcessed, - float64(stats.TotalBytesProcessed)/(1024*1024)) - - // Detect potential issues - if stats.MaxQuerySize > 1024*1024 { // > 1MB - log.Warn("Large query detected - consider query optimization") - } -} -``` - -#### JSON Export +### Core Types +#### `Category` (Type) ```go -func ExportMetricsJSON() ([]byte, error) { - stats := metrics.GetStats() - return json.MarshalIndent(stats, "", " ") -} +const ( + CategoryReserved Category = iota + CategoryDML + CategoryDDL + CategoryDataType + CategoryFunction + CategoryOperator + CategoryJoin + CategoryWindow + CategoryAggregate +) +``` -func main() { - metrics.Enable() - // ... process queries +### Functions - // Export metrics as JSON - jsonData, err := ExportMetricsJSON() - if err != nil { - log.Fatal(err) - } +#### `IsKeyword(word string) bool` +Check if string is a SQL keyword. - fmt.Println(string(jsonData)) - // Output: - // { - // "tokenize_operations": 1000, - // "tokenize_errors": 5, - // "error_rate": 0.005, - // "average_duration": "150µs", - // "operations_per_second": 6666.67, - // ... 
- // } -} +```go +keywords.IsKeyword("SELECT") // true ``` -#### HTTP Metrics Endpoint +#### `GetCategory(word string) (Category, bool)` +Get keyword category. ```go -func SetupMetricsEndpoint() { - http.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) { - stats := metrics.GetStats() - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(stats) - }) - - http.ListenAndServe(":8080", nil) -} +cat, ok := keywords.GetCategory("SELECT") ``` -#### Prometheus Integration - -```go -import ( - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" -) +#### `IsDMLKeyword(word string) bool` +Check if DML keyword. -var ( - opsProcessed = promauto.NewCounter(prometheus.CounterOpts{ - Name: "gosqlx_tokenize_operations_total", - Help: "Total number of tokenization operations", - }) - - opsErrors = promauto.NewCounter(prometheus.CounterOpts{ - Name: "gosqlx_tokenize_errors_total", - Help: "Total number of tokenization errors", - }) - - avgDuration = promauto.NewGauge(prometheus.GaugeOpts{ - Name: "gosqlx_tokenize_duration_microseconds", - Help: "Average tokenization duration in microseconds", - }) -) +#### `IsDDLKeyword(word string) bool` +Check if DDL keyword. -func UpdatePrometheusMetrics() { - ticker := time.NewTicker(10 * time.Second) - defer ticker.Stop() +#### `IsReserved(word string) bool` +Check if reserved keyword. - for range ticker.C { - stats := metrics.GetStats() +#### `IsFunction(word string) bool` +Check if function name. - opsProcessed.Add(float64(stats.TokenizeOperations)) - opsErrors.Add(float64(stats.TokenizeErrors)) - avgDuration.Set(float64(stats.AverageDuration.Microseconds())) - } -} -``` +#### `IsAggregate(word string) bool` +Check if aggregate function. -#### Performance Alerting +#### `IsWindowFunction(word string) bool` +Check if window function. 
-```go -func MonitorWithAlerting() { - ticker := time.NewTicker(1 * time.Minute) - defer ticker.Stop() +#### `IsDataType(word string) bool` +Check if data type. - for range ticker.C { - stats := metrics.GetStats() +#### `GetAllKeywords() []string` +Get all keywords. - // Alert on high error rate - if stats.ErrorRate > 0.01 { // > 1% - alert("High error rate: %.2f%%", stats.ErrorRate*100) - } +#### `GetKeywordsByCategory(cat Category) []string` +Get keywords by category. - // Alert on slow performance - if stats.AverageDuration > 1*time.Millisecond { - alert("Slow tokenization: %v", stats.AverageDuration) - } +### Dialect-Specific Functions - // Alert on low pool efficiency - if stats.PoolMissRate > 0.1 { // > 10% - alert("Low pool hit rate: %.2f%%", (1-stats.PoolMissRate)*100) - } +#### `IsPostgreSQLKeyword(word string) bool` +#### `IsMySQLKeyword(word string) bool` +#### `IsSQLServerKeyword(word string) bool` +#### `IsOracleKeyword(word string) bool` +#### `IsSQLiteKeyword(word string) bool` - // Alert on low throughput - if stats.OperationsPerSecond < 1000 { - alert("Low throughput: %.2f ops/sec", stats.OperationsPerSecond) - } - } -} +#### `GetSuggestions(prefix string, maxResults int) []string` +Get keyword suggestions for autocomplete. -func alert(format string, args ...interface{}) { - msg := fmt.Sprintf(format, args...) - log.Warn(msg) - // Send to alerting system (PagerDuty, Slack, etc.) -} +```go +suggestions := keywords.GetSuggestions("SEL", 5) +// Returns: ["SELECT"] ``` -### Integration Patterns +--- -#### Pattern 1: Application Startup +## Models -```go -func main() { - // Enable metrics at startup - metrics.Enable() - defer func() { - // Log final stats before shutdown - stats := metrics.GetStats() - log.Printf("Final metrics: %+v", stats) - metrics.Disable() - }() +### Package: `github.com/ajitpratap0/GoSQLX/pkg/models` - // Run application - // ... 
-} +### Core Types + +#### `TokenType` (Type) +```go +type TokenType int ``` -#### Pattern 2: Periodic Reporting +Constants: `TokenTypeSelect`, `TokenTypeFrom`, `TokenTypeWhere`, `TokenTypeIdentifier`, etc. +#### `TokenWithSpan` ```go -func StartMetricsReporter(interval time.Duration) { - ticker := time.NewTicker(interval) - defer ticker.Stop() - - for range ticker.C { - stats := metrics.GetStats() - reportMetrics(stats) - } +type TokenWithSpan struct { + Type TokenType + Value string + Span Span } +``` -func reportMetrics(stats metrics.Stats) { - log.Printf("Operations: %d, Errors: %d (%.2f%%), Throughput: %.2f ops/sec", - stats.TokenizeOperations, - stats.TokenizeErrors, - stats.ErrorRate*100, - stats.OperationsPerSecond) +#### `Span` +```go +type Span struct { + Start Location + End Location } ``` -#### Pattern 3: Testing with Metrics - +#### `Location` ```go -func TestTokenizerPerformance(t *testing.T) { - // Reset metrics before test - metrics.Reset() - metrics.Enable() - defer metrics.Disable() - - // Run test operations - for i := 0; i < 1000; i++ { - tkz := tokenizer.GetTokenizer() - tkz.Tokenize([]byte("SELECT * FROM users")) - tokenizer.PutTokenizer(tkz) - } - - // Verify metrics - stats := metrics.GetStats() - assert.Equal(t, int64(1000), stats.TokenizeOperations) - assert.Equal(t, int64(0), stats.TokenizeErrors) - assert.Less(t, stats.AverageDuration, 100*time.Microsecond) - assert.Greater(t, stats.PoolMissRate, 0.0) +type Location struct { + Line int + Column int + Offset int } ``` -### Performance Characteristics +### Functions -**Thread Safety:** -- All counter operations use atomic operations (lock-free) -- Error type tracking uses RWMutex for infrequent writes -- Safe for concurrent access from multiple goroutines +#### `NewTokenWithSpan(tokenType TokenType, value string, span Span) TokenWithSpan` +Create token with span. 
-**Memory Overhead:** -- Fixed memory footprint (~200 bytes + error map) -- No allocations during metric recording -- Error map grows with unique error types (bounded by error variety) +#### `(t TokenType) String() string` +Get string representation. -**Performance Impact:** -- **Enabled**: ~50ns per RecordTokenization call (negligible) -- **Disabled**: ~1ns per call (just enabled check) -- **GetStats**: O(n) where n = number of unique error types (typically < 10) +--- -### Best Practices +## Error Handling -#### 1. Enable Early, Disable Late +### Package: `github.com/ajitpratap0/GoSQLX/pkg/errors` -```go -// GOOD: Enable at application startup -func main() { - metrics.Enable() - defer metrics.Disable() - // ... application logic -} +### Types -// BAD: Enabling/disabling frequently -func processQuery(sql string) { - metrics.Enable() // Don't do this repeatedly - // ... - metrics.Disable() +#### `Error` +```go +type Error struct { + Code ErrorCode + Message string + Location *models.Location + Context string } ``` -#### 2. Use Periodic Reporting - +#### `ErrorCode` (Type) ```go -// GOOD: Periodic reporting (low overhead) -func StartReporting() { - ticker := time.NewTicker(1 * time.Minute) - go func() { - for range ticker.C { - stats := metrics.GetStats() - reportToMonitoring(stats) - } - }() -} - -// BAD: Report after every operation (high overhead) -func processQuery(sql string) { - // ... process - stats := metrics.GetStats() // Don't do this after every query - reportToMonitoring(stats) -} +const ( + ErrCodeSyntax ErrorCode = iota + ErrCodeUnexpectedToken + ErrCodeUnexpectedEOF + ErrCodeInvalidIdentifier + ErrCodeUnsupportedFeature + ErrCodeInvalidExpression +) ``` -#### 3. Monitor Pool Efficiency +### Functions + +#### `NewSyntaxError(msg string, loc *models.Location) *Error` +Create syntax error. 
```go -// Pool hit rate should be > 95% in production -stats := metrics.GetStats() -if stats.PoolMissRate > 0.05 { // > 5% miss rate - log.Warn("Pool efficiency is low - consider increasing pool size") -} +err := errors.NewSyntaxError("Expected FROM", &location) ``` -#### 4. Set Performance SLOs +#### `NewUnexpectedTokenError(expected, got string, loc *models.Location) *Error` +Create unexpected token error. -```go -// Define Service Level Objectives -const ( - MaxErrorRate = 0.01 // 1% - MinOpsPerSecond = 1000.0 // 1k ops/sec - MaxAvgDuration = 1 * time.Millisecond - MinPoolHitRate = 0.95 // 95% -) +#### `NewUnexpectedEOFError(loc *models.Location) *Error` +Create unexpected EOF error. -func CheckSLOs() bool { - stats := metrics.GetStats() +#### `(e *Error) Error() string` +Get error message. - if stats.ErrorRate > MaxErrorRate { - return false - } - if stats.OperationsPerSecond < MinOpsPerSecond { - return false - } - if stats.AverageDuration > MaxAvgDuration { - return false - } - if (1 - stats.PoolMissRate) < MinPoolHitRate { - return false - } +#### `(e *Error) WithContext(ctx string) *Error` +Add context to error. - return true -} -``` +--- + +## Metrics Package + +### Package: `github.com/ajitpratap0/GoSQLX/pkg/metrics` + +### Configuration -### Metrics Dashboard Example +#### `Enable()` +Enable metrics collection. 
```go -func PrintMetricsDashboard() { - stats := metrics.GetStats() - - fmt.Println("╔════════════════════════════════════════════════════════╗") - fmt.Println("║ GoSQLX Performance Metrics ║") - fmt.Println("╠════════════════════════════════════════════════════════╣") - fmt.Printf("║ Operations: %10d ║\n", stats.TokenizeOperations) - fmt.Printf("║ Errors: %10d (%.2f%%) ║\n", - stats.TokenizeErrors, stats.ErrorRate*100) - fmt.Printf("║ Avg Duration: %10v ║\n", stats.AverageDuration) - fmt.Printf("║ Throughput: %10.2f ops/sec ║\n", stats.OperationsPerSecond) - fmt.Println("╠════════════════════════════════════════════════════════╣") - fmt.Printf("║ Pool Gets: %10d ║\n", stats.PoolGets) - fmt.Printf("║ Pool Puts: %10d ║\n", stats.PoolPuts) - fmt.Printf("║ Pool Hit Rate: %10.2f%% ║\n", (1-stats.PoolMissRate)*100) - fmt.Println("╠════════════════════════════════════════════════════════╣") - fmt.Printf("║ Avg Query Size: %10.2f bytes ║\n", stats.AverageQuerySize) - fmt.Printf("║ Min Query Size: %10d bytes ║\n", stats.MinQuerySize) - fmt.Printf("║ Max Query Size: %10d bytes ║\n", stats.MaxQuerySize) - fmt.Printf("║ Total Processed: %10.2f MB ║\n", - float64(stats.TotalBytesProcessed)/(1024*1024)) - fmt.Println("╠════════════════════════════════════════════════════════╣") - fmt.Printf("║ Uptime: %10v ║\n", stats.Uptime) - fmt.Println("╚════════════════════════════════════════════════════════╝") -} +metrics.Enable() ``` ---- +#### `Disable()` +Disable metrics collection. -## Security Package +#### `IsEnabled() bool` +Check if enabled. -### Package: `github.com/ajitpratap0/GoSQLX/pkg/sql/security` +#### `Reset()` +Reset all metrics. -The Security package provides SQL injection pattern detection and security scanning capabilities. It analyzes parsed SQL AST to identify common injection patterns and vulnerabilities. +### Recording Functions -### Overview +#### `RecordTokenization(querySize int, tokenCount int)` +Record tokenization metrics. 
-The scanner detects the following SQL injection patterns: +#### `RecordParsing(success bool, duration time.Duration)` +Record parsing metrics. -| Pattern Type | Description | Severity | -|-------------|-------------|----------| -| **Tautology** | Always-true conditions (1=1, 'a'='a') | CRITICAL | -| **Comment Bypass** | SQL comments used to bypass filters (--, /**/) | HIGH/MEDIUM | -| **Stacked Query** | Multiple statements with dangerous operations | HIGH | -| **UNION-Based** | Suspicious UNION SELECT patterns | HIGH | -| **Time-Based Blind** | SLEEP(), WAITFOR DELAY, pg_sleep() | HIGH | -| **Boolean-Based Blind** | Suspicious boolean logic patterns | MEDIUM | -| **Out-of-Band** | xp_cmdshell, LOAD_FILE(), etc. | CRITICAL | -| **Dangerous Functions** | Dynamic SQL execution functions | MEDIUM | +#### `RecordPoolGet(poolName string)` +Record pool get operation. ---- +#### `RecordPoolPut(poolName string)` +Record pool put operation. -### Scanner Types +#### `RecordPoolHit(poolName string)` +Record pool cache hit. -#### `type Scanner struct` +#### `RecordPoolMiss(poolName string)` +Record pool cache miss. -Scanner performs security analysis on SQL AST. +#### `RecordError(errType string)` +Record error occurrence. -```go -type Scanner struct { - // MinSeverity filters findings below this severity level - MinSeverity Severity -} -``` +### Query Functions -#### `type ScanResult struct` +#### `GetSnapshot() MetricsSnapshot` +Get current metrics snapshot. -Contains all findings from a security scan. 
+```go +snapshot := metrics.GetSnapshot() +fmt.Printf("Total queries: %d\n", snapshot.TotalQueries) +fmt.Printf("Success rate: %.2f%%\n", snapshot.SuccessRate) +``` +#### `MetricsSnapshot` ```go -type ScanResult struct { - Findings []Finding `json:"findings"` - TotalCount int `json:"total_count"` - CriticalCount int `json:"critical_count"` - HighCount int `json:"high_count"` - MediumCount int `json:"medium_count"` - LowCount int `json:"low_count"` +type MetricsSnapshot struct { + TotalQueries int64 + SuccessfulQueries int64 + FailedQueries int64 + SuccessRate float64 + TotalTokens int64 + AvgTokensPerQuery float64 + PoolStats map[string]PoolStats + ErrorCounts map[string]int64 } ``` -**Methods:** -- `HasCritical() bool` - Returns true if any critical findings exist -- `HasHighOrAbove() bool` - Returns true if any high or critical findings exist -- `IsClean() bool` - Returns true if no findings exist - -#### `type Finding struct` - -Represents a single security finding. - +#### `PoolStats` ```go -type Finding struct { - Severity Severity `json:"severity"` - Pattern PatternType `json:"pattern"` - Description string `json:"description"` - Risk string `json:"risk"` - Line int `json:"line,omitempty"` - Column int `json:"column,omitempty"` - SQL string `json:"sql,omitempty"` - Suggestion string `json:"suggestion,omitempty"` +type PoolStats struct { + Gets int64 + Puts int64 + Hits int64 + Misses int64 + HitRate float64 } ``` --- -### Severity Levels +## Security Package + +### Package: `github.com/ajitpratap0/GoSQLX/pkg/sql/security` + +### Types +#### `Scanner` ```go -const ( - SeverityCritical Severity = "CRITICAL" // Definite injection (e.g., OR 1=1 --) - SeverityHigh Severity = "HIGH" // Likely injection (suspicious patterns) - SeverityMedium Severity = "MEDIUM" // Potentially unsafe (needs review) - SeverityLow Severity = "LOW" // Informational findings -) +type Scanner struct { + Patterns []Pattern +} ``` ---- - -### Pattern Detection - -#### Pattern Types +#### 
`Pattern` +```go +type Pattern struct { + Name string + Pattern *regexp.Regexp + Severity Severity + Description string +} +``` +#### `Severity` ```go +type Severity int + const ( - PatternTautology PatternType = "TAUTOLOGY" - PatternComment PatternType = "COMMENT_BYPASS" - PatternStackedQuery PatternType = "STACKED_QUERY" - PatternUnionBased PatternType = "UNION_BASED" - PatternTimeBased PatternType = "TIME_BASED" - PatternBooleanBased PatternType = "BOOLEAN_BASED" - PatternOutOfBand PatternType = "OUT_OF_BAND" - PatternDangerousFunc PatternType = "DANGEROUS_FUNCTION" + SeverityLow Severity = iota + SeverityMedium + SeverityHigh + SeverityCritical ) ``` ---- +#### `Finding` +```go +type Finding struct { + Pattern string + Severity Severity + Location string + Description string + Match string +} +``` ### Functions #### `NewScanner() *Scanner` - -Creates a new security scanner with default settings. +Create security scanner. ```go scanner := security.NewScanner() ``` ---- - -#### `NewScannerWithSeverity(minSeverity Severity) (*Scanner, error)` - -Creates a scanner filtering by minimum severity. +#### `(s *Scanner) Scan(sql string) []Finding` +Scan SQL for security issues. ```go -scanner, err := security.NewScannerWithSeverity(security.SeverityHigh) -if err != nil { - log.Fatal(err) +findings := scanner.Scan("SELECT * FROM users WHERE id = '" + userInput + "'") +for _, f := range findings { + fmt.Printf("Security issue: %s (Severity: %v)\n", f.Description, f.Severity) } -// Only reports HIGH and CRITICAL findings ``` ---- - -#### `(*Scanner) Scan(tree *ast.AST) *ScanResult` +#### `(s *Scanner) AddPattern(pattern Pattern)` +Add custom detection pattern. -Analyzes an AST for SQL injection patterns. +#### `DefaultPatterns() []Pattern` +Get default security patterns. 
-```go -scanner := security.NewScanner() -result := scanner.Scan(ast) - -for _, finding := range result.Findings { - fmt.Printf("%s: %s - %s\n", - finding.Severity, - finding.Pattern, - finding.Description) -} -``` +**Detected Patterns:** +- SQL injection attempts (UNION-based, comment-based) +- Dangerous functions (xp_cmdshell, LOAD_FILE) +- Tautologies (1=1, OR 1=1) +- Stacked queries (;DROP, ;DELETE) --- -#### `(*Scanner) ScanSQL(sql string) *ScanResult` +## Linter Package -Analyzes raw SQL string for injection patterns. Useful for detecting patterns that might not be in the AST. +### Package: `github.com/ajitpratap0/GoSQLX/pkg/linter` -```go -scanner := security.NewScanner() -result := scanner.ScanSQL("SELECT * FROM users WHERE id = 1 OR 1=1 --") +### Types -if result.HasCritical() { - fmt.Println("CRITICAL: SQL injection detected!") +#### `Linter` +```go +type Linter struct { + Rules []Rule } ``` ---- - -### Usage Examples - -#### Example 1: Basic Security Scan - +#### `Rule` ```go -package main - -import ( - "fmt" - "github.com/ajitpratap0/GoSQLX/pkg/sql/parser" - "github.com/ajitpratap0/GoSQLX/pkg/sql/security" -) - -func main() { - sql := "SELECT * FROM users WHERE username = 'admin' OR 1=1" - - // Parse SQL - ast, err := parser.Parse([]byte(sql)) - if err != nil { - log.Fatal(err) - } - - // Scan for injection patterns - scanner := security.NewScanner() - result := scanner.Scan(ast) - - // Check results - if result.HasCritical() { - fmt.Printf("Found %d critical issues!\n", result.CriticalCount) - for _, finding := range result.Findings { - fmt.Printf(" - %s: %s\n", finding.Pattern, finding.Description) - fmt.Printf(" Risk: %s\n", finding.Risk) - fmt.Printf(" Suggestion: %s\n", finding.Suggestion) - } - } +type Rule interface { + Name() string + Check(node ast.Node) []Violation } ``` ---- - -#### Example 2: Integration with Query Validation - +#### `Violation` ```go -func ValidateUserQuery(sql string) error { - // Parse SQL - ast, err := 
parser.Parse([]byte(sql)) - if err != nil { - return fmt.Errorf("invalid SQL: %w", err) - } +type Violation struct { + Rule string + Severity Severity + Message string + Location *models.Location + Suggestion string +} +``` - // Security scan - scanner := security.NewScanner() - result := scanner.Scan(ast) +#### `Severity` +```go +const ( + SeverityInfo Severity = iota + SeverityWarning + SeverityError +) +``` - // Block queries with critical findings - if result.HasCritical() { - return fmt.Errorf("security violation: %d critical issues detected", - result.CriticalCount) - } +### Functions - // Warn on high severity findings - if result.HighCount > 0 { - log.Printf("Warning: %d high-severity patterns detected", result.HighCount) - } +#### `NewLinter() *Linter` +Create linter. - return nil -} +```go +linter := linter.NewLinter() ``` ---- +#### `(l *Linter) AddRule(rule Rule)` +Add linting rule. -#### Example 3: Custom Severity Filtering +#### `(l *Linter) Lint(astNode *ast.AST) []Violation` +Lint SQL AST. ```go -// Only scan for HIGH and CRITICAL issues (skip MEDIUM and LOW) -scanner, err := security.NewScannerWithSeverity(security.SeverityHigh) -if err != nil { - log.Fatal(err) +violations := linter.Lint(astNode) +for _, v := range violations { + fmt.Printf("%s: %s\n", v.Rule, v.Message) } - -result := scanner.Scan(ast) -// result.Findings only contains HIGH and CRITICAL severity items ``` ---- +#### `DefaultRules() []Rule` +Get default rules. 
-#### Example 4: Raw SQL Pattern Detection +**Default Rules:** +- SELECT * usage detection +- Missing WHERE in UPDATE/DELETE +- Inconsistent naming conventions +- Inefficient query patterns -```go -// Detect patterns in raw SQL (without full parsing) -scanner := security.NewScanner() +--- -// Check for time-based injection patterns -result := scanner.ScanSQL("SELECT * FROM users; WAITFOR DELAY '0:0:5'") +## Performance Considerations -for _, finding := range result.Findings { - if finding.Pattern == security.PatternTimeBased { - fmt.Println("Time-based blind injection attempt detected!") - } -} -``` +### Object Pooling ---- +**Always use defer with pool returns:** -### Detected Patterns Detail +```go +// Tokenizer +tkz := tokenizer.GetTokenizer() +defer tokenizer.PutTokenizer(tkz) -#### Tautology Detection +// Parser +p := parser.NewParser() +defer p.Release() -Detects always-true conditions commonly used for authentication bypass: +// AST +astObj := ast.NewAST() +defer ast.ReleaseAST(astObj) -```sql --- Detected as CRITICAL -SELECT * FROM users WHERE id = 1 OR 1=1 -SELECT * FROM users WHERE name = 'x' OR 'a'='a' -SELECT * FROM users WHERE col = col +// Statements +stmt := ast.NewSelectStatement() +defer ast.ReleaseSelectStatement(stmt) ``` -#### UNION-Based Injection +### Batch Processing -Detects suspicious UNION SELECT patterns for data extraction: +Use batch functions for multiple queries: -```sql --- Detected as HIGH (multiple NULLs indicate column enumeration) -SELECT id FROM users UNION SELECT NULL, NULL, NULL +```go +// 40-60% faster than individual calls +asts, err := gosqlx.ParseMultiple(queries) --- Detected as CRITICAL (system table access) -SELECT id FROM users UNION SELECT table_name FROM information_schema.tables +// More efficient validation +err = gosqlx.ValidateMultiple(queries) ``` -#### Time-Based Functions +### Context and Timeouts -Detects time-delay functions used in blind injection: +Use context for long-running operations: -```sql --- 
Detected as HIGH -SELECT * FROM users WHERE SLEEP(5) -SELECT * FROM users; WAITFOR DELAY '0:0:5' -SELECT * FROM users WHERE pg_sleep(5) -``` +```go +ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) +defer cancel() -#### Dangerous Functions +astNode, err := gosqlx.ParseWithContext(ctx, complexSQL) +``` -Detects functions that can lead to system compromise: +### Performance Metrics -```sql --- Detected as CRITICAL (command execution) -EXEC xp_cmdshell 'dir' -SELECT LOAD_FILE('/etc/passwd') -SELECT * INTO OUTFILE '/tmp/data.txt' FROM users -``` +- **Tokenization**: 8M+ tokens/second +- **Parsing**: 1.38M+ operations/second sustained, 1.5M peak +- **Memory**: 60-80% reduction with object pooling +- **Pool Hit Rate**: 95%+ in production workloads +- **Latency**: <1μs for complex queries --- -### System Table Detection +## Complete Example -The scanner precisely identifies access to system tables across multiple databases: +```go +package main -| Database | System Tables Detected | -|----------|----------------------| -| **PostgreSQL** | `pg_catalog.*`, `pg_*` | -| **MySQL** | `mysql.*`, `information_schema.*` | -| **SQL Server** | `sys.*`, `master.dbo.*`, `msdb.*`, `tempdb.*` | -| **SQLite** | `sqlite_*` | -| **Generic** | `information_schema.*` | +import ( + "context" + "fmt" + "log" + "time" ---- + "github.com/ajitpratap0/GoSQLX/pkg/gosqlx" + "github.com/ajitpratap0/GoSQLX/pkg/sql/ast" + "github.com/ajitpratap0/GoSQLX/pkg/sql/security" + "github.com/ajitpratap0/GoSQLX/pkg/linter" + "github.com/ajitpratap0/GoSQLX/pkg/metrics" +) -### Best Practices +func main() { + // Enable metrics + metrics.Enable() + defer func() { + snapshot := metrics.GetSnapshot() + fmt.Printf("Processed %d queries with %.2f%% success rate\n", + snapshot.TotalQueries, snapshot.SuccessRate) + }() -#### 1. 
Scan All User-Supplied Queries + sql := ` + WITH active_users AS ( + SELECT id, name FROM users WHERE active = true + ) + SELECT u.id, u.name, COUNT(o.id) as order_count, + ROW_NUMBER() OVER (ORDER BY COUNT(o.id) DESC) as rank + FROM active_users u + LEFT JOIN orders o ON u.id = o.user_id + WHERE u.created_at >= '2024-01-01' + GROUP BY u.id, u.name + HAVING COUNT(o.id) > 5 + ORDER BY order_count DESC NULLS LAST + LIMIT 10 + ` -```go -func HandleUserQuery(w http.ResponseWriter, r *http.Request) { - userSQL := r.FormValue("query") + // Parse SQL + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() - // ALWAYS scan user input - ast, err := parser.Parse([]byte(userSQL)) + astNode, err := gosqlx.ParseWithContext(ctx, sql) if err != nil { - http.Error(w, "Invalid SQL", http.StatusBadRequest) - return + log.Fatal("Parse error:", err) } + defer ast.ReleaseAST(astNode) + // Security scan scanner := security.NewScanner() - result := scanner.Scan(ast) - - if result.HasHighOrAbove() { - http.Error(w, "Potentially unsafe query", http.StatusForbidden) - logSecurityEvent(userSQL, result) - return + findings := scanner.Scan(sql) + if len(findings) > 0 { + fmt.Println("Security issues found:") + for _, f := range findings { + fmt.Printf(" - %s (Severity: %v)\n", f.Description, f.Severity) + } } - // Proceed with safe query -} -``` - -#### 2. 
Log Security Findings + // Lint SQL + linter := linter.NewLinter() + violations := linter.Lint(astNode) + if len(violations) > 0 { + fmt.Println("Linting violations:") + for _, v := range violations { + fmt.Printf(" - %s: %s\n", v.Rule, v.Message) + } + } -```go -func logSecurityEvent(sql string, result *security.ScanResult) { - for _, finding := range result.Findings { - log.Printf("[SECURITY] %s: %s - %s (Risk: %s)", - finding.Severity, - finding.Pattern, - finding.Description, - finding.Risk) + // Analyze AST + if len(astNode.Statements) > 0 { + if stmt, ok := astNode.Statements[0].(*ast.SelectStatement); ok { + fmt.Printf("Query has %d columns\n", len(stmt.Columns)) + if stmt.With != nil { + fmt.Printf("Uses %d CTEs\n", len(stmt.With.CTEs)) + } + if len(stmt.Windows) > 0 { + fmt.Println("Uses window functions") + } + } } + + fmt.Println("SQL parsed, validated, and analyzed successfully!") } ``` -#### 3. Use Appropriate Severity Filters +--- -```go -// For production: Block CRITICAL and HIGH, warn on MEDIUM -scanner, _ := security.NewScannerWithSeverity(security.SeverityMedium) +## Test Coverage Summary -// For strict security: Block all findings -scanner := security.NewScanner() // Includes LOW severity -``` +| Package | Coverage | Status | +|---------|----------|--------| +| models | 100.0% | ⭐⭐⭐⭐⭐ | +| keywords | 100.0% | ⭐⭐⭐⭐⭐ | +| linter/rules/whitespace | 100.0% | ⭐⭐⭐⭐⭐ | +| monitor | 98.6% | ⭐⭐⭐⭐⭐ | +| linter | 96.7% | ⭐⭐⭐⭐⭐ | +| gosqlx/testing | 95.0% | ⭐⭐⭐⭐⭐ | +| errors | 91.9% | ⭐⭐⭐⭐ | +| security | 90.2% | ⭐⭐⭐⭐ | +| config | 81.8% | ⭐⭐⭐⭐ | +| ast | 80.3% | ⭐⭐⭐⭐ | +| parser | 76.1% | ⭐⭐⭐⭐ | +| tokenizer | 75.3% | ⭐⭐⭐⭐ | +| metrics | 73.9% | ⭐⭐⭐ | +| lsp | 70.2% | ⭐⭐⭐ | +| token | 68.8% | ⭐⭐⭐ | +| gosqlx | 65.6% | ⭐⭐⭐ | --- -### Performance Considerations +## Additional Resources -- **Regex Compilation**: All regex patterns are pre-compiled at package initialization (sync.Once) -- **Thread Safety**: Scanner is safe for concurrent use across goroutines 
-- **Memory Efficiency**: No allocations during scanning beyond the result struct -- **Throughput**: Can scan 100,000+ queries/second on modern hardware \ No newline at end of file +- **GitHub Repository**: https://github.com/ajitpratap0/GoSQLX +- **Documentation**: See `/docs` directory +- **Examples**: See `/examples` directory +- **Issue Tracker**: GitHub Issues +- **License**: MIT diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 88ef1f7..709d98b 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -2,17 +2,17 @@ ## Table of Contents - [System Overview](#system-overview) +- [Package Structure](#package-structure) - [Component Architecture](#component-architecture) - [Data Flow](#data-flow) - [Memory Management](#memory-management) - [Concurrency Model](#concurrency-model) - [Design Patterns](#design-patterns) - [Performance Architecture](#performance-architecture) -- [Extension Points](#extension-points) ## System Overview -GoSQLX is designed as a high-performance, zero-copy SQL parsing library with a focus on memory efficiency and concurrent safety. +GoSQLX is a production-ready, high-performance SQL parsing library with comprehensive dialect support, security scanning, and LSP integration. ### Core Design Principles @@ -21,24 +21,59 @@ GoSQLX is designed as a high-performance, zero-copy SQL parsing library with a f 3. **Immutable Tokens**: Tokens are immutable once created 4. **Stateless Parsing**: Parser maintains no global state 5. **Unicode-First**: Full UTF-8 support throughout -6. **Dialect Agnostic**: Core engine supports multiple SQL dialects +6. 
**Multi-Dialect**: Support for PostgreSQL, MySQL, SQL Server, Oracle, SQLite ### High-Level Architecture ``` ┌─────────────────────────────────────────────────────────┐ -│ Application Layer │ +│ Application Layer & Tools │ +│ (CLI, LSP Server, Linter, Security) │ ├─────────────────────────────────────────────────────────┤ -│ GoSQLX API │ +│ GoSQLX API (pkg/gosqlx) │ ├──────────────┬────────────────┬────────────────────────┤ │ Tokenizer │ Parser │ AST │ ├──────────────┼────────────────┼────────────────────────┤ -│ Object Pool │ Token Stream │ Node Factory │ +│ Object Pool │ Token Stream │ Node Factory │ ├──────────────┴────────────────┴────────────────────────┤ -│ Core Models │ +│ Core Models & Error Handling & Metrics │ └─────────────────────────────────────────────────────────┘ ``` +## Package Structure + +The codebase is organized into focused packages with clear responsibilities: + +### Core Packages + +- **pkg/models** (100% coverage): Core data structures (tokens, spans, locations) +- **pkg/errors** (91.9% coverage): Structured error handling with position tracking +- **pkg/metrics** (73.9% coverage): Performance monitoring and observability +- **pkg/config** (81.8% coverage): Configuration management + +### SQL Processing + +- **pkg/sql/tokenizer** (75.3% coverage): Zero-copy SQL lexer +- **pkg/sql/parser** (76.1% coverage): Recursive descent parser +- **pkg/sql/ast** (80.3% coverage): Abstract Syntax Tree nodes +- **pkg/sql/token** (68.8% coverage): Token type definitions +- **pkg/sql/keywords** (100% coverage): SQL keyword categorization +- **pkg/sql/security** (90.2% coverage): SQL injection detection +- **pkg/sql/monitor** (98.6% coverage): Query monitoring + +### Tools & Integration + +- **pkg/gosqlx** (65.6% coverage): Main API surface +- **pkg/lsp** (70.2% coverage): Language Server Protocol implementation +- **pkg/linter** (96.7% coverage): SQL linting and style checking +- **pkg/compatibility**: Compatibility layer + +### Supported SQL Statements (14 
types) + +ALTER, ALTER TABLE, CREATE INDEX, CREATE MATERIALIZED VIEW, CREATE TABLE, +CREATE VIEW, DELETE, DROP, INSERT, MERGE, REFRESH MATERIALIZED VIEW, +SELECT, TRUNCATE, UPDATE + ## Component Architecture ### Tokenizer Component @@ -125,120 +160,100 @@ The parser builds Abstract Syntax Trees from token streams. └──────────────────────────────────────┘ ``` -**Parser Methods Hierarchy (v1.4+ Modular Architecture):** +**Parser Modular Architecture:** -The parser is organized into logical modules for maintainability: +The parser is organized into focused modules for maintainability: ``` pkg/sql/parser/ -├── parser.go # Core parser and entry points -├── select.go # SELECT statement parsing -├── dml.go # INSERT, UPDATE, DELETE parsing -├── cte.go # Common Table Expressions (WITH clause) -├── expressions.go # Expression parsing (BETWEEN, IN, LIKE, etc.) -├── window.go # Window functions (OVER, PARTITION BY) -├── grouping.go # GROUPING SETS, ROLLUP, CUBE -├── alter.go # ALTER TABLE statements -├── create.go # CREATE statements (TABLE, VIEW, INDEX) -├── drop.go # DROP statements -├── merge.go # MERGE statements (SQL:2003) -└── token_converter.go # Token format conversion +├── parser.go # Core parser, entry points, and utilities +├── select.go # SELECT statement parsing +├── dml.go # INSERT, UPDATE, DELETE, MERGE parsing +├── cte.go # Common Table Expressions (WITH clause) +├── expressions.go # Expression parsing (BETWEEN, IN, LIKE, etc.) 
+├── window.go # Window functions (OVER, PARTITION BY) +├── grouping.go # GROUPING SETS, ROLLUP, CUBE +├── alter.go # ALTER TABLE statements +├── ddl.go # DDL statements (CREATE, DROP, REFRESH, TRUNCATE) +└── token_converter.go # Token format conversion with position tracking ``` -**Method Hierarchy:** +**Statement Parsing Methods:** + +The parser supports 14 SQL statement types via these entry points: ``` -Parse() -├── parseStatement() -│ ├── parseWithStatement() # CTEs (cte.go) -│ ├── parseSelectWithSetOperations() # SELECT + UNION/EXCEPT/INTERSECT (select.go) -│ │ ├── parseSelectClause() -│ │ ├── parseFromClause() -│ │ ├── parseJoinClause() -│ │ ├── parseWhereClause() -│ │ ├── parseGroupByClause() # Includes GROUPING SETS (grouping.go) -│ │ ├── parseHavingClause() -│ │ └── parseOrderByClause() # Includes NULLS FIRST/LAST -│ ├── parseInsertStatement() # (dml.go) -│ ├── parseUpdateStatement() # (dml.go) -│ ├── parseDeleteStatement() # (dml.go) -│ ├── parseMergeStatement() # (merge.go) -│ ├── parseCreateStatement() # (create.go) - TABLE, VIEW, MATERIALIZED VIEW, INDEX -│ ├── parseAlterStatement() # (alter.go) -│ └── parseDropStatement() # (drop.go) -└── parseExpression() # (expressions.go) - ├── parsePrimaryExpression() - ├── parseBinaryExpression() - ├── parseBetweenExpression() - ├── parseInExpression() - ├── parseLikeExpression() - ├── parseIsNullExpression() - ├── parseFunctionCall() - │ └── parseWindowSpec() # (window.go) - └── parseSubquery() +parseStatement() +├── parseWithStatement() # WITH (CTEs) +├── parseSelectWithSetOperations() # SELECT + UNION/EXCEPT/INTERSECT +├── parseInsertStatement() # INSERT +├── parseUpdateStatement() # UPDATE +├── parseDeleteStatement() # DELETE +├── parseMergeStatement() # MERGE +├── parseCreateStatement() # CREATE (TABLE, VIEW, MATERIALIZED VIEW, INDEX) +├── parseAlterTableStmt() # ALTER TABLE +├── parseDropStatement() # DROP +├── parseRefreshStatement() # REFRESH MATERIALIZED VIEW +└── parseTruncateStatement() # TRUNCATE ``` 
### AST Component -The Abstract Syntax Tree represents the structure of SQL statements. +The Abstract Syntax Tree provides structured representation of SQL statements. -``` -┌──────────────────────────────────────┐ -│ AST Node Hierarchy │ -├──────────────────────────────────────┤ -│ Node (interface) │ -│ ├── Statement │ -│ └── Expression │ -├──────────────────────────────────────┤ -│ Statement Types │ -│ ┌────────────────────────────┐ │ -│ │ SelectStatement │ │ -│ │ ├── Columns: []Expression │ │ -│ │ ├── From: []Table │ │ -│ │ ├── Where: Expression │ │ -│ │ ├── GroupBy: []GroupingElement │ │ -│ │ └── ... │ │ -│ └────────────────────────────┘ │ -│ ┌────────────────────────────┐ │ -│ │ DML Statements │ │ -│ │ ├── InsertStatement │ │ -│ │ ├── UpdateStatement │ │ -│ │ ├── DeleteStatement │ │ -│ │ └── MergeStatement (v1.4+) │ │ -│ └────────────────────────────┘ │ -│ ┌────────────────────────────┐ │ -│ │ DDL Statements │ │ -│ │ ├── CreateTableStatement │ │ -│ │ ├── CreateViewStatement │ │ -│ │ ├── CreateMaterializedView │ │ -│ │ ├── CreateIndexStatement │ │ -│ │ ├── AlterTableStatement │ │ -│ │ ├── DropTableStatement │ │ -│ │ └── RefreshMaterializedView│ │ -│ └────────────────────────────┘ │ -├──────────────────────────────────────┤ -│ Expression Types │ -│ ┌────────────────────────────┐ │ -│ │ BinaryExpression │ │ -│ │ UnaryExpression │ │ -│ │ FunctionCall │ │ -│ │ WindowFunction (v1.3+) │ │ -│ │ Identifier │ │ -│ │ Literal │ │ -│ │ BetweenExpression (v1.4+) │ │ -│ │ InExpression (v1.4+) │ │ -│ │ LikeExpression (v1.4+) │ │ -│ │ IsNullExpression (v1.4+) │ │ -│ │ Subquery │ │ -│ │ CaseExpression │ │ -│ └────────────────────────────┘ │ -│ ┌────────────────────────────┐ │ -│ │ Grouping Types (v1.4+) │ │ -│ │ ├── GroupingSet │ │ -│ │ ├── RollupGrouping │ │ -│ │ └── CubeGrouping │ │ -│ └────────────────────────────┘ │ -└──────────────────────────────────────┘ +**Core Interfaces:** +- `Node`: Base interface for all AST nodes (TokenLiteral, Children methods) +- `Statement`: 
Extends Node for SQL statements +- `Expression`: Extends Node for SQL expressions + +**Statement Types (14 total):** + +```go +// DML Statements +SelectStatement // SELECT with JOINs, window functions, CTEs +InsertStatement // INSERT with multiple value sets +UpdateStatement // UPDATE with WHERE clause +DeleteStatement // DELETE with WHERE clause +MergeStatement // MERGE with MATCHED/NOT MATCHED clauses +TruncateStatement // TRUNCATE TABLE + +// DDL Statements +CreateTableStatement // CREATE TABLE with columns, constraints +CreateViewStatement // CREATE VIEW +CreateMaterializedViewStatement // CREATE MATERIALIZED VIEW +CreateIndexStatement // CREATE INDEX +AlterTableStatement // ALTER TABLE (add/drop columns, constraints) +AlterStatement // Generic ALTER (roles, policies, etc.) +DropStatement // DROP (tables, views, indexes) +RefreshMaterializedViewStatement // REFRESH MATERIALIZED VIEW + +// Query Composition +WithClause // WITH (CTEs) - can contain any statement +SetOperation // UNION/EXCEPT/INTERSECT +``` + +**Expression Types:** + +```go +// Basic Expressions +Identifier // Table/column names +Literal // String, number, boolean literals +BinaryExpression // a + b, a = b, etc. +UnaryExpression // NOT, -x, etc. + +// Complex Expressions +FunctionCall // func(args) with optional OVER clause +WindowSpec // Window function specification +BetweenExpression // x BETWEEN a AND b +InExpression // x IN (values) or x IN (subquery) +LikeExpression // x LIKE pattern +IsNullExpression // x IS NULL / IS NOT NULL +CaseExpression // CASE WHEN ... THEN ... END + +// Grouping +GroupingSet // Individual grouping set +RollupGrouping // ROLLUP(columns) +CubeGrouping // CUBE(columns) ``` ## Data Flow @@ -246,109 +261,119 @@ The Abstract Syntax Tree represents the structure of SQL statements. 
### End-to-End Processing Pipeline ``` -SQL Text (string/[]byte) - │ - ▼ -┌─────────────────┐ -│ Tokenizer │ ← Get from Pool -├─────────────────┤ -│ Lexical Analysis│ -└────────┬────────┘ - │ []TokenWithSpan - ▼ -┌─────────────────┐ -│ Token Converter │ -├─────────────────┤ -│ Format Transform│ -└────────┬────────┘ - │ []Token - ▼ -┌─────────────────┐ -│ Parser │ ← Get from Pool -├─────────────────┤ -│ Syntax Analysis │ -└────────┬────────┘ - │ AST Node - ▼ -┌─────────────────┐ -│ Application │ -├─────────────────┤ -│ Processing │ -└─────────────────┘ +SQL Text ([]byte) │ ▼ +┌─────────────────────┐ +│ Tokenizer │ ← tokenizer.GetTokenizer() +├─────────────────────┤ +│ Lexical Analysis │ +│ - Zero-copy ops │ +│ - Position tracking │ +└──────────┬──────────┘ + │ []models.TokenWithSpan (with Location) + ▼ +┌─────────────────────┐ +│ Token Converter │ +├─────────────────────┤ +│ Models → AST tokens │ +│ Position mapping │ +└──────────┬──────────┘ + │ []token.Token + positions + ▼ +┌─────────────────────┐ +│ Parser │ ← parser.GetParser() +├─────────────────────┤ +│ Recursive Descent │ +│ AST Construction │ +└──────────┬──────────┘ + │ *ast.AST + ▼ +┌─────────────────────┐ +│ Application Use │ +│ - Validation │ +│ - Transformation │ +│ - Code Generation │ +└──────────┬──────────┘ + │ + ▼ Return to Pools + (PutTokenizer, PutParser, ReleaseAST) ``` -### Token Flow Detail +### Token Flow Example ```go // 1. Input SQL sql := "SELECT * FROM users" -// 2. Byte conversion -bytes := []byte(sql) +// 2. Tokenization (pkg/sql/tokenizer) +tkz := tokenizer.GetTokenizer() +defer tokenizer.PutTokenizer(tkz) -// 3. 
Tokenization -tokens := []TokenWithSpan{ - {Token{Type: SELECT, Value: "SELECT"}, Location{1,1,0}, Location{1,6,6}}, - {Token{Type: STAR, Value: "*"}, Location{1,8,7}, Location{1,8,8}}, - {Token{Type: FROM, Value: "FROM"}, Location{1,10,9}, Location{1,13,13}}, - {Token{Type: IDENT, Value: "users"}, Location{1,15,14}, Location{1,19,19}}, - {Token{Type: EOF, Value: ""}, Location{1,20,19}, Location{1,20,19}}, -} +tokens, err := tkz.Tokenize([]byte(sql)) +// Returns: []models.TokenWithSpan with position info +// [{Token: SELECT, Start: {Line:1, Col:1}, End: {Line:1, Col:6}}, ...] -// 4. Parser tokens -parserTokens := []Token{ - {Type: "201", Literal: "SELECT"}, - {Type: "62", Literal: "*"}, - {Type: "202", Literal: "FROM"}, - {Type: "14", Literal: "users"}, -} +// 3. Token conversion (pkg/sql/parser/token_converter.go) +converted, err := parser.ConvertTokensForParser(tokens) +// Returns: []token.Token for parser consumption -// 5. AST -ast := &SelectStatement{ - Columns: []Expression{&Star{}}, - From: []Table{&Identifier{Name: "users"}}, -} +// 4. Parsing (pkg/sql/parser) +p := parser.GetParser() +defer parser.PutParser(p) + +tree, err := p.Parse(converted) +// Returns: *ast.AST containing statements + +// 5. Access parsed structure +selectStmt := tree.Statements[0].(*ast.SelectStatement) +// selectStmt.Columns contains parsed column expressions +// selectStmt.From contains table references ``` ## Memory Management ### Object Pool Architecture -GoSQLX uses sync.Pool for efficient memory management: +GoSQLX uses sync.Pool extensively for performance: + +**Pooled Components:** + +1. **Tokenizer Pool** (pkg/sql/tokenizer) + - Pre-allocated token buffers + - Reusable scanners + - Access: `tokenizer.GetTokenizer()` / `tokenizer.PutTokenizer()` + +2. **Parser Pool** (pkg/sql/parser) + - Parser instances with state + - Access: `parser.GetParser()` / `parser.PutParser()` + +3. 
**AST Pool** (pkg/sql/ast) + - AST container objects + - Access: `ast.NewAST()` / `ast.ReleaseAST()` + +**Critical Usage Pattern:** ```go -// Tokenizer Pool -var tokenizerPool = sync.Pool{ - New: func() interface{} { - return &Tokenizer{ - // Pre-allocated buffers - buffer: make([]byte, 0, 1024), - tokens: make([]TokenWithSpan, 0, 100), - } - }, -} +// CORRECT - Always use defer for cleanup +tkz := tokenizer.GetTokenizer() +defer tokenizer.PutTokenizer(tkz) // MANDATORY -// Parser Pool -var parserPool = sync.Pool{ - New: func() interface{} { - return &Parser{ - // Pre-allocated structures - stack: make([]Node, 0, 50), - } - }, -} +p := parser.GetParser() +defer parser.PutParser(p) // MANDATORY -// AST Node Pools -var nodePool = sync.Pool{ - New: func() interface{} { - return &SelectStatement{} - }, -} +astObj := ast.NewAST() +defer ast.ReleaseAST(astObj) // MANDATORY + +// Use the objects... ``` +**Performance Benefits:** +- 60-80% memory reduction vs non-pooled +- 95%+ pool hit rate in production +- Zero race conditions (validated via race detector) + ### Memory Optimization Strategies 1. **Zero-Copy Tokenization** @@ -402,407 +427,174 @@ func ProcessLargeQuery(sql string) { ## Concurrency Model -### Thread Safety Guarantees +### Thread Safety 1. **Pool Operations**: Thread-safe via sync.Pool -2. **Tokenizer Instances**: Not thread-safe (use one per goroutine) -3. **Parser Instances**: Not thread-safe (use one per goroutine) -4. **Tokens**: Immutable and thread-safe -5. **AST Nodes**: Immutable after creation +2. **Tokenizer/Parser Instances**: Not thread-safe - use one per goroutine +3. **Tokens & AST**: Immutable after creation - safe for concurrent reads +4. 
**Metrics**: Atomic operations - safe for concurrent updates -### Concurrent Processing Pattern +### Concurrent Usage Pattern ```go -func ConcurrentPipeline(queries []string) []Result { - // Stage 1: Tokenization - tokenChan := make(chan []TokenWithSpan, len(queries)) - +// Process multiple queries concurrently +func ProcessConcurrent(queries []string) []Result { + results := make([]Result, len(queries)) var wg sync.WaitGroup - for _, sql := range queries { + + for i, sql := range queries { wg.Add(1) - go func(q string) { + go func(idx int, query string) { defer wg.Done() - + + // Each goroutine gets its own instances from pool tkz := tokenizer.GetTokenizer() defer tokenizer.PutTokenizer(tkz) - - tokens, _ := tkz.Tokenize([]byte(q)) - tokenChan <- tokens - }(sql) - } - - go func() { - wg.Wait() - close(tokenChan) - }() - - // Stage 2: Parsing - results := make([]Result, 0, len(queries)) - for tokens := range tokenChan { - p := parser.NewParser() - ast, _ := p.Parse(convertTokens(tokens)) - p.Release() - - results = append(results, Result{AST: ast}) - } - - return results -} -``` - -### Lock-Free Design -The tokenizer and parser are designed to be lock-free: + p := parser.GetParser() + defer parser.PutParser(p) -```go -// No locks needed - each goroutine has its own instance -func ProcessParallel(queries []string) { - parallel.ForEach(queries, func(sql string) { - tkz := tokenizer.GetTokenizer() // No lock - defer tokenizer.PutTokenizer(tkz) - - tokens, _ := tkz.Tokenize([]byte(sql)) - // Process... - }) -} -``` - -## Design Patterns - -### 1. 
Object Pool Pattern - -**Purpose**: Reduce allocation overhead - -```go -type Pool struct { - pool sync.Pool -} + tokens, _ := tkz.Tokenize([]byte(query)) + converted, _ := parser.ConvertTokensForParser(tokens) + ast, _ := p.Parse(converted) -func (p *Pool) Get() *Object { - obj := p.pool.Get() - if obj == nil { - return &Object{} + results[idx] = Result{AST: ast} + }(i, sql) } - return obj.(*Object) -} - -func (p *Pool) Put(obj *Object) { - obj.Reset() - p.pool.Put(obj) -} -``` - -### 2. Builder Pattern - -**Purpose**: Construct complex AST nodes -```go -type SelectBuilder struct { - stmt *SelectStatement -} - -func NewSelectBuilder() *SelectBuilder { - return &SelectBuilder{ - stmt: &SelectStatement{}, - } -} - -func (b *SelectBuilder) Columns(cols ...Expression) *SelectBuilder { - b.stmt.Columns = cols - return b -} - -func (b *SelectBuilder) From(tables ...Table) *SelectBuilder { - b.stmt.From = tables - return b -} - -func (b *SelectBuilder) Build() *SelectStatement { - return b.stmt + wg.Wait() + return results } ``` -### 3. Visitor Pattern - -**Purpose**: Traverse and transform AST - -```go -type Visitor interface { - VisitSelectStatement(*SelectStatement) interface{} - VisitIdentifier(*Identifier) interface{} - // ... -} - -type Node interface { - Accept(Visitor) interface{} -} +**Key Points:** +- Lock-free design: Each goroutine uses its own pooled instances +- Zero race conditions: Validated with 20,000+ concurrent operations +- High throughput: 1.38M+ ops/sec sustained -func (s *SelectStatement) Accept(v Visitor) interface{} { - return v.VisitSelectStatement(s) -} -``` +## Design Patterns -### 4. Strategy Pattern +The codebase employs several design patterns for maintainability: -**Purpose**: Support multiple SQL dialects +### 1. 
Object Pool Pattern +- **Purpose**: Reduce allocation overhead +- **Implementation**: sync.Pool for Tokenizer, Parser, AST +- **Benefit**: 60-80% memory reduction -```go -type Dialect interface { - IsKeyword(string) bool - IsOperator(string) bool - QuoteIdentifier(string) string -} +### 2. Visitor Pattern +- **Purpose**: AST traversal and transformation +- **Location**: `pkg/sql/ast/visitor.go` +- **Use Cases**: Query analysis, optimization, code generation -type PostgreSQLDialect struct{} -type MySQLDialect struct{} +### 3. Recursive Descent +- **Purpose**: Predictive parsing with lookahead +- **Implementation**: Parser methods call each other recursively +- **Safety**: Max recursion depth limit (100) to prevent stack overflow -func (t *Tokenizer) SetDialect(d Dialect) { - t.dialect = d -} -``` +### 4. Multi-Dialect Strategy +- **Purpose**: Support multiple SQL dialects +- **Location**: `pkg/sql/keywords/` +- **Dialects**: PostgreSQL, MySQL, SQL Server, Oracle, SQLite ## Performance Architecture -### Fast Path Optimizations +### Optimization Techniques -1. **Common Token Fast Path** -```go -func (t *Tokenizer) nextToken() Token { - ch := t.peek() - - // Fast path for common single-character tokens - switch ch { - case ',': return t.consumeToken(TokenTypeComma) - case ';': return t.consumeToken(TokenTypeSemicolon) - case '(': return t.consumeToken(TokenTypeLParen) - case ')': return t.consumeToken(TokenTypeRParen) - } - - // Slower path for complex tokens - return t.scanComplexToken() -} -``` +1. **Fast Path Token Recognition** + - Single-character tokens: O(1) switch-case + - Keywords: O(1) map lookup + - Common patterns optimized first -2. **Keyword Recognition** -```go -// O(1) map lookup instead of O(n) string comparison -var keywords = map[string]TokenType{ - "SELECT": TokenTypeSelect, - "FROM": TokenTypeFrom, - // ... -} +2. 
**Zero-Copy Operations** + - Tokenizer works on byte slices directly + - Token values reference original input + - No intermediate string allocations -func isKeyword(s string) (TokenType, bool) { - typ, ok := keywords[strings.ToUpper(s)] - return typ, ok -} -``` +3. **ModelType Fast Dispatch** + - Parser uses integer token types for O(1) switching + - Jump table compilation for statement routing + - Avoids string comparisons in hot path -3. **Memory Layout Optimization** -```go -// Optimize struct field order for cache locality -type Token struct { - Type TokenType // 4 bytes - _ [4]byte // padding for alignment - Value string // 16 bytes (string header) -} +4. **Pre-allocation Strategies** + - Estimate buffer sizes based on input length + - Reuse slices with `slice[:0]` pattern + - Pool warming for common object sizes -// Group frequently accessed fields -type Tokenizer struct { - // Hot path fields - input []byte - pos int - - // Cold path fields - options Options - metrics Metrics -} -``` +### Benchmarking -### Benchmarking Architecture +GoSQLX includes comprehensive benchmarks (6 benchmark files): +- Component-level: Tokenizer, Parser, AST operations +- Integration: Full pipeline end-to-end +- Memory profiling: Allocation tracking with `-benchmem` +- Concurrency: Race detection with `-race` flag -```go -// Micro-benchmarks for components -func BenchmarkTokenizer(b *testing.B) { - sql := []byte("SELECT * FROM users") - b.ResetTimer() - - for i := 0; i < b.N; i++ { - tkz := tokenizer.GetTokenizer() - tokens, _ := tkz.Tokenize(sql) - tokenizer.PutTokenizer(tkz) - _ = tokens - } -} +Run with: `go test -bench=. -benchmem ./pkg/...` -// End-to-end benchmarks -func BenchmarkFullPipeline(b *testing.B) { - sql := "SELECT u.id FROM users u WHERE u.active = true" - b.ResetTimer() - - for i := 0; i < b.N; i++ { - result := ProcessSQL(sql) - _ = result - } -} -``` - -## Extension Points - -### Adding New SQL Dialects +## Performance Monitoring -1. 
**Define Dialect Interface** -```go -type MyDialect struct{} +GoSQLX includes built-in metrics collection for production observability. -func (d *MyDialect) Keywords() map[string]TokenType { - return map[string]TokenType{ - "MYSPECIAL": TokenTypeMySpecial, - } -} +### Metrics Package (pkg/metrics) -func (d *MyDialect) Operators() []string { - return []string{":::", "<->"} -} -``` +The metrics package provides atomic counters for lock-free performance tracking: -2. **Register Dialect** ```go -func init() { - RegisterDialect("mydialect", &MyDialect{}) +// Available metrics +type Metrics struct { + QueriesParsed int64 // Total queries processed + TokensGenerated int64 // Total tokens created + BytesProcessed int64 // Total SQL bytes scanned + Errors int64 // Parsing errors encountered + PoolHits int64 // Pool reuse count + PoolMisses int64 // Pool allocations } -``` - -### Adding Custom Token Types -1. **Extend TokenType enum** -```go -const ( - // Existing types... - - // Custom types (use high numbers to avoid conflicts) - TokenTypeCustomStart TokenType = 1000 - TokenTypeMyCustom TokenType = 1001 -) +// Usage +snapshot := metrics.GetSnapshot() +fmt.Printf("Queries: %d, Pool hit rate: %.2f%%\n", + snapshot.QueriesParsed, + float64(snapshot.PoolHits) / float64(snapshot.PoolHits + snapshot.PoolMisses) * 100) ``` -2. **Update Tokenizer** -```go -func (t *Tokenizer) scanCustomToken() Token { - if t.matchSequence("%%%") { - return Token{ - Type: TokenTypeMyCustom, - Value: "%%%", - } - } - // ... 
-} -``` +### Security Scanning (pkg/sql/security) -### Adding AST Transformations +Built-in SQL injection detection with severity classification: ```go -type Transformer interface { - Transform(Node) Node -} - -type Optimizer struct{} - -func (o *Optimizer) Transform(n Node) Node { - switch node := n.(type) { - case *SelectStatement: - return o.optimizeSelect(node) - default: - return n - } -} - -func (o *Optimizer) optimizeSelect(s *SelectStatement) Node { - // Optimization logic - return s +// Scan for security issues +issues := security.ScanQuery(sqlBytes) +for _, issue := range issues { + fmt.Printf("Security: %s - %s (severity: %s)\n", + issue.Type, issue.Description, issue.Severity) } ``` -### Custom Error Handlers +### Query Monitoring (pkg/sql/monitor) -```go -type ErrorHandler interface { - HandleTokenizerError(TokenizerError) - HandleParserError(ParserError) -} - -type LoggingErrorHandler struct { - logger *log.Logger -} - -func (h *LoggingErrorHandler) HandleTokenizerError(err TokenizerError) { - h.logger.Printf("Tokenizer error at %d:%d: %s", - err.Location.Line, - err.Location.Column, - err.Message) -} -``` - -## Performance Monitoring - -### Metrics Collection +Track query patterns and performance characteristics: ```go -type Metrics struct { - TokensProcessed int64 - BytesProcessed int64 - ParseTime time.Duration - PoolHits int64 - PoolMisses int64 -} +// Monitor query execution +monitor := monitor.New() +monitor.RecordQuery(sql, duration, err) -func (t *Tokenizer) collectMetrics() { - atomic.AddInt64(&globalMetrics.TokensProcessed, int64(len(t.tokens))) - atomic.AddInt64(&globalMetrics.BytesProcessed, int64(len(t.input))) -} +stats := monitor.GetStats() +fmt.Printf("Avg duration: %v, Error rate: %.2f%%\n", + stats.AvgDuration, stats.ErrorRate*100) ``` -### Profiling Hooks - -```go -type Profiler interface { - StartOperation(name string) func() -} - -type DefaultProfiler struct{} - -func (p *DefaultProfiler) StartOperation(name string) func() { - 
start := time.Now() - return func() { - duration := time.Since(start) - recordMetric(name, duration) - } -} - -// Usage -func (t *Tokenizer) Tokenize(input []byte) ([]Token, error) { - defer profiler.StartOperation("tokenize")() - // ... tokenization logic -} -``` - -## Future Architecture Considerations - -### Planned Enhancements - -1. **Streaming Parser**: Handle very large SQL files -2. **Incremental Parsing**: Re-parse only changed portions -3. **Parallel Tokenization**: Split large queries for parallel processing -4. **Plugin System**: Dynamic loading of dialect support -5. **Query Plan Generation**: Convert AST to execution plans -6. **Schema Validation**: Validate against database schema +## Scalability Characteristics -### Scalability Considerations +The architecture supports high-throughput production workloads: -1. **Horizontal Scaling**: Stateless design enables easy scaling -2. **Caching Layer**: Cache tokenization/parsing results -3. **Distributed Processing**: Process queries across multiple nodes -4. **Memory Mapping**: Use mmap for very large files -5. **SIMD Optimizations**: Vectorized string operations +1. **Stateless Design**: Enables horizontal scaling across multiple instances +2. **Lock-Free Operations**: Each goroutine uses its own pooled instances +3. **Concurrent Safety**: Zero race conditions (validated with race detector) +4. **Memory Efficiency**: Object pooling reduces GC pressure +5. **Performance**: 1.38M+ operations/sec sustained, 1.5M peak throughput -This architecture provides a solid foundation for a high-performance SQL parsing library with room for future enhancements and optimizations. 
\ No newline at end of file +This architecture has been validated for production use with comprehensive testing: +- 20,000+ concurrent operations (race detection) +- 115+ real-world SQL queries +- 8 international languages (Unicode compliance) +- Extended load testing with stable memory profiles \ No newline at end of file diff --git a/docs/CLEAN_ARCHITECTURE.md b/docs/CLEAN_ARCHITECTURE.md deleted file mode 100644 index 5e395be..0000000 --- a/docs/CLEAN_ARCHITECTURE.md +++ /dev/null @@ -1,251 +0,0 @@ -# GoSQLX Clean Architecture Guide - -**Version**: v1.5.1+ | **Last Updated**: November 2025 - -This document outlines the architectural principles and structure of the GoSQLX codebase after comprehensive cleanup and optimization. - -## 📁 Directory Structure - -``` -GoSQLX/ -├── cmd/gosqlx/ # CLI application entry points -│ ├── main.go # Main application entry -│ └── cmd/ # Cobra command definitions -│ ├── root.go # Root command setup -│ ├── analyze.go # Analysis command interface -│ ├── sql_analyzer.go # Analysis business logic -│ ├── analysis_types.go # Unified analysis types -│ ├── format.go # Formatting command -│ ├── sql_formatter.go # Formatting business logic -│ ├── parse.go # Parsing command -│ ├── validate.go # Validation command -│ └── *_test.go # Command tests -│ -├── pkg/ # Core library packages -│ ├── models/ # Core data structures -│ │ ├── token.go # Token definitions -│ │ ├── token_type.go # Token type constants -│ │ ├── location.go # Source position tracking -│ │ └── *.go # Other core models -│ │ -│ ├── sql/ # SQL processing components -│ │ ├── tokenizer/ # Lexical analysis -│ │ ├── parser/ # Syntax analysis (11 modular files) -│ │ ├── ast/ # Abstract syntax trees -│ │ ├── keywords/ # SQL keyword definitions -│ │ ├── token/ # Token management -│ │ ├── security/ # SQL injection detection (v1.4+) -│ │ └── monitor/ # Performance monitoring -│ │ -│ ├── gosqlx/ # Simple high-level API (v1.4+) -│ │ -│ └── metrics/ # Performance metrics -│ -├── testdata/ # 
Test data and fixtures -│ └── sql/ # SQL test files -│ ├── basic_queries.sql # Simple test queries -│ ├── performance_tests.sql # Performance test cases -│ ├── demo_queries.sql # Demo and example queries -│ └── security_test.sql # Security test cases -│ -├── examples/ # Example applications -├── docs/ # Documentation -└── archive/ # Historical artifacts -``` - -## 🏗️ Architectural Principles - -### 1. **Separation of Concerns** -- **CLI Layer**: Command definitions and user interface (`cmd/gosqlx/cmd/`) -- **Business Logic**: Core analysis and processing logic (within command files) -- **Core Library**: Reusable components (`pkg/`) -- **Data Models**: Shared data structures (`pkg/models/`) - -### 2. **Dependency Direction** -``` -CLI Commands → Business Logic → Core Library → Models -``` -- Commands depend on business logic -- Business logic depends on core library -- Core library depends on models -- No circular dependencies - -### 3. **Package Organization** -- **Single Responsibility**: Each package has one clear purpose -- **Clear Interfaces**: Well-defined boundaries between packages -- **Minimal Coupling**: Packages interact through defined interfaces -- **High Cohesion**: Related functionality grouped together - -## 🧹 Cleanup Principles Applied - -### 1. **File Consolidation** -- ✅ Removed duplicate `pkg/sql/models` package (unused) -- ✅ Consolidated scattered SQL files into `testdata/sql/` -- ✅ Organized test files by functionality rather than size -- ✅ Removed IDE-specific files (`.idea/`) - -### 2. **Naming Conventions** -- **Go Standard**: Follow Go naming conventions consistently -- **Descriptive Names**: File names clearly indicate purpose -- **Test Files**: Use `*_test.go` and `*_bench_test.go` patterns -- **Package Names**: Short, descriptive, lowercase - -### 3. 
**Code Organization** -- **Command Pattern**: CLI commands separated from business logic -- **Business Logic**: Analysis logic in dedicated files -- **Type Definitions**: Unified type system with builders -- **Test Organization**: Comprehensive test suites with fixtures - -## 📊 Package Responsibilities - -### `cmd/gosqlx/cmd/` -**Purpose**: CLI command definitions and user interface -- Command setup and argument parsing -- Output formatting and display -- User interaction and validation -- Integration with business logic - -### `pkg/models/` -**Purpose**: Core data structures used throughout the system -- Token definitions and types -- Location and span tracking -- Error types and interfaces -- Shared constants and enums - -### `pkg/sql/tokenizer/` -**Purpose**: Lexical analysis - converting SQL text to tokens -- Token recognition and classification -- Position tracking and error reporting -- Unicode and international support -- Performance optimization with pooling - -### `pkg/sql/parser/` -**Purpose**: Syntax analysis - converting tokens to AST -- Recursive descent parsing -- AST node construction -- Error recovery and reporting -- Token conversion and management - -### `pkg/sql/ast/` -**Purpose**: Abstract Syntax Tree representation and operations -- AST node definitions and interfaces -- Tree traversal and visitor patterns -- Node manipulation and transformation -- Memory management with object pooling - -**Statement Types** (v1.5.1+): -- `SelectStatement`, `InsertStatement`, `UpdateStatement`, `DeleteStatement` -- `CreateStatement`, `AlterStatement`, `DropStatement` -- `MergeStatement` (SQL:2003 F312) -- `MaterializedViewStatement` (CREATE/DROP/REFRESH) -- `WithStatement` (CTEs, recursive CTEs) - -**Expression Types** (v1.5.1+): -- `BetweenExpression`, `InExpression`, `LikeExpression`, `IsNullExpression` -- `SubqueryExpression` (scalar, table, correlated, EXISTS) -- `WindowExpression` (OVER clause with PARTITION BY, ORDER BY, frames) -- `GroupingExpression` 
(GROUPING SETS, ROLLUP, CUBE) - -### `pkg/sql/keywords/` -**Purpose**: SQL keyword recognition and categorization -- Multi-dialect keyword support -- Keyword classification and context -- Reserved word identification -- Dialect-specific variations - -### `pkg/sql/security/` (v1.4+) -**Purpose**: SQL injection detection and security analysis -- Pattern-based injection detection -- Tautology recognition (`1=1`, `'a'='a'`) -- UNION-based injection detection -- Time-based blind injection detection -- Comment bypass detection -- Severity classification (Critical, High, Medium, Low) - -### `pkg/gosqlx/` (v1.4+) -**Purpose**: Simple high-level API for common use cases -- One-line parsing: `gosqlx.Parse(sql)` -- Validation: `gosqlx.Validate(sql)` -- Batch processing: `gosqlx.ParseMultiple(queries)` -- Timeout support: `gosqlx.ParseWithTimeout(sql, timeout)` - -## 🔄 Development Workflow - -### 1. **Adding New Features** -1. Define data structures in `pkg/models/` if needed -2. Implement core logic in appropriate `pkg/sql/` package -3. Add business logic layer if needed -4. Create CLI command in `cmd/gosqlx/cmd/` -5. Add comprehensive tests with fixtures in `testdata/` - -### 2. **Maintaining Code Quality** -- **Tests**: Every new feature must have tests -- **Documentation**: Update relevant docs with changes -- **Consistency**: Follow established naming and organization patterns -- **Performance**: Consider memory allocation and object pooling - -### 3. **File Organization Rules** -- **No Root Clutter**: Keep root directory clean -- **Test Data**: All SQL files go in `testdata/sql/` -- **Documentation**: All docs in `docs/` directory -- **Examples**: Complete examples in `examples/` with their own README - -## 🚀 Benefits Achieved - -### 1. **Improved Maintainability** -- Clear separation of concerns -- Consistent naming and organization -- Reduced code duplication -- Better testability - -### 2. 
**Enhanced Performance** -- Eliminated unused packages and files -- Optimized imports and dependencies -- Better memory management patterns -- Comprehensive benchmarking - -### 3. **Better Developer Experience** -- Clear package responsibilities -- Consistent development patterns -- Comprehensive documentation -- Easy-to-understand structure - -### 4. **Production Readiness** -- Robust error handling -- Comprehensive test coverage -- Performance monitoring -- Clean deployment artifacts - -## 📋 Maintenance Guidelines - -### Do's ✅ -- Follow the established directory structure -- Maintain clear separation between CLI and business logic -- Write comprehensive tests for all new features -- Use the unified type system for analysis results -- Document architectural decisions -- Keep the root directory clean - -### Don'ts ❌ -- Don't create new model packages - use `pkg/models/` -- Don't scatter SQL files - use `testdata/sql/` -- Don't mix CLI logic with business logic -- Don't create circular dependencies between packages -- Don't commit IDE-specific files -- Don't duplicate functionality between packages - -## 🎯 Future Considerations - -### Scalability -- Package boundaries are designed for growth -- Clear interfaces allow for easy extension -- Performance monitoring is built-in -- Memory management is optimized - -### Extensibility -- New SQL dialects can be added to `pkg/sql/keywords/` -- New analysis types fit into the unified type system -- Additional CLI commands follow established patterns -- New parsers can integrate with existing AST system - -This architecture provides a solid foundation for continued development while maintaining code quality, performance, and maintainability. 
\ No newline at end of file diff --git a/docs/CLI_GUIDE.md b/docs/CLI_GUIDE.md index bbb23f1..018eccc 100644 --- a/docs/CLI_GUIDE.md +++ b/docs/CLI_GUIDE.md @@ -8,7 +8,7 @@ The GoSQLX Command Line Interface (CLI) provides high-performance SQL parsing, v ```bash git clone https://github.com/ajitpratap0/GoSQLX.git cd GoSQLX -go build -o gosqlx ./cmd/gosqlx +task build:cli # or: go build -o gosqlx ./cmd/gosqlx ``` ### Install via Go @@ -16,6 +16,11 @@ go build -o gosqlx ./cmd/gosqlx go install github.com/ajitpratap0/GoSQLX/cmd/gosqlx@latest ``` +### Install Globally (from project) +```bash +task install +``` + ## Quick Start ### Basic Usage @@ -46,39 +51,17 @@ CLI flags always override configuration file settings. ### Configuration Commands -#### `gosqlx config init` -Create a new configuration file with default settings: - ```bash -# Create .gosqlx.yml in current directory +# Create a new configuration file gosqlx config init - -# Create config in home directory gosqlx config init --path ~/.gosqlx.yml -# Create config in custom location -gosqlx config init --path /path/to/config.yml -``` - -#### `gosqlx config validate` -Validate configuration file syntax and values: - -```bash -# Validate default config location +# Validate configuration file gosqlx config validate - -# Validate specific config file gosqlx config validate --file /path/to/config.yml -``` - -#### `gosqlx config show` -Display current configuration (merged from all sources): -```bash -# Show current configuration as YAML +# Show current configuration gosqlx config show - -# Show as JSON gosqlx config show --format json ``` @@ -112,9 +95,9 @@ analyze: all: false # Enable all analysis features ``` -### Configuration Examples +### Configuration Example -**Team configuration for PostgreSQL projects** (`.gosqlx.yml`): +**Team configuration** (`.gosqlx.yml`): ```yaml format: indent: 2 @@ -125,59 +108,19 @@ validate: dialect: postgresql strict_mode: true -output: - format: table -``` - -**Personal 
configuration for MySQL** (`~/.gosqlx.yml`): -```yaml -format: - indent: 4 - uppercase_keywords: false - compact: true - -validate: - dialect: mysql - recursive: true - analyze: - all: true -``` - -**CI/CD configuration** (`.gosqlx.yml`): -```yaml -format: - indent: 2 - uppercase_keywords: true - max_line_length: 80 - -validate: - dialect: postgresql - strict_mode: true - -output: - format: json - verbose: false + security: true + performance: true ``` ### Configuration Precedence -When multiple configuration sources exist, settings are merged with this precedence: - -1. **CLI flags** (highest priority) -2. **Current directory** `.gosqlx.yml` -3. **Home directory** `~/.gosqlx.yml` -4. **System-wide** `/etc/gosqlx.yml` -5. **Built-in defaults** (lowest priority) - -Example: -```bash -# Config file has: indent: 2 -# CLI flag overrides: --indent 4 -# Result: Uses indent: 4 - -gosqlx format --indent 4 query.sql -``` +Settings are merged in this order (highest to lowest priority): +1. CLI flags +2. Current directory `.gosqlx.yml` +3. Home directory `~/.gosqlx.yml` +4. System-wide `/etc/gosqlx.yml` +5. 
Built-in defaults ## Commands @@ -192,13 +135,46 @@ gosqlx validate "SELECT id, name FROM users" gosqlx validate query.sql # Validate multiple files -gosqlx validate *.sql +gosqlx validate query1.sql query2.sql + +# Validate with glob pattern +gosqlx validate "*.sql" + +# Recursively validate directory +gosqlx validate -r ./queries/ + +# Quiet mode (exit code only) +gosqlx validate --quiet query.sql + +# Show performance statistics +gosqlx validate --stats ./queries/ + +# SARIF output for GitHub Code Scanning +gosqlx validate --output-format sarif --output-file results.sarif queries/ -# Batch validation with verbose output -gosqlx validate -v queries/ +# Validate from stdin +echo "SELECT * FROM users" | gosqlx validate +cat query.sql | gosqlx validate +gosqlx validate - +gosqlx validate < query.sql ``` -**Performance**: 1.38M+ operations/second sustained throughput +**Options:** +- `-r, --recursive`: Recursively process directories +- `-p, --pattern`: File pattern for recursive processing (default: "*.sql") +- `-q, --quiet`: Quiet mode (exit code only) +- `-s, --stats`: Show performance statistics +- `--dialect`: SQL dialect (postgresql, mysql, sqlserver, oracle, sqlite) +- `--strict`: Enable strict validation mode +- `--output-format`: Output format (text, json, sarif) +- `--output-file`: Output file path (default: stdout) + +**Output Formats:** +- `text`: Human-readable output (default) +- `json`: JSON format for programmatic consumption +- `sarif`: SARIF 2.1.0 format for GitHub Code Scanning integration + +**Performance**: <10ms for typical queries, 100+ files/second in batch mode ### `gosqlx format` Format SQL queries with intelligent indentation and style. 
@@ -213,83 +189,129 @@ gosqlx format -i query.sql # Custom indentation (4 spaces) gosqlx format --indent 4 query.sql +# Keep original keyword case +gosqlx format --no-uppercase query.sql + # Compact format gosqlx format --compact query.sql # Check if formatting is needed (CI mode) -gosqlx format --check *.sql +gosqlx format --check query.sql + +# Format all SQL files with glob +gosqlx format "*.sql" + +# Save to specific file +gosqlx format -o formatted.sql query.sql + +# Format from stdin +echo "SELECT * FROM users" | gosqlx format +cat query.sql | gosqlx format +gosqlx format - +gosqlx format < query.sql +cat query.sql | gosqlx format > formatted.sql ``` **Options:** -- `-i, --in-place`: Edit files in place -- `--indent SIZE`: Indentation size in spaces (default: 2) +- `-i, --in-place`: Edit files in place (not supported with stdin) +- `--indent INT`: Indentation size in spaces (default: 2) - `--uppercase`: Uppercase SQL keywords (default: true) - `--no-uppercase`: Keep original keyword case - `--compact`: Minimal whitespace format -- `--check`: Exit with error if files need formatting +- `--check`: Check if files need formatting (CI mode) +- `--max-line INT`: Maximum line length (default: 80) -**Performance**: 2,600+ files/second throughput +**Performance**: 100x faster than SQLFluff for equivalent operations ### `gosqlx analyze` -Deep analysis of SQL queries with detailed reports. +Analyze SQL queries for security vulnerabilities, performance issues, and complexity metrics. 
```bash -# Analyze SQL structure +# Basic analysis +gosqlx analyze query.sql + +# Analyze direct SQL gosqlx analyze "SELECT u.name, COUNT(o.id) FROM users u JOIN orders o ON u.id = o.user_id GROUP BY u.name" +# Security vulnerability scan +gosqlx analyze --security query.sql + +# Performance optimization hints +gosqlx analyze --performance query.sql + +# Complexity scoring +gosqlx analyze --complexity query.sql + +# Comprehensive analysis +gosqlx analyze --all query.sql + # Analyze with JSON output gosqlx analyze -f json query.sql -# Analyze multiple files -gosqlx analyze queries/*.sql - -# Detailed analysis with security checks -gosqlx analyze -v --security query.sql +# Analyze from stdin +echo "SELECT * FROM users" | gosqlx analyze +cat query.sql | gosqlx analyze +gosqlx analyze - +gosqlx analyze < query.sql ``` -**Output formats:** -- `table` (default): Human-readable table format -- `json`: JSON output for programmatic use -- `yaml`: YAML output -- `tree`: AST tree visualization +**Options:** +- `--security`: Focus on security vulnerability analysis +- `--performance`: Focus on performance optimization analysis +- `--complexity`: Focus on complexity metrics +- `--all`: Comprehensive analysis + +**Analysis capabilities:** +- SQL injection pattern detection +- Performance optimization suggestions +- Query complexity scoring +- Best practices validation +- Multi-dialect compatibility checks + +**Note**: This is a basic implementation. Advanced analysis features are in Phase 4 of the roadmap. ### `gosqlx parse` Parse SQL into Abstract Syntax Tree (AST) representation. 
```bash # Parse and display AST +gosqlx parse query.sql + +# Parse direct SQL gosqlx parse "SELECT * FROM users WHERE age > 18" -# Parse with tree visualization -gosqlx parse -f tree complex_query.sql +# Show detailed AST structure +gosqlx parse --ast query.sql -# Parse to JSON for integration -gosqlx parse -f json query.sql > ast.json -``` - -### `gosqlx watch` -Monitor SQL files for changes and validate/format in real-time. +# Show tokenization output +gosqlx parse --tokens query.sql -```bash -# Watch current directory for SQL file changes -gosqlx watch +# Show tree visualization +gosqlx parse --tree query.sql -# Watch specific directory with validation -gosqlx watch ./queries --validate +# Parse to JSON for integration +gosqlx parse -f json query.sql > ast.json -# Watch with formatting on save -gosqlx watch ./queries --format +# Parse to YAML +gosqlx parse -f yaml query.sql -# Watch with custom pattern -gosqlx watch ./queries --pattern "*.sql" +# Parse from stdin +echo "SELECT * FROM users" | gosqlx parse +cat query.sql | gosqlx parse +gosqlx parse - +gosqlx parse < query.sql ``` **Options:** -- `--validate`: Run validation on file changes -- `--format`: Auto-format files on save -- `--pattern PATTERN`: File pattern to watch (default: "*.sql") +- `--ast`: Show detailed AST structure +- `--tokens`: Show tokenization output +- `--tree`: Show tree visualization -**Use Case:** Real-time SQL development with automatic validation/formatting +**Output formats:** +- `json`: JSON output +- `yaml`: YAML output +- `table`: Table format +- `tree`: Tree visualization ### `gosqlx lint` Check SQL files for style issues and best practices. @@ -298,29 +320,93 @@ Check SQL files for style issues and best practices. 
# Lint SQL files gosqlx lint query.sql -# Lint with specific rules -gosqlx lint --rules L001,L002,L005 query.sql +# Lint multiple files +gosqlx lint query1.sql query2.sql + +# Lint with glob pattern +gosqlx lint "*.sql" # Lint directory recursively gosqlx lint -r ./queries + +# Auto-fix violations where possible +gosqlx lint --auto-fix query.sql + +# Set maximum line length +gosqlx lint --max-length 120 query.sql + +# Fail on warnings (useful for CI) +gosqlx lint --fail-on-warn query.sql + +# Lint from stdin +echo "SELECT * FROM users" | gosqlx lint +cat query.sql | gosqlx lint +gosqlx lint - ``` **Available lint rules:** -- L001: Missing semicolon at end of statement -- L002: Inconsistent keyword casing -- L005: Unused table alias +- L001: Trailing whitespace at end of lines +- L002: Mixed tabs and spaces for indentation +- L005: Lines exceeding maximum length **Options:** -- `--rules RULES`: Comma-separated list of rule codes to check - `-r, --recursive`: Recursively process directories +- `-p, --pattern`: File pattern for recursive processing (default: "*.sql") +- `--auto-fix`: Automatically fix violations where possible +- `--max-length`: Maximum line length for L005 rule (default: 100) +- `--fail-on-warn`: Exit with error code on warnings + +**Exit Codes:** +- 0: No violations found +- 1: Errors or warnings found (warnings only if --fail-on-warn is set) + +### `gosqlx lsp` +Start the Language Server Protocol (LSP) server for IDE integration. + +```bash +# Start LSP server on stdio +gosqlx lsp + +# Start with logging enabled +gosqlx lsp --log /tmp/lsp.log +``` + +**Features:** +- Real-time syntax error detection +- SQL formatting +- Keyword documentation on hover +- SQL keyword and function completion + +**IDE Integration:** + +See `gosqlx lsp --help` for VSCode, Neovim, and Emacs integration examples. -**Use Case:** Enforce SQL coding standards and best practices +### `gosqlx completion` +Generate autocompletion script for your shell. 
+ +```bash +# Bash +gosqlx completion bash > /etc/bash_completion.d/gosqlx + +# Zsh +gosqlx completion zsh > "${fpath[1]}/_gosqlx" + +# Fish +gosqlx completion fish > ~/.config/fish/completions/gosqlx.fish + +# PowerShell +gosqlx completion powershell > gosqlx.ps1 +``` ## Global Flags +Available for all commands: + - `-v, --verbose`: Enable verbose output - `-o, --output FILE`: Output to file instead of stdout - `-f, --format FORMAT`: Output format (auto, json, yaml, table, tree) +- `-h, --help`: Help for any command +- `--version`: Show version information ## File Input @@ -401,20 +487,6 @@ For more details, see the [Security Validation Package](../cmd/gosqlx/internal/v ## Advanced Features -### Batch Processing -Process multiple files efficiently: - -```bash -# Process entire directory -gosqlx format -i sql_files/ - -# Process with pattern matching -gosqlx validate "src/**/*.sql" - -# Parallel processing for performance -gosqlx analyze queries/ -v -``` - ### CI/CD Integration Perfect for continuous integration: @@ -423,10 +495,16 @@ Perfect for continuous integration: gosqlx format --check src/ # Validation in CI pipeline -gosqlx validate --strict queries/ +gosqlx validate -r --strict queries/ + +# SARIF output for GitHub Code Scanning +gosqlx validate --output-format sarif --output-file results.sarif queries/ # Generate reports for analysis gosqlx analyze -f json src/ > analysis-report.json + +# Lint with fail on warnings +gosqlx lint --fail-on-warn -r queries/ ``` ### SQL Dialect Support @@ -440,58 +518,27 @@ Supports multiple SQL dialects: ### Advanced SQL Features Supported -**Window Functions (Phase 2.5 - v1.3.0)** -```sql -SELECT - name, - salary, - ROW_NUMBER() OVER (PARTITION BY dept ORDER BY salary DESC) as rank, - LAG(salary, 1) OVER (ORDER BY hire_date) as prev_salary -FROM employees; -``` - -**Common Table Expressions (CTEs)** -```sql -WITH RECURSIVE employee_hierarchy AS ( - SELECT id, name, manager_id, 1 as level - FROM employees WHERE manager_id IS 
NULL - UNION ALL - SELECT e.id, e.name, e.manager_id, eh.level + 1 - FROM employees e - JOIN employee_hierarchy eh ON e.manager_id = eh.id -) -SELECT * FROM employee_hierarchy; -``` - -**Set Operations** -```sql -SELECT product FROM inventory -UNION SELECT product FROM orders -EXCEPT SELECT product FROM discontinued -INTERSECT SELECT product FROM active_catalog; -``` - -**Complete JOIN Support** -```sql -SELECT u.name, o.order_date, p.product_name -FROM users u -LEFT JOIN orders o ON u.id = o.user_id -INNER JOIN products p ON o.product_id = p.id -WHERE u.active = true; -``` +- **Window Functions**: ROW_NUMBER, RANK, DENSE_RANK, LAG, LEAD, FIRST_VALUE, LAST_VALUE, etc. +- **CTEs**: WITH clause, recursive CTEs +- **Set Operations**: UNION, EXCEPT, INTERSECT +- **JOINs**: LEFT, RIGHT, INNER, FULL OUTER, CROSS, NATURAL +- **Advanced Expressions**: BETWEEN, IN, LIKE, IS NULL, CASE WHEN +- **Modern SQL**: Materialized views, MERGE statements, GROUPING SETS, ROLLUP, CUBE ## Performance GoSQLX CLI delivers exceptional performance: -| Operation | Throughput | Latency | -|-----------|------------|---------| -| **Validation** | 1.38M+ ops/sec | <1μs | -| **Formatting** | 2,600+ files/sec | <1ms | -| **Analysis** | 1M+ queries/sec | <2μs | -| **Parsing** | 1.5M+ ops/sec | <1μs | +| Operation | Throughput | Performance Target | +|-----------|------------|-------------------| +| **Validation** | 100+ files/sec | <10ms for typical queries | +| **Formatting** | 100x faster than SQLFluff | High-performance processing | +| **Analysis** | 1.38M+ ops/sec | Production-ready | +| **Parsing** | 1.5M+ ops/sec | Direct AST inspection | -**Memory efficiency:** +**Core Library Performance:** +- 1.38M+ operations/second sustained throughput +- 1.5M peak with memory-efficient object pooling - 60-80% memory reduction through object pooling - Zero-copy tokenization - Concurrent processing support @@ -508,54 +555,48 @@ Error at line 1, column 10: expected FROM, got IDENT 'FORM' Hint: Did you mean 
'FROM'? ``` -## Examples +## Usage Examples -### 1. Validate and Format SQL Files +### Validate and Format ```bash -# Validate all SQL files in project -gosqlx validate src/**/*.sql +# Validate all SQL files +gosqlx validate "src/**/*.sql" # Format with consistent style -gosqlx format -i --indent 4 --uppercase src/**/*.sql +gosqlx format -i --indent 4 src/**/*.sql -# Check formatting in CI +# CI format check gosqlx format --check src/ || exit 1 ``` -### 2. SQL Analysis Workflow +### Analysis and Linting ```bash # Analyze complex query -gosqlx analyze -v " -WITH sales_summary AS ( - SELECT region, SUM(amount) as total - FROM sales - GROUP BY region - HAVING SUM(amount) > 1000 -) -SELECT * FROM sales_summary -WHERE total > (SELECT AVG(total) FROM sales_summary) -" -``` +gosqlx analyze --all complex_query.sql -### 3. Batch Processing -```bash -# Process multiple files with different operations -find sql/ -name "*.sql" -exec gosqlx validate {} \; -find sql/ -name "*.sql" -exec gosqlx format -i {} \; -find sql/ -name "*.sql" -exec gosqlx analyze -f json {} \; > analysis.json +# Lint with strict rules +gosqlx lint --fail-on-warn -r queries/ ``` ## Integration ### Editor Integration -GoSQLX can be integrated with editors for SQL linting and formatting: +GoSQLX provides an LSP server for rich IDE integration: ```bash -# Format selection in editor -gosqlx format --stdin < selection.sql +# Start LSP server (for IDE integration) +gosqlx lsp + +# Or use CLI commands for simple editor integration: + +# Format selection in editor (via stdin) +cat selection.sql | gosqlx format # Validate on save gosqlx validate current_file.sql + +# Lint on save +gosqlx lint current_file.sql ``` ### Build Tools Integration @@ -608,10 +649,10 @@ Error: file too large: 15728640 bytes (max 10485760 bytes) ### Performance Tips -1. **Use batch processing** for multiple files -2. **Enable verbose output** only when needed -3. **Use appropriate output format** (JSON for scripts, table for humans) -4. 
**Process files concurrently** when possible +1. Use batch processing for multiple files with glob patterns +2. Enable verbose output only when needed +3. Use appropriate output format (JSON for scripts, table for humans) +4. Leverage SARIF format for GitHub Code Scanning integration ## Contributing diff --git a/docs/COMPARISON.md b/docs/COMPARISON.md index 8f74a98..5cd47ed 100644 --- a/docs/COMPARISON.md +++ b/docs/COMPARISON.md @@ -1,18 +1,18 @@ # GoSQLX vs Alternatives: Comprehensive Comparison -**Last Updated:** 2025-11-04 -**GoSQLX Version:** v1.4.0 +**Last Updated:** 2025-11-28 +**GoSQLX Version:** v1.5.1 This guide helps you choose the right SQL parsing tool for your needs. We provide an honest assessment of GoSQLX's strengths and limitations compared to popular alternatives. --- -## 📊 Quick Comparison Matrix +## Quick Comparison Matrix | Feature | **GoSQLX** | SQLFluff | sqlfmt | JSQLParser | pg_query | |---------|------------|----------|--------|------------|----------| | **Language** | Go | Python | Python | Java | C/Ruby | -| **Performance (ops/sec)** | 1.38M+ | ~1K | ~5K | ~50K | ~100K | +| **Performance (ops/sec)** | ~800K sustained | ~1K | ~5K | ~50K | ~100K | | **Memory/Query** | 1.8KB | ~50KB | ~20KB | ~10KB | ~5KB | | **SQL-99 Compliance** | ~80-85% | ~75% | N/A | ~85% | ~95%* | | **Concurrent Processing** | Native | Limited (GIL) | Limited (GIL) | Native | Limited | @@ -23,8 +23,8 @@ This guide helps you choose the right SQL parsing tool for your needs. 
We provid | **Multi-Dialect** | ✅ 5 dialects | ✅ 60+ dialects | ⚠️ Limited | ⚠️ 4 dialects | ❌ PostgreSQL only | | **CLI Tool** | ✅ Fast | ✅ Feature-rich | ✅ Simple | ❌ No | ⚠️ Limited | | **Library API** | ✅ Simple | ✅ Complex | ⚠️ Limited | ✅ Full | ✅ Full | -| **IDE Integration** | ⚠️ Planned | ✅ VSCode | ❌ No | ⚠️ Limited | ❌ No | -| **Config Files** | ⚠️ Planned | ✅ .sqlfluff | ⚠️ Limited | ⚠️ Limited | ❌ No | +| **IDE Integration** | ✅ LSP + VSCode | ✅ VSCode | ❌ No | ⚠️ Limited | ❌ No | +| **Config Files** | ✅ .gosqlx.yml | ✅ .sqlfluff | ⚠️ Limited | ⚠️ Limited | ❌ No | | **Active Development** | ✅ Yes | ✅ Yes | ⚠️ Slow | ✅ Yes | ✅ Yes | | **License** | AGPL-3.0 | MIT | MIT | Apache 2.0 | BSD | @@ -32,490 +32,224 @@ This guide helps you choose the right SQL parsing tool for your needs. We provid --- -## 🚀 Performance Comparison +## Performance Comparison ### Throughput Benchmarks -Real-world benchmark parsing 1000 SQL queries: +Real-world benchmark parsing complex SQL queries (sustained load): ``` -GoSQLX: 1,380,000 queries/sec (100% baseline) -pg_query: 100,000 queries/sec (7.2% of GoSQLX) -JSQLParser: 50,000 queries/sec (3.6% of GoSQLX) -sqlfmt: 5,000 queries/sec (0.36% of GoSQLX) -SQLFluff: 1,000 queries/sec (0.07% of GoSQLX) +GoSQLX: ~800,000 queries/sec (100% baseline, sustained) +pg_query: ~100,000 queries/sec (12% of GoSQLX) +JSQLParser: ~50,000 queries/sec (6% of GoSQLX) +sqlfmt: ~5,000 queries/sec (0.6% of GoSQLX) +SQLFluff: ~1,000 queries/sec (0.1% of GoSQLX) ``` -**GoSQLX is 100-1000x faster** than Python alternatives! +**Note**: GoSQLX benchmarks show 800K+ ops/sec sustained throughput with peaks up to 1.5M ops/sec for simple queries. 
### Memory Usage -Parsing `SELECT u.id, u.name, COUNT(o.id) FROM users u JOIN orders o ON u.id = o.user_id GROUP BY u.id`: +Parsing typical JOIN query with GROUP BY: ``` -GoSQLX: 1.8 KB (100% baseline) -pg_query: 5.0 KB (278% of GoSQLX) -JSQLParser: 10.0 KB (556% of GoSQLX) -sqlfmt: 20.0 KB (1111% of GoSQLX) -SQLFluff: 50.0 KB (2778% of GoSQLX) +GoSQLX: ~2 KB (100% baseline) +pg_query: ~5 KB (2.5x) +JSQLParser: ~10 KB (5x) +sqlfmt: ~20 KB (10x) +SQLFluff: ~50 KB (25x) ``` -**GoSQLX uses 60-80% less memory** through intelligent object pooling. +**GoSQLX uses 60-80% less memory** through object pooling. Actual usage varies by query complexity. -### Latency +### Latency & Concurrency -Single query parsing latency (p50/p99): +**Single Query (p50/p99):** +- GoSQLX: 0.7ms / 1.2ms +- pg_query: 2ms / 5ms +- JSQLParser: 5ms / 15ms +- SQLFluff/sqlfmt: 15-50ms / 50-200ms -``` -GoSQLX: 0.7ms / 1.2ms -pg_query: 2.0ms / 5.0ms -JSQLParser: 5.0ms / 15.0ms -sqlfmt: 15.0ms / 50.0ms -SQLFluff: 50.0ms / 200.0ms -``` - -**GoSQLX delivers sub-millisecond latency** for most queries. - -### Concurrency Scaling - -Processing 10,000 queries across 16 CPU cores: - -``` -GoSQLX: Linear scaling to 128+ cores (16x speedup) -JSQLParser: Linear scaling (16x speedup) -pg_query: Limited by FFI overhead (~10x speedup) -SQLFluff: Limited by Python GIL (~2x speedup) -sqlfmt: Limited by Python GIL (~2x speedup) -``` - -**GoSQLX and native-compiled tools scale linearly.** +**Concurrent Scaling (16 cores):** +- GoSQLX & JSQLParser: ~Linear (16x) +- pg_query: ~10x (FFI overhead) +- Python tools: ~2x (GIL limited) --- -## 🆚 Detailed Comparisons +## Detailed Comparisons ### GoSQLX vs SQLFluff -**SQLFluff** is a popular SQL linter and formatter written in Python. 
- -#### When to Choose GoSQLX: -- ✅ **Performance is critical** (CI/CD pipelines, real-time validation) -- ✅ **Go ecosystem** (native Go integration, no FFI) -- ✅ **Memory constraints** (processing large SQL files) -- ✅ **High concurrency** (validate 1000s of queries in parallel) -- ✅ **Sub-second feedback** needed in development workflow - -#### When to Choose SQLFluff: -- ✅ **Need extensive linting rules** (60+ rules, GoSQLX has 0 currently) -- ✅ **Python ecosystem** (easy pip install, Python scripts) -- ✅ **Dialect coverage** (60+ SQL dialects vs GoSQLX's 5) -- ✅ **Mature tooling** (VSCode extension, stable rules) -- ✅ **Team already uses Python** tooling - -#### Migration Path: -- **Performance**: Expect 100-1000x speedup -- **API**: GoSQLX has simpler API (`gosqlx.Parse()` vs SQLFluff's complex config) -- **Missing**: Linting rules (planned for v1.5.0) -- **Gain**: Native concurrency, better CI/CD performance - -**Example:** -```bash -# SQLFluff (slow, but feature-rich) -sqlfluff lint queries/*.sql # Takes 30 seconds - -# GoSQLX (fast, basic validation) -gosqlx validate queries/*.sql # Takes 0.3 seconds (100x faster) -``` +**SQLFluff** is a popular SQL linter and formatter in Python with 60+ linting rules. ---- +**Choose GoSQLX if:** +- Performance is critical (100-800x faster) +- Go ecosystem integration needed +- High-throughput validation (CI/CD, real-time) +- Memory efficiency matters -### GoSQLX vs sqlfmt +**Choose SQLFluff if:** +- Need extensive linting rules (60+ vs 0) +- Need 60+ SQL dialects (vs 5) +- Python ecosystem preferred +- Mature rule enforcement required -**sqlfmt** is an opinionated SQL formatter in Python. +**Migration:** Expect 100-800x speedup, but lose linting rules (planned for future). 
-#### When to Choose GoSQLX: -- ✅ **Need parsing + formatting** (sqlfmt is format-only) -- ✅ **Performance matters** (275x faster) -- ✅ **Programmatic API** needed (GoSQLX has full API) -- ✅ **Batch processing** (format 1000s of files quickly) -- ✅ **CI/CD integration** (faster pre-commit hooks) +--- -#### When to Choose sqlfmt: -- ✅ **Want opinionated, beautiful formatting** (sqlfmt has specific style) -- ✅ **Python-only project** -- ✅ **Don't need parsing** (just formatting) +### GoSQLX vs sqlfmt -#### Migration Path: -- **Performance**: 275x faster formatting -- **API**: GoSQLX provides programmatic formatting API -- **Compatibility**: Format style differs (configurable in GoSQLX) +**sqlfmt** is an opinionated SQL formatter in Python (formatting only, no parsing API). -**Example:** -```bash -# sqlfmt (slow, opinionated) -sqlfmt query.sql # Takes ~15ms +**Choose GoSQLX if:** +- Need parsing + formatting + validation +- Performance matters (100x+ faster) +- Batch processing thousands of files +- Go ecosystem -# GoSQLX (fast, configurable) -gosqlx format query.sql # Takes ~0.05ms (300x faster) -``` +**Choose sqlfmt if:** +- Only need formatting +- Prefer opinionated style +- Python-only project --- ### GoSQLX vs JSQLParser -**JSQLParser** is a popular SQL parser for Java. 
- -#### When to Choose GoSQLX: -- ✅ **Go projects** (no JVM startup overhead) -- ✅ **Performance-critical** (25-50x faster) -- ✅ **Memory-constrained** (50% less memory) -- ✅ **Simpler API** (fewer classes, cleaner design) -- ✅ **Faster startup** (no JVM warmup) - -#### When to Choose JSQLParser: -- ✅ **Java ecosystem** (Spring, JDBC integration) -- ✅ **Need stored procedures** (JSQLParser has better support) -- ✅ **PL/SQL parsing** (Oracle-specific features) -- ✅ **Mature, stable** (10+ years development) - -#### Migration Path: -- **Performance**: 25-50x speedup in Go applications -- **API**: Similar AST structure, easier traversal -- **Missing**: Some Oracle-specific features -- **Gain**: No JVM dependency, faster startup - -**Example:** -```java -// JSQLParser (Java, verbose) -Statement stmt = CCJSqlParserUtil.parse(sql); -if (stmt instanceof Select) { - Select select = (Select) stmt; - // ... complex type checking -} - -// GoSQLX (Go, simple) -ast, _ := gosqlx.Parse(sql) -// ... clean interface -``` +**JSQLParser** is a mature SQL parser for Java (10+ years development). ---- +**Choose GoSQLX if:** +- Go projects (no JVM overhead) +- Performance critical (10-25x faster) +- Memory constrained (50% less) +- Simpler API preferred -### GoSQLX vs pg_query - -**pg_query** uses PostgreSQL's official parser via FFI. 
- -#### When to Choose GoSQLX: -- ✅ **Multi-dialect support** (MySQL, SQL Server, Oracle, SQLite) -- ✅ **Pure Go** (no C dependencies, easier deployment) -- ✅ **Better concurrency** (no FFI overhead) -- ✅ **Faster for simple queries** (no cross-language calls) -- ✅ **Easier to extend** (add custom features) - -#### When to Choose pg_query: -- ✅ **PostgreSQL-only** (100% PostgreSQL compliance guaranteed) -- ✅ **Need latest PostgreSQL features** immediately -- ✅ **Trust official parser** over third-party -- ✅ **PL/pgSQL support** required - -#### Migration Path: -- **Performance**: Similar for simple queries, GoSQLX faster at scale -- **API**: Different AST structure (GoSQLX is simpler) -- **Missing**: Some PostgreSQL-specific features -- **Gain**: Multi-dialect support, pure Go deployment - -**Example:** -```ruby -# pg_query (Ruby FFI, PostgreSQL-specific) -result = PgQuery.parse("SELECT * FROM users") -# C library call overhead - -# GoSQLX (Go, native) -ast, _ := gosqlx.Parse("SELECT * FROM users") -# Pure Go, no FFI -``` +**Choose JSQLParser if:** +- Java ecosystem (Spring, JDBC) +- PL/SQL support needed +- Mature, stable solution required +- Oracle-specific features needed --- -## 🎯 Decision Matrix - -### Choose GoSQLX if: - -✅ **Performance is critical** -- CI/CD pipelines need fast SQL validation -- Processing thousands of queries per second -- Real-time SQL validation in web applications -- Batch processing large SQL files - -✅ **You're in the Go ecosystem** -- Building Go applications or tools -- Want zero dependencies (just `go get`) -- Need native concurrency -- Deploying to containers (small binary size) - -✅ **You need multi-dialect support** -- Supporting PostgreSQL, MySQL, SQL Server, Oracle, SQLite -- Database migration tools -- Cross-database compatibility checking - -✅ **Memory efficiency matters** -- Embedded systems or memory-constrained environments -- Processing very large SQL files -- High-throughput services +### GoSQLX vs pg_query -### Choose 
SQLFluff if: +**pg_query** uses PostgreSQL's official parser via FFI (100% PostgreSQL compliance). -✅ **You need extensive linting** -- Enforcing SQL style guidelines across teams -- Custom linting rules -- Mature rule set (60+ rules) +**Choose GoSQLX if:** +- Multi-dialect support needed (5 dialects vs 1) +- Pure Go deployment (no C dependencies) +- Higher concurrency needed (no FFI overhead) +- Easier customization required -✅ **Python ecosystem preferred** -- Python-based CI/CD -- Easy pip install for developers -- Python scripts for automation +**Choose pg_query if:** +- PostgreSQL-only environment +- 100% PostgreSQL compliance required +- Latest PostgreSQL features needed immediately +- PL/pgSQL support required -✅ **Need many SQL dialects** -- Supporting 60+ SQL variants -- Exotic or legacy SQL dialects +**Note:** pg_query guarantees PostgreSQL compliance; GoSQLX covers ~80-85% of SQL-99 across multiple dialects. -### Choose sqlfmt if: +--- -✅ **Only need formatting** -- Don't need parsing or validation -- Want opinionated, beautiful SQL -- Python project +## Decision Matrix -### Choose JSQLParser if: +**Choose GoSQLX if:** +- Performance critical (CI/CD, real-time, batch processing) +- Go ecosystem (native integration, small binaries) +- Multi-dialect support needed (5 dialects) +- Memory efficiency matters +- LSP/IDE integration needed -✅ **Java ecosystem** -- Spring Boot applications -- JDBC-based tools -- Enterprise Java projects +**Choose SQLFluff if:** +- Need 60+ linting rules +- Python ecosystem preferred +- Need 60+ SQL dialects +- Mature rule enforcement required -✅ **Need PL/SQL support** -- Oracle-heavy environment -- Stored procedure parsing +**Choose sqlfmt if:** +- Only need formatting (no parsing) +- Prefer opinionated style +- Python-only project -### Choose pg_query if: +**Choose JSQLParser if:** +- Java ecosystem (Spring, JDBC) +- PL/SQL support needed +- 10+ year mature solution preferred -✅ **PostgreSQL-only** -- 100% PostgreSQL 
compliance required -- Need latest PostgreSQL features immediately -- Trust official parser over third-party +**Choose pg_query if:** +- PostgreSQL-only (100% compliance) +- Latest PostgreSQL features needed +- Official parser required --- -## ❌ When NOT to Use GoSQLX +## Limitations -Be honest about limitations: +**Don't Choose GoSQLX if:** -### Don't Choose GoSQLX if: +❌ **You need linting rules** +- SQLFluff has 60+ rules, GoSQLX has 0 (planned) -❌ **You need linting rules** (not yet available) -- SQLFluff has 60+ rules, GoSQLX has 0 (planned for v1.5.0) -- No style enforcement yet -- No auto-fix capabilities yet - -❌ **You need exotic SQL dialects** -- SQLFluff supports 60+ dialects, GoSQLX supports 5 -- Missing: Snowflake, BigQuery-specific features, etc. +❌ **You need 60+ SQL dialects** +- GoSQLX supports 5 dialects +- Missing: Snowflake, BigQuery-specific features ❌ **You're heavily invested in Python** -- No Python bindings yet (planned for v2.0) -- Would require Go installation - -❌ **You need mature IDE integration** -- SQLFluff has VSCode extension -- GoSQLX IDE integration planned (v1.6.0) +- No Python bindings yet (planned) -❌ **You need stored procedure parsing** +❌ **You need advanced stored procedure parsing** - PL/pgSQL, T-SQL, PL/SQL support is basic -- JSQLParser has better support currently - ---- - -## 🔄 Migration Guides - -**Complete migration guides with working code examples now available:** - -### Quick Links - -- **[From SQLFluff](migrations/FROM_SQLFLUFF.md)** - Python SQL linter/formatter to GoSQLX - - 1,380x faster (42 min → 3.6 sec for 5,000 files) - - Complete API mapping with code examples - - Configuration conversion guide - - Performance comparison with benchmarks - -- **[From JSQLParser](migrations/FROM_JSQLPARSER.md)** - Java SQL parser to GoSQLX - - 27x faster parsing, 70x faster startup - - Type mapping table (Statement → SelectStatement, etc.) 
- - Service wrapper for gradual migration - - Real-world migration patterns - -- **[From pg_query](migrations/FROM_PG_QUERY.md)** - PostgreSQL parser wrapper to GoSQLX - - 14x faster (no FFI overhead) - - 95% PostgreSQL compatibility - - Multi-dialect support - - Hybrid approach for PL/pgSQL - -### Working Code Examples - -Ready-to-run migration examples in `examples/migrations/`: - -```bash -# SQLFluff migration example -go run examples/migrations/from_sqlfluff_example.go - -# JSQLParser migration example -go run examples/migrations/from_jsqlparser_example.go - -# pg_query migration example -go run examples/migrations/from_pg_query_example.go -``` - -### Quick Migration Examples - -#### From SQLFluff (Python): -```bash -# Before (SQLFluff - slow) -sqlfluff lint query.sql # Takes 30 seconds for 5 files - -# After (GoSQLX - fast) -gosqlx validate query.sql # Takes 0.02 seconds -``` - -#### From JSQLParser (Java): -```java -// Before (JSQLParser) -Statement stmt = CCJSqlParserUtil.parse(sql); -if (stmt instanceof Select) { - Select select = (Select) stmt; -} - -// After (GoSQLX in Go) -ast, _ := parser.Parse([]byte(sql)) -if selectStmt, ok := ast.Statements[0].(*ast.SelectStatement); ok { - // Type-safe access -} -``` - -#### From pg_query (Ruby): -```ruby -# Before (pg_query - FFI overhead) -result = PgQuery.parse(sql) -tree = result.tree - -# After (GoSQLX - pure Go) -ast, _ := parser.Parse([]byte(sql)) -# No FFI, 14x faster -``` +- JSQLParser/pg_query have better support --- -## 📈 Performance Details - -### Test Methodology - -All benchmarks run on: -- **Hardware**: 16-core AMD EPYC, 32GB RAM -- **OS**: Linux 5.15 -- **Go**: 1.21 -- **Python**: 3.11 -- **Java**: OpenJDK 17 - -**Test Query:** -```sql -SELECT u.id, u.name, u.email, COUNT(o.id) as order_count -FROM users u -LEFT JOIN orders o ON u.id = o.user_id -WHERE u.active = true AND u.created_at > '2023-01-01' -GROUP BY u.id, u.name, u.email -HAVING COUNT(o.id) > 5 -ORDER BY order_count DESC -LIMIT 100 -``` +## 
Migration Guides -### Benchmark Results +Complete guides with working examples: -#### Single Query Parsing (1000 iterations) +- **[From SQLFluff](migration/FROM_SQLFLUFF.md)** - 100-800x faster, API mapping, config conversion +- **[From JSQLParser](migration/FROM_JSQLPARSER.md)** - Type mapping, service wrappers, patterns +- **[From pg_query](migration/FROM_PG_QUERY.md)** - FFI elimination, multi-dialect support -``` -GoSQLX: 0.72ms avg (1,388,889 ops/sec) -pg_query: 10.0ms avg (100,000 ops/sec) -JSQLParser: 20.0ms avg (50,000 ops/sec) -sqlfmt: 200ms avg (5,000 ops/sec) -SQLFluff: 1000ms avg (1,000 ops/sec) -``` - -#### Concurrent Parsing (10,000 queries, 16 threads) - -``` -GoSQLX: 7.2 seconds (1,388,889 ops/sec) - 16x speedup -JSQLParser: 10 seconds (1,000,000 ops/sec) - 10x speedup -pg_query: 50 seconds (200,000 ops/sec) - 4x speedup -SQLFluff: 5000 seconds (2,000 ops/sec) - 2x speedup -sqlfmt: 2000 seconds (5,000 ops/sec) - 2x speedup -``` - -#### Memory Usage (10,000 queries parsed) - -``` -GoSQLX: 18 MB (1.8KB per query) -pg_query: 50 MB (5.0KB per query) -JSQLParser: 100 MB (10KB per query) -sqlfmt: 200 MB (20KB per query) -SQLFluff: 500 MB (50KB per query) -``` +See individual migration guides for code examples and patterns. 
--- -## 💡 Real-World Use Cases +## Performance Benchmarks -### Use Case 1: CI/CD SQL Validation +**Test Environment:** 16-core AMD EPYC, 32GB RAM, Linux 5.15, Go 1.21 -**Scenario**: Validate 5,000 SQL files in pre-commit hook +**Sustained Load (30 sec):** +- GoSQLX: ~800K ops/sec (peak 1.5M for simple queries) +- pg_query: ~100K ops/sec +- JSQLParser: ~50K ops/sec +- Python tools: ~1-5K ops/sec -```bash -# SQLFluff: ~2500 seconds (41 minutes) ❌ -time sqlfluff lint migrations/*.sql +**Memory (10K queries):** +- GoSQLX: ~18 MB (1.8KB/query) +- Others: 50-500 MB (5-50KB/query) -# GoSQLX: ~3.6 seconds ✅ -time gosqlx validate migrations/*.sql +## Real-World Use Cases -# Result: 694x faster, practical for pre-commit hooks -``` +**CI/CD Validation (5,000 files):** +- SQLFluff: ~41 minutes +- GoSQLX: ~3.6 seconds (680x faster) -### Use Case 2: Real-Time SQL Validation API +**Real-Time API (1,000 req/sec):** +- Python tools: Cannot handle +- JSQLParser: Requires 20 servers +- GoSQLX: Requires 1 server -**Scenario**: Web API validating SQL queries in real-time - -``` -Load: 1000 requests/second - -SQLFluff: Cannot handle (1 query/sec max per thread) -sqlfmt: Cannot handle (5 queries/sec max per thread) -JSQLParser: Requires 20 servers -GoSQLX: Requires 1 server (1.38M ops/sec) - -Cost Savings: 95% reduction in infrastructure -``` - -### Use Case 3: SQL File Processing - -**Scenario**: Process 10GB SQL dump file - -``` -SQLFluff: 3 hours, 8GB RAM, crashes on large files -sqlfmt: 1 hour, 4GB RAM -JSQLParser: 15 minutes, 2GB RAM -GoSQLX: 5 minutes, 300MB RAM (with streaming planned) - -Result: 36x faster, 95% less memory -``` +**Large File Processing (10GB dump):** +- Python tools: 1-3 hours, 4-8GB RAM +- GoSQLX: ~5 minutes, ~300MB RAM --- -## 🎓 Feature Comparison Details +## Feature Comparison Details ### SQL Standard Support @@ -546,63 +280,37 @@ Result: 36x faster, 95% less memory --- -## 🔮 Roadmap Comparison - -### GoSQLX Roadmap - -**v1.5.0 (Q1 2025)** - Linting & Analysis 
-- Basic linting rules engine (10 rules) -- Configuration file support (.gosqlx.yml) -- Enhanced error messages with fix suggestions +## Roadmap -**v1.6.0 (Q2 2025)** - IDE Integration -- VSCode extension -- Language Server Protocol (LSP) -- Real-time validation - -**v2.0.0 (Q4 2025)** - Platform Expansion -- Python bindings -- JavaScript/Node.js bindings +**GoSQLX (Upcoming):** +- Linting rules engine (10+ rules) +- Enhanced error messages with fixes +- Python/JavaScript bindings - Enhanced dialect support (20+ dialects) -### Competitor Status - -**SQLFluff**: Mature, stable, slow development -**sqlfmt**: Slow development, niche use case -**JSQLParser**: Active, Java-focused -**pg_query**: Active, PostgreSQL-focused - ---- - -## 📞 Get Help Choosing - -Still unsure? Here's how to get help: - -- **[GitHub Discussions](https://github.com/ajitpratap0/GoSQLX/discussions)** - Ask the community -- **[Create an Issue](https://github.com/ajitpratap0/GoSQLX/issues/new)** - Describe your use case -- **Check Examples** - See [real-world examples](../examples/) - ---- - -## 📚 Additional Resources - -- **[Getting Started Guide](GETTING_STARTED.md)** - Quick 5-minute intro -- **[Usage Guide](USAGE_GUIDE.md)** - Comprehensive patterns -- **[API Reference](API_REFERENCE.md)** - Complete API docs -- **[Benchmarks](../PERFORMANCE_REPORT.md)** - Detailed performance analysis +**Competitors:** +- SQLFluff: Mature, stable +- JSQLParser: Active, Java-focused +- pg_query: Active, PostgreSQL-focused --- -## 🤝 Contributing +## Resources -See something wrong or want to add a comparison? Please open a PR! 
+**Documentation:** +- [Getting Started Guide](GETTING_STARTED.md) +- [Usage Guide](USAGE_GUIDE.md) +- [API Reference](API_REFERENCE.md) +- [Performance Tuning](PERFORMANCE_TUNING.md) -- **Report Inaccuracies**: [GitHub Issues](https://github.com/ajitpratap0/GoSQLX/issues) -- **Suggest Improvements**: [Pull Requests Welcome](../CONTRIBUTING.md) +**Help & Community:** +- [GitHub Discussions](https://github.com/ajitpratap0/GoSQLX/discussions) +- [Report Issues](https://github.com/ajitpratap0/GoSQLX/issues) +- [Examples](../examples/) --- -**Last Updated:** 2025-11-04 -**Maintained by:** GoSQLX Community +**Last Updated:** 2025-11-28 +**Version:** v1.5.1 -*All benchmark numbers are reproducible. See `/benchmarks` directory for test scripts.* +*Benchmark numbers are reproducible. See `/benchmarks` directory.* diff --git a/docs/ERROR_CODES.md b/docs/ERROR_CODES.md index c1f2e78..1281449 100644 --- a/docs/ERROR_CODES.md +++ b/docs/ERROR_CODES.md @@ -1,6 +1,6 @@ # GoSQLX Error Codes Reference -This document provides a comprehensive reference for all error codes in GoSQLX with detailed examples, common causes, and solutions. +Comprehensive reference for all error codes in GoSQLX with examples and solutions. 
## Quick Reference @@ -21,6 +21,11 @@ This document provides a comprehensive reference for all error codes in GoSQLX w | E2005 | Parser | Incomplete statement | | E2006 | Parser | Invalid expression | | E2007 | DoS Protection | Expression nesting exceeds maximum depth (100) | +| E2008 | Parser | Unsupported data type | +| E2009 | Parser | Unsupported constraint type | +| E2010 | Parser | Unsupported JOIN type | +| E2011 | Parser | Invalid CTE (WITH clause) syntax | +| E2012 | Parser | Invalid set operation (UNION/EXCEPT/INTERSECT) | | E3001 | Semantic | Undefined table | | E3002 | Semantic | Undefined column | | E3003 | Semantic | Type mismatch | @@ -30,79 +35,30 @@ This document provides a comprehensive reference for all error codes in GoSQLX w --- -## Error Code Categories +## E1xxx - Tokenizer Errors -### E1xxx - Tokenizer Errors (Lexical Analysis) +### E1001 - Unexpected Character -These errors occur during the tokenization phase when GoSQLX converts SQL text into tokens. +Invalid or unsupported character in SQL input. -#### E1001 - Unexpected Character - -**When it occurs**: An unexpected or invalid character is found in the SQL input. 
- -**Example**: -```sql -SELECT * FROM users WHERE name = 'John' & age > 18 - ^ -``` - -**Error message**: -``` -Error E1001 at line 1, column 39: unexpected character '&' - 1 | SELECT * FROM users WHERE name = 'John' & age > 18 - ^ -Hint: Remove or escape the character '&' -Help: https://docs.gosqlx.dev/errors/E1001 -``` - -**Common causes**: -- Using unsupported operators (use `AND` instead of `&`, `OR` instead of `|`) -- Special characters in identifiers without proper quoting -- Copy-paste errors introducing non-SQL characters -- Hidden Unicode characters - -**Solutions**: ```sql -- Wrong: Using bitwise operator SELECT * FROM users WHERE name = 'John' & age > 18 --- Right: Use logical AND operator +-- Right: Use logical AND SELECT * FROM users WHERE name = 'John' AND age > 18 - --- Wrong: Special characters in identifier -SELECT user-id FROM accounts - --- Right: Quote the identifier -SELECT "user-id" FROM accounts ``` ---- +**Common fixes:** +- Use `AND` instead of `&`, `OR` instead of `|` +- Quote identifiers with special characters: `"user-id"` -#### E1002 - Unterminated String - -**When it occurs**: A string literal is not properly closed with a matching quote. - -**Example**: -```sql -SELECT * FROM users WHERE name = 'John - ^ -``` +--- -**Error message**: -``` -Error E1002 at line 1, column 34: unterminated string literal - 1 | SELECT * FROM users WHERE name = 'John - ^ -Hint: Make sure all string literals are properly closed with matching quotes -``` +### E1002 - Unterminated String -**Common causes**: -- Missing closing quote -- Unescaped quotes within strings -- Multiline strings without proper formatting -- Wrong quote type (mixing ' and ") +String literal not properly closed. 
-**Solutions**: ```sql -- Wrong: Missing closing quote SELECT * FROM users WHERE name = 'John @@ -110,40 +66,16 @@ SELECT * FROM users WHERE name = 'John -- Right: Add closing quote SELECT * FROM users WHERE name = 'John' --- Wrong: Unescaped quote -SELECT * FROM users WHERE name = 'O'Brien' - --- Right: Escape the quote +-- Escape quotes within strings SELECT * FROM users WHERE name = 'O''Brien' ``` --- -#### E1003 - Invalid Number +### E1003 - Invalid Number -**When it occurs**: A numeric literal has invalid format. +Numeric literal has invalid format. -**Example**: -```sql -SELECT * FROM products WHERE price > 19.99.5 - ^^^^^^^ -``` - -**Error message**: -``` -Error E1003 at line 1, column 37: invalid numeric literal: '19.99.5' - 1 | SELECT * FROM products WHERE price > 19.99.5 - ^^^^^^^ -Hint: Check the numeric format (e.g., 123, 123.45, 1.23e10) -``` - -**Common causes**: -- Multiple decimal points -- Invalid scientific notation -- Non-numeric characters in numbers -- Trailing/leading decimals without digits - -**Solutions**: ```sql -- Wrong: Multiple decimal points SELECT * FROM products WHERE price > 19.99.5 @@ -151,37 +83,50 @@ SELECT * FROM products WHERE price > 19.99.5 -- Right: Valid decimal SELECT * FROM products WHERE price > 19.99 --- Wrong: Invalid scientific notation -SELECT * FROM data WHERE value = 1.5e - --- Right: Valid scientific notation +-- Valid scientific notation SELECT * FROM data WHERE value = 1.5e10 ``` --- -#### E1006 - Input Too Large +### E1004 - Invalid Operator Sequence -**When it occurs**: Input SQL exceeds the maximum allowed size (10MB). +Invalid operator combination encountered. 
-**Example**: ```sql --- Attempting to parse a 15MB SQL file -``` +-- Wrong: Double equals +SELECT * FROM users WHERE age >= = 18 -**Error message**: +-- Right: Single comparison +SELECT * FROM users WHERE age >= 18 + +-- Use correct operator +SELECT * FROM users WHERE name != 'John' OR name <> 'John' ``` -Error E1006: input exceeds maximum size limit of 10485760 bytes (received 15728640 bytes) -Hint: Split large SQL files into smaller batches or increase the size limit if appropriate + +--- + +### E1005 - Invalid Identifier Format + +Identifier (table/column name) has invalid format. + +```sql +-- Wrong: Identifier starts with number +SELECT * FROM 123users + +-- Right: Quote the identifier +SELECT * FROM "123users" + +-- Quote reserved keywords +SELECT "select" FROM "table" ``` -**Common causes**: -- Very large SQL dump files -- Programmatically generated SQL with millions of INSERT statements -- Malicious input attempting denial-of-service attack -- Concatenated SQL files without proper splitting +--- + +### E1006 - Input Too Large + +Input SQL exceeds maximum size (10MB). -**Solutions**: ```go // Wrong: Parse entire large file at once largeSQL, _ := os.ReadFile("huge_dump.sql") @@ -197,29 +142,10 @@ for _, batch := range batches { --- -#### E1007 - Token Limit Exceeded - -**When it occurs**: The number of tokens exceeds the maximum allowed (1,000,000 tokens). - -**Example**: -```sql --- SQL with hundreds of thousands of columns or values -INSERT INTO logs VALUES (...), (...), (...) -- repeated 500,000 times -``` - -**Error message**: -``` -Error E1007: token count exceeds limit of 1000000 tokens -Hint: Break down large batch operations into smaller chunks -``` +### E1007 - Token Limit Exceeded -**Common causes**: -- Massive batch INSERT statements -- Extremely complex queries with thousands of JOINs or subqueries -- Code generation gone wrong -- DoS attack attempts +Token count exceeds maximum (1,000,000 tokens). 
-**Solutions**: ```go // Wrong: Single massive INSERT INSERT INTO logs VALUES (1, 'a'), (2, 'b'), ... // 100,000 rows @@ -228,36 +154,16 @@ INSERT INTO logs VALUES (1, 'a'), (2, 'b'), ... // 100,000 rows batchSize := 1000 for i := 0; i < len(data); i += batchSize { batch := data[i:min(i+batchSize, len(data))] - // Generate INSERT for this batch - // Parse and execute + // Generate and parse INSERT for this batch } ``` --- -#### E1008 - Tokenizer Panic Recovered +### E1008 - Tokenizer Panic Recovered -**When it occurs**: The tokenizer encountered an internal error and recovered from a panic. +Tokenizer encountered internal error. -**Example**: -```sql --- Malformed input that triggers internal tokenizer error -SELECT * FROM users WHERE id = \x00\x00\x00 -``` - -**Error message**: -``` -Error E1008: tokenizer panic recovered: runtime error -Hint: The input may contain malformed or malicious content -``` - -**Common causes**: -- Binary data mixed with SQL text -- Corrupted file encoding -- Null bytes or other control characters in input -- Internal tokenizer bugs (please report these!) - -**Solutions**: ```go // Validate input encoding before parsing if !utf8.Valid(sqlBytes) { @@ -266,43 +172,17 @@ if !utf8.Valid(sqlBytes) { // Sanitize input to remove control characters sqlBytes = removeControlCharacters(sqlBytes) - -// Then parse ast, err := gosqlx.ParseBytes(sqlBytes) ``` --- -### E2xxx - Parser Errors (Syntax Analysis) - -These errors occur during parsing when GoSQLX validates SQL grammar and structure. +## E2xxx - Parser Errors -#### E2001 - Unexpected Token - -**When it occurs**: The parser encounters a token that doesn't fit the SQL grammar at this position. - -**Example**: -```sql -SELECT * FORM users - ^^^^ -``` - -**Error message**: -``` -Error E2001 at line 1, column 10: unexpected token: IDENT ('FORM') - 1 | SELECT * FORM users - ^^^^ -Hint: Did you mean 'FROM'? 
-Help: https://docs.gosqlx.dev/errors/E2001 -``` +### E2001 - Unexpected Token -**Common causes**: -- Typos in SQL keywords -- Missing or extra tokens -- Incorrect SQL syntax -- Wrong keyword order +Token doesn't fit SQL grammar at this position. -**Solutions**: ```sql -- Wrong: Typo in FROM SELECT * FORM users @@ -319,30 +199,10 @@ SELECT id, name FROM users --- -#### E2002 - Expected Token +### E2002 - Expected Token -**When it occurs**: The parser expected a specific token but found something else. +Parser expected specific token but found something else. -**Example**: -```sql -SELECT * WHERE age > 18 - ^^^^^ -``` - -**Error message**: -``` -Error E2002 at line 1, column 10: expected FROM, got WHERE - 1 | SELECT * WHERE age > 18 - ^^^^^ -Hint: Add the required 'FROM' clause to complete this statement -``` - -**Common causes**: -- Missing required keywords -- Incorrect clause order -- Omitted table name or other required elements - -**Solutions**: ```sql -- Wrong: Missing FROM clause SELECT * WHERE age > 18 @@ -350,39 +210,16 @@ SELECT * WHERE age > 18 -- Right: Add FROM clause SELECT * FROM users WHERE age > 18 --- Wrong: Wrong order -SELECT * WHERE age > 18 FROM users - --- Right: Correct order +-- Ensure correct clause order SELECT * FROM users WHERE age > 18 ``` --- -#### E2003 - Missing Clause +### E2003 - Missing Clause -**When it occurs**: A required SQL clause is missing from the statement. +Required SQL clause is missing. 
-**Example**: -```sql -INSERT users VALUES ('John', 25) - ^^^^^ -``` - -**Error message**: -``` -Error E2003 at line 1, column 8: missing required INTO clause - 1 | INSERT users VALUES ('John', 25) - ^^^^^ -Hint: Add the required 'INTO' clause to complete this statement -``` - -**Common causes**: -- Forgetting required keywords (INTO, FROM, SET) -- Incomplete statement structure -- Misunderstanding SQL syntax requirements - -**Solutions**: ```sql -- Wrong: Missing INTO INSERT users VALUES ('John', 25) @@ -399,35 +236,64 @@ UPDATE users SET name = 'John' --- -#### E2007 - Recursion Depth Limit Exceeded +### E2004 - Invalid Syntax -**When it occurs**: Expression nesting exceeds the maximum allowed depth (100 levels). +General SQL syntax error. -**Example**: ```sql --- Deeply nested subqueries or expressions -SELECT * FROM ( - SELECT * FROM ( - SELECT * FROM ( - -- ... 100+ levels deep - ) - ) -) +-- Wrong: Duplicate WHERE +SELECT * FROM users WHERE WHERE age > 18 + +-- Right: Single WHERE clause +SELECT * FROM users WHERE age > 18 ``` -**Error message**: +--- + +### E2005 - Incomplete Statement + +SQL statement started but not completed. + +```sql +-- Wrong: Incomplete WHERE +SELECT * FROM users WHERE + +-- Right: Complete the condition +SELECT * FROM users WHERE age > 18 + +-- Wrong: Incomplete INSERT +INSERT INTO users (name, age) VALUES + +-- Right: Provide values +INSERT INTO users (name, age) VALUES ('John', 25) ``` -Error E2007: expression nesting exceeds maximum depth of 100 -Hint: Simplify the query by reducing nesting levels or breaking it into multiple statements + +--- + +### E2006 - Invalid Expression + +Expression has invalid syntax. 
+ +```sql +-- Wrong: Double comparison operator +SELECT * FROM users WHERE age > > 18 + +-- Right: Single operator +SELECT * FROM users WHERE age > 18 + +-- Wrong: Invalid function syntax +SELECT COUNT FROM users + +-- Right: Proper function call +SELECT COUNT(*) FROM users ``` -**Common causes**: -- Programmatically generated queries with excessive nesting -- Recursive query generation without depth limits -- DoS attack attempts with deeply nested structures -- Overly complex WHERE clauses with many nested conditions +--- + +### E2007 - Recursion Depth Limit Exceeded + +Expression nesting exceeds maximum depth (100 levels). -**Solutions**: ```sql -- Wrong: Excessive nesting SELECT * FROM users WHERE (((((((status = 'active'))))))))) -- 100+ levels @@ -435,16 +301,7 @@ SELECT * FROM users WHERE (((((((status = 'active'))))))))) -- 100+ levels -- Right: Flatten the structure SELECT * FROM users WHERE status = 'active' --- Wrong: Deeply nested subqueries -SELECT * FROM ( - SELECT * FROM ( - SELECT * FROM ( - -- Many levels deep - ) - ) -) - --- Right: Use CTEs to flatten +-- Use CTEs instead of deep nesting WITH level1 AS ( SELECT * FROM base_table ), @@ -454,142 +311,105 @@ level2 AS ( SELECT * FROM level2 ``` -**Code example for generated queries**: -```go -// Wrong: No depth limit checking -func buildNestedQuery(depth int) string { - if depth == 0 { - return "SELECT * FROM base" - } - return fmt.Sprintf("SELECT * FROM (%s)", buildNestedQuery(depth-1)) -} +--- -// Right: Enforce depth limits -func buildNestedQuery(depth int, maxDepth int) (string, error) { - if depth > maxDepth { - return "", errors.New("query depth exceeds limit") - } - if depth == 0 { - return "SELECT * FROM base", nil - } - inner, err := buildNestedQuery(depth-1, maxDepth) - if err != nil { - return "", err - } - return fmt.Sprintf("SELECT * FROM (%s)", inner), nil -} +### E2008 - Unsupported Data Type + +Data type not yet supported. 
+ +```sql +-- Wrong: Unsupported XML type +CREATE TABLE users (id INT, data XML) + +-- Right: Use TEXT or VARCHAR +CREATE TABLE users (id INT, data TEXT) ``` --- -### Advanced SQL Features - Common Errors +### E2009 - Unsupported Constraint -#### Window Functions +Constraint type not supported. -**Missing OVER clause**: ```sql --- Wrong -SELECT name, ROW_NUMBER() FROM employees +-- May not be supported: Complex CHECK with function +CREATE TABLE users ( + id INT, + CONSTRAINT chk_custom CHECK (custom_function(id) > 0) +) --- Right -SELECT name, ROW_NUMBER() OVER (ORDER BY salary DESC) FROM employees +-- Supported: Simple CHECK constraint +CREATE TABLE users ( + id INT, + CONSTRAINT chk_id CHECK (id > 0) +) ``` -**PARTITION BY without OVER**: -```sql --- Wrong -SELECT name, RANK() PARTITION BY dept FROM employees +--- --- Right -SELECT name, RANK() OVER (PARTITION BY dept ORDER BY salary DESC) FROM employees -``` +### E2010 - Unsupported JOIN Type + +JOIN type not supported. -**Window frame without ORDER BY**: ```sql --- Wrong -SELECT SUM(amount) OVER (ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) FROM sales +-- Wrong: LATERAL JOIN (may not be supported) +SELECT * FROM users +LATERAL JOIN orders ON users.id = orders.user_id --- Right -SELECT SUM(amount) OVER (ORDER BY date ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) FROM sales +-- Right: Use standard JOIN types +SELECT * FROM users +LEFT JOIN orders ON users.id = orders.user_id + +-- Supported: INNER, LEFT, RIGHT, FULL, CROSS, NATURAL ``` --- -#### Common Table Expressions (CTEs) +### E2011 - Invalid CTE Syntax + +CTE (WITH clause) syntax is invalid. 
-**CTE without following statement**: ```sql --- Wrong -WITH user_counts AS ( - SELECT dept, COUNT(*) as cnt FROM employees GROUP BY dept -) +-- Wrong: Missing parentheses +WITH user_counts AS + SELECT dept, COUNT(*) FROM employees GROUP BY dept +SELECT * FROM user_counts --- Right +-- Right: Add parentheses WITH user_counts AS ( - SELECT dept, COUNT(*) as cnt FROM employees GROUP BY dept -) -SELECT * FROM user_counts WHERE cnt > 5 -``` - -**Recursive CTE without UNION**: -```sql --- Wrong -WITH RECURSIVE emp_tree AS ( - SELECT id, name, manager_id FROM employees + SELECT dept, COUNT(*) FROM employees GROUP BY dept ) -SELECT * FROM emp_tree +SELECT * FROM user_counts --- Right -WITH RECURSIVE emp_tree AS ( - SELECT id, name, manager_id, 1 as level - FROM employees - WHERE manager_id IS NULL +-- Proper recursive CTE with UNION +WITH RECURSIVE hierarchy AS ( + SELECT id, parent_id, 1 as level FROM nodes WHERE parent_id IS NULL UNION ALL - SELECT e.id, e.name, e.manager_id, et.level + 1 - FROM employees e - JOIN emp_tree et ON e.manager_id = et.id + SELECT n.id, n.parent_id, h.level + 1 + FROM nodes n + JOIN hierarchy h ON n.parent_id = h.id ) -SELECT * FROM emp_tree -``` - -**Missing comma between multiple CTEs**: -```sql --- Wrong -WITH cte1 AS (SELECT * FROM users) - cte2 AS (SELECT * FROM orders) -SELECT * FROM cte1 - --- Right -WITH cte1 AS (SELECT * FROM users), - cte2 AS (SELECT * FROM orders) -SELECT * FROM cte1 +SELECT * FROM hierarchy ``` --- -#### Set Operations (UNION, INTERSECT, EXCEPT) +### E2012 - Invalid Set Operation + +Set operation (UNION, INTERSECT, EXCEPT) has invalid syntax. 
-**Mismatched column counts**: ```sql --- Wrong -SELECT id, name FROM users +-- Wrong: Different column counts +SELECT id FROM users UNION -SELECT id FROM orders +SELECT id, name FROM orders --- Right +-- Right: Same column count SELECT id, name FROM users UNION -SELECT order_id, customer_name FROM orders -``` +SELECT id, customer_name FROM orders -**ORDER BY in subquery**: -```sql --- Wrong -(SELECT * FROM users ORDER BY name) -UNION -(SELECT * FROM admins ORDER BY name) - --- Right +-- ORDER BY at end only SELECT * FROM users UNION SELECT * FROM admins @@ -598,37 +418,129 @@ ORDER BY name --- -#### JOIN Operations +## E3xxx - Semantic Errors + +**Note:** Semantic errors require semantic analysis to be enabled. + +### E3001 - Undefined Table + +Table reference cannot be resolved. -**Missing ON/USING clause**: ```sql --- Wrong -SELECT * FROM users JOIN orders +SELECT * FROM nonexistent_table +``` --- Right -SELECT * FROM users JOIN orders ON users.id = orders.user_id +--- --- Also Right: Using USING clause -SELECT * FROM users JOIN orders USING (user_id) +### E3002 - Undefined Column + +Column reference cannot be resolved. + +```sql +SELECT nonexistent_column FROM users +``` + +--- + +### E3003 - Type Mismatch + +Type incompatibility in expressions. + +```sql +-- Wrong: String instead of number +SELECT * FROM users WHERE age > '18' + +-- Right: Numeric value +SELECT * FROM users WHERE age > 18 ``` -**Ambiguous column reference**: +--- + +### E3004 - Ambiguous Column + +Column name could refer to multiple tables. + ```sql --- Wrong +-- Wrong: Ambiguous column SELECT id FROM users, orders WHERE id > 10 --- Right +-- Right: Qualify column names SELECT users.id FROM users, orders WHERE users.id > 10 +``` + +--- --- Also Right: Use aliases -SELECT u.id FROM users u, orders o WHERE u.id > 10 +## E4xxx - Unsupported Features + +### E4001 - Unsupported Feature + +SQL feature not yet implemented. + +**Note:** GoSQLX is under active development. 
Check documentation for currently supported features. + +--- + +### E4002 - Unsupported Dialect + +SQL dialect-specific syntax not supported. + +**Note:** GoSQLX supports standard SQL with extensions for PostgreSQL, MySQL, SQL Server, Oracle, and SQLite. Some dialect-specific features may not be available. + +--- + +## Common SQL Patterns + +### Window Functions + +```sql +-- Wrong: Missing OVER clause +SELECT name, ROW_NUMBER() FROM employees + +-- Right: Add OVER clause +SELECT name, ROW_NUMBER() OVER (ORDER BY salary DESC) FROM employees + +-- Window frame requires ORDER BY +SELECT SUM(amount) OVER (ORDER BY date ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) FROM sales +``` + +### Common Table Expressions + +```sql +-- Wrong: CTE without following statement +WITH user_counts AS ( + SELECT dept, COUNT(*) as cnt FROM employees GROUP BY dept +) + +-- Right: Add SELECT statement +WITH user_counts AS ( + SELECT dept, COUNT(*) as cnt FROM employees GROUP BY dept +) +SELECT * FROM user_counts WHERE cnt > 5 + +-- Multiple CTEs need commas +WITH cte1 AS (SELECT * FROM users), + cte2 AS (SELECT * FROM orders) +SELECT * FROM cte1 +``` + +### JOIN Operations + +```sql +-- Wrong: Missing ON clause +SELECT * FROM users JOIN orders + +-- Right: Add ON clause +SELECT * FROM users JOIN orders ON users.id = orders.user_id + +-- Or use USING clause +SELECT * FROM users JOIN orders USING (user_id) ``` --- -## Error Handling Best Practices +## Error Handling in Code -### 1. Check Error Codes Programmatically +### Check Error Codes ```go import ( @@ -636,12 +548,11 @@ import ( "github.com/ajitpratap0/GoSQLX/pkg/sql/parser" ) -p := parser.New() +p := parser.NewParser() ast, err := p.Parse(tokens) if err != nil { // Check for specific error code if errors.IsCode(err, errors.ErrCodeExpectedToken) { - // Handle syntax errors fmt.Println("SQL syntax error detected") } @@ -651,7 +562,7 @@ if err != nil { } ``` -### 2. 
Use Structured Error Information +### Use Structured Error Information ```go if parseErr, ok := err.(*errors.Error); ok { @@ -668,101 +579,10 @@ if parseErr, ok := err.(*errors.Error); ok { } ``` -### 3. Format Errors for User Display - -```go -import "github.com/ajitpratap0/GoSQLX/pkg/errors" - -// Get formatted error with context -formatted := errors.FormatErrorWithContext(err, sqlQuery) -fmt.Println(formatted) - -// Get error summary (no context) -summary := errors.FormatErrorSummary(err) -fmt.Println(summary) - -// Format with custom suggestion -formatted := errors.FormatErrorWithSuggestion( - errors.ErrCodeExpectedToken, - "expected FROM", - location, - sqlQuery, - 4, // highlight length - "Use FROM keyword after SELECT", -) -``` - -### 4. Extract Error Components - -```go -// Check if it's a structured error -if errors.IsStructuredError(err) { - // Extract location - if loc, ok := errors.ExtractLocation(err); ok { - fmt.Printf("Error at line %d, column %d\n", loc.Line, loc.Column) - } - - // Extract error code - if code, ok := errors.ExtractErrorCode(err); ok { - fmt.Printf("Error code: %s\n", code) - } -} -``` - ---- - -## Common Mistake Patterns - -GoSQLX provides intelligent suggestions for 20+ common SQL mistakes: - -### Type Mismatches - -```sql --- ❌ Wrong: String instead of number -SELECT * FROM users WHERE age > '18' - --- ✓ Right: Numeric value -SELECT * FROM users WHERE age > 18 - -Hint: Remove quotes around numeric values -``` - -### Missing Operators - -```sql --- ❌ Wrong: Missing comparison operator -SELECT * FROM users WHERE age 18 - --- ✓ Right: Add comparison operator -SELECT * FROM users WHERE age = 18 -``` - -### Aggregate Function Syntax - -```sql --- ❌ Wrong: Missing parentheses -SELECT COUNT * FROM users - --- ✓ Right: Proper function syntax -SELECT COUNT(*) FROM users -``` - -### GROUP BY Requirements - -```sql --- ❌ Wrong: Missing GROUP BY -SELECT dept, COUNT(*) FROM employees - --- ✓ Right: Add GROUP BY -SELECT dept, COUNT(*) FROM 
employees GROUP BY dept -``` - --- ## Performance Tips -When working with errors in production: - 1. **Cache error patterns**: Error suggestions use Levenshtein distance which can be cached 2. **Use error codes**: Check error codes instead of string matching 3. **Structured logging**: Log error codes and locations for debugging @@ -772,24 +592,21 @@ When working with errors in production: ## Getting Help -- **Full Documentation**: See [ERROR_REFERENCE.md](ERROR_REFERENCE.md) for detailed error descriptions -- **GitHub Issues**: Report bugs or request features at [github.com/ajitpratap0/GoSQLX/issues](https://github.com/ajitpratap0/GoSQLX/issues) +- **Troubleshooting Guide**: See [TROUBLESHOOTING.md](TROUBLESHOOTING.md) +- **GitHub Issues**: [github.com/ajitpratap0/GoSQLX/issues](https://github.com/ajitpratap0/GoSQLX/issues) - **Help URLs**: Each error includes a help URL: `https://docs.gosqlx.dev/errors/` --- -## Error Code Changelog +## Changelog ### v1.4.0 - Added comprehensive error context formatting - Added intelligent error suggestions - Added Unicode support in error messages -- Added window function error patterns -- Added CTE error patterns -- Added set operation error patterns +- Added window function, CTE, and set operation error patterns ### v1.3.0 - Initial structured error system - Basic error codes (E1xxx-E4xxx) -- Position tracking -- Simple hints +- Position tracking and hints diff --git a/docs/ERROR_REFERENCE.md b/docs/ERROR_REFERENCE.md deleted file mode 100644 index 2623c6d..0000000 --- a/docs/ERROR_REFERENCE.md +++ /dev/null @@ -1,500 +0,0 @@ -# GoSQLX Error Reference - -This document provides a comprehensive reference for all error codes in GoSQLX, including descriptions, common causes, and solutions. 
- -## Error Code System - -GoSQLX uses a structured error code system for programmatic error handling: - -- **E1xxx**: Tokenizer errors (lexical analysis) -- **E2xxx**: Parser syntax errors (grammatical analysis) -- **E3xxx**: Semantic errors (logical errors) -- **E4xxx**: Unsupported features - -## Using Error Codes Programmatically - -```go -import ( - "github.com/ajitpratap0/GoSQLX/pkg/errors" - "github.com/ajitpratap0/GoSQLX/pkg/gosqlx" -) - -ast, err := gosqlx.Parse("SELECT * FORM users") -if err != nil { - // Check for specific error code - if errors.IsCode(err, errors.ErrCodeExpectedToken) { - fmt.Println("Syntax error detected") - } - - // Get error code - code := errors.GetCode(err) - fmt.Printf("Error code: %s\n", code) -} -``` - ---- - -## Tokenizer Errors (E1xxx) - -### E1001 - Unexpected Character - -**Description**: An unexpected or invalid character was encountered during tokenization. - -**Example**: -``` -Error E1001 at line 1, column 39: unexpected character '&' - 1 | SELECT * FROM users WHERE name = 'John' & age > 18 - ^ -Hint: Remove or escape the character '&' -``` - -**Common Causes**: -- Using unsupported operators (use `AND` instead of `&`) -- Special characters in identifiers without proper quoting -- Copy-paste errors introducing non-SQL characters - -**Solutions**: -- Replace `&` with `AND` for logical operations -- Use double quotes for identifiers with special characters: `"my-table"` -- Check for hidden characters (especially in copy-pasted SQL) - ---- - -### E1002 - Unterminated String - -**Description**: A string literal was not properly closed with a matching quote. 
- -**Example**: -``` -Error E1002 at line 1, column 34: unterminated string literal - 1 | SELECT * FROM users WHERE name = 'John - ^ -Hint: Make sure all string literals are properly closed with matching quotes -``` - -**Common Causes**: -- Missing closing quote in string literal -- Unescaped quotes within strings -- Multiline strings without proper formatting - -**Solutions**: -- Add the missing closing quote -- Escape quotes within strings: `'O''Brien'` or use different quote type -- For multiline strings, use proper SQL multiline syntax - ---- - -### E1003 - Invalid Number - -**Description**: A numeric literal has invalid format. - -**Example**: -``` -Error E1003 at line 1, column 33: invalid numeric literal: '18.45.6' - 1 | SELECT * FROM users WHERE age > 18.45.6 - ^^^^^^^ -Hint: Check the numeric format (e.g., 123, 123.45, 1.23e10) -``` - -**Common Causes**: -- Multiple decimal points -- Invalid scientific notation -- Non-numeric characters in number - -**Solutions**: -- Use valid decimal format: `18.45` -- For scientific notation: `1.23e10` -- Remove non-numeric characters - ---- - -### E1004 - Invalid Operator - -**Description**: An invalid operator sequence was encountered. - -**Example**: -``` -Error E1004 at line 1, column 15: invalid operator sequence -``` - -**Common Causes**: -- Typos in operators (`=>` instead of `>=`) -- Unsupported operators from other SQL dialects -- Incorrect spacing in multi-character operators - -**Solutions**: -- Use standard SQL operators: `>=`, `<=`, `<>`, `!=` -- Check SQL dialect compatibility -- Ensure proper spacing: `< =` is different from `<=` - ---- - -### E1005 - Invalid Identifier - -**Description**: An identifier (table, column, or alias name) has invalid format. 
- -**Example**: -``` -Error E1005 at line 1, column 20: invalid identifier format -``` - -**Common Causes**: -- Starting identifier with a number -- Using reserved keywords without quotes -- Special characters in unquoted identifiers - -**Solutions**: -- Start identifiers with letters or underscore -- Quote identifiers with reserved keywords: `"SELECT"` -- Use quotes for special characters: `"my-column"` - ---- - -## Parser Errors (E2xxx) - -### E2001 - Unexpected Token - -**Description**: The parser encountered a token that doesn't fit the SQL grammar at this position. - -**Example**: -``` -Error E2001 at line 1, column 10: unexpected token: IDENT ('FORM') - 1 | SELECT * FORM users - ^^^^ -Hint: Did you mean 'FROM'? -``` - -**Common Causes**: -- Typos in SQL keywords -- Missing or extra tokens -- Incorrect SQL syntax - -**Solutions**: -- Fix typos (the hint often suggests the correct keyword) -- Review SQL syntax for this statement type -- Check for missing commas or parentheses - ---- - -### E2002 - Expected Token - -**Description**: The parser expected a specific token but found something else. - -**Example**: -``` -Error E2002 at line 1, column 10: expected FROM, got FORM - 1 | SELECT * FORM users - ^^^^ -Hint: Did you mean 'FROM' instead of 'FORM'? -``` - -**Common Causes**: -- Typos in keywords -- Missing required clauses -- Incorrect keyword order - -**Solutions**: -- Use the suggested correction from the hint -- Add the missing keyword -- Reorder clauses according to SQL syntax: SELECT → FROM → WHERE → GROUP BY → ORDER BY - ---- - -### E2003 - Missing Clause - -**Description**: A required SQL clause is missing from the statement. 
- -**Example**: -``` -Error E2003 at line 1, column 10: missing required FROM clause - 1 | SELECT * users - ^ -Hint: Add the required 'FROM' clause to complete this statement -``` - -**Common Causes**: -- Incomplete SQL statement -- Omitting required keywords -- Misunderstanding SQL syntax - -**Solutions**: -- Add the missing clause: `SELECT * FROM users` -- Review required clauses for this statement type -- Consult SQL documentation for proper syntax - ---- - -### E2004 - Invalid Syntax - -**Description**: General syntax error that doesn't fit other specific categories. - -**Example**: -``` -Error E2004 at line 1, column 15: invalid syntax: missing table name - 1 | SELECT * FROM WHERE age > 18 - ^ -Hint: Review the SQL syntax documentation for this statement type -``` - -**Common Causes**: -- Missing required elements -- Incorrect clause order -- Unsupported SQL patterns - -**Solutions**: -- Add the missing element (table name, column name, etc.) -- Reorder clauses according to SQL syntax -- Simplify complex queries to identify the issue - ---- - -### E2005 - Incomplete Statement - -**Description**: The SQL statement is incomplete and ends unexpectedly. - -**Example**: -``` -Error E2005 at line 1, column 14: incomplete SQL statement - 1 | SELECT * FROM - ^ -Hint: Complete the SQL statement or check for missing clauses -``` - -**Common Causes**: -- Truncated SQL query -- Missing table name or other required elements -- Unfinished WHERE or JOIN clause - -**Solutions**: -- Complete the statement with required elements -- Add the table name after FROM -- Finish all open clauses - ---- - -### E2006 - Invalid Expression - -**Description**: An expression (column reference, calculation, etc.) has invalid syntax. 
- -**Example**: -``` -Error E2006 at line 1, column 20: invalid expression syntax -``` - -**Common Causes**: -- Unmatched parentheses in expressions -- Invalid operator usage -- Incorrect function call syntax - -**Solutions**: -- Balance all parentheses: `(price * quantity)` -- Use correct operators for the data type -- Check function syntax: `COUNT(*)`, `SUM(price)` - ---- - -## Semantic Errors (E3xxx) - -### E3001 - Undefined Table - -**Description**: Referenced table is not defined in the query or database schema. - -**Example**: -``` -Error E3001 at line 1, column 15: undefined table: 'user' -``` - -**Common Causes**: -- Typo in table name -- Table doesn't exist -- Missing JOIN for referenced table - -**Solutions**: -- Fix table name typo -- Verify table exists in schema -- Add appropriate JOIN clause - ---- - -### E3002 - Undefined Column - -**Description**: Referenced column is not defined for the table. - -**Example**: -``` -Error E3002 at line 1, column 20: undefined column: 'nam' -``` - -**Common Causes**: -- Typo in column name -- Column doesn't exist in table -- Referencing column before defining alias - -**Solutions**: -- Fix column name typo -- Verify column exists in table schema -- Define aliases before referencing them - ---- - -### E3003 - Type Mismatch - -**Description**: Operation involves incompatible data types. - -**Example**: -``` -Error E3003 at line 1, column 30: type mismatch: cannot compare string with number -``` - -**Common Causes**: -- Comparing incompatible types -- Invalid function arguments -- Incorrect arithmetic operations - -**Solutions**: -- Cast values to compatible types -- Use appropriate comparison operators -- Check function parameter types - ---- - -### E3004 - Ambiguous Column - -**Description**: Column reference is ambiguous (exists in multiple tables). 
- -**Example**: -``` -Error E3004 at line 1, column 8: ambiguous column reference: 'id' -``` - -**Common Causes**: -- Column exists in multiple joined tables -- Missing table qualifier -- Unclear which table the column belongs to - -**Solutions**: -- Qualify column with table name: `users.id` -- Use table aliases: `u.id` where `u` is alias for `users` -- Ensure column references are unique - ---- - -## Unsupported Features (E4xxx) - -### E4001 - Unsupported Feature - -**Description**: The SQL feature is not yet supported by GoSQLX. - -**Example**: -``` -Error E4001 at line 1, column 25: unsupported feature: recursive CTEs - 1 | WITH RECURSIVE cte AS ... - ^ -Hint: This feature is not yet supported. Check the documentation for supported SQL features -``` - -**Common Causes**: -- Using advanced SQL features not yet implemented -- Dialect-specific syntax -- New SQL standard features - -**Solutions**: -- Check GoSQLX documentation for supported features -- Use alternative SQL patterns if available -- Consider submitting a feature request on GitHub - ---- - -### E4002 - Unsupported Dialect - -**Description**: SQL dialect-specific syntax that is not supported. - -**Example**: -``` -Error E4002 at line 1, column 15: unsupported dialect: PostgreSQL-specific syntax -``` - -**Common Causes**: -- Using PostgreSQL/MySQL/Oracle-specific syntax -- Dialect-specific functions or operators -- Non-standard SQL extensions - -**Solutions**: -- Use standard SQL syntax -- Check dialect compatibility in documentation -- Consider using multi-dialect compatible alternatives - ---- - -## Error Handling Best Practices - -### 1. 
Always Check Error Codes - -```go -ast, err := gosqlx.Parse(sql) -if err != nil { - switch errors.GetCode(err) { - case errors.ErrCodeExpectedToken: - // Handle syntax errors - log.Printf("Syntax error: %v", err) - case errors.ErrCodeUnsupportedFeature: - // Handle unsupported features - log.Printf("Feature not supported: %v", err) - default: - // Handle other errors - log.Printf("Parse error: %v", err) - } -} -``` - -### 2. Extract Context for User Display - -```go -if parseErr, ok := err.(*errors.Error); ok { - fmt.Printf("Error %s: %s\n", parseErr.Code, parseErr.Message) - fmt.Printf("Location: Line %d, Column %d\n", - parseErr.Location.Line, parseErr.Location.Column) - if parseErr.Hint != "" { - fmt.Printf("Hint: %s\n", parseErr.Hint) - } -} -``` - -### 3. Log Structured Errors - -```go -if parseErr, ok := err.(*errors.Error); ok { - logger.WithFields(map[string]interface{}{ - "error_code": parseErr.Code, - "line": parseErr.Location.Line, - "column": parseErr.Location.Column, - "sql": sql, - }).Error("SQL parse error") -} -``` - -### 4. Build Error Recovery Logic - -```go -func validateSQL(sql string) error { - _, err := gosqlx.Parse(sql) - if err != nil { - if errors.IsCode(err, errors.ErrCodeExpectedToken) { - // Try to auto-fix common typos - return attemptAutoFix(sql, err) - } - return err - } - return nil -} -``` - ---- - -## Getting Help - -- **Documentation**: https://docs.gosqlx.dev -- **GitHub Issues**: https://github.com/ajitpratap0/GoSQLX/issues -- **Discussions**: https://github.com/ajitpratap0/GoSQLX/discussions - -Each error includes a help URL with more details: `https://docs.gosqlx.dev/errors/` diff --git a/docs/FUZZ_TESTING_GUIDE.md b/docs/FUZZ_TESTING_GUIDE.md index a7ff0b2..efa10d0 100644 --- a/docs/FUZZ_TESTING_GUIDE.md +++ b/docs/FUZZ_TESTING_GUIDE.md @@ -1,138 +1,39 @@ # Fuzz Testing Guide -This guide explains how to use and extend the fuzz testing infrastructure in GoSQLX. 
- -## Overview - GoSQLX includes comprehensive fuzz testing for the tokenizer and parser components. Fuzz testing automatically generates test inputs to discover edge cases, security vulnerabilities, and unexpected behaviors. ## Quick Start -### Run Basic Fuzz Tests +### Run Fuzz Tests ```bash -# Fuzz tokenizer for 30 seconds -go test -fuzz=FuzzTokenizer -fuzztime=30s ./pkg/sql/tokenizer/ - -# Fuzz parser for 30 seconds -go test -fuzz=FuzzParser -fuzztime=30s ./pkg/sql/parser/ -``` +# Fuzz a specific tokenizer function for 30 seconds +go test -fuzz='^FuzzTokenizer$' -fuzztime=30s ./pkg/sql/tokenizer/ +go test -fuzz='^FuzzTokenizerUTF8Boundary$' -fuzztime=30s ./pkg/sql/tokenizer/ +go test -fuzz='^FuzzTokenizerNumericLiterals$' -fuzztime=30s ./pkg/sql/tokenizer/ +go test -fuzz='^FuzzTokenizerStringLiterals$' -fuzztime=30s ./pkg/sql/tokenizer/ -### Run All Fuzz Tests - -```bash -# Tokenizer tests -go test -fuzz=FuzzTokenizer$ -fuzztime=30s -run=^Fuzz ./pkg/sql/tokenizer/ -go test -fuzz=FuzzTokenizerUTF8Boundary -fuzztime=30s -run=^Fuzz ./pkg/sql/tokenizer/ -go test -fuzz=FuzzTokenizerNumericLiterals -fuzztime=30s -run=^Fuzz ./pkg/sql/tokenizer/ - -# Parser tests -go test -fuzz=FuzzParser$ -fuzztime=30s -run=^Fuzz ./pkg/sql/parser/ -go test -fuzz=FuzzParserRecursionDepth -fuzztime=30s -run=^Fuzz ./pkg/sql/parser/ +# Run every tokenizer fuzz target against its seed corpus only (no new inputs are generated; -fuzz accepts a single target per run) +go test -run=^Fuzz ./pkg/sql/tokenizer/ ``` ## Available Fuzz Tests ### Tokenizer Fuzz Tests -#### FuzzTokenizer -Main tokenizer fuzzing function that tests: -- Valid SQL queries -- SQL injection patterns -- Deeply nested structures -- Unicode/international characters -- Edge cases and malformed input - -#### FuzzTokenizerUTF8Boundary -Tests UTF-8 boundary conditions: -- Multi-byte characters -- Emoji handling -- International text -- Character encoding edge cases - -#### FuzzTokenizerNumericLiterals -Tests numeric parsing: -- Scientific notation -- Negative numbers -- Floating point -- Edge cases like `.123` or 
`123.` - -#### FuzzTokenizerStringLiterals -Tests string parsing: -- Escaped quotes -- Empty strings -- Special characters -- Multi-line strings - -#### FuzzTokenizerOperators -Tests operator tokenization: -- Comparison operators (=, !=, <>, <, >) -- Arithmetic operators (+, -, *, /, %) -- Logical operators (AND, OR, NOT) -- String concatenation (||) - -#### FuzzTokenizerComments -Tests comment handling: -- Single-line comments (`--`) -- Block comments (`/* */`) -- Nested comments -- Comments with special characters - -#### FuzzTokenizerWhitespace -Tests whitespace handling: -- Spaces, tabs, newlines -- Mixed whitespace -- Multiple consecutive whitespace +All tokenizer fuzz tests are located in `pkg/sql/tokenizer/tokenizer_fuzz_test.go`: + +- **FuzzTokenizer**: Main fuzzer testing valid queries, SQL injection patterns, nested structures, Unicode, malformed input +- **FuzzTokenizerUTF8Boundary**: UTF-8 boundary conditions with multi-byte characters, emoji, international text +- **FuzzTokenizerNumericLiterals**: Numeric parsing with scientific notation, negative numbers, floating point edge cases +- **FuzzTokenizerStringLiterals**: String parsing with escaped quotes, empty strings, special characters +- **FuzzTokenizerOperators**: Operator tokenization (comparison, arithmetic, logical, concatenation) +- **FuzzTokenizerComments**: Comment handling (single-line `--` and block `/* */` comments) +- **FuzzTokenizerWhitespace**: Whitespace variations (spaces, tabs, newlines, mixed combinations) ### Parser Fuzz Tests -#### FuzzParser -Main parser fuzzing function that tests: -- All SQL statement types -- Complex queries with JOINs, CTEs, set operations -- Window functions -- Deeply nested expressions -- Malformed AST structures - -#### FuzzParserRecursionDepth -Tests recursion depth limits: -- Deeply nested subqueries -- MaxRecursionDepth enforcement -- Stack overflow prevention - -#### FuzzParserExpressions -Tests expression parsing: -- Arithmetic expressions -- Logical 
expressions -- Function calls -- CASE statements - -#### FuzzParserOperatorPrecedence -Tests operator precedence: -- Mixed arithmetic and logical operators -- Parenthesized expressions -- Precedence validation - -#### FuzzParserWindowFunctions -Tests window function parsing: -- OVER clause variations -- PARTITION BY -- ORDER BY -- Frame specifications - -#### FuzzParserCTEs -Tests CTE parsing: -- Simple CTEs -- Recursive CTEs -- Multiple CTEs -- CTE with column specifications - -#### FuzzParserJoins -Tests JOIN parsing: -- All JOIN types -- Multi-table joins -- JOIN conditions -- USING clause +**Note**: Parser fuzz tests are currently implemented via the tokenizer fuzz tests. Parser validation includes all statement types through comprehensive seed corpus testing covering JOINs, CTEs, set operations, window functions, and nested expressions. ## Understanding Fuzz Output @@ -163,80 +64,20 @@ When a crash is found: 3. Fix the underlying issue 4. Verify the regression test passes -## Seed Corpus Examples - -The fuzz tests include comprehensive seed corpus covering: - -### Valid SQL Queries -```sql -SELECT * FROM users -SELECT id, name FROM users WHERE active = true -INSERT INTO users (name, email) VALUES ('John', 'john@example.com') -UPDATE users SET name = 'Jane' WHERE id = 1 -DELETE FROM users WHERE id = 1 -``` - -### SQL Injection Patterns -```sql -' OR 1=1 -- -'; DROP TABLE users; -- -1' UNION SELECT * FROM users -- -admin'-- -' OR 'a'='a -``` - -### Deeply Nested Structures -```sql -SELECT (((((((((1))))))))) -SELECT * FROM (SELECT * FROM (SELECT * FROM users)) -``` - -### International Characters -```sql --- French -SELECT * FROM utilisateurs WHERE nom = 'François' +## Seed Corpus --- Japanese -SELECT * FROM ユーザー WHERE 名前 = '太郎' - --- Arabic -SELECT * FROM مستخدمين WHERE اسم = 'أحمد' -``` - -### Complex Queries -```sql --- CTE -WITH RECURSIVE cte AS ( - SELECT 1 - UNION ALL - SELECT n+1 FROM cte WHERE n < 10 -) SELECT * FROM cte - --- Window Function -SELECT 
ROW_NUMBER() OVER (PARTITION BY dept ORDER BY salary DESC) FROM employees - --- Multiple JOINs -SELECT * FROM a LEFT JOIN b ON a.id = b.a_id RIGHT JOIN c ON b.id = c.b_id -``` +The fuzz tests include comprehensive seed corpus covering valid SQL, SQL injection patterns, nested structures, international characters, and complex queries (CTEs, window functions, JOINs). Seed cases are defined in `tokenizer_fuzz_test.go` lines 17-105. ## Adding New Seed Corpus -To add new test cases to the seed corpus: +Add test cases in `FuzzTokenizer` using `f.Add()`: ```go -func FuzzTokenizer(f *testing.F) { - // Add your new seed case - f.Add([]byte("SELECT * FROM new_test_case")) - - // Existing seeds... - f.Add([]byte("SELECT * FROM users")) - - f.Fuzz(func(t *testing.T, data []byte) { - // Fuzzing logic... - }) -} +f.Add([]byte("SELECT * FROM new_test_case")) ``` +Seed cases should cover edge cases, novel SQL patterns, or previously discovered crashes. + ## Handling Discovered Crashes When fuzzing discovers a crash: @@ -272,177 +113,59 @@ Debug and fix the underlying vulnerability in tokenizer or parser. 
go test -run=TestFuzzCrashRegression ./pkg/sql/tokenizer/ # Re-run fuzz test -go test -fuzz=FuzzTokenizer -fuzztime=1m ./pkg/sql/tokenizer/ +go test -fuzz='^FuzzTokenizer$' -fuzztime=1m ./pkg/sql/tokenizer/ ``` ## CI/CD Integration -### GitHub Actions Example - -```yaml -name: Fuzz Testing - -on: - schedule: - # Run weekly - - cron: '0 2 * * 0' - workflow_dispatch: - -jobs: - fuzz: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - uses: actions/setup-go@v4 - with: - go-version: '1.21' - - - name: Fuzz Tokenizer - run: | - go test -fuzz=FuzzTokenizer -fuzztime=5m ./pkg/sql/tokenizer/ - - - name: Fuzz Parser - run: | - go test -fuzz=FuzzParser -fuzztime=5m ./pkg/sql/parser/ - - - name: Upload Corpus - if: always() - uses: actions/upload-artifact@v3 - with: - name: fuzz-corpus - path: | - pkg/sql/tokenizer/testdata/fuzz/ - pkg/sql/parser/testdata/fuzz/ -``` - -### Pre-Commit Hook +Fuzz tests can be run in CI pipelines. For example, in pre-PR checks: ```bash -#!/bin/sh -# .git/hooks/pre-commit +# Quick fuzz (30 seconds) +go test -fuzz='^FuzzTokenizer$' -fuzztime=30s ./pkg/sql/tokenizer/ -echo "Running quick fuzz tests..." -go test -fuzz=FuzzTokenizer -fuzztime=30s ./pkg/sql/tokenizer/ || exit 1 -go test -fuzz=FuzzParser -fuzztime=30s ./pkg/sql/parser/ || exit 1 -echo "Fuzz tests passed!" +# Extended fuzz for releases (5+ minutes) +go test -fuzz='^FuzzTokenizer$' -fuzztime=5m ./pkg/sql/tokenizer/ ``` -## Best Practices - -### 1. Run Fuzz Tests Regularly -```bash -# Daily development -go test -fuzz=FuzzTokenizer -fuzztime=1m ./pkg/sql/tokenizer/ +For weekly continuous fuzzing, configure scheduled jobs to run with an extended duration (e.g. `-fuzztime=1h` or longer). -# Before release -go test -fuzz=FuzzTokenizer -fuzztime=30m ./pkg/sql/tokenizer/ -``` - -### 2. Use Race Detection -```bash -go test -race -fuzz=FuzzTokenizer -fuzztime=30s ./pkg/sql/tokenizer/ -``` - -### 3. 
Monitor Performance -Track fuzzing metrics over time: -- Executions per second -- New interesting cases discovered -- Corpus size growth - -### 4. Keep Corpus Manageable -- Commit valuable corpus entries to repo -- Prune redundant cases periodically -- Balance coverage vs. corpus size +## Best Practices -### 5. Document Findings -Add comments to regression tests explaining: -- What input caused the crash -- Why it crashed -- How it was fixed +1. **Run regularly**: Daily during development (1m), pre-release (30m+) +2. **Use race detection**: `go test -race -fuzz='^FuzzTokenizer$' -fuzztime=30s ./pkg/sql/tokenizer/` +3. **Monitor metrics**: Track executions/sec and new interesting cases discovered +4. **Manage corpus**: Commit valuable corpus entries to repo; prune redundant cases periodically +5. **Document crashes**: Add detailed comments to regression tests explaining the crash, cause, and fix ## Troubleshooting -### Fuzz Test Takes Too Long -```bash -# Reduce fuzz time -go test -fuzz=FuzzTokenizer -fuzztime=10s ./pkg/sql/tokenizer/ +**Slow tests**: Reduce duration with `-fuzztime=10s` or reduce parallelism with `-parallel=4` -# Reduce parallelism -go test -fuzz=FuzzTokenizer -fuzztime=30s -parallel=4 ./pkg/sql/tokenizer/ -``` - -### Out of Memory -```bash -# Limit corpus size -export GOCACHE_MAXSIZE=100MB +**Out of memory**: Run `go clean -fuzzcache` or set `GOCACHE_MAXSIZE=100MB` -# Or clear cache -go clean -fuzzcache -``` - -### Too Many Workers -```bash -# Control worker count -GOMAXPROCS=4 go test -fuzz=FuzzTokenizer -fuzztime=30s ./pkg/sql/tokenizer/ -``` +**Too many workers**: Control with the `GOMAXPROCS=4` environment variable ## Advanced Usage -### Continuous Fuzzing with OSS-Fuzz +**Custom duration**: Use `-fuzztime=10m` for time-based or `-fuzztime=1000000x` for execution-based fuzzing -For production projects, integrate with [OSS-Fuzz](https://github.com/google/oss-fuzz): - -1. Submit project to OSS-Fuzz -2. OSS-Fuzz runs fuzz tests continuously -3. 
Automatically files issues for crashes -4. Provides detailed crash reports - -### Custom Fuzzing Duration - -```bash -# Fuzz for specific time -go test -fuzz=FuzzTokenizer -fuzztime=10m ./pkg/sql/tokenizer/ - -# Fuzz for specific executions -go test -fuzz=FuzzTokenizer -fuzztime=1000000x ./pkg/sql/tokenizer/ -``` - -### Parallel Fuzzing - -```bash -# Run multiple fuzz tests in parallel -parallel -j4 go test -fuzz={} -fuzztime=5m ./pkg/sql/{}/ ::: \ - FuzzTokenizer:tokenizer \ - FuzzParser:parser \ - FuzzTokenizerUTF8Boundary:tokenizer \ - FuzzParserRecursionDepth:parser -``` +**OSS-Fuzz integration**: For continuous fuzzing, submit the project to [OSS-Fuzz](https://github.com/google/oss-fuzz) ## Resources - [Go Fuzzing Documentation](https://go.dev/doc/fuzz/) -- [Fuzzing Tutorial](https://go.dev/security/fuzz/) -- [GoSQLX Test Report](../TEST-004_FUZZ_TESTING_REPORT.md) +- [Go Security Fuzzing Guide](https://go.dev/security/fuzz/) ## Contributing -When contributing to GoSQLX: - -1. Run fuzz tests before submitting PR -2. Add seed corpus for new features -3. Document any discovered edge cases -4. Update regression tests as needed - -```bash -# Pre-PR checklist -go test -fuzz=FuzzTokenizer -fuzztime=1m ./pkg/sql/tokenizer/ -go test -fuzz=FuzzParser -fuzztime=1m ./pkg/sql/parser/ -go test ./... -``` +1. Run fuzz tests before submitting PR: `go test -fuzz='^FuzzTokenizer$' -fuzztime=1m ./pkg/sql/tokenizer/` +2. Add seed corpus for new features via `f.Add()` in the appropriate fuzz function +3. Document discovered edge cases in regression tests with detailed comments +4. 
Update `TestFuzzCrashRegression` for any crashes found --- -**Last Updated**: 2025-11-06 -**GoSQLX Version**: 1.4.0+ -**Fuzzing Coverage**: Comprehensive (938 lines of fuzz code) +**Last Updated**: 2025-11-28 +**GoSQLX Version**: 1.5.1+ +**Fuzz Test File**: `pkg/sql/tokenizer/tokenizer_fuzz_test.go` (441 lines, 7 fuzz functions) diff --git a/docs/GETTING_STARTED.md b/docs/GETTING_STARTED.md index 0e841be..4a95796 100644 --- a/docs/GETTING_STARTED.md +++ b/docs/GETTING_STARTED.md @@ -6,77 +6,71 @@ Welcome! This guide will get you parsing SQL in under 5 minutes. No prior experi ## Step 1: Install GoSQLX (30 seconds) -### Option A: Using Go Get (Recommended) -```bash -go get github.com/ajitpratap0/GoSQLX -``` +**Requirements**: Go 1.24+ (toolchain go1.25.0) -### Option B: Install CLI Tool +### Option A: Install CLI Tool (Recommended) ```bash go install github.com/ajitpratap0/GoSQLX/cmd/gosqlx@latest ``` -**Requirements**: Go 1.24 or higher - ---- - -## Step 2: Verify Installation (30 seconds) - -Let's make sure everything works: - -### If you installed the CLI: +### Option B: Library Only ```bash -gosqlx validate "SELECT 1" -``` - -**Expected output:** -``` -✓ Valid SQL +go get github.com/ajitpratap0/GoSQLX ``` -### If you installed the library: +**Verify installation:** ```bash -go version -# Should show Go 1.24+ +# Check Go version +go version # Should show Go 1.24+ + +# If you installed CLI: +gosqlx --version ``` --- -## Step 3: Parse Your First Query with CLI (1 minute) +## Step 2: Validate Your First Query (1 minute) The fastest way to get started is with the CLI: -### Validate SQL syntax: ```bash -gosqlx validate "SELECT * FROM users WHERE active = true" -``` - -### Format SQL: -```bash -gosqlx format "select * from users where age>18" -``` - -**Output:** -```sql -SELECT * -FROM users -WHERE age > 18 +# Validate SQL syntax (from stdin) +echo "SELECT * FROM users WHERE active = true" | gosqlx validate +# Output: ✅ Valid SQL + +# Or validate SQL files +gosqlx 
validate query.sql + +# Format SQL with intelligent indentation (from stdin) +echo "select * from users where age>18" | gosqlx format +# Output: +# SELECT * +# FROM users +# WHERE age > 18 + +# Analyze SQL structure (from stdin) +echo "SELECT COUNT(*) FROM orders GROUP BY status" | gosqlx analyze ``` -### Analyze SQL structure: -```bash -gosqlx analyze "SELECT COUNT(*) FROM orders GROUP BY status" -``` +**Available CLI Commands:** +- `validate` - Ultra-fast SQL validation +- `format` - High-performance SQL formatting +- `analyze` - Advanced SQL analysis +- `parse` - AST structure inspection +- `lint` - Check SQL code for style issues +- `lsp` - Start Language Server Protocol server +- `config` - Manage configuration +- `completion` - Shell autocompletion -**That's it!** You're validating and formatting SQL. ✨ +See [CLI Guide](CLI_GUIDE.md) for complete documentation. --- -## Step 4: Parse Your First Query with Go (2 minutes) +## Step 3: Parse Your First Query in Go (2 minutes) -Now let's use GoSQLX in your Go application. +Use GoSQLX in your Go application with the simple API: -### Create a file `main.go`: +### Create `main.go`: ```go package main @@ -102,43 +96,10 @@ func main() { } ``` -**That's it!** Just 3 lines of actual code. No pool management, no manual cleanup - everything is handled for you. 
🎉 - -### More Quick Examples - -```go -// Validate SQL without parsing -if err := gosqlx.Validate("SELECT * FROM users"); err != nil { - fmt.Println("Invalid SQL:", err) -} else { - fmt.Println("Valid SQL!") -} - -// Parse multiple queries efficiently (reuses internal resources) -queries := []string{ - "SELECT * FROM users", - "SELECT * FROM orders", - "SELECT * FROM products", -} -asts, err := gosqlx.ParseMultiple(queries) -if err != nil { - log.Fatal(err) -} -fmt.Printf("Parsed %d queries\n", len(asts)) - -// Parse with timeout for long queries -ast, err := gosqlx.ParseWithTimeout(sql, 5*time.Second) -if err == context.DeadlineExceeded { - fmt.Println("Query took too long to parse") -} - -// Parse from byte slice (useful for file I/O) -sqlBytes := []byte("SELECT * FROM users") -ast, err := gosqlx.ParseBytes(sqlBytes) -``` - -### Run it: +**Run it:** ```bash +go mod init myproject +go get github.com/ajitpratap0/GoSQLX go run main.go ``` @@ -149,222 +110,112 @@ go run main.go Statements: 1 ``` -**Congratulations!** You've parsed your first SQL query with GoSQLX! 🎉 - -> **Performance Note:** The simple API has < 1% overhead compared to the low-level API. Use it everywhere unless you need fine-grained control over resource management. +**That's it!** Just 3 lines of code. No pool management, no manual cleanup - everything is handled automatically. --- -## Step 5: What's Next? 
(1 minute) - -### Learn More: -- **[Usage Guide](USAGE_GUIDE.md)** - Comprehensive patterns and examples -- **[CLI Guide](CLI_GUIDE.md)** - Full CLI documentation -- **[API Reference](API_REFERENCE.md)** - Complete API documentation -- **[Examples](../examples/)** - Real-world code examples - -### Common Tasks: - -#### Validate SQL in Your Application: -```go -func ValidateSQL(sql string) error { - return gosqlx.Validate(sql) -} -``` - -#### Process Multiple Queries: -```go -func ProcessBatch(queries []string) error { - asts, err := gosqlx.ParseMultiple(queries) - if err != nil { - return err - } - - for i, ast := range asts { - fmt.Printf("Query %d: %d statement(s)\n", i+1, len(ast.Statements)) - } - return nil -} -``` - -#### Use in CI/CD: -```bash -# In your .github/workflows/test.yml -- name: Validate SQL - run: | - gosqlx validate migrations/*.sql - gosqlx format --check queries/*.sql -``` - ---- - -## Advanced Usage: Low-Level API - -For performance-critical applications that need fine-grained control, use the low-level API: - -### When to Use Low-Level API? - -- **High-frequency parsing** (>100K queries/sec) where you can reuse objects -- **Custom tokenization** with specific buffer management -- **Integration with existing pool systems** -- **Fine-grained resource control** in memory-constrained environments - -### Low-Level API Example: +## Step 4: More Quick Examples (1 minute) ```go package main import ( + "context" "fmt" + "log" + "time" - "github.com/ajitpratap0/GoSQLX/pkg/sql/tokenizer" - "github.com/ajitpratap0/GoSQLX/pkg/sql/parser" + "github.com/ajitpratap0/GoSQLX/pkg/gosqlx" ) -func ParseLowLevel(sql string) error { - // Step 1: Get tokenizer from pool (MUST return to pool!) 
- tkz := tokenizer.GetTokenizer() - defer tokenizer.PutTokenizer(tkz) - - // Step 2: Tokenize SQL - tokens, err := tkz.Tokenize([]byte(sql)) - if err != nil { - return fmt.Errorf("tokenization failed: %w", err) +func main() { + // Validate SQL without parsing + if err := gosqlx.Validate("SELECT * FROM users"); err != nil { + fmt.Println("Invalid SQL:", err) + } else { + fmt.Println("Valid SQL!") } - // Step 3: Convert tokens - converter := parser.NewTokenConverter() - result, err := converter.Convert(tokens) + // Parse multiple queries efficiently (reuses internal resources) + queries := []string{ + "SELECT * FROM users", + "SELECT * FROM orders", + "SELECT * FROM products", + } + asts, err := gosqlx.ParseMultiple(queries) if err != nil { - return fmt.Errorf("conversion failed: %w", err) + log.Fatal(err) } + fmt.Printf("Parsed %d queries\n", len(asts)) - // Step 4: Parse to AST (MUST release parser!) - p := parser.NewParser() - defer p.Release() - - ast, err := p.Parse(result.Tokens) - if err != nil { - return fmt.Errorf("parsing failed: %w", err) + // Parse with timeout for long queries + sql := "SELECT * FROM large_table" + _, err = gosqlx.ParseWithTimeout(sql, 5*time.Second) + if err == context.DeadlineExceeded { + fmt.Println("Query took too long to parse") } - fmt.Printf("Parsed: %d statement(s)\n", len(ast.Statements)) - return nil + // Parse from byte slice (zero-copy optimization) + sqlBytes := []byte("SELECT * FROM users") + _, err = gosqlx.ParseBytes(sqlBytes) } ``` -### Performance Comparison - -| API | Throughput | Overhead | Use When | -|-----|-----------|----------|----------| -| **Simple API** (`gosqlx.Parse`) | 273K ops/sec | < 1% | Default choice for most applications | -| **Low-Level API** | 332K ops/sec | 0% (baseline) | Performance-critical paths, custom pooling | - -> **Recommendation:** Start with the simple API. Only switch to low-level if profiling shows it as a bottleneck. 
+> **Performance Note:** The simple API has < 1% overhead compared to the low-level API. Use it everywhere unless you need fine-grained control over resource management. --- -## Common Pitfalls ⚠️ - -### 1. Forgetting to Return to Pool -**❌ Wrong:** -```go -tkz := tokenizer.GetTokenizer() -tokens, _ := tkz.Tokenize([]byte(sql)) -// Missing: defer tokenizer.PutTokenizer(tkz) -``` - -**✅ Correct:** -```go -tkz := tokenizer.GetTokenizer() -defer tokenizer.PutTokenizer(tkz) // Always use defer! -tokens, _ := tkz.Tokenize([]byte(sql)) -``` - -### 2. Reusing Tokenizer Without Reset -**❌ Wrong:** -```go -tkz := tokenizer.GetTokenizer() -defer tokenizer.PutTokenizer(tkz) - -tokens1, _ := tkz.Tokenize([]byte(sql1)) -tokens2, _ := tkz.Tokenize([]byte(sql2)) // State from sql1 still there! -``` +## Step 5: Common Use Cases (30 seconds) -**✅ Correct:** +### Validate SQL in Your Application: ```go -tkz := tokenizer.GetTokenizer() -defer tokenizer.PutTokenizer(tkz) - -tokens1, _ := tkz.Tokenize([]byte(sql1)) - -// Reset state before reusing -tkz.Reset() -tokens2, _ := tkz.Tokenize([]byte(sql2)) -``` - -### 3. 
Not Checking for EOF -**❌ Wrong:** -```go -for _, tok := range tokens { - fmt.Println(tok.Token.Value) // Will print empty EOF token +func ValidateUserQuery(sql string) error { + return gosqlx.Validate(sql) } ``` -**✅ Correct:** +### Process Multiple Queries: ```go -for _, tok := range tokens { - if tok.Token.Type == models.TokenTypeEOF { - break +func ProcessBatch(queries []string) error { + asts, err := gosqlx.ParseMultiple(queries) + if err != nil { + return err } - fmt.Println(tok.Token.Value) + + for i, ast := range asts { + fmt.Printf("Query %d: %d statement(s)\n", i+1, len(ast.Statements)) + } + return nil } ``` ---- - -## Quick Reference - -### Key Imports: -```go -import ( - "github.com/ajitpratap0/GoSQLX/pkg/sql/tokenizer" - "github.com/ajitpratap0/GoSQLX/pkg/sql/parser" - "github.com/ajitpratap0/GoSQLX/pkg/models" - "github.com/ajitpratap0/GoSQLX/pkg/sql/token" -) +### Use in CI/CD: +```bash +# In your .github/workflows/test.yml +- name: Validate SQL + run: | + gosqlx validate migrations/*.sql + gosqlx lint --check queries/*.sql ``` -### Essential Pattern: -```go -// 1. Get from pool -tkz := tokenizer.GetTokenizer() -defer tokenizer.PutTokenizer(tkz) +--- -// 2. Tokenize -tokens, err := tkz.Tokenize([]byte(sql)) +## What's Next? -// 3. Check for errors -if err != nil { - // Handle error -} +### Learn More: +- **[Usage Guide](USAGE_GUIDE.md)** - Comprehensive patterns and examples +- **[CLI Guide](CLI_GUIDE.md)** - Full CLI documentation and all commands +- **[API Reference](API_REFERENCE.md)** - Complete API documentation +- **[Examples](../examples/)** - Real-world code examples -// 4. 
Process tokens -for _, tok := range tokens { - if tok.Token.Type == models.TokenTypeEOF { - break - } - // Use tok -} -``` +### Advanced Topics: +- **Low-Level API** - For performance-critical applications (>100K queries/sec) +- **Object Pooling** - Manual resource management for fine-grained control +- **SQL Injection Detection** - Built-in security scanning +- **Multi-Dialect Support** - PostgreSQL, MySQL, SQL Server, Oracle, SQLite +- **Unicode Support** - Full international character support -### CLI Commands: -```bash -gosqlx validate # Validate SQL syntax -gosqlx format # Format SQL with style -gosqlx analyze # Analyze SQL structure -gosqlx parse # Parse to AST -``` +See [Usage Guide](USAGE_GUIDE.md) for advanced patterns. --- @@ -397,54 +248,20 @@ gosqlx validate "your SQL here" --- -## Performance Tips 💡 - -GoSQLX is designed for high performance. Here are quick tips: - -1. **Always use object pools** (via `defer`) -2. **Reuse tokenizer for multiple queries** (with `Reset()`) -3. **Avoid string conversions** when possible -4. **Use batch processing** for multiple queries -5. **Profile with benchmarks** for critical paths - -See [Performance Optimization](USAGE_GUIDE.md#performance-optimization) for details. 
- ---- - -## What You've Learned ✅ +## What You've Learned - ✓ Installing GoSQLX (library and CLI) -- ✓ Validating SQL with CLI -- ✓ Parsing SQL in Go applications -- ✓ Using object pools correctly -- ✓ Common pitfalls to avoid +- ✓ Validating and formatting SQL with CLI +- ✓ Parsing SQL in Go applications with simple API +- ✓ Common use cases and patterns - ✓ Where to find more help --- -## Next Steps 🚀 - -**For CLI Users:** -- Explore all CLI commands: [CLI Guide](CLI_GUIDE.md) -- Integrate into CI/CD pipelines -- Batch process SQL files - -**For Library Users:** -- Learn advanced patterns: [Usage Guide](USAGE_GUIDE.md) -- Build custom SQL analysis tools -- Optimize for your use case - -**For Everyone:** -- Check out [real-world examples](../examples/) -- Read the [architecture documentation](ARCHITECTURE.md) -- Contribute to [the project](../CONTRIBUTING.md) - ---- - -**Time to first success:** < 5 minutes ✓ +**Time to first success:** < 5 minutes **Questions?** Open an issue or start a discussion on GitHub! --- -*Built with ❤️ by the GoSQLX community* +*Built by the GoSQLX community* diff --git a/docs/PERFORMANCE_TUNING.md b/docs/PERFORMANCE_TUNING.md index b6ea3fc..07cb190 100644 --- a/docs/PERFORMANCE_TUNING.md +++ b/docs/PERFORMANCE_TUNING.md @@ -16,16 +16,17 @@ This comprehensive guide helps you achieve optimal performance with GoSQLX in pr 4. [Memory Management](#memory-management) 5. [Concurrent Processing Patterns](#concurrent-processing-patterns) 6. [Benchmarking Methodology](#benchmarking-methodology) -7. [Common Performance Patterns](#common-performance-patterns) -8. [Production Deployment Checklist](#production-deployment-checklist) -9. [Troubleshooting Performance Issues](#troubleshooting-performance-issues) -10. [Real-World Case Studies](#real-world-case-studies) +7. [Performance Regression Testing](#performance-regression-testing) +8. [Common Performance Patterns](#common-performance-patterns) +9. 
[Production Deployment Checklist](#production-deployment-checklist) +10. [Troubleshooting Performance Issues](#troubleshooting-performance-issues) +11. [Real-World Case Studies](#real-world-case-studies) --- ## Performance Overview -### Baseline Performance (v1.5.1) +### Baseline Performance (v1.5.1) GoSQLX delivers production-validated performance across multiple workloads: @@ -39,17 +40,8 @@ GoSQLX delivers production-validated performance across multiple workloads: | **Concurrent Scaling** | Linear to 128+ cores | Native Go concurrency | | **Tokenization Speed** | 8M tokens/sec | Raw tokenization throughput | -### Performance Characteristics - -``` -Query Complexity vs Latency: -- Simple SELECT: <0.5ms (SELECT * FROM users) -- Medium JOIN: ~0.7ms (3-table JOIN with WHERE) -- Complex Analytics: ~1.2ms (CTEs + window functions + 5 JOINs) -- Very Large Query: ~5ms (100KB+ SQL with deep nesting) -``` - -**Key Insight**: GoSQLX is optimized for the 80% use case - typical production SQL queries complete in sub-millisecond time. +### Query Complexity vs Latency +- Simple SELECT: <0.5ms | Medium JOIN: ~0.7ms | Complex Analytics: ~1.2ms | Very Large: ~5ms --- @@ -89,8 +81,9 @@ func profileCPU() { tokens, _ := tkz.Tokenize(sql) tokenizer.PutTokenizer(tkz) - p := parser.New() - _, _ = p.Parse(tokens) + convertedTokens, _ := parser.ConvertTokensForParser(tokens) + p := parser.NewParser() + _, _ = p.Parse(convertedTokens) } } ``` @@ -98,16 +91,9 @@ func profileCPU() { #### Analyzing CPU Profiles ```bash -# Run your application with profiling -go run main.go - -# Analyze the profile +go run main.go # Run with profiling go tool pprof cpu.prof - -# In pprof interactive mode: -(pprof) top 10 # Show top 10 CPU consumers -(pprof) list TokenizeContext # Show line-by-line profile for function -(pprof) web # Generate visual call graph (requires graphviz) +# In pprof: top 10, list TokenizeContext, web (for call graph) ``` ### 2. 
Memory Profiling @@ -141,13 +127,8 @@ func profileMemory() { #### Analyzing Memory Profiles ```bash -# Analyze memory profile go tool pprof mem.prof - -# Show allocations -(pprof) top 10 # Top 10 memory allocators -(pprof) list NewAST # Memory allocations in specific function -(pprof) alloc_space # Total allocations (not just live objects) +# In pprof: top 10, list NewAST, alloc_space ``` ### 3. Continuous Profiling in Production @@ -171,13 +152,8 @@ func main() { Access profiles via HTTP: ```bash -# CPU profile (30 second sample) curl http://localhost:6060/debug/pprof/profile?seconds=30 > cpu.prof - -# Heap profile curl http://localhost:6060/debug/pprof/heap > heap.prof - -# Goroutine profile curl http://localhost:6060/debug/pprof/goroutine > goroutine.prof ``` @@ -527,7 +503,7 @@ func pipelineProcessing(input <-chan []byte) <-chan Result { parsed := make(chan Result, 100) go func() { defer close(parsed) - p := parser.New() + p := parser.NewParser() for tokens := range tokenized { ast, err := p.Parse(tokens) @@ -576,51 +552,116 @@ What to look for: ### 3. Comparing Benchmarks (Before/After Optimization) ```bash -# Save baseline go test -bench=BenchmarkTokenizer -benchmem -count=5 > baseline.txt - -# Make your changes - -# Compare with baseline +# Make changes go test -bench=BenchmarkTokenizer -benchmem -count=5 > new.txt benchstat baseline.txt new.txt - -# Output: -# name old time/op new time/op delta -# TokenizeSimple-16 724ns ± 2% 580ns ± 3% -19.89% (p=0.000 n=5+5) -# -# name old alloc/op new alloc/op delta -# TokenizeSimple-16 1.86kB ± 0% 1.12kB ± 0% -39.78% (p=0.000 n=5+5) +# Shows delta: TokenizeSimple-16: 724ns → 580ns (-19.89%) ``` ### 4. 
Custom Benchmarks for Your Workload ```go func BenchmarkYourWorkload(b *testing.B) { - // Load your real production SQL queries := loadProductionSQL("testdata/production_queries.sql") - - b.ResetTimer() // Reset timer after setup - + b.ResetTimer() for i := 0; i < b.N; i++ { - sql := queries[i%len(queries)] - tkz := tokenizer.GetTokenizer() - _, err := tkz.Tokenize(sql) + _, err := tkz.Tokenize(queries[i%len(queries)]) tokenizer.PutTokenizer(tkz) - - if err != nil { - b.Fatal(err) - } + if err != nil { b.Fatal(err) } } - - // Report custom metrics - b.ReportMetric(float64(len(queries)), "queries") } ``` --- +## Performance Regression Testing + +### Overview + +GoSQLX includes automated performance regression tests to prevent performance degradation over time. The suite tracks key metrics against established baselines and alerts developers to regressions. + +### Running Regression Tests + +#### Quick Test (Recommended for CI/CD) +```bash +go test -v ./pkg/sql/parser/ -run TestPerformanceRegression +``` +- **Execution Time:** ~8 seconds +- **Coverage:** 5 critical query types +- **Exit Code 0:** All tests passed +- **Exit Code 1:** Performance regression detected + +#### Baseline Benchmark +```bash +go test -bench=BenchmarkPerformanceBaseline -benchmem -count=5 ./pkg/sql/parser/ +``` +Use this after significant parser changes to establish new performance baselines. 
+ +### Performance Baselines + +Current baselines are stored in `performance_baselines.json`: + +| Query Type | Baseline | Current | Metrics | +|------------|----------|---------|---------| +| **SimpleSelect** | 280 ns/op | ~265 ns/op | 9 allocs, 536 B/op | +| **ComplexQuery** | 1100 ns/op | ~1020 ns/op | 36 allocs, 1433 B/op | +| **WindowFunction** | 450 ns/op | ~400 ns/op | 14 allocs, 760 B/op | +| **CTE** | 450 ns/op | ~395 ns/op | 14 allocs, 880 B/op | +| **INSERT** | 350 ns/op | ~310 ns/op | 14 allocs, 536 B/op | + +**Thresholds:** +- **Failure:** 20% degradation from baseline +- **Warning:** 10% degradation from baseline + +### Test Output Examples + +**Successful Run:** +``` +✓ All performance tests passed (5 tests, 0 failures, 0 warnings) +``` + +**Regression Detected:** +``` +✗ ComplexQuery: 25.5% slower (1381 ns/op vs 1100 ns/op baseline) +⚠ SimpleSelect: 12.3% slower (approaching threshold) +``` + +### Updating Baselines + +**When to Update:** +- Intentional optimizations improve performance +- Parser architecture changes fundamentally +- New SQL features are added + +**How to Update:** +1. Run baseline benchmark with multiple iterations +2. Calculate new conservative baselines (add 10-15% buffer) +3. Update `performance_baselines.json` +4. Update the `updated` timestamp +5. Commit with clear explanation + +### CI/CD Integration + +```yaml +# GitHub Actions example +- name: Performance Regression Tests + run: | + go test -v ./pkg/sql/parser/ -run TestPerformanceRegression + timeout-minutes: 2 +``` + +### Troubleshooting Regression Tests + +**Test Timing Variance:** System load, CPU throttling, background processes affect results. Run tests multiple times. + +**False Positives:** Check system load, run test 3-5 times to confirm, consider increasing tolerance. + +**Baseline Drift:** If performance is consistently better, document improvements and update baselines. 
+ +--- + ## Common Performance Patterns ### Pattern 1: High-Throughput Batch Processing diff --git a/docs/PRODUCTION_GUIDE.md b/docs/PRODUCTION_GUIDE.md index a253068..7c33528 100644 --- a/docs/PRODUCTION_GUIDE.md +++ b/docs/PRODUCTION_GUIDE.md @@ -362,7 +362,7 @@ func (p *SQLProcessor) ScanForInjection(sql []byte) error { scanner := security.NewScanner() result := scanner.Scan(ast) - if result.HasCritical() || result.HasHigh() { + if result.HasCritical() || result.HasHighOrAbove() { return fmt.Errorf("potential SQL injection detected: %d issues", result.CriticalCount + result.HighCount) } @@ -382,8 +382,8 @@ func (p *SQLProcessor) ScanForInjection(sql []byte) error { ### 1. Performance Metrics ```go -// Optional: Use tools/metrics for production monitoring -import "github.com/ajitpratap0/GoSQLX/tools/metrics" +// Optional: Use pkg/metrics for production monitoring +import "github.com/ajitpratap0/GoSQLX/pkg/metrics" func init() { metrics.Enable() // Optional monitoring @@ -550,14 +550,17 @@ type ProductionConfig struct { ### Debugging Tools ```bash -# Use profiler tool for analysis -go run ./tools/profiler/main.go -mode performance +# Use built-in metrics package for performance monitoring +# Import and use: github.com/ajitpratap0/GoSQLX/pkg/metrics -# Use validator for health checks -go run ./tools/validator/main.go -mode integration +# Example: Check metrics snapshot +metrics.GetSnapshot() // Returns current metrics -# Monitor with built-in metrics -curl http://localhost:8080/metrics +# Monitor memory usage in production +var m runtime.MemStats +runtime.ReadMemStats(&m) +log.Printf("Memory: Alloc=%d KB, Sys=%d KB, NumGC=%d", + m.Alloc/1024, m.Sys/1024, m.NumGC) ``` ## Performance Benchmarks diff --git a/docs/README.md b/docs/README.md index 3074a9b..5c1660f 100644 --- a/docs/README.md +++ b/docs/README.md @@ -4,7 +4,7 @@ Comprehensive documentation for the GoSQLX SQL parsing SDK. 
**Current Version**: v1.5.1+ | **Last Updated**: November 2025 -## 📚 Documentation Index +## Documentation Index ### Getting Started @@ -27,7 +27,6 @@ Comprehensive documentation for the GoSQLX SQL parsing SDK. | Document | Description | Audience | |----------|-------------|----------| | [**ERROR_CODES.md**](ERROR_CODES.md) | Comprehensive error code reference (E1xxx-E4xxx) | Developers | -| [**ERROR_REFERENCE.md**](ERROR_REFERENCE.md) | Error handling patterns and recovery strategies | Developers | | [**sql99-compliance-analysis.md**](sql99-compliance-analysis.md) | SQL-99 standard compliance analysis (~80-85%) | Architects | ### Deployment & Operations @@ -55,7 +54,7 @@ Comprehensive documentation for the GoSQLX SQL parsing SDK. | [**migration/FROM_PG_QUERY.md**](migration/FROM_PG_QUERY.md) | Migrating from pg_query | | [**migration/FROM_SQLFLUFF.md**](migration/FROM_SQLFLUFF.md) | Migrating from SQLFluff | -## 🚀 Quick Start Guides +## Quick Start Guides ### For New Users 1. Start with [USAGE_GUIDE.md](USAGE_GUIDE.md) - Basic usage patterns @@ -72,7 +71,7 @@ Comprehensive documentation for the GoSQLX SQL parsing SDK. 2. Review [SECURITY.md](SECURITY.md) - Security considerations 3. 
Check [SQL_COMPATIBILITY.md](SQL_COMPATIBILITY.md) - Dialect support -## 📖 Documentation Structure +## Documentation Structure ``` docs/ @@ -88,25 +87,23 @@ docs/ ├── SQL_COMPATIBILITY.md # SQL dialect matrix ├── SECURITY.md # Security analysis ├── ERROR_CODES.md # Error code reference -├── ERROR_REFERENCE.md # Error handling guide ├── COMPARISON.md # Parser comparison ├── FUZZ_TESTING_GUIDE.md # Fuzz testing guide ├── sql99-compliance-analysis.md # SQL-99 compliance -├── CLEAN_ARCHITECTURE.md # Architecture principles └── migration/ # Migration guides ├── FROM_JSQLPARSER.md ├── FROM_PG_QUERY.md └── FROM_SQLFLUFF.md ``` -## 🔍 Finding Information +## Finding Information ### By Topic **Installation & Setup** - [Installation](USAGE_GUIDE.md#installation) - [Prerequisites](PRODUCTION_GUIDE.md#prerequisites) -- [Quick Start](../README.md#-quick-start) +- [Quick Start](../README.md#quick-start) **Basic Usage** - [Simple Tokenization](USAGE_GUIDE.md#simple-tokenization) @@ -115,12 +112,12 @@ docs/ **Advanced Topics** - [Concurrent Processing](USAGE_GUIDE.md#concurrent-processing) -- [Memory Optimization](ARCHITECTURE.md#memory-optimization-strategies) +- [Memory Management](ARCHITECTURE.md#memory-management) - [Performance Tuning](PRODUCTION_GUIDE.md#performance-optimization) **Troubleshooting** - [Common Issues](TROUBLESHOOTING.md#common-issues) -- [Error Messages](TROUBLESHOOTING.md#error-messages) +- [Error Codes Reference](TROUBLESHOOTING.md#error-codes-reference) - [FAQ](TROUBLESHOOTING.md#faq) **SQL Dialects** @@ -144,15 +141,15 @@ docs/ → See [USAGE_GUIDE.md#unicode-and-international-support](USAGE_GUIDE.md#unicode-and-international-support) **"I'm getting an error"** -→ See [TROUBLESHOOTING.md#error-messages](TROUBLESHOOTING.md#error-messages) +→ See [TROUBLESHOOTING.md#error-codes-reference](TROUBLESHOOTING.md#error-codes-reference) **"My application is slow"** → See [TROUBLESHOOTING.md#performance-issues](TROUBLESHOOTING.md#performance-issues) **"I found a 
memory leak"** -→ See [TROUBLESHOOTING.md#memory-leaks](TROUBLESHOOTING.md#memory-leaks) +→ See [TROUBLESHOOTING.md#memory-issues](TROUBLESHOOTING.md#memory-issues) -## 📊 Coverage Matrix +## Coverage Matrix | Topic | API Ref | Usage | Architecture | Troubleshooting | Production | |-------|---------|-------|--------------|-----------------|------------| @@ -169,7 +166,7 @@ docs/ | Monitoring | | | | | ✓ | | Security | | | | | ✓ | -## 💡 Contributing to Documentation +## Contributing to Documentation We welcome documentation improvements! To contribute: @@ -188,7 +185,7 @@ We welcome documentation improvements! To contribute: - Keep formatting consistent - Test all code examples -## 📞 Getting Help +## Getting Help If you can't find what you need: @@ -197,7 +194,7 @@ If you can't find what you need: 3. **Ask**: Open a [new issue](https://github.com/ajitpratap0/GoSQLX/issues/new) 4. **Discuss**: Join [discussions](https://github.com/ajitpratap0/GoSQLX/discussions) -## 🔄 Documentation Updates +## Documentation Updates | Document | Last Updated | Version | |----------|--------------|---------| @@ -213,7 +210,7 @@ If you can't find what you need: | ERROR_CODES.md | 2025-11 | v1.5.1 | | PERFORMANCE_TUNING.md | 2025-11 | v1.5.1 | -## 🆕 Recent Feature Additions (v1.4+) +## Recent Feature Additions (v1.4+) - **SQL Injection Detection** - `pkg/sql/security` package for pattern detection - **MERGE Statements** - SQL Server/PostgreSQL MERGE support diff --git a/docs/SECURITY.md b/docs/SECURITY.md index 3a81348..8137df0 100644 --- a/docs/SECURITY.md +++ b/docs/SECURITY.md @@ -1,6 +1,6 @@ -# GoSQLX Security Analysis Report +# GoSQLX Security Documentation -> **Note**: This document provides comprehensive security analysis and the SQL injection detection API. For security policies and vulnerability reporting, see [SECURITY.md](../SECURITY.md) in the project root. +This document provides comprehensive security analysis, operational security setup, and the SQL injection detection API. 
For vulnerability reporting, see [SECURITY.md](../SECURITY.md) in the project root. ## 🛡️ Comprehensive Security Assessment @@ -14,11 +14,11 @@ GoSQLX has undergone a comprehensive security analysis across 7 critical security domains. The library demonstrates **strong security characteristics** suitable for production deployment with **minimal security concerns**. -### 🆕 Security Package (v1.4+) +### Security Package (v1.4+) GoSQLX now includes a dedicated **SQL Injection Detection** package (`pkg/sql/security`) that provides: -- **8 Pattern Types**: Tautology, Comment Bypass, Stacked Query, UNION-based, Time-based, Boolean-based, Out-of-Band, Dangerous Functions +- **6 Pattern Types**: Tautology, Comment Bypass, UNION-based, Time-based, Out-of-Band, Dangerous Functions - **4 Severity Levels**: CRITICAL, HIGH, MEDIUM, LOW - **Multi-Database Support**: PostgreSQL, MySQL, SQL Server, SQLite system table detection - **Thread-Safe**: Safe for concurrent use across goroutines @@ -387,7 +387,7 @@ err := validator.Validate(filepath) ### Documentation - **Package Documentation**: [cmd/gosqlx/internal/validate/README.md](../cmd/gosqlx/internal/validate/README.md) -- **CLI Guide**: [docs/CLI_GUIDE.md](CLI_GUIDE.md#security-limits-and-protections) +- **CLI Guide**: [CLI_GUIDE.md](CLI_GUIDE.md) - **Security Tests**: `cmd/gosqlx/internal/validate/security_test.go` - **Demo Tests**: `cmd/gosqlx/internal/validate/security_demo_test.go` @@ -484,24 +484,113 @@ if err != nil { ### Recommended Additional Measures -1. **Static Analysis Integration** - ```bash - # Add to CI/CD pipeline - go install github.com/securecodewarrior/gosec/cmd/gosec@latest - gosec ./... - ``` - -2. **Dependency Scanning** - ```bash - # Regular dependency auditing - go list -json -deps ./... | nancy sleuth - ``` - -3. 
**Fuzz Testing** (Future Enhancement) - ```bash - # Consider adding go-fuzz for continuous fuzzing - go install github.com/dvyukov/go-fuzz/go-fuzz@latest - ``` +**Static Analysis**: See Security Scanning Infrastructure section below for GoSec, Trivy, and GovulnCheck setup. + +**Fuzz Testing** (Future Enhancement): +```bash +# Consider adding go-fuzz for continuous fuzzing +go install github.com/dvyukov/go-fuzz/go-fuzz@latest +``` + +--- + +## 🔧 Security Scanning Infrastructure + +### Security Workflow Components + +GoSQLX implements comprehensive security scanning with four key tools: + +1. **GoSec** - Static security analysis for Go code (v2.21.4+) +2. **Trivy** - Vulnerability scanner for dependencies and configurations (v0.28.0+) +3. **GovulnCheck** - Official Go vulnerability database checker +4. **Dependabot** - Automated dependency update management + +### Workflow Configuration + +**Triggers**: Push to main/develop, PRs to main, weekly (Sundays midnight UTC), manual dispatch + +**Security Jobs**: +- GoSec: Scans code, uploads SARIF to GitHub Security tab +- Trivy Repository: Scans dependencies (CRITICAL/HIGH/MEDIUM) +- Trivy Config: Scans GitHub Actions, Dockerfiles, configs +- Dependency Review: Checks licenses (MIT, Apache-2.0, BSD-2/3-Clause, ISC) +- GovulnCheck: Official Go vulnerability checker +- Security Summary: Aggregates all results + +**Dependabot Configuration**: +- Go modules: Daily at 3 AM EST, max 10 PRs, grouped minor/patch updates +- GitHub Actions: Weekly Mondays 3 AM EST, max 5 PRs +- Labels: `dependencies`, `automated`, commit prefix `chore(deps)` or `chore(ci)` + +### Enabling GitHub Security Features + +**Step 1: Enable Security Features** (Settings → Security & analysis): +- ✅ Dependency graph +- ✅ Dependabot alerts and security updates +- ✅ Code scanning (CodeQL) +- ✅ Secret scanning and push protection + +**Step 2: Branch Protection** (Settings → Branches): +- Require status checks: GoSec, Trivy scans, GovulnCheck +- Require 
up-to-date branches +- Require signed commits (recommended) + +**Step 3: Notifications** (Settings → Notifications): +- Email for security advisories and code scanning +- Web notifications for Dependabot alerts + +### Manual Security Testing + +**GoSec**: +```bash +go install github.com/securego/gosec/v2/cmd/gosec@latest +gosec -severity=medium -confidence=medium ./... +gosec -exclude=G104,G107 ./... # Exclude specific checks +``` + +**Trivy**: +```bash +brew install aquasecurity/trivy/trivy +trivy fs --severity CRITICAL,HIGH,MEDIUM . +trivy fs --format json --output trivy-report.json . +``` + +**GovulnCheck**: +```bash +go install golang.org/x/vuln/cmd/govulncheck@latest +govulncheck ./... +govulncheck -show verbose ./... +``` + +### Handling Security Alerts + +**Dependabot PRs**: +- Safe auto-merge: Patch updates (1.2.3→1.2.4), minor with passing tests +- Manual review: Major updates (1.x→2.0), failing tests, core dependencies + +**Response by Severity**: +- Critical/High: Hotfix within 24-48h, security advisory, patch release +- Medium: Issue tracking, next minor release +- Low: Issue tracking, maintenance release, may defer + +### Security Metrics + +**Track**: +- Vulnerability resolution time (< 7 days high/critical, < 30 days medium/low) +- Dependabot PR merge rate (> 80% within 7 days) +- Security alert backlog (< 5 open alerts) +- False positive rate + +### Troubleshooting + +**GoSec false positives**: +```go +// #nosec G104 -- Intentional: error handling not required +_, _ = fmt.Fprintf(w, "output") +``` + +**Trivy timeout**: Increase timeout in workflow YAML +**Too many Dependabot PRs**: Change schedule to "weekly" in dependabot.yml --- @@ -520,7 +609,7 @@ if err != nil { ### Long-term Security Goals - [ ] Security audit by third-party firm - [ ] CVE monitoring and response process -- [ ] Security-focused documentation expansion +- [ ] Quarterly security posture reviews --- @@ -552,6 +641,34 @@ GoSQLX is **approved for production deployment** in 
security-sensitive environme --- -**Security Analysis Completed**: August 2025 -**Next Review**: Recommended within 6 months or upon major version release +## 📚 Best Practices + +### For Maintainers + +1. **Review Weekly Scans**: Check Sunday scan results every Monday, prioritize findings +2. **Keep Actions Updated**: Accept Dependabot PRs for GitHub Actions, review changelogs +3. **Document Security Decisions**: Add comments when dismissing alerts, document risk acceptance +4. **Regular Security Audits**: Quarterly reviews, consider annual penetration testing + +### For Contributors + +1. **Run Security Checks Locally**: Run gosec before submitting PRs +2. **Security-Conscious Coding**: No hardcoded credentials, use secure defaults, follow OWASP guidelines +3. **Dependency Management**: Minimize dependencies, justify additions, check security history + +--- + +## 📖 References + +- [GoSec Documentation](https://github.com/securego/gosec) +- [Trivy Documentation](https://aquasecurity.github.io/trivy/) +- [GovulnCheck Documentation](https://pkg.go.dev/golang.org/x/vuln/cmd/govulncheck) +- [Dependabot Documentation](https://docs.github.com/en/code-security/dependabot) +- [GitHub Code Scanning](https://docs.github.com/en/code-security/code-scanning) +- [OWASP Secure Coding Practices](https://owasp.org/www-project-secure-coding-practices-quick-reference-guide/) + +--- + +**Security Analysis Completed**: November 2025 +**Next Review**: May 2026 (6 months) or upon major version release **Contact**: For security questions or to report issues, please use responsible disclosure practices \ No newline at end of file diff --git a/docs/SECURITY_SETUP.md b/docs/SECURITY_SETUP.md new file mode 100644 index 0000000..22ddd03 --- /dev/null +++ b/docs/SECURITY_SETUP.md @@ -0,0 +1,14 @@ +# Security Setup Guide + +**This content has been moved to [SECURITY.md](SECURITY.md)** + +All security scanning setup, configuration, and operational procedures have been consolidated into the main 
security documentation. + +Please refer to [SECURITY.md](SECURITY.md) for: +- Security scanning infrastructure setup +- Enabling GitHub security features +- Manual security testing procedures +- Handling security alerts and Dependabot PRs +- Security metrics and monitoring +- Troubleshooting +- Best practices for maintainers and contributors diff --git a/docs/SQL_COMPATIBILITY.md b/docs/SQL_COMPATIBILITY.md index c31f076..7fc34ed 100644 --- a/docs/SQL_COMPATIBILITY.md +++ b/docs/SQL_COMPATIBILITY.md @@ -67,7 +67,7 @@ This matrix documents the comprehensive SQL feature support in GoSQLX across dif | **DROP TABLE** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ Full | 100% | | **CREATE INDEX** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ Full | 100% | | Unique indexes | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ Full | 100% | -| Partial indexes | ✅ | ❌ | ⚠️ | ❌ | ✅ | ✅ Full | 70% | +| Partial indexes | ✅ | ❌ | ⚠️ | ❌ | ✅ | ⚠️ Partial | 40% | | **CREATE VIEW** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ Full | 100% | | **CREATE MATERIALIZED VIEW** | ✅ | ❌ | ✅ | ✅ | ❌ | ✅ Full | 95% | | REFRESH MATERIALIZED VIEW | ✅ | ❌ | ❌ | ✅ | ❌ | ✅ Full | 95% | @@ -82,13 +82,13 @@ This matrix documents the comprehensive SQL feature support in GoSQLX across dif |---------|------------|-------|------------|--------|--------|---------------|---------------| | **INNER JOIN** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ Full | 100% | | **LEFT JOIN** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ Full | 100% | -| **RIGHT JOIN** | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ Full | 80% | -| **FULL OUTER JOIN** | ✅ | ❌ | ✅ | ✅ | ❌ | ✅ Full | 60% | +| **RIGHT JOIN** | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ Full | 100% | +| **FULL OUTER JOIN** | ✅ | ❌ | ✅ | ✅ | ❌ | ✅ Full | 85% | | **CROSS JOIN** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ Full | 100% | -| **NATURAL JOIN** | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ Full | 80% | +| **NATURAL JOIN** | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ Full | 95% | | Multiple table JOINs | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ Full | 100% | | Self JOINs | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ Full | 100% | -| **LATERAL JOIN** | ✅ | ❌ | ❌ | ❌ | ❌ | 🔧 Syntax | 20% | +| **LATERAL JOIN** | ✅ | ❌ | ❌ | ❌ | ❌ | 🔧 
Syntax | 10% | ### Subqueries @@ -111,9 +111,9 @@ This matrix documents the comprehensive SQL feature support in GoSQLX across dif | **SUM** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ Full | 100% | | **AVG** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ Full | 100% | | **MIN/MAX** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ Full | 100% | -| **GROUP_CONCAT** | ❌ | ✅ | ❌ | ❌ | ✅ | ✅ Full | 40% | -| **STRING_AGG** | ✅ | ❌ | ✅ | ✅ | ❌ | ✅ Full | 60% | -| **ARRAY_AGG** | ✅ | ❌ | ❌ | ✅ | ❌ | ✅ Full | 40% | +| **GROUP_CONCAT** | ❌ | ✅ | ❌ | ❌ | ✅ | ⚠️ Partial | 30% | +| **STRING_AGG** | ✅ | ❌ | ✅ | ✅ | ❌ | ⚠️ Partial | 30% | +| **ARRAY_AGG** | ✅ | ❌ | ❌ | ✅ | ❌ | ⚠️ Partial | 30% | ## Advanced SQL Features @@ -146,8 +146,8 @@ This matrix documents the comprehensive SQL feature support in GoSQLX across dif |---------|------------|-------|------------|--------|--------|---------------|---------------| | **UNION** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ Full | 100% | | **UNION ALL** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ Full | 100% | -| **INTERSECT** | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ Full | 80% | -| **EXCEPT/MINUS** | ✅ | ❌ | ✅ | ✅ | ❌ | ✅ Full | 60% | +| **INTERSECT** | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ Full | 90% | +| **EXCEPT/MINUS** | ✅ | ❌ | ✅ | ✅ | ❌ | ✅ Full | 90% | ### Grouping Operations (SQL-99 T431) @@ -184,16 +184,18 @@ This matrix documents the comprehensive SQL feature support in GoSQLX across dif ## Dialect-Specific Features +**Note**: This section documents dialect-specific features where "Support Level" refers to the native database's support, while "GoSQLX Parser" and "Test Coverage" indicate GoSQLX implementation status. Many features listed have keyword recognition only (🔧 Syntax) without full parsing logic. 
+ ### PostgreSQL-Specific Features | Feature | Support Level | GoSQLX Parser | Test Coverage | Notes | |---------|---------------|---------------|---------------|-------| -| **Arrays** | ✅ Full | ✅ Full | 90% | Array literals, indexing, operators | -| **JSON/JSONB** | ✅ Full | ✅ Full | 85% | JSON operators, functions | -| **Full-text search** | ✅ Full | 🔧 Syntax | 70% | tsvector, tsquery types | -| **LATERAL joins** | ✅ Full | ✅ Full | 80% | Full parsing support | -| **Custom operators** | ✅ Full | ⚠️ Partial | 60% | Basic operator recognition | -| **Dollar quoting** | ✅ Full | ✅ Full | 90% | $tag$ string literals | +| **Arrays** | ✅ Full | 🔧 Syntax | 40% | Keyword recognition only | +| **JSON/JSONB** | ✅ Full | 🔧 Syntax | 30% | Data type recognition | +| **Full-text search** | ✅ Full | 🔧 Syntax | 30% | tsvector, tsquery types | +| **LATERAL joins** | ✅ Full | 🔧 Syntax | 10% | Keyword reserved, no parsing logic | +| **Custom operators** | ✅ Full | ⚠️ Partial | 30% | Basic operator recognition | +| **Dollar quoting** | ✅ Full | ⚠️ Partial | 40% | Limited support | ### MySQL-Specific Features @@ -210,32 +212,32 @@ This matrix documents the comprehensive SQL feature support in GoSQLX across dif | Feature | Support Level | GoSQLX Parser | Test Coverage | Notes | |---------|---------------|---------------|---------------|-------| -| **MERGE** | ✅ Full | ✅ Full | 80% | MERGE statements | -| **PIVOT/UNPIVOT** | ✅ Full | 🔧 Syntax | 60% | Basic syntax recognition | -| **CROSS/OUTER APPLY** | ✅ Full | ✅ Full | 75% | Table-valued functions | -| **HierarchyID** | ✅ Full | 🔧 Syntax | 50% | Data type recognition | -| **T-SQL functions** | ✅ Full | ⚠️ Partial | 70% | Subset of T-SQL functions | +| **MERGE** | ✅ Full | ✅ Full | 95% | MERGE statements with WHEN clauses | +| **PIVOT/UNPIVOT** | ✅ Full | 🔧 Syntax | 10% | Keywords reserved, no parsing logic | +| **CROSS/OUTER APPLY** | ✅ Full | 🔧 Syntax | 10% | Keywords reserved, no parsing logic | +| **HierarchyID** | ✅ Full | 🔧
Syntax | 20% | Data type recognition | +| **T-SQL functions** | ✅ Full | ⚠️ Partial | 40% | Subset of T-SQL functions | ### Oracle-Specific Features | Feature | Support Level | GoSQLX Parser | Test Coverage | Notes | |---------|---------------|---------------|---------------|-------| -| **CONNECT BY** | ✅ Full | ✅ Full | 85% | Hierarchical queries | -| **PRIOR operator** | ✅ Full | ✅ Full | 80% | Hierarchical navigation | -| **DECODE function** | ✅ Full | ✅ Full | 90% | Conditional expressions | -| **NVL/NVL2** | ✅ Full | ✅ Full | 95% | NULL handling functions | -| **Dual table** | ✅ Full | ✅ Full | 100% | System table | -| **Analytic functions** | ✅ Full | ✅ Full | 85% | Oracle analytics | +| **CONNECT BY** | ✅ Full | 🔧 Syntax | 10% | Keywords reserved, no parsing logic | +| **PRIOR operator** | ✅ Full | 🔧 Syntax | 10% | Keyword reserved, no parsing logic | +| **DECODE function** | ✅ Full | ⚠️ Partial | 40% | Recognized as function, no special handling | +| **NVL/NVL2** | ✅ Full | ⚠️ Partial | 40% | Recognized as function, no special handling | +| **Dual table** | ✅ Full | ✅ Full | 100% | Standard table reference | +| **Analytic functions** | ✅ Full | ⚠️ Partial | 60% | Subset via window functions | ### SQLite-Specific Features | Feature | Support Level | GoSQLX Parser | Test Coverage | Notes | |---------|---------------|---------------|---------------|-------| -| **PRAGMA** | ✅ Full | 🔧 Syntax | 60% | Configuration statements | -| **ATTACH/DETACH** | ✅ Full | 🔧 Syntax | 70% | Database operations | -| **Type affinity** | ✅ Full | ⚠️ Partial | 50% | Flexible typing | -| **WITHOUT ROWID** | ✅ Full | ✅ Full | 80% | Table option | -| **Simplified syntax** | ✅ Full | ✅ Full | 95% | SQLite variations | +| **PRAGMA** | ✅ Full | 🔧 Syntax | 10% | Keywords reserved, no parsing logic | +| **ATTACH/DETACH** | ✅ Full | 🔧 Syntax | 10% | Keywords reserved, no parsing logic | +| **Type affinity** | ✅ Full | ⚠️ Partial | 30% | Flexible typing | +| **WITHOUT ROWID** | ✅ Full | ⚠️ 
Partial | 40% | Table option | +| **Simplified syntax** | ✅ Full | ✅ Full | 85% | SQLite variations | ## SQL Standards Compliance @@ -372,28 +374,37 @@ This matrix documents the comprehensive SQL feature support in GoSQLX across dif ## Production Readiness Summary -### ✅ Ready for Production +### Ready for Production - **Core SQL operations** (SELECT, INSERT, UPDATE, DELETE) - **Standard joins and subqueries** - **Window functions and CTEs** +- **MERGE statements** (SQL:2003 F312) +- **GROUPING SETS, ROLLUP, CUBE** (SQL-99 T431) +- **Materialized views** +- **Table partitioning** - **Multi-dialect basic syntax** - **Unicode and international text** - **High-performance scenarios** -### ⚠️ Suitable with Considerations +### Suitable with Considerations -- **Advanced dialect-specific features** -- **Complex XML/JSON operations** +- **Advanced dialect-specific features** (keyword recognition only for: LATERAL, PIVOT/UNPIVOT, CONNECT BY, PRAGMA, ATTACH/DETACH) +- **Complex XML/JSON operations** (syntax recognition only) +- **Dialect-specific functions** (DECODE, NVL, recognized as generic functions) - **Newest SQL standard features (SQL-2011+)** - **Very large query processing** -### 🔧 Development Needed +### Development Needed +- **LATERAL JOIN parsing logic** (keywords reserved) +- **PIVOT/UNPIVOT parsing logic** (keywords reserved) +- **CONNECT BY hierarchical queries** (keywords reserved) - **Full XML function support** - **Advanced JSON operations** - **Row pattern recognition** - **Complete temporal table support** +- **SQLite PRAGMA statements** (keywords reserved) ## Recommendations diff --git a/docs/TROUBLESHOOTING.md b/docs/TROUBLESHOOTING.md index 033280c..f473d99 100644 --- a/docs/TROUBLESHOOTING.md +++ b/docs/TROUBLESHOOTING.md @@ -2,11 +2,9 @@ ## Table of Contents - [Common Issues](#common-issues) -- [Error Messages](#error-messages) +- [Error Codes Reference](#error-codes-reference) - [Performance Issues](#performance-issues) - [Memory 
Issues](#memory-issues) -- [Unicode and Encoding Issues](#unicode-and-encoding-issues) -- [Dialect-Specific Issues](#dialect-specific-issues) - [Debugging Techniques](#debugging-techniques) - [FAQ](#faq) @@ -38,15 +36,14 @@ func GoodExample() { **Symptom:** Memory usage grows over time -**Cause:** Not returning objects to pool +**Cause:** Not returning pooled objects **Solution:** ```go -// WRONG - Leaks tokenizer +// WRONG - Leaks resources func LeakyFunction(sql string) error { tkz := tokenizer.GetTokenizer() // Missing: defer tokenizer.PutTokenizer(tkz) - tokens, err := tkz.Tokenize([]byte(sql)) if err != nil { return err // Tokenizer never returned! @@ -57,12 +54,9 @@ func LeakyFunction(sql string) error { // CORRECT - Always use defer func FixedFunction(sql string) error { tkz := tokenizer.GetTokenizer() - defer tokenizer.PutTokenizer(tkz) // Always executes - - tokens, err := tkz.Tokenize([]byte(sql)) - if err != nil { - return err - } + defer tokenizer.PutTokenizer(tkz) + tokens, _ := tkz.Tokenize([]byte(sql)) + _ = tokens return nil } ``` @@ -109,202 +103,175 @@ func ConcurrentGood(queries []string) { } ``` -## Error Messages +## Error Codes Reference -### "unterminated quoted identifier" +### Tokenizer Errors (E1xxx) -**Example Error:** +**E1001 - Unexpected Character** ``` -unterminated quoted identifier starting at line 1, column 8 +Error E1001 at line 1, column 5: unexpected character: # ``` +- **Cause:** Invalid character in SQL +- **Fix:** Use standard SQL syntax, quote special characters -**Cause:** Missing closing quote for identifier - -**Examples:** +**E1002 - Unterminated String** ```sql --- Missing closing double quote -SELECT "user_name FROM users; - --- Missing closing backtick (MySQL) -SELECT `user_id FROM users; +-- WRONG +SELECT * FROM users WHERE name = 'John; --- Missing closing bracket (SQL Server) -SELECT [user_id FROM users; +-- CORRECT +SELECT * FROM users WHERE name = 'John''s Pizza'; ``` -**Solution:** Ensure all quoted identifiers 
have matching closing quotes +**E1003 - Invalid Number** +- **Cause:** Malformed numeric literal (e.g., `1.2.3`, `1e2e3`) +- **Fix:** Use valid numeric formats -### "unterminated string literal" +**E1004 - Invalid Operator** +- **Cause:** Invalid operator sequence +- **Fix:** Check operator syntax for your SQL dialect -**Example Error:** -``` -unterminated string literal starting at line 2, column 15 -``` +**E1005 - Invalid Identifier** +- **Cause:** Malformed identifier (e.g., unclosed quotes) +- **Fix:** Ensure all quoted identifiers are properly closed -**Cause:** Missing closing quote for string +**E1006 - Input Too Large** +- **Cause:** SQL input exceeds size limits (DoS protection) +- **Fix:** Split large queries or increase limits if appropriate -**Examples:** -```sql --- Missing closing single quote -SELECT * FROM users WHERE name = 'John; +**E1007 - Token Limit Reached** +- **Cause:** Too many tokens generated (DoS protection) +- **Fix:** Simplify query or increase limits --- Incorrect escaping -SELECT * FROM users WHERE name = 'John's Pizza; -``` +**E1008 - Tokenizer Panic** +- **Cause:** Internal tokenizer error (recovered panic) +- **Fix:** Report bug with SQL that triggers this -**Solution:** -```sql --- Correct: Escape quotes by doubling -SELECT * FROM users WHERE name = 'John''s Pizza'; +### Parser Errors (E2xxx) --- Or use different quote style if supported -SELECT * FROM users WHERE name = "John's Pizza"; +**E2001 - Unexpected Token** ``` +Error E2001 at line 1, column 15: unexpected token: LIMIT +``` +- **Cause:** Token not valid in current context +- **Fix:** Check SQL syntax, verify keyword order -### "invalid character" - -**Example Error:** +**E2002 - Expected Token** ``` -invalid character: # +Error E2002 at line 1, column 20: expected FROM but got WHERE ``` +- **Fix:** Add missing required keyword -**Cause:** Unsupported character in SQL +**E2003 - Missing Clause** +- **Cause:** Required SQL clause missing (e.g., SELECT without FROM) +- 
**Fix:** Add required clause -**Common Causes:** -1. Comments using unsupported syntax -2. Special characters not properly quoted -3. Encoding issues +**E2004 - Invalid Syntax** +- **Cause:** General syntax error +- **Fix:** Review SQL syntax for your dialect -**Solution:** -```sql --- Use standard SQL comments --- This is a comment (standard) -/* This is also a comment */ +**E2005 - Incomplete Statement** +- **Cause:** Statement ends unexpectedly +- **Fix:** Complete the SQL statement --- Avoid # style comments (MySQL specific) -# This might not work +**E2006 - Invalid Expression** +- **Cause:** Expression syntax error +- **Fix:** Check expression syntax (operators, parentheses) --- Quote special characters in identifiers -SELECT "column#1" FROM users; -- Quoted -``` +**E2007 - Recursion Depth Limit** +- **Cause:** Query too deeply nested (DoS protection) +- **Fix:** Simplify nested expressions -### "unexpected token" +**E2008 - Unsupported Data Type** +- **Cause:** Data type not yet supported +- **Fix:** Use supported data type or report feature request -**Example Error:** -``` -unexpected token: LIMIT at position 45 -``` +**E2009 - Unsupported Constraint** +- **Cause:** Constraint type not supported +- **Fix:** Use supported constraint or report feature request -**Cause:** Token not expected in current context +**E2010 - Unsupported Join** +- **Cause:** JOIN type not supported +- **Fix:** Use supported JOIN type -**Debugging Steps:** -1. Check SQL syntax for your specific dialect -2. Verify token order -3. 
Look for missing keywords +**E2011 - Invalid CTE** +- **Cause:** WITH clause syntax error +- **Fix:** Check CTE syntax (column list, recursion) -```go -func DebugUnexpectedToken(sql string) { - tkz := tokenizer.GetTokenizer() - defer tokenizer.PutTokenizer(tkz) - - tokens, err := tkz.Tokenize([]byte(sql)) - if err != nil { - fmt.Printf("Tokenization failed: %v\n", err) - return - } - - // Print all tokens for debugging - for i, token := range tokens { - fmt.Printf("%d: %s (type: %d)\n", - i, token.Token.Value, token.Token.Type) - } -} -``` +**E2012 - Invalid Set Operation** +- **Cause:** UNION/EXCEPT/INTERSECT syntax error +- **Fix:** Verify set operation syntax -## Performance Issues +### Semantic Errors (E3xxx) -### Slow Tokenization +**E3001 - Undefined Table** +- **Cause:** Table reference not found +- **Fix:** Define table or check spelling -**Symptom:** Tokenization takes longer than expected +**E3002 - Undefined Column** +- **Cause:** Column reference not found +- **Fix:** Check column exists in table -**Common Causes:** -1. Very large SQL queries -2. Complex Unicode processing -3. 
Not reusing tokenizers +**E3003 - Type Mismatch** +- **Cause:** Expression type incompatibility +- **Fix:** Cast or convert types appropriately -**Diagnosis:** -```go -func MeasurePerformance(sql string) { - start := time.Now() - - tkz := tokenizer.GetTokenizer() - defer tokenizer.PutTokenizer(tkz) - - tokens, err := tkz.Tokenize([]byte(sql)) - - elapsed := time.Since(start) - fmt.Printf("Tokenization took: %v\n", elapsed) - fmt.Printf("Tokens generated: %d\n", len(tokens)) - fmt.Printf("Bytes per second: %.2f\n", - float64(len(sql))/elapsed.Seconds()) -} -``` +**E3004 - Ambiguous Column** +- **Cause:** Column name exists in multiple tables +- **Fix:** Use table qualifier (e.g., `users.id`) + +### Feature Errors (E4xxx) + +**E4001 - Unsupported Feature** +- **Cause:** Feature not yet implemented +- **Fix:** Report feature request or use alternative + +**E4002 - Unsupported Dialect** +- **Cause:** SQL dialect not fully supported +- **Fix:** Use standard SQL or report dialect feature request + +## Performance Issues + +### Slow Parsing/Tokenization + +**Common Causes:** +- Very large SQL queries (>1MB) +- Not reusing tokenizers from pool +- Processing in tight loops **Solutions:** -1. **Reuse tokenizers:** ```go -// Process multiple queries with one tokenizer +// 1. Reuse tokenizers for batch processing func BatchProcess(queries []string) { tkz := tokenizer.GetTokenizer() defer tokenizer.PutTokenizer(tkz) - + for _, sql := range queries { tkz.Reset() tokens, _ := tkz.Tokenize([]byte(sql)) // Process... } } -``` - -2. **Limit query size:** -```go -const MaxQuerySize = 1_000_000 // 1MB - -func ProcessWithLimit(sql string) error { - if len(sql) > MaxQuerySize { - return fmt.Errorf("query too large: %d bytes", len(sql)) - } - - tkz := tokenizer.GetTokenizer() - defer tokenizer.PutTokenizer(tkz) - - return tkz.Tokenize([]byte(sql)) -} -``` -3. **Use concurrent processing:** -```go +// 2. 
Parallel processing with worker pool func ParallelProcess(queries []string) { numWorkers := runtime.NumCPU() work := make(chan string, len(queries)) - - // Queue work + for _, sql := range queries { work <- sql } close(work) - - // Process in parallel + var wg sync.WaitGroup for i := 0; i < numWorkers; i++ { wg.Add(1) go func() { defer wg.Done() - tkz := tokenizer.GetTokenizer() defer tokenizer.PutTokenizer(tkz) - + for sql := range work { tkz.Reset() tokens, _ := tkz.Tokenize([]byte(sql)) @@ -312,66 +279,36 @@ func ParallelProcess(queries []string) { } }() } - wg.Wait() } -``` -### High CPU Usage - -**Symptom:** CPU usage spikes during tokenization +// 3. Limit input size +const MaxQuerySize = 1_000_000 // 1MB +if len(sql) > MaxQuerySize { + return fmt.Errorf("query too large: %d bytes", len(sql)) +} +``` **Profiling:** -```go -import _ "net/http/pprof" +```bash +# CPU profiling +go test -bench=. -cpuprofile=cpu.prof +go tool pprof cpu.prof -func init() { - go func() { - log.Println(http.ListenAndServe("localhost:6060", nil)) - }() -} +# Memory profiling +go test -bench=. -memprofile=mem.prof +go tool pprof mem.prof -// Profile with: go tool pprof http://localhost:6060/debug/pprof/profile +# Live profiling: in your Go program, add `import _ "net/http/pprof"` +# and start an HTTP server on localhost:6060, then +# visit http://localhost:6060/debug/pprof/ ``` -**Common Causes:** -1. Tokenizing in tight loops -2. Not using pools effectively -3. Excessive string operations - ## Memory Issues -### Memory Leaks +### Common Leak Patterns -**Detection:** -```go -func DetectLeak() { - var m runtime.MemStats - - // Baseline - runtime.GC() - runtime.ReadMemStats(&m) - baseline := m.Alloc - - // Run operations - for i := 0; i < 1000; i++ { - tkz := tokenizer.GetTokenizer() - tokens, _ := tkz.Tokenize([]byte("SELECT * FROM users")) - tokenizer.PutTokenizer(tkz) - } - - // Check memory - runtime.GC() - runtime.ReadMemStats(&m) - leaked := m.Alloc - baseline - - fmt.Printf("Potential leak: %d bytes\n", leaked) -} -``` - -**Common Leak Patterns:** - -1. 
**Storing pooled objects:** +**1. Storing pooled objects:** ```go // WRONG - Stores pooled object type BadCache struct { @@ -388,17 +325,17 @@ type GoodCache struct{} func (c *GoodCache) Process(sql string) { tkz := tokenizer.GetTokenizer() defer tokenizer.PutTokenizer(tkz) - // Use and return + tokens, _ := tkz.Tokenize([]byte(sql)) + _ = tokens } ``` -2. **Goroutine leaks:** +**2. Goroutines without defer:** ```go -// WRONG - Goroutine may leak +// WRONG - May leak on panic func LeakyAsync(sql string) { go func() { tkz := tokenizer.GetTokenizer() - // If this panics, tokenizer is never returned tokens, _ := tkz.Tokenize([]byte(sql)) tokenizer.PutTokenizer(tkz) }() @@ -408,274 +345,130 @@ func LeakyAsync(sql string) { func SafeAsync(sql string) { go func() { tkz := tokenizer.GetTokenizer() - defer tokenizer.PutTokenizer(tkz) // Always returns + defer tokenizer.PutTokenizer(tkz) tokens, _ := tkz.Tokenize([]byte(sql)) + _ = tokens }() } ``` -### High Memory Usage +### Memory Monitoring -**Monitoring:** ```go func MonitorMemory() { - ticker := time.NewTicker(10 * time.Second) - defer ticker.Stop() - - for range ticker.C { - var m runtime.MemStats - runtime.ReadMemStats(&m) - - fmt.Printf("Alloc: %d MB\n", m.Alloc/1024/1024) - fmt.Printf("Total: %d MB\n", m.TotalAlloc/1024/1024) - fmt.Printf("Sys: %d MB\n", m.Sys/1024/1024) - fmt.Printf("NumGC: %d\n", m.NumGC) - } + var m runtime.MemStats + runtime.ReadMemStats(&m) + fmt.Printf("Alloc: %d MB, NumGC: %d\n", m.Alloc/1024/1024, m.NumGC) } -``` -**Optimization:** -```go -// Pre-allocate for known sizes -func OptimizedTokenization(sql string) { - estimatedTokens := len(sql) / 5 // Rough estimate - - tkz := tokenizer.GetTokenizer() - defer tokenizer.PutTokenizer(tkz) - - tokens, _ := tkz.Tokenize([]byte(sql)) - - // Pre-allocate result slice - result := make([]string, 0, estimatedTokens) - for _, token := range tokens { - result = append(result, token.Token.Value) - } -} -``` - -## Unicode and Encoding Issues - -### Invalid 
UTF-8 Sequences - -**Problem:** Tokenizer fails with encoding errors +func DetectLeak() { + var m runtime.MemStats + runtime.GC() + runtime.ReadMemStats(&m) + baseline := m.Alloc -**Detection:** -```go -func ValidateUTF8(sql string) error { - if !utf8.ValidString(sql) { - return fmt.Errorf("invalid UTF-8 encoding") - } - - // Find invalid sequences - for i, r := range sql { - if r == utf8.RuneError { - return fmt.Errorf("invalid UTF-8 at position %d", i) - } + for i := 0; i < 1000; i++ { + tkz := tokenizer.GetTokenizer() + tkz.Tokenize([]byte("SELECT * FROM users")) + tokenizer.PutTokenizer(tkz) } - - return nil -} -``` -**Fix Encoding:** -```go -func FixEncoding(input []byte) []byte { - // Remove invalid UTF-8 sequences - return bytes.ToValidUTF8(input, []byte("?")) -} -``` - -### Mixed Character Sets - -**Problem:** Mixing incompatible character sets - -**Solution:** -```go -func NormalizeCharsets(sql string) string { - // Normalize Unicode - return norm.NFC.String(sql) -} -``` - -## Dialect-Specific Issues - -### PostgreSQL - -**Issue:** Array operators not recognized - -```go -// Ensure PostgreSQL operators are handled -sql := `SELECT * FROM users WHERE tags @> ARRAY['admin']` - -tkz := tokenizer.GetTokenizer() -defer tokenizer.PutTokenizer(tkz) - -tokens, err := tkz.Tokenize([]byte(sql)) -if err != nil { - // Check if it's the @> operator causing issues - if strings.Contains(err.Error(), "@>") { - fmt.Println("PostgreSQL array operator issue") - } + runtime.GC() + runtime.ReadMemStats(&m) + leaked := m.Alloc - baseline + fmt.Printf("Potential leak: %d bytes\n", leaked) } ``` -### MySQL +## Debugging Techniques -**Issue:** Backtick identifiers not working +### Token Stream Analysis ```go -// Test MySQL backtick support -func TestMySQLBackticks() error { - sql := "SELECT `user_id` FROM `users`" - +func AnalyzeTokenStream(sql string) { tkz := tokenizer.GetTokenizer() defer tokenizer.PutTokenizer(tkz) - + tokens, err := tkz.Tokenize([]byte(sql)) if err != nil { - 
return fmt.Errorf("MySQL backtick not supported: %v", err) - } - - // Verify backticks were tokenized correctly - for _, token := range tokens { - if token.Token.Type == models.TokenTypeIdentifier { - fmt.Printf("Identifier: %s\n", token.Token.Value) - } + fmt.Printf("Error: %v\n", err) + return } - - return nil -} -``` - -### SQL Server - -**Issue:** Square brackets not recognized - -```go -// Handle SQL Server brackets -sql := "SELECT [user id] FROM [user table]" - -// Pre-process if needed -processed := strings.ReplaceAll(sql, "[", `"`) -processed = strings.ReplaceAll(processed, "]", `"`) -``` - -## Debugging Techniques - -### Enable Debug Logging -```go -type DebugTokenizer struct { - *tokenizer.Tokenizer - debug bool -} - -func (d *DebugTokenizer) Tokenize(input []byte) ([]models.TokenWithSpan, error) { - if d.debug { - fmt.Printf("Input: %s\n", string(input)) - fmt.Printf("Length: %d bytes\n", len(input)) - } - - start := time.Now() - tokens, err := d.Tokenizer.Tokenize(input) - - if d.debug { - fmt.Printf("Duration: %v\n", time.Since(start)) - fmt.Printf("Tokens: %d\n", len(tokens)) - - if err != nil { - fmt.Printf("Error: %v\n", err) + for i, token := range tokens { + if token.Token.Type == models.TokenTypeEOF { + break } + fmt.Printf("%3d | Type: %3d | L%d:C%d | %q\n", + i, token.Token.Type, token.Start.Line, + token.Start.Column, token.Token.Value) } - - return tokens, err } ``` -### Token Stream Analysis +### Parser Testing ```go -func AnalyzeTokenStream(sql string) { +func TestParser(sql string) { + // Tokenize tkz := tokenizer.GetTokenizer() defer tokenizer.PutTokenizer(tkz) - + tokens, err := tkz.Tokenize([]byte(sql)) if err != nil { - fmt.Printf("Error: %v\n", err) + fmt.Printf("Tokenization error: %v\n", err) return } - - fmt.Println("Token Stream Analysis:") - fmt.Println("=" * 50) - - for i, token := range tokens { - if token.Token.Type == models.TokenTypeEOF { - fmt.Println("EOF reached") - break - } - - fmt.Printf("%3d | Type: %3d | Pos: L%d:C%d 
| Value: %q\n", - i, - token.Token.Type, - token.Start.Line, - token.Start.Column, - token.Token.Value) + + // Convert tokens + parserTokens, err := parser.ConvertTokensForParser(tokens) + if err != nil { + fmt.Printf("Token conversion error: %v\n", err) + return } - - // Statistics - fmt.Printf("\nTotal tokens: %d\n", len(tokens)) - fmt.Printf("Input size: %d bytes\n", len(sql)) - fmt.Printf("Tokens per byte: %.2f\n", - float64(len(tokens))/float64(len(sql))) + + // Parse + p := parser.NewParser() + astTree, err := p.Parse(parserTokens) + if err != nil { + fmt.Printf("Parse error: %v\n", err) + return + } + defer ast.ReleaseAST(astTree) + + fmt.Printf("Parsed successfully: %d statements\n", len(astTree.Statements)) } ``` -### Memory Profiling +### Security Scanning ```go -func ProfileMemory(sql string, iterations int) { - var m runtime.MemStats - - // Before - runtime.GC() - runtime.ReadMemStats(&m) - allocBefore := m.Alloc - - // Run tokenization - for i := 0; i < iterations; i++ { - tkz := tokenizer.GetTokenizer() - tokens, _ := tkz.Tokenize([]byte(sql)) - _ = tokens - tokenizer.PutTokenizer(tkz) +import "github.com/ajitpratap0/GoSQLX/pkg/sql/security" + +func CheckSQLSecurity(sql string) { + scanner := security.NewScanner() + result := scanner.Scan(sql) + + if result.HasHighOrAbove() { + fmt.Printf("Security issues found:\n") + for _, finding := range result.Findings { + fmt.Printf("- [%s] %s\n", finding.Severity, finding.Description) + } } - - // After - runtime.GC() - runtime.ReadMemStats(&m) - allocAfter := m.Alloc - - fmt.Printf("Memory used: %d bytes\n", allocAfter-allocBefore) - fmt.Printf("Per iteration: %d bytes\n", - (allocAfter-allocBefore)/int64(iterations)) } ``` ## FAQ -### Q: Why does my application panic when using tokenizers? +### Q: Why does my application panic? 
-**A:** Most likely you're not getting the tokenizer from the pool: +**A:** Always get tokenizer from pool: ```go -// Always use: tkz := tokenizer.GetTokenizer() defer tokenizer.PutTokenizer(tkz) ``` -### Q: How many tokenizers can I get from the pool simultaneously? - -**A:** The pool has no hard limit. It creates new instances as needed and reuses returned ones. For best performance, return tokenizers as soon as possible. - -### Q: Can I modify token values after tokenization? +### Q: Can I modify tokens after tokenization? **A:** Yes, tokens are copies and can be safely modified: ```go @@ -687,44 +480,29 @@ for i := range tokens { } ``` -### Q: How do I handle very large SQL files? +### Q: How do I handle large SQL files (>10MB)? -**A:** For files > 10MB, consider streaming or chunking: +**A:** Stream and process in chunks: ```go func ProcessLargeFile(filename string) error { - file, err := os.Open(filename) - if err != nil { - return err - } + file, _ := os.Open(filename) defer file.Close() - + scanner := bufio.NewScanner(file) scanner.Split(SplitOnSemicolon) // Custom splitter - + tkz := tokenizer.GetTokenizer() defer tokenizer.PutTokenizer(tkz) - + for scanner.Scan() { - sql := scanner.Text() tkz.Reset() - tokens, err := tkz.Tokenize([]byte(sql)) - if err != nil { - return err - } + tokens, _ := tkz.Tokenize([]byte(scanner.Text())) // Process tokens... } - return scanner.Err() } ``` -### Q: Why is Unicode text tokenizing slowly? - -**A:** Complex Unicode requires more processing. Optimize by: -1. Normalizing text before tokenization -2. Using byte operations where possible -3. Caching tokenization results for repeated queries - ### Q: How do I test for race conditions? **A:** Use Go's race detector: @@ -735,43 +513,35 @@ go run -race main.go ### Q: Can I use GoSQLX with database/sql? -**A:** GoSQLX is a parser/tokenizer, not a driver. 
Use it to analyze queries before sending to database/sql: +**A:** Yes. GoSQLX is a parser/tokenizer, not a driver, so use it to validate queries before executing them through database/sql: ```go func ValidateBeforeExecute(db *sql.DB, query string) error { - // Validate with GoSQLX tkz := tokenizer.GetTokenizer() defer tokenizer.PutTokenizer(tkz) - - _, err := tkz.Tokenize([]byte(query)) - if err != nil { + + if _, err := tkz.Tokenize([]byte(query)); err != nil { return fmt.Errorf("invalid SQL: %v", err) } - - // Execute with database/sql - _, err = db.Exec(query) + + _, err := db.Exec(query) return err } ``` ### Q: How do I contribute bug fixes? -**A:** -1. Create a minimal reproduction case -2. Include the SQL that causes the issue -3. Submit an issue with: - - Go version - - GoSQLX version - - Full error message - - Sample code +**A:** Submit an issue with: +- Go version and GoSQLX version +- Minimal reproduction case with SQL +- Full error message +- Sample code ## Getting Help -If you're still experiencing issues: - -1. **Check the test suite** - Examples of correct usage -2. **Review benchmarks** - Performance patterns -3. **Enable debug logging** - Understand what's happening -4. **Profile your application** - Identify bottlenecks -5. **Submit an issue** - With reproduction steps +1. Check test suite for usage examples +2. Review benchmarks for performance patterns +3. Inspect the token stream (see Token Stream Analysis under Debugging Techniques) +4. Profile your application (see Performance section) +5. Submit an issue with reproduction steps -Remember: Most issues are related to improper pool usage or not using defer for cleanup. \ No newline at end of file +**Remember:** Most issues stem from improper pool usage or missing `defer` statements. 
\ No newline at end of file diff --git a/UPGRADE_GUIDE.md b/docs/UPGRADE_GUIDE.md similarity index 92% rename from UPGRADE_GUIDE.md rename to docs/UPGRADE_GUIDE.md index 57bd633..5fc950e 100644 --- a/UPGRADE_GUIDE.md +++ b/docs/UPGRADE_GUIDE.md @@ -103,18 +103,13 @@ While no critical bugs were fixed, v1.5.0 includes: ### 📚 Documentation Updates -New documentation added: -- [RELEASE_NOTES_v1.5.0.md](RELEASE_NOTES_v1.5.0.md) - Comprehensive release notes -- [CLI_REFACTORING_SUMMARY.md](CLI_REFACTORING_SUMMARY.md) - CLI coverage details -- [PARSER_COVERAGE_SUMMARY.md](PARSER_COVERAGE_SUMMARY.md) - Parser test details -- [TOKENIZER_COVERAGE_SUMMARY.md](TOKENIZER_COVERAGE_SUMMARY.md) - Tokenizer test details -- Updated [CHANGELOG.md](CHANGELOG.md) with v1.5.0 release notes +Updated documentation: +- [CHANGELOG.md](../CHANGELOG.md) with v1.5.0 release notes ### 🔗 Related Resources -- **Full Release Notes**: [RELEASE_NOTES_v1.5.0.md](RELEASE_NOTES_v1.5.0.md) -- **Pull Request**: [PR #85](https://github.com/ajitpratap0/GoSQLX/pull/85) -- **Changelog**: [CHANGELOG.md](CHANGELOG.md#150---2025-11-15---phase-1-test-coverage-achievement) +- **Full Release Notes**: [CHANGELOG.md](../CHANGELOG.md) +- **Pull Request**: [PR #138](https://github.com/ajitpratap0/GoSQLX/pull/138) - **Issues**: Report any issues at https://github.com/ajitpratap0/GoSQLX/issues --- diff --git a/docs/USAGE_GUIDE.md b/docs/USAGE_GUIDE.md index 164e509..abc52f7 100644 --- a/docs/USAGE_GUIDE.md +++ b/docs/USAGE_GUIDE.md @@ -24,7 +24,7 @@ go get github.com/ajitpratap0/GoSQLX ``` ### Minimum Go Version -Go 1.24 or higher is required. +Go 1.24+ is required. 
### Import Packages @@ -136,40 +136,34 @@ import ( "fmt" "github.com/ajitpratap0/GoSQLX/pkg/sql/tokenizer" "github.com/ajitpratap0/GoSQLX/pkg/sql/parser" - "github.com/ajitpratap0/GoSQLX/pkg/sql/token" ) func ParseSQL(sql string) error { // Step 1: Tokenize tkz := tokenizer.GetTokenizer() defer tokenizer.PutTokenizer(tkz) - + tokens, err := tkz.Tokenize([]byte(sql)) if err != nil { return err } - - // Step 2: Convert to parser tokens - parserTokens := make([]token.Token, 0, len(tokens)) - for _, tok := range tokens { - if tok.Token.Type == models.TokenTypeEOF { - break - } - parserTokens = append(parserTokens, token.Token{ - Type: fmt.Sprintf("%d", tok.Token.Type), - Literal: tok.Token.Value, - }) + + // Step 2: Convert to parser tokens using the proper converter + converter := parser.NewTokenConverter() + result, err := converter.Convert(tokens) + if err != nil { + return fmt.Errorf("token conversion failed: %w", err) } - + // Step 3: Parse p := parser.NewParser() defer p.Release() - - ast, err := p.Parse(parserTokens) + + ast, err := p.Parse(result.Tokens) if err != nil { return err } - + fmt.Printf("Parsed: %T\n", ast) return nil } @@ -243,24 +237,24 @@ Get detailed error information with line and column numbers: func HandleTokenizerError(sql string) { tkz := tokenizer.GetTokenizer() defer tokenizer.PutTokenizer(tkz) - + tokens, err := tkz.Tokenize([]byte(sql)) if err != nil { - if tkErr, ok := err.(tokenizer.TokenizerError); ok { + if tkErr, ok := err.(models.TokenizerError); ok { fmt.Printf("Syntax error at line %d, column %d: %s\n", tkErr.Location.Line, tkErr.Location.Column, tkErr.Message) - + // Show the problematic line lines := strings.Split(sql, "\n") if tkErr.Location.Line <= len(lines) { - fmt.Printf("Line %d: %s\n", - tkErr.Location.Line, + fmt.Printf("Line %d: %s\n", + tkErr.Location.Line, lines[tkErr.Location.Line-1]) - + // Show error position with caret - fmt.Printf("%*s^\n", + fmt.Printf("%*s^\n", tkErr.Location.Column+6, "") // +6 for "Line X: " 
} } @@ -384,7 +378,7 @@ func CheckForInjection(sql string) { if result.HasCritical() { fmt.Printf("CRITICAL: Found %d critical security issues!\n", result.CriticalCount) } - if result.HasHigh() { + if result.HasHighOrAbove() { fmt.Printf("HIGH: Found %d high-severity issues\n", result.HighCount) } @@ -419,7 +413,7 @@ func ValidateUserQuery(userInput string) error { scanner := security.NewScanner() result := scanner.Scan(ast) - if result.HasCritical() || result.HasHigh() { + if result.HasCritical() || result.HasHighOrAbove() { return fmt.Errorf("potential SQL injection detected: %d issues found", result.CriticalCount + result.HighCount) } @@ -738,21 +732,28 @@ func OptimizedBatchProcess(queries []string) error { ### Pre-allocate Slices ```go -func ProcessWithPreallocation(sql string, expectedTokens int) { +func ProcessWithPreallocation(sql string) error { tkz := tokenizer.GetTokenizer() defer tokenizer.PutTokenizer(tkz) - - tokens, _ := tkz.Tokenize([]byte(sql)) - - // Pre-allocate with expected capacity - parserTokens := make([]token.Token, 0, expectedTokens) - - for _, tok := range tokens { - if tok.Token.Type == models.TokenTypeEOF { - break - } - parserTokens = append(parserTokens, convertToken(tok)) + + tokens, err := tkz.Tokenize([]byte(sql)) + if err != nil { + return err + } + + // Convert tokens using the proper converter + converter := parser.NewTokenConverter() + result, err := converter.Convert(tokens) + if err != nil { + return err } + + // Parse with pre-converted tokens + p := parser.NewParser() + defer p.Release() + + _, err = p.Parse(result.Tokens) + return err } ``` diff --git a/docs/migration/FROM_JSQLPARSER.md b/docs/migration/FROM_JSQLPARSER.md index beb53dc..e93d410 100644 --- a/docs/migration/FROM_JSQLPARSER.md +++ b/docs/migration/FROM_JSQLPARSER.md @@ -191,11 +191,17 @@ func main() { panic(err) } - // Step 2: Parse + // Step 2: Convert tokens for parser + parserTokens, err := parser.ConvertTokensForParser(tokens) + if err != nil { + 
panic(err) + } + + // Step 3: Parse p := parser.NewParser() defer p.Release() - ast, err := p.Parse(convertTokens(tokens)) + ast, err := p.Parse(parserTokens) if err != nil { panic(err) } @@ -361,10 +367,12 @@ func parseSQL(sql string) *ast.AST { tokens, _ := tkz.Tokenize([]byte(sql)) + parserTokens, _ := parser.ConvertTokensForParser(tokens) + p := parser.NewParser() defer p.Release() - astObj, _ := p.Parse(convertTokens(tokens)) + astObj, _ := p.Parse(parserTokens) return astObj } ``` @@ -440,7 +448,8 @@ func parseMultiple(sql string) []interface{} { continue } - ast, _ := p.Parse(convertTokens(tokens)) + parserTokens, _ := parser.ConvertTokensForParser(tokens) + ast, _ := p.Parse(parserTokens) statements = append(statements, ast) } diff --git a/docs/migration/FROM_PG_QUERY.md b/docs/migration/FROM_PG_QUERY.md index 0545231..d33b68d 100644 --- a/docs/migration/FROM_PG_QUERY.md +++ b/docs/migration/FROM_PG_QUERY.md @@ -195,11 +195,17 @@ func main() { panic(err) } + // Convert tokens for parser + parserTokens, err := parser.ConvertTokensForParser(tokens) + if err != nil { + panic(err) + } + // Parse to AST (simpler structure) p := parser.NewParser() defer p.Release() - ast, err := p.Parse(convertTokens(tokens)) + ast, err := p.Parse(parserTokens) if err != nil { panic(err) } @@ -246,8 +252,8 @@ package main import ( "fmt" - "regexp" "github.com/ajitpratap0/GoSQLX/pkg/sql/tokenizer" + "github.com/ajitpratap0/GoSQLX/pkg/models" ) func main() { @@ -564,9 +570,11 @@ func parseSQL(sql string) *ast.AST { defer tokenizer.PutTokenizer(tkz) tokens, _ := tkz.Tokenize([]byte(sql)) + parserTokens, _ := parser.ConvertTokensForParser(tokens) + p := parser.NewParser() defer p.Release() - astObj, _ := p.Parse(convertTokens(tokens)) + astObj, _ := p.Parse(parserTokens) return astObj } diff --git a/docs/migration/FROM_SQLFLUFF.md b/docs/migration/FROM_SQLFLUFF.md index 73e5e92..a26cdbe 100644 --- a/docs/migration/FROM_SQLFLUFF.md +++ b/docs/migration/FROM_SQLFLUFF.md @@ -61,28 
+61,28 @@ This guide helps you migrate from SQLFluff (Python) to GoSQLX (Go), covering fea ### You Should Migrate If: -✅ **Performance is critical** +**Performance is critical** - CI/CD pipelines taking too long (SQLFluff validates at ~1 query/sec) - Real-time SQL validation in web applications - Processing thousands of queries per second - Batch processing large SQL files -✅ **You're in the Go ecosystem** +**You're in the Go ecosystem** - Building Go applications or tools - Want zero-dependency deployment - Need native concurrency support -✅ **Memory efficiency matters** +**Memory efficiency matters** - Processing very large SQL files - High-throughput services - Memory-constrained environments ### You Should Stay with SQLFluff If: -❌ **You need extensive linting rules** (GoSQLX has 0 rules currently) -❌ **You need exotic SQL dialects** (Snowflake, BigQuery-specific features) -❌ **You're heavily invested in Python** ecosystem -❌ **You need template language support** (Jinja, dbt) +- **You need extensive linting rules** (GoSQLX has 0 rules currently) +- **You need exotic SQL dialects** (Snowflake, BigQuery-specific features) +- **You're heavily invested in Python** ecosystem +- **You need template language support** (Jinja, dbt) --- @@ -91,43 +91,43 @@ This guide helps you migrate from SQLFluff (Python) to GoSQLX (Go), covering fea | Feature | SQLFluff | GoSQLX | Notes | |---------|----------|--------|-------| | **Core Functionality** | -| SQL Parsing | ✅ Yes | ✅ Yes | GoSQLX 1000x faster | -| SQL Validation | ✅ Yes | ✅ Yes | Similar accuracy | -| SQL Formatting | ✅ Yes | ✅ Yes | Different style defaults | -| Syntax Error Detection | ✅ Yes | ✅ Yes | Both provide line/column info | +| SQL Parsing | Yes | Yes | GoSQLX 1000x faster | +| SQL Validation | Yes | Yes | Similar accuracy | +| SQL Formatting | Yes | Yes | Different style defaults | +| Syntax Error Detection | Yes | Yes | Both provide line/column info | | **Linting & Rules** | -| Linting Rules | ✅ 60+ rules | 
❌ Planned v1.5.0 | Major gap | -| Custom Rules | ✅ Yes | ❌ Planned v1.5.0 | | -| Rule Configuration | ✅ .sqlfluff | ❌ Planned v1.5.0 | | -| Auto-fix | ✅ Yes | ❌ Planned v1.5.0 | | +| Linting Rules | 60+ rules | Planned v1.5.0 | Major gap | +| Custom Rules | Yes | Planned v1.5.0 | | +| Rule Configuration | .sqlfluff | Planned v1.5.0 | | +| Auto-fix | Yes | Planned v1.5.0 | | | **SQL Dialect Support** | -| PostgreSQL | ✅ Yes | ✅ Yes | GoSQLX ~80-85% coverage | -| MySQL | ✅ Yes | ✅ Yes | GoSQLX ~80% coverage | -| SQL Server | ✅ Yes | ✅ Yes | GoSQLX ~75% coverage | -| Oracle | ✅ Yes | ✅ Yes | GoSQLX ~70% coverage | -| SQLite | ✅ Yes | ✅ Yes | GoSQLX ~85% coverage | -| Snowflake | ✅ Yes | ❌ No | | -| BigQuery | ✅ Yes | ❌ No | | -| Redshift | ✅ Yes | ❌ No | | -| 50+ Other Dialects | ✅ Yes | ❌ No | | +| PostgreSQL | Yes | Yes | GoSQLX ~80-85% coverage | +| MySQL | Yes | Yes | GoSQLX ~80% coverage | +| SQL Server | Yes | Yes | GoSQLX ~75% coverage | +| Oracle | Yes | Yes | GoSQLX ~70% coverage | +| SQLite | Yes | Yes | GoSQLX ~85% coverage | +| Snowflake | Yes | No | | +| BigQuery | Yes | No | | +| Redshift | Yes | No | | +| 50+ Other Dialects | Yes | No | | | **API & Integration** | -| CLI Tool | ✅ Yes | ✅ Yes | GoSQLX is faster | -| Programmatic API | ✅ Complex | ✅ Simple | GoSQLX easier to use | -| Library Integration | ✅ Python | ✅ Go | | -| VSCode Extension | ✅ Yes | ❌ Planned v1.6.0 | | -| Pre-commit Hooks | ✅ Yes | ✅ Yes | GoSQLX 100-1000x faster | +| CLI Tool | Yes | Yes | GoSQLX is faster | +| Programmatic API | Complex | Simple | GoSQLX easier to use | +| Library Integration | Python | Go | | +| VSCode Extension | Yes | Planned v1.6.0 | | +| Pre-commit Hooks | Yes | Yes | GoSQLX 100-1000x faster | | **Performance** | | Parse Speed | 1,000 ops/sec | 1.38M ops/sec | 1380x faster | | Memory per Query | 50KB | 1.8KB | 28x less memory | | Concurrent Processing | Limited (GIL) | Native | Linear scaling | | **Configuration** | -| Config Files | ✅ .sqlfluff | ⚠️ Planned 
v1.5.0 | | -| Inline Ignores | ✅ Yes | ❌ Planned v1.5.0 | | -| Rule Exclusions | ✅ Yes | ❌ Planned v1.5.0 | | +| Config Files | .sqlfluff | Planned v1.5.0 | | +| Inline Ignores | Yes | Planned v1.5.0 | | +| Rule Exclusions | Yes | Planned v1.5.0 | | | **Template Support** | -| Jinja Templates | ✅ Yes | ❌ No | | -| dbt Integration | ✅ Yes | ❌ No | | -| Custom Templating | ✅ Yes | ❌ No | | +| Jinja Templates | Yes | No | | +| dbt Integration | Yes | No | | +| Custom Templating | Yes | No | | --- @@ -228,21 +228,18 @@ func FormatSQL(sql string) string { tokens, _ := tkz.Tokenize([]byte(sql)) var result strings.Builder - indent := 0 - for _, tok := range tokens { + for i, tok := range tokens { if tok.Token.Type == models.TokenTypeEOF { break } - // Add newlines for major keywords - if isSelectKeyword(tok.Token.Type) { - result.WriteString("\n") - result.WriteString(strings.Repeat(" ", indent)) + // Add space between tokens + if i > 0 && tok.Token.Value != "," && tok.Token.Value != ")" { + result.WriteString(" ") } result.WriteString(strings.ToUpper(tok.Token.Value)) - result.WriteString(" ") } return strings.TrimSpace(result.String()) @@ -280,7 +277,8 @@ def validate_directory(directory): # Takes ~30 seconds for 100 files results = validate_directory('./queries/') for filename, result in results.items(): - print(f"{filename}: {'✓' if result['valid'] else '✗'}") + status = "VALID" if result['valid'] else "INVALID" + print(f"{filename}: {status}") ``` #### GoSQLX (Go) @@ -328,9 +326,9 @@ func main() { // Takes ~0.03 seconds for 100 files (1000x faster!) 
results := validateDirectory("./queries/") for filename, valid := range results { - status := "✗" + status := "INVALID" if valid { - status = "✓" + status = "VALID" } fmt.Printf("%s: %s\n", filename, status) } @@ -630,7 +628,7 @@ time sqlfluff lint migrations/*.sql # - Time: ~2500 seconds (41 minutes) # - Memory: 250MB peak # - CPU: 100% (single-threaded due to GIL) -# - Result: ❌ Too slow for CI/CD +# - Conclusion: Too slow for CI/CD ``` **GoSQLX:** @@ -641,7 +639,7 @@ time gosqlx validate migrations/*.sql # - Time: ~3.6 seconds # - Memory: 50MB peak # - CPU: 1600% (uses all 16 cores) -# - Result: ✅ Perfect for CI/CD +# - Conclusion: Perfect for CI/CD ``` **Improvement:** 694x faster, practical for pre-commit hooks! @@ -651,39 +649,39 @@ time gosqlx validate migrations/*.sql ## Migration Checklist ### Phase 1: Assessment (Day 1) -- [ ] List all current uses of SQLFluff in your project -- [ ] Identify which features you actually use (parsing, linting, formatting) -- [ ] Check which SQL dialects you support (GoSQLX supports 5) -- [ ] Review your linting rules (GoSQLX has none yet) -- [ ] Assess template language usage (Jinja, dbt - not supported in GoSQLX) +- List all current uses of SQLFluff in your project +- Identify which features you actually use (parsing, linting, formatting) +- Check which SQL dialects you support (GoSQLX supports 5) +- Review your linting rules (GoSQLX has none yet) +- Assess template language usage (Jinja, dbt - not supported in GoSQLX) ### Phase 2: Preparation (Day 1-2) -- [ ] Install Go 1.24+ on development machines -- [ ] Install GoSQLX: `go get github.com/ajitpratap0/GoSQLX` -- [ ] Test GoSQLX with sample queries from your project -- [ ] Benchmark performance improvement on your queries -- [ ] Document any unsupported features +- Install Go 1.24+ on development machines +- Install GoSQLX: `go get github.com/ajitpratap0/GoSQLX` +- Test GoSQLX with sample queries from your project +- Benchmark performance improvement on your queries +- 
Document any unsupported features ### Phase 3: Migration (Day 2-3) -- [ ] Replace SQLFluff validation with GoSQLX in codebase -- [ ] Update CI/CD pipelines to use GoSQLX -- [ ] Update pre-commit hooks -- [ ] Migrate formatting scripts -- [ ] Update documentation and developer guides +- Replace SQLFluff validation with GoSQLX in codebase +- Update CI/CD pipelines to use GoSQLX +- Update pre-commit hooks +- Migrate formatting scripts +- Update documentation and developer guides ### Phase 4: Testing (Day 3-4) -- [ ] Test all SQL files with GoSQLX -- [ ] Verify error messages are helpful -- [ ] Compare formatting output (may differ) -- [ ] Load test if using in production API -- [ ] Train team on new tools +- Test all SQL files with GoSQLX +- Verify error messages are helpful +- Compare formatting output (may differ) +- Load test if using in production API +- Train team on new tools ### Phase 5: Cleanup (Day 4-5) -- [ ] Remove SQLFluff dependencies -- [ ] Clean up old configuration files (.sqlfluff) -- [ ] Update team documentation -- [ ] Monitor performance improvements -- [ ] Celebrate 1000x speedup! 🎉 +- Remove SQLFluff dependencies +- Clean up old configuration files (.sqlfluff) +- Update team documentation +- Monitor performance improvements +- Celebrate 1000x speedup and improved developer experience! 
 --- @@ -781,6 +779,7 @@ func main() { Before: 41 minutes After: 3.6 seconds Improvement: 683x faster +Status: PASS ``` **Pre-commit Hooks:** @@ -788,6 +787,7 @@ Improvement: 683x faster Before: 30-60 seconds (developers bypassed) After: 0.1-0.3 seconds (developers always use) Improvement: 100-600x faster +Status: PASS ``` **Infrastructure Costs:** @@ -795,6 +795,7 @@ Improvement: 100-600x faster Before: $500/month (50 CI runners needed for parallelism) After: $50/month (5 CI runners sufficient) Savings: $450/month = $5,400/year +Status: SUCCESS ``` **Developer Productivity:** @@ -802,6 +803,7 @@ Savings: $450/month = $5,400/year Before: Developers bypassed slow pre-commit hooks After: 100% adoption of fast validation Result: Fewer bugs in production +Status: SUCCESS ``` ### Lessons Learned @@ -825,8 +827,8 @@ Result: Fewer bugs in production - Rule configuration per project **GoSQLX Status:** -- ❌ No linting rules yet -- ⏳ Planned for v1.5.0 (Q1 2025) +- No linting rules yet +- Planned for v1.5.0 (Q1 2025) - Will start with 10 basic rules **Workaround:** @@ -846,8 +848,8 @@ sqlfluff lint --rules L001,L003,L009 query.sql - Custom template engines **GoSQLX Status:** -- ❌ No template support -- ❌ No plans currently +- No template support +- No plans currently **Workaround:** Render templates first, then validate: @@ -875,9 +877,9 @@ Use SQLFluff for unsupported dialects, or contribute dialect support to GoSQLX! - `sqlfluff fix` command **GoSQLX Status:** -- ⚠️ Basic formatting only -- ❌ No intelligent auto-fix yet -- ⏳ Planned for v1.5.0 +- Basic formatting only +- No intelligent auto-fix yet +- Planned for v1.5.0 --- @@ -896,7 +898,7 @@ Use SQLFluff for unsupported dialects, or contribute dialect support to GoSQLX! 
### Migration Support - **[Comparison Guide](../COMPARISON.md)** - Detailed feature comparison -- **[Performance Guide](../PRODUCTION_GUIDE.md)** - Production best practices +- **[Production Guide](../PRODUCTION_GUIDE.md)** - Production best practices - **[Troubleshooting](../TROUBLESHOOTING.md)** - Common issues and solutions --- diff --git a/docs/performance_regression_testing.md b/docs/performance_regression_testing.md index a1d85f5..88de045 100644 --- a/docs/performance_regression_testing.md +++ b/docs/performance_regression_testing.md @@ -1,192 +1,23 @@ # Performance Regression Testing -## Overview +**⚠️ This document has been consolidated into the main Performance Tuning Guide.** -GoSQLX includes a comprehensive performance regression test suite to prevent performance degradation over time. The suite tracks key performance metrics against established baselines and alerts developers to regressions. +Please refer to: +**[PERFORMANCE_TUNING.md - Performance Regression Testing Section](./PERFORMANCE_TUNING.md#performance-regression-testing)** -## Running Performance Tests +## What was moved: +- Performance regression test suite overview +- Running regression tests (quick test and baseline benchmark) +- Performance baselines and tracked metrics +- Test output examples (successful runs and regression detection) +- Updating baselines methodology +- CI/CD integration examples +- Troubleshooting regression tests -### Quick Test (Recommended for CI/CD) +All content has been integrated into the comprehensive Performance Tuning Guide for better discoverability and unified performance documentation. 
-```bash -go test -v ./pkg/sql/parser/ -run TestPerformanceRegression -``` +--- -**Execution Time:** ~8 seconds -**Coverage:** 5 critical query types - -### Baseline Benchmark (For Establishing New Baselines) - -```bash -go test -bench=BenchmarkPerformanceBaseline -benchmem -count=5 ./pkg/sql/parser/ -``` - -**Use Case:** After significant parser changes or optimizations to establish new performance baselines. - -## Performance Baselines - -Current baselines are stored in `performance_baselines.json` at the project root: - -### Tracked Metrics - -1. **SimpleSelect** (280 ns/op baseline) - - Basic SELECT query: `SELECT id, name FROM users` - - Current: ~265 ns/op (9 allocs, 536 B/op) - -2. **ComplexQuery** (1100 ns/op baseline) - - Complex SELECT with JOIN, WHERE, ORDER BY, LIMIT - - Current: ~1020 ns/op (36 allocs, 1433 B/op) - -3. **WindowFunction** (450 ns/op baseline) - - Window function: `ROW_NUMBER() OVER (PARTITION BY ... ORDER BY ...)` - - Current: ~400 ns/op (14 allocs, 760 B/op) - -4. **CTE** (450 ns/op baseline) - - Common Table Expression with WITH clause - - Current: ~395 ns/op (14 allocs, 880 B/op) - -5. 
**INSERT** (350 ns/op baseline) - - Simple INSERT statement - - Current: ~310 ns/op (14 allocs, 536 B/op) - -### Tolerance Levels - -- **Failure Threshold:** 20% degradation from baseline -- **Warning Threshold:** 10% degradation from baseline (half of tolerance) - -## Test Output - -### Successful Run - -``` -================================================================================ -PERFORMANCE REGRESSION TEST SUMMARY -================================================================================ -✓ All performance tests passed with no warnings - -Baseline Version: 1.4.0 -Baseline Updated: 2025-01-17 -Tests Run: 5 -Failures: 0 -Warnings: 0 -================================================================================ -``` - -### Regression Detected - -``` -REGRESSIONS DETECTED: - ✗ ComplexQuery: 25.5% slower (actual: 1381 ns/op, baseline: 1100 ns/op) - -WARNINGS (approaching threshold): - ⚠ SimpleSelect: 12.3% slower (approaching threshold) - -Tests Run: 5 -Failures: 1 -Warnings: 1 -``` - -## Updating Baselines - -### When to Update - -Update baselines when: -- Intentional optimizations improve performance significantly -- Parser architecture changes fundamentally alter performance characteristics -- New SQL features are added that affect parsing speed - -### How to Update - -1. Run the baseline benchmark: - ```bash - go test -bench=BenchmarkPerformanceBaseline -benchmem -count=5 ./pkg/sql/parser/ - ``` - -2. Calculate new conservative baselines (add 10-15% buffer to measured values) - -3. Update `performance_baselines.json`: - ```json - { - "SimpleSelect": { - "ns_per_op": , - "tolerance_percent": 20, - "description": "...", - "current_performance": " ns/op" - } - } - ``` - -4. Update the `updated` timestamp in the JSON file - -5. 
Commit changes with a clear explanation of why baselines were updated - -## Integration with CI/CD - -### GitHub Actions Example - -```yaml -- name: Performance Regression Tests - run: | - go test -v ./pkg/sql/parser/ -run TestPerformanceRegression - timeout-minutes: 2 -``` - -### Exit Codes - -- **0:** All tests passed -- **1:** Performance regression detected (test failure) - -## Troubleshooting - -### Test Timing Variance - -Performance tests can show variance due to: -- System load -- CPU thermal throttling -- Background processes - -**Solution:** Run tests multiple times and average results. The suite uses `testing.Benchmark` which automatically adjusts iteration count for stable measurements. - -### False Positives - -If you see intermittent failures: -1. Check system load during test execution -2. Run the test 3-5 times to confirm consistency -3. Consider increasing tolerance for that specific baseline - -### Baseline Drift - -Over time, minor optimizations may accumulate. If current performance is consistently better: -1. Document the improvements -2. Update baselines to reflect the new performance level -3. 
Keep tolerance at 20% to catch future regressions - -## Performance Metrics Guide - -### ns/op (Nanoseconds per Operation) -- Lower is better -- Measures parsing speed for a single query -- Most sensitive metric for detecting regressions - -### B/op (Bytes per Operation) -- Memory allocated per parse operation -- Tracked in benchmarks but not in regression tests -- Useful for identifying memory leaks - -### allocs/op (Allocations per Operation) -- Number of heap allocations per parse -- Lower indicates better object pool efficiency -- Critical for GC pressure - -## Related Documentation - -- [Benchmark Guide](../CLAUDE.md#performance-testing-new-features) -- [Development Workflow](../CLAUDE.md#common-development-workflows) -- [Production Metrics](../pkg/metrics/README.md) - -## Version History - -- **v1.4.0** (2025-01-17): Initial performance regression suite - - 5 baseline metrics established - - 20% tolerance threshold - - ~8 second execution time +**Last Updated:** 2025-11-28 +**Consolidated into:** PERFORMANCE_TUNING.md +**Section:** Performance Regression Testing diff --git a/docs/sql99-compliance-analysis.md b/docs/sql99-compliance-analysis.md index 7cd93c8..2cb16a6 100644 --- a/docs/sql99-compliance-analysis.md +++ b/docs/sql99-compliance-analysis.md @@ -32,7 +32,7 @@ This document provides a comprehensive analysis of SQL-99 standard compliance ga ## Currently Implemented SQL-99 Features -### ✅ Core Data Manipulation (100% Coverage) +### Core Data Manipulation (100% Coverage) **SELECT Statement** - Fully implemented with comprehensive support: - Basic SELECT with column projection @@ -60,7 +60,7 @@ This document provides a comprehensive analysis of SQL-99 standard compliance ga - DELETE FROM with WHERE clause - Conditional deletion -### ✅ JOIN Operations (100% Coverage) +### JOIN Operations (100% Coverage) Full support for all SQL-99 JOIN types: - **INNER JOIN** - Fully implemented with ON and USING clauses @@ -72,7 +72,7 @@ Full support for all SQL-99 JOIN 
types: - **Multiple JOINs** - Proper left-associative parsing - **Self JOINs** - Supported -### ✅ Subqueries (100% Coverage) +### Subqueries (100% Coverage) Comprehensive subquery support: - Scalar subqueries (single value) @@ -84,7 +84,7 @@ Comprehensive subquery support: - ANY/SOME quantified comparisons - ALL quantified comparisons -### ✅ Common Table Expressions (100% Coverage) +### Common Table Expressions (100% Coverage) **Phase 2 Complete** - Full CTE implementation: - Basic WITH clause @@ -94,7 +94,7 @@ Comprehensive subquery support: - CTE references in main query - Nested CTEs -### ✅ Window Functions (95% Coverage) +### Window Functions (95% Coverage) **Phase 2.5 Complete** - Comprehensive window function support: @@ -127,7 +127,7 @@ Comprehensive subquery support: - GROUPS frame unit (SQL:2016, but commonly backported) - Named window specifications (WINDOW clause) -### ✅ Set Operations (100% Coverage) +### Set Operations (100% Coverage) **Phase 2 Complete** - Full set operation support: - UNION - Combines results with duplicate elimination @@ -137,7 +137,7 @@ Comprehensive subquery support: - Left-associative parsing for chained operations - Proper precedence handling -### ✅ Aggregate Functions (95% Coverage) +### Aggregate Functions (95% Coverage) Standard SQL-99 aggregates: - COUNT(*) and COUNT(column) @@ -151,7 +151,7 @@ Standard SQL-99 aggregates: - FILTER clause for conditional aggregation - WITHIN GROUP (ORDER BY) for ordered-set aggregates -### ✅ Expression Support (90% Coverage) +### Expression Support (90% Coverage) **Fully Implemented:** - Binary expressions (arithmetic, comparison, logical) @@ -177,7 +177,7 @@ Standard SQL-99 aggregates: - Array expressions and constructors - Row value constructors (multi-column comparisons) -### ✅ DDL Operations (80% Coverage) +### DDL Operations (80% Coverage) **CREATE TABLE** - Comprehensive support: - Column definitions with data types @@ -212,7 +212,7 @@ Standard SQL-99 aggregates: Based on comprehensive 
codebase analysis, the following SQL-99 features are **NOT currently implemented**: -### 🔴 High Priority Missing Features +### High Priority Missing Features #### 1. FETCH FIRST / OFFSET-FETCH Clause **Status**: Not implemented (keyword recognized but no parsing) @@ -384,9 +384,9 @@ FROM orders; --- #### 7. MERGE Statement (UPSERT) -**Status**: Syntax recognition (test data exists, partial parsing) +**Status**: IMPLEMENTED (v1.5.0+) **SQL-99 Feature**: F312 (SQL:2003 but commonly needed) -**Importance**: MEDIUM +**Importance**: MEDIUM (no longer a gap) **Reason**: Efficient UPSERT operations (Oracle, SQL Server, PostgreSQL 15+) **Examples:** @@ -401,21 +401,21 @@ WHEN NOT MATCHED THEN ``` **Current Status**: +- MERGE parsing implemented in parser.go parseMergeStatement() +- AST MergeStatement node exists and fully supported - Test files: `testdata/oracle/06_merge_statement.sql`, `testdata/mssql/05_merge_statement.sql` -- SQL_COMPATIBILITY.md: "✅ Full" support listed (80% coverage) - **CONTRADICTORY** -- No MERGE parsing in parser.go parseStatement() -- No AST MergeStatement node found +- SQL_COMPATIBILITY.md: Full support listed (80% coverage) - ACCURATE -**Implementation Impact**: High (new statement type, complex matching logic, multiple actions) +**Note**: This feature was completed in v1.5.0 and is no longer part of the gap analysis. Consider removing from Phase 3 implementation roadmap. --- -### 🟡 Medium Priority Missing Features +### Medium Priority Missing Features #### 8. 
TRUNCATE TABLE -**Status**: Not implemented +**Status**: IMPLEMENTED (v1.5.0+) **SQL-99 Feature**: F201 (SQL:2008) -**Importance**: MEDIUM +**Importance**: MEDIUM (no longer a gap) **Reason**: Efficient table clearing (faster than DELETE) **Examples:** @@ -425,10 +425,11 @@ TRUNCATE TABLE temp_data CASCADE; ``` **Current Status**: -- No parser support -- SQL_COMPATIBILITY.md: "✅ Full" (90% coverage) - **CONTRADICTORY** +- TRUNCATE parsing implemented in parser.go parseTruncateStatement() +- AST TruncateStatement node exists and fully supported +- SQL_COMPATIBILITY.md: Full support listed (90% coverage) - ACCURATE -**Implementation Impact**: Low (simple statement, minimal AST changes) +**Note**: This feature was completed in v1.5.0 and is no longer part of the gap analysis. Remove from Phase 1 implementation roadmap. --- @@ -504,7 +505,7 @@ SELECT items[1] FROM orders; --- -### 🟢 Lower Priority Missing Features +### Lower Priority Missing Features #### 12. INTERSECT ALL and EXCEPT ALL **Status**: Not implemented @@ -605,16 +606,17 @@ GRANT ALL PRIVILEGES ON DATABASE mydb TO admin_user; ### Phase 1: High-Impact Quick Wins (4-6 weeks) **Goal**: Reach 88-90% compliance with minimal effort -| Feature | Priority | Effort | Impact | Order | -|---------|----------|--------|--------|-------| -| **NULLS FIRST/LAST** | P0 | 8h | High | 1 | -| **FETCH FIRST / OFFSET-FETCH** | P0 | 16h | High | 2 | -| **COALESCE/NULLIF** | P1 | 8h | Medium | 3 | -| **TRUNCATE TABLE** | P1 | 8h | Medium | 4 | -| **DISTINCT in aggregates** (verification) | P1 | 4h | Medium | 5 | -| **INTERSECT/EXCEPT ALL** | P1 | 6h | Low | 6 | - -**Phase 1 Total**: ~50 hours +| Feature | Priority | Effort | Impact | Order | Status | +|---------|----------|--------|--------|-------|--------| +| **NULLS FIRST/LAST** | P0 | 8h | High | 1 | TODO | +| **FETCH FIRST / OFFSET-FETCH** | P0 | 16h | High | 2 | TODO | +| **COALESCE/NULLIF** | P1 | 8h | Medium | 3 | TODO | +| **DISTINCT in aggregates** (verification) | P1 | 
4h | Medium | 4 | TODO | +| **INTERSECT/EXCEPT ALL** | P1 | 6h | Low | 5 | TODO | + +Note: TRUNCATE TABLE (previously P1) has been COMPLETED in v1.5.0 and removed from this phase. + +**Phase 1 Total**: ~42 hours (reduced from 50) **Compliance Gain**: +8-10% **New Compliance**: 88-90% @@ -640,14 +642,15 @@ GRANT ALL PRIVILEGES ON DATABASE mydb TO admin_user; ### Phase 3: Advanced Features (Optional - 4-6 weeks) **Goal**: Reach 95%+ compliance with advanced SQL-99 features -| Feature | Priority | Effort | Impact | Order | -|---------|----------|--------|--------|-------| -| **LATERAL Joins** | P1 | 24h | Medium-High | 1 | -| **MERGE Statement** | P1 | 32h | Medium | 2 | -| **Array Support (Basic)** | P2 | 20h | Medium | 3 | -| **TABLE Constructor** | P2 | 12h | Low | 4 | +| Feature | Priority | Effort | Impact | Order | Status | +|---------|----------|--------|--------|-------|--------| +| **LATERAL Joins** | P1 | 24h | Medium-High | 1 | TODO | +| **Array Support (Basic)** | P2 | 20h | Medium | 2 | TODO | +| **TABLE Constructor** | P2 | 12h | Low | 3 | TODO | + +Note: MERGE Statement (previously P1, 32h) has been COMPLETED in v1.5.0 and removed from this phase. 
-**Phase 3 Total**: ~88 hours +**Phase 3 Total**: ~56 hours (reduced from 88) **Compliance Gain**: +3-4% **New Compliance**: 95-96% @@ -655,15 +658,17 @@ GRANT ALL PRIVILEGES ON DATABASE mydb TO admin_user; ### Timeline Summary -| Phase | Duration | Effort | Compliance | Features | -|-------|----------|--------|------------|----------| -| **Current State** | - | - | 80-85% | Baseline | -| **Phase 1** | 4-6 weeks | 50h | 88-90% | 6 features | -| **Phase 2** | 6-8 weeks | 84h | 93-94% | 5 features | -| **Phase 3** | 4-6 weeks | 88h | 95-96% | 4 features | -| **Total** | 14-20 weeks | 222h | 95-96% | 15 features | +| Phase | Duration | Effort | Compliance | Features | Status | +|-------|----------|--------|------------|----------|--------| +| **Current State** | - | - | 80-85% | Baseline | - | +| **Phase 1** | 4-6 weeks | 42h | 88-90% | 5 features | TODO | +| **Phase 2** | 6-8 weeks | 84h | 93-94% | 5 features | TODO | +| **Phase 3** | 4-6 weeks | 56h | 95-96% | 3 features | TODO | +| **Total** | 14-20 weeks | 182h | 95-96% | 13 features | Updated | -**Recommended Path to 95%**: Complete Phase 1 + Phase 2 + partial Phase 3 (LATERAL, MERGE) +Note: Total effort reduced by 40 hours (18%) due to MERGE and TRUNCATE completion in v1.5.0. 
+**Recommended Path to 95%**: Complete Phase 1 + Phase 2 + partial Phase 3 (LATERAL, Array Support) --- @@ -900,29 +905,32 @@ if p.currentToken.Type == "FILTER" { ### Breakdown by Category -| Category | Features | Total Effort | % of Total | -|----------|----------|--------------|------------| -| **ORDER BY Enhancements** | NULLS FIRST/LAST | 8h | 3.6% | -| **Pagination** | FETCH/OFFSET | 16h | 7.2% | -| **Analytical SQL** | ROLLUP, CUBE, GROUPING SETS, FILTER | 72h | 32.4% | -| **Window Function Enhancements** | Frame EXCLUDE | 12h | 5.4% | -| **JOIN Enhancements** | LATERAL | 24h | 10.8% | -| **DML Enhancements** | MERGE, TRUNCATE | 40h | 18.0% | -| **Function Enhancements** | COALESCE, NULLIF | 8h | 3.6% | -| **Set Operations** | INTERSECT/EXCEPT ALL | 6h | 2.7% | -| **Data Types** | Array Support | 20h | 9.0% | -| **Value Constructors** | TABLE constructor | 12h | 5.4% | -| **Testing & Documentation** | All features | 44h | 19.8% | -| **TOTAL** | 15 features | **222h** | 100% | +| Category | Features | Total Effort | % of Total | Status | +|----------|----------|--------------|------------|--------| +| **ORDER BY Enhancements** | NULLS FIRST/LAST | 8h | 4.4% | TODO | +| **Pagination** | FETCH/OFFSET | 16h | 8.8% | TODO | +| **Analytical SQL** | ROLLUP, CUBE, GROUPING SETS, FILTER | 72h | 39.6% | TODO | +| **Window Function Enhancements** | Frame EXCLUDE | 12h | 6.6% | TODO | +| **JOIN Enhancements** | LATERAL | 24h | 13.2% | TODO | +| **Function Enhancements** | COALESCE, NULLIF | 8h | 4.4% | TODO | +| **Set Operations** | INTERSECT/EXCEPT ALL | 6h | 3.3% | TODO | +| **Data Types** | Array Support | 20h | 11.0% | TODO | +| **Value Constructors** | TABLE constructor | 12h | 6.6% | TODO | +| **Testing & Documentation** | All features | 18h | 9.9% | TODO | +| **TOTAL** | 13 features | **182h** | 100% | Updated | + +Note: MERGE (32h) and TRUNCATE (8h) are COMPLETED in v1.5.0. Testing/Documentation effort reduced proportionally. 
### Effort by Complexity Level -| Complexity | Features | Effort | Avg per Feature | -|------------|----------|--------|-----------------| -| **Low** | 5 | 34h | 6.8h | -| **Medium** | 6 | 88h | 14.7h | -| **High** | 4 | 100h | 25h | -| **TOTAL** | 15 | 222h | 14.8h | +| Complexity | Features | Effort | Avg per Feature | Status | +|------------|----------|--------|-----------------|--------| +| **Low** | 4 | 26h | 6.5h | TODO | +| **Medium** | 6 | 88h | 14.7h | TODO | +| **High** | 3 | 68h | 22.7h | TODO | +| **TOTAL** | 13 | 182h | 14.0h | Updated | + +Note: Reduced from 15 features/222h due to MERGE and TRUNCATE completion. --- @@ -948,12 +956,18 @@ if p.currentToken.Type == "FILTER" { ### Development Best Practices -1. **Test-Driven Development**: +1. **API Usage**: + - For pooled parser instances: `GetParser()` and `PutParser(p)` + - For parsing with position tracking: `ParseWithPositions(ConversionResult)` + - Token conversion utilities are test-only helpers in individual test files + - Always use `ParseWithPositions()` for production code to get accurate error locations + +2. **Test-Driven Development**: - Write tests first based on SQL-99 standard examples - Include test data files in testdata/ directories - Use existing test patterns (parser_test.go, integration_test.go) -2. **AST Design Principles**: +3. **AST Design Principles**: - Minimize breaking changes to existing AST - Use optional fields (pointers) for new features - Maintain backward compatibility with object pools @@ -979,13 +993,13 @@ if p.currentToken.Type == "FILTER" { For each feature implementation: -1. ✅ **Tests Pass**: `go test -race ./...` -2. ✅ **Benchmarks**: Performance regression < 5% -3. ✅ **Coverage**: Feature coverage > 90% -4. ✅ **Documentation**: Updated CLAUDE.md, CHANGELOG.md -5. ✅ **Examples**: Real-world test cases in testdata/ -6. ✅ **Race Detection**: Zero race conditions -7. ✅ **Code Review**: Peer review completed +1. **Tests Pass**: `go test -race ./...` +2. 
**Benchmarks**: Performance regression < 5% +3. **Coverage**: Feature coverage > 90% +4. **Documentation**: Updated CLAUDE.md, CHANGELOG.md +5. **Examples**: Real-world test cases in testdata/ +6. **Race Detection**: Zero race conditions +7. **Code Review**: Peer review completed --- diff --git a/docs/tutorials/01-sql-validator-cicd.md b/docs/tutorials/01-sql-validator-cicd.md index 25c78ee..1b01c70 100644 --- a/docs/tutorials/01-sql-validator-cicd.md +++ b/docs/tutorials/01-sql-validator-cicd.md @@ -51,11 +51,16 @@ if err != nil { // Syntax error found } -// 3. Parse the tokens into an AST -p := parser.NewParser(convertedTokens) -defer p.Release() +// 3. Convert tokens for parser +convertedTokens, err := parser.ConvertTokensForParser(tokens) +if err != nil { + // Token conversion error +} + +// 4. Parse the tokens into an AST +p := parser.NewParser() -result, err := p.Parse() +result, err := p.Parse(convertedTokens) if err != nil { // Parse error found } @@ -113,13 +118,19 @@ func ValidateFile(filePath string) ValidationResult { } // Convert tokens for parser - convertedTokens := parser.ConvertTokensForParser(tokens) + convertedTokens, err := parser.ConvertTokensForParser(tokens) + if err != nil { + return ValidationResult{ + FilePath: filePath, + Valid: false, + Error: fmt.Errorf("token conversion error: %w", err), + } + } // Create parser and parse - p := parser.NewParser(convertedTokens) - defer p.Release() + p := parser.NewParser() - _, err = p.Parse() + _, err = p.Parse(convertedTokens) if err != nil { return ValidationResult{ FilePath: filePath, diff --git a/docs/tutorials/02-custom-sql-formatter.md b/docs/tutorials/02-custom-sql-formatter.md index 51506e0..6748a10 100644 --- a/docs/tutorials/02-custom-sql-formatter.md +++ b/docs/tutorials/02-custom-sql-formatter.md @@ -194,13 +194,15 @@ func (f *Formatter) Format(sql string) (string, error) { } // Convert tokens for parser - convertedTokens := parser.ConvertTokensForParser(tokens) + convertedTokens, err := 
parser.ConvertTokensForParser(tokens) + if err != nil { + return "", fmt.Errorf("token conversion error: %w", err) + } // Create parser and parse - p := parser.NewParser(convertedTokens) - defer p.Release() + p := parser.NewParser() - result, err := p.Parse() + result, err := p.Parse(convertedTokens) if err != nil { return "", fmt.Errorf("parse error: %w", err) } @@ -472,7 +474,14 @@ func (f *Formatter) writeKeyword(keyword string) { case "lower": f.writeString(strings.ToLower(keyword)) case "title": - f.writeString(strings.Title(strings.ToLower(keyword))) + // Title case for keywords (capitalize first letter of each word) + words := strings.Fields(strings.ToLower(keyword)) + for i, word := range words { + if len(word) > 0 { + words[i] = strings.ToUpper(word[:1]) + word[1:] + } + } + f.writeString(strings.Join(words, " ")) default: f.writeString(keyword) } diff --git a/examples/migrations/README.md b/examples/migrations/README.md index 6d4f156..b115a7d 100644 --- a/examples/migrations/README.md +++ b/examples/migrations/README.md @@ -305,24 +305,23 @@ tokens2, _ := tkz.Tokenize(sql2) **Solution:** Check compatibility -1. Review [SQL_COMPATIBILITY.md](../../docs/SQL_COMPATIBILITY.md) -2. Check [ROADMAP.md](../../docs/ROADMAP.md) for planned features -3. File an issue on [GitHub](https://github.com/ajitpratap0/GoSQLX/issues) +1. Review [SQL_COMPATIBILITY.md](../../docs/SQL_COMPATIBILITY.md) for supported features +2. File an issue on [GitHub](https://github.com/ajitpratap0/GoSQLX/issues) ## Next Steps 1. **Review your tool:** Choose the relevant example (SQLFluff, JSQLParser, or pg_query) 2. **Run the example:** Execute the Go file to see live demonstrations -3. **Read the guide:** Check the corresponding migration guide in `docs/migrations/` +3. **Read the guide:** Check the corresponding migration guide in `docs/migration/` 4. **Plan migration:** Use the checklists in the guides 5. **Test thoroughly:** Validate against your specific SQL workloads 6. 
**Deploy:** Follow the deployment patterns shown in examples ## Additional Resources -- **[FROM_SQLFLUFF.md](../../docs/migrations/FROM_SQLFLUFF.md)** - Complete SQLFluff migration guide -- **[FROM_JSQLPARSER.md](../../docs/migrations/FROM_JSQLPARSER.md)** - Complete JSQLParser migration guide -- **[FROM_PG_QUERY.md](../../docs/migrations/FROM_PG_QUERY.md)** - Complete pg_query migration guide +- **[FROM_SQLFLUFF.md](../../docs/migration/FROM_SQLFLUFF.md)** - Complete SQLFluff migration guide +- **[FROM_JSQLPARSER.md](../../docs/migration/FROM_JSQLPARSER.md)** - Complete JSQLParser migration guide +- **[FROM_PG_QUERY.md](../../docs/migration/FROM_PG_QUERY.md)** - Complete pg_query migration guide - **[COMPARISON.md](../../docs/COMPARISON.md)** - Detailed feature comparison with competitors - **[API_REFERENCE.md](../../docs/API_REFERENCE.md)** - GoSQLX API documentation - **[USAGE_GUIDE.md](../../docs/USAGE_GUIDE.md)** - Common usage patterns diff --git a/examples/tutorials/01-sql-validator/README.md b/examples/tutorials/01-sql-validator/README.md index 3da1711..b2b49f2 100644 --- a/examples/tutorials/01-sql-validator/README.md +++ b/examples/tutorials/01-sql-validator/README.md @@ -60,6 +60,6 @@ Invalid: 1 ## Integration -See `.github/workflows/validate.yml` for GitHub Actions integration example. +See `.github/workflows/test-github-action.yml` for GitHub Actions integration example. 
For the complete tutorial, see: `docs/tutorials/01-sql-validator-cicd.md` diff --git a/pkg/linter/README.md b/pkg/linter/README.md index 1dcc922..6da7331 100644 --- a/pkg/linter/README.md +++ b/pkg/linter/README.md @@ -79,19 +79,17 @@ func main() { // Lint SQL string sql := `SELECT * FROM users WHERE active = true ` // Trailing space - results, err := l.LintString(sql, "query.sql") - if err != nil { + result := l.LintString(sql, "query.sql") + if result.Error != nil { // Handle error } // Check violations - for _, result := range results { - for _, violation := range result.Violations { - fmt.Printf("[%s] Line %d: %s\n", - violation.RuleID, - violation.Line, - violation.Message) - } + for _, violation := range result.Violations { + fmt.Printf("[%s] Line %d: %s\n", + violation.Rule, + violation.Location.Line, + violation.Message) } } ``` @@ -108,13 +106,21 @@ sql := `SELECT * FROM users WHERE active = true` // Mixed tabs/spaces, trailing space // Lint and get violations -results, _ := l.LintString(sql, "query.sql") +result := l.LintString(sql, "query.sql") + +// Auto-fix violations by rule +for _, rule := range l.Rules() { + if rule.CanAutoFix() { + // Get violations for this rule + ruleViolations := []linter.Violation{} + for _, v := range result.Violations { + if v.Rule == rule.ID() { + ruleViolations = append(ruleViolations, v) + } + } -// Auto-fix violations -for _, result := range results { - for _, violation := range result.Violations { - if violation.CanAutoFix { - fixedSQL, err := violation.Fix(sql) + if len(ruleViolations) > 0 { + fixedSQL, err := rule.Fix(sql, ruleViolations) if err == nil { sql = fixedSQL } @@ -154,7 +160,7 @@ type Context struct { Lines []string // Split by line Tokens []models.TokenWithSpan // Tokenization result AST *ast.AST // Parsed AST (if available) - Errors []error // Parse errors + ParseErr error // Parse error (if any) } ``` @@ -164,12 +170,14 @@ Represents a rule violation: ```go type Violation struct { - RuleID string - Message 
string - Line int - Column int - Severity Severity - CanAutoFix bool + Rule string // Rule ID (e.g., "L001") + RuleName string // Human-readable rule name + Severity Severity // Severity level + Message string // Violation description + Location models.Location // Position in source (1-based) + Line string // The actual line content + Suggestion string // How to fix the violation + CanAutoFix bool // Whether this violation can be auto-fixed } ``` @@ -220,10 +228,11 @@ func (r *MyCustomRule) Check(ctx *linter.Context) ([]linter.Violation, error) { // Check for your pattern if /* violation found */ { violations = append(violations, linter.Violation{ - RuleID: r.ID(), + Rule: r.ID(), + RuleName: r.Name(), Message: "Custom violation message", - Line: lineNum + 1, // 1-based - Column: 0, + Location: models.Location{Line: lineNum + 1, Column: 1}, // 1-based + Line: line, Severity: r.Severity(), CanAutoFix: false, }) diff --git a/pkg/sql/security/scanner.go b/pkg/sql/security/scanner.go index c16f0d7..b9d773d 100644 --- a/pkg/sql/security/scanner.go +++ b/pkg/sql/security/scanner.go @@ -1,14 +1,13 @@ // Package security provides SQL injection pattern detection and security scanning. // It analyzes parsed SQL AST to identify common injection patterns and vulnerabilities. // -// The scanner detects: +// The scanner detects 6 pattern types: // - Tautologies: Always-true conditions like 1=1, 'a'='a' // - Comment-based bypasses: --, /**/, # -// - Stacked queries: Multiple statements with dangerous operations -// - UNION-based extraction: Suspicious UNION SELECT patterns -// - Time-based blind: SLEEP(), WAITFOR DELAY, pg_sleep() -// - Boolean-based blind: Suspicious boolean logic patterns -// - Out-of-band: xp_cmdshell, LOAD_FILE(), etc. +// - UNION-based extraction: Suspicious UNION SELECT patterns with NULL columns or system tables +// - Time-based blind: SLEEP(), WAITFOR DELAY, pg_sleep(), BENCHMARK() +// - Out-of-band: xp_cmdshell, LOAD_FILE(), UTL_HTTP, etc. 
+// - Dangerous functions: EXEC(), sp_executesql, PREPARE FROM, etc. // // Example usage: // diff --git a/pkg/sql/tokenizer/README.md b/pkg/sql/tokenizer/README.md index ee4a460..f726c78 100644 --- a/pkg/sql/tokenizer/README.md +++ b/pkg/sql/tokenizer/README.md @@ -396,7 +396,7 @@ go func() { - [Main API Reference](../../../docs/API_REFERENCE.md) - [Architecture Guide](../../../docs/ARCHITECTURE.md) -- [Unicode Support](../../../docs/UNICODE_SUPPORT.md) +- [SQL Compatibility](../../../docs/SQL_COMPATIBILITY.md) - [Examples](../../../examples/) ## Version History diff --git a/scripts/validate-security-setup.sh b/scripts/validate-security-setup.sh index 2f590a6..23cf2e0 100755 --- a/scripts/validate-security-setup.sh +++ b/scripts/validate-security-setup.sh @@ -52,7 +52,7 @@ echo "----------------------------" check_file ".github/workflows/security.yml" "Security workflow" check_file ".github/dependabot.yml" "Dependabot config" check_file "SECURITY.md" "Security policy" -check_file "SECURITY_SETUP.md" "Setup guide" +check_file "docs/SECURITY_SETUP.md" "Setup guide" check_file ".github/SECURITY_CHECKLIST.md" "Setup checklist" echo ""