Skip to content

Commit c96d9fe

Browse files
committed
refactor(clickhouse): use AST-based parameter detection instead of regex
Replace regex-based parameter detection with AST walking using the doubleclick parser. This avoids issues with parameters in comments being incorrectly detected. For INSERT VALUES clauses (which doubleclick doesn't fully parse), extract the VALUES content and parse it as a SELECT to properly detect parameters through AST walking. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <[email protected]>
1 parent 71cfd12 commit c96d9fe

File tree

1 file changed

+196
-11
lines changed

1 file changed

+196
-11
lines changed

internal/engine/clickhouse/analyzer/analyze.go

Lines changed: 196 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ import (
99
"sync"
1010

1111
_ "github.com/ClickHouse/clickhouse-go/v2" // ClickHouse driver
12+
dcast "github.com/sqlc-dev/doubleclick/ast"
13+
"github.com/sqlc-dev/doubleclick/parser"
1214

1315
core "github.com/sqlc-dev/sqlc/internal/analysis"
1416
"github.com/sqlc-dev/sqlc/internal/config"
@@ -331,25 +333,40 @@ type paramInfo struct {
331333
Type string
332334
}
333335

334-
// detectParameters finds parameters in a ClickHouse query.
336+
// detectParameters finds parameters in a ClickHouse query using the doubleclick parser.
335337
// ClickHouse supports {name:Type} and ? style parameters.
336338
func detectParameters(query string) []paramInfo {
337339
var params []paramInfo
338340

339-
// Find all {name:Type} style parameters using regex
340-
// This is more reliable than AST walking as it works for all statement types
341-
matches := namedParamRegex.FindAllStringSubmatch(query, -1)
342-
for _, match := range matches {
343-
if len(match) >= 3 {
344-
name := match[1]
345-
dataType := normalizeType(match[2])
346-
params = append(params, paramInfo{
347-
Name: name,
348-
Type: dataType,
341+
ctx := context.Background()
342+
343+
// First, try to parse and walk the query AST for named parameters
344+
stmts, err := parser.Parse(ctx, strings.NewReader(query))
345+
if err == nil {
346+
for _, stmt := range stmts {
347+
walkStatement(stmt, func(expr dcast.Expression) {
348+
if param, ok := expr.(*dcast.Parameter); ok {
349+
if param.Name != "" {
350+
dataType := "any"
351+
if param.Type != nil {
352+
dataType = normalizeType(param.Type.Name)
353+
}
354+
params = append(params, paramInfo{
355+
Name: param.Name,
356+
Type: dataType,
357+
})
358+
}
359+
}
349360
})
350361
}
351362
}
352363

364+
// If no named parameters found from AST, try to extract VALUES clause for INSERT statements
365+
// The doubleclick parser doesn't fully parse VALUES, so we parse it as a SELECT
366+
if len(params) == 0 {
367+
params = extractValuesParameters(ctx, query)
368+
}
369+
353370
// Count ? placeholders and add them after any named parameters
354371
count := strings.Count(query, "?")
355372
for i := 0; i < count; i++ {
@@ -362,6 +379,174 @@ func detectParameters(query string) []paramInfo {
362379
return params
363380
}
364381

382+
// extractValuesParameters extracts parameters from INSERT VALUES clause by parsing it as a SELECT.
383+
// This works around the limitation that doubleclick doesn't parse VALUES clause expressions.
384+
func extractValuesParameters(ctx context.Context, query string) []paramInfo {
385+
var params []paramInfo
386+
387+
// Find VALUES clause (case insensitive)
388+
upperQuery := strings.ToUpper(query)
389+
valuesIdx := strings.Index(upperQuery, "VALUES")
390+
if valuesIdx == -1 {
391+
return params
392+
}
393+
394+
// Extract everything after VALUES
395+
valuesClause := query[valuesIdx+6:]
396+
397+
// Find the parentheses containing the values
398+
start := strings.Index(valuesClause, "(")
399+
if start == -1 {
400+
return params
401+
}
402+
403+
// Find matching closing parenthesis
404+
depth := 0
405+
end := -1
406+
for i := start; i < len(valuesClause); i++ {
407+
switch valuesClause[i] {
408+
case '(':
409+
depth++
410+
case ')':
411+
depth--
412+
if depth == 0 {
413+
end = i
414+
break
415+
}
416+
}
417+
if end != -1 {
418+
break
419+
}
420+
}
421+
422+
if end == -1 {
423+
return params
424+
}
425+
426+
// Extract the values list and convert to SELECT query
427+
valuesList := valuesClause[start+1 : end]
428+
selectQuery := "SELECT " + valuesList
429+
430+
// Parse the synthetic SELECT query
431+
stmts, err := parser.Parse(ctx, strings.NewReader(selectQuery))
432+
if err != nil {
433+
return params
434+
}
435+
436+
// Walk the AST to find Parameter nodes
437+
for _, stmt := range stmts {
438+
walkStatement(stmt, func(expr dcast.Expression) {
439+
if param, ok := expr.(*dcast.Parameter); ok {
440+
if param.Name != "" {
441+
dataType := "any"
442+
if param.Type != nil {
443+
dataType = normalizeType(param.Type.Name)
444+
}
445+
params = append(params, paramInfo{
446+
Name: param.Name,
447+
Type: dataType,
448+
})
449+
}
450+
}
451+
})
452+
}
453+
454+
return params
455+
}
456+
457+
// walkStatement walks a statement and calls fn for each expression.
458+
func walkStatement(stmt dcast.Statement, fn func(dcast.Expression)) {
459+
switch s := stmt.(type) {
460+
case *dcast.SelectQuery:
461+
walkSelectQuery(s, fn)
462+
case *dcast.SelectWithUnionQuery:
463+
for _, sel := range s.Selects {
464+
walkStatement(sel, fn)
465+
}
466+
case *dcast.InsertQuery:
467+
if s.Select != nil {
468+
walkStatement(s.Select, fn)
469+
}
470+
}
471+
}
472+
473+
// walkSelectQuery walks a SELECT query and calls fn for each expression.
474+
func walkSelectQuery(s *dcast.SelectQuery, fn func(dcast.Expression)) {
475+
// Walk columns
476+
for _, col := range s.Columns {
477+
walkExpression(col, fn)
478+
}
479+
// Walk WHERE clause
480+
if s.Where != nil {
481+
walkExpression(s.Where, fn)
482+
}
483+
// Walk GROUP BY
484+
for _, g := range s.GroupBy {
485+
walkExpression(g, fn)
486+
}
487+
// Walk HAVING
488+
if s.Having != nil {
489+
walkExpression(s.Having, fn)
490+
}
491+
// Walk ORDER BY
492+
for _, o := range s.OrderBy {
493+
walkExpression(o.Expression, fn)
494+
}
495+
// Walk LIMIT
496+
if s.Limit != nil {
497+
walkExpression(s.Limit, fn)
498+
}
499+
// Walk OFFSET
500+
if s.Offset != nil {
501+
walkExpression(s.Offset, fn)
502+
}
503+
}
504+
505+
// walkExpression walks an expression and calls fn for each sub-expression.
506+
func walkExpression(expr dcast.Expression, fn func(dcast.Expression)) {
507+
if expr == nil {
508+
return
509+
}
510+
fn(expr)
511+
512+
switch e := expr.(type) {
513+
case *dcast.BinaryExpr:
514+
walkExpression(e.Left, fn)
515+
walkExpression(e.Right, fn)
516+
case *dcast.UnaryExpr:
517+
walkExpression(e.Operand, fn)
518+
case *dcast.FunctionCall:
519+
for _, arg := range e.Arguments {
520+
walkExpression(arg, fn)
521+
}
522+
case *dcast.Subquery:
523+
walkStatement(e.Query, fn)
524+
case *dcast.CaseExpr:
525+
if e.Operand != nil {
526+
walkExpression(e.Operand, fn)
527+
}
528+
for _, when := range e.Whens {
529+
walkExpression(when.Condition, fn)
530+
walkExpression(when.Result, fn)
531+
}
532+
if e.Else != nil {
533+
walkExpression(e.Else, fn)
534+
}
535+
case *dcast.InExpr:
536+
walkExpression(e.Expr, fn)
537+
for _, v := range e.List {
538+
walkExpression(v, fn)
539+
}
540+
if e.Query != nil {
541+
walkStatement(e.Query, fn)
542+
}
543+
case *dcast.BetweenExpr:
544+
walkExpression(e.Expr, fn)
545+
walkExpression(e.Low, fn)
546+
walkExpression(e.High, fn)
547+
}
548+
}
549+
365550
// namedParamRegex matches ClickHouse named parameters like {name:Type}
366551
var namedParamRegex = regexp.MustCompile(`\{(\w+):(\w+)\}`)
367552

0 commit comments

Comments
 (0)