Skip to content

Commit 6502db4

Browse files
ajitpratap0, Ajit Pratap Singh, and claude
authored
feat: Parser Enhancements Batch 5 - ARRAY Constructor, WITHIN GROUP & JSONB Operators (#187)
Implements three new parser features:

## Issue #178: PostgreSQL JSONB Existence Operators
- Fixed token converter to properly handle ?, ?|, ?& operators
- These were incorrectly being converted to placeholder tokens
- Added proper type mappings in buildTypeMapping()

## Issue #182: PostgreSQL ARRAY Constructor Syntax
- Added TokenTypeArray (395) and TokenTypeWithin (396) token types
- Added ARRAY and WITHIN keywords to keywords.go and tokenizer.go
- Added ArrayConstructorExpression AST node with pooling
- Supports both ARRAY[...] literal and ARRAY(SELECT...) subquery syntax
- Added parseArrayConstructor() function in expressions.go

## Issue #183: SQL:2003 WITHIN GROUP Ordered-Set Aggregates
- Added WithinGroup field to FunctionCall struct
- Added parsing support in window.go parseFunctionCall()
- Supports PERCENTILE_CONT, PERCENTILE_DISC, MODE, LISTAGG
- Includes ASC/DESC and NULLS FIRST/LAST ordering

Test Coverage:
- Added array_constructor_test.go with 15+ test cases
- Added within_group_test.go with 12+ test cases
- Added json_operators_test.go parser tests
- All 26 packages pass with race detection

Closes #178, #182, #183

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Ajit Pratap Singh <ajitpratapsingh@Ajits-Mac-mini.local>
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 0ac1a90 commit 6502db4

25 files changed

Lines changed: 1440 additions & 55 deletions

cmd/gosqlx/cmd/sql_formatter.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -256,9 +256,14 @@ func (f *SQLFormatter) formatInsert(stmt *ast.InsertStatement) error {
256256
if len(stmt.Values) > 0 {
257257
f.writeNewline()
258258
f.writeKeyword("VALUES")
259-
f.builder.WriteString(" (")
260-
f.formatExpressionList(stmt.Values, ", ")
261-
f.builder.WriteString(")")
259+
for i, row := range stmt.Values {
260+
if i > 0 {
261+
f.builder.WriteString(",")
262+
}
263+
f.builder.WriteString(" (")
264+
f.formatExpressionList(row, ", ")
265+
f.builder.WriteString(")")
266+
}
262267
}
263268

264269
if stmt.Query != nil {

pkg/gosqlx/extract.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -928,8 +928,10 @@ func (fc *functionCollector) collectFromNode(node ast.Node) {
928928
fc.collectFromNode(n.With)
929929
}
930930
case *ast.InsertStatement:
931-
for _, val := range n.Values {
932-
fc.collectFromExpression(val)
931+
for _, row := range n.Values {
932+
for _, val := range row {
933+
fc.collectFromExpression(val)
934+
}
933935
}
934936
if n.Query != nil {
935937
fc.collectFromNode(n.Query)

pkg/models/token_type.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,8 @@ const (
337337
TokenTypeCube TokenType = 392
338338
TokenTypeGrouping TokenType = 393
339339
TokenTypeSets TokenType = 394 // SETS keyword for GROUPING SETS
340+
TokenTypeArray TokenType = 395 // ARRAY keyword for PostgreSQL array constructor
341+
TokenTypeWithin TokenType = 396 // WITHIN keyword for WITHIN GROUP clause
340342

341343
// Role/Permission Keywords (400-419)
342344
TokenTypeRole TokenType = 400
@@ -620,6 +622,8 @@ var tokenStringMap = map[TokenType]string{
620622
TokenTypeCube: "CUBE",
621623
TokenTypeGrouping: "GROUPING",
622624
TokenTypeSets: "SETS",
625+
TokenTypeArray: "ARRAY",
626+
TokenTypeWithin: "WITHIN",
623627

624628
// Role/Permission Keywords
625629
TokenTypeRole: "ROLE",

pkg/sql/ast/ast.go

Lines changed: 49 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -602,13 +602,15 @@ func (i Identifier) Children() []Node { return nil }
602602
// New in v1.6.0:
603603
// - Filter: FILTER clause for conditional aggregation
604604
// - OrderBy: ORDER BY clause for order-sensitive aggregates (STRING_AGG, ARRAY_AGG, etc.)
605+
// - WithinGroup: ORDER BY clause for ordered-set aggregates (PERCENTILE_CONT, PERCENTILE_DISC, MODE, etc.)
605606
type FunctionCall struct {
606-
Name string
607-
Arguments []Expression // Renamed from Args for consistency
608-
Over *WindowSpec // For window functions
609-
Distinct bool
610-
Filter Expression // WHERE clause for aggregate functions
611-
OrderBy []OrderByExpression // ORDER BY clause for aggregate functions (STRING_AGG, ARRAY_AGG, etc.)
607+
Name string
608+
Arguments []Expression // Renamed from Args for consistency
609+
Over *WindowSpec // For window functions
610+
Distinct bool
611+
Filter Expression // WHERE clause for aggregate functions
612+
OrderBy []OrderByExpression // ORDER BY clause for aggregate functions (STRING_AGG, ARRAY_AGG, etc.)
613+
WithinGroup []OrderByExpression // ORDER BY clause for ordered-set aggregates (PERCENTILE_CONT, etc.)
612614
}
613615

614616
func (f *FunctionCall) expressionNode() {}
@@ -625,6 +627,10 @@ func (f FunctionCall) Children() []Node {
625627
orderBy := orderBy // G601: Create local copy to avoid memory aliasing
626628
children = append(children, &orderBy)
627629
}
630+
for _, orderBy := range f.WithinGroup {
631+
orderBy := orderBy // G601: Create local copy to avoid memory aliasing
632+
children = append(children, &orderBy)
633+
}
628634
return children
629635
}
630636

@@ -887,6 +893,38 @@ func (l *ListExpression) expressionNode() {}
887893
func (l ListExpression) TokenLiteral() string { return "LIST" }
888894
func (l ListExpression) Children() []Node { return nodifyExpressions(l.Values) }
889895

896+
// TupleExpression represents a row constructor / tuple (col1, col2) for multi-column comparisons
897+
// Used in: WHERE (user_id, status) IN ((1, 'active'), (2, 'pending'))
898+
type TupleExpression struct {
899+
Expressions []Expression
900+
}
901+
902+
func (t *TupleExpression) expressionNode() {}
903+
func (t TupleExpression) TokenLiteral() string { return "TUPLE" }
904+
func (t TupleExpression) Children() []Node { return nodifyExpressions(t.Expressions) }
905+
906+
// ArrayConstructorExpression represents PostgreSQL ARRAY constructor syntax.
907+
// Creates an array value from a list of expressions or a subquery.
908+
//
909+
// Examples:
910+
//
911+
// ARRAY[1, 2, 3] - Integer array literal
912+
// ARRAY['admin', 'moderator'] - Text array literal
913+
// ARRAY(SELECT id FROM users) - Array from subquery
914+
type ArrayConstructorExpression struct {
915+
Elements []Expression // Elements inside ARRAY[...]
916+
Subquery *SelectStatement // For ARRAY(SELECT ...) syntax (optional)
917+
}
918+
919+
func (a *ArrayConstructorExpression) expressionNode() {}
920+
func (a ArrayConstructorExpression) TokenLiteral() string { return "ARRAY" }
921+
func (a ArrayConstructorExpression) Children() []Node {
922+
if a.Subquery != nil {
923+
return []Node{a.Subquery}
924+
}
925+
return nodifyExpressions(a.Elements)
926+
}
927+
890928
// UnaryExpression represents operations like NOT expr
891929
type UnaryExpression struct {
892930
Operator UnaryOperator
@@ -963,7 +1001,7 @@ type InsertStatement struct {
9631001
With *WithClause
9641002
TableName string
9651003
Columns []Expression
966-
Values []Expression
1004+
Values [][]Expression // Multi-row support: each inner slice is one row of values
9671005
Query *SelectStatement // For INSERT ... SELECT
9681006
Returning []Expression
9691007
OnConflict *OnConflict
@@ -978,7 +1016,10 @@ func (i InsertStatement) Children() []Node {
9781016
children = append(children, i.With)
9791017
}
9801018
children = append(children, nodifyExpressions(i.Columns)...)
981-
children = append(children, nodifyExpressions(i.Values)...)
1019+
// Flatten multi-row values for Children()
1020+
for _, row := range i.Values {
1021+
children = append(children, nodifyExpressions(row)...)
1022+
}
9821023
if i.Query != nil {
9831024
children = append(children, i.Query)
9841025
}

pkg/sql/ast/coverage_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -298,10 +298,10 @@ func TestSpanMethods(t *testing.T) {
298298
&Identifier{Name: "id"},
299299
&Identifier{Name: "name"},
300300
},
301-
Values: []Expression{
301+
Values: [][]Expression{{
302302
&LiteralValue{Value: 1},
303303
&LiteralValue{Value: "test"},
304-
},
304+
}},
305305
}
306306
span := insert.Span()
307307
// Should return combined span of components
@@ -830,7 +830,7 @@ func TestInsertStatementChildrenCoverage(t *testing.T) {
830830
},
831831
TableName: "users",
832832
Columns: []Expression{&Identifier{Name: "id"}},
833-
Values: []Expression{&LiteralValue{Value: 1}},
833+
Values: [][]Expression{{&LiteralValue{Value: 1}}},
834834
Query: &SelectStatement{},
835835
Returning: []Expression{&Identifier{Name: "id"}},
836836
OnConflict: &OnConflict{

pkg/sql/ast/interface_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -799,7 +799,7 @@ func TestInsertStatementChildren(t *testing.T) {
799799
stmt := &InsertStatement{
800800
With: &WithClause{},
801801
Columns: []Expression{testIdent},
802-
Values: []Expression{testExpr},
802+
Values: [][]Expression{{testExpr}},
803803
Query: &SelectStatement{},
804804
Returning: []Expression{testIdent},
805805
OnConflict: &OnConflict{},

pkg/sql/ast/nodes_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -539,7 +539,7 @@ func TestInsertStatement(t *testing.T) {
539539
stmt: &InsertStatement{
540540
TableName: "users",
541541
Columns: []Expression{&Identifier{Name: "name"}, &Identifier{Name: "email"}},
542-
Values: []Expression{&LiteralValue{Value: "John"}, &LiteralValue{Value: "john@example.com"}},
542+
Values: [][]Expression{{&LiteralValue{Value: "John"}, &LiteralValue{Value: "john@example.com"}}},
543543
},
544544
wantLiteral: "INSERT",
545545
minChildren: 2,

pkg/sql/ast/pool.go

Lines changed: 86 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ var (
5353
New: func() interface{} {
5454
return &InsertStatement{
5555
Columns: make([]Expression, 0, 4),
56-
Values: make([]Expression, 0, 4),
56+
Values: make([][]Expression, 0, 4),
5757
}
5858
},
5959
}
@@ -129,6 +129,22 @@ var (
129129
},
130130
}
131131

132+
tupleExprPool = sync.Pool{
133+
New: func() interface{} {
134+
return &TupleExpression{
135+
Expressions: make([]Expression, 0, 4),
136+
}
137+
},
138+
}
139+
140+
arrayConstructorPool = sync.Pool{
141+
New: func() interface{} {
142+
return &ArrayConstructorExpression{
143+
Elements: make([]Expression, 0, 4),
144+
}
145+
},
146+
}
147+
132148
subqueryExprPool = sync.Pool{
133149
New: func() interface{} {
134150
return &SubqueryExpression{}
@@ -337,9 +353,13 @@ func PutInsertStatement(stmt *InsertStatement) {
337353
PutExpression(stmt.Columns[i])
338354
stmt.Columns[i] = nil
339355
}
356+
// Clean up multi-row values
340357
for i := range stmt.Values {
341-
PutExpression(stmt.Values[i])
342-
stmt.Values[i] = nil
358+
for j := range stmt.Values[i] {
359+
PutExpression(stmt.Values[i][j])
360+
stmt.Values[i][j] = nil
361+
}
362+
stmt.Values[i] = stmt.Values[i][:0]
343363
}
344364

345365
// Reset slices but keep capacity
@@ -784,6 +804,27 @@ func PutExpression(expr Expression) {
784804
e.Values = e.Values[:0]
785805
listExprPool.Put(e)
786806

807+
case *TupleExpression:
808+
for i := range e.Expressions {
809+
if e.Expressions[i] != nil {
810+
workQueue = append(workQueue, e.Expressions[i])
811+
}
812+
e.Expressions[i] = nil
813+
}
814+
e.Expressions = e.Expressions[:0]
815+
tupleExprPool.Put(e)
816+
817+
case *ArrayConstructorExpression:
818+
for i := range e.Elements {
819+
if e.Elements[i] != nil {
820+
workQueue = append(workQueue, e.Elements[i])
821+
}
822+
e.Elements[i] = nil
823+
}
824+
e.Elements = e.Elements[:0]
825+
e.Subquery = nil
826+
arrayConstructorPool.Put(e)
827+
787828
case *UnaryExpression:
788829
if e.Expr != nil {
789830
workQueue = append(workQueue, e.Expr)
@@ -933,6 +974,48 @@ func PutInExpression(ie *InExpression) {
933974
inExprPool.Put(ie)
934975
}
935976

977+
// GetTupleExpression gets a TupleExpression from the pool
978+
func GetTupleExpression() *TupleExpression {
979+
te := tupleExprPool.Get().(*TupleExpression)
980+
te.Expressions = te.Expressions[:0]
981+
return te
982+
}
983+
984+
// PutTupleExpression returns a TupleExpression to the pool
985+
func PutTupleExpression(te *TupleExpression) {
986+
if te == nil {
987+
return
988+
}
989+
for i := range te.Expressions {
990+
PutExpression(te.Expressions[i])
991+
te.Expressions[i] = nil
992+
}
993+
te.Expressions = te.Expressions[:0]
994+
tupleExprPool.Put(te)
995+
}
996+
997+
// GetArrayConstructor gets an ArrayConstructorExpression from the pool
998+
func GetArrayConstructor() *ArrayConstructorExpression {
999+
ac := arrayConstructorPool.Get().(*ArrayConstructorExpression)
1000+
ac.Elements = ac.Elements[:0]
1001+
ac.Subquery = nil
1002+
return ac
1003+
}
1004+
1005+
// PutArrayConstructor returns an ArrayConstructorExpression to the pool
1006+
func PutArrayConstructor(ac *ArrayConstructorExpression) {
1007+
if ac == nil {
1008+
return
1009+
}
1010+
for i := range ac.Elements {
1011+
PutExpression(ac.Elements[i])
1012+
ac.Elements[i] = nil
1013+
}
1014+
ac.Elements = ac.Elements[:0]
1015+
ac.Subquery = nil
1016+
arrayConstructorPool.Put(ac)
1017+
}
1018+
9361019
// GetSubqueryExpression gets a SubqueryExpression from the pool
9371020
func GetSubqueryExpression() *SubqueryExpression {
9381021
return subqueryExprPool.Get().(*SubqueryExpression)

pkg/sql/ast/pool_test.go

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,12 @@ func TestInsertStatementPool(t *testing.T) {
2121
&Identifier{Name: "name"},
2222
&Identifier{Name: "email"},
2323
}
24-
stmt.Values = []Expression{
25-
&LiteralValue{Value: "John"},
26-
&LiteralValue{Value: "john@example.com"},
24+
// Values is now [][]Expression for multi-row support
25+
stmt.Values = [][]Expression{
26+
{
27+
&LiteralValue{Value: "John"},
28+
&LiteralValue{Value: "john@example.com"},
29+
},
2730
}
2831

2932
// Return to pool
@@ -371,7 +374,8 @@ func TestMemoryLeaks_InsertStatementPool(t *testing.T) {
371374

372375
stmt.TableName = "users"
373376
stmt.Columns = append(stmt.Columns, &Identifier{Name: "name"}, &Identifier{Name: "email"})
374-
stmt.Values = append(stmt.Values, &LiteralValue{Value: "John"}, &LiteralValue{Value: "john@test.com"})
377+
// Values is now [][]Expression for multi-row support
378+
stmt.Values = append(stmt.Values, []Expression{&LiteralValue{Value: "John"}, &LiteralValue{Value: "john@test.com"}})
375379

376380
PutInsertStatement(stmt)
377381

pkg/sql/ast/span.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,9 +96,11 @@ func (i *InsertStatement) Span() models.Span {
9696
}
9797
}
9898

99-
for _, val := range i.Values {
100-
if spanned, ok := val.(Spanned); ok {
101-
spans = append(spans, spanned.Span())
99+
for _, row := range i.Values {
100+
for _, val := range row {
101+
if spanned, ok := val.(Spanned); ok {
102+
spans = append(spans, spanned.Span())
103+
}
102104
}
103105
}
104106

0 commit comments

Comments
 (0)