diff --git a/crates/polyglot-sql/src/parser.rs b/crates/polyglot-sql/src/parser.rs index a6e699b..9ee5d4f 100644 --- a/crates/polyglot-sql/src/parser.rs +++ b/crates/polyglot-sql/src/parser.rs @@ -4882,8 +4882,9 @@ impl Parser { } else { return Err(self.parse_error("Expected identifier after ${")); } - } else if self.check(TokenType::String) { + } else if self.check(TokenType::String) || self.check(TokenType::DollarString) { // DuckDB allows string literals as table names: SELECT * FROM 'x.y' + // Snowflake JDBC uses dollar-quoted strings for stage paths: SELECT $1 FROM $$@%"table"/$$ // Convert to a quoted identifier let string_token = self.advance(); let table_name = Identifier { @@ -22741,9 +22742,14 @@ impl Parser { while self.check(TokenType::Slash) { self.skip(); // consume / stage_path.push('/'); - if (self.check(TokenType::Var) + // Use `while` (not `if`) because a single path segment between slashes + // may consist of multiple tokens — e.g., a UUID like `c8b31cea-a6d1-4413` + // tokenizes as Identifier("c8b31cea") + Dash + Identifier("a6d1") + Dash + Number(4413). + while (self.check(TokenType::Var) || self.check_keyword() - || self.is_identifier_token()) + || self.is_identifier_token() + || self.check(TokenType::Number) + || self.check(TokenType::Dash)) && !self.check_next(TokenType::Eq) { stage_path.push_str(&self.advance().text); @@ -22790,10 +22796,14 @@ impl Parser { while self.check(TokenType::Slash) { self.skip(); // consume / stage_path.push('/'); - // Get path segment but don't consume if followed by = (that's a parameter) - if (self.check(TokenType::Var) + // Use `while` (not `if`) because a single path segment between slashes + // may consist of multiple tokens — e.g., a UUID like `c8b31cea-a6d1-4413` + // tokenizes as Identifier("c8b31cea") + Dash + Identifier("a6d1") + Dash + Number(4413). + while (self.check(TokenType::Var) || self.check_keyword() - || self.is_identifier_token()) + || self.is_identifier_token() + || self.check(TokenType::Number) + || self.check(TokenType::Dash)) && !self.check_next(TokenType::Eq) { stage_path.push_str(&self.advance().text); @@ -22828,9 +22838,14 @@ impl Parser { while self.check(TokenType::Slash) { self.skip(); // consume / stage_path.push('/'); - if (self.check(TokenType::Var) + // Use `while` (not `if`) because a single path segment between slashes + // may consist of multiple tokens — e.g., a UUID like `c8b31cea-a6d1-4413` + // tokenizes as Identifier("c8b31cea") + Dash + Identifier("a6d1") + Dash + Number(4413). + while (self.check(TokenType::Var) || self.check_keyword() - || self.is_identifier_token()) + || self.is_identifier_token() + || self.check(TokenType::Number) + || self.check(TokenType::Dash)) && !self.check_next(TokenType::Eq) { stage_path.push_str(&self.advance().text); @@ -22863,9 +22878,11 @@ impl Parser { break; } // Stop at ? (placeholder for stage destination), quoted string - // (e.g., '@SYSTEM$BIND/...'), or semicolon + // (e.g., '@SYSTEM$BIND/...'), dollar-quoted string (e.g., $$@%"table"$$), + // or semicolon if self.check(TokenType::Parameter) || self.check(TokenType::String) + || self.check(TokenType::DollarString) || self.check(TokenType::Semicolon) { break; @@ -22876,11 +22893,11 @@ impl Parser { (source_parts.join(""), false) }; - // Parse target stage (@stage_name, ? placeholder, or quoted '@stage') + // Parse target stage (@stage_name, ? placeholder, quoted '@stage', or dollar-quoted $$@%"stage"$$) let target = if self.match_token(TokenType::Parameter) { Expression::Placeholder(Placeholder { index: None }) - } else if self.check(TokenType::String) { - // Quoted stage: '@SYSTEM$BIND/path' + } else if self.check(TokenType::String) || self.check(TokenType::DollarString) { + // Quoted stage: '@SYSTEM$BIND/path' or $$@%"table"$$ let tok = self.advance(); Expression::Literal(Box::new(Literal::String(tok.text.clone()))) } else { diff --git a/crates/polyglot-sql/tests/snowflake_regression_test.rs b/crates/polyglot-sql/tests/snowflake_regression_test.rs index 2472bac..50a0b1e 100644 --- a/crates/polyglot-sql/tests/snowflake_regression_test.rs +++ b/crates/polyglot-sql/tests/snowflake_regression_test.rs @@ -327,3 +327,105 @@ fn test_snowflake_create_table_unaffected() { let result = parse_one(&gen, DialectType::Snowflake); assert!(result.is_ok(), "CREATE TABLE broken: {:?}", result.err()); } + +// ===================================================================== +// Category A: DollarString in PUT (source/target) +// Related: https://github.com/tobilg/polyglot/issues/165 +// ===================================================================== + +#[test] +fn test_snowflake_put_dollar_quoted_stage() { + // JDBC driver wraps stage references in $$...$$ for special characters + let sql = r#"put file:///tmp/placeholder $$@%"ice cream (nice)"$$ overwrite=true"#; + let result = parse_one(sql, DialectType::Snowflake); + assert!( + result.is_ok(), + "PUT with dollar-quoted stage target failed: {:?}", + result.err() + ); +} + +// ===================================================================== +// Category B: DollarString as table source in SELECT FROM +// Related: https://github.com/tobilg/polyglot/issues/165 +// ===================================================================== + +#[test] +fn test_snowflake_select_from_dollar_quoted_stage() { + // JDBC driver uses $$...$$ for stage paths in FROM clause + let sql = r#"SELECT $1 FROM $$@%"ice cream (nice)"/$$"#; + let result = parse_one(sql, DialectType::Snowflake); + assert!( + result.is_ok(), + "SELECT FROM dollar-quoted stage path failed: {:?}", + result.err() + ); +} + +// ===================================================================== +// Category C: @~ user stage with UUID path segments +// Related: https://github.com/tobilg/polyglot/issues/165 +// ===================================================================== + +#[test] +fn test_snowflake_put_user_stage_uuid_path() { + let sql = + "put file:///tmp/placeholder @~/00626646-bb1e-4729-a1ab-d4b96aebbed5/testUploadStream overwrite=true"; + let result = parse_one(sql, DialectType::Snowflake); + assert!( + result.is_ok(), + "PUT with @~/UUID/name path failed: {:?}", + result.err() + ); +} + +#[test] +fn test_snowflake_put_user_stage_uuid_path_compress() { + let sql = + "put file:///tmp/placeholder @~/00626646-bb1e-4729-a1ab-d4b96aebbed5/testCompressAndUploadStream overwrite=true"; + let result = parse_one(sql, DialectType::Snowflake); + assert!( + result.is_ok(), + "PUT with @~/UUID/compressUpload path failed: {:?}", + result.err() + ); +} + +// ===================================================================== +// Category D: Named stage with UUID subpath + AUTO_COMPRESS +// Related: https://github.com/tobilg/polyglot/issues/165 +// ===================================================================== + +#[test] +fn test_snowflake_put_named_stage_uuid_subpath_auto_compress() { + let sql = "PUT file:///tmp/test_file.csv @teststage/c8b31cea-a6d1-4413-936d-bf8c9d63ab9f AUTO_COMPRESS=FALSE"; + let result = parse_one(sql, DialectType::Snowflake); + assert!( + result.is_ok(), + "PUT with @stage/UUID AUTO_COMPRESS=FALSE failed: {:?}", + result.err() + ); +} + +#[test] +fn test_snowflake_put_named_stage_uuid_subpath_auto_compress_true() { + let sql = "PUT file:///tmp/test_file.csv @teststage/c8b31cea-a6d1-4413-936d-bf8c9d63ab9f AUTO_COMPRESS=TRUE"; + let result = parse_one(sql, DialectType::Snowflake); + assert!( + result.is_ok(), + "PUT with @stage/UUID AUTO_COMPRESS=TRUE failed: {:?}", + result.err() + ); +} + +#[test] +fn test_snowflake_put_long_path_stage_uuid() { + // Real JDBC test path with long file URI and named stage + UUID + let sql = "PUT file:///Users/test/projects/test_file.csv @testeb07cda9279e4320a061356c5a1eef53/c8b31cea-a6d1-4413-936d-bf8c9d63ab9f AUTO_COMPRESS=FALSE"; + let result = parse_one(sql, DialectType::Snowflake); + assert!( + result.is_ok(), + "PUT with long path + @stage/UUID failed: {:?}", + result.err() + ); +}