Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 29 additions & 12 deletions crates/polyglot-sql/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4882,8 +4882,9 @@ impl Parser {
} else {
return Err(self.parse_error("Expected identifier after ${"));
}
} else if self.check(TokenType::String) {
} else if self.check(TokenType::String) || self.check(TokenType::DollarString) {
// DuckDB allows string literals as table names: SELECT * FROM 'x.y'
// Snowflake JDBC uses dollar-quoted strings for stage paths: SELECT $1 FROM $$@%"table"/$$
// Convert to a quoted identifier
let string_token = self.advance();
let table_name = Identifier {
Expand Down Expand Up @@ -22741,9 +22742,14 @@ impl Parser {
while self.check(TokenType::Slash) {
self.skip(); // consume /
stage_path.push('/');
if (self.check(TokenType::Var)
// Use `while` (not `if`) because a single path segment between slashes
// may consist of multiple tokens — e.g., a UUID like `c8b31cea-a6d1-4413`
// tokenizes as Identifier("c8b31cea") + Dash + Identifier("a6d1") + Dash + Number(4413).
while (self.check(TokenType::Var)
|| self.check_keyword()
|| self.is_identifier_token())
|| self.is_identifier_token()
|| self.check(TokenType::Number)
|| self.check(TokenType::Dash))
&& !self.check_next(TokenType::Eq)
{
stage_path.push_str(&self.advance().text);
Expand Down Expand Up @@ -22790,10 +22796,14 @@ impl Parser {
while self.check(TokenType::Slash) {
self.skip(); // consume /
stage_path.push('/');
// Get path segment but don't consume if followed by = (that's a parameter)
if (self.check(TokenType::Var)
// Use `while` (not `if`) because a single path segment between slashes
// may consist of multiple tokens — e.g., a UUID like `c8b31cea-a6d1-4413`
// tokenizes as Identifier("c8b31cea") + Dash + Identifier("a6d1") + Dash + Number(4413).
while (self.check(TokenType::Var)
|| self.check_keyword()
|| self.is_identifier_token())
|| self.is_identifier_token()
|| self.check(TokenType::Number)
|| self.check(TokenType::Dash))
&& !self.check_next(TokenType::Eq)
{
stage_path.push_str(&self.advance().text);
Expand Down Expand Up @@ -22828,9 +22838,14 @@ impl Parser {
while self.check(TokenType::Slash) {
self.skip(); // consume /
stage_path.push('/');
if (self.check(TokenType::Var)
// Use `while` (not `if`) because a single path segment between slashes
// may consist of multiple tokens — e.g., a UUID like `c8b31cea-a6d1-4413`
// tokenizes as Identifier("c8b31cea") + Dash + Identifier("a6d1") + Dash + Number(4413).
while (self.check(TokenType::Var)
|| self.check_keyword()
|| self.is_identifier_token())
|| self.is_identifier_token()
|| self.check(TokenType::Number)
|| self.check(TokenType::Dash))
&& !self.check_next(TokenType::Eq)
{
stage_path.push_str(&self.advance().text);
Expand Down Expand Up @@ -22863,9 +22878,11 @@ impl Parser {
break;
}
// Stop at ? (placeholder for stage destination), quoted string
// (e.g., '@SYSTEM$BIND/...'), or semicolon
// (e.g., '@SYSTEM$BIND/...'), dollar-quoted string (e.g., $$@%"table"$$),
// or semicolon
if self.check(TokenType::Parameter)
|| self.check(TokenType::String)
|| self.check(TokenType::DollarString)
|| self.check(TokenType::Semicolon)
{
break;
Expand All @@ -22876,11 +22893,11 @@ impl Parser {
(source_parts.join(""), false)
};

// Parse target stage (@stage_name, ? placeholder, or quoted '@stage')
// Parse target stage (@stage_name, ? placeholder, quoted '@stage', or dollar-quoted $$@%"stage"$$)
let target = if self.match_token(TokenType::Parameter) {
Expression::Placeholder(Placeholder { index: None })
} else if self.check(TokenType::String) {
// Quoted stage: '@SYSTEM$BIND/path'
} else if self.check(TokenType::String) || self.check(TokenType::DollarString) {
// Quoted stage: '@SYSTEM$BIND/path' or $$@%"table"$$
let tok = self.advance();
Expression::Literal(Box::new(Literal::String(tok.text.clone())))
} else {
Expand Down
102 changes: 102 additions & 0 deletions crates/polyglot-sql/tests/snowflake_regression_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -327,3 +327,105 @@ fn test_snowflake_create_table_unaffected() {
let result = parse_one(&gen, DialectType::Snowflake);
assert!(result.is_ok(), "CREATE TABLE broken: {:?}", result.err());
}

// =====================================================================
// Category A: DollarString in PUT (source/target)
// Related: https://github.com/tobilg/polyglot/issues/165
// =====================================================================

#[test]
fn test_snowflake_put_dollar_quoted_stage() {
    // Regression: a PUT whose target stage is written as a dollar-quoted
    // string ($$...$$), the form the JDBC driver emits when the stage name
    // contains special characters, must parse under the Snowflake dialect.
    let statement = r#"put file:///tmp/placeholder $$@%"ice cream (nice)"$$ overwrite=true"#;
    let outcome = parse_one(statement, DialectType::Snowflake);
    let parsed = outcome.is_ok();
    assert!(
        parsed,
        "PUT with dollar-quoted stage target failed: {:?}",
        outcome.err()
    );
}

// =====================================================================
// Category B: DollarString as table source in SELECT FROM
// Related: https://github.com/tobilg/polyglot/issues/165
// =====================================================================

#[test]
fn test_snowflake_select_from_dollar_quoted_stage() {
    // Regression: a dollar-quoted stage path ($$...$$) used as the FROM
    // source, as produced by the JDBC driver, must parse under the
    // Snowflake dialect.
    let statement = r#"SELECT $1 FROM $$@%"ice cream (nice)"/$$"#;
    let outcome = parse_one(statement, DialectType::Snowflake);
    let parsed = outcome.is_ok();
    assert!(
        parsed,
        "SELECT FROM dollar-quoted stage path failed: {:?}",
        outcome.err()
    );
}

// =====================================================================
// Category C: @~ user stage with UUID path segments
// Related: https://github.com/tobilg/polyglot/issues/165
// =====================================================================

#[test]
fn test_snowflake_put_user_stage_uuid_path() {
    // Regression: PUT into the user stage (@~) where one path segment is a
    // hyphenated UUID followed by a further named segment.
    let statement = "put file:///tmp/placeholder @~/00626646-bb1e-4729-a1ab-d4b96aebbed5/testUploadStream overwrite=true";
    let outcome = parse_one(statement, DialectType::Snowflake);
    let parsed = outcome.is_ok();
    assert!(
        parsed,
        "PUT with @~/UUID/name path failed: {:?}",
        outcome.err()
    );
}

#[test]
fn test_snowflake_put_user_stage_uuid_path_compress() {
    // Same shape as the plain user-stage UUID case, but with a different
    // trailing segment name (testCompressAndUploadStream).
    let statement = "put file:///tmp/placeholder @~/00626646-bb1e-4729-a1ab-d4b96aebbed5/testCompressAndUploadStream overwrite=true";
    let outcome = parse_one(statement, DialectType::Snowflake);
    let parsed = outcome.is_ok();
    assert!(
        parsed,
        "PUT with @~/UUID/compressUpload path failed: {:?}",
        outcome.err()
    );
}

// =====================================================================
// Category D: Named stage with UUID subpath + AUTO_COMPRESS
// Related: https://github.com/tobilg/polyglot/issues/165
// =====================================================================

#[test]
fn test_snowflake_put_named_stage_uuid_subpath_auto_compress() {
    // Regression: PUT into a named stage with a hyphenated UUID subpath,
    // immediately followed by the AUTO_COMPRESS=FALSE parameter.
    let statement = "PUT file:///tmp/test_file.csv @teststage/c8b31cea-a6d1-4413-936d-bf8c9d63ab9f AUTO_COMPRESS=FALSE";
    let outcome = parse_one(statement, DialectType::Snowflake);
    let parsed = outcome.is_ok();
    assert!(
        parsed,
        "PUT with @stage/UUID AUTO_COMPRESS=FALSE failed: {:?}",
        outcome.err()
    );
}

#[test]
fn test_snowflake_put_named_stage_uuid_subpath_auto_compress_true() {
    // Regression: PUT into a named stage with a hyphenated UUID subpath,
    // immediately followed by the AUTO_COMPRESS=TRUE parameter.
    let statement = "PUT file:///tmp/test_file.csv @teststage/c8b31cea-a6d1-4413-936d-bf8c9d63ab9f AUTO_COMPRESS=TRUE";
    let outcome = parse_one(statement, DialectType::Snowflake);
    let parsed = outcome.is_ok();
    assert!(
        parsed,
        "PUT with @stage/UUID AUTO_COMPRESS=TRUE failed: {:?}",
        outcome.err()
    );
}

#[test]
fn test_snowflake_put_long_path_stage_uuid() {
    // Regression: a longer local file URI combined with a long named stage
    // and a hyphenated UUID subpath, taken from a real JDBC test path.
    let statement = "PUT file:///Users/test/projects/test_file.csv @testeb07cda9279e4320a061356c5a1eef53/c8b31cea-a6d1-4413-936d-bf8c9d63ab9f AUTO_COMPRESS=FALSE";
    let outcome = parse_one(statement, DialectType::Snowflake);
    let parsed = outcome.is_ok();
    assert!(
        parsed,
        "PUT with long path + @stage/UUID failed: {:?}",
        outcome.err()
    );
}
Loading