Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add SQL parser prototype #338

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ comfy-table = "7.1"
html-escape = "0.2"
syn = "2.0.91"
quote = "1.0.37"
paste = "1.0.15"
proc-macro2 = "1.0.92"
prettyplease = "0.2.24"
phf = { version = "0.11.2", features = ["macros"] }
ryu = "1.0.18"
Expand Down Expand Up @@ -77,6 +79,7 @@ aes-gcm = "0.10.3"
cbc = { version = "0.1.2", features = ["std"] }
base64 = "0.22.1"
md-5 = "0.10.6"
chumsky = { version = "1.0.0-alpha.7", default-features = false }

######
# The versions of the following dependencies are managed manually.
Expand Down
11 changes: 5 additions & 6 deletions crates/sail-spark-connect/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,6 @@ fn build_spark_config() -> Result<(), Box<dyn std::error::Error>> {
.collect::<Vec<_>>();

let tokens = quote! {
use phf::phf_map;

#[derive(Debug, Clone, PartialEq)]
pub struct SparkConfigEntry<'a> {
pub key: &'a str,
Expand All @@ -176,16 +174,17 @@ fn build_spark_config() -> Result<(), Box<dyn std::error::Error>> {
// We define the map in a separate macro to avoid slowing down the IDE
// when previewing the definition of `SPARK_CONFIG`.
macro_rules! spark_config_map {
() => { phf_map! { #(#entries)* } }
() => { phf::phf_map! { #(#entries)* } }
}

pub static SPARK_CONFIG: phf::Map<&'static str, SparkConfigEntry<'static>> = spark_config_map!();
};

let tree = syn::parse2(tokens)?;
let formatted = prettyplease::unparse(&tree);
let out_dir = PathBuf::from(std::env::var("OUT_DIR")?);
std::fs::write(out_dir.join("spark_config.rs"), formatted)?;
std::fs::write(
out_dir.join("spark_config.rs"),
prettyplease::unparse(&syn::parse2(tokens)?),
)?;
Ok(())
}

Expand Down
12 changes: 12 additions & 0 deletions crates/sail-sql-macro/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[package]
name = "sail-sql-macro"
version.workspace = true
edition.workspace = true

[dependencies]
syn = { workspace = true }
quote = { workspace = true }
proc-macro2 = { workspace = true }

[lib]
proc-macro = true
113 changes: 113 additions & 0 deletions crates/sail-sql-macro/src/attribute.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
use std::mem;

use syn::punctuated::Punctuated;
use syn::spanned::Spanned;
use syn::{Attribute, Expr, Meta, MetaNameValue, Path, Token};

/// An extractor for a specific attribute name.
/// The attribute can have named arguments such as `#[attribute(argument = value)]`,
/// or paths such as `#[attribute(path)]`.
///
/// The extractor is filled once from a list of attributes and then drained
/// piece by piece via the `extract_*` methods; `expect_empty` reports whatever
/// has not been extracted.
pub struct AttributeExtractor<'a> {
/// The attribute name this extractor matches; also used in error messages.
name: &'a str,
/// The `argument = value` entries collected from the attribute and not yet extracted.
arguments: Vec<MetaNameValue>,
/// The bare path entries collected from the attribute and not yet extracted.
paths: Vec<Path>,
}

impl<'a> AttributeExtractor<'a> {
/// Creates an extractor for the given attribute name.
/// The arguments and paths are collected from the attribute list and
/// stored in the extractor for further extraction.
pub fn try_new(name: &'a str, attributes: &[Attribute]) -> syn::Result<Self> {
let mut arguments = Vec::new();
let mut paths = Vec::new();
for attr in attributes {
if !attr.path().is_ident(name) {
continue;
}
let nested = attr.parse_args_with(Punctuated::<Meta, Token![,]>::parse_terminated)?;
for meta in nested {
match meta {
Meta::Path(x) => {
paths.push(x);
}
Meta::NameValue(x) => {
arguments.push(x);
}
_ => return Err(syn::Error::new(meta.span(), "invalid attribute value")),
}
}
}
Ok(Self {
name,
arguments,
paths,
})
}

/// Returns an error if there are any remaining arguments or paths for the attribute.
pub fn expect_empty(&self) -> syn::Result<()> {
if let Some(x) = self.arguments.first() {
Err(syn::Error::new(
x.span(),
format!("unexpected `{}` attribute argument", self.name),
))
} else if let Some(x) = self.paths.first() {
Err(syn::Error::new(
x.span(),
format!("unexpected `{}` attribute path", self.name),
))
} else {
Ok(())
}
}

/// Extracts a single argument value from the attribute.
/// The argument is removed from the extractor.
/// Returns an error if there are multiple arguments with the same name.
pub fn extract_argument_value<T, F>(&mut self, argument: &str, transform: F) -> syn::Result<T>
where
F: FnOnce(Option<Expr>) -> syn::Result<T>,
{
let arguments = mem::take(&mut self.arguments);
let (mut extracted, remaining) = arguments
.into_iter()
.partition::<Vec<_>, _>(|x| x.path.is_ident(argument));
self.arguments = remaining;
let one = extracted.pop();
if let Some(other) = extracted.last() {
Err(syn::Error::new(
other.span(),
format!(
"duplicated `{}` argument for the `{}` attribute",
argument, self.name
),
))
} else {
transform(one.map(|x| x.value))
}
}

/// Extracts a single path from the attribute.
/// The path is removed from the extractor.
/// Returns an error if there are multiple paths with the same name.
#[allow(unused)]
pub fn extract_path(&mut self, path: &str) -> syn::Result<Option<()>> {
let paths = mem::take(&mut self.paths);
let (mut extracted, remaining) = paths
.into_iter()
.partition::<Vec<_>, _>(|x| x.is_ident(path));
self.paths = remaining;
let one = extracted.pop();
if let Some(other) = extracted.last() {
Err(syn::Error::new(
other.span(),
format!(
"duplicated `{}` path for the `{}` attribute",
path, self.name
),
))
} else {
Ok(one.map(|_| ()))
}
}
}
55 changes: 55 additions & 0 deletions crates/sail-sql-macro/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
extern crate proc_macro;
extern crate proc_macro2;

use proc_macro::TokenStream;
use syn::{parse_macro_input, DeriveInput};

mod attribute;
mod tree;
pub(crate) mod utils;

/// Derives the `TreeParser` trait by generating a recursive descent parser for the type.
///
/// The type can be an enum with struct or tuple variants, or a struct with named or unnamed fields.
/// For enums, the variants are parsed as choices (or nested choices for enums with many variants).
/// For structs, the fields are parsed sequentially.
///
/// The parser cannot be derived for enums with unit variants, or structs with no fields.
/// The parser cannot be derived for types corresponding to a grammar with left recursion,
/// or a grammar requiring operator precedence handling.
/// In such cases, the `TreeParser` trait should be implemented manually.
/// `TreeParser` should also be implemented manually for terminals such as keywords, literals, and
/// operators.
///
/// The attribute `parser` can be used to control how the parsers are derived.
/// There are a few supported arguments for the attribute.
///
/// `parser(dependency = "type")` can be specified at the top level for the enum or the struct,
/// where `type` is a single type `T` or a tuple type `(T1, T2, ...)`. Note that the dependency
/// needs to be specified as a string literal.
/// For a single type `T`, the derived `parser()` method will expect a parser for `T` as the data.
/// For a tuple type `(T1, T2, ...)`, the derived `parser()` method will expect a tuple of parsers
/// for each type as the data.
///
/// This argument is used to support recursive types, where the parser needs to first be declared
/// via `chumsky::recursive::Recursive::declare()`. `parser()` receives the declared parser(s)
/// and the returned parser can be then used for `chumsky::recursive::Recursive::define()`.
///
/// If this argument is not specified, the `parser()` method will expect unit data (`()`).
///
/// `parser(function = expr)` can be specified for individual fields (named or unnamed fields in
/// enum variants or structs), where `expr` is a function that takes the data (one or a tuple of
/// declared parsers) and returns the parser for the field.
///
/// If this argument is not specified, the parser for the field is derived by calling the `parser()`
/// method of the field type with unit data (`()`). Such unit data is accepted for terminal parsers
/// or derived parsers without the `parser(dependency = "...")` attribute.
///
/// The `parser` attribute is not allowed at the enum variant level.
#[proc_macro_derive(TreeParser, attributes(parser))]
pub fn derive_tree_parser(input: TokenStream) -> TokenStream {
    let ast = parse_macro_input!(input as DeriveInput);
    match tree::parser::derive_tree_parser(ast) {
        Ok(tokens) => tokens.into(),
        // Surface derivation failures as compile errors at the offending span.
        Err(error) => error.into_compile_error().into(),
    }
}
1 change: 1 addition & 0 deletions crates/sail-sql-macro/src/tree/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pub mod parser;
Loading