Skip to content

Commit e226bda

Browse files
committed
Add experimental draft support for GPML-style graph query
1 parent f57ee41 commit e226bda

File tree

6 files changed

+538
-5
lines changed

6 files changed

+538
-5
lines changed

CHANGELOG.md

+13
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1313
- AST for the currently parsed subset of PartiQL
1414
- Tracking of locations in source text for ASTs and Errors
1515
- Conformance tests via test generation from [partiql-tests](https://github.com/partiql/partiql-tests/)
16+
- An experimental (pending [#15](https://github.com/partiql/partiql-docs/issues/15)) embedding of a subset of
17+
the [GPML (Graph Pattern Matching Language)](https://arxiv.org/abs/2112.06217) graph query into the `FROM` clause,
18+
supporting the features listed below. The use within the grammar is based on the assumption of a new graph data type being added to the
19+
specification of data types within PartiQL, and should be considered experimental until the semantics of the graph
20+
data type are specified.
21+
- basic and abbreviated node and edge patterns (section 4.1 of the GPML paper)
22+
- concatenated path patterns (section 4.2 of the GPML paper)
23+
- path variables (section 4.2 of the GPML paper)
24+
- graph patterns (i.e., comma separated path patterns) (section 4.3 of the GPML paper)
25+
- parenthesized patterns (section 4.4 of the GPML paper)
26+
- path quantifiers (section 4.4 of the GPML paper)
27+
- restrictors and selectors (section 5.1 of the GPML paper)
28+
- pre-filters and post-filters (section 5.2 of the GPML paper)
1629

1730

partiql-ast/src/ast.rs

+147
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use rust_decimal::Decimal as RustDecimal;
1212
use serde::{Deserialize, Serialize};
1313
use std::fmt;
1414
use std::fmt::Display;
15+
use std::num::NonZeroU32;
1516
use std::ops::Range;
1617

1718
/// Provides the required methods for AstNode conversions.
@@ -235,6 +236,13 @@ pub type CallAst = AstBytePos<Call>;
235236
pub type CaseAst = AstBytePos<Case>;
236237
pub type FromClauseAst = AstBytePos<FromClause>;
237238
pub type FromLetAst = AstBytePos<FromLet>;
239+
pub type GraphMatchAst = AstBytePos<GraphMatch>;
240+
pub type GraphMatchExprAst = AstBytePos<GraphMatchExpr>;
241+
pub type GraphMatchEdgeAst = AstBytePos<GraphMatchEdge>;
242+
pub type GraphMatchNodeAst = AstBytePos<GraphMatchNode>;
243+
pub type GraphMatchPatternAst = AstBytePos<GraphMatchPattern>;
244+
pub type GraphMatchPatternPartAst = AstBytePos<GraphMatchPatternPart>;
245+
pub type GraphMatchQuantifierAst = AstBytePos<GraphMatchQuantifier>;
238246
pub type GroupByExprAst = AstBytePos<GroupByExpr>;
239247
pub type GroupKeyAst = AstBytePos<GroupKey>;
240248
pub type InAst = AstBytePos<In>;
@@ -655,6 +663,9 @@ pub enum FromClause {
655663
FromLet(FromLetAst),
656664
/// <from_source> JOIN \[INNER | LEFT | RIGHT | FULL\] <from_source> ON <expr>
657665
Join(JoinAst),
666+
667+
/// <expr> MATCH <graph_pattern>
668+
GraphMatch(GraphMatchAst),
658669
}
659670

660671
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
@@ -699,6 +710,142 @@ pub enum JoinKind {
699710
Cross,
700711
}
701712

713+
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
714+
pub struct GraphMatch {
715+
pub expr: Box<Expr>,
716+
pub graph_expr: Box<GraphMatchExprAst>,
717+
}
718+
719+
/// The direction of an edge
720+
/// | Orientation | Edge pattern | Abbreviation |
721+
/// |---------------------------+--------------+--------------|
722+
/// | Pointing left | <−[ spec ]− | <− |
723+
/// | Undirected | ~[ spec ]~ | ~ |
724+
/// | Pointing right | −[ spec ]−> | −> |
725+
/// | Left or undirected | <~[ spec ]~ | <~ |
726+
/// | Undirected or right | ~[ spec ]~> | ~> |
727+
/// | Left or right | <−[ spec ]−> | <−> |
728+
/// | Left, undirected or right | −[ spec ]− | − |
729+
///
730+
/// Fig. 5. Table of edge patterns:
731+
/// https://arxiv.org/abs/2112.06217
732+
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
733+
pub enum GraphMatchDirection {
734+
Left,
735+
Undirected,
736+
Right,
737+
LeftOrUndirected,
738+
UndirectedOrRight,
739+
LeftOrRight,
740+
LeftOrUndirectedOrRight,
741+
}
742+
743+
/// A part of a graph pattern
744+
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
745+
pub enum GraphMatchPatternPart {
746+
/// A single node in a graph pattern.
747+
Node(GraphMatchNodeAst),
748+
749+
/// A single edge in a graph pattern.
750+
Edge(GraphMatchEdgeAst),
751+
752+
/// A sub-pattern.
753+
Pattern(GraphMatchPatternAst),
754+
}
755+
756+
/// A quantifier for graph edges or patterns. (e.g., the `{2,5}` in `MATCH (x)->{2,5}(y)`)
757+
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
758+
pub struct GraphMatchQuantifier {
759+
pub lower: u32,
760+
pub upper: Option<NonZeroU32>,
761+
}
762+
763+
/// A path restrictor
764+
/// | Keyword | Description
765+
/// |----------------+--------------
766+
/// | TRAIL | No repeated edges.
767+
/// | ACYCLIC | No repeated nodes.
768+
/// | SIMPLE | No repeated nodes, except that the first and last nodes may be the same.
769+
///
770+
/// Fig. 7. Table of restrictors:
771+
/// https://arxiv.org/abs/2112.06217
772+
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
773+
pub enum GraphMatchRestrictor {
774+
Trail,
775+
Acyclic,
776+
Simple,
777+
}
778+
779+
/// A single node in a graph pattern.
780+
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
781+
pub struct GraphMatchNode {
782+
/// an optional node pre-filter, e.g.: `WHERE c.name='Alarm'` in `MATCH (c WHERE c.name='Alarm')`
783+
pub prefilter: Option<Box<Expr>>,
784+
/// the optional element variable of the node match, e.g.: `x` in `MATCH (x)`
785+
pub variable: Option<SymbolPrimitive>,
786+
/// the optional label(s) to match for the node, e.g.: `Entity` in `MATCH (x:Entity)`
787+
pub label: Option<Vec<SymbolPrimitive>>,
788+
}
789+
790+
/// A single edge in a graph pattern.
791+
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
792+
pub struct GraphMatchEdge {
793+
/// edge direction
794+
pub direction: GraphMatchDirection,
795+
/// an optional quantifier for the edge match
796+
pub quantifier: Option<GraphMatchQuantifierAst>,
797+
/// an optional edge pre-filter, e.g.: `WHERE t.capacity>100` in `MATCH −[t:hasSupply WHERE t.capacity>100]−>`
798+
pub prefilter: Option<Box<Expr>>,
799+
/// the optional element variable of the edge match, e.g.: `t` in `MATCH −[t]−>`
800+
pub variable: Option<SymbolPrimitive>,
801+
/// the optional label(s) to match for the edge, e.g.: `Target` in `MATCH −[t:Target]−>`
802+
pub label: Option<Vec<SymbolPrimitive>>,
803+
}
804+
805+
/// A single graph match pattern.
806+
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
807+
pub struct GraphMatchPattern {
808+
pub restrictor: Option<GraphMatchRestrictor>,
809+
/// an optional quantifier for the entire pattern match
810+
pub quantifier: Option<GraphMatchQuantifierAst>,
811+
/// an optional pattern pre-filter, e.g.: `WHERE a.name=b.name` in `MATCH [(a)->(b) WHERE a.name=b.name]`
812+
pub prefilter: Option<Box<Expr>>,
813+
/// the optional element variable of the pattern, e.g.: `p` in `MATCH p = (a) −[t]−> (b)`
814+
pub variable: Option<SymbolPrimitive>,
815+
/// the ordered pattern parts
816+
pub parts: Vec<GraphMatchPatternPart>,
817+
}
818+
819+
/// A path selector
820+
/// | Keyword
821+
/// |------------------
822+
/// | ANY SHORTEST
823+
/// | ALL SHORTEST
824+
/// | ANY
825+
/// | ANY k
826+
/// | SHORTEST k
827+
/// | SHORTEST k GROUP
828+
///
829+
/// Fig. 8. Table of selectors:
830+
/// https://arxiv.org/abs/2112.06217
831+
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
832+
pub enum GraphMatchSelector {
833+
AnyShortest,
834+
AllShortest,
835+
Any,
836+
AnyK(NonZeroU32),
837+
ShortestK(NonZeroU32),
838+
ShortestKGroup(NonZeroU32),
839+
}
840+
841+
/// A graph match clause as defined in GPML
842+
/// See https://arxiv.org/abs/2112.06217
843+
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
844+
pub struct GraphMatchExpr {
845+
pub selector: Option<GraphMatchSelector>,
846+
pub patterns: Vec<GraphMatchPatternAst>,
847+
}
848+
702849
/// A generic pair of expressions. Used in the `struct`, `searched_case`
703850
/// and `simple_case` expr variants above.
704851
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]

partiql-parser/benches/bench_parse.rs

+17
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,20 @@ const Q_COMPLEX_FEXPR: &str = r#"
3434
AS deltas FROM SOURCE_VIEW_DELTA_FULL_TRANSACTIONS delta_full_transactions
3535
"#;
3636

37+
const Q_COMPLEX_MATCH: &str = r#"
38+
SELECT (
39+
SELECT numRec, data
40+
FROM
41+
(deltaGraph MATCH (t) -[:hasChange]-> (dt), (dt) -[:checkPointedBy]-> (t1)),
42+
(
43+
SELECT foo(u.id), bar(review), rindex
44+
FROM delta.data as u CROSS JOIN UNPIVOT u.reviews as review AT rindex
45+
) as data,
46+
delta.numRec as numRec
47+
)
48+
AS deltas FROM SOURCE_VIEW_DELTA_FULL_TRANSACTIONS delta_full_transactions
49+
"#;
50+
3751
fn parse_bench(c: &mut Criterion) {
3852
fn parse(text: &str) -> ParserResult {
3953
Parser::default().parse(text)
@@ -45,6 +59,9 @@ fn parse_bench(c: &mut Criterion) {
4559
c.bench_function("parse-complex-fexpr", |b| {
4660
b.iter(|| parse(black_box(Q_COMPLEX_FEXPR)))
4761
});
62+
c.bench_function("parse-complex-match", |b| {
63+
b.iter(|| parse(black_box(Q_COMPLEX_MATCH)))
64+
});
4865
}
4966

5067
criterion_group! {

partiql-parser/src/lexer.rs

+32-4
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,8 @@ pub enum Token<'input> {
465465
Caret,
466466
#[token(".")]
467467
Period,
468+
#[token("~")]
469+
Tilde,
468470
#[token("||")]
469471
DblPipe,
470472

@@ -510,10 +512,14 @@ pub enum Token<'input> {
510512
// Keywords
511513
#[regex("(?i:All)")]
512514
All,
515+
#[regex("(?i:Acyclic)")]
516+
Acyclic,
513517
#[regex("(?i:Asc)")]
514518
Asc,
515519
#[regex("(?i:And)")]
516520
And,
521+
#[regex("(?i:Any)")]
522+
Any,
517523
#[regex("(?i:As)")]
518524
As,
519525
#[regex("(?i:At)")]
@@ -572,6 +578,8 @@ pub enum Token<'input> {
572578
Like,
573579
#[regex("(?i:Limit)")]
574580
Limit,
581+
#[regex("(?i:Match)")]
582+
Match,
575583
#[regex("(?i:Missing)")]
576584
Missing,
577585
#[regex("(?i:Natural)")]
@@ -602,8 +610,14 @@ pub enum Token<'input> {
602610
Right,
603611
#[regex("(?i:Select)")]
604612
Select,
613+
#[regex("(?i:Simple)")]
614+
Simple,
615+
#[regex("(?i:Shortest)")]
616+
Shortest,
605617
#[regex("(?i:Then)")]
606618
Then,
619+
#[regex("(?i:Trail)")]
620+
Trail,
607621
#[regex("(?i:True)")]
608622
True,
609623
#[regex("(?i:Union)")]
@@ -628,9 +642,11 @@ impl<'input> Token<'input> {
628642
pub fn is_keyword(&self) -> bool {
629643
matches!(
630644
self,
631-
Token::All
645+
Token::Acyclic
646+
| Token::All
632647
| Token::Asc
633648
| Token::And
649+
| Token::Any
634650
| Token::As
635651
| Token::At
636652
| Token::Between
@@ -656,6 +672,7 @@ impl<'input> Token<'input> {
656672
| Token::Left
657673
| Token::Like
658674
| Token::Limit
675+
| Token::Match
659676
| Token::Missing
660677
| Token::Natural
661678
| Token::Not
@@ -671,7 +688,10 @@ impl<'input> Token<'input> {
671688
| Token::Preserve
672689
| Token::Right
673690
| Token::Select
691+
| Token::Simple
692+
| Token::Shortest
674693
| Token::Then
694+
| Token::Trail
675695
| Token::Union
676696
| Token::Unpivot
677697
| Token::Using
@@ -717,6 +737,7 @@ impl<'input> fmt::Display for Token<'input> {
717737
Token::Slash => write!(f, "/"),
718738
Token::Caret => write!(f, "^"),
719739
Token::Period => write!(f, "."),
740+
Token::Tilde => write!(f, "~"),
720741
Token::DblPipe => write!(f, "||"),
721742
Token::UnquotedIdent(id) => write!(f, "<{}:UNQUOTED_IDENT>", id),
722743
Token::QuotedIdent(id) => write!(f, "<{}:QUOTED_IDENT>", id),
@@ -729,9 +750,11 @@ impl<'input> fmt::Display for Token<'input> {
729750
Token::EmbeddedIonQuote => write!(f, "<ION>"),
730751
Token::Ion(txt) => write!(f, "<{}:ION>", txt),
731752

732-
Token::All
753+
Token::Acyclic
754+
| Token::All
733755
| Token::Asc
734756
| Token::And
757+
| Token::Any
735758
| Token::As
736759
| Token::At
737760
| Token::Between
@@ -761,6 +784,7 @@ impl<'input> fmt::Display for Token<'input> {
761784
| Token::Left
762785
| Token::Like
763786
| Token::Limit
787+
| Token::Match
764788
| Token::Missing
765789
| Token::Natural
766790
| Token::Not
@@ -776,7 +800,10 @@ impl<'input> fmt::Display for Token<'input> {
776800
| Token::Preserve
777801
| Token::Right
778802
| Token::Select
803+
| Token::Simple
804+
| Token::Shortest
779805
| Token::Then
806+
| Token::Trail
780807
| Token::True
781808
| Token::Union
782809
| Token::Unpivot
@@ -811,7 +838,8 @@ mod tests {
811838
"WiTH Where Value uSiNg Unpivot UNION True Select right Preserve pivoT Outer Order Or \
812839
On Offset Nulls Null Not Natural Missing Limit Like Left Lateral Last Join \
813840
Intersect Is Inner In Having Group From For Full First False Except Escape Desc \
814-
Cross By Between At As And Asc All Values Case When Then Else End";
841+
Cross By Between At As And Asc All Values Case When Then Else End Match Any Shortest \
842+
Trail Acyclic Simple";
815843
let symbols = symbols.split(' ').chain(primitives.split(' '));
816844
let keywords = keywords.split(' ');
817845

@@ -833,7 +861,7 @@ mod tests {
833861
"<quoted_ident:QUOTED_IDENT>", "IN", "<unquoted_atident:UNQUOTED_ATIDENT>", "HAVING",
834862
"<quoted_atident:QUOTED_ATIDENT>", "GROUP", "FROM", "FOR", "FULL", "FIRST", "FALSE", "EXCEPT",
835863
"ESCAPE", "DESC", "CROSS", "BY", "BETWEEN", "AT", "AS", "AND", "ASC", "ALL", "VALUES",
836-
"CASE", "WHEN", "THEN", "ELSE", "END",
864+
"CASE", "WHEN", "THEN", "ELSE", "END", "MATCH", "ANY", "SHORTEST", "TRAIL", "ACYCLIC", "SIMPLE",
837865
];
838866
let displayed = toks
839867
.into_iter()

0 commit comments

Comments
 (0)