Skip to content

Commit f4931e5

Browse files
committed
Add experimental draft support for GPML-style graph query
1 parent 17cbcb5 commit f4931e5

File tree

6 files changed

+543
-5
lines changed

6 files changed

+543
-5
lines changed

CHANGELOG.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,25 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1717
- Adds the capability to export the playground session on the client side so that it can be fetched from another playground window.
1818
- Adds a REST API and exposes /parse for parsing the query over http request.
1919
- Containerization using Docker.
20+
- An experimental (pending [#15](https://github.com/partiql/partiql-docs/issues/15)) embedding of a subset of
21+
the [GPML (Graph Pattern Matching Language)](https://arxiv.org/abs/2112.06217) graph query into the `FROM` clause,
22+
supporting the features listed below. The use within the grammar is based on the assumption of a new graph data type being added to the
23+
specification of data types within PartiQL, and should be considered experimental until the semantics of the graph
24+
data type are specified.
25+
- basic and abbreviated node and edge patterns (section 4.1 of the GPML paper)
26+
- concatenated path patterns (section 4.2 of the GPML paper)
27+
- path variables (section 4.2 of the GPML paper)
28+
- graph patterns (i.e., comma separated path patterns) (section 4.3 of the GPML paper)
29+
- parenthesized patterns (section 4.4 of the GPML paper)
30+
- path quantifiers (section 4.4 of the GPML paper)
31+
- restrictors and selectors (section 5.1 of the GPML paper)
32+
- pre-filters and post-filters (section 5.2 of the GPML paper)
2033

2134
### Fixes
2235
- Fixes the bug with AST graph PAN and ZOOM—before this change, panning and zooming were quite flaky and very hard to work with.
2336
- Fixes the version value for the session and JSON output by ensuring it gets picked from the selected version in the UI.
2437

38+
2539
## [0.1.0] - 2022-08-05
2640
### Added
2741
- Lexer & Parser for the majority of PartiQL query capabilities—see syntax [success](https://github.com/partiql/partiql-tests/tree/main/partiql-tests-data/success/syntax)
@@ -33,5 +47,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
3347
- PartiQL Playground proof of concept (POC)
3448
- PartiQL CLI with REPL and query visualization features
3549

50+
3651
[Unreleased]: https://github.com/partiql/partiql-lang-rust/compare/v0.1.0...HEAD
3752
[0.1.0]: https://github.com/partiql/partiql-lang-rust/compare/v0.1.0

partiql-ast/src/ast.rs

Lines changed: 150 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
use rust_decimal::Decimal as RustDecimal;
1111

1212
use std::fmt;
13+
use std::num::NonZeroU32;
1314

1415
#[cfg(feature = "serde")]
1516
use serde::{Deserialize, Serialize};
@@ -534,6 +535,8 @@ pub enum FromClause {
534535
FromLet(AstNode<FromLet>),
535536
/// <from_source> JOIN \[INNER | LEFT | RIGHT | FULL\] <from_source> ON <expr>
536537
Join(AstNode<Join>),
538+
/// <expr> MATCH <graph_pattern>
539+
GraphMatch(AstNode<GraphMatch>),
537540
}
538541

539542
#[derive(Clone, Debug, PartialEq)]
@@ -583,7 +586,153 @@ pub enum JoinSpec {
583586
Natural,
584587
}
585588

586-
/// GROUP BY <grouping_strategy> <group_key>[, <group_key>]... \[AS <symbol>\]
589+
/// A graph match used as a `FROM` source: `<expr> MATCH <graph_pattern>`
#[derive(Clone, Debug, PartialEq)]
590+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
591+
pub struct GraphMatch {
592+
/// the `<expr>` on the left-hand side of `MATCH`, e.g.: `deltaGraph` in `deltaGraph MATCH (t)`
pub expr: Box<Expr>,
593+
/// the `<graph_pattern>` on the right-hand side of `MATCH`
pub graph_expr: Box<AstNode<GraphMatchExpr>>,
594+
}
595+
596+
/// The direction of an edge in a graph pattern
597+
/// | Orientation | Edge pattern | Abbreviation |
598+
/// |---------------------------+--------------+--------------|
599+
/// | Pointing left | <−[ spec ]− | <− |
600+
/// | Undirected | ~[ spec ]~ | ~ |
601+
/// | Pointing right | −[ spec ]−> | −> |
602+
/// | Left or undirected | <~[ spec ]~ | <~ |
603+
/// | Undirected or right | ~[ spec ]~> | ~> |
604+
/// | Left or right | <−[ spec ]−> | <−> |
605+
/// | Left, undirected or right | −[ spec ]− | − |
606+
///
607+
/// Fig. 5. Table of edge patterns:
608+
/// <https://arxiv.org/abs/2112.06217>
609+
#[derive(Clone, Debug, PartialEq)]
610+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
611+
pub enum GraphMatchDirection {
612+
/// Pointing left: `<−[ spec ]−`, abbreviated `<−`
Left,
613+
/// Undirected: `~[ spec ]~`, abbreviated `~`
Undirected,
614+
/// Pointing right: `−[ spec ]−>`, abbreviated `−>`
Right,
615+
/// Left or undirected: `<~[ spec ]~`, abbreviated `<~`
LeftOrUndirected,
616+
/// Undirected or right: `~[ spec ]~>`, abbreviated `~>`
UndirectedOrRight,
617+
/// Left or right: `<−[ spec ]−>`, abbreviated `<−>`
LeftOrRight,
618+
/// Left, undirected or right: `−[ spec ]−`, abbreviated `−`
LeftOrUndirectedOrRight,
619+
}
620+
621+
/// A part of a graph pattern: a node, an edge, or a nested sub-pattern.
622+
#[derive(Clone, Debug, PartialEq)]
623+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
624+
pub enum GraphMatchPatternPart {
625+
/// A single node in a graph pattern.
626+
Node(AstNode<GraphMatchNode>),
627+
628+
/// A single edge in a graph pattern.
629+
Edge(AstNode<GraphMatchEdge>),
630+
631+
/// A sub-pattern, i.e., a complete [`GraphMatchPattern`] nested inside the enclosing pattern.
632+
Pattern(AstNode<GraphMatchPattern>),
633+
}
634+
635+
/// A quantifier for graph edges or patterns. (e.g., the `{2,5}` in `MATCH (x)->{2,5}(y)`)
636+
#[derive(Clone, Debug, PartialEq)]
637+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
638+
pub struct GraphMatchQuantifier {
639+
/// the lower bound of the quantifier, e.g.: `2` in `{2,5}`
pub lower: u32,
640+
/// the optional, non-zero upper bound of the quantifier, e.g.: `5` in `{2,5}`
/// NOTE(review): `None` presumably means "no upper bound" — confirm against the parser
pub upper: Option<NonZeroU32>,
641+
}
642+
643+
/// A path restrictor
644+
/// | Keyword | Description
645+
/// |----------------+--------------
646+
/// | TRAIL | No repeated edges.
647+
/// | ACYCLIC | No repeated nodes.
648+
/// | SIMPLE | No repeated nodes, except that the first and last nodes may be the same.
649+
///
650+
/// Fig. 7. Table of restrictors:
651+
/// <https://arxiv.org/abs/2112.06217>
652+
#[derive(Clone, Debug, PartialEq)]
653+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
654+
pub enum GraphMatchRestrictor {
655+
/// `TRAIL`: no repeated edges.
Trail,
656+
/// `ACYCLIC`: no repeated nodes.
Acyclic,
657+
/// `SIMPLE`: no repeated nodes, except that the first and last nodes may be the same.
Simple,
658+
}
659+
660+
/// A single node in a graph pattern, e.g.: `(x:Entity)` in `MATCH (x:Entity)`.
661+
#[derive(Clone, Debug, PartialEq)]
662+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
663+
pub struct GraphMatchNode {
664+
/// an optional node pre-filter, e.g.: `WHERE c.name='Alarm'` in `MATCH (c WHERE c.name='Alarm')`
665+
pub prefilter: Option<Box<Expr>>,
666+
/// the optional element variable of the node match, e.g.: `x` in `MATCH (x)`
667+
pub variable: Option<SymbolPrimitive>,
668+
/// the optional label(s) to match for the node, e.g.: `Entity` in `MATCH (x:Entity)`
669+
pub label: Option<Vec<SymbolPrimitive>>,
670+
}
671+
672+
/// A single edge in a graph pattern, e.g.: `−[t:Target]−>` in `MATCH (a) −[t:Target]−> (b)`.
673+
#[derive(Clone, Debug, PartialEq)]
674+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
675+
pub struct GraphMatchEdge {
676+
/// the edge direction (see [`GraphMatchDirection`] for the corresponding edge pattern syntax)
677+
pub direction: GraphMatchDirection,
678+
/// an optional quantifier for the edge match, e.g.: `{2,5}` in `MATCH (x)->{2,5}(y)`
679+
pub quantifier: Option<AstNode<GraphMatchQuantifier>>,
680+
/// an optional edge pre-filter, e.g.: `WHERE t.capacity>100` in `MATCH −[t:hasSupply WHERE t.capacity>100]−>`
681+
pub prefilter: Option<Box<Expr>>,
682+
/// the optional element variable of the edge match, e.g.: `t` in `MATCH −[t]−>`
683+
pub variable: Option<SymbolPrimitive>,
684+
/// the optional label(s) to match for the edge, e.g.: `Target` in `MATCH −[t:Target]−>`
685+
pub label: Option<Vec<SymbolPrimitive>>,
686+
}
687+
688+
/// A single graph match pattern: an ordered sequence of nodes, edges and sub-patterns.
689+
#[derive(Clone, Debug, PartialEq)]
690+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
691+
pub struct GraphMatchPattern {
692+
/// an optional path restrictor for the pattern (`TRAIL`, `ACYCLIC` or `SIMPLE`; see [`GraphMatchRestrictor`])
pub restrictor: Option<GraphMatchRestrictor>,
693+
/// an optional quantifier for the entire pattern match
694+
pub quantifier: Option<AstNode<GraphMatchQuantifier>>,
695+
/// an optional pattern pre-filter, e.g.: `WHERE a.name=b.name` in `MATCH [(a)->(b) WHERE a.name=b.name]`
696+
pub prefilter: Option<Box<Expr>>,
697+
/// the optional path variable of the pattern, e.g.: `p` in `MATCH p = (a) −[t]−> (b)`
698+
pub variable: Option<SymbolPrimitive>,
699+
/// the ordered pattern parts
700+
pub parts: Vec<GraphMatchPatternPart>,
701+
}
702+
703+
/// A path selector
704+
/// | Keyword
705+
/// |------------------
706+
/// | ANY SHORTEST
707+
/// | ALL SHORTEST
708+
/// | ANY
709+
/// | ANY k
710+
/// | SHORTEST k
711+
/// | SHORTEST k GROUP
712+
///
713+
/// Fig. 8. Table of selectors:
714+
/// <https://arxiv.org/abs/2112.06217>
715+
#[derive(Clone, Debug, PartialEq)]
716+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
717+
pub enum GraphMatchSelector {
718+
/// `ANY SHORTEST`
AnyShortest,
719+
/// `ALL SHORTEST`
AllShortest,
720+
/// `ANY`
Any,
721+
/// `ANY k`, carrying the non-zero `k`
AnyK(NonZeroU32),
722+
/// `SHORTEST k`, carrying the non-zero `k`
ShortestK(NonZeroU32),
723+
/// `SHORTEST k GROUP`, carrying the non-zero `k`
ShortestKGroup(NonZeroU32),
724+
}
725+
726+
/// A graph match clause as defined in GPML
727+
/// See <https://arxiv.org/abs/2112.06217>
728+
#[derive(Clone, Debug, PartialEq)]
729+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
730+
pub struct GraphMatchExpr {
731+
/// an optional path selector (see [`GraphMatchSelector`])
pub selector: Option<GraphMatchSelector>,
732+
/// the path patterns making up the graph pattern, e.g.: the two comma separated patterns in
/// `MATCH (t) -[:hasChange]-> (dt), (dt) -[:checkPointedBy]-> (t1)`
pub patterns: Vec<AstNode<GraphMatchPattern>>,
733+
}
734+
735+
/// GROUP BY <grouping_strategy> <group_key_list>... \[AS <symbol>\]
587736
#[derive(Clone, Debug, PartialEq)]
588737
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
589738
pub struct GroupByExpr {

partiql-parser/benches/bench_parse.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,20 @@ const Q_COMPLEX_FEXPR: &str = r#"
3434
AS deltas FROM SOURCE_VIEW_DELTA_FULL_TRANSACTIONS delta_full_transactions
3535
"#;
3636

37+
// Benchmark input: a nested SELECT whose inner FROM clause contains a graph `MATCH`
// with two comma separated path patterns, alongside a sub-query and a path source.
const Q_COMPLEX_MATCH: &str = r#"
38+
SELECT (
39+
SELECT numRec, data
40+
FROM
41+
(deltaGraph MATCH (t) -[:hasChange]-> (dt), (dt) -[:checkPointedBy]-> (t1)),
42+
(
43+
SELECT foo(u.id), bar(review), rindex
44+
FROM delta.data as u CROSS JOIN UNPIVOT u.reviews as review AT rindex
45+
) as data,
46+
delta.numRec as numRec
47+
)
48+
AS deltas FROM SOURCE_VIEW_DELTA_FULL_TRANSACTIONS delta_full_transactions
49+
"#;
50+
3751
fn parse_bench(c: &mut Criterion) {
3852
fn parse(text: &str) -> ParserResult {
3953
Parser::default().parse(text)
@@ -45,6 +59,9 @@ fn parse_bench(c: &mut Criterion) {
4559
c.bench_function("parse-complex-fexpr", |b| {
4660
b.iter(|| parse(black_box(Q_COMPLEX_FEXPR)))
4761
});
62+
c.bench_function("parse-complex-match", |b| {
63+
b.iter(|| parse(black_box(Q_COMPLEX_MATCH)))
64+
});
4865
}
4966

5067
criterion_group! {

partiql-parser/src/lexer.rs

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,8 @@ pub enum Token<'input> {
467467
Caret,
468468
#[token(".")]
469469
Period,
470+
#[token("~")]
471+
Tilde,
470472
#[token("||")]
471473
DblPipe,
472474

@@ -512,10 +514,14 @@ pub enum Token<'input> {
512514
// Keywords
513515
#[regex("(?i:All)")]
514516
All,
517+
#[regex("(?i:Acyclic)")]
518+
Acyclic,
515519
#[regex("(?i:Asc)")]
516520
Asc,
517521
#[regex("(?i:And)")]
518522
And,
523+
#[regex("(?i:Any)")]
524+
Any,
519525
#[regex("(?i:As)")]
520526
As,
521527
#[regex("(?i:At)")]
@@ -576,6 +582,8 @@ pub enum Token<'input> {
576582
Like,
577583
#[regex("(?i:Limit)")]
578584
Limit,
585+
#[regex("(?i:Match)")]
586+
Match,
579587
#[regex("(?i:Missing)")]
580588
Missing,
581589
#[regex("(?i:Natural)")]
@@ -612,8 +620,14 @@ pub enum Token<'input> {
612620
Time,
613621
#[regex("(?i:Timestamp)")]
614622
Timestamp,
623+
#[regex("(?i:Simple)")]
624+
Simple,
625+
#[regex("(?i:Shortest)")]
626+
Shortest,
615627
#[regex("(?i:Then)")]
616628
Then,
629+
#[regex("(?i:Trail)")]
630+
Trail,
617631
#[regex("(?i:True)")]
618632
True,
619633
#[regex("(?i:Union)")]
@@ -642,9 +656,11 @@ impl<'input> Token<'input> {
642656
pub fn is_keyword(&self) -> bool {
643657
matches!(
644658
self,
645-
Token::All
659+
Token::Acyclic
660+
| Token::All
646661
| Token::Asc
647662
| Token::And
663+
| Token::Any
648664
| Token::As
649665
| Token::At
650666
| Token::Between
@@ -671,6 +687,7 @@ impl<'input> Token<'input> {
671687
| Token::Left
672688
| Token::Like
673689
| Token::Limit
690+
| Token::Match
674691
| Token::Missing
675692
| Token::Natural
676693
| Token::Not
@@ -689,7 +706,10 @@ impl<'input> Token<'input> {
689706
| Token::Table
690707
| Token::Time
691708
| Token::Timestamp
709+
| Token::Simple
710+
| Token::Shortest
692711
| Token::Then
712+
| Token::Trail
693713
| Token::Union
694714
| Token::Unpivot
695715
| Token::Using
@@ -736,6 +756,7 @@ impl<'input> fmt::Display for Token<'input> {
736756
Token::Slash => write!(f, "/"),
737757
Token::Caret => write!(f, "^"),
738758
Token::Period => write!(f, "."),
759+
Token::Tilde => write!(f, "~"),
739760
Token::DblPipe => write!(f, "||"),
740761
Token::UnquotedIdent(id) => write!(f, "<{}:UNQUOTED_IDENT>", id),
741762
Token::QuotedIdent(id) => write!(f, "<{}:QUOTED_IDENT>", id),
@@ -748,9 +769,11 @@ impl<'input> fmt::Display for Token<'input> {
748769
Token::EmbeddedIonQuote => write!(f, "<ION>"),
749770
Token::Ion(txt) => write!(f, "<{}:ION>", txt),
750771

751-
Token::All
772+
Token::Acyclic
773+
| Token::All
752774
| Token::Asc
753775
| Token::And
776+
| Token::Any
754777
| Token::As
755778
| Token::At
756779
| Token::Between
@@ -781,6 +804,7 @@ impl<'input> fmt::Display for Token<'input> {
781804
| Token::Left
782805
| Token::Like
783806
| Token::Limit
807+
| Token::Match
784808
| Token::Missing
785809
| Token::Natural
786810
| Token::Not
@@ -799,7 +823,10 @@ impl<'input> fmt::Display for Token<'input> {
799823
| Token::Table
800824
| Token::Time
801825
| Token::Timestamp
826+
| Token::Simple
827+
| Token::Shortest
802828
| Token::Then
829+
| Token::Trail
803830
| Token::True
804831
| Token::Union
805832
| Token::Unpivot
@@ -836,7 +863,8 @@ mod tests {
836863
"WiTH Where Value uSiNg Unpivot UNION True Select right Preserve pivoT Outer Order Or \
837864
On Offset Nulls Null Not Natural Missing Limit Like Left Lateral Last Join \
838865
Intersect Is Inner In Having Group From For Full First False Except Escape Desc \
839-
Cross Table Time Timestamp Date By Between At As And Asc All Values Case When Then Else End";
866+
Cross Table Time Timestamp Date By Between At As And Asc All Values Case When Then Else End \
867+
Match Any Shortest Trail Acyclic Simple";
840868
let symbols = symbols.split(' ').chain(primitives.split(' '));
841869
let keywords = keywords.split(' ');
842870

@@ -858,7 +886,7 @@ mod tests {
858886
"<unquoted_atident:UNQUOTED_ATIDENT>", "GROUP", "<quoted_atident:QUOTED_ATIDENT>",
859887
"FROM", "FOR", "FULL", "FIRST", "FALSE", "EXCEPT", "ESCAPE", "DESC", "CROSS", "TABLE",
860888
"TIME", "TIMESTAMP", "DATE", "BY", "BETWEEN", "AT", "AS", "AND", "ASC", "ALL", "VALUES",
861-
"CASE", "WHEN", "THEN", "ELSE", "END"
889+
"CASE", "WHEN", "THEN", "ELSE", "END", "MATCH", "ANY", "SHORTEST", "TRAIL", "ACYCLIC", "SIMPLE"
862890
];
863891
let displayed = toks
864892
.into_iter()

0 commit comments

Comments
 (0)