Skip to content

Support joins on prefixes of arbitrary length #48

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 18 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 28 additions & 28 deletions examples/borrow_check.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
extern crate datafrog;
use datafrog::Iteration;
use datafrog::{Iteration, Relation};

type Region = u32;
type Borrow = u32;
Expand All @@ -14,15 +14,15 @@ fn main() {
let subset = iteration1.variable::<(Region, Region, Point)>("subset");

// different indices for `subset`.
let subset_r1p = iteration1.variable::<((Region, Point), Region)>("subset_r1p");
let subset_r2p = iteration1.variable::<((Region, Point), Region)>("subset_r2p");
let subset_p = iteration1.variable::<(Point, (Region, Region))>("subset_p");
let subset_r1p = iteration1.variable::<(Region, Point, Region)>("subset_r1p");
let subset_r2p = iteration1.variable::<(Region, Point, Region)>("subset_r2p");
let subset_p = iteration1.variable::<(Point, Region, Region)>("subset_p");

// temporaries as we perform a multi-way join.
let subset_1 = iteration1.variable::<((Region, Point), Region)>("subset_1");
let subset_2 = iteration1.variable::<((Region, Point), Region)>("subset_2");
let subset_1 = iteration1.variable::<(Region, Point, Region)>("subset_1");
let subset_2 = iteration1.variable::<(Region, Point, Region)>("subset_2");

let region_live_at = iteration1.variable::<((Region, Point), ())>("region_live_at");
let region_live_at = iteration1.variable::<(Region, Point)>("region_live_at");
let cfg_edge_p = iteration1.variable::<(Point, Point)>("cfg_edge_p");

// load initial facts.
Expand All @@ -33,29 +33,29 @@ fn main() {
// .. and then start iterating rules!
while iteration1.changed() {
// remap fields to re-index by keys.
subset_r1p.from_map(&subset, |&(r1, r2, p)| ((r1, p), r2));
subset_r2p.from_map(&subset, |&(r1, r2, p)| ((r2, p), r1));
subset_p.from_map(&subset, |&(r1, r2, p)| (p, (r1, r2)));
subset_r1p.from_map(&subset, |&(r1, r2, p)| (r1, p, r2));
subset_r2p.from_map(&subset, |&(r1, r2, p)| (r2, p, r1));
subset_p.from_map(&subset, |&(r1, r2, p)| (p, r1, r2));

// R0: subset(R1, R2, P) :- outlives(R1, R2, P).
// Already loaded; outlives is static.

// R1: subset(R1, R3, P) :-
// subset(R1, R2, P),
// subset(R2, R3, P).
subset.from_join(&subset_r2p, &subset_r1p, |&(_r2, p), &r1, &r3| (r1, r3, p));
subset.from_join(&subset_r2p, &subset_r1p, |(_r2, p), r1, r3| (r1, r3, p));

// R2: subset(R1, R2, Q) :-
// subset(R1, R2, P),
// cfg_edge(P, Q),
// region_live_at(R1, Q),
// region_live_at(R2, Q).

subset_1.from_join(&subset_p, &cfg_edge_p, |&_p, &(r1, r2), &q| ((r1, q), r2));
subset_2.from_join(&subset_1, &region_live_at, |&(r1, q), &r2, &()| {
((r2, q), r1)
subset_1.from_join_first(&subset_p, &cfg_edge_p, |_p, (r1, r2), q| (r1, q, r2));
subset_2.from_join(&subset_1, &region_live_at, |(r1, q), r2, ()| {
(r2, q, r1)
});
subset.from_join(&subset_2, &region_live_at, |&(r2, q), &r1, &()| (r1, r2, q));
subset.from_join(&subset_2, &region_live_at, |(r2, q), r1, ()| (r1, r2, q));
}

subset_r1p.complete()
Expand All @@ -69,41 +69,41 @@ fn main() {
let requires = iteration2.variable::<(Region, Borrow, Point)>("requires");
requires.insert(Vec::new().into());

let requires_rp = iteration2.variable::<((Region, Point), Borrow)>("requires_rp");
let requires_bp = iteration2.variable::<((Borrow, Point), Region)>("requires_bp");
let requires_rp = iteration2.variable::<(Region, Point, Borrow)>("requires_rp");
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some of these are redundant now, although this is partly because it was written without leapjoin.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think Frank initially had these more like documentation of how to use the API than really optimal, runnable examples. There are docs and doctests for leapjoins IIRC so it's not terrible that this example doesn't use them. Don't feel obligated to update them.

We could have that as a "good first issue" "help wanted" issue if some contributor wanted to try and clean that up. It would teach them a bit about the API, contrast the example with the polonius analyses, and so on.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A better example might switch requires to have the same layout as requires_rp or requires_bp and reorder it at the end. Without that and without leapjoin (which I agree doesn't really belong here), I think the current version is optimal.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess maybe having a single iteration instead of two, or actually feeding input facts into the computation, and so on, could turn it into a slightly more realistic example if someone wanted to do so.

As for the other example, graspan1, I think there were some input facts in one of Frank's blog posts a few years back.

let requires_bp = iteration2.variable::<(Borrow, Point, Region)>("requires_bp");

let requires_1 = iteration2.variable::<(Point, (Borrow, Region))>("requires_1");
let requires_2 = iteration2.variable::<((Region, Point), Borrow)>("requires_2");
let requires_1 = iteration2.variable::<(Point, Borrow, Region)>("requires_1");
let requires_2 = iteration2.variable::<(Region, Point, Borrow)>("requires_2");

let subset_r1p = iteration2.variable::<((Region, Point), Region)>("subset_r1p");
let subset_r1p = iteration2.variable::<(Region, Point, Region)>("subset_r1p");
subset_r1p.insert(subset);

let killed = Vec::new().into();
let region_live_at = iteration2.variable::<((Region, Point), ())>("region_live_at");
let killed: Relation<(Borrow, Point)> = Vec::new().into();
let region_live_at = iteration2.variable::<(Region, Point)>("region_live_at");
let cfg_edge_p = iteration2.variable::<(Point, Point)>("cfg_edge_p");

// .. and then start iterating rules!
while iteration2.changed() {
requires_rp.from_map(&requires, |&(r, b, p)| ((r, p), b));
requires_bp.from_map(&requires, |&(r, b, p)| ((b, p), r));
requires_rp.from_map(&requires, |&(r, b, p)| (r, p, b));
requires_bp.from_map(&requires, |&(r, b, p)| (b, p, r));

// requires(R, B, P) :- borrow_region(R, B, P).
// Already loaded; borrow_region is static.

// requires(R2, B, P) :-
// requires(R1, B, P),
// subset(R1, R2, P).
requires.from_join(&requires_rp, &subset_r1p, |&(_r1, p), &b, &r2| (r2, b, p));
requires.from_join(&requires_rp, &subset_r1p, |(_r1, p), b, r2| (r2, b, p));

// requires(R, B, Q) :-
// requires(R, B, P),
// !killed(B, P),
// cfg_edge(P, Q),
// (region_live_at(R, Q); universal_region(R)).

requires_1.from_antijoin(&requires_bp, &killed, |&(b, p), &r| (p, (b, r)));
requires_2.from_join(&requires_1, &cfg_edge_p, |&_p, &(b, r), &q| ((r, q), b));
requires.from_join(&requires_2, &region_live_at, |&(r, q), &b, &()| (r, b, q));
requires_1.from_antijoin(&requires_bp, &killed, |(b, p, r)| (p, b, r));
requires_2.from_join_first(&requires_1, &cfg_edge_p, |_p, (b, r), q| (r, q, b));
requires.from_join(&requires_2, &region_live_at, |(r, q), b, ()| (r, b, q));
}

requires.complete()
Expand Down
2 changes: 1 addition & 1 deletion examples/graspan1.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ fn main() {
// .. and then start iterating rules!
while iteration.changed() {
// N(a,c) <- N(a,b), E(b,c)
variable1.from_join(&variable1, &variable2, |_b, &a, &c| (c, a));
variable1.from_join_first(&variable1, &variable2, |_b, a, c| (c, a));
}

let reachable = variable1.complete();
Expand Down
151 changes: 77 additions & 74 deletions src/join.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! Join functionality.

use super::{Relation, Variable};
use super::{Relation, Split, Variable};
use std::cell::Ref;
use std::ops::Deref;

Expand All @@ -9,28 +9,38 @@ use std::ops::Deref;
/// because relations have no "recent" tuples, so the fn would be a
/// guaranteed no-op if both arguments were relations. See also
/// `join_into_relation`.
pub(crate) fn join_into<'me, Key: Ord, Val1: Ord, Val2: Ord, Result: Ord>(
input1: &Variable<(Key, Val1)>,
input2: impl JoinInput<'me, (Key, Val2)>,
output: &Variable<Result>,
mut logic: impl FnMut(&Key, &Val1, &Val2) -> Result,
) {
pub(crate) fn join_into<'me, P, A, B, O>(
input1: &Variable<A>,
input2: impl JoinInput<'me, B>,
output: &Variable<O>,
mut logic: impl FnMut(P, A::Suffix, B::Suffix) -> O,
) where
P: Ord,
A: Copy + Split<P>,
B: Copy + Split<P>,
O: Ord,
{
let mut results = Vec::new();
let push_result = |k: &Key, v1: &Val1, v2: &Val2| results.push(logic(k, v1, v2));
let push_result = |k, v1, v2| results.push(logic(k, v1, v2));

join_delta(input1, input2, push_result);

output.insert(Relation::from_vec(results));
}

pub(crate) fn join_and_filter_into<'me, Key: Ord, Val1: Ord, Val2: Ord, Result: Ord>(
input1: &Variable<(Key, Val1)>,
input2: impl JoinInput<'me, (Key, Val2)>,
output: &Variable<Result>,
mut logic: impl FnMut(&Key, &Val1, &Val2) -> Option<Result>,
) {
pub(crate) fn join_and_filter_into<'me, P, A, B, O>(
input1: &Variable<A>,
input2: impl JoinInput<'me, B>,
output: &Variable<O>,
mut logic: impl FnMut(P, A::Suffix, B::Suffix) -> Option<O>,
) where
P: Ord,
A: Copy + Split<P>,
B: Copy + Split<P>,
O: Ord,
{
let mut results = Vec::new();
let push_result = |k: &Key, v1: &Val1, v2: &Val2| {
let push_result = |k, v1, v2| {
if let Some(result) = logic(k, v1, v2) {
results.push(result);
}
Expand All @@ -43,11 +53,15 @@ pub(crate) fn join_and_filter_into<'me, Key: Ord, Val1: Ord, Val2: Ord, Result:

/// Joins the `recent` tuples of each input with the `stable` tuples of the other, then the
/// `recent` tuples of *both* inputs.
fn join_delta<'me, Key: Ord, Val1: Ord, Val2: Ord>(
input1: &Variable<(Key, Val1)>,
input2: impl JoinInput<'me, (Key, Val2)>,
mut result: impl FnMut(&Key, &Val1, &Val2),
) {
fn join_delta<'me, P, A, B>(
input1: &Variable<A>,
input2: impl JoinInput<'me, B>,
mut result: impl FnMut(P, A::Suffix, B::Suffix),
) where
P: Ord,
A: Copy + Split<P>,
B: Copy + Split<P>,
{
let recent1 = input1.recent();
let recent2 = input2.recent();

Expand All @@ -63,11 +77,17 @@ fn join_delta<'me, Key: Ord, Val1: Ord, Val2: Ord>(
}

/// Join, but for two relations.
pub(crate) fn join_into_relation<'me, Key: Ord, Val1: Ord, Val2: Ord, Result: Ord>(
input1: &Relation<(Key, Val1)>,
input2: &Relation<(Key, Val2)>,
mut logic: impl FnMut(&Key, &Val1, &Val2) -> Result,
) -> Relation<Result> {
pub(crate) fn join_into_relation<P, A, B, O>(
input1: &Relation<A>,
input2: &Relation<B>,
mut logic: impl FnMut(P, A::Suffix, B::Suffix) -> O,
) -> Relation<O>
where
P: Ord,
A: Copy + Split<P>,
B: Copy + Split<P>,
O: Ord,
{
let mut results = Vec::new();

join_helper(&input1.elements, &input2.elements, |k, v1, v2| {
Expand All @@ -78,48 +98,57 @@ pub(crate) fn join_into_relation<'me, Key: Ord, Val1: Ord, Val2: Ord, Result: Or
}

/// Returns the results of applying `logic` to every tuple of `input1` whose key is not present in `input2`.
pub(crate) fn antijoin<Key: Ord, Val: Ord, Result: Ord>(
input1: &Relation<(Key, Val)>,
input2: &Relation<Key>,
mut logic: impl FnMut(&Key, &Val) -> Result,
) -> Relation<Result> {
pub(crate) fn antijoin<P, A, O>(
input1: &Relation<A>,
input2: &Relation<P>,
mut logic: impl FnMut(A) -> O,
) -> Relation<O>
where
A: Copy + Split<P>,
P: Ord,
O: Ord,
{
let mut tuples2 = &input2[..];

let results = input1
.elements
.iter()
.filter(|(ref key, _)| {
tuples2 = gallop(tuples2, |k| k < key);
tuples2.first() != Some(key)
.filter(|el| {
tuples2 = gallop(tuples2, |p| p < &el.prefix());
tuples2.first() != Some(&el.prefix())
})
.map(|(ref key, ref val)| logic(key, val))
.map(|&el| logic(el))
.collect::<Vec<_>>();

Relation::from_vec(results)
}

fn join_helper<K: Ord, V1, V2>(
mut slice1: &[(K, V1)],
mut slice2: &[(K, V2)],
mut result: impl FnMut(&K, &V1, &V2),
) {
fn join_helper<P, A, B>(
mut slice1: &[A],
mut slice2: &[B],
mut result: impl FnMut(P, A::Suffix, B::Suffix),
) where
A: Copy + Split<P>,
B: Copy + Split<P>,
P: Ord,
{
while !slice1.is_empty() && !slice2.is_empty() {
use std::cmp::Ordering;

// If the keys match produce tuples, else advance the smaller key until they might.
match slice1[0].0.cmp(&slice2[0].0) {
match slice1[0].prefix().cmp(&slice2[0].prefix()) {
Ordering::Less => {
slice1 = gallop(slice1, |x| x.0 < slice2[0].0);
slice1 = gallop(slice1, |x| x.prefix() < slice2[0].prefix());
}
Ordering::Equal => {
// Determine the number of matching keys in each slice.
let count1 = slice1.iter().take_while(|x| x.0 == slice1[0].0).count();
let count2 = slice2.iter().take_while(|x| x.0 == slice2[0].0).count();
let count1 = slice1.iter().take_while(|x| x.prefix() == slice1[0].prefix()).count();
let count2 = slice2.iter().take_while(|x| x.prefix() == slice2[0].prefix()).count();

// Produce results from the cross-product of matches.
for index1 in 0..count1 {
for s2 in slice2[..count2].iter() {
result(&slice1[0].0, &slice1[index1].1, &s2.1);
result(slice1[0].prefix(), slice1[index1].suffix(), s2.suffix());
}
}

Expand All @@ -128,7 +157,7 @@ fn join_helper<K: Ord, V1, V2>(
slice2 = &slice2[count2..];
}
Ordering::Greater => {
slice2 = gallop(slice2, |x| x.0 < slice1[0].0);
slice2 = gallop(slice2, |x| x.prefix() < slice1[0].prefix());
}
}
}
Expand Down Expand Up @@ -158,7 +187,7 @@ pub(crate) fn gallop<T>(mut slice: &[T], mut cmp: impl FnMut(&T) -> bool) -> &[T
}

/// An input that can be used with `from_join`; either a `Variable` or a `Relation`.
pub trait JoinInput<'me, Tuple: Ord>: Copy {
pub trait JoinInput<'me, Tuple>: Copy {
/// If we are on iteration N of the loop, these are the tuples
/// added on iteration N-1. (For a `Relation`, this is always an
/// empty slice.)
Expand All @@ -171,7 +200,7 @@ pub trait JoinInput<'me, Tuple: Ord>: Copy {
fn for_each_stable_set(self, f: impl FnMut(&[Tuple]));
}

impl<'me, Tuple: Ord> JoinInput<'me, Tuple> for &'me Variable<Tuple> {
impl<'me, Tuple> JoinInput<'me, Tuple> for &'me Variable<Tuple> {
type RecentTuples = Ref<'me, [Tuple]>;

fn recent(self) -> Self::RecentTuples {
Expand All @@ -185,7 +214,7 @@ impl<'me, Tuple: Ord> JoinInput<'me, Tuple> for &'me Variable<Tuple> {
}
}

impl<'me, Tuple: Ord> JoinInput<'me, Tuple> for &'me Relation<Tuple> {
impl<'me, Tuple> JoinInput<'me, Tuple> for &'me Relation<Tuple> {
type RecentTuples = &'me [Tuple];

fn recent(self) -> Self::RecentTuples {
Expand All @@ -196,29 +225,3 @@ impl<'me, Tuple: Ord> JoinInput<'me, Tuple> for &'me Relation<Tuple> {
f(&self.elements)
}
}

impl<'me, Tuple: Ord> JoinInput<'me, (Tuple, ())> for &'me Relation<Tuple> {
type RecentTuples = &'me [(Tuple, ())];

fn recent(self) -> Self::RecentTuples {
&[]
}

fn for_each_stable_set(self, mut f: impl FnMut(&[(Tuple, ())])) {
use std::mem;
assert_eq!(mem::size_of::<(Tuple, ())>(), mem::size_of::<Tuple>());
assert_eq!(mem::align_of::<(Tuple, ())>(), mem::align_of::<Tuple>());

// SAFETY: https://rust-lang.github.io/unsafe-code-guidelines/layout/structs-and-tuples.html#structs-with-1-zst-fields
// guarantees that `T` is layout compatible with `(T, ())`, since `()` is a 1-ZST. We use
// `slice::from_raw_parts` because the layout compatibility guarantee does not extend to
// containers like `&[T]`.
let elements: &'me [Tuple] = self.elements.as_slice();
let len = elements.len();

let elements: &'me [(Tuple, ())] =
unsafe { std::slice::from_raw_parts(elements.as_ptr() as *const _, len) };

f(elements)
}
}
Loading