Skip to content

Commit

Permalink
Merge branch 'patch-2'
Browse files Browse the repository at this point in the history
  • Loading branch information
Byron committed May 8, 2021
2 parents 5edc076 + a9e4feb commit f01dc54
Show file tree
Hide file tree
Showing 7 changed files with 121 additions and 226 deletions.
4 changes: 3 additions & 1 deletion git-odb/src/pack/index/traverse/indexed.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::collections::VecDeque;

use super::{Error, SafetyCheck};
use crate::pack::{
self,
Expand Down Expand Up @@ -157,7 +159,7 @@ impl From<pack::index::Entry> for EntryWithDefault {
}
}

fn digest_statistics(items: Vec<pack::tree::Item<EntryWithDefault>>) -> index::traverse::Outcome {
fn digest_statistics(items: VecDeque<pack::tree::Item<EntryWithDefault>>) -> index::traverse::Outcome {
let mut res = index::traverse::Outcome::default();
let average = &mut res.average;
for item in &items {
Expand Down
6 changes: 3 additions & 3 deletions git-odb/src/pack/index/write/encode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ use crate::{
};
use byteorder::{BigEndian, WriteBytesExt};
use git_features::progress::{self, Progress};
use std::{cmp::Ordering, io};
use std::{cmp::Ordering, collections::VecDeque, io};

pub(crate) fn write_to(
out: impl io::Write,
entries_sorted_by_oid: Vec<pack::tree::Item<pack::index::write::TreeEntry>>,
entries_sorted_by_oid: VecDeque<pack::tree::Item<pack::index::write::TreeEntry>>,
pack_hash: &git_hash::ObjectId,
kind: pack::index::Version,
mut progress: impl Progress,
Expand Down Expand Up @@ -36,7 +36,7 @@ pub(crate) fn write_to(
const HIGH_BIT: u32 = 0x8000_0000;

let needs_64bit_offsets =
entries_sorted_by_oid.last().expect("at least one pack entry").offset > LARGE_OFFSET_THRESHOLD;
entries_sorted_by_oid.back().expect("at least one pack entry").offset > LARGE_OFFSET_THRESHOLD;
let mut fan_out_be = [0u32; 256];
progress.init(Some(4), progress::steps());
let start = std::time::Instant::now();
Expand Down
2 changes: 1 addition & 1 deletion git-odb/src/pack/index/write/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ impl pack::index::File {

{
let _progress = root_progress.add_child("sorting by id");
items.sort_by_key(|e| e.data.id);
items.make_contiguous().sort_by_key(|e| e.data.id);
}

root_progress.inc();
Expand Down
206 changes: 48 additions & 158 deletions git-odb/src/pack/tree/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,191 +3,81 @@ use crate::{
pack::tree::{Item, Tree},
};

/// All the **unsafe** bits to support parallel iteration with write access
/// Iteration
impl<T> Tree<T> {
#[allow(unsafe_code)]
/// # SAFETY
/// Called from node with is guaranteed to not be aliasing with any other node.
/// Note that we are iterating nodes that we are affecting here, but that only affects the
/// 'is_root' field fo the item, not the data field that we are writing here.
/// For all details see `from_node_take_entry()`.
unsafe fn put_data(&self, index: usize, data: T) {
let items_mut: &mut Vec<Item<T>> = &mut *(self.items.get());
items_mut.get_unchecked_mut(index).data = data;
}
/// Return an iterator over chunks of roots. Roots are not children themselves, they have no parents.
pub fn iter_root_chunks(&mut self, chunk_size: usize) -> impl Iterator<Item = Chunk<'_, T>> + '_ {
let (front, back) = self.items.as_mut_slices();
assert_eq!(
front.len(),
self.roots,
"This should not happen unless we added more nodes than were declared in the constructor"
);

#[allow(unsafe_code)]
/// # SAFETY
/// Similar to `from_node_put_data(…)`
unsafe fn offset(&self, index: usize) -> u64 {
let items: &Vec<Item<T>> = &*(self.items.get());
items.get_unchecked(index).offset
front.chunks_mut(chunk_size).map(move |c| Chunk {
inner: c.iter_mut(),
children: back as *mut [Item<T>],
})
}
}

#[allow(unsafe_code)]
/// # SAFETY
/// Similar to `from_node_put_data(…)`
unsafe fn entry_slice(&self, index: usize) -> std::ops::Range<u64> {
let items: &Vec<Item<T>> = &*(self.items.get());
let start = items.get_unchecked(index).offset;
let end = items
.get(index + 1)
.map(|e| e.offset)
.or(self.pack_entries_end)
.expect("traversal(…) to have set this value (BUG)");
start..end
}
/// A chunk returned by `iter_root_chunks`, which can be iterated over to get [`Node`]s.
pub struct Chunk<'a, T> {
inner: std::slice::IterMut<'a, Item<T>>,
children: *mut [Item<T>],
}

#[allow(unsafe_code)]
/// # SAFETY
/// A tree is a data structure without cycles, and we assure of that by verifying all input.
/// A node as identified by index can only be traversed once using the Chunks iterator.
/// When the iterator is created, this instance cannot be mutated anymore nor can it be read.
/// That iterator is only handed out once.
/// `Node` instances produced by it consume themselves when iterating their children, allowing them to be
/// used only once, recursively. Again, traversal is always forward and consuming, making it impossible to
/// alias multiple nodes in the tree.
/// It's safe for multiple threads to hold different chunks, as they are guaranteed to be non-overlapping and unique.
/// If the tree is accessed after iteration, it will panic as no mutation is allowed anymore, nor is
unsafe fn take_entry(&self, index: usize) -> (T, Vec<usize>)
where
T: Default,
{
let items_mut: &mut Vec<Item<T>> = &mut *(self.items.get());
let item = items_mut.get_unchecked_mut(index);
let children = std::mem::take(&mut item.children);
let data = std::mem::take(&mut item.data);
(data, children)
}
// SAFETY: The raw pointer is uniquely materialized in `Node::into_child_iter`.
#[allow(unsafe_code)]
unsafe impl<'a, T> Send for Chunk<'a, T> {}

#[allow(unsafe_code)]
/// # SAFETY
/// As `take_entry(…)` - but this one only takes if the data of Node is a root
unsafe fn from_iter_take_entry_if_root(&self, index: usize) -> Option<(T, Vec<usize>)>
where
T: Default,
{
let items_mut: &mut Vec<Item<T>> = &mut *(self.items.get());
let item = items_mut.get_unchecked_mut(index);
if item.is_root {
let children = std::mem::take(&mut item.children);
let data = std::mem::take(&mut item.data);
Some((data, children))
} else {
None
}
}
}
impl<'a, T> Iterator for Chunk<'a, T> {
type Item = Node<'a, T>;

/// Iteration
impl<T> Tree<T> {
/// Return an iterator over chunks of roots. Roots are not children themselves, they have no parents.
pub fn iter_root_chunks(&mut self, chunk_size: usize) -> Chunks<'_, T> {
Chunks {
tree: self,
chunk_size,
cursor: 0,
}
fn next(&mut self) -> Option<Self::Item> {
self.inner.next().map(|item| Node {
item,
children: self.children,
})
}
}

/// An item returned by a [`Chunks`] iterator, allowing access to the `data` stored alongside nodes in a [`Tree`]
/// An item returned by `iter_root_chunks`, allowing access to the `data` stored alongside nodes in a [`Tree`].
pub struct Node<'a, T> {
tree: &'a Tree<T>,
index: usize,
children: Vec<usize>,
/// The custom data attached to each node of the tree whose ownership was transferred into the iteration.
pub data: T,
item: &'a mut Item<T>,
children: *mut [Item<T>],
}

impl<'a, T> Node<'a, T>
where
T: Default,
{
impl<'a, T> Node<'a, T> {
/// Returns the offset into the pack at which the `Node`s data is located.
pub fn offset(&self) -> u64 {
#[allow(unsafe_code)]
// SAFETY: The index is valid as it was controlled by `add_child(…)`, then see `take_entry(…)`
unsafe {
self.tree.offset(self.index)
}
self.item.offset
}

/// Returns the slice into the data pack at which the pack entry is located.
pub fn entry_slice(&self) -> pack::data::EntryRange {
#[allow(unsafe_code)]
// SAFETY: The index is valid as it was controlled by `add_child(…)`, then see `take_entry(…)`
unsafe {
self.tree.entry_slice(self.index)
}
self.item.offset..self.item.next_offset
}

/// Returns the node data associated with this node.
pub fn data(&mut self) -> &mut T {
&mut self.item.data
}

/// Write potentially changed `Node` data back into the [`Tree`] and transform this `Node` into an iterator
/// over its `Node`s children.
/// Transform this `Node` into an iterator over its children.
///
/// Children are `Node`s referring to pack entries whose base object is this pack entry.
pub fn store_changes_then_into_child_iter(self) -> impl Iterator<Item = Node<'a, T>> {
#[allow(unsafe_code)]
// SAFETY: The index is valid as it was controlled by `add_child(…)`, then see `take_entry(…)`
unsafe {
self.tree.put_data(self.index, self.data)
};
let Self { tree, children, .. } = self;
children.into_iter().map(move |index| {
// SAFETY: The index is valid as it was controlled by `add_child(…)`, then see `take_entry(…)`
pub fn into_child_iter(self) -> impl Iterator<Item = Node<'a, T>> + 'a {
let children = self.children;
self.item.children.iter().map(move |&index| {
// SAFETY: The children array is alive by the 'a lifetime.
// SAFETY: The index is a valid index into the children array.
// SAFETY: The resulting mutable pointer cannot be yielded by any other node.
#[allow(unsafe_code)]
let (data, children) = unsafe { tree.take_entry(index) };
Node {
tree,
index,
item: unsafe { &mut *(children as *mut Item<T>).add(index) },
children,
data,
}
})
}
}

/// An iterator over chunks of [`Node`]s in a [`Tree`]
pub struct Chunks<'a, T> {
tree: &'a Tree<T>,
chunk_size: usize,
cursor: usize,
}

impl<'a, T> Iterator for Chunks<'a, T>
where
T: Default,
{
type Item = Vec<Node<'a, T>>;

fn next(&mut self) -> Option<Self::Item> {
if self.cursor == self.tree.one_past_last_seen_root {
return None;
}
let mut items_remaining = self.chunk_size;
let mut res = Vec::with_capacity(self.chunk_size);

while items_remaining > 0 && self.cursor < self.tree.one_past_last_seen_root {
// SAFETY: The index is valid as the cursor cannot surpass the amount of items. `one_past_last_seen_root`
// is guaranteed to be self.tree.items.len() at most, or smaller.
// Then see `take_entry_if_root(…)`
#[allow(unsafe_code)]
if let Some((data, children)) = unsafe { self.tree.from_iter_take_entry_if_root(self.cursor) } {
res.push(Node {
tree: self.tree,
index: self.cursor,
children,
data,
});
items_remaining -= 1;
}
self.cursor += 1;
}

if res.is_empty() {
None
} else {
Some(res)
}
}
}
Loading

0 comments on commit f01dc54

Please sign in to comment.