Skip to content

Commit

Permalink
Put Tree in its own module
Browse files Browse the repository at this point in the history
This means only the code in tree.rs has the ability to violate the relevant invariants
  • Loading branch information
Manishearth committed Jan 6, 2024
1 parent 6587eea commit c85a981
Show file tree
Hide file tree
Showing 4 changed files with 261 additions and 212 deletions.
210 changes: 4 additions & 206 deletions gix-pack/src/cache/delta/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,216 +17,14 @@ pub mod traverse;
///
pub mod from_offsets;

/// An item stored within the [`Tree`] whose data is stored in a pack file, identified by
/// the offset of its first (`offset`) and last (`next_offset`) bytes.
///
/// It represents either a root entry, or one that relies on a base to be resolvable,
/// alongside associated `data` `T`.
pub struct Item<T> {
/// The offset into the pack file at which the pack entry's data is located.
pub offset: crate::data::Offset,
/// The offset of the next item in the pack file.
pub next_offset: crate::data::Offset,
/// Data to store with each Item, effectively data associated with each entry in a pack.
pub data: T,
/// Indices into our Tree's `items`, one for each pack entry that depends on us.
///
/// Limited to u32 as that's the maximum amount of objects in a pack.
// SAFETY INVARIANT:
// - only one Item in a tree may have any given child index. `future_child_offsets`
// should also not contain any indices found in `children`.\
// - These indices should be in bounds for tree.child_items
children: Vec<u32>,
}

/// Identify what kind of node we have last seen
enum NodeKind {
Root,
Child,
}

/// A tree that allows one-time iteration over all nodes and their children, consuming it in the process,
/// while being shareable among threads without a lock.
/// It does this by making the guarantee that iteration only happens once.
pub struct Tree<T> {
/// The root nodes, i.e. base objects
// SAFETY invariant: see Item.children
root_items: Vec<Item<T>>,
/// The child nodes, i.e. those that rely a base object, like ref and ofs delta objects
// SAFETY invariant: see Item.children
child_items: Vec<Item<T>>,
/// The last encountered node was either a root or a child.
last_seen: Option<NodeKind>,
/// Future child offsets, associating their offset into the pack with their index in the items array.
/// (parent_offset, child_index)
// SAFETY invariant:
// - None of these child indices should already have parents
// i.e. future_child_offsets[i].1 should never be also found
// in Item.children. Indices should be found here at most once.
// - These indices should be in bounds for tree.child_items.
future_child_offsets: Vec<(crate::data::Offset, usize)>,
}

impl<T> Tree<T> {
/// Instantiate a empty tree capable of storing `num_objects` amounts of items.
pub fn with_capacity(num_objects: usize) -> Result<Self, Error> {
Ok(Tree {
root_items: Vec::with_capacity(num_objects / 2),
child_items: Vec::with_capacity(num_objects / 2),
last_seen: None,
future_child_offsets: Vec::new(),
})
}

fn num_items(&self) -> usize {
self.root_items.len() + self.child_items.len()
}

fn assert_is_incrementing_and_update_next_offset(&mut self, offset: crate::data::Offset) -> Result<(), Error> {
let items = match &self.last_seen {
Some(NodeKind::Root) => &mut self.root_items,
Some(NodeKind::Child) => &mut self.child_items,
None => return Ok(()),
};
let item = &mut items.last_mut().expect("last seen won't lie");
if offset <= item.offset {
return Err(Error::InvariantIncreasingPackOffset {
last_pack_offset: item.offset,
pack_offset: offset,
});
}
item.next_offset = offset;
Ok(())
}

fn set_pack_entries_end_and_resolve_ref_offsets(
&mut self,
pack_entries_end: crate::data::Offset,
) -> Result<(), traverse::Error> {
if !self.future_child_offsets.is_empty() {
for (parent_offset, child_index) in self.future_child_offsets.drain(..) {
// SAFETY invariants upheld:
// - We are draining from future_child_offsets and adding to children, keeping things the same.
// - We can rely on the `future_child_offsets` invariant to be sure that `children` is
// not getting any indices that are already in use in `children` elsewhere
// - The indices are in bounds for child_items since they were in bounds for future_child_offsets,
// we can carry over the invariant.
if let Ok(i) = self.child_items.binary_search_by_key(&parent_offset, |i| i.offset) {
self.child_items[i].children.push(child_index as u32);
} else if let Ok(i) = self.root_items.binary_search_by_key(&parent_offset, |i| i.offset) {
self.root_items[i].children.push(child_index as u32);
} else {
return Err(traverse::Error::OutOfPackRefDelta {
base_pack_offset: parent_offset,
});
}
}
}

self.assert_is_incrementing_and_update_next_offset(pack_entries_end)
.expect("BUG: pack now is smaller than all previously seen entries");
Ok(())
}
/// Tree datastructure
// kept in separate module to encapsulate unsafety (it has field invariants)
mod tree;

/// Add a new root node, one that only has children but is not a child itself, at the given pack `offset` and associate
/// custom `data` with it.
pub fn add_root(&mut self, offset: crate::data::Offset, data: T) -> Result<(), Error> {
self.assert_is_incrementing_and_update_next_offset(offset)?;
self.last_seen = NodeKind::Root.into();
self.root_items.push(Item {
offset,
next_offset: 0,
data,
// SAFETY INVARIANT upheld: there are no children
children: Default::default(),
});
Ok(())
}

/// Add a child of the item at `base_offset` which itself resides at pack `offset` and associate custom `data` with it.
pub fn add_child(
&mut self,
base_offset: crate::data::Offset,
offset: crate::data::Offset,
data: T,
) -> Result<(), Error> {
self.assert_is_incrementing_and_update_next_offset(offset)?;

let next_child_index = self.child_items.len();
// SAFETY INVARIANT upheld:
// - This is one of two methods that modifies `children` and future_child_offsets. Out
// of the two, it is the only one that produces new indices in the system.
// - This always pushes next_child_index to *either* `children` or `future_child_offsets`,
// maintaining the cross-field invariant there.
// - This method will always push to child_items (at the end), incrementing
// future values of next_child_index. This means next_child_index is always
// unique for this method call.
// - As the only method producing new indices, this is the only time
// next_child_index will be added to children/future_child_offsets, upholding the invariant.
// - Since next_child_index will always be a valid index by the end of this method,
// this always produces valid in-bounds indices, upholding the bounds invariant.

if let Ok(i) = self.child_items.binary_search_by_key(&base_offset, |i| i.offset) {
self.child_items[i].children.push(next_child_index as u32);
} else if let Ok(i) = self.root_items.binary_search_by_key(&base_offset, |i| i.offset) {
self.root_items[i].children.push(next_child_index as u32);
} else {
self.future_child_offsets.push((base_offset, next_child_index));
}

self.last_seen = NodeKind::Child.into();
self.child_items.push(Item {
offset,
next_offset: 0,
data,
// SAFETY INVARIANT upheld: there are no children
children: Default::default(),
});
Ok(())
}
}
pub use tree::{Item, Tree};

#[cfg(test)]
mod tests {
mod tree {
mod from_offsets_in_pack {
use std::sync::atomic::AtomicBool;

use crate as pack;

const SMALL_PACK_INDEX: &str = "objects/pack/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx";
const SMALL_PACK: &str = "objects/pack/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack";

const INDEX_V1: &str = "objects/pack/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx";
const PACK_FOR_INDEX_V1: &str = "objects/pack/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack";

use gix_testtools::fixture_path;

#[test]
fn v1() -> Result<(), Box<dyn std::error::Error>> {
tree(INDEX_V1, PACK_FOR_INDEX_V1)
}

#[test]
fn v2() -> Result<(), Box<dyn std::error::Error>> {
tree(SMALL_PACK_INDEX, SMALL_PACK)
}

fn tree(index_path: &str, pack_path: &str) -> Result<(), Box<dyn std::error::Error>> {
let idx = pack::index::File::at(fixture_path(index_path), gix_hash::Kind::Sha1)?;
crate::cache::delta::Tree::from_offsets_in_pack(
&fixture_path(pack_path),
idx.sorted_offsets().into_iter(),
&|ofs| *ofs,
&|id| idx.lookup(id).map(|index| idx.pack_offset_at_index(index)),
&mut gix_features::progress::Discard,
&AtomicBool::new(false),
gix_hash::Kind::Sha1,
)?;
Ok(())
}
}
}

#[test]
fn size_of_pack_tree_item() {
Expand Down
9 changes: 5 additions & 4 deletions gix-pack/src/cache/delta/traverse/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,11 @@ where
let object_progress = OwnShared::new(Mutable::new(object_progress));

let start = std::time::Instant::now();
let child_items = ItemSliceSync::new(&mut self.child_items);
let (mut root_items, mut child_items_vec) = self.take_root_and_child();
let child_items = ItemSliceSync::new(&mut child_items_vec);
let child_items = &child_items;
in_parallel_with_slice(
&mut self.root_items,
&mut root_items,
thread_limit,
{
{
Expand Down Expand Up @@ -180,8 +181,8 @@ where
size_progress.show_throughput(start);

Ok(Outcome {
roots: self.root_items,
children: self.child_items,
roots: root_items,
children: child_items_vec,
})
}
}
4 changes: 2 additions & 2 deletions gix-pack/src/cache/delta/traverse/resolve.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ mod root {

/// Returns true if this node has children, e.g. is not a leaf in the tree.
pub fn has_children(&self) -> bool {
!self.item.children.is_empty()
!self.item.children().is_empty()
}

/// Transform this `Node` into an iterator over its children.
Expand All @@ -69,7 +69,7 @@ mod root {
pub fn into_child_iter(self) -> impl Iterator<Item = Node<'a, T>> + 'a {
let children = self.child_items;
#[allow(unsafe_code)]
self.item.children.iter().map(move |&index| {
self.item.children().iter().map(move |&index| {
// SAFETY: Due to the invariant on new(), we can rely on these indices
// being unique.
let item = unsafe { children.get_mut(index as usize) };
Expand Down
Loading

0 comments on commit c85a981

Please sign in to comment.