From d6b8977ce1662b75d9b0a82978b387dc7faf3979 Mon Sep 17 00:00:00 2001 From: Martin von Zweigbergk Date: Wed, 22 Nov 2023 17:11:48 -0800 Subject: [PATCH 1/6] rename an `Item`-typed variable from `node` to `item` I found it confusing that `node` is not a `Node` but an `Item`. --- gix-pack/src/cache/delta/traverse/resolve.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gix-pack/src/cache/delta/traverse/resolve.rs b/gix-pack/src/cache/delta/traverse/resolve.rs index ceb81a4dc62..7b37d8038eb 100644 --- a/gix-pack/src/cache/delta/traverse/resolve.rs +++ b/gix-pack/src/cache/delta/traverse/resolve.rs @@ -30,7 +30,7 @@ pub(crate) struct State<'items, F, MBFN, T: Send> { pub(crate) fn deltas( objects: gix_features::progress::StepShared, size: gix_features::progress::StepShared, - node: &mut Item, + item: &mut Item, State { delta_bytes, fully_resolved_delta_bytes, @@ -70,7 +70,7 @@ where let mut nodes: Vec<_> = vec![( root_level, Node { - item: node, + item, child_items: child_items.clone(), }, )]; From d27857d7a6fb5d4b858e0398f3f1f2b5656e70d0 Mon Sep 17 00:00:00 2001 From: Martin von Zweigbergk Date: Thu, 23 Nov 2023 09:49:55 -0800 Subject: [PATCH 2/6] extract a `ItemSliceSend::get_mut()` from `Node::into_child_iter()` --- gix-pack/src/cache/delta/traverse/util.rs | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/gix-pack/src/cache/delta/traverse/util.rs b/gix-pack/src/cache/delta/traverse/util.rs index 5807846df5e..4a88fa33be9 100644 --- a/gix-pack/src/cache/delta/traverse/util.rs +++ b/gix-pack/src/cache/delta/traverse/util.rs @@ -20,6 +20,13 @@ where phantom: PhantomData, } } + + /// SAFETY: The index must point into the slice and must not be reused concurrently. + #[allow(unsafe_code)] + pub unsafe fn get_mut(&self, index: usize) -> &'a mut T { + // SAFETY: The children array is alive by the 'a lifetime. + unsafe { &mut *self.items.add(index) } + } } /// SAFETY: This would be unsafe if this would ever be abused, but it's used internally and only in a way that assure that the pointers @@ -72,15 +79,12 @@ impl<'a, T: Send> Node<'a, T> { /// Children are `Node`s referring to pack entries whose base object is this pack entry. pub fn into_child_iter(self) -> impl Iterator> + 'a { let children = self.child_items; - self.item.children.iter().map(move |&index| { - // SAFETY: The children array is alive by the 'a lifetime. - // SAFETY: The index is a valid index into the children array. - // SAFETY: The resulting mutable pointer cannot be yielded by any other node. - #[allow(unsafe_code)] - Node { - item: unsafe { &mut *children.items.add(index as usize) }, - child_items: children.clone(), - } + // SAFETY: The index is a valid index into the children array. + // SAFETY: The resulting mutable pointer cannot be yielded by any other node. + #[allow(unsafe_code)] + self.item.children.iter().map(move |&index| Node { + item: unsafe { children.get_mut(index as usize) }, + child_items: children.clone(), }) } } From d655ee13779c40c28e518056f539e38d1b52ac8c Mon Sep 17 00:00:00 2001 From: Martin von Zweigbergk Date: Thu, 23 Nov 2023 09:46:59 -0800 Subject: [PATCH 3/6] move delta `Node` into `resolve.rs` The `Node` type is only used in `resolve.rs`, and using it safely depends on using the child items correctly, so let's define it there. --- gix-pack/src/cache/delta/traverse/resolve.rs | 47 ++++++++++++++++++-- gix-pack/src/cache/delta/traverse/util.rs | 44 ------------------ 2 files changed, 43 insertions(+), 48 deletions(-) diff --git a/gix-pack/src/cache/delta/traverse/resolve.rs b/gix-pack/src/cache/delta/traverse/resolve.rs index 7b37d8038eb..5d54f7d9b86 100644 --- a/gix-pack/src/cache/delta/traverse/resolve.rs +++ b/gix-pack/src/cache/delta/traverse/resolve.rs @@ -7,16 +7,55 @@ use gix_features::{progress::Progress, threading, zlib}; use crate::{ cache::delta::{ - traverse::{ - util::{ItemSliceSend, Node}, - Context, Error, - }, + traverse::{util::ItemSliceSend, Context, Error}, Item, }, data, data::EntryRange, }; +/// An item returned by `iter_root_chunks`, allowing access to the `data` stored alongside nodes in a [`Tree`]. +pub(crate) struct Node<'a, T: Send> { + pub item: &'a mut Item, + pub child_items: ItemSliceSend<'a, Item>, +} + +impl<'a, T: Send> Node<'a, T> { + /// Returns the offset into the pack at which the `Node`s data is located. + pub fn offset(&self) -> u64 { + self.item.offset + } + + /// Returns the slice into the data pack at which the pack entry is located. + pub fn entry_slice(&self) -> crate::data::EntryRange { + self.item.offset..self.item.next_offset + } + + /// Returns the node data associated with this node. + pub fn data(&mut self) -> &mut T { + &mut self.item.data + } + + /// Returns true if this node has children, e.g. is not a leaf in the tree. + pub fn has_children(&self) -> bool { + !self.item.children.is_empty() + } + + /// Transform this `Node` into an iterator over its children. + /// + /// Children are `Node`s referring to pack entries whose base object is this pack entry. + pub fn into_child_iter(self) -> impl Iterator> + 'a { + let children = self.child_items; + // SAFETY: The index is a valid index into the children array. + // SAFETY: The resulting mutable pointer cannot be yielded by any other node. + #[allow(unsafe_code)] + self.item.children.iter().map(move |&index| Node { + item: unsafe { children.get_mut(index as usize) }, + child_items: children.clone(), + }) + } +} + pub(crate) struct State<'items, F, MBFN, T: Send> { pub delta_bytes: Vec, pub fully_resolved_delta_bytes: Vec, diff --git a/gix-pack/src/cache/delta/traverse/util.rs b/gix-pack/src/cache/delta/traverse/util.rs index 4a88fa33be9..63cb81a143d 100644 --- a/gix-pack/src/cache/delta/traverse/util.rs +++ b/gix-pack/src/cache/delta/traverse/util.rs @@ -1,7 +1,5 @@ use std::marker::PhantomData; -use crate::cache::delta::Item; - pub(crate) struct ItemSliceSend<'a, T> where T: Send, @@ -46,45 +44,3 @@ where // SAFETY: T is `Send`, and we only ever access one T at a time. And, ptrs need that assurance, I wonder if it's always right. #[allow(unsafe_code)] unsafe impl Send for ItemSliceSend<'_, T> where T: Send {} - -/// An item returned by `iter_root_chunks`, allowing access to the `data` stored alongside nodes in a [`Tree`]. -pub(crate) struct Node<'a, T: Send> { - pub item: &'a mut Item, - pub child_items: ItemSliceSend<'a, Item>, -} - -impl<'a, T: Send> Node<'a, T> { - /// Returns the offset into the pack at which the `Node`s data is located. - pub fn offset(&self) -> u64 { - self.item.offset - } - - /// Returns the slice into the data pack at which the pack entry is located. - pub fn entry_slice(&self) -> crate::data::EntryRange { - self.item.offset..self.item.next_offset - } - - /// Returns the node data associated with this node. - pub fn data(&mut self) -> &mut T { - &mut self.item.data - } - - /// Returns true if this node has children, e.g. is not a leaf in the tree. - pub fn has_children(&self) -> bool { - !self.item.children.is_empty() - } - - /// Transform this `Node` into an iterator over its children. - /// - /// Children are `Node`s referring to pack entries whose base object is this pack entry. - pub fn into_child_iter(self) -> impl Iterator> + 'a { - let children = self.child_items; - // SAFETY: The index is a valid index into the children array. - // SAFETY: The resulting mutable pointer cannot be yielded by any other node. - #[allow(unsafe_code)] - self.item.children.iter().map(move |&index| Node { - item: unsafe { children.get_mut(index as usize) }, - child_items: children.clone(), - }) - } -} From 7301ca13f89f2c0aafb145aedee4b32481920620 Mon Sep 17 00:00:00 2001 From: Martin von Zweigbergk Date: Thu, 23 Nov 2023 10:22:30 -0800 Subject: [PATCH 4/6] make delta `Node` private to `resolve.rs` --- gix-pack/src/cache/delta/traverse/resolve.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gix-pack/src/cache/delta/traverse/resolve.rs b/gix-pack/src/cache/delta/traverse/resolve.rs index 5d54f7d9b86..a637b1b33f7 100644 --- a/gix-pack/src/cache/delta/traverse/resolve.rs +++ b/gix-pack/src/cache/delta/traverse/resolve.rs @@ -15,9 +15,9 @@ use crate::{ }; /// An item returned by `iter_root_chunks`, allowing access to the `data` stored alongside nodes in a [`Tree`]. -pub(crate) struct Node<'a, T: Send> { - pub item: &'a mut Item, - pub child_items: ItemSliceSend<'a, Item>, +struct Node<'a, T: Send> { + item: &'a mut Item, + child_items: ItemSliceSend<'a, Item>, } impl<'a, T: Send> Node<'a, T> { @@ -225,7 +225,7 @@ where /// system. Since this thread will take a controlling function, we may spawn one more than that. In threaded mode, we will finish /// all remaining work. #[allow(clippy::too_many_arguments)] -pub(crate) fn deltas_mt( +fn deltas_mt( mut threads_to_create: isize, decompressed_bytes_by_pack_offset: BTreeMap)>, objects: gix_features::progress::StepShared, From 43bca114ae4de2e84fdcd3c1bdcb736417d0f4e6 Mon Sep 17 00:00:00 2001 From: Martin von Zweigbergk Date: Thu, 23 Nov 2023 10:24:30 -0800 Subject: [PATCH 5/6] remove `Clone` impl of `ItemSliceSend` and make it `Sync` instead The `Clone` impl allows two `ItemSliceSend` to be backed by the same slice. That makes the behavior of `get_mut()` harder to reason about because we have to consider clones of the `ItemSliceSend` to determine if it's safe. By making it `Sync`, we make it more obvious that multiple threads referring to the same instance need to be considered. I renamed the type to `ItemSliceSync` since it's now also `Sync`. We could make the type also check the bounds and that an individual element has not been borrowed more than once. Then we could make `get_mut()` not `unsafe`. --- gix-pack/src/cache/delta/traverse/mod.rs | 26 +++++++++--------- gix-pack/src/cache/delta/traverse/resolve.rs | 16 ++++------- gix-pack/src/cache/delta/traverse/util.rs | 28 +++++++------------- 3 files changed, 26 insertions(+), 44 deletions(-) diff --git a/gix-pack/src/cache/delta/traverse/mod.rs b/gix-pack/src/cache/delta/traverse/mod.rs index 553a633a3a0..679e9bdda87 100644 --- a/gix-pack/src/cache/delta/traverse/mod.rs +++ b/gix-pack/src/cache/delta/traverse/mod.rs @@ -9,7 +9,7 @@ use gix_features::{ }; use crate::{ - cache::delta::{traverse::util::ItemSliceSend, Item, Tree}, + cache::delta::{traverse::util::ItemSliceSync, Item, Tree}, data::EntryRange, }; @@ -133,25 +133,23 @@ where let object_progress = OwnShared::new(Mutable::new(object_progress)); let start = std::time::Instant::now(); + let child_items = ItemSliceSync::new(&mut self.child_items); + let child_items = &child_items; in_parallel_with_slice( &mut self.root_items, thread_limit, { - let child_items = ItemSliceSend::new(&mut self.child_items); { let object_progress = object_progress.clone(); - move |thread_index| { - let _ = &child_items; - resolve::State { - delta_bytes: Vec::::with_capacity(4096), - fully_resolved_delta_bytes: Vec::::with_capacity(4096), - progress: Box::new( - threading::lock(&object_progress).add_child(format!("thread {thread_index}")), - ), - resolve: resolve.clone(), - modify_base: inspect_object.clone(), - child_items: child_items.clone(), - } + move |thread_index| resolve::State { + delta_bytes: Vec::::with_capacity(4096), + fully_resolved_delta_bytes: Vec::::with_capacity(4096), + progress: Box::new( + threading::lock(&object_progress).add_child(format!("thread {thread_index}")), + ), + resolve: resolve.clone(), + modify_base: inspect_object.clone(), + child_items, } } }, diff --git a/gix-pack/src/cache/delta/traverse/resolve.rs b/gix-pack/src/cache/delta/traverse/resolve.rs index a637b1b33f7..87dc1475713 100644 --- a/gix-pack/src/cache/delta/traverse/resolve.rs +++ b/gix-pack/src/cache/delta/traverse/resolve.rs @@ -7,7 +7,7 @@ use gix_features::{progress::Progress, threading, zlib}; use crate::{ cache::delta::{ - traverse::{util::ItemSliceSend, Context, Error}, + traverse::{util::ItemSliceSync, Context, Error}, Item, }, data, @@ -17,7 +17,7 @@ use crate::{ /// An item returned by `iter_root_chunks`, allowing access to the `data` stored alongside nodes in a [`Tree`]. struct Node<'a, T: Send> { item: &'a mut Item, - child_items: ItemSliceSend<'a, Item>, + child_items: &'a ItemSliceSync<'a, Item>, } impl<'a, T: Send> Node<'a, T> { @@ -51,7 +51,7 @@ impl<'a, T: Send> Node<'a, T> { #[allow(unsafe_code)] self.item.children.iter().map(move |&index| Node { item: unsafe { children.get_mut(index as usize) }, - child_items: children.clone(), + child_items: children, }) } } @@ -62,7 +62,7 @@ pub(crate) struct State<'items, F, MBFN, T: Send> { pub progress: Box, pub resolve: F, pub modify_base: MBFN, - pub child_items: ItemSliceSend<'items, Item>, + pub child_items: &'items ItemSliceSync<'items, Item>, } #[allow(clippy::too_many_arguments)] @@ -106,13 +106,7 @@ where // each node is a base, and its children always start out as deltas which become a base after applying them. // These will be pushed onto our stack until all are processed let root_level = 0; - let mut nodes: Vec<_> = vec![( - root_level, - Node { - item, - child_items: child_items.clone(), - }, - )]; + let mut nodes: Vec<_> = vec![(root_level, Node { item, child_items })]; while let Some((level, mut base)) = nodes.pop() { if should_interrupt.load(Ordering::Relaxed) { return Err(Error::Interrupted); diff --git a/gix-pack/src/cache/delta/traverse/util.rs b/gix-pack/src/cache/delta/traverse/util.rs index 63cb81a143d..5f09b918d64 100644 --- a/gix-pack/src/cache/delta/traverse/util.rs +++ b/gix-pack/src/cache/delta/traverse/util.rs @@ -1,6 +1,6 @@ use std::marker::PhantomData; -pub(crate) struct ItemSliceSend<'a, T> +pub(crate) struct ItemSliceSync<'a, T> where T: Send, { @@ -8,12 +8,12 @@ where phantom: PhantomData<&'a T>, } -impl<'a, T> ItemSliceSend<'a, T> +impl<'a, T> ItemSliceSync<'a, T> where T: Send, { pub fn new(items: &'a mut [T]) -> Self { - ItemSliceSend { + ItemSliceSync { items: items.as_mut_ptr(), phantom: PhantomData, } @@ -27,20 +27,10 @@ where } } -/// SAFETY: This would be unsafe if this would ever be abused, but it's used internally and only in a way that assure that the pointers -/// don't violate aliasing rules. -impl Clone for ItemSliceSend<'_, T> -where - T: Send, -{ - fn clone(&self) -> Self { - ItemSliceSend { - items: self.items, - phantom: self.phantom, - } - } -} - -// SAFETY: T is `Send`, and we only ever access one T at a time. And, ptrs need that assurance, I wonder if it's always right. +// SAFETY: T is `Send`, and we only use the pointer for creating new pointers. +#[allow(unsafe_code)] +unsafe impl Send for ItemSliceSync<'_, T> where T: Send {} +// SAFETY: T is `Send`, and as long as the user follows the contract of +// `get_mut()`, we only ever access one T at a time. #[allow(unsafe_code)] -unsafe impl Send for ItemSliceSend<'_, T> where T: Send {} +unsafe impl Sync for ItemSliceSync<'_, T> where T: Send {} From c4807deffa170062c97e8452f4e20f73fb3a8aff Mon Sep 17 00:00:00 2001 From: Martin von Zweigbergk Date: Thu, 23 Nov 2023 12:11:07 -0800 Subject: [PATCH 6/6] give `Node` an unsafe constructor `Node` is only safe if it doesn't repeat any indexes into the `ItemSliceSync`. Let's document this by giving it an unsafe constructor. To enforce that, I also moved it into a private module. --- gix-pack/src/cache/delta/traverse/resolve.rs | 83 ++++++++++++-------- 1 file changed, 49 insertions(+), 34 deletions(-) diff --git a/gix-pack/src/cache/delta/traverse/resolve.rs b/gix-pack/src/cache/delta/traverse/resolve.rs index 87dc1475713..26a18248501 100644 --- a/gix-pack/src/cache/delta/traverse/resolve.rs +++ b/gix-pack/src/cache/delta/traverse/resolve.rs @@ -14,45 +14,57 @@ use crate::{ data::EntryRange, }; -/// An item returned by `iter_root_chunks`, allowing access to the `data` stored alongside nodes in a [`Tree`]. -struct Node<'a, T: Send> { - item: &'a mut Item, - child_items: &'a ItemSliceSync<'a, Item>, -} +mod node { + use crate::cache::delta::{traverse::util::ItemSliceSync, Item}; -impl<'a, T: Send> Node<'a, T> { - /// Returns the offset into the pack at which the `Node`s data is located. - pub fn offset(&self) -> u64 { - self.item.offset + /// An item returned by `iter_root_chunks`, allowing access to the `data` stored alongside nodes in a [`Tree`]. + pub(crate) struct Node<'a, T: Send> { + item: &'a mut Item, + child_items: &'a ItemSliceSync<'a, Item>, } - /// Returns the slice into the data pack at which the pack entry is located. - pub fn entry_slice(&self) -> crate::data::EntryRange { - self.item.offset..self.item.next_offset + impl<'a, T: Send> Node<'a, T> { + /// SAFETY: The child_items must be unique among between users of the `ItemSliceSync`. + #[allow(unsafe_code)] + pub(crate) unsafe fn new(item: &'a mut Item, child_items: &'a ItemSliceSync<'a, Item>) -> Self { + Node { item, child_items } + } } - /// Returns the node data associated with this node. - pub fn data(&mut self) -> &mut T { - &mut self.item.data - } + impl<'a, T: Send> Node<'a, T> { + /// Returns the offset into the pack at which the `Node`s data is located. + pub fn offset(&self) -> u64 { + self.item.offset + } - /// Returns true if this node has children, e.g. is not a leaf in the tree. - pub fn has_children(&self) -> bool { - !self.item.children.is_empty() - } + /// Returns the slice into the data pack at which the pack entry is located. + pub fn entry_slice(&self) -> crate::data::EntryRange { + self.item.offset..self.item.next_offset + } - /// Transform this `Node` into an iterator over its children. - /// - /// Children are `Node`s referring to pack entries whose base object is this pack entry. - pub fn into_child_iter(self) -> impl Iterator> + 'a { - let children = self.child_items; - // SAFETY: The index is a valid index into the children array. - // SAFETY: The resulting mutable pointer cannot be yielded by any other node. - #[allow(unsafe_code)] - self.item.children.iter().map(move |&index| Node { - item: unsafe { children.get_mut(index as usize) }, - child_items: children, - }) + /// Returns the node data associated with this node. + pub fn data(&mut self) -> &mut T { + &mut self.item.data + } + + /// Returns true if this node has children, e.g. is not a leaf in the tree. + pub fn has_children(&self) -> bool { + !self.item.children.is_empty() + } + + /// Transform this `Node` into an iterator over its children. + /// + /// Children are `Node`s referring to pack entries whose base object is this pack entry. + pub fn into_child_iter(self) -> impl Iterator> + 'a { + let children = self.child_items; + // SAFETY: The index is a valid index into the children array. + // SAFETY: The resulting mutable pointer cannot be yielded by any other node. + #[allow(unsafe_code)] + self.item.children.iter().map(move |&index| Node { + item: unsafe { children.get_mut(index as usize) }, + child_items: children, + }) + } } } @@ -106,7 +118,10 @@ where // each node is a base, and its children always start out as deltas which become a base after applying them. // These will be pushed onto our stack until all are processed let root_level = 0; - let mut nodes: Vec<_> = vec![(root_level, Node { item, child_items })]; + // SAFETY: The child items are unique + #[allow(unsafe_code)] + let root_node = unsafe { node::Node::new(item, child_items) }; + let mut nodes: Vec<_> = vec![(root_level, root_node)]; while let Some((level, mut base)) = nodes.pop() { if should_interrupt.load(Ordering::Relaxed) { return Err(Error::Interrupted); @@ -225,7 +240,7 @@ fn deltas_mt( objects: gix_features::progress::StepShared, size: gix_features::progress::StepShared, progress: &dyn Progress, - nodes: Vec<(u16, Node<'_, T>)>, + nodes: Vec<(u16, node::Node<'_, T>)>, resolve: F, resolve_data: &R, modify_base: MBFN,