From 167f49f2fc71900da707d6cfd0c778e0fc3687df Mon Sep 17 00:00:00 2001
From: Sebastian Thiel
Date: Thu, 9 Jan 2025 08:28:27 +0100
Subject: [PATCH 1/3] reproduce issue with 'too many packs' for slotmap

---
 gix/src/commit.rs             |  8 +++++
 gix/tests/gix/remote/fetch.rs | 62 +++++++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+)

diff --git a/gix/src/commit.rs b/gix/src/commit.rs
index ad191f64a4a..d9c8d635ba6 100644
--- a/gix/src/commit.rs
+++ b/gix/src/commit.rs
@@ -1,6 +1,8 @@
 //!
 #![allow(clippy::empty_docs)]
 
+use std::convert::Infallible;
+
 /// An empty array of a type usable with the `gix::easy` API to help declaring no parents should be used
 pub const NO_PARENT_IDS: [gix_hash::ObjectId; 0] = [];
 
@@ -22,6 +24,12 @@ pub enum Error {
     ReferenceEdit(#[from] crate::reference::edit::Error),
 }
 
+impl From<Infallible> for Error {
+    fn from(_value: Infallible) -> Self {
+        unreachable!("cannot be invoked")
+    }
+}
+
 ///
 #[cfg(feature = "revision")]
 pub mod describe {
diff --git a/gix/tests/gix/remote/fetch.rs b/gix/tests/gix/remote/fetch.rs
index aa4d75f6241..c067f8d641a 100644
--- a/gix/tests/gix/remote/fetch.rs
+++ b/gix/tests/gix/remote/fetch.rs
@@ -14,12 +14,15 @@ mod shallow {
 mod blocking_and_async_io {
     use std::sync::atomic::AtomicBool;
 
+    use gix::interrupt::IS_INTERRUPTED;
     use gix::{
         config::tree::Protocol,
         remote::{fetch, fetch::Status, Direction::Fetch},
     };
     use gix_features::progress;
+    use gix_odb::store::init::Slots;
     use gix_protocol::maybe_async;
+    use gix_testtools::tempfile;
     use gix_testtools::tempfile::TempDir;
 
     use crate::{
@@ -85,6 +88,65 @@ mod blocking_and_async_io {
         try_repo_rw(name).unwrap()
     }
 
+    #[test]
+    fn fetch_more_packs_than_can_be_handled() -> gix_testtools::Result {
+        fn create_empty_commit(repo: &gix::Repository) -> anyhow::Result<()> {
+            let name = repo.head_name()?.expect("no detached head");
+            repo.commit(
+                name.as_bstr(),
+                "empty",
+                gix::hash::ObjectId::empty_tree(repo.object_hash()),
+                repo.try_find_reference(name.as_ref())?.map(|r| r.id()),
+            )?;
+            Ok(())
+        }
+        let remote_dir = tempfile::tempdir()?;
+        let remote_repo = gix::init_bare(remote_dir.path())?;
+        create_empty_commit(&remote_repo)?;
+
+        for max_packs in 1..=2 {
+            let local_dir = tempfile::tempdir()?;
+            let (local_repo, _) = gix::clone::PrepareFetch::new(
+                remote_repo.path(),
+                local_dir.path(),
+                gix::create::Kind::Bare,
+                Default::default(),
+                gix::open::Options::isolated().object_store_slots(Slots::Given(max_packs)),
+            )?
+            .fetch_only(gix::progress::Discard, &IS_INTERRUPTED)?;
+
+            let remote = local_repo
+                .branch_remote(
+                    local_repo.head_ref()?.expect("branch available").name().shorten(),
+                    Fetch,
+                )
+                .expect("remote is configured after clone")?;
+            for round in 1.. {
+                eprintln!("Fetch number {round}…");
+                create_empty_commit(&remote_repo)?;
+                match remote
+                    .connect(Fetch)?
+                    .prepare_fetch(gix::progress::Discard, Default::default())?
+                    .receive(gix::progress::Discard, &IS_INTERRUPTED)
+                {
+                    Ok(out) => {
+                        for local_tracking_branch_name in out.ref_map.mappings.into_iter().filter_map(|m| m.local) {
+                            let r = local_repo.find_reference(&local_tracking_branch_name)?;
+                            r.id()
+                                .object()
+                                .expect("object should be present after fetching, triggering pack refreshes works");
+                        }
+                    }
+                    Err(err) => assert_eq!(
+                        err.to_string(),
+                        "It should indicate that the ODB is exhausted for now - we can't grow"
+                    ),
+                }
+            }
+        }
+        Ok(())
+    }
+
     #[test]
     #[cfg(feature = "blocking-network-client")]
     #[allow(clippy::result_large_err)]

From 34197e7c9b8748e61482206c041205d221d717bc Mon Sep 17 00:00:00 2001
From: Sebastian Thiel
Date: Mon, 13 Jan 2025 15:59:58 +0100
Subject: [PATCH 2/3] fix: improve handling of overflows when there are more
 packs than we can hold.

Internally, there is a statically allocated vec which holds opened packs and
indices. When reconciling the disk-state with what's currently loaded, it was
possible to get into a situation where there were more files than we could fit
into the slotmap. The system now fails loudly with a specific Error so callers
can deal with it.
---
 gix-odb/src/lib.rs                            |  2 +-
 gix-odb/src/store_impls/dynamic/load_index.rs |  7 ++++---
 gix/tests/gix/remote/fetch.rs                 | 11 ++++++-----
 3 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/gix-odb/src/lib.rs b/gix-odb/src/lib.rs
index ca91fa7a185..e108e8f4df4 100644
--- a/gix-odb/src/lib.rs
+++ b/gix-odb/src/lib.rs
@@ -119,7 +119,7 @@ pub struct Store {
 
     /// The below state acts like a slot-map with each slot is mutable when the write lock is held, but readable independently of it.
    /// This allows multiple file to be loaded concurrently if there is multiple handles requesting to load packs or additional indices.
-    /// The map is static and cannot typically change.
+    /// The map is static and cannot change.
     /// It's read often and changed rarely.
     pub(crate) files: Vec,
 
diff --git a/gix-odb/src/store_impls/dynamic/load_index.rs b/gix-odb/src/store_impls/dynamic/load_index.rs
index a83381976cb..5cb51fb97e4 100644
--- a/gix-odb/src/store_impls/dynamic/load_index.rs
+++ b/gix-odb/src/store_impls/dynamic/load_index.rs
@@ -266,7 +266,7 @@ impl super::Store {
                         Option::as_ref(&files_guard).expect("slot is set or we wouldn't know it points to this file");
                     if index_info.is_multi_index() && files.mtime() != mtime {
                         // we have a changed multi-pack index. We can't just change the existing slot as it may alter slot indices
-                        // that are currently available. Instead we have to move what's there into a new slot, along with the changes,
+                        // that are currently available. Instead, we have to move what's there into a new slot, along with the changes,
                         // and later free the slot or dispose of the index in the slot (like we do for removed/missing files).
                         index_paths_to_add.push_back((index_info, mtime, Some(slot_idx)));
                         // If the current slot is loaded, the soon-to-be copied multi-index path will be loaded as well.
@@ -295,10 +295,11 @@ impl super::Store {
             .map_or(0, |idx| (idx + 1) % self.files.len());
         let mut num_indices_checked = 0;
         let mut needs_generation_change = false;
+        dbg!(idx_by_index_path.len());
         let mut slot_indices_to_remove: Vec<_> = idx_by_index_path.into_values().collect();
         while let Some((mut index_info, mtime, move_from_slot_idx)) = index_paths_to_add.pop_front() {
             'increment_slot_index: loop {
-                if num_indices_checked == self.files.len() {
+                if dbg!(num_indices_checked) == dbg!(self.files.len()) {
                     return Err(Error::InsufficientSlots {
                         current: self.files.len(),
                         needed: index_paths_to_add.len() + 1, /*the one currently popped off*/
@@ -502,7 +503,7 @@ impl super::Store {
         }
         // Unlike libgit2, do not sort by modification date, but by size and put the biggest indices first. That way
         // the chance to hit an object should be higher. We leave it to the handle to sort by LRU.
-        // Git itself doesn't change the order which may safe time, but we want it to be stable which also helps some tests.
+        // Git itself doesn't change the order which may save time, but we want it to be stable which also helps some tests.
         // NOTE: this will work well for well-packed repos or those using geometric repacking, but force us to open a lot
         // of files when dealing with new objects, as there is no notion of recency here as would be with unmaintained
         // repositories. Different algorithms should be provided, like newest packs first, and possibly a mix of both
diff --git a/gix/tests/gix/remote/fetch.rs b/gix/tests/gix/remote/fetch.rs
index c067f8d641a..ea584d64feb 100644
--- a/gix/tests/gix/remote/fetch.rs
+++ b/gix/tests/gix/remote/fetch.rs
@@ -100,12 +100,13 @@ mod blocking_and_async_io {
             )?;
             Ok(())
         }
-        let remote_dir = tempfile::tempdir()?;
-        let remote_repo = gix::init_bare(remote_dir.path())?;
-        create_empty_commit(&remote_repo)?;
-
         for max_packs in 1..=2 {
+            let remote_dir = tempfile::tempdir()?;
+            let remote_repo = gix::init_bare(remote_dir.path())?;
+            create_empty_commit(&remote_repo)?;
+
             let local_dir = tempfile::tempdir()?;
+            dbg!(max_packs);
             let (local_repo, _) = gix::clone::PrepareFetch::new(
                 remote_repo.path(),
                 local_dir.path(),
@@ -122,7 +123,7 @@ mod blocking_and_async_io {
                 )
                 .expect("remote is configured after clone")?;
             for round in 1.. {
-                eprintln!("Fetch number {round}…");
+                dbg!(round);
                 create_empty_commit(&remote_repo)?;
                 match remote
                     .connect(Fetch)?
From 27343ea3c07651039f67af43f81df6c855725e9e Mon Sep 17 00:00:00 2001
From: Sebastian Thiel
Date: Sat, 22 Feb 2025 14:36:17 +0100
Subject: [PATCH 3/3] remove this

---
 gix-odb/src/store_impls/dynamic/load_index.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gix-odb/src/store_impls/dynamic/load_index.rs b/gix-odb/src/store_impls/dynamic/load_index.rs
index 5cb51fb97e4..a5367820039 100644
--- a/gix-odb/src/store_impls/dynamic/load_index.rs
+++ b/gix-odb/src/store_impls/dynamic/load_index.rs
@@ -257,6 +257,7 @@ impl super::Store {
 
         // Figure out this number based on what we see while handling the existing indices
         let mut num_loaded_indices = 0;
+        dbg!(indices_by_modification_time.len());
         for (index_info, mtime) in indices_by_modification_time.into_iter().map(|(a, b, _)| (a, b)) {
             match idx_by_index_path.remove(index_info.path()) {
                 Some(slot_idx) => {
@@ -295,7 +296,7 @@ impl super::Store {
             .map_or(0, |idx| (idx + 1) % self.files.len());
         let mut num_indices_checked = 0;
         let mut needs_generation_change = false;
-        dbg!(idx_by_index_path.len());
+        dbg!(index_paths_to_add.len(), next_possibly_free_index);
         let mut slot_indices_to_remove: Vec<_> = idx_by_index_path.into_values().collect();
         while let Some((mut index_info, mtime, move_from_slot_idx)) = index_paths_to_add.pop_front() {
             'increment_slot_index: loop {
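
For readers who want to exercise the same failure mode outside the test suite, below is a minimal sketch built from the APIs used in the test of PATCH 1/3 (`object_store_slots(Slots::Given(..))`, `PrepareFetch`, `fetch_only`, `branch_remote`, `receive`). It assumes the `blocking-network-client` feature, the `anyhow` crate for error plumbing, and an illustrative `remote_url`/`dst` pair and `fetch_with_bounded_slots` name; the slot count of 2 is arbitrary and only serves to make slot exhaustion easy to reach.

    use gix::remote::Direction::Fetch;
    use gix_odb::store::init::Slots;

    // Sketch: clone with a deliberately tiny slot-map, then fetch again later.
    // Once more pack files accumulate on disk than `Slots::Given(2)` can hold,
    // `receive()` is expected to fail with an error rooted in the store's
    // `Error::InsufficientSlots` (the "fail loudly" behaviour described in
    // PATCH 2/3), and the caller can react to it in the `Err` arm below.
    fn fetch_with_bounded_slots(remote_url: &str, dst: &std::path::Path) -> anyhow::Result<()> {
        let (repo, _outcome) = gix::clone::PrepareFetch::new(
            remote_url,
            dst,
            gix::create::Kind::Bare,
            Default::default(),
            gix::open::Options::isolated().object_store_slots(Slots::Given(2)),
        )?
        .fetch_only(gix::progress::Discard, &gix::interrupt::IS_INTERRUPTED)?;

        let remote = repo
            .branch_remote(repo.head_ref()?.expect("branch available").name().shorten(), Fetch)
            .expect("remote is configured after clone")?;
        match remote
            .connect(Fetch)?
            .prepare_fetch(gix::progress::Discard, Default::default())?
            .receive(gix::progress::Discard, &gix::interrupt::IS_INTERRUPTED)
        {
            Ok(outcome) => eprintln!("fetched {} ref mappings", outcome.ref_map.mappings.len()),
            Err(err) => eprintln!("fetch failed, possibly because all pack slots are in use: {err}"),
        }
        Ok(())
    }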