-
-
Notifications
You must be signed in to change notification settings - Fork 328
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'feat_basic_connectivity_check'
- Loading branch information
Showing
19 changed files
with
347 additions
and
2 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -281,6 +281,7 @@ members = [ | |
"gix-archive", | ||
"gix-worktree-stream", | ||
"gix-revwalk", | ||
"gix-fsck", | ||
|
||
"tests/tools", | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
use anyhow::Context; | ||
use gix::{objs::Kind, ObjectId}; | ||
|
||
pub fn function(mut repo: gix::Repository, spec: Option<String>, mut out: impl std::io::Write) -> anyhow::Result<()> { | ||
let spec = spec.unwrap_or("HEAD".into()); | ||
|
||
repo.object_cache_size_if_unset(4 * 1024 * 1024); | ||
// We expect to be finding a bunch of non-existent objects here - never refresh the ODB | ||
repo.objects.refresh_never(); | ||
|
||
let id = repo | ||
.rev_parse_single(spec.as_str()) | ||
.context("Only single revisions are supported")?; | ||
let commits: gix::revision::Walk<'_> = id | ||
.object()? | ||
.peel_to_kind(gix::object::Kind::Commit) | ||
.context("Need commitish as starting point")? | ||
.id() | ||
.ancestors() | ||
.all()?; | ||
|
||
let on_missing = |oid: &ObjectId, kind: Kind| { | ||
writeln!(out, "{oid}: {kind}").expect("failed to write output"); | ||
}; | ||
|
||
let mut check = gix_fsck::Connectivity::new(&repo.objects, on_missing); | ||
// Walk all commits, checking each one for connectivity | ||
for commit in commits { | ||
let commit = commit?; | ||
check.check_commit(&commit.id)?; | ||
// Note that we leave parent-iteration to the commits iterator, as it will | ||
// correctly handle shallow repositories which are expected to have the commits | ||
// along the shallow boundary missing. | ||
} | ||
Ok(()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# Changelog | ||
|
||
All notable changes to this project will be documented in this file. | ||
|
||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), | ||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
[package] | ||
name = "gix-fsck" | ||
version = "0.1.0" | ||
repository = "https://github.com/Byron/gitoxide" | ||
authors = ["Cameron Esfahani <[email protected]>", "Sebastian Thiel <[email protected]>"] | ||
license = "MIT OR Apache-2.0" | ||
description = "Verifies the connectivity and validity of objects in the database" | ||
edition = "2021" | ||
include = ["src/**/*", "LICENSE-*"] | ||
rust-version = "1.65" | ||
|
||
[lib] | ||
doctest = false | ||
|
||
[dependencies] | ||
gix-hash = { version = "^0.13.1", path = "../gix-hash" } | ||
gix-hashtable = { version = "^0.4.0", path = "../gix-hashtable" } | ||
gix-object = { version = "^0.38.0", path = "../gix-object" } | ||
|
||
[dev-dependencies] | ||
gix-odb = { path = "../gix-odb" } | ||
gix-testtools = { path = "../tests/tools"} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
../LICENSE-APACHE |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
../LICENSE-MIT |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
//! A library for performing object database integrity and connectivity checks | ||
#![deny(rust_2018_idioms, unsafe_code, missing_docs)] | ||
|
||
use gix_hash::ObjectId; | ||
use gix_hashtable::HashSet; | ||
use gix_object::{tree::EntryMode, Exists, FindExt, Kind}; | ||
use std::collections::VecDeque; | ||
|
||
/// Perform a connectivity check. | ||
pub struct Connectivity<T, F> | ||
where | ||
T: FindExt + Exists, | ||
F: FnMut(&ObjectId, Kind), | ||
{ | ||
/// ODB handle to use for the check | ||
db: T, | ||
/// Closure to invoke when a missing object is encountered | ||
missing_cb: F, | ||
/// Set of Object IDs already (or about to be) scanned during the check | ||
seen: HashSet, | ||
/// A buffer to keep a single object at a time. | ||
buf: Vec<u8>, | ||
} | ||
|
||
impl<T, F> Connectivity<T, F> | ||
where | ||
T: FindExt + Exists, | ||
F: FnMut(&ObjectId, Kind), | ||
{ | ||
/// Instantiate a connectivity check. | ||
pub fn new(db: T, missing_cb: F) -> Connectivity<T, F> { | ||
Connectivity { | ||
db, | ||
missing_cb, | ||
seen: HashSet::default(), | ||
buf: Default::default(), | ||
} | ||
} | ||
|
||
/// Run the connectivity check on the provided commit `oid`. | ||
/// | ||
/// ### Algorithm | ||
/// | ||
/// Walk the trees and blobs referenced by the commit and verify they exist in the ODB. | ||
/// Any objects previously encountered by this instance will be skipped silently. | ||
/// Any referenced blobs that are not present in the ODB will result in a call to the `missing_cb`. | ||
/// Missing commits or trees will cause an error to be returned. | ||
/// - TODO: consider how to handle a missing commit (invoke `missing_cb`, or possibly return a Result?) | ||
pub fn check_commit(&mut self, oid: &ObjectId) -> Result<(), gix_object::find::existing_object::Error> { | ||
// Attempt to insert the commit ID in the set, and if already present, return immediately | ||
if !self.seen.insert(*oid) { | ||
return Ok(()); | ||
} | ||
// Obtain the commit's tree ID | ||
let tree_id = { | ||
let commit = self.db.find_commit(oid, &mut self.buf)?; | ||
commit.tree() | ||
}; | ||
|
||
let mut tree_ids = VecDeque::from_iter(Some(tree_id)); | ||
while let Some(tree_id) = tree_ids.pop_front() { | ||
if self.seen.insert(tree_id) { | ||
self.check_tree(&tree_id, &mut tree_ids); | ||
} | ||
} | ||
|
||
Ok(()) | ||
} | ||
|
||
/// Blobs are checked right away, trees are stored in `tree_ids` for the parent to iterate them, and only | ||
/// if they have not been `seen` yet. | ||
fn check_tree(&mut self, oid: &ObjectId, tree_ids: &mut VecDeque<ObjectId>) { | ||
let Ok(tree) = self.db.find_tree(oid, &mut self.buf) else { | ||
(self.missing_cb)(oid, Kind::Tree); | ||
return; | ||
}; | ||
|
||
for entry_ref in tree.entries.iter() { | ||
match entry_ref.mode { | ||
EntryMode::Tree => { | ||
let tree_id = entry_ref.oid.to_owned(); | ||
if self.seen.insert(tree_id) { | ||
tree_ids.push_back(tree_id); | ||
} | ||
} | ||
EntryMode::Blob | EntryMode::BlobExecutable | EntryMode::Link => { | ||
let blob_id = entry_ref.oid.to_owned(); | ||
if self.seen.insert(blob_id) { | ||
check_blob(&self.db, &blob_id, &mut self.missing_cb); | ||
} | ||
} | ||
EntryMode::Commit => { | ||
// Skip submodules as it's not in this repository! | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
fn check_blob<F>(db: impl Exists, oid: &ObjectId, mut missing_cb: F) | ||
where | ||
F: FnMut(&ObjectId, Kind), | ||
{ | ||
if !db.exists(oid) { | ||
missing_cb(oid, Kind::Blob); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
use gix_fsck::Connectivity; | ||
use gix_hash::ObjectId; | ||
use gix_hashtable::HashMap; | ||
use gix_object::Kind; | ||
use gix_testtools::once_cell::sync::Lazy; | ||
|
||
use crate::hex_to_id; | ||
|
||
fn check_missing<'a>(repo_name: &str, commits: impl IntoIterator<Item = &'a ObjectId>) -> HashMap<ObjectId, Kind> { | ||
let db = { | ||
let fixture_path = gix_testtools::scripted_fixture_read_only("make_test_repos.sh") | ||
.expect("fixture path") | ||
.join(repo_name) | ||
.join(".git") | ||
.join("objects"); | ||
let mut db = gix_odb::at(fixture_path).expect("valid odb"); | ||
db.refresh_never(); | ||
db | ||
}; | ||
|
||
let mut missing: HashMap<ObjectId, Kind> = HashMap::default(); | ||
let record_missing_and_assert_no_duplicate = |oid: &ObjectId, kind: Kind| { | ||
missing.try_insert(*oid, kind).expect("no duplicate oid"); | ||
}; | ||
|
||
let mut check = Connectivity::new(db, record_missing_and_assert_no_duplicate); | ||
for commit in commits.into_iter() { | ||
check.check_commit(commit).expect("commit is present") | ||
} | ||
missing | ||
} | ||
|
||
fn hex_to_ids<'a>(hex_ids: impl IntoIterator<Item = &'a str>) -> Vec<ObjectId> { | ||
hex_ids.into_iter().map(hex_to_id).collect() | ||
} | ||
|
||
fn hex_to_objects<'a>(hex_ids: impl IntoIterator<Item = &'a str>, kind: Kind) -> HashMap<ObjectId, Kind> { | ||
hex_to_ids(hex_ids).into_iter().map(|id| (id, kind)).collect() | ||
} | ||
|
||
// Get a `&Vec<ObjectID` for each commit in the test fixture repository | ||
fn all_commits() -> &'static [ObjectId] { | ||
static ALL_COMMITS: Lazy<Vec<ObjectId>> = Lazy::new(|| { | ||
hex_to_ids([ | ||
"5d18db2e2aabadf7b914435ef34f2faf8b4546dd", | ||
"3a3dfaa55a515f3fb3a25751107bbb523af6a1b0", | ||
"734c926856a328d1168ffd7088532e0d1ad19bbe", | ||
]) | ||
}); | ||
&ALL_COMMITS | ||
} | ||
|
||
#[test]
fn no_missing() {
    // "base" is the original repository, so none of its objects may be reported
    let missing = check_missing("base", all_commits());
    assert_eq!(missing, HashMap::default());
}
|
||
#[test]
fn missing_blobs() {
    // "blobless" was cloned with `--filter=blob:none`, which leaves exactly one blob missing
    let missing = check_missing("blobless", all_commits());
    let expected = hex_to_objects(["c18147dc648481eeb65dc5e66628429a64843327"], Kind::Blob);
    assert_eq!(missing, expected);
}
|
||
#[test]
fn missing_trees() {
    // "treeless" was cloned with `--filter=tree:0`, which leaves two trees missing.
    // NOTE: a blob is missing as well, but it cannot be discovered as the tree referencing it is missing, too
    let missing_tree_ids = [
        "9561cfbae43c5e2accdfcd423378588dd10d827f",
        "fc264b3b6875a46e9031483aeb9994a1b897ffd3",
    ];
    let expected = hex_to_objects(missing_tree_ids, Kind::Tree);
    let missing = check_missing("treeless", all_commits());
    assert_eq!(missing, expected);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
make_test_repos.tar.xz |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
#!/bin/bash
# Creates the fixture repositories for the connectivity tests in the current directory:
# `base` (complete), `blobless` (cloned without blobs) and `treeless` (cloned without trees).
set -x
set -euo pipefail

# We override the global config with our own local one (see below)
export GIT_CONFIG_GLOBAL="$PWD/.gitconfig"

# We need to be able to do partial clones, so enable it
# - needs to be present in the persistent gitconfig, as a clone with `--no-local`
#   uses the regular git transport, whose serving side presumably reads the setting from there
git config --global uploadpack.allowFilter true

# First build out a base repository
# NOTE: the tests hard-code object ids, so file names and contents here must not change
git init base
(
  cd base

  echo "blob 1" > blob-1
  git add -A
  git commit -m "commit 1"
  echo "blob-2" > blob-2
  git add -A
  git commit -m "commit 2"
  git rm blob-1
  git add -A
  git commit -m "commit 3"
)

# Blobless clone
git clone --no-local --no-hardlinks --filter=blob:none ./base blobless

# Treeless (and blobless) clone
git clone --no-local --no-hardlinks --filter=tree:0 ./base treeless
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
use gix_hash::ObjectId; | ||
|
||
pub fn hex_to_id(hex: &str) -> ObjectId { | ||
ObjectId::from_hex(hex.as_bytes()).expect("40 bytes hex") | ||
} | ||
|
||
mod connectivity; |
Oops, something went wrong.