Skip to content

Commit

Permalink
feat!: Support for 'fast-tracking' reaching the beginning of the comm…
Browse files Browse the repository at this point in the history
…it-graph during traversals.

It's implemented by sorting commits oldest first when choosing the next one to traverse,
which can greatly reduce the time it takes to reach the first commit of a graph.

Co-authored-by: Sebastian Thiel <[email protected]>
  • Loading branch information
nrdxp and Byron committed Sep 26, 2024
1 parent 6ac14d7 commit 14d6bb9
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 35 deletions.
26 changes: 13 additions & 13 deletions gix/src/remote/connection/fetch/update_refs/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,19 +154,19 @@ pub(crate) fn update(
.find_object(local_id)?
.try_into_commit()
.map_err(|_| ())
.and_then(|c| {
c.committer().map(|a| a.time.seconds).map_err(|_| ())
}).and_then(|local_commit_time|
remote_id
.to_owned()
.ancestors(&repo.objects)
.sorting(
gix_traverse::commit::simple::Sorting::ByCommitTimeNewestFirstCutoffOlderThan {
seconds: local_commit_time
},
)
.map_err(|_| ())
);
.and_then(|c| c.committer().map(|a| a.time.seconds).map_err(|_| ()))
.and_then(|local_commit_time| {
remote_id
.to_owned()
.ancestors(&repo.objects)
.sorting(
gix_traverse::commit::simple::Sorting::ByCommitTimeCutoff {
order: Default::default(),
seconds: local_commit_time,
},
)
.map_err(|_| ())
});
match ancestors {
Ok(mut ancestors) => {
ancestors.any(|cid| cid.map_or(false, |c| c.id == local_id))
Expand Down
4 changes: 2 additions & 2 deletions gix/src/revision/spec/parse/delegate/navigate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ impl<'repo> delegate::Navigate for Delegate<'repo> {
match oid
.attach(repo)
.ancestors()
.sorting(crate::revision::walk::Sorting::ByCommitTimeNewestFirst)
.sorting(crate::revision::walk::Sorting::ByCommitTime(Default::default()))
.all()
{
Ok(iter) => {
Expand Down Expand Up @@ -245,7 +245,7 @@ impl<'repo> delegate::Navigate for Delegate<'repo> {
.filter(|r| r.id().header().ok().map_or(false, |obj| obj.kind().is_commit()))
.filter_map(|r| r.detach().peeled),
)
.sorting(crate::revision::walk::Sorting::ByCommitTimeNewestFirst)
.sorting(crate::revision::walk::Sorting::ByCommitTime(Default::default()))
.all()
{
Ok(iter) => {
Expand Down
31 changes: 18 additions & 13 deletions gix/src/revision/walk.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use gix_hash::ObjectId;
use gix_object::FindExt;
use gix_traverse::commit::simple::CommitTimeOrder;

use crate::{ext::ObjectIdExt, revision, Repository};

Expand Down Expand Up @@ -39,24 +40,27 @@ pub enum Sorting {
/// as it avoids overlapping branches.
#[default]
BreadthFirst,
/// Commits are sorted by their commit time in descending order, that is newest first.
/// Commits are sorted by their commit time in the order specified, either newest or oldest first.
///
/// The sorting applies to all currently queued commit ids and thus is full.
///
/// In the *sample history* the order would be `8, 7, 6, 4, 5, 2, 3, 1`
/// In the *sample history* the order would be `8, 7, 6, 5, 4, 3, 2, 1` for [`NewestFirst`](CommitTimeOrder::NewestFirst),
/// or `1, 2, 3, 4, 5, 6, 7, 8` for [`OldestFirst`](CommitTimeOrder::OldestFirst).
///
/// # Performance
///
/// This mode benefits greatly from having an [object cache](crate::Repository::object_cache_size) configured
/// to avoid having to look up each commit twice.
ByCommitTimeNewestFirst,
/// This sorting is similar to `ByCommitTimeNewestFirst`, but adds a cutoff to not return commits older than
ByCommitTime(CommitTimeOrder),
/// This sorting is similar to [`ByCommitTime`](Sorting::ByCommitTime), but adds a cutoff to not return commits older than
/// a given time, stopping the iteration once no younger commit is queued to be traversed.
///
/// As the query is usually repeated with different cutoff dates, this search mode benefits greatly from an object cache.
///
/// In the *sample history* and a cut-off date of 4, the returned list of commits would be `8, 7, 6, 4`
ByCommitTimeNewestFirstCutoffOlderThan {
ByCommitTimeCutoff {
/// The order in which to prioritize lookups
order: CommitTimeOrder,
/// The amount of seconds since unix epoch to use as cut-off time.
seconds: gix_date::SecondsSinceUnixEpoch,
},
Expand All @@ -66,9 +70,9 @@ impl Sorting {
fn into_simple(self) -> Option<gix_traverse::commit::simple::Sorting> {
Some(match self {
Sorting::BreadthFirst => gix_traverse::commit::simple::Sorting::BreadthFirst,
Sorting::ByCommitTimeNewestFirst => gix_traverse::commit::simple::Sorting::ByCommitTimeNewestFirst,
Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds } => {
gix_traverse::commit::simple::Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds }
Sorting::ByCommitTime(order) => gix_traverse::commit::simple::Sorting::ByCommitTime(order),
Sorting::ByCommitTimeCutoff { seconds, order } => {
gix_traverse::commit::simple::Sorting::ByCommitTimeCutoff { order, seconds }
}
})
}
Expand Down Expand Up @@ -208,15 +212,16 @@ impl<'repo> Platform<'repo> {
/// Prune the commit with the given `ids` such that they won't be returned, and such that none of their ancestors is returned either.
///
/// Note that this forces the [sorting](Self::sorting) to
/// [`ByCommitTimeNewestFirstCutoffOlderThan`](Sorting::ByCommitTimeNewestFirstCutoffOlderThan) configured with
/// [`ByCommitTimeCutoff`](Sorting::ByCommitTimeCutoff) configured with
/// the oldest available commit time, ensuring that no commits older than the oldest of `ids` will be returned either.
///
/// Also note that commits that can't be accessed or are missing are simply ignored for the purpose of obtaining the cutoff date.
#[doc(alias = "hide", alias = "git2")]
pub fn with_pruned(mut self, ids: impl IntoIterator<Item = impl Into<ObjectId>>) -> Self {
let mut cutoff = match self.sorting {
Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds } => Some(seconds),
Sorting::BreadthFirst | Sorting::ByCommitTimeNewestFirst => None,
let (mut cutoff, order) = match self.sorting {
Sorting::ByCommitTimeCutoff { seconds, order } => (Some(seconds), order),
Sorting::ByCommitTime(order) => (None, order),
Sorting::BreadthFirst => (None, CommitTimeOrder::default()),
};
for id in ids.into_iter() {
let id = id.into();
Expand All @@ -231,7 +236,7 @@ impl<'repo> Platform<'repo> {
}

if let Some(cutoff) = cutoff {
self.sorting = Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds: cutoff }
self.sorting = Sorting::ByCommitTimeCutoff { seconds: cutoff, order }
}
self
}
Expand Down
11 changes: 7 additions & 4 deletions gix/tests/id/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ mod ancestors {
let commits_by_commit_date = head
.ancestors()
.use_commit_graph(!use_commit_graph)
.sorting(gix::revision::walk::Sorting::ByCommitTimeNewestFirst)
.sorting(gix::revision::walk::Sorting::ByCommitTime(Default::default()))
.all()?
.map(|c| c.map(gix::revision::walk::Info::detach))
.collect::<Result<Vec<_>, _>>()?;
Expand Down Expand Up @@ -119,7 +119,7 @@ mod ancestors {
let head = repo.head()?.into_peeled_id()?;
let commits = head
.ancestors()
.sorting(gix::revision::walk::Sorting::ByCommitTimeNewestFirst) // assure we have time set
.sorting(gix::revision::walk::Sorting::ByCommitTime(Default::default())) // assure we have time set
.use_commit_graph(use_commit_graph)
.all()?
.collect::<Result<Vec<_>, _>>()?;
Expand Down Expand Up @@ -162,8 +162,11 @@ mod ancestors {
for use_commit_graph in [false, true] {
for sorting in [
gix::revision::walk::Sorting::BreadthFirst,
gix::revision::walk::Sorting::ByCommitTimeNewestFirst,
gix::revision::walk::Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds: 0 },
gix::revision::walk::Sorting::ByCommitTime(Default::default()),
gix::revision::walk::Sorting::ByCommitTimeCutoff {
order: Default::default(),
seconds: 0,
},
] {
let commits_graph_order = head
.ancestors()
Expand Down
10 changes: 7 additions & 3 deletions gix/tests/repository/shallow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ fn yes() -> crate::Result {
}

mod traverse {
use gix_traverse::commit::simple::CommitTimeOrder;
use serial_test::parallel;

use crate::util::{hex_to_id, named_subrepo_opts};
Expand All @@ -53,8 +54,11 @@ mod traverse {
fn boundary_is_detected_triggering_no_error() -> crate::Result {
for sorting in [
gix::revision::walk::Sorting::BreadthFirst,
gix::revision::walk::Sorting::ByCommitTimeNewestFirst,
gix::revision::walk::Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds: 0 },
gix::revision::walk::Sorting::ByCommitTime(CommitTimeOrder::NewestFirst),
gix::revision::walk::Sorting::ByCommitTimeCutoff {
order: CommitTimeOrder::NewestFirst,
seconds: 0,
},
] {
for toggle in [false, true] {
for name in ["shallow.git", "shallow"] {
Expand Down Expand Up @@ -97,7 +101,7 @@ mod traverse {
.head_id()?
.ancestors()
.use_commit_graph(toggle)
.sorting(gix::revision::walk::Sorting::ByCommitTimeNewestFirst)
.sorting(gix::revision::walk::Sorting::ByCommitTime(CommitTimeOrder::NewestFirst))
.all()?
.map(|c| c.map(|c| c.id))
.collect::<Result<_, _>>()?;
Expand Down

0 comments on commit 14d6bb9

Please sign in to comment.