create src

This commit is contained in:
awfixer
2026-03-11 02:04:19 -07:00
commit 52f7a22bf2
2595 changed files with 402870 additions and 0 deletions

2025
src-traverse/CHANGELOG.md Normal file

File diff suppressed because it is too large Load Diff

37
src-traverse/Cargo.toml Normal file
View File

@@ -0,0 +1,37 @@
# Inherit the workspace-wide lint configuration.
lints.workspace = true

[package]
name = "src-traverse"
version = "0.54.0"
repository = "https://github.com/GitoxideLabs/gitoxide"
license = "MIT OR Apache-2.0"
description = "A crate of the gitoxide project"
authors = ["Sebastian Thiel <sebastian.thiel@icloud.com>"]
edition = "2021"
# Keep the published package lean: only sources and license files.
include = ["src/**/*", "LICENSE-*"]
rust-version = "1.82"
# Don't auto-discover test targets in this crate.
autotests = false

[lib]
# Code examples in the docs are not runnable doctests.
doctest = false

[features]
## Enable support for the SHA-1 hash by enabling the respective feature in the `src-hash` crate.
sha1 = ["src-hash/sha1"]

[dependencies]
src-hash = { version = "^0.22.1", path = "../src-hash" }
src-object = { version = "^0.57.0", path = "../src-object" }
src-date = { version = "^0.15.0", path = "../src-date" }
src-hashtable = { version = "^0.12.0", path = "../src-hashtable" }
src-revwalk = { version = "^0.28.0", path = "../src-revwalk" }
src-commitgraph = { version = "^0.34.0", path = "../src-commitgraph" }
smallvec = "1.15.1"
thiserror = "2.0.18"
bitflags = "2"

[dev-dependencies]
# Tests require an actual hash implementation to be enabled.
src-hash = { path = "../src-hash", features = ["sha1"] }

[package.metadata.docs.rs]
# Build docs with all API-affecting features enabled.
features = ["sha1"]

1
src-traverse/LICENSE-APACHE Symbolic link
View File

@@ -0,0 +1 @@
../LICENSE-APACHE

1
src-traverse/LICENSE-MIT Symbolic link
View File

@@ -0,0 +1 @@
../LICENSE-MIT

View File

@@ -0,0 +1,114 @@
//! Provide multiple traversal implementations with different performance envelopes.
//!
//! Use [`Simple`] for fast walks that maintain minimal state, or [`Topo`] for a more elaborate traversal.
use gix_hash::ObjectId;
use gix_object::FindExt;
use gix_revwalk::{graph::IdMap, PriorityQueue};
use smallvec::SmallVec;
/// A fast iterator over the ancestors of one or more starting commits.
pub struct Simple<Find, Predicate> {
    /// A way to look up commit objects by id.
    objects: Find,
    /// An optional commitgraph to accelerate lookups; on corruption we fall back to `objects`.
    cache: Option<gix_commitgraph::Graph>,
    /// Decides whether a commit is returned and whether its parents are traversed.
    predicate: Predicate,
    /// All mutable traversal state, kept separately so it can be cleared and reused.
    state: simple::State,
    /// Whether to follow all parents or only the first one.
    parents: Parents,
    /// The order in which queued commits are yielded.
    sorting: simple::Sorting,
}
/// Simple ancestors traversal, without the need to keep track of graph-state.
pub mod simple;
/// A commit walker that walks in topographical order, like `git rev-list
/// --topo-order` or `--date-order` depending on the chosen [`topo::Sorting`].
///
/// Instantiate with [`topo::Builder`].
pub struct Topo<Find, Predicate> {
    /// An optional commitgraph to accelerate commit lookups.
    commit_graph: Option<gix_commitgraph::Graph>,
    /// A way to look up commit objects by id.
    find: Find,
    /// Decides whether a commit is returned by the iteration.
    predicate: Predicate,
    /// Per-commit indegree counters used by the walk (seeded to 1 for tips/ends in `Builder::build`).
    indegrees: IdMap<i32>,
    /// Per-commit walk flags recording what has been seen/explored so far.
    states: IdMap<topo::WalkFlags>,
    /// Commits queued for exploration, keyed by `(generation, commit time)`.
    explore_queue: PriorityQueue<topo::iter::GenAndCommitTime, ObjectId>,
    /// Commits queued for indegree computation, keyed by `(generation, commit time)`.
    indegree_queue: PriorityQueue<topo::iter::GenAndCommitTime, ObjectId>,
    /// Commits ready to be yielded, ordered per the chosen sorting.
    topo_queue: topo::iter::Queue,
    /// Whether to follow all parents or only the first one.
    parents: Parents,
    /// The minimum generation number among the starting points; bounds indegree computation.
    min_gen: u32,
    /// Scratch buffer for raw commit data.
    buf: Vec<u8>,
}
pub mod topo;
/// Specify how to handle commit parents during traversal.
///
/// Defaults to [`Parents::All`].
#[derive(Default, Copy, Clone)]
pub enum Parents {
    /// Traverse all parents, useful for traversing the entire ancestry.
    #[default]
    All,
    /// Only traverse along the first parent, which commonly ignores all branches.
    First,
}
/// The collection of parent ids we saw as part of the iteration.
///
/// Note that this list is truncated if [`Parents::First`] was used.
pub type ParentIds = SmallVec<[gix_hash::ObjectId; 1]>;
/// Information about a commit that we obtained naturally as part of the iteration.
///
/// Note that the derived ordering compares fields top-to-bottom, i.e. by `id` first.
#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
pub struct Info {
    /// The id of the commit.
    pub id: gix_hash::ObjectId,
    /// All parent ids we have encountered. Note that these will be at most one if [`Parents::First`] is enabled.
    pub parent_ids: ParentIds,
    /// The time at which the commit was created. It will only be `Some(_)` if the chosen traversal was
    /// taking dates into consideration.
    pub commit_time: Option<gix_date::SecondsSinceUnixEpoch>,
}
/// Information about a commit that can be obtained either from a [`gix_object::CommitRefIter`] or
/// a [`gix_commitgraph::file::Commit`].
///
/// `'buf` borrows the caller-provided object buffer, `'cache` borrows the commitgraph.
#[derive(Clone, Copy)]
pub enum Either<'buf, 'cache> {
    /// See [`gix_object::CommitRefIter`].
    CommitRefIter(gix_object::CommitRefIter<'buf>),
    /// See [`gix_commitgraph::file::Commit`].
    CachedCommit(gix_commitgraph::file::Commit<'cache>),
}
impl Either<'_, '_> {
/// Get a commits `tree_id` by either getting it from a [`gix_commitgraph::Graph`], if
/// present, or a [`gix_object::CommitRefIter`] otherwise.
pub fn tree_id(self) -> Result<ObjectId, gix_object::decode::Error> {
match self {
Self::CommitRefIter(mut commit_ref_iter) => commit_ref_iter.tree_id(),
Self::CachedCommit(commit) => Ok(commit.root_tree_id().into()),
}
}
/// Get a committer timestamp by either getting it from a [`gix_commitgraph::Graph`], if
/// present, or a [`gix_object::CommitRefIter`] otherwise.
pub fn commit_time(self) -> Result<gix_date::SecondsSinceUnixEpoch, gix_object::decode::Error> {
match self {
Self::CommitRefIter(commit_ref_iter) => commit_ref_iter.committer().map(|c| c.seconds()),
Self::CachedCommit(commit) => Ok(commit.committer_timestamp() as gix_date::SecondsSinceUnixEpoch),
}
}
}
/// Find information about a commit by either getting it from a [`gix_commitgraph::Graph`], if
/// present, or a [`gix_object::CommitRefIter`] otherwise.
pub fn find<'cache, 'buf, Find>(
    cache: Option<&'cache gix_commitgraph::Graph>,
    objects: Find,
    id: &gix_hash::oid,
    buf: &'buf mut Vec<u8>,
) -> Result<Either<'buf, 'cache>, gix_object::find::existing_iter::Error>
where
    Find: gix_object::Find,
{
    // Prefer the commitgraph when it knows the commit…
    if let Some(commit) = cache.and_then(|cache| cache.commit_by_id(id)) {
        return Ok(Either::CachedCommit(commit));
    }
    // …and fall back to parsing the object from the database otherwise.
    objects.find_commit_iter(id, buf).map(Either::CommitRefIter)
}

View File

@@ -0,0 +1,791 @@
use std::{cmp::Reverse, collections::VecDeque};
use gix_date::SecondsSinceUnixEpoch;
use gix_hash::ObjectId;
use smallvec::SmallVec;
#[derive(Default, Debug, Copy, Clone)]
/// The order with which to prioritize the search.
pub enum CommitTimeOrder {
    #[default]
    /// Sort commits by newest first, i.e. descending commit time.
    NewestFirst,
    /// Sort commits by oldest first, i.e. ascending commit time.
    #[doc(alias = "Sort::REVERSE", alias = "git2")]
    OldestFirst,
}
/// Specify how to sort commits during a [simple](super::Simple) traversal.
///
/// ### Sample History
///
/// The following history will be referred to for explaining how the sort order works, with the number denoting the commit timestamp
/// (*their X-alignment doesn't matter*).
///
/// ```text
/// ---1----2----4----7 <- second parent of 8
/// \ \
/// 3----5----6----8---
/// ```
#[derive(Default, Debug, Copy, Clone)]
pub enum Sorting {
    /// Commits are sorted as they are mentioned in the commit graph.
    ///
    /// In the *sample history* the order would be `8, 6, 7, 5, 4, 3, 2, 1`.
    ///
    /// ### Note
    ///
    /// This is not to be confused with `git log/rev-list --topo-order`, which is notably different
    /// as it avoids overlapping branches.
    #[default]
    BreadthFirst,
    /// Commits are sorted by their commit time in the order specified, either newest or oldest first.
    ///
    /// The sorting applies to all currently queued commit ids and thus is full.
    ///
    /// In the *sample history* the order would be `8, 7, 6, 5, 4, 3, 2, 1` for [`NewestFirst`](CommitTimeOrder::NewestFirst),
    /// or `1, 2, 3, 4, 5, 6, 7, 8` for [`OldestFirst`](CommitTimeOrder::OldestFirst).
    ///
    /// # Performance
    ///
    /// This mode benefits greatly from having an object_cache in `find()`
    /// to avoid having to lookup each commit twice.
    ByCommitTime(CommitTimeOrder),
    /// This sorting is similar to [`ByCommitTime`](Sorting::ByCommitTime), but adds a cutoff to not return commits older than
    /// a given time, stopping the iteration once no younger commits are queued to be traversed.
    ///
    /// As the query is usually repeated with different cutoff dates, this search mode benefits greatly from an object cache.
    ///
    /// In the *sample history* and a cut-off date of 4, the returned list of commits would be `8, 7, 6, 4`.
    ByCommitTimeCutoff {
        /// The order in which to prioritize lookups.
        order: CommitTimeOrder,
        /// The number of seconds since unix epoch, the same value obtained by any `gix_date::Time` structure and the way git counts time.
        seconds: gix_date::SecondsSinceUnixEpoch,
    },
}
/// The error is part of the item returned by the [Ancestors](super::Simple) iterator.
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
    /// A commit could not be found in the object database.
    #[error(transparent)]
    Find(#[from] gix_object::find::existing_iter::Error),
    /// A commit object could not be decoded.
    #[error(transparent)]
    ObjectDecode(#[from] gix_object::decode::Error),
}
// Re-use `Result` as a lightweight either-type for queue keys: `Ok` carries a plain time
// (newest-first), `Err` a `Reverse`d time (oldest-first). A traversal uses a single
// `CommitTimeOrder`, so a queue is populated with one variant only.
use Result as Either;
// The key of the date-ordered queue: a commit time, possibly reversed to flip the order.
type QueueKey<T> = Either<T, Reverse<T>>;
// The queue for date-ordered traversal: keyed by commit time, holding the commit and its hidden/interesting state.
type CommitDateQueue = gix_revwalk::PriorityQueue<QueueKey<SecondsSinceUnixEpoch>, (ObjectId, CommitState)>;
// A FIFO of commits that may still be returned, unless hidden commits catch up with them first.
type Candidates = VecDeque<crate::commit::Info>;
/// The state used and potentially shared by multiple graph traversals.
#[derive(Clone)]
pub(super) struct State {
    /// The FIFO of commits to visit next during topology-driven traversal.
    next: VecDeque<(ObjectId, CommitState)>,
    /// The priority queue of commits to visit next during date-ordered traversal.
    queue: CommitDateQueue,
    /// Holds the raw data of the most recently looked-up commit.
    buf: Vec<u8>,
    /// The traversal state of every commit encountered so far.
    seen: gix_revwalk::graph::IdMap<CommitState>,
    /// Scratch buffer used when parsing parent commits.
    parents_buf: Vec<u8>,
    /// Scratch space for collected parent ids along with their commit times.
    parent_ids: SmallVec<[(ObjectId, SecondsSinceUnixEpoch); 2]>,
    /// The list (FIFO) of thus far interesting commits.
    ///
    /// As they may turn hidden later, we have to keep them until the conditions are met to return them.
    /// If `None`, there is nothing to do with hidden commits.
    // TODO(perf): review this as we don't really need candidates anymore, given our current way of doing things.
    // However, maybe they can see use when getting an incremental traversal done.
    candidates: Option<Candidates>,
}
#[derive(Debug, Clone, Copy)]
/// Whether a traversed commit may be returned to the caller or must be suppressed.
enum CommitState {
    /// The commit may be returned, it hasn't been hidden yet.
    Interesting,
    /// The commit should not be returned.
    Hidden,
}
impl CommitState {
    /// Return `true` if this commit must not be yielded by the traversal.
    pub fn is_hidden(&self) -> bool {
        match self {
            CommitState::Hidden => true,
            CommitState::Interesting => false,
        }
    }
    /// Return `true` if this commit may still be yielded by the traversal.
    pub fn is_interesting(&self) -> bool {
        !self.is_hidden()
    }
}
///
mod init {
use std::cmp::Reverse;
use super::{
super::{simple::Sorting, Either, Info, ParentIds, Parents, Simple},
collect_parents, Candidates, CommitDateQueue, CommitState, CommitTimeOrder, Error, State,
};
use gix_date::SecondsSinceUnixEpoch;
use gix_hash::{oid, ObjectId};
use gix_hashtable::hash_map::Entry;
use gix_object::{CommitRefIter, FindExt};
use std::collections::VecDeque;
use Err as Oldest;
use Ok as Newest;
impl Default for State {
fn default() -> Self {
State {
next: Default::default(),
queue: gix_revwalk::PriorityQueue::new(),
buf: vec![],
seen: Default::default(),
parents_buf: vec![],
parent_ids: Default::default(),
candidates: None,
}
}
}
impl State {
    /// Reset all traversal state while keeping allocations around for reuse.
    fn clear(&mut self) {
        // Deliberately destructure `self` exhaustively so that adding a field
        // without deciding whether it needs clearing becomes a compile error.
        let Self {
            next,
            queue,
            buf,
            seen,
            // Scratch buffers whose content is overwritten before each use.
            parents_buf: _,
            parent_ids: _,
            candidates,
        } = self;
        next.clear();
        queue.clear();
        buf.clear();
        seen.clear();
        *candidates = None;
    }
}
/// Turn commit time `i` into a priority-queue key that honors the requested `order`.
///
/// `Newest`/`Oldest` are aliases for `Ok`/`Err` - see the `QueueKey` alias for the scheme.
fn to_queue_key(i: i64, order: CommitTimeOrder) -> super::QueueKey<i64> {
    match order {
        CommitTimeOrder::NewestFirst => Newest(i),
        CommitTimeOrder::OldestFirst => Oldest(Reverse(i)),
    }
}
/// Builder
impl<Find, Predicate> Simple<Find, Predicate>
where
    Find: gix_object::Find,
{
    /// Set the `sorting` method.
    pub fn sorting(mut self, sorting: Sorting) -> Result<Self, Error> {
        self.sorting = sorting;
        match self.sorting {
            Sorting::BreadthFirst => {
                // Breadth-first works off the FIFO - move anything queued by time back into it.
                self.queue_to_vecdeque();
            }
            Sorting::ByCommitTime(order) | Sorting::ByCommitTimeCutoff { order, .. } => {
                // Date-based sortings work off the priority queue - move everything queued
                // so far into it, keyed by each commit's time.
                let state = &mut self.state;
                for (commit_id, commit_state) in state.next.drain(..) {
                    add_to_queue(
                        commit_id,
                        commit_state,
                        order,
                        sorting.cutoff_time(),
                        &mut state.queue,
                        &self.objects,
                        &mut state.buf,
                    )?;
                }
            }
        }
        Ok(self)
    }
    /// Change our commit parent handling mode to the given one.
    pub fn parents(mut self, mode: Parents) -> Self {
        self.parents = mode;
        if matches!(self.parents, Parents::First) {
            // First-parent traversal is always topology-driven - see `Iterator::next()`.
            self.queue_to_vecdeque();
        }
        self
    }
    /// Hide the given `tips`, along with all commits reachable by them so that they will not be returned
    /// by the traversal.
    ///
    /// This function fully traverses all hidden tips and their ancestors, marking them as hidden
    /// before iteration begins. This approach ensures correct behavior regardless
    /// of graph topology or traversal order, matching git's `rev-list --not` behavior,
    /// at great cost to performance, unfortunately.
    ///
    /// Note that hidden objects are expected to exist.
    // TODO(perf): make this hiding iterative to avoid traversing the entire graph, always.
    pub fn hide(mut self, tips: impl IntoIterator<Item = ObjectId>) -> Result<Self, Error> {
        // Collect hidden tips first
        let hidden_tips: Vec<ObjectId> = tips.into_iter().collect();
        if hidden_tips.is_empty() {
            return Ok(self);
        }
        // Fully traverse all hidden tips and mark all reachable commits as Hidden.
        // This is "graph painting" - we paint all hidden commits upfront rather than
        // interleaving hidden and interesting traversals, which ensures correct behavior
        // regardless of graph topology or traversal order.
        let mut queue: VecDeque<ObjectId> = VecDeque::new();
        for id_to_ignore in hidden_tips {
            if self.state.seen.insert(id_to_ignore, CommitState::Hidden).is_none() {
                queue.push_back(id_to_ignore);
            }
        }
        // Process all hidden commits and their ancestors
        while let Some(id) = queue.pop_front() {
            match super::super::find(self.cache.as_ref(), &self.objects, &id, &mut self.state.buf) {
                Ok(Either::CachedCommit(commit)) => {
                    if !collect_parents(&mut self.state.parent_ids, self.cache.as_ref(), commit.iter_parents()) {
                        // drop corrupt caches and retry
                        self.cache = None;
                        // Re-add to queue to retry without cache
                        if self.state.seen.get(&id).is_some_and(CommitState::is_hidden) {
                            queue.push_back(id);
                        }
                        continue;
                    }
                    for (parent_id, _commit_time) in self.state.parent_ids.drain(..) {
                        if self.state.seen.insert(parent_id, CommitState::Hidden).is_none() {
                            queue.push_back(parent_id);
                        }
                    }
                }
                Ok(Either::CommitRefIter(commit_iter)) => {
                    for token in commit_iter {
                        match token {
                            Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue,
                            Ok(gix_object::commit::ref_iter::Token::Parent { id: parent_id }) => {
                                if self.state.seen.insert(parent_id, CommitState::Hidden).is_none() {
                                    queue.push_back(parent_id);
                                }
                            }
                            // Any token past the parents means there are no more parents to collect.
                            Ok(_unused_token) => break,
                            Err(err) => return Err(err.into()),
                        }
                    }
                }
                Err(err) => return Err(err.into()),
            }
        }
        // Now that all hidden commits are painted, we no longer need special handling
        // during the main traversal. We can remove hidden commits from the main queues
        // and simply skip them during iteration.
        //
        // Note: We don't need the candidates buffer anymore since hidden commits are
        // pre-painted. But we keep it for compatibility with existing behavior and
        // in case interesting commits were already queued before hide() was called.
        self.state.candidates = None;
        // Remove any hidden commits from the interesting queues
        self.state
            .next
            .retain(|(id, _)| !self.state.seen.get(id).is_some_and(CommitState::is_hidden));
        Ok(self)
    }
    /// Set the commitgraph as `cache` to greatly accelerate any traversal.
    ///
    /// The cache will be used if possible, but we will fall back without error to using the object
    /// database for commit lookup. If the cache is corrupt, we will fall back to the object database as well.
    pub fn commit_graph(mut self, cache: Option<gix_commitgraph::Graph>) -> Self {
        self.cache = cache;
        self
    }
    /// Move everything from the priority queue into the FIFO, discarding the time keys.
    fn queue_to_vecdeque(&mut self) {
        let state = &mut self.state;
        state.next.extend(
            std::mem::replace(&mut state.queue, gix_revwalk::PriorityQueue::new())
                .into_iter_unordered()
                .map(|(_time, id)| id),
        );
    }
}
/// Look up the commit time of `commit_id` and enqueue it with `commit_state` onto `queue`,
/// unless it is older than `cutoff_time`.
///
/// # Errors
///
/// Fails if the commit cannot be found or its committer line cannot be decoded.
fn add_to_queue(
    commit_id: ObjectId,
    commit_state: CommitState,
    order: CommitTimeOrder,
    cutoff_time: Option<SecondsSinceUnixEpoch>,
    queue: &mut CommitDateQueue,
    objects: &impl gix_object::Find,
    buf: &mut Vec<u8>,
) -> Result<(), Error> {
    let commit_iter = objects.find_commit_iter(&commit_id, buf)?;
    let time = commit_iter.committer()?.seconds();
    // The previous three-arm match on `(cutoff_time, order)` never inspected `order` and
    // duplicated the insertion - a single guard expresses the same rule: queue the commit
    // unless it is strictly older than the cutoff.
    if cutoff_time.is_none_or(|cutoff_time| time >= cutoff_time) {
        queue.insert(to_queue_key(time, order), (commit_id, commit_state));
    }
    Ok(())
}
/// Lifecycle
impl<Find> Simple<Find, fn(&oid) -> bool>
where
    Find: gix_object::Find,
{
    /// Create a new instance.
    ///
    /// * `find` - a way to lookup new object data during traversal by their `ObjectId`, writing their data into buffer and returning
    ///   an iterator over commit tokens if the object is present and is a commit. Caching should be implemented within this function
    ///   as needed.
    /// * `tips`
    ///   * the starting points of the iteration, usually commits
    ///   * each commit they lead to will only be returned once, including the tip that started it
    pub fn new(tips: impl IntoIterator<Item = impl Into<ObjectId>>, find: Find) -> Self {
        // Delegate to the filtered constructor with a predicate that accepts everything.
        Self::filtered(tips, find, |_| true)
    }
}
/// Lifecycle
impl<Find, Predicate> Simple<Find, Predicate>
where
    Find: gix_object::Find,
    Predicate: FnMut(&oid) -> bool,
{
    /// Create a new instance with commit filtering enabled.
    ///
    /// * `find` - a way to lookup new object data during traversal by their `ObjectId`, writing their data into buffer and returning
    ///   an iterator over commit tokens if the object is present and is a commit. Caching should be implemented within this function
    ///   as needed.
    /// * `tips`
    ///   * the starting points of the iteration, usually commits
    ///   * each commit they lead to will only be returned once, including the tip that started it
    /// * `predicate` - indicate whether a given commit should be included in the result as well
    ///   as whether its parent commits should be traversed.
    pub fn filtered(
        tips: impl IntoIterator<Item = impl Into<ObjectId>>,
        find: Find,
        mut predicate: Predicate,
    ) -> Self {
        let tips = tips.into_iter();
        // A freshly defaulted `State` is already empty - the previous explicit `clear()`
        // (and the scope block around it) was redundant.
        let mut state = State::default();
        state.next.reserve(tips.size_hint().0);
        for tip in tips.map(Into::into) {
            let commit_state = CommitState::Interesting;
            let seen = state.seen.insert(tip, commit_state);
            // We know there can only be duplicate interesting ones.
            if seen.is_none() && predicate(&tip) {
                state.next.push_back((tip, commit_state));
            }
        }
        Self {
            objects: find,
            cache: None,
            predicate,
            state,
            parents: Default::default(),
            sorting: Default::default(),
        }
    }
}
/// Access
impl<Find, Predicate> Simple<Find, Predicate> {
    /// Return an iterator for accessing data of the current commit, parsed lazily.
    pub fn commit_iter(&self) -> CommitRefIter<'_> {
        let data = self.commit_data();
        CommitRefIter::from_bytes(data)
    }
    /// Return the current commits' raw data, which can be parsed using [`gix_object::CommitRef::from_bytes()`].
    pub fn commit_data(&self) -> &[u8] {
        self.state.buf.as_slice()
    }
}
impl<Find, Predicate> Iterator for Simple<Find, Predicate>
where
    Find: gix_object::Find,
    Predicate: FnMut(&oid) -> bool,
{
    type Item = Result<Info, Error>;
    fn next(&mut self) -> Option<Self::Item> {
        // First-parent mode always walks by topology, regardless of the configured sorting.
        if matches!(self.parents, Parents::First) {
            self.next_by_topology()
        } else {
            match self.sorting {
                Sorting::BreadthFirst => self.next_by_topology(),
                Sorting::ByCommitTime(order) => self.next_by_commit_date(order, None),
                Sorting::ByCommitTimeCutoff { seconds, order } => self.next_by_commit_date(order, seconds.into()),
            }
        }
        // Once the walk itself yields nothing anymore, drain candidates that were withheld
        // while hidden commits could still have caught up with them.
        .or_else(|| {
            self.state
                .candidates
                .as_mut()
                .and_then(|candidates| candidates.pop_front().map(Ok))
        })
    }
}
impl Sorting {
    /// If not topo sort, provide the cutoff date if present.
    fn cutoff_time(&self) -> Option<SecondsSinceUnixEpoch> {
        if let Sorting::ByCommitTimeCutoff { seconds, .. } = self {
            Some(*seconds)
        } else {
            None
        }
    }
}
/// Utilities
impl<Find, Predicate> Simple<Find, Predicate>
where
    Find: gix_object::Find,
    Predicate: FnMut(&oid) -> bool,
{
    /// Yield the next commit in the given commit-time `order`, skipping commits older than `cutoff`.
    fn next_by_commit_date(
        &mut self,
        order: CommitTimeOrder,
        cutoff: Option<SecondsSinceUnixEpoch>,
    ) -> Option<Result<Info, Error>> {
        let state = &mut self.state;
        let next = &mut state.queue;
        'skip_hidden: loop {
            // Both key variants carry the commit time - extract it from whichever is present.
            let (commit_time, (oid, _queued_commit_state)) = match next.pop()? {
                (Newest(t) | Oldest(Reverse(t)), o) => (t, o),
            };
            let mut parents: ParentIds = Default::default();
            // Always use the state that is actually stored, as we may change the type as we go.
            let commit_state = *state.seen.get(&oid).expect("every commit we traverse has state added");
            if can_deplete_candidates_early(
                next.iter_unordered().map(|t| t.1),
                commit_state,
                state.candidates.as_ref(),
            ) {
                return None;
            }
            match super::super::find(self.cache.as_ref(), &self.objects, &oid, &mut state.buf) {
                Ok(Either::CachedCommit(commit)) => {
                    if !collect_parents(&mut state.parent_ids, self.cache.as_ref(), commit.iter_parents()) {
                        // drop corrupt caches and try again with ODB
                        self.cache = None;
                        return self.next_by_commit_date(order, cutoff);
                    }
                    for (id, parent_commit_time) in state.parent_ids.drain(..) {
                        parents.push(id);
                        insert_into_seen_and_queue(
                            &mut state.seen,
                            id,
                            &mut state.candidates,
                            commit_state,
                            &mut self.predicate,
                            next,
                            order,
                            cutoff,
                            // The commitgraph already provided the parent's time - no extra lookup needed.
                            || parent_commit_time,
                        );
                    }
                }
                Ok(Either::CommitRefIter(commit_iter)) => {
                    for token in commit_iter {
                        match token {
                            Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue,
                            Ok(gix_object::commit::ref_iter::Token::Parent { id }) => {
                                parents.push(id);
                                insert_into_seen_and_queue(
                                    &mut state.seen,
                                    id,
                                    &mut state.candidates,
                                    commit_state,
                                    &mut self.predicate,
                                    next,
                                    order,
                                    cutoff,
                                    // Without a commitgraph the parent must be parsed to learn its time;
                                    // failed lookups degrade to a default time instead of erroring.
                                    || {
                                        let parent =
                                            self.objects.find_commit_iter(id.as_ref(), &mut state.parents_buf).ok();
                                        parent
                                            .and_then(|parent| {
                                                parent.committer().ok().map(|committer| committer.seconds())
                                            })
                                            .unwrap_or_default()
                                    },
                                );
                            }
                            // Any token past the parents means there are no more parents to collect.
                            Ok(_unused_token) => break,
                            Err(err) => return Some(Err(err.into())),
                        }
                    }
                }
                Err(err) => return Some(Err(err.into())),
            }
            match commit_state {
                CommitState::Interesting => {
                    let info = Info {
                        id: oid,
                        parent_ids: parents,
                        commit_time: Some(commit_time),
                    };
                    match state.candidates.as_mut() {
                        None => return Some(Ok(info)),
                        Some(candidates) => {
                            // assure candidates aren't prematurely returned - hidden commits may catch up with
                            // them later.
                            candidates.push_back(info);
                        }
                    }
                }
                CommitState::Hidden => continue 'skip_hidden,
            }
        }
    }
}
/// Returns `true` if we have only hidden cursors queued for traversal, assuming that we don't see interesting ones ever again.
///
/// `unqueued_commit_state` is the state of the commit that is currently being processed.
fn can_deplete_candidates_early(
    queued_states: impl Iterator<Item = CommitState>,
    unqueued_commit_state: CommitState,
    candidates: Option<&Candidates>,
) -> bool {
    // Without a candidates buffer, or while the current commit is still interesting,
    // early depletion is never possible.
    if candidates.is_none() || unqueued_commit_state.is_interesting() {
        return false;
    }
    // True only if the queue is non-empty and every queued commit is hidden.
    let mut saw_any = false;
    for state in queued_states {
        saw_any = true;
        if !state.is_hidden() {
            return false;
        }
    }
    saw_any
}
/// Utilities
impl<Find, Predicate> Simple<Find, Predicate>
where
    Find: gix_object::Find,
    Predicate: FnMut(&oid) -> bool,
{
    /// Yield the next commit in breadth-first (graph-mention) order, honoring first-parent mode.
    fn next_by_topology(&mut self) -> Option<Result<Info, Error>> {
        let state = &mut self.state;
        let next = &mut state.next;
        'skip_hidden: loop {
            let (oid, _queued_commit_state) = next.pop_front()?;
            let mut parents: ParentIds = Default::default();
            // Always use the state that is actually stored, as we may change the type as we go.
            let commit_state = *state.seen.get(&oid).expect("every commit we traverse has state added");
            if can_deplete_candidates_early(next.iter().map(|t| t.1), commit_state, state.candidates.as_ref()) {
                return None;
            }
            match super::super::find(self.cache.as_ref(), &self.objects, &oid, &mut state.buf) {
                Ok(Either::CachedCommit(commit)) => {
                    if !collect_parents(&mut state.parent_ids, self.cache.as_ref(), commit.iter_parents()) {
                        // drop corrupt caches and try again with ODB
                        self.cache = None;
                        return self.next_by_topology();
                    }
                    for (pid, _commit_time) in state.parent_ids.drain(..) {
                        parents.push(pid);
                        insert_into_seen_and_next(
                            &mut state.seen,
                            pid,
                            &mut state.candidates,
                            commit_state,
                            &mut self.predicate,
                            next,
                        );
                        // In first-parent mode, stop after the first parent of an interesting commit.
                        if commit_state.is_interesting() && matches!(self.parents, Parents::First) {
                            break;
                        }
                    }
                }
                Ok(Either::CommitRefIter(commit_iter)) => {
                    for token in commit_iter {
                        match token {
                            Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue,
                            Ok(gix_object::commit::ref_iter::Token::Parent { id: pid }) => {
                                parents.push(pid);
                                insert_into_seen_and_next(
                                    &mut state.seen,
                                    pid,
                                    &mut state.candidates,
                                    commit_state,
                                    &mut self.predicate,
                                    next,
                                );
                                // In first-parent mode, stop after the first parent of an interesting commit.
                                if commit_state.is_interesting() && matches!(self.parents, Parents::First) {
                                    break;
                                }
                            }
                            Ok(_a_token_past_the_parents) => break,
                            Err(err) => return Some(Err(err.into())),
                        }
                    }
                }
                Err(err) => return Some(Err(err.into())),
            }
            match commit_state {
                CommitState::Interesting => {
                    let info = Info {
                        id: oid,
                        parent_ids: parents,
                        // Breadth-first traversal never looks up commit times.
                        commit_time: None,
                    };
                    match state.candidates.as_mut() {
                        None => return Some(Ok(info)),
                        Some(candidates) => {
                            // assure candidates aren't prematurely returned - hidden commits may catch up with
                            // them later.
                            candidates.push_back(info);
                        }
                    }
                }
                CommitState::Hidden => continue 'skip_hidden,
            }
        }
    }
}
/// Remove the commit with id `remove` from `candidates`, if it is present.
///
/// Always returns `None`; the `Option` return type merely allows `?` internally.
#[inline]
fn remove_candidate(candidates: Option<&mut Candidates>, remove: ObjectId) -> Option<()> {
    let candidates = candidates?;
    // Equality testing needs only shared access - `iter().position()` replaces the
    // previous needless `iter_mut().enumerate().find_map(..)`.
    let pos = candidates.iter().position(|info| info.id == remove)?;
    candidates.remove(pos);
    None
}
/// Record `parent_id` in `seen` with `commit_state`, and push it onto `next` if it still
/// needs to be traversed.
fn insert_into_seen_and_next(
    seen: &mut gix_revwalk::graph::IdMap<CommitState>,
    parent_id: ObjectId,
    candidates: &mut Option<Candidates>,
    commit_state: CommitState,
    predicate: &mut impl FnMut(&oid) -> bool,
    next: &mut VecDeque<(ObjectId, CommitState)>,
) {
    let enqueue = match seen.entry(parent_id) {
        Entry::Occupied(mut e) => {
            let enqueue = handle_seen(commit_state, *e.get(), parent_id, candidates);
            // Hidden always wins - upgrade the stored state so the commit won't be returned.
            if commit_state.is_hidden() {
                e.insert(commit_state);
            }
            enqueue
        }
        Entry::Vacant(e) => {
            e.insert(commit_state);
            match commit_state {
                // Interesting commits are subject to the user-provided filter…
                CommitState::Interesting => predicate(&parent_id),
                // …while hidden ones must always be traversed to finish painting.
                CommitState::Hidden => true,
            }
        }
    };
    if enqueue {
        next.push_back((parent_id, commit_state));
    }
}
/// Record `parent_id` in `seen` with `commit_state` and, unless it is older than `cutoff`,
/// enqueue it onto `queue` keyed by its commit time in the given `order`.
///
/// `get_parent_commit_time` is only invoked if the commit actually needs to be enqueued,
/// as obtaining the time may require an object lookup.
#[allow(clippy::too_many_arguments)]
fn insert_into_seen_and_queue(
    seen: &mut gix_revwalk::graph::IdMap<CommitState>,
    parent_id: ObjectId,
    candidates: &mut Option<Candidates>,
    commit_state: CommitState,
    predicate: &mut impl FnMut(&oid) -> bool,
    queue: &mut CommitDateQueue,
    order: CommitTimeOrder,
    cutoff: Option<SecondsSinceUnixEpoch>,
    get_parent_commit_time: impl FnOnce() -> gix_date::SecondsSinceUnixEpoch,
) {
    let enqueue = match seen.entry(parent_id) {
        Entry::Occupied(mut e) => {
            let enqueue = handle_seen(commit_state, *e.get(), parent_id, candidates);
            // Hidden always wins - upgrade the stored state so the commit won't be returned.
            if commit_state.is_hidden() {
                e.insert(commit_state);
            }
            enqueue
        }
        Entry::Vacant(e) => {
            e.insert(commit_state);
            match commit_state {
                // Interesting commits are subject to the user-provided filter…
                CommitState::Interesting => (predicate)(&parent_id),
                // …while hidden ones must always be traversed to finish painting.
                CommitState::Hidden => true,
            }
        }
    };
    if enqueue {
        let parent_commit_time = get_parent_commit_time();
        let key = to_queue_key(parent_commit_time, order);
        match cutoff {
            // Commits strictly older than the cutoff are dropped entirely.
            Some(cutoff_older_than) if parent_commit_time < cutoff_older_than => {}
            Some(_) | None => queue.insert(key, (parent_id, commit_state)),
        }
    }
}
/// Decide whether a commit that was already `seen` with `current_state` must be traversed
/// again now that it is reached with `next_state`.
#[inline]
#[must_use]
fn handle_seen(
    next_state: CommitState,
    current_state: CommitState,
    id: ObjectId,
    candidates: &mut Option<Candidates>,
) -> bool {
    match (current_state, next_state) {
        // The state doesn't change - nothing left to do for this commit.
        (CommitState::Hidden, CommitState::Hidden) | (CommitState::Interesting, CommitState::Interesting) => false,
        // keep traversing to paint more hidden. After all, the commit_state overrides the current parent state
        (CommitState::Hidden, CommitState::Interesting) => true,
        // The commit turned hidden - it must not be returned anymore.
        (CommitState::Interesting, CommitState::Hidden) => {
            remove_candidate(candidates.as_mut(), id);
            true
        }
    }
}
}
/// Collect the parents of a commitgraph commit, along with their commit times, into `dest`.
///
/// Returns `false` if the commitgraph is corrupt, in which case the caller should drop the
/// cache and fall back to the object database.
///
/// # Panics
///
/// If `cache` is `None` - callers only reach this with a commitgraph-backed parents iterator.
fn collect_parents(
    dest: &mut SmallVec<[(gix_hash::ObjectId, gix_date::SecondsSinceUnixEpoch); 2]>,
    cache: Option<&gix_commitgraph::Graph>,
    parents: gix_commitgraph::file::commit::Parents<'_>,
) -> bool {
    dest.clear();
    // `cache` is already `Option<&Graph>` - the previous `cache.as_ref()` produced a
    // needless `&&Graph` (clippy: useless_asref); `expect` on the Option directly.
    let cache = cache.expect("parents iter is available, backed by `cache`");
    for parent_id in parents {
        match parent_id {
            Ok(pos) => dest.push({
                let parent = cache.commit_at(pos);
                (
                    parent.id().to_owned(),
                    parent.committer_timestamp() as gix_date::SecondsSinceUnixEpoch, // we can't handle errors here and trying seems overkill
                )
            }),
            Err(_err) => return false,
        }
    }
    true
}

View File

@@ -0,0 +1,194 @@
use gix_hash::{oid, ObjectId};
use gix_revwalk::{graph::IdMap, PriorityQueue};
use crate::commit::{
find,
topo::{iter::gen_and_commit_time, Error, Sorting, WalkFlags},
Info, Parents, Topo,
};
/// Builder for [`Topo`].
pub struct Builder<Find, Predicate> {
    /// An optional commitgraph to accelerate commit lookups.
    commit_graph: Option<gix_commitgraph::Graph>,
    /// A way to look up commit objects by id.
    find: Find,
    /// Decides whether a commit is returned by the iteration.
    predicate: Predicate,
    /// Whether to walk in `--date-order` or `--topo-order` fashion.
    sorting: Sorting,
    /// Whether to follow all parents or only the first one.
    parents: Parents,
    /// The commits to start the walk from.
    tips: Vec<ObjectId>,
    /// The commits at which the walk ends; these themselves will not be read.
    ends: Vec<ObjectId>,
}
impl<Find> Builder<Find, fn(&oid) -> bool>
where
    Find: gix_object::Find,
{
    /// Create a new `Builder` for a [`Topo`] that reads commits from a repository with `find`,
    /// starting at the `tips` and ending at the `ends`. Like `git rev-list
    /// --topo-order ^ends tips`.
    pub fn from_iters(
        find: Find,
        tips: impl IntoIterator<Item = impl Into<ObjectId>>,
        ends: Option<impl IntoIterator<Item = impl Into<ObjectId>>>,
    ) -> Self {
        Self::new(find).with_tips(tips).with_ends(ends.into_iter().flatten())
    }
    /// Create a new `Builder` for a [`Topo`] that reads commits from a
    /// repository with `find`.
    pub fn new(find: Find) -> Self {
        Self {
            commit_graph: Default::default(),
            find,
            sorting: Default::default(),
            parents: Default::default(),
            tips: Default::default(),
            ends: Default::default(),
            // By default, every commit is included.
            predicate: |_| true,
        }
    }
    /// Set a `predicate` to filter out revisions from the walk. Can be used to
    /// implement e.g. filtering on paths or time. This does *not* exclude the
    /// parent(s) of a revision that is excluded. Specify a revision as an 'end'
    /// if you want that behavior.
    pub fn with_predicate<Predicate>(self, predicate: Predicate) -> Builder<Find, Predicate>
    where
        Predicate: FnMut(&oid) -> bool,
    {
        // Rebuild the struct field by field, as swapping the predicate changes a type parameter.
        Builder {
            commit_graph: self.commit_graph,
            find: self.find,
            sorting: self.sorting,
            parents: self.parents,
            tips: self.tips,
            ends: self.ends,
            predicate,
        }
    }
}
impl<Find, Predicate> Builder<Find, Predicate>
where
    Find: gix_object::Find,
    Predicate: FnMut(&oid) -> bool,
{
    /// Add commits to start reading from.
    ///
    /// The behavior is similar to specifying additional `tips` in `git rev-list --topo-order ^ends tips`.
    pub fn with_tips(mut self, tips: impl IntoIterator<Item = impl Into<ObjectId>>) -> Self {
        self.tips.extend(tips.into_iter().map(Into::into));
        self
    }
    /// Add commits ending the traversal.
    ///
    /// These commits themselves will not be read, i.e. the behavior is similar to specifying additional
    /// `ends` in `git rev-list --topo-order ^ends tips`.
    pub fn with_ends(mut self, ends: impl IntoIterator<Item = impl Into<ObjectId>>) -> Self {
        self.ends.extend(ends.into_iter().map(Into::into));
        self
    }
    /// Set the `sorting` to use for the topological walk.
    pub fn sorting(mut self, sorting: Sorting) -> Self {
        self.sorting = sorting;
        self
    }
    /// Specify how to handle commit `parents` during traversal.
    pub fn parents(mut self, parents: Parents) -> Self {
        self.parents = parents;
        self
    }
    /// Set or unset the `commit_graph` to use for the iteration.
    pub fn with_commit_graph(mut self, commit_graph: Option<gix_commitgraph::Graph>) -> Self {
        self.commit_graph = commit_graph;
        self
    }
    /// Build a new [`Topo`] instance.
    ///
    /// Note that merely building an instance is currently expensive.
    pub fn build(self) -> Result<Topo<Find, Predicate>, Error> {
        let mut w = Topo {
            commit_graph: self.commit_graph,
            find: self.find,
            predicate: self.predicate,
            indegrees: IdMap::default(),
            states: IdMap::default(),
            explore_queue: PriorityQueue::new(),
            indegree_queue: PriorityQueue::new(),
            topo_queue: super::iter::Queue::new(self.sorting),
            parents: self.parents,
            min_gen: gix_commitgraph::GENERATION_NUMBER_INFINITY,
            buf: vec![],
        };
        // Initial flags for the states of the tips and ends. All of them are
        // seen and added to the explore and indegree queues. The ends are by
        // definition (?) uninteresting and bottom.
        let tip_flags = WalkFlags::Seen | WalkFlags::Explored | WalkFlags::InDegree;
        let end_flags = tip_flags | WalkFlags::Uninteresting | WalkFlags::Bottom;
        for (id, flags) in self
            .tips
            .iter()
            .map(|id| (id, tip_flags))
            .chain(self.ends.iter().map(|id| (id, end_flags)))
        {
            *w.indegrees.entry(*id).or_default() = 1;
            let commit = find(w.commit_graph.as_ref(), &w.find, id, &mut w.buf)?;
            let (gen, time) = gen_and_commit_time(commit)?;
            // Track the smallest generation among all starting points; it bounds the
            // indegree computation below.
            if gen < w.min_gen {
                w.min_gen = gen;
            }
            w.states.insert(*id, flags);
            w.explore_queue.insert((gen, time), *id);
            w.indegree_queue.insert((gen, time), *id);
        }
        // NOTE: Parents of the ends must also be marked uninteresting for some
        // reason. See handle_commit()
        for id in &self.ends {
            let parents = w.collect_all_parents(id)?;
            for (id, _) in parents {
                w.states
                    .entry(id)
                    .and_modify(|s| *s |= WalkFlags::Uninteresting)
                    .or_insert(WalkFlags::Uninteresting | WalkFlags::Seen);
            }
        }
        w.compute_indegrees_to_depth(w.min_gen)?;
        // NOTE: in Git the ends are also added to the topo_queue in addition to
        // the tips, but then in simplify_commit() Git is told to ignore it. For
        // now the tests pass.
        for id in self.tips.iter() {
            let i = w.indegrees.get(id).ok_or(Error::MissingIndegreeUnexpected)?;
            // NOTE(review): indegree 1 appears to mean "no other queued commit points here",
            // so only such tips seed the output queue - confirm against the topo iter.
            if *i != 1 {
                continue;
            }
            let commit = find(w.commit_graph.as_ref(), &w.find, id, &mut w.buf)?;
            let (_, time) = gen_and_commit_time(commit)?;
            let parent_ids = w.collect_all_parents(id)?.into_iter().map(|e| e.0).collect();
            w.topo_queue.push(
                time,
                Info {
                    id: *id,
                    parent_ids,
                    commit_time: Some(time),
                },
            );
        }
        w.topo_queue.initial_sort();
        Ok(w)
    }
}

View File

@@ -0,0 +1,322 @@
use gix_hash::{oid, ObjectId};
use gix_revwalk::PriorityQueue;
use smallvec::SmallVec;
use crate::commit::{
find,
topo::{Error, Sorting, WalkFlags},
Either, Info, Parents, Topo,
};
/// A commit-graph generation number paired with a committer timestamp, used as ordering key.
pub(in crate::commit) type GenAndCommitTime = (u32, i64);
// Git's priority queue works as a LIFO stack if no compare function is set,
// which is the case for `--topo-order.` However, even in that case the initial
// items of the queue are sorted according to the commit time before beginning
// the walk.
#[derive(Debug)]
pub(in crate::commit) enum Queue {
    /// Orders commits by committer timestamp (`--date-order`).
    Date(PriorityQueue<i64, Info>),
    /// A LIFO stack (`--topo-order`), initially sorted by committer timestamp.
    Topo(Vec<(i64, Info)>),
}
impl Queue {
pub(super) fn new(s: Sorting) -> Self {
match s {
Sorting::DateOrder => Self::Date(PriorityQueue::new()),
Sorting::TopoOrder => Self::Topo(vec![]),
}
}
pub(super) fn push(&mut self, commit_time: i64, info: Info) {
match self {
Self::Date(q) => q.insert(commit_time, info),
Self::Topo(q) => q.push((commit_time, info)),
}
}
fn pop(&mut self) -> Option<Info> {
match self {
Self::Date(q) => q.pop().map(|(_, info)| info),
Self::Topo(q) => q.pop().map(|(_, info)| info),
}
}
pub(super) fn initial_sort(&mut self) {
if let Self::Topo(ref mut inner_vec) = self {
inner_vec.sort_by(|a, b| a.0.cmp(&b.0));
}
}
}
impl<Find, Predicate> Topo<Find, Predicate>
where
    Find: gix_object::Find,
{
    /// Advance the in-degree walk until all queued commits of generation
    /// `gen_cutoff` or higher had their parents' in-degrees updated.
    pub(super) fn compute_indegrees_to_depth(&mut self, gen_cutoff: u32) -> Result<(), Error> {
        while let Some(((gen, _), _)) = self.indegree_queue.peek() {
            if *gen >= gen_cutoff {
                self.indegree_walk_step()?;
            } else {
                break;
            }
        }
        Ok(())
    }
    /// Pop one commit from the in-degree queue, bump the in-degree of each of its
    /// parents and schedule parents this walk hasn't processed yet.
    fn indegree_walk_step(&mut self) -> Result<(), Error> {
        if let Some(((gen, _), id)) = self.indegree_queue.pop() {
            // Keep the explore walk ahead of the in-degree walk so the parent
            // states looked up below exist.
            self.explore_to_depth(gen)?;
            let parents = self.collect_parents(&id)?;
            for (id, gen_time) in parents {
                // Bump the parent's in-degree, starting at 2 when it wasn't counted before.
                self.indegrees.entry(id).and_modify(|e| *e += 1).or_insert(2);
                let state = self.states.get_mut(&id).ok_or(Error::MissingStateUnexpected)?;
                if !state.contains(WalkFlags::InDegree) {
                    *state |= WalkFlags::InDegree;
                    self.indegree_queue.insert(gen_time, id);
                }
            }
        }
        Ok(())
    }
    /// Advance the explore walk until all queued commits of generation `gen_cutoff`
    /// or higher were explored.
    fn explore_to_depth(&mut self, gen_cutoff: u32) -> Result<(), Error> {
        while let Some(((gen, _), _)) = self.explore_queue.peek() {
            if *gen >= gen_cutoff {
                self.explore_walk_step()?;
            } else {
                break;
            }
        }
        Ok(())
    }
    /// Pop one commit from the explore queue, propagate its flags to its parents and
    /// schedule parents that weren't explored yet.
    fn explore_walk_step(&mut self) -> Result<(), Error> {
        if let Some((_, id)) = self.explore_queue.pop() {
            let parents = self.collect_parents(&id)?;
            self.process_parents(&id, &parents)?;
            for (id, gen_time) in parents {
                let state = self.states.get_mut(&id).ok_or(Error::MissingStateUnexpected)?;
                if !state.contains(WalkFlags::Explored) {
                    *state |= WalkFlags::Explored;
                    self.explore_queue.insert(gen_time, id);
                }
            }
        }
        Ok(())
    }
    /// After `id` was yielded, decrement the in-degree of its parents and enqueue
    /// those that have no unshown children left.
    fn expand_topo_walk(&mut self, id: &oid) -> Result<(), Error> {
        let parents = self.collect_parents(id)?;
        self.process_parents(id, &parents)?;
        for (pid, (parent_gen, parent_commit_time)) in parents {
            let parent_state = self.states.get(&pid).ok_or(Error::MissingStateUnexpected)?;
            if parent_state.contains(WalkFlags::Uninteresting) {
                continue;
            }
            // Walking deeper into history than the current cutoff requires the
            // in-degree walk to catch up first.
            if parent_gen < self.min_gen {
                self.min_gen = parent_gen;
                self.compute_indegrees_to_depth(self.min_gen)?;
            }
            let i = self.indegrees.get_mut(&pid).ok_or(Error::MissingIndegreeUnexpected)?;
            *i -= 1;
            // An in-degree of 1 means all children of this parent were shown,
            // so it may be scheduled for output now.
            if *i != 1 {
                continue;
            }
            let parent_ids = self.collect_all_parents(&pid)?.into_iter().map(|e| e.0).collect();
            self.topo_queue.push(
                parent_commit_time,
                Info {
                    id: pid,
                    parent_ids,
                    commit_time: Some(parent_commit_time),
                },
            );
        }
        Ok(())
    }
    /// Propagate flags from `id` to its `parents`, doing the work only once per commit.
    fn process_parents(&mut self, id: &oid, parents: &[(ObjectId, GenAndCommitTime)]) -> Result<(), Error> {
        let state = self.states.get_mut(id).ok_or(Error::MissingStateUnexpected)?;
        if state.contains(WalkFlags::Added) {
            return Ok(());
        }
        *state |= WalkFlags::Added;
        // If the current commit is uninteresting we pass that on to ALL
        // parents, otherwise we set the Seen flag.
        let (pass, insert) = if state.contains(WalkFlags::Uninteresting) {
            let flags = WalkFlags::Uninteresting;
            for (id, _) in parents {
                let grand_parents = self.collect_all_parents(id)?;
                for (id, _) in &grand_parents {
                    self.states
                        .entry(*id)
                        .and_modify(|s| *s |= WalkFlags::Uninteresting)
                        .or_insert(WalkFlags::Uninteresting | WalkFlags::Seen);
                }
            }
            (flags, flags)
        } else {
            // NOTE: git sets SEEN like we do but keeps the SYMMETRIC_LEFT and
            // ANCESTRY_PATH if they are set, but they have no purpose here.
            let flags = WalkFlags::empty();
            (flags, WalkFlags::Seen)
        };
        for (id, _) in parents {
            self.states.entry(*id).and_modify(|s| *s |= pass).or_insert(insert);
        }
        Ok(())
    }
    /// Lookup the parents of `id`, honoring a `Parents::First` setting.
    fn collect_parents(&mut self, id: &oid) -> Result<SmallVec<[(ObjectId, GenAndCommitTime); 1]>, Error> {
        collect_parents(
            &mut self.commit_graph,
            &self.find,
            id,
            matches!(self.parents, Parents::First),
            &mut self.buf,
        )
    }
    // Same as collect_parents but disregards the first_parent flag
    pub(super) fn collect_all_parents(
        &mut self,
        id: &oid,
    ) -> Result<SmallVec<[(ObjectId, GenAndCommitTime); 1]>, Error> {
        collect_parents(&mut self.commit_graph, &self.find, id, false, &mut self.buf)
    }
    /// Produce the next commit to be shown, or `None` once the walk is exhausted.
    fn pop_commit(&mut self) -> Option<Result<Info, Error>> {
        let commit = self.topo_queue.pop()?;
        let i = match self.indegrees.get_mut(&commit.id) {
            Some(i) => i,
            None => {
                return Some(Err(Error::MissingIndegreeUnexpected));
            }
        };
        // Mark the commit as shown before expanding its parents.
        *i = 0;
        if let Err(e) = self.expand_topo_walk(&commit.id) {
            return Some(Err(e));
        }
        Some(Ok(commit))
    }
}
impl<Find, Predicate> Iterator for Topo<Find, Predicate>
where
    Find: gix_object::Find,
    Predicate: FnMut(&oid) -> bool,
{
    type Item = Result<Info, Error>;

    /// Yield the next commit that passes the predicate, ending the iteration
    /// once the underlying queue is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let info = match self.pop_commit()? {
                Err(err) => return Some(Err(err)),
                Ok(info) => info,
            };
            if (self.predicate)(&info.id) {
                return Some(Ok(info));
            }
        }
    }
}
/// Collect the parents of `id` together with their generation and commit time,
/// preferring the commit-graph `cache` and falling back to lookups via `f` with `buf`
/// as scratch space. If `first_only` is set, only the first parent is collected.
fn collect_parents<Find>(
    cache: &mut Option<gix_commitgraph::Graph>,
    f: Find,
    id: &oid,
    first_only: bool,
    buf: &mut Vec<u8>,
) -> Result<SmallVec<[(ObjectId, GenAndCommitTime); 1]>, Error>
where
    Find: gix_object::Find,
{
    let mut parents = SmallVec::<[(ObjectId, GenAndCommitTime); 1]>::new();
    match find(cache.as_ref(), &f, id, buf)? {
        Either::CommitRefIter(c) => {
            // Parse parent ids out of the raw commit object.
            for token in c {
                use gix_object::commit::ref_iter::Token as T;
                match token {
                    Ok(T::Tree { .. }) => continue,
                    Ok(T::Parent { id }) => {
                        parents.push((id, (0, 0))); // Dummy numbers to be filled in
                        if first_only {
                            break;
                        }
                    }
                    // Parents come right after the tree token - anything else means
                    // we are past them.
                    Ok(_past_parents) => break,
                    Err(err) => return Err(err.into()),
                }
            }
            // Need to check the cache again. That a commit is not in the cache
            // doesn't mean a parent is not.
            for (id, gen_time) in parents.iter_mut() {
                let commit = find(cache.as_ref(), &f, id, buf)?;
                *gen_time = gen_and_commit_time(commit)?;
            }
        }
        Either::CachedCommit(c) => {
            for pos in c.iter_parents() {
                let Ok(pos) = pos else {
                    // drop corrupt cache and use ODB from now on.
                    *cache = None;
                    return collect_parents(cache, f, id, first_only, buf);
                };
                let parent_commit = cache
                    .as_ref()
                    .expect("cache exists if CachedCommit was returned")
                    .commit_at(pos);
                parents.push((
                    parent_commit.id().into(),
                    (parent_commit.generation(), parent_commit.committer_timestamp() as i64),
                ));
                if first_only {
                    break;
                }
            }
        }
    }
    Ok(parents)
}
/// Return the generation number and committer timestamp for commit `c`.
///
/// Commits not backed by the commit-graph get `GENERATION_NUMBER_INFINITY` as their
/// generation, with the timestamp parsed from the raw object.
pub(super) fn gen_and_commit_time(c: Either<'_, '_>) -> Result<GenAndCommitTime, Error> {
    use gix_object::commit::ref_iter::Token;
    match c {
        Either::CachedCommit(commit) => Ok((commit.generation(), commit.committer_timestamp() as i64)),
        Either::CommitRefIter(tokens) => {
            let mut commit_time = 0;
            for token in tokens {
                match token? {
                    Token::Tree { .. } | Token::Parent { .. } | Token::Author { .. } => continue,
                    Token::Committer { signature } => {
                        commit_time = signature.seconds();
                        break;
                    }
                    _ => break,
                }
            }
            Ok((gix_commitgraph::GENERATION_NUMBER_INFINITY, commit_time))
        }
    }
}

View File

@@ -0,0 +1,71 @@
//! Topological commit traversal, similar to `git log --topo-order`, which keeps track of graph state.
use bitflags::bitflags;
/// The errors that can occur during creation and iteration.
#[derive(thiserror::Error, Debug)]
#[allow(missing_docs)]
pub enum Error {
    /// An in-degree entry was expected to exist but couldn't be found.
    #[error("Indegree information is missing")]
    MissingIndegreeUnexpected,
    /// A commit's walk-state flags were expected to exist but couldn't be found.
    #[error("Internal state (bitflags) not found")]
    MissingStateUnexpected,
    #[error(transparent)]
    ObjectDecode(#[from] gix_object::decode::Error),
    #[error(transparent)]
    Find(#[from] gix_object::find::existing_iter::Error),
}
bitflags! {
    /// Set of flags to describe the state of a particular commit while iterating.
    // NOTE: The names correspond to the names of the flags in revision.h
    #[repr(transparent)]
    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
    pub(super) struct WalkFlags: u8 {
        /// Commit has been seen and has a state entry
        const Seen = 0b000001;
        /// Commit has been processed by the Explore walk
        const Explored = 0b000010;
        /// Commit has been processed by the Indegree walk
        const InDegree = 0b000100;
        /// Commit is deemed uninteresting for whatever reason
        const Uninteresting = 0b001000;
        /// Commit marks the end of a walk, like `foo` in `git rev-list foo..bar`
        const Bottom = 0b010000;
        /// Parents have been processed
        const Added = 0b100000;
    }
}
/// Sorting to use for the topological walk.
///
/// ### Sample History
///
/// The following history will be referred to for explaining how the sort order works, with the number denoting the commit timestamp
/// (*their X-alignment doesn't matter*).
///
/// ```text
/// ---1----2----4----7 <- second parent of 8
///     \            \
///      3----5----6----8---
/// ```
#[derive(Clone, Copy, Debug, Default)]
pub enum Sorting {
    /// Show no parents before all of its children are shown, but otherwise show
    /// commits in the commit timestamp order.
    ///
    /// In the *sample history* the order would be `8, 7, 6, 5, 4, 3, 2, 1`.
    /// This is equivalent to `git rev-list --date-order`.
    #[default]
    DateOrder,
    /// Show no parents before all of its children are shown, and avoid
    /// showing commits on multiple lines of history intermixed.
    ///
    /// In the *sample history* the order would be `8, 6, 5, 3, 7, 4, 2, 1`.
    /// This is equivalent to `git rev-list --topo-order`.
    TopoOrder,
}
// Construction of `Topo` instances via a builder.
mod init;
pub use init::Builder;
// The iteration machinery, visible only to the parent `commit` module.
pub(super) mod iter;

8
src-traverse/src/lib.rs Normal file
View File

@@ -0,0 +1,8 @@
//! Various ways to traverse commit graphs and trees with implementations as iterator
#![deny(missing_docs, rust_2018_idioms)]
#![forbid(unsafe_code)]
/// Commit traversal
pub mod commit;
/// Tree traversal
pub mod tree;

View File

@@ -0,0 +1,102 @@
use std::collections::VecDeque;
use gix_hash::ObjectId;
/// The error is part of the item returned by the [`breadthfirst()`](crate::tree::breadthfirst()) and
/// [`depthfirst()`](crate::tree::depthfirst()) functions.
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
    /// An object could not be found in the object database.
    #[error(transparent)]
    Find(#[from] gix_object::find::existing_iter::Error),
    /// The delegate's visit method returned `Break` to stop the traversal.
    #[error("The delegate cancelled the operation")]
    Cancelled,
    /// A tree object could not be decoded.
    #[error(transparent)]
    ObjectDecode(#[from] gix_object::decode::Error),
}
/// The state used and potentially shared by multiple tree traversals.
#[derive(Default, Clone)]
pub struct State {
    // Trees that still have to be visited, in breadth-first order.
    next: VecDeque<ObjectId>,
    // Re-usable buffer holding the data of the tree currently being iterated.
    buf: Vec<u8>,
}
impl State {
    /// Reset the state for a fresh traversal while keeping allocations around.
    fn clear(&mut self) {
        self.next.clear();
        self.buf.clear();
    }
}
pub(super) mod function {
    use std::borrow::BorrowMut;

    use gix_object::{FindExt, TreeRefIter};

    use super::{Error, State};
    use crate::tree::Visit;

    /// Start a breadth-first iteration over the `root` trees entries.
    ///
    /// Note that non-trees will be listed first, so the natural order of entries within a tree is lost.
    ///
    /// * `root`
    ///   * the tree to iterate in a nested fashion.
    /// * `state` - all state used for the iteration. If multiple iterations are performed, allocations can be minimized by reusing
    ///   this state.
    /// * `find` - a way to lookup new object data during traversal by their `ObjectId`, writing their data into buffer and returning
    ///   an iterator over entries if the object is present and is a tree. Caching should be implemented within this function
    ///   as needed. The return value is `Option<TreeIter>` which degenerates all error information. Not finding a commit should also
    ///   be considered an errors as all objects in the tree DAG should be present in the database. Hence [`Error::Find`] should
    ///   be escalated into a more specific error if it's encountered by the caller.
    /// * `delegate` - A way to observe entries and control the iteration while allowing the optimizer to let you pay only for what you use.
    pub fn breadthfirst<StateMut, Find, V>(
        root: TreeRefIter<'_>,
        mut state: StateMut,
        objects: Find,
        delegate: &mut V,
    ) -> Result<(), Error>
    where
        Find: gix_object::Find,
        StateMut: BorrowMut<State>,
        V: Visit,
    {
        let state = state.borrow_mut();
        state.clear();
        let mut tree = root;
        loop {
            for entry in tree {
                let entry = entry?;
                if entry.mode.is_tree() {
                    delegate.push_path_component(entry.filename);
                    let action = delegate.visit_tree(&entry);
                    match action {
                        // Don't descend into this subtree, keep iterating siblings.
                        std::ops::ControlFlow::Continue(false) => {}
                        std::ops::ControlFlow::Continue(true) => {
                            // Replace the plain component push by a tracked one,
                            // then queue the subtree for a later visit.
                            delegate.pop_path_component();
                            delegate.push_back_tracked_path_component(entry.filename);
                            state.next.push_back(entry.oid.to_owned());
                        }
                        std::ops::ControlFlow::Break(()) => {
                            return Err(Error::Cancelled);
                        }
                    }
                } else {
                    delegate.push_path_component(entry.filename);
                    if delegate.visit_nontree(&entry).is_break() {
                        return Err(Error::Cancelled);
                    }
                }
                delegate.pop_path_component();
            }
            // Breadth-first: continue with the next queued subtree, or finish.
            match state.next.pop_front() {
                Some(oid) => {
                    delegate.pop_front_tracked_path_and_set_current();
                    tree = objects.find_tree_iter(&oid, &mut state.buf)?;
                }
                None => break Ok(()),
            }
        }
    }
}

View File

@@ -0,0 +1,113 @@
pub use super::breadthfirst::Error;
/// The state used and potentially shared by multiple tree traversals, reusing memory.
#[derive(Default, Clone)]
pub struct State {
    // Buffers available for re-use; their content is stale and cleared on [`State::pop_buf()`].
    freelist: Vec<Vec<u8>>,
}
impl State {
    /// Pop one empty buffer from the free-list.
    pub fn pop_buf(&mut self) -> Vec<u8> {
        self.freelist
            .pop()
            .map(|mut buf| {
                buf.clear();
                buf
            })
            .unwrap_or_default()
    }
    /// Make `buf` available for re-use with [`Self::pop_buf()`].
    pub fn push_buf(&mut self, buf: Vec<u8>) {
        self.freelist.push(buf);
    }
}
pub(super) mod function {
    use std::borrow::BorrowMut;

    use gix_hash::ObjectId;
    use gix_object::{FindExt, TreeRefIter};

    use super::{Error, State};
    use crate::tree::Visit;

    /// A depth-first traversal of the `root` tree, that preserves the natural order of a tree while immediately descending
    /// into sub-trees.
    ///
    /// `state` can be passed to re-use memory during multiple invocations.
    pub fn depthfirst<StateMut, Find, V>(
        root: ObjectId,
        mut state: StateMut,
        objects: Find,
        delegate: &mut V,
    ) -> Result<(), Error>
    where
        Find: gix_object::Find,
        StateMut: BorrowMut<State>,
        V: Visit,
    {
        // The traversal runs on an explicit stack of machine states instead of
        // recursion, so buffers can be re-used and the call depth stays constant.
        enum Machine {
            // Fetch the tree with this id into a buffer, then iterate it.
            GetTree(ObjectId),
            // Continue iterating a tree whose data is in `tree_buf`, starting at
            // `byte_offset_to_next_entry`.
            Iterate {
                tree_buf: Vec<u8>,
                byte_offset_to_next_entry: usize,
            },
        }

        let state = state.borrow_mut();
        let mut stack = vec![Machine::GetTree(root)];
        'outer: while let Some(item) = stack.pop() {
            match item {
                Machine::GetTree(id) => {
                    let mut buf = state.pop_buf();
                    objects.find_tree_iter(&id, &mut buf)?;
                    stack.push(Machine::Iterate {
                        tree_buf: buf,
                        byte_offset_to_next_entry: 0,
                    });
                }
                Machine::Iterate {
                    tree_buf: buf,
                    byte_offset_to_next_entry,
                } => {
                    let mut iter = TreeRefIter::from_bytes(&buf[byte_offset_to_next_entry..]);
                    delegate.pop_back_tracked_path_and_set_current();
                    while let Some(entry) = iter.next() {
                        let entry = entry?;
                        if entry.mode.is_tree() {
                            delegate.push_path_component(entry.filename);
                            let res = delegate.visit_tree(&entry);
                            delegate.pop_path_component();
                            match res {
                                std::ops::ControlFlow::Continue(true) => {}
                                std::ops::ControlFlow::Break(()) => break 'outer,
                                std::ops::ControlFlow::Continue(false) => continue,
                            }
                            delegate.push_back_tracked_path_component("".into());
                            delegate.push_back_tracked_path_component(entry.filename);
                            // Remember where to resume in this tree once the subtree
                            // is done, then descend into the subtree first.
                            let recurse_tree = Machine::GetTree(entry.oid.to_owned());
                            let continue_at_next_entry = Machine::Iterate {
                                byte_offset_to_next_entry: iter.offset_to_next_entry(&buf),
                                tree_buf: buf,
                            };
                            stack.push(continue_at_next_entry);
                            stack.push(recurse_tree);
                            continue 'outer;
                        } else {
                            delegate.push_path_component(entry.filename);
                            if let std::ops::ControlFlow::Break(()) = delegate.visit_nontree(&entry) {
                                break 'outer;
                            }
                            delegate.pop_path_component();
                        }
                    }
                    // This tree is fully traversed - hand its buffer back for re-use.
                    state.push_buf(buf);
                }
            }
        }
        Ok(())
    }
}

View File

@@ -0,0 +1,72 @@
use std::collections::VecDeque;
use gix_object::bstr::{BStr, BString};
/// A trait to allow responding to a traversal designed to observe all entries in a tree, recursively while keeping track of
/// paths if desired.
pub trait Visit {
    /// Sets the full path in the back of the queue so future calls to push and pop components affect it instead.
    ///
    /// Note that the first call is made without an accompanying call to [`Self::push_back_tracked_path_component()`]
    ///
    /// This is used by the depth-first traversal of trees.
    fn pop_back_tracked_path_and_set_current(&mut self);
    /// Sets the full path in front of the queue so future calls to push and pop components affect it instead.
    ///
    /// This is used by the breadth-first traversal of trees.
    fn pop_front_tracked_path_and_set_current(&mut self);
    /// Append a `component` to the end of a path, which may be empty.
    ///
    /// If `component` is empty, store the current path.
    fn push_back_tracked_path_component(&mut self, component: &BStr);
    /// Append a `component` to the end of a path, which may be empty.
    fn push_path_component(&mut self, component: &BStr);
    /// Removes the last component from the path, which may leave it empty.
    fn pop_path_component(&mut self);
    /// Observe a tree entry that is a tree and return an instruction whether to continue or not.
    /// [`std::ops::ControlFlow::Continue`] with `false` prevents descending into it, for example if it's known to the caller already,
    /// while [`std::ops::ControlFlow::Break`] cancels the entire traversal.
    ///
    /// The implementation may use the current path to learn where in the tree the change is located.
    fn visit_tree(&mut self, entry: &gix_object::tree::EntryRef<'_>) -> visit::Action;
    /// Observe a tree entry that is NO tree and return an instruction whether to continue or not.
    /// [`std::ops::ControlFlow::Break`] cancels the entire traversal.
    ///
    /// The implementation may use the current path to learn where in the tree the change is located.
    fn visit_nontree(&mut self, entry: &gix_object::tree::EntryRef<'_>) -> visit::Action;
}
/// A [Visit] implementation to record every observed change and keep track of the changed paths.
///
/// Recorders can also be instructed to track the filename only, or no location at all.
#[derive(Clone, Debug)]
pub struct Recorder {
    // Tracked paths stored for later restoration when switching between subtrees.
    path_deque: VecDeque<BString>,
    // The location of the entry currently being visited, per `location`.
    path: BString,
    /// How to track the location.
    location: Option<recorder::Location>,
    /// The observed entries.
    pub records: Vec<recorder::Entry>,
}
/// Types used in the return values of the [`Visit`] trait.
pub mod visit {
    /// What to do after an entry was [recorded](super::Visit::visit_tree()).
    ///
    /// Use [`std::ops::ControlFlow::Break`] to stop the traversal of entries, making this the last call to [`visit_(tree|nontree)(…)`](super::Visit::visit_nontree()).
    /// Use [`std::ops::ControlFlow::Continue`] with `true` to continue the traversal and descend into tree entries.
    /// Use [`std::ops::ControlFlow::Continue`] with `false` to skip descending into the entry (only useful in [`visit_tree(…)`](super::Visit::visit_tree())).
    pub type Action = std::ops::ControlFlow<(), bool>;
}
/// A [`Visit`] implementation that records observed entries along with their paths.
pub mod recorder;
/// Breadth-first tree traversal.
pub mod breadthfirst;
pub use breadthfirst::function::breadthfirst;
/// Depth-first tree traversal.
pub mod depthfirst;
pub use depthfirst::function::depthfirst;

View File

@@ -0,0 +1,148 @@
use gix_hash::ObjectId;
use gix_object::{
bstr::{BStr, BString, ByteSlice, ByteVec},
tree,
};
use crate::tree::{visit::Action, Recorder, Visit};
/// Describe how to track the location of an entry.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum Location {
    /// Track the entire path, relative to the repository.
    Path,
    /// Keep only the file-name as location, which may be enough for some calculations.
    ///
    /// This is less expensive than tracking the entire `Path`.
    FileName,
}
/// An owned entry as observed by a call to [`visit_(tree|nontree)(…)`][Visit::visit_tree()], enhanced with the full path to it.
/// Otherwise similar to [`gix_object::tree::EntryRef`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Entry {
    /// The kind of entry, similar to entries in a unix directory tree.
    pub mode: tree::EntryMode,
    /// The full path to the entry. A root entry would be `d`, and a file `a` within the directory would be `d/a`.
    ///
    /// This is independent of the platform and the path separators actually used there.
    pub filepath: BString,
    /// The id of the entry which can be used to locate it in an object database.
    pub oid: ObjectId,
}
impl Entry {
    /// Create an owned entry from `entry` along with the `filepath` leading to it.
    fn new(entry: &tree::EntryRef<'_>, filepath: BString) -> Self {
        Self {
            mode: entry.mode,
            filepath,
            oid: entry.oid.to_owned(),
        }
    }
}
impl Default for Recorder {
    /// A recorder that tracks full paths by default.
    fn default() -> Self {
        Recorder {
            path_deque: Default::default(),
            path: Default::default(),
            location: Some(Location::Path),
            records: Vec::new(),
        }
    }
}
impl Recorder {
    /// Drop the last path component along with the separator preceding it, if any.
    fn pop_element(&mut self) {
        match self.path.rfind_byte(b'/') {
            Some(slash) => self.path.resize(slash, 0),
            None => self.path.clear(),
        }
    }
    /// Append `name` to the tracked path, separated from previous components by a slash.
    fn push_element(&mut self, name: &BStr) {
        if !name.is_empty() {
            if !self.path.is_empty() {
                self.path.push(b'/');
            }
            self.path.push_str(name);
        }
    }
}
/// Builder
impl Recorder {
    /// Set the location tracking mode to `location`, or disable location tracking with `None`.
    pub fn track_location(mut self, location: Option<Location>) -> Self {
        self.location = location;
        self
    }
}
/// Access
impl Recorder {
    /// Obtain a copy of the currently tracked, full path of the entry.
    pub fn path_clone(&self) -> BString {
        self.path.clone()
    }
    /// Return the currently tracked path.
    pub fn path(&self) -> &BStr {
        self.path.as_ref()
    }
}
impl Visit for Recorder {
fn pop_back_tracked_path_and_set_current(&mut self) {
if let Some(Location::Path) = self.location {
self.path = self.path_deque.pop_back().unwrap_or_default();
}
}
fn pop_front_tracked_path_and_set_current(&mut self) {
if let Some(Location::Path) = self.location {
self.path = self
.path_deque
.pop_front()
.expect("every call is matched with push_tracked_path_component");
}
}
fn push_back_tracked_path_component(&mut self, component: &BStr) {
if let Some(Location::Path) = self.location {
self.push_element(component);
self.path_deque.push_back(self.path.clone());
}
}
fn push_path_component(&mut self, component: &BStr) {
match self.location {
None => {}
Some(Location::Path) => {
self.push_element(component);
}
Some(Location::FileName) => {
self.path.clear();
self.path.extend_from_slice(component);
}
}
}
fn pop_path_component(&mut self) {
if let Some(Location::Path) = self.location {
self.pop_element();
}
}
fn visit_tree(&mut self, entry: &tree::EntryRef<'_>) -> Action {
self.records.push(Entry::new(entry, self.path_clone()));
std::ops::ControlFlow::Continue(true)
}
fn visit_nontree(&mut self, entry: &tree::EntryRef<'_>) -> Action {
self.records.push(Entry::new(entry, self.path_clone()));
std::ops::ControlFlow::Continue(true)
}
}

View File

@@ -0,0 +1,25 @@
lints.workspace = true
[package]
name = "src-traverse-tests"
version = "0.0.0"
repository = "https://github.com/GitoxideLabs/gitoxide"
license = "MIT OR Apache-2.0"
description = "Integration tests for the src-traverse crate"
authors = ["Sebastian Thiel <sebastian.thiel@icloud.com>"]
edition = "2021"
rust-version = "1.82"
[[test]]
name = "traverse"
path = "traverse/main.rs"
[dev-dependencies]
insta = "1.46.3"
src-traverse = { path = ".." }
src-testtools = { path = "../../tests/tools" }
src-odb = { path = "../../src-odb" }
src-hash = { path = "../../src-hash" }
src-object = { path = "../../src-object" }
src-commitgraph = { path = "../../src-commitgraph" }
src-path = { path = "../../src-path" }

Binary file not shown.

View File

@@ -0,0 +1,72 @@
#!/usr/bin/env bash
set -eu -o pipefail

# Create an empty commit with `message` whose author and committer dates are the
# given unix `timestamp` (fixed -0700 offset), for reproducible hashes.
function commit_at() {
  local message=${1:?first argument is the commit message}
  local timestamp=${2:?second argument is the timestamp}
  GIT_COMMITTER_DATE="$timestamp -0700"
  GIT_AUTHOR_DATE="$timestamp -0700"
  export GIT_COMMITTER_DATE GIT_AUTHOR_DATE
  git commit --allow-empty -m "$message"
}

# Write a commit-graph and repack so both the cached and the ODB lookup paths exist.
function optimize() {
  git commit-graph write --no-progress --reachable
  git repack -adq
}

# Test 1: Hidden traversal has a longer path to shared ancestors
# Graph structure:
#   A(tip) --> shared
#              /
#   H(hidden) --> X --> Y --> shared
#
# This tests that shared is correctly hidden even though the interesting
# path (A->shared) is shorter than the hidden path (H->X->Y->shared).
(git init long_hidden_path && cd long_hidden_path
  git checkout -b main
  # Create base commit with oldest timestamp
  commit_at "shared" 1000000000
  # Create hidden branch with intermediate commits
  git checkout -b hidden_branch
  commit_at "Y" 1000000100
  commit_at "X" 1000000200
  commit_at "H" 1000000300 # hidden tip
  # Go back to main and create tip A (newest timestamp)
  git checkout main
  commit_at "A" 1000000400 # tip
  optimize
)

# Test 2: Similar structure but with interesting path longer than hidden path
# Graph structure:
#   A(tip) --> B --> C --> D(shared)
#                          /
#   H(hidden) --------->+
#
# This tests that D is correctly hidden when the interesting path
# (A->B->C->D) is longer than the hidden path (H->D).
(git init long_interesting_path && cd long_interesting_path
  git checkout -b main
  # Create base commit with oldest timestamp
  commit_at "D" 1000000000
  # Create hidden branch (direct to D)
  git checkout -b hidden_branch
  commit_at "H" 1000000100 # hidden tip, direct child of D
  # Go back to main and create longer path
  git checkout main
  commit_at "C" 1000000200
  commit_at "B" 1000000300
  commit_at "A" 1000000400 # tip
  optimize
)

View File

@@ -0,0 +1,67 @@
#!/usr/bin/env bash
set -eu -o pipefail

# Initialize or advance a deterministic clock by one minute and export it as the
# author/committer date so commit hashes are reproducible.
function tick () {
  if test -z "${tick+set}"
  then
    tick=1112911993
  else
    tick=$(($tick + 60))
  fi
  GIT_COMMITTER_DATE="$tick -0700"
  GIT_AUTHOR_DATE="$tick -0700"
  export GIT_COMMITTER_DATE GIT_AUTHOR_DATE
}
tick

# Create an empty commit with the given message at the next tick.
function commit() {
  local message=${1:?first argument is the commit message}
  tick
  git commit --allow-empty -m "$message"
}

# Write a commit-graph and repack so both lookup paths can be exercised.
function optimize() {
  git commit-graph write --no-progress --reachable
  git repack -adq
}

# Record `git rev-list` output to compare the Rust implementation against.
function collect_baselines() {
  git rev-list --topo-order HEAD > all-commits.baseline
  git rev-list --topo-order --first-parent HEAD > first-parent.baseline
  git rev-list --date-order ^f1cce1b5c7efcdfa106e95caa6c45a2cae48a481 HEAD > date-order.baseline
}

git init
git config merge.ff false
git checkout -q -b main
for i in {0..5}; do
  commit c$i
done
git branch branch1
for i in {6..8}; do
  commit c$i
done
git checkout -q branch1
commit b1c1
git checkout -q main
commit c9
git merge branch1 -m merge
git checkout -q branch1
commit c10
commit c11
git checkout -q branch1
commit b1c2
git checkout -q main
git merge branch1 -m merge
commit c12
optimize
collect_baselines

View File

@@ -0,0 +1,92 @@
#!/usr/bin/env bash
set -eu -o pipefail

# Initialize or advance a deterministic clock by one minute and export it as the
# author/committer date so commit hashes are reproducible.
function tick () {
  if test -z "${tick+set}"
  then
    tick=1112911993
  else
    tick=$(($tick + 60))
  fi
  GIT_COMMITTER_DATE="$tick -0700"
  GIT_AUTHOR_DATE="$tick -0700"
  export GIT_COMMITTER_DATE GIT_AUTHOR_DATE
}
tick

# Create an empty commit with the given message at the next tick.
function commit() {
  local message=${1:?first argument is the commit message}
  tick
  git commit --allow-empty -m "$message"
}

# Write a commit-graph and repack so both lookup paths can be exercised.
function optimize() {
  git commit-graph write --no-progress --reachable
  git repack -adq
}

# Two side branches merged at once via an octopus merge.
(git init simple && cd simple
  git config merge.ff false
  git checkout -q -b main
  commit c1
  commit c2
  commit c3
  commit c4
  git checkout -q -b branch1
  git checkout -q -b branch2
  commit b2c1
  commit b2c2
  git checkout branch1
  commit b1c1
  commit b1c2
  git checkout -q main
  commit c5
  git merge branch1 branch2 -m merge
  optimize
)

# Like `simple`, but commits of both branches alternate in time.
(git init intermixed && cd intermixed
  git config merge.ff false
  git checkout -q -b main
  commit c1
  commit c2
  git checkout -q -b branch1
  git checkout -q -b branch2
  commit b2c1
  git checkout branch1
  commit b1c1
  git checkout branch2
  commit b2c2
  git checkout branch1
  commit b1c2
  git checkout -q main
  commit c3
  git merge branch1 branch2 -m merge
  optimize
)

# Two histories without any common ancestor, thanks to an orphan branch.
(git init disjoint_branches && cd disjoint_branches
  git checkout -b main
  commit a1
  commit a2
  commit a3
  git checkout --orphan disjoint
  commit b1
  commit b2
  commit b3
  optimize
)

View File

@@ -0,0 +1,24 @@
#!/usr/bin/env bash
set -eu -o pipefail
# all commits have the same date as it's set by `src-testtools` to a fixed value.
git init -q
git config merge.ff false

# A linear main branch with one side branch merged back in.
git checkout -q -b main
git commit -q --allow-empty -m c1
git commit -q --allow-empty -m c2
git commit -q --allow-empty -m c3
git commit -q --allow-empty -m c4
git checkout -q -b branch1
git commit -q --allow-empty -m b1c1
git commit -q --allow-empty -m b1c2
git checkout -q main
git commit -q --allow-empty -m c5
git merge branch1 -m m1b1

# Provide a commit-graph so cached lookups can be tested too.
git commit-graph write --no-progress --reachable
git repack -adq

View File

@@ -0,0 +1,23 @@
#!/usr/bin/env bash
set -eu -o pipefail
git init -q
git config merge.ff false

# Commit in year 2000
git checkout -q -b main
GIT_COMMITTER_DATE="2000-01-02 00:00:00 +0000" git commit -q --allow-empty -m c1 #134385f6d781b7e97062102c6a483440bfda2a03-
# Commit in year 2001
git checkout -q -b branch1
GIT_COMMITTER_DATE="2001-01-02 00:00:00 +0000" git commit -q --allow-empty -m b1c1 #bcb05040a6925f2ff5e10d3ae1f9264f2e8c43ac-
# Commit in year 2000
git checkout -q main
GIT_COMMITTER_DATE="2000-01-02 00:00:00 +0000" git commit -q --allow-empty -m c2 #9902e3c3e8f0c569b4ab295ddf473e6de763e1e7-
# Commit from branch1 made in 2001 merged in 2002
GIT_COMMITTER_DATE="2002-01-02 00:00:00 +0000" git merge branch1 -m m1b1 #288e509293165cb5630d08f4185bdf2445bf6170-

# Provide a commit-graph so cached lookups can be tested too.
git commit-graph write --no-progress --reachable
git repack -adq

View File

@@ -0,0 +1,14 @@
#!/usr/bin/env bash
set -eu -o pipefail
git init -q
git checkout -q -b main

# A single commit with a mix of top-level files and nested directories.
touch a b c
mkdir d e f
touch d/a e/b f/c f/z
mkdir f/d
touch f/d/x
git add .
git commit -q -m c1

View File

@@ -0,0 +1,14 @@
#!/usr/bin/env bash
set -eu -o pipefail
git init -q
git checkout -q -b main

# A single commit with nested directories whose names resemble GitHub metadata files.
touch a b c
mkdir d e f
touch d/a e/b f/c f/z
mkdir f/ISSUE_TEMPLATE
touch f/ISSUE_TEMPLATE/x f/FUNDING.yml f/dependabot.yml
git add .
git commit -q -m c1

View File

@@ -0,0 +1,2 @@
// Tests for the `Simple` commit traversal.
mod simple;
// Tests for the `Topo` commit traversal.
mod topo;

View File

@@ -0,0 +1,199 @@
//! Some dates adjusted to be a year apart, but still 'c1' and 'c2' with the same date.
use super::*;
use crate::util::fixture;
use gix_traverse::commit::simple::CommitTimeOrder;
/// Provide the repository directory and an ODB handle for the fixture with adjusted commit dates.
fn adjusted_dates_repo() -> crate::Result<(std::path::PathBuf, gix_odb::Handle)> {
    let dir = fixture("make_traversal_repo_for_commits_with_dates.sh")?;
    let objects_dir = dir.join(".git").join("objects");
    Ok((dir, gix_odb::at(objects_dir)?))
}
#[test]
fn head_breadth_first() -> crate::Result {
    let (repo_dir, odb) = adjusted_dates_repo()?;
    // Timestamps show b1c1 (978393600) is a year newer than c2 (946771200),
    // explaining why date-order puts b1c1 before c2.
    insta::assert_snapshot!(git_graph_with_time(&repo_dir)?, @r"
    * 288e509293165cb5630d08f4185bdf2445bf6170 1009929600 (HEAD -> main) m1b1
    |\
    | * bcb05040a6925f2ff5e10d3ae1f9264f2e8c43ac 978393600 (branch1) b1c1
    * | 9902e3c3e8f0c569b4ab295ddf473e6de763e1e7 946771200 c2
    |/
    * 134385f6d781b7e97062102c6a483440bfda2a03 946771200 c1
    ");
    let tip = hex_to_id("288e509293165cb5630d08f4185bdf2445bf6170"); // m1b1
    // Git also shows `b1c1` first, making topo-order similar to date order,
    // even though c2 *is* the first parent.
    // Breadth-first however lists parents in their recorded order: c2 before b1c1.
    let expected = [
        tip,
        hex_to_id("9902e3c3e8f0c569b4ab295ddf473e6de763e1e7"), // c2
        hex_to_id("bcb05040a6925f2ff5e10d3ae1f9264f2e8c43ac"), // b1c1
        hex_to_id("134385f6d781b7e97062102c6a483440bfda2a03"), // c1
    ];
    let result = traverse_both([tip], &odb, Sorting::BreadthFirst, Parents::All, [])?;
    assert_eq!(result, expected);
    Ok(())
}
#[test]
fn head_date_order() -> crate::Result {
    let (_repo_dir, odb) = adjusted_dates_repo()?;
    // Graph with timestamps shown in `head_breadth_first`
    let tip = hex_to_id("288e509293165cb5630d08f4185bdf2445bf6170"); // m1b1
    let b1c1 = hex_to_id("bcb05040a6925f2ff5e10d3ae1f9264f2e8c43ac");
    let c2 = hex_to_id("9902e3c3e8f0c569b4ab295ddf473e6de763e1e7");
    let c1 = hex_to_id("134385f6d781b7e97062102c6a483440bfda2a03");
    // Newest-first visits the younger branch commit before the older mainline one;
    // oldest-first flips that relation.
    let cases = [
        (CommitTimeOrder::NewestFirst, vec![tip, b1c1, c2, c1]),
        (CommitTimeOrder::OldestFirst, vec![tip, c2, c1, b1c1]),
    ];
    for (order, expected) in cases {
        let actual = traverse_both([tip], &odb, Sorting::ByCommitTime(order), Parents::All, [])?;
        assert_eq!(actual, expected);
    }
    Ok(())
}
#[test]
fn head_date_order_with_cutoff() -> crate::Result {
    let (_repo_dir, odb) = adjusted_dates_repo()?;
    // Graph shown in `head_breadth_first`
    let tip = hex_to_id("288e509293165cb5630d08f4185bdf2445bf6170"); // m1b1
    // Only the tip and b1c1 survive the cutoff; older commits are pruned in both orders.
    let expected = [
        tip,
        hex_to_id("bcb05040a6925f2ff5e10d3ae1f9264f2e8c43ac"), // b1c1
    ];
    let cutoff = 978393600; // =2001-01-02 00:00:00 +0000
    for order in [CommitTimeOrder::NewestFirst, CommitTimeOrder::OldestFirst] {
        let sorting = Sorting::ByCommitTimeCutoff {
            order,
            seconds: cutoff,
        };
        let actual = traverse_both([tip], &odb, sorting, Parents::All, [])?;
        assert_eq!(actual, expected, "order = {order:?}");
    }
    Ok(())
}
#[test]
fn head_date_order_with_cutoff_disabled() -> crate::Result {
    let (_repo_dir, odb) = adjusted_dates_repo()?;
    // Graph shown in `head_breadth_first`
    let tip = hex_to_id("288e509293165cb5630d08f4185bdf2445bf6170"); // m1b1
    let b1c1 = hex_to_id("bcb05040a6925f2ff5e10d3ae1f9264f2e8c43ac");
    let c2 = hex_to_id("9902e3c3e8f0c569b4ab295ddf473e6de763e1e7");
    let c1 = hex_to_id("134385f6d781b7e97062102c6a483440bfda2a03");
    let very_early = 878393600; // an early date before any commit
    // With a cutoff older than every commit, the traversal degenerates into plain
    // date order, matching `head_date_order`.
    let cases = [
        (CommitTimeOrder::NewestFirst, vec![tip, b1c1, c2, c1]),
        (CommitTimeOrder::OldestFirst, vec![tip, c2, c1, b1c1]),
    ];
    for (order, expected) in cases {
        let sorting = Sorting::ByCommitTimeCutoff {
            order,
            seconds: very_early,
        };
        let actual = traverse_both([tip], &odb, sorting, Parents::All, [])?;
        assert_eq!(actual, expected);
    }
    Ok(())
}
#[test]
fn date_order_with_cutoff_is_applied_to_starting_position() -> crate::Result {
    let (_repo_dir, odb) = adjusted_dates_repo()?;
    // Graph shown in `head_breadth_first`
    let start = hex_to_id("9902e3c3e8f0c569b4ab295ddf473e6de763e1e7"); // c2
    for order in [CommitTimeOrder::NewestFirst, CommitTimeOrder::OldestFirst] {
        let sorting = Sorting::ByCommitTimeCutoff {
            order,
            seconds: 978393600, // =2001-01-02 00:00:00 +0000
        };
        let num_commits = Simple::new([start], &odb)
            .sorting(sorting)?
            .commit_graph(commit_graph(odb.store_ref()))
            .count();
        assert_eq!(
            num_commits, 0,
            "initial tips that don't pass cutoff value are not returned either"
        );
    }
    Ok(())
}
#[test]
fn head_date_order_first_parent_only() -> crate::Result {
    let (_repo_dir, odb) = adjusted_dates_repo()?;
    // Graph shown in `head_breadth_first`
    let tip = hex_to_id("288e509293165cb5630d08f4185bdf2445bf6170"); // m1b1
    // Following only first parents never enters branch1, so both time orders agree.
    let expected = [
        tip,
        hex_to_id("9902e3c3e8f0c569b4ab295ddf473e6de763e1e7"), // c2
        hex_to_id("134385f6d781b7e97062102c6a483440bfda2a03"), // c1
    ];
    for order in [CommitTimeOrder::NewestFirst, CommitTimeOrder::OldestFirst] {
        let actual = traverse_both([tip], &odb, Sorting::ByCommitTime(order), Parents::First, [])?;
        assert_eq!(actual, expected, "order = {order:?}");
    }
    Ok(())
}

View File

@@ -0,0 +1,102 @@
use super::*;
use gix_traverse::commit::simple::CommitTimeOrder;
/// Open the "simple" repository created by the shared `make_repos.sh` fixture.
fn simple_repo() -> crate::Result<(std::path::PathBuf, gix_odb::Handle)> {
    named_fixture("make_repos.sh", "simple")
}
#[test]
fn head_breadth_first() -> crate::Result {
    // Walk all parents of the merge tip breadth-first; one graph layer at a time.
    let (repo_dir, odb) = simple_repo()?;
    // Timestamps show branch1 commits are newer than branch2, with c5 being the newest.
    insta::assert_snapshot!(git_graph_with_time(&repo_dir)?, @r"
    *-. f49838d84281c3988eeadd988d97dd358c9f9dc4 1112912533 (HEAD -> main) merge
    |\ \
    | | * 48e8dac19508f4238f06c8de2b10301ce64a641c 1112912353 (branch2) b2c2
    | | * cb6a6befc0a852ac74d74e0354e0f004af29cb79 1112912293 b2c1
    | * | 66a309480201c4157b0eae86da69f2d606aadbe7 1112912473 (branch1) b1c2
    | * | 80947acb398362d8236fcb8bf0f8a9dac640583f 1112912413 b1c1
    | |/
    * / 0edb95c0c0d9933d88f532ec08fcd405d0eee882 1112912533 c5
    |/
    * 8cb5f13b66ce52a49399a2c49f537ee2b812369c 1112912233 c4
    * 33aa07785dd667c0196064e3be3c51dd9b4744ef 1112912173 c3
    * ad33ff2d0c4fc77d56b5fbff6f86f332fe792d83 1112912113 c2
    * 65d6af66f60b8e39fd1ba6a1423178831e764ec5 1112912053 c1
    ");
    let tip = hex_to_id("f49838d84281c3988eeadd988d97dd358c9f9dc4"); // merge
    // This is very different from what git does as it keeps commits together,
    // whereas we spread them out breadth-first.
    let expected = [
        tip,
        hex_to_id("0edb95c0c0d9933d88f532ec08fcd405d0eee882"), // c5
        hex_to_id("66a309480201c4157b0eae86da69f2d606aadbe7"), // b1c2
        hex_to_id("48e8dac19508f4238f06c8de2b10301ce64a641c"), // b2c2
        hex_to_id("8cb5f13b66ce52a49399a2c49f537ee2b812369c"), // c4
        hex_to_id("80947acb398362d8236fcb8bf0f8a9dac640583f"), // b1c1
        hex_to_id("cb6a6befc0a852ac74d74e0354e0f004af29cb79"), // b2c1
        hex_to_id("33aa07785dd667c0196064e3be3c51dd9b4744ef"), // c3
        hex_to_id("ad33ff2d0c4fc77d56b5fbff6f86f332fe792d83"), // c2
        hex_to_id("65d6af66f60b8e39fd1ba6a1423178831e764ec5"), // c1
    ];
    let result = traverse_both([tip], &odb, Sorting::BreadthFirst, Parents::All, [])?;
    assert_eq!(result, expected);
    Ok(())
}
#[test]
fn head_date_order() -> crate::Result {
    let (_repo_dir, odb) = simple_repo()?;
    // Graph with timestamps shown in `head_breadth_first`
    let tip = hex_to_id("f49838d84281c3988eeadd988d97dd358c9f9dc4"); // merge
    let c5 = hex_to_id("0edb95c0c0d9933d88f532ec08fcd405d0eee882");
    let b1c2 = hex_to_id("66a309480201c4157b0eae86da69f2d606aadbe7");
    let b1c1 = hex_to_id("80947acb398362d8236fcb8bf0f8a9dac640583f");
    let b2c2 = hex_to_id("48e8dac19508f4238f06c8de2b10301ce64a641c");
    let b2c1 = hex_to_id("cb6a6befc0a852ac74d74e0354e0f004af29cb79");
    let c4 = hex_to_id("8cb5f13b66ce52a49399a2c49f537ee2b812369c");
    let c3 = hex_to_id("33aa07785dd667c0196064e3be3c51dd9b4744ef");
    let c2 = hex_to_id("ad33ff2d0c4fc77d56b5fbff6f86f332fe792d83");
    let c1 = hex_to_id("65d6af66f60b8e39fd1ba6a1423178831e764ec5");
    let cases = [
        // NewestFirst - exactly what git shows
        (
            CommitTimeOrder::NewestFirst,
            vec![tip, c5, b1c2, b1c1, b2c2, b2c1, c4, c3, c2, c1],
        ),
        (
            CommitTimeOrder::OldestFirst,
            vec![tip, b2c2, b2c1, c4, c3, c2, c1, b1c2, b1c1, c5],
        ),
    ];
    for (order, expected) in cases {
        let actual = traverse_both([tip], &odb, Sorting::ByCommitTime(order), Parents::All, [])?;
        assert_eq!(actual, expected);
    }
    Ok(())
}

View File

@@ -0,0 +1,95 @@
use super::*;
use gix_traverse::commit::simple::CommitTimeOrder;
/// Open the "intermixed" repository created by the shared `make_repos.sh` fixture.
fn intermixed_repo() -> crate::Result<(std::path::PathBuf, gix_odb::Handle)> {
    named_fixture("make_repos.sh", "intermixed")
}
#[test]
fn head_breadth_first() -> crate::Result {
    // Walk all parents of the merge tip breadth-first; one graph layer at a time.
    let (repo_dir, odb) = intermixed_repo()?;
    // Timestamps show the intermixed ordering: b1 and b2 commits are interleaved
    // with main branch commits by time.
    insta::assert_snapshot!(git_graph_with_time(&repo_dir)?, @r"
    *-. 58912d92944087dcb09dca79cdd2a937cc158bed 1112912413 (HEAD -> main) merge
    |\ \
    | | * a9c28710e058af4e5163699960234adb9fb2abc7 1112912293 (branch2) b2c2
    | | * b648f955b930ca95352fae6f22cb593ee0244b27 1112912173 b2c1
    | * | 0f6632a5a7d81417488b86692b729e49c1b73056 1112912353 (branch1) b1c2
    | * | 77fd3c6832c0cd542f7a39f3af9250c3268db979 1112912233 b1c1
    | |/
    * / 2dce37be587e07caef8c4a5ab60b423b13a8536a 1112912413 c3
    |/
    * ad33ff2d0c4fc77d56b5fbff6f86f332fe792d83 1112912113 c2
    * 65d6af66f60b8e39fd1ba6a1423178831e764ec5 1112912053 c1
    ");
    let tip = hex_to_id("58912d92944087dcb09dca79cdd2a937cc158bed"); // merge
    // This is very different from what git does as it keeps commits together,
    // whereas we spread them out breadth-first.
    let expected = [
        tip,
        hex_to_id("2dce37be587e07caef8c4a5ab60b423b13a8536a"), // c3
        hex_to_id("0f6632a5a7d81417488b86692b729e49c1b73056"), // b1c2
        hex_to_id("a9c28710e058af4e5163699960234adb9fb2abc7"), // b2c2
        hex_to_id("ad33ff2d0c4fc77d56b5fbff6f86f332fe792d83"), // c2
        hex_to_id("77fd3c6832c0cd542f7a39f3af9250c3268db979"), // b1c1
        hex_to_id("b648f955b930ca95352fae6f22cb593ee0244b27"), // b2c1
        hex_to_id("65d6af66f60b8e39fd1ba6a1423178831e764ec5"), // c1
    ];
    let result = traverse_both([tip], &odb, Sorting::BreadthFirst, Parents::All, [])?;
    assert_eq!(result, expected);
    Ok(())
}
#[test]
fn head_date_order() -> crate::Result {
    let (_repo_dir, odb) = intermixed_repo()?;
    // Graph with timestamps shown in `head_breadth_first`
    let tip = hex_to_id("58912d92944087dcb09dca79cdd2a937cc158bed"); // merge
    let c3 = hex_to_id("2dce37be587e07caef8c4a5ab60b423b13a8536a");
    let b1c2 = hex_to_id("0f6632a5a7d81417488b86692b729e49c1b73056");
    let b1c1 = hex_to_id("77fd3c6832c0cd542f7a39f3af9250c3268db979");
    let b2c2 = hex_to_id("a9c28710e058af4e5163699960234adb9fb2abc7");
    let b2c1 = hex_to_id("b648f955b930ca95352fae6f22cb593ee0244b27");
    let c2 = hex_to_id("ad33ff2d0c4fc77d56b5fbff6f86f332fe792d83");
    let c1 = hex_to_id("65d6af66f60b8e39fd1ba6a1423178831e764ec5");
    let cases = [
        // NewestFirst - exactly what git shows
        (
            CommitTimeOrder::NewestFirst,
            vec![tip, c3, b1c2, b2c2, b1c1, b2c1, c2, c1],
        ),
        (
            CommitTimeOrder::OldestFirst,
            vec![tip, b2c2, b2c1, c2, c1, b1c2, b1c1, c3],
        ),
    ];
    for (order, expected) in cases {
        let actual = traverse_both([tip], &odb, Sorting::ByCommitTime(order), Parents::All, [])?;
        assert_eq!(actual, expected);
    }
    Ok(())
}

View File

@@ -0,0 +1,196 @@
use super::*;
use crate::util::{fixture, git_rev_list};
#[test]
fn disjoint_hidden_and_interesting() -> crate::Result {
    // Hiding `a3` removes it and all its ancestors; the `b*` commits on top stay visible.
    let (repo_dir, odb) = named_fixture("make_repos.sh", "disjoint_branches")?;
    insta::assert_snapshot!(git_graph(&repo_dir)?, @"
    * e07cf1277ff7c43090f1acfc85a46039e7de1272 (HEAD -> disjoint) b3
    * 94cf3f3a4c782b672173423e7a4157a02957dd48 b2
    * 34e5ff5ce3d3ba9f0a00d11a7fad72551fff0861 b1
    * b5665181bf4c338ab16b10da0524d81b96aff209 (main) a3
    * f0230ce37b83d8e9f51ea6322ed7e8bd148d8e28 a2
    * 674aca0765b935ac5e7f7e9ab83af7f79272b5b0 a1
    ");
    let tip = hex_to_id("e07cf1277ff7c43090f1acfc85a46039e7de1272"); // b3
    let hidden = [hex_to_id("b5665181bf4c338ab16b10da0524d81b96aff209")]; // a3
    let expected = [
        tip,
        hex_to_id("94cf3f3a4c782b672173423e7a4157a02957dd48"), // b2
        hex_to_id("34e5ff5ce3d3ba9f0a00d11a7fad72551fff0861"), // b1
    ];
    for sorting in all_sortings() {
        let result = traverse_both([tip], &odb, sorting, Parents::All, hidden)?;
        assert_eq!(result, expected, "sorting = {sorting:?}");
    }
    Ok(())
}
#[test]
fn all_hidden() -> crate::Result {
    let (_repo_dir, odb) = named_fixture("make_repos.sh", "disjoint_branches")?;
    let tips = [
        hex_to_id("e07cf1277ff7c43090f1acfc85a46039e7de1272"), // b3
        hex_to_id("b5665181bf4c338ab16b10da0524d81b96aff209"), // a3
    ];
    // The start positions are also declared hidden, so nothing should be visible.
    for sorting in all_sortings() {
        let walked = traverse_both(tips, &odb, sorting, Parents::All, tips)?;
        assert!(walked.is_empty(), "sorting = {sorting:?}");
    }
    Ok(())
}
#[test]
fn some_hidden_and_all_hidden() -> crate::Result {
    // Three scenarios in one repository: a tip fully covered by a hidden commit,
    // a merge with both branch tips hidden, and first-parent traversal with a hidden tip.
    let (repo_dir, odb) = named_fixture("make_repos.sh", "simple")?;
    insta::assert_snapshot!(git_graph(&repo_dir)?, @r"
    *-. f49838d84281c3988eeadd988d97dd358c9f9dc4 (HEAD -> main) merge
    |\ \
    | | * 48e8dac19508f4238f06c8de2b10301ce64a641c (branch2) b2c2
    | | * cb6a6befc0a852ac74d74e0354e0f004af29cb79 b2c1
    | * | 66a309480201c4157b0eae86da69f2d606aadbe7 (branch1) b1c2
    | * | 80947acb398362d8236fcb8bf0f8a9dac640583f b1c1
    | |/
    * / 0edb95c0c0d9933d88f532ec08fcd405d0eee882 c5
    |/
    * 8cb5f13b66ce52a49399a2c49f537ee2b812369c c4
    * 33aa07785dd667c0196064e3be3c51dd9b4744ef c3
    * ad33ff2d0c4fc77d56b5fbff6f86f332fe792d83 c2
    * 65d6af66f60b8e39fd1ba6a1423178831e764ec5 c1
    ");
    // Test: Hidden has to catch up with non-hidden
    let tip_c2 = hex_to_id("ad33ff2d0c4fc77d56b5fbff6f86f332fe792d83");
    let hidden_c5 = hex_to_id("0edb95c0c0d9933d88f532ec08fcd405d0eee882");
    for sorting in all_sortings() {
        let result = traverse_both([tip_c2], &odb, sorting, Parents::All, [hidden_c5])?;
        assert!(
            result.is_empty(),
            "c2 is reachable from hidden c5, sorting = {sorting:?}"
        );
    }
    // Test: merge tip with two branch tips hidden
    let tip_merge = hex_to_id("f49838d84281c3988eeadd988d97dd358c9f9dc4");
    let hidden_branches = [
        hex_to_id("48e8dac19508f4238f06c8de2b10301ce64a641c"), // b2c2
        hex_to_id("66a309480201c4157b0eae86da69f2d606aadbe7"), // b1c2
    ];
    // Hiding both branch tips leaves only the merge and c5 visible.
    let expected = [
        tip_merge,
        hex_to_id("0edb95c0c0d9933d88f532ec08fcd405d0eee882"), // c5
    ];
    for sorting in all_sortings() {
        let result = traverse_both([tip_merge], &odb, sorting, Parents::All, hidden_branches)?;
        assert_eq!(result, expected, "sorting = {sorting:?}");
    }
    // Test: single-parent mode with hidden catching up
    let tip_b1c1 = hex_to_id("80947acb398362d8236fcb8bf0f8a9dac640583f");
    let hidden_merge = hex_to_id("f49838d84281c3988eeadd988d97dd358c9f9dc4");
    let result = traverse_both([tip_b1c1], &odb, Sorting::BreadthFirst, Parents::First, [hidden_merge])?;
    assert!(result.is_empty(), "b1c1 is reachable from hidden merge");
    Ok(())
}
/// Open the repository named `name` inside the `make_repo_for_hidden_bug.sh` fixture directory.
fn hidden_bug_repo(name: &str) -> crate::Result<(std::path::PathBuf, gix_odb::Handle)> {
    let repo_path = fixture("make_repo_for_hidden_bug.sh")?.join(name);
    let objects = repo_path.join(".git").join("objects");
    Ok((repo_path, gix_odb::at(objects)?))
}
#[test]
fn hidden_tip_with_longer_path_to_shared_ancestor() -> crate::Result {
    // Graph:
    // A(tip) --> shared
    // /
    // H(hidden) --> X --> Y --> shared
    //
    // Expected: only A is returned (shared is reachable from H)
    // The hidden side is *longer* than the interesting side, so the hidden walk must
    // keep going past the point where the interesting walk has already finished.
    let (repo_path, odb) = hidden_bug_repo("long_hidden_path")?;
    insta::assert_snapshot!(git_graph(&repo_path)?, @"
    * b6cf469d740a02645b7b9f7cdb98977a6cd7e5ab (HEAD -> main) A
    | * 2955979fbddb1bddb9e1b1ca993789cacf612b18 (hidden_branch) H
    | * ae431c4e51a81a1df4ac22a52c4e247734ee3c9d X
    | * ab31ef4cacc50169f2b1d753c1e4efd55d570bbc Y
    |/
    * f1543941113388f8a194164420fd7da96f73c2ce shared
    ");
    let commits = parse_commit_names(&repo_path)?;
    let tip_a = commits["A"];
    let hidden_h = commits["H"];
    let shared = commits["shared"];
    let expected = vec![tip_a];
    for sorting in all_sortings() {
        let result = traverse([tip_a], &odb, sorting, Parents::All, [hidden_h])?;
        assert_eq!(
            result, expected,
            "sorting = {sorting:?}: 'shared' ({shared}) should NOT be returned because it's \
            reachable from hidden tip H"
        );
    }
    // Verify against git
    let git_output = git_rev_list(&repo_path, &["main", "--not", "hidden_branch"])?;
    assert_eq!(git_output, expected, "git rev-list should show only A");
    Ok(())
}
#[test]
fn interesting_tip_with_longer_path_to_shared_ancestor() -> crate::Result {
    // Graph:
    // A(tip) --> B --> C --> D(shared)
    // /
    // H(hidden) --------->+
    //
    // Expected: A, B, C are returned (D is reachable from H)
    // Mirror case of `hidden_tip_with_longer_path_to_shared_ancestor`: here the
    // *interesting* side is longer, and only the shared ancestor must be suppressed.
    let (repo_path, odb) = hidden_bug_repo("long_interesting_path")?;
    insta::assert_snapshot!(git_graph(&repo_path)?, @"
    * 8822f888affa916a2c945ef3b17447f29f8aabff (HEAD -> main) A
    * 90f80e3c031e9149cfa631493663ffe52d645aab B
    * 2f353d445c4c552eec8e84f0f6f73999d08a8073 C
    | * 7e0cf8f62783a0eb1043fbe56d220308c3e0289e (hidden_branch) H
    |/
    * 359b53df58a6e26b95e276a9d1c9e2b33a3b50bf D
    ");
    let commits = parse_commit_names(&repo_path)?;
    let tip_a = commits["A"];
    let hidden_h = commits["H"];
    let d = commits["D"];
    let expected: Vec<_> = ["A", "B", "C"].iter().map(|name| commits[*name]).collect();
    for sorting in all_sortings() {
        let result = traverse([tip_a], &odb, sorting, Parents::All, [hidden_h])?;
        assert_eq!(
            result, expected,
            "sorting = {sorting:?}: 'D' ({d}) should NOT be returned because it's \
            reachable from hidden tip H"
        );
    }
    // Verify against git
    let git_output = git_rev_list(&repo_path, &["main", "--not", "hidden_branch"])?;
    assert_eq!(git_output, expected, "git rev-list should show A, B, C");
    Ok(())
}

View File

@@ -0,0 +1,73 @@
use crate::hex_to_id;
use crate::util::{commit_graph, git_graph, git_graph_with_time, named_fixture, parse_commit_names};
use gix_hash::ObjectId;
use gix_traverse::commit::{simple::Sorting, Parents, Simple};
// One child module per fixture scenario exercised with the `Simple` traversal.
mod adjusted_dates;
mod different_date;
mod different_date_intermixed;
mod hide;
mod same_date;
/// Run a simple traversal and collect the resulting commit IDs.
fn traverse(
    tips: impl IntoIterator<Item = ObjectId>,
    odb: &gix_odb::Handle,
    sorting: Sorting,
    parents: Parents,
    hidden: impl IntoIterator<Item = ObjectId>,
) -> crate::Result<Vec<ObjectId>> {
    let walk = Simple::new(tips, odb)
        .sorting(sorting)?
        .parents(parents)
        .hide(hidden)?
        .commit_graph(commit_graph(odb.store_ref()));
    let mut ids = Vec::new();
    for info in walk {
        ids.push(info?.id);
    }
    Ok(ids)
}
/// Run a traversal with both commit-graph enabled and disabled to ensure consistency.
fn traverse_both(
    tips: impl IntoIterator<Item = ObjectId> + Clone,
    odb: &gix_odb::Handle,
    sorting: Sorting,
    parents: Parents,
    hidden: impl IntoIterator<Item = ObjectId> + Clone,
) -> crate::Result<Vec<ObjectId>> {
    // First pass: plain ODB lookups only.
    let mut from_odb = Vec::new();
    for info in Simple::new(tips.clone(), odb)
        .sorting(sorting)?
        .parents(parents)
        .hide(hidden.clone())?
        .commit_graph(None)
    {
        from_odb.push(info?.id);
    }
    // Second pass: accelerated by the commit-graph, if present.
    let mut from_graph = Vec::new();
    for info in Simple::new(tips, odb)
        .sorting(sorting)?
        .parents(parents)
        .hide(hidden)?
        .commit_graph(commit_graph(odb.store_ref()))
    {
        from_graph.push(info?.id);
    }
    assert_eq!(
        from_odb, from_graph,
        "results must be consistent with and without commit-graph"
    );
    Ok(from_graph)
}
/// All sorting modes the simple traversal supports, for exhaustive test loops.
fn all_sortings() -> impl Iterator<Item = Sorting> {
    use gix_traverse::commit::simple::CommitTimeOrder;
    let by_time = [CommitTimeOrder::NewestFirst, CommitTimeOrder::OldestFirst]
        .into_iter()
        .map(Sorting::ByCommitTime);
    std::iter::once(Sorting::BreadthFirst).chain(by_time)
}

View File

@@ -0,0 +1,222 @@
//! Same dates are somewhat special as they show how sorting-details on priority queues affects ordering
use super::*;
use crate::util::fixture;
use gix_hash::oid;
use gix_traverse::commit::simple::CommitTimeOrder;
/// Open the fixture repository whose commits all share the same commit date.
fn same_date_repo() -> crate::Result<(std::path::PathBuf, gix_odb::Handle)> {
    let root = fixture("make_traversal_repo_for_commits_same_date.sh")?;
    let objects = root.join(".git").join("objects");
    Ok((root, gix_odb::at(objects)?))
}
#[test]
fn c4_breadth_first() -> crate::Result {
    // Starting below the merge keeps the walk on the linear mainline segment.
    let (repo_dir, odb) = same_date_repo()?;
    insta::assert_snapshot!(git_graph(&repo_dir)?, @r"
    * 01ec18a3ebf2855708ad3c9d244306bc1fae3e9b (HEAD -> main) m1b1
    |\
    | * ce2e8ffaa9608a26f7b21afc1db89cadb54fd353 (branch1) b1c2
    | * 9152eeee2328073cf23dcf8e90c949170b711659 b1c1
    * | efd9a841189668f1bab5b8ebade9cd0a1b139a37 c5
    |/
    * 9556057aee5abb06912922e9f26c46386a816822 c4
    * 17d78c64cef6c33a10a604573fd2c429e477fd63 c3
    * 9902e3c3e8f0c569b4ab295ddf473e6de763e1e7 c2
    * 134385f6d781b7e97062102c6a483440bfda2a03 c1
    ");
    let tip = hex_to_id("9556057aee5abb06912922e9f26c46386a816822"); // c4
    let expected = [
        tip,
        hex_to_id("17d78c64cef6c33a10a604573fd2c429e477fd63"), // c3
        hex_to_id("9902e3c3e8f0c569b4ab295ddf473e6de763e1e7"), // c2
        hex_to_id("134385f6d781b7e97062102c6a483440bfda2a03"), // c1
    ];
    let result = traverse_both([tip], &odb, Sorting::BreadthFirst, Parents::All, [])?;
    assert_eq!(result, expected);
    Ok(())
}
#[test]
fn head_breadth_first() -> crate::Result {
    let (_repo_dir, odb) = same_date_repo()?;
    // Graph shown in `c4_breadth_first`
    // We always take the first parent first, then the second, and so on.
    // Deviation: git for some reason displays b1c2 *before* c5, but I think it's better
    // to have a strict parent order.
    let expected = [
        "01ec18a3ebf2855708ad3c9d244306bc1fae3e9b", // m1b1 (the tip)
        "efd9a841189668f1bab5b8ebade9cd0a1b139a37", // c5
        "ce2e8ffaa9608a26f7b21afc1db89cadb54fd353", // b1c2
        "9556057aee5abb06912922e9f26c46386a816822", // c4
        "9152eeee2328073cf23dcf8e90c949170b711659", // b1c1
        "17d78c64cef6c33a10a604573fd2c429e477fd63", // c3
        "9902e3c3e8f0c569b4ab295ddf473e6de763e1e7", // c2
        "134385f6d781b7e97062102c6a483440bfda2a03", // c1
    ]
    .map(hex_to_id);
    let actual = traverse_both([expected[0]], &odb, Sorting::BreadthFirst, Parents::All, [])?;
    assert_eq!(actual, expected);
    Ok(())
}
#[test]
fn head_date_order() -> crate::Result {
    let (_repo_dir, odb) = same_date_repo()?;
    // Graph shown in `c4_breadth_first`
    // As every commit carries the same date, both time orders must produce the
    // same sequence.
    let expected = [
        "01ec18a3ebf2855708ad3c9d244306bc1fae3e9b", // m1b1 (the tip)
        "efd9a841189668f1bab5b8ebade9cd0a1b139a37", // c5
        "ce2e8ffaa9608a26f7b21afc1db89cadb54fd353", // b1c2
        "9556057aee5abb06912922e9f26c46386a816822", // c4
        "9152eeee2328073cf23dcf8e90c949170b711659", // b1c1
        "17d78c64cef6c33a10a604573fd2c429e477fd63", // c3
        "9902e3c3e8f0c569b4ab295ddf473e6de763e1e7", // c2
        "134385f6d781b7e97062102c6a483440bfda2a03", // c1
    ]
    .map(hex_to_id);
    let tip = expected[0];
    for order in [CommitTimeOrder::NewestFirst, CommitTimeOrder::OldestFirst] {
        let actual = traverse_both([tip], &odb, Sorting::ByCommitTime(order), Parents::All, [])?;
        assert_eq!(actual, expected);
    }
    Ok(())
}
#[test]
fn head_first_parent_only_breadth_first() -> crate::Result {
    let (_repo_dir, odb) = same_date_repo()?;
    // Graph shown in `c4_breadth_first`
    // First-parent traversal stays on the mainline and never enters branch1.
    let expected = [
        "01ec18a3ebf2855708ad3c9d244306bc1fae3e9b", // m1b1 (the tip)
        "efd9a841189668f1bab5b8ebade9cd0a1b139a37", // c5
        "9556057aee5abb06912922e9f26c46386a816822", // c4
        "17d78c64cef6c33a10a604573fd2c429e477fd63", // c3
        "9902e3c3e8f0c569b4ab295ddf473e6de763e1e7", // c2
        "134385f6d781b7e97062102c6a483440bfda2a03", // c1
    ]
    .map(hex_to_id);
    let actual = traverse_both([expected[0]], &odb, Sorting::BreadthFirst, Parents::First, [])?;
    assert_eq!(actual, expected);
    Ok(())
}
#[test]
fn head_c4_breadth_first() -> crate::Result {
    let (_repo_dir, odb) = same_date_repo()?;
    // Graph shown in `c4_breadth_first`
    let m1b1 = hex_to_id("01ec18a3ebf2855708ad3c9d244306bc1fae3e9b");
    let c4 = hex_to_id("9556057aee5abb06912922e9f26c46386a816822");
    // Starting from two tips interleaves both walks breadth-first, tips first.
    let expected = [
        m1b1,
        c4,
        hex_to_id("efd9a841189668f1bab5b8ebade9cd0a1b139a37"), // c5
        hex_to_id("ce2e8ffaa9608a26f7b21afc1db89cadb54fd353"), // b1c2
        hex_to_id("17d78c64cef6c33a10a604573fd2c429e477fd63"), // c3
        hex_to_id("9152eeee2328073cf23dcf8e90c949170b711659"), // b1c1
        hex_to_id("9902e3c3e8f0c569b4ab295ddf473e6de763e1e7"), // c2
        hex_to_id("134385f6d781b7e97062102c6a483440bfda2a03"), // c1
    ];
    let actual = traverse_both([m1b1, c4], &odb, Sorting::BreadthFirst, Parents::All, [])?;
    assert_eq!(actual, expected);
    Ok(())
}
#[test]
fn filtered_commit_does_not_block_ancestors_reachable_from_another_commit() -> crate::Result {
    // I don't see a use case for the predicate returning false for a commit but return true for
    // at least one of its ancestors, so this test is kind of dubious. But we do want
    // `Ancestors` to not eagerly blacklist all of a commit's ancestors when blacklisting that
    // one commit, and this test happens to check that.
    let (_repo_dir, odb) = same_date_repo()?;
    // Graph shown in `c4_breadth_first`
    let tip = hex_to_id("01ec18a3ebf2855708ad3c9d244306bc1fae3e9b"); // m1b1
    let filter_out = hex_to_id("9152eeee2328073cf23dcf8e90c949170b711659"); // b1c1
    // b1c1 itself is dropped, but its ancestors remain reachable via c5/c4.
    let expected = [
        tip,
        hex_to_id("efd9a841189668f1bab5b8ebade9cd0a1b139a37"), // c5
        hex_to_id("ce2e8ffaa9608a26f7b21afc1db89cadb54fd353"), // b1c2
        hex_to_id("9556057aee5abb06912922e9f26c46386a816822"), // c4
        hex_to_id("17d78c64cef6c33a10a604573fd2c429e477fd63"), // c3
        hex_to_id("9902e3c3e8f0c569b4ab295ddf473e6de763e1e7"), // c2
        hex_to_id("134385f6d781b7e97062102c6a483440bfda2a03"), // c1
    ];
    let mut actual = Vec::new();
    for info in Simple::filtered([tip], &odb, move |id: &oid| id != filter_out)
        .sorting(Sorting::BreadthFirst)?
        .parents(Parents::All)
        .hide([])?
        .commit_graph(commit_graph(odb.store_ref()))
    {
        actual.push(info?.id);
    }
    assert_eq!(actual, expected);
    Ok(())
}
#[test]
fn predicate_only_called_once_even_if_fork_point() -> crate::Result {
    // The `self.seen` check should come before the `self.predicate` check, as we don't know how
    // expensive calling `self.predicate` may be.
    let (_repo_dir, odb) = same_date_repo()?;
    // Graph shown in `c4_breadth_first`
    let tip = hex_to_id("01ec18a3ebf2855708ad3c9d244306bc1fae3e9b"); // m1b1
    let filter_out = hex_to_id("9556057aee5abb06912922e9f26c46386a816822"); // c4
    // Rejecting c4 cuts off everything below it even though two paths lead to it.
    let expected = [
        tip,
        hex_to_id("efd9a841189668f1bab5b8ebade9cd0a1b139a37"), // c5
        hex_to_id("ce2e8ffaa9608a26f7b21afc1db89cadb54fd353"), // b1c2
        hex_to_id("9152eeee2328073cf23dcf8e90c949170b711659"), // b1c1
    ];
    let mut already_rejected = false;
    let predicate = move |id: &oid| {
        if id != filter_out {
            return true;
        }
        assert!(!already_rejected, "predicate should only be called once for c4");
        already_rejected = true;
        false
    };
    let mut actual = Vec::new();
    for info in Simple::filtered([tip], &odb, predicate)
        .sorting(Sorting::BreadthFirst)?
        .parents(Parents::All)
        .hide([])?
        .commit_graph(commit_graph(odb.store_ref()))
    {
        actual.push(info?.id);
    }
    assert_eq!(actual, expected);
    Ok(())
}

View File

@@ -0,0 +1,367 @@
use crate::hex_to_id;
use crate::util::{commit_graph, fixture, fixture_odb};
use gix_hash::{oid, ObjectId};
use gix_object::bstr::ByteSlice;
use gix_traverse::commit::{topo, Parents};
use std::path::PathBuf;
/// The object database of the shared topo-walk fixture repository.
fn odb() -> crate::Result<gix_odb::Handle> {
    fixture_odb("make_repo_for_topo.sh")
}
/// The directory of the shared topo-walk fixture repository.
fn fixture_dir() -> crate::Result<PathBuf> {
    fixture("make_repo_for_topo.sh")
}
/// Run a topo traversal with both commit-graph enabled and disabled to ensure consistency.
fn traverse_both(
    tips: impl IntoIterator<Item = ObjectId> + Clone,
    ends: impl IntoIterator<Item = ObjectId> + Clone,
    odb: &gix_odb::Handle,
    sorting: topo::Sorting,
    parents: Parents,
) -> crate::Result<Vec<ObjectId>> {
    // First pass: plain ODB lookups only.
    let mut plain = Vec::new();
    for info in topo::Builder::from_iters(odb, tips.clone(), Some(ends.clone()))
        .sorting(sorting)
        .with_commit_graph(None)
        .parents(parents)
        .build()?
    {
        plain.push(info?.id);
    }
    // Second pass: accelerated by the commit-graph, if present.
    let mut accelerated = Vec::new();
    for info in topo::Builder::from_iters(odb, tips, Some(ends))
        .sorting(sorting)
        .with_commit_graph(commit_graph(odb.store_ref()))
        .parents(parents)
        .build()?
    {
        accelerated.push(info?.id);
    }
    assert_eq!(
        plain, accelerated,
        "results must be consistent with and without commit-graph"
    );
    Ok(accelerated)
}
/// Run a topo traversal with a predicate filter.
///
/// Like `traverse_both`, the walk runs once without and once with the commit-graph,
/// and both runs must agree.
fn traverse_with_predicate(
    tips: impl IntoIterator<Item = ObjectId> + Clone,
    ends: impl IntoIterator<Item = ObjectId> + Clone,
    odb: &gix_odb::Handle,
    sorting: topo::Sorting,
    parents: Parents,
    predicate: impl FnMut(&oid) -> bool + Clone,
) -> crate::Result<Vec<ObjectId>> {
    // First pass: plain ODB lookups only.
    let mut plain = Vec::new();
    for info in topo::Builder::from_iters(odb, tips.clone(), Some(ends.clone()))
        .sorting(sorting)
        .with_commit_graph(None)
        .parents(parents)
        .with_predicate(predicate.clone())
        .build()?
    {
        plain.push(info?.id);
    }
    // Second pass: accelerated by the commit-graph, if present.
    let mut accelerated = Vec::new();
    for info in topo::Builder::from_iters(odb, tips, Some(ends))
        .sorting(sorting)
        .with_commit_graph(commit_graph(odb.store_ref()))
        .parents(parents)
        .with_predicate(predicate)
        .build()?
    {
        accelerated.push(info?.id);
    }
    assert_eq!(
        plain, accelerated,
        "results must be consistent with and without commit-graph"
    );
    Ok(accelerated)
}
/// Read baseline file and parse expected commit hashes.
fn read_baseline(fixture_dir: &std::path::Path, name: &str) -> crate::Result<Vec<String>> {
let buf = std::fs::read(fixture_dir.join(format!("{name}.baseline")))?;
Ok(buf.lines().map(|s| s.to_str().unwrap().to_string()).collect())
}
mod basic {
use super::*;
#[test]
fn simple() -> crate::Result {
    // Full history from the only tip, in topological order, checked both against a
    // hard-coded expectation and the baseline file generated by the fixture script.
    let odb = odb()?;
    let tip = hex_to_id("62ed296d9986f50477e9f7b7e81cd0258939a43d");
    let expected = [
        "62ed296d9986f50477e9f7b7e81cd0258939a43d",
        "722bf6b8c3d9e3a11fa5100a02ed9b140e1d209c",
        "3be0c4c793c634c8fd95054345d4935d10a0879a",
        "2083b02a78e88b747e305b6ed3d5a861cf9fb73f",
        "302a5d0530ec688c241f32c2f2b61b964dd17bee",
        "d09384f312b03e4a1413160739805ff25e8fe99d",
        "22fbc169eeca3c9678fc7028aa80fad5ef49019f",
        "eeab3243aad67bc838fc4425f759453bf0b47785",
        "693c775700cf90bd158ee6e7f14dd1b7bd83a4ce",
        "33eb18340e4eaae3e3dcf80222b02f161cd3f966",
        "1a27cb1a26c9faed9f0d1975326fe51123ab01ed",
        "f1cce1b5c7efcdfa106e95caa6c45a2cae48a481",
        "945d8a360915631ad545e0cf04630d86d3d4eaa1",
        "a863c02247a6c5ba32dff5224459f52aa7f77f7b",
        "2f291881edfb0597493a52d26ea09dd7340ce507",
        "9c46b8765703273feb10a2ebd810e70b8e2ca44a",
        "fb3e21cf45b04b617011d2b30973f3e5ce60d0cd",
    ]
    .map(hex_to_id);
    let result = traverse_both([tip], [], &odb, topo::Sorting::TopoOrder, Parents::All)?;
    assert_eq!(result, expected);
    // Verify against baseline
    let baseline = read_baseline(&fixture_dir()?, "all-commits")?;
    let expected_strs: Vec<_> = expected.iter().map(std::string::ToString::to_string).collect();
    assert_eq!(expected_strs, baseline, "Baseline must match the expectation");
    Ok(())
}
#[test]
fn one_end() -> crate::Result {
    // With a single end, the walk stops where the hidden range begins: everything
    // from `end` downward is absent from the expectation.
    let odb = odb()?;
    let tip = hex_to_id("62ed296d9986f50477e9f7b7e81cd0258939a43d");
    let end = hex_to_id("f1cce1b5c7efcdfa106e95caa6c45a2cae48a481");
    let expected = [
        "62ed296d9986f50477e9f7b7e81cd0258939a43d",
        "722bf6b8c3d9e3a11fa5100a02ed9b140e1d209c",
        "3be0c4c793c634c8fd95054345d4935d10a0879a",
        "2083b02a78e88b747e305b6ed3d5a861cf9fb73f",
        "302a5d0530ec688c241f32c2f2b61b964dd17bee",
        "d09384f312b03e4a1413160739805ff25e8fe99d",
        "22fbc169eeca3c9678fc7028aa80fad5ef49019f",
        "eeab3243aad67bc838fc4425f759453bf0b47785",
        "693c775700cf90bd158ee6e7f14dd1b7bd83a4ce",
        "33eb18340e4eaae3e3dcf80222b02f161cd3f966",
        "1a27cb1a26c9faed9f0d1975326fe51123ab01ed",
    ]
    .map(hex_to_id);
    let result = traverse_both([tip], [end], &odb, topo::Sorting::TopoOrder, Parents::All)?;
    assert_eq!(result, expected);
    Ok(())
}
#[test]
fn empty_range() -> crate::Result {
    let odb = odb()?;
    let tip = hex_to_id("f1cce1b5c7efcdfa106e95caa6c45a2cae48a481");
    let end = hex_to_id("eeab3243aad67bc838fc4425f759453bf0b47785");
    // Nothing lies between this tip and end, so the walk must yield no commits at all.
    let walked = traverse_both([tip], [end], &odb, topo::Sorting::TopoOrder, Parents::All)?;
    assert!(walked.is_empty());
    Ok(())
}
#[test]
fn two_tips_two_ends() -> crate::Result {
    // Multiple tips and ends combine: commits reachable from either end are excluded
    // from the union of both tips' histories.
    let odb = odb()?;
    let tips = [
        hex_to_id("d09384f312b03e4a1413160739805ff25e8fe99d"),
        hex_to_id("3be0c4c793c634c8fd95054345d4935d10a0879a"),
    ];
    let ends = [
        hex_to_id("1a27cb1a26c9faed9f0d1975326fe51123ab01ed"),
        hex_to_id("22fbc169eeca3c9678fc7028aa80fad5ef49019f"),
    ];
    let expected = [
        "3be0c4c793c634c8fd95054345d4935d10a0879a",
        "2083b02a78e88b747e305b6ed3d5a861cf9fb73f",
        "302a5d0530ec688c241f32c2f2b61b964dd17bee",
        "d09384f312b03e4a1413160739805ff25e8fe99d",
        "eeab3243aad67bc838fc4425f759453bf0b47785",
        "693c775700cf90bd158ee6e7f14dd1b7bd83a4ce",
        "33eb18340e4eaae3e3dcf80222b02f161cd3f966",
    ]
    .map(hex_to_id);
    let result = traverse_both(tips, ends, &odb, topo::Sorting::TopoOrder, Parents::All)?;
    assert_eq!(result, expected);
    Ok(())
}
/// A predicate that rejects one commit hides exactly that commit from the
/// output; its ancestors (e.g. 693c77...) are still reached and yielded.
#[test]
fn with_dummy_predicate() -> crate::Result {
    let store = odb()?;
    let start = hex_to_id("62ed296d9986f50477e9f7b7e81cd0258939a43d");
    let skipped = hex_to_id("eeab3243aad67bc838fc4425f759453bf0b47785");
    let actual = traverse_with_predicate([start], [], &store, topo::Sorting::TopoOrder, Parents::All, move |id| {
        id != skipped
    })?;
    let expected: Vec<_> = [
        "62ed296d9986f50477e9f7b7e81cd0258939a43d",
        "722bf6b8c3d9e3a11fa5100a02ed9b140e1d209c",
        "3be0c4c793c634c8fd95054345d4935d10a0879a",
        "2083b02a78e88b747e305b6ed3d5a861cf9fb73f",
        "302a5d0530ec688c241f32c2f2b61b964dd17bee",
        "d09384f312b03e4a1413160739805ff25e8fe99d",
        "22fbc169eeca3c9678fc7028aa80fad5ef49019f",
        "693c775700cf90bd158ee6e7f14dd1b7bd83a4ce",
        "33eb18340e4eaae3e3dcf80222b02f161cd3f966",
        "1a27cb1a26c9faed9f0d1975326fe51123ab01ed",
        "f1cce1b5c7efcdfa106e95caa6c45a2cae48a481",
        "945d8a360915631ad545e0cf04630d86d3d4eaa1",
        "a863c02247a6c5ba32dff5224459f52aa7f77f7b",
        "2f291881edfb0597493a52d26ea09dd7340ce507",
        "9c46b8765703273feb10a2ebd810e70b8e2ca44a",
        "fb3e21cf45b04b617011d2b30973f3e5ce60d0cd",
    ]
    .into_iter()
    .map(hex_to_id)
    .collect();
    assert_eq!(actual, expected);
    Ok(())
}
/// An end commit that sits on the tip's ancestry chain stops traversal right
/// before it is reached.
#[test]
fn end_along_first_parent() -> crate::Result {
    let store = odb()?;
    let start = hex_to_id("d09384f312b03e4a1413160739805ff25e8fe99d");
    let boundary = hex_to_id("33eb18340e4eaae3e3dcf80222b02f161cd3f966");
    let actual = traverse_both([start], [boundary], &store, topo::Sorting::TopoOrder, Parents::All)?;
    let expected: Vec<_> = [
        "d09384f312b03e4a1413160739805ff25e8fe99d",
        "22fbc169eeca3c9678fc7028aa80fad5ef49019f",
        "eeab3243aad67bc838fc4425f759453bf0b47785",
        "693c775700cf90bd158ee6e7f14dd1b7bd83a4ce",
    ]
    .into_iter()
    .map(hex_to_id)
    .collect();
    assert_eq!(actual, expected);
    Ok(())
}
}
/// Traversal restricted to first parents only (`Parents::First`).
mod first_parent {
    use super::*;

    /// Following only first parents yields a single linear chain of commits.
    #[test]
    fn basic() -> crate::Result {
        let store = odb()?;
        let start = hex_to_id("62ed296d9986f50477e9f7b7e81cd0258939a43d");
        let expected: Vec<_> = [
            "62ed296d9986f50477e9f7b7e81cd0258939a43d",
            "722bf6b8c3d9e3a11fa5100a02ed9b140e1d209c",
            "d09384f312b03e4a1413160739805ff25e8fe99d",
            "eeab3243aad67bc838fc4425f759453bf0b47785",
            "693c775700cf90bd158ee6e7f14dd1b7bd83a4ce",
            "33eb18340e4eaae3e3dcf80222b02f161cd3f966",
            "1a27cb1a26c9faed9f0d1975326fe51123ab01ed",
            "f1cce1b5c7efcdfa106e95caa6c45a2cae48a481",
            "945d8a360915631ad545e0cf04630d86d3d4eaa1",
            "a863c02247a6c5ba32dff5224459f52aa7f77f7b",
            "2f291881edfb0597493a52d26ea09dd7340ce507",
            "9c46b8765703273feb10a2ebd810e70b8e2ca44a",
            "fb3e21cf45b04b617011d2b30973f3e5ce60d0cd",
        ]
        .into_iter()
        .map(hex_to_id)
        .collect();
        let actual = traverse_both([start], [], &store, topo::Sorting::TopoOrder, Parents::First)?;
        assert_eq!(actual, expected);
        // The expectation itself is validated against a pre-recorded baseline file.
        let baseline = read_baseline(&fixture_dir()?, "first-parent")?;
        let as_strings: Vec<_> = expected.iter().map(std::string::ToString::to_string).collect();
        assert_eq!(as_strings, baseline, "Baseline must match the expectation");
        Ok(())
    }

    /// An end commit truncates the first-parent chain.
    #[test]
    fn with_end() -> crate::Result {
        let store = odb()?;
        let start = hex_to_id("62ed296d9986f50477e9f7b7e81cd0258939a43d");
        let boundary = hex_to_id("f1cce1b5c7efcdfa106e95caa6c45a2cae48a481");
        let actual = traverse_both([start], [boundary], &store, topo::Sorting::TopoOrder, Parents::First)?;
        let expected: Vec<_> = [
            "62ed296d9986f50477e9f7b7e81cd0258939a43d",
            "722bf6b8c3d9e3a11fa5100a02ed9b140e1d209c",
            "d09384f312b03e4a1413160739805ff25e8fe99d",
            "eeab3243aad67bc838fc4425f759453bf0b47785",
            "693c775700cf90bd158ee6e7f14dd1b7bd83a4ce",
            "33eb18340e4eaae3e3dcf80222b02f161cd3f966",
            "1a27cb1a26c9faed9f0d1975326fe51123ab01ed",
        ]
        .into_iter()
        .map(hex_to_id)
        .collect();
        assert_eq!(actual, expected);
        Ok(())
    }

    /// An end that is only reachable via a second parent does not truncate the
    /// first-parent walk - the result matches `with_end()` above.
    #[test]
    fn end_is_second_parent() -> crate::Result {
        let store = odb()?;
        let start = hex_to_id("62ed296d9986f50477e9f7b7e81cd0258939a43d");
        let boundary = hex_to_id("3be0c4c793c634c8fd95054345d4935d10a0879a");
        let actual = traverse_both([start], [boundary], &store, topo::Sorting::TopoOrder, Parents::First)?;
        let expected: Vec<_> = [
            "62ed296d9986f50477e9f7b7e81cd0258939a43d",
            "722bf6b8c3d9e3a11fa5100a02ed9b140e1d209c",
            "d09384f312b03e4a1413160739805ff25e8fe99d",
            "eeab3243aad67bc838fc4425f759453bf0b47785",
            "693c775700cf90bd158ee6e7f14dd1b7bd83a4ce",
            "33eb18340e4eaae3e3dcf80222b02f161cd3f966",
            "1a27cb1a26c9faed9f0d1975326fe51123ab01ed",
        ]
        .into_iter()
        .map(hex_to_id)
        .collect();
        assert_eq!(actual, expected);
        Ok(())
    }
}
/// Traversal with `Sorting::DateOrder` instead of pure topological order.
mod date_order {
    use super::*;

    /// Uses the same tip and end as `basic::one_end()`; `DateOrder` produces a
    /// different ordering of the same commit set.
    #[test]
    fn with_ends() -> crate::Result {
        let store = odb()?;
        let start = hex_to_id("62ed296d9986f50477e9f7b7e81cd0258939a43d");
        let boundary = hex_to_id("f1cce1b5c7efcdfa106e95caa6c45a2cae48a481");
        let actual = traverse_both([start], [boundary], &store, topo::Sorting::DateOrder, Parents::All)?;
        let expected: Vec<_> = [
            "62ed296d9986f50477e9f7b7e81cd0258939a43d",
            "722bf6b8c3d9e3a11fa5100a02ed9b140e1d209c",
            "3be0c4c793c634c8fd95054345d4935d10a0879a",
            "2083b02a78e88b747e305b6ed3d5a861cf9fb73f",
            "302a5d0530ec688c241f32c2f2b61b964dd17bee",
            "d09384f312b03e4a1413160739805ff25e8fe99d",
            "eeab3243aad67bc838fc4425f759453bf0b47785",
            "22fbc169eeca3c9678fc7028aa80fad5ef49019f",
            "693c775700cf90bd158ee6e7f14dd1b7bd83a4ce",
            "33eb18340e4eaae3e3dcf80222b02f161cd3f966",
            "1a27cb1a26c9faed9f0d1975326fe51123ab01ed",
        ]
        .into_iter()
        .map(hex_to_id)
        .collect();
        assert_eq!(actual, expected);
        // The expectation itself is validated against a pre-recorded baseline file.
        let baseline = read_baseline(&fixture_dir()?, "date-order")?;
        let as_strings: Vec<_> = expected.iter().map(std::string::ToString::to_string).collect();
        assert_eq!(as_strings, baseline, "Baseline must match the expectation");
        Ok(())
    }
}

View File

@@ -0,0 +1,5 @@
//! Entry point of the integration test binary: wires up the shared test
//! utilities and the per-topic test modules.
mod util;
// Re-exported so test modules can use `crate::hex_to_id` / `crate::Result` directly.
pub use util::{hex_to_id, Result};
mod commit;
mod tree;

View File

@@ -0,0 +1,374 @@
use crate::hex_to_id;
use crate::util::fixture_odb;
/// Open the object database of the standard tree-traversal fixture repository.
fn odb() -> crate::Result<gix_odb::Handle> {
fixture_odb("make_traversal_repo_for_trees.sh")
}
/// Depth-first tree traversal: the snapshots below show that each directory
/// entry is immediately followed by the entries it contains.
mod depthfirst {
use gix_object::FindExt;
use gix_traverse::{tree, tree::recorder::Location};
use super::*;
// NOTE(review): `fixture_odb` is already in scope via `use super::*;` above - this
// explicit import is redundant but harmless.
use crate::util::fixture_odb;
/// Traverse the same tree twice with one `State`: first recording full paths
/// (the recorder default), then recording only file names.
#[test]
fn full_path_and_filename() -> crate::Result {
let db = odb()?;
let mut state = gix_traverse::tree::depthfirst::State::default();
// Borrow a scratch buffer from the traversal state to read the commit.
let mut buf = state.pop_buf();
let mut recorder = tree::Recorder::default();
let tree = db
.find_commit(&hex_to_id("85df34aa34848b8138b2b3dcff5fb5c2b734e0ce"), &mut buf)?
.tree();
gix_traverse::tree::depthfirst(tree, &mut state, &db, &mut recorder)?;
// Full paths: directories ("d", "e", "f", "f/d") appear right before their contents.
insta::assert_debug_snapshot!(recorder.records, @r#"
[
Entry {
mode: EntryMode(0o100644),
filepath: "a",
oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391),
},
Entry {
mode: EntryMode(0o100644),
filepath: "b",
oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391),
},
Entry {
mode: EntryMode(0o100644),
filepath: "c",
oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391),
},
Entry {
mode: EntryMode(0o40000),
filepath: "d",
oid: Sha1(496d6428b9cf92981dc9495211e6e1120fb6f2ba),
},
Entry {
mode: EntryMode(0o100644),
filepath: "d/a",
oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391),
},
Entry {
mode: EntryMode(0o40000),
filepath: "e",
oid: Sha1(4277b6e69d25e5efa77c455340557b384a4c018a),
},
Entry {
mode: EntryMode(0o100644),
filepath: "e/b",
oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391),
},
Entry {
mode: EntryMode(0o40000),
filepath: "f",
oid: Sha1(70fb16fc77b03e16acb4a5b1a6caf79ba302919a),
},
Entry {
mode: EntryMode(0o100644),
filepath: "f/c",
oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391),
},
Entry {
mode: EntryMode(0o40000),
filepath: "f/d",
oid: Sha1(5805b676e247eb9a8046ad0c4d249cd2fb2513df),
},
Entry {
mode: EntryMode(0o100644),
filepath: "f/d/x",
oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391),
},
Entry {
mode: EntryMode(0o100644),
filepath: "f/z",
oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391),
},
]
"#);
// Second run on the same tree id, this time tracking only each entry's file name.
recorder.records.clear();
recorder = recorder.track_location(Some(Location::FileName));
gix_traverse::tree::depthfirst(tree, state, &db, &mut recorder)?;
insta::assert_debug_snapshot!(recorder.records, @r#"
[
Entry {
mode: EntryMode(0o100644),
filepath: "a",
oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391),
},
Entry {
mode: EntryMode(0o100644),
filepath: "b",
oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391),
},
Entry {
mode: EntryMode(0o100644),
filepath: "c",
oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391),
},
Entry {
mode: EntryMode(0o40000),
filepath: "d",
oid: Sha1(496d6428b9cf92981dc9495211e6e1120fb6f2ba),
},
Entry {
mode: EntryMode(0o100644),
filepath: "a",
oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391),
},
Entry {
mode: EntryMode(0o40000),
filepath: "e",
oid: Sha1(4277b6e69d25e5efa77c455340557b384a4c018a),
},
Entry {
mode: EntryMode(0o100644),
filepath: "b",
oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391),
},
Entry {
mode: EntryMode(0o40000),
filepath: "f",
oid: Sha1(70fb16fc77b03e16acb4a5b1a6caf79ba302919a),
},
Entry {
mode: EntryMode(0o100644),
filepath: "c",
oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391),
},
Entry {
mode: EntryMode(0o40000),
filepath: "d",
oid: Sha1(5805b676e247eb9a8046ad0c4d249cd2fb2513df),
},
Entry {
mode: EntryMode(0o100644),
filepath: "x",
oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391),
},
Entry {
mode: EntryMode(0o100644),
filepath: "z",
oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391),
},
]
"#);
Ok(())
}
/// A fixture with deeper nesting and directory names interleaving with file
/// names; only non-tree entries are inspected here.
#[test]
fn more_difficult_fixture() -> crate::Result {
let db = fixture_odb("make_traversal_repo_for_trees_depthfirst.sh")?;
let mut state = gix_traverse::tree::depthfirst::State::default();
// Borrow a scratch buffer from the traversal state to read the commit.
let mut buf = state.pop_buf();
let mut recorder = tree::Recorder::default();
let tree = db
.find_commit(&hex_to_id("fe63a8a9fb7c27c089835aae92cbda675523803a"), &mut buf)?
.tree();
gix_traverse::tree::depthfirst(tree, &mut state, &db, &mut recorder)?;
// Drop tree entries, keeping only blobs, to focus on leaf ordering.
insta::assert_debug_snapshot!(recorder.records.into_iter().filter(|e| e.mode.is_no_tree()).collect::<Vec<_>>(), @r#"
[
Entry {
mode: EntryMode(0o100644),
filepath: "a",
oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391),
},
Entry {
mode: EntryMode(0o100644),
filepath: "b",
oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391),
},
Entry {
mode: EntryMode(0o100644),
filepath: "c",
oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391),
},
Entry {
mode: EntryMode(0o100644),
filepath: "d/a",
oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391),
},
Entry {
mode: EntryMode(0o100644),
filepath: "e/b",
oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391),
},
Entry {
mode: EntryMode(0o100644),
filepath: "f/FUNDING.yml",
oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391),
},
Entry {
mode: EntryMode(0o100644),
filepath: "f/ISSUE_TEMPLATE/x",
oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391),
},
Entry {
mode: EntryMode(0o100644),
filepath: "f/c",
oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391),
},
Entry {
mode: EntryMode(0o100644),
filepath: "f/dependabot.yml",
oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391),
},
Entry {
mode: EntryMode(0o100644),
filepath: "f/z",
oid: Sha1(e69de29bb2d1d6434b8b29ae775ad8c2e48c5391),
},
]
"#);
Ok(())
}
}
/// Breadth-first tree traversal: the expectations below show that all entries
/// of one directory level are emitted before any entry of the next level.
mod breadthfirst {
use gix_object::bstr::BString;
use gix_odb::pack::FindExt;
use gix_traverse::{tree, tree::recorder::Location};
use super::*;
/// By default the recorder tracks the full repository-relative path of each entry.
#[test]
fn full_path() -> crate::Result {
let db = odb()?;
let mut buf = Vec::new();
let mut buf2 = Vec::new();
let mut commit = db
.find_commit_iter(&hex_to_id("85df34aa34848b8138b2b3dcff5fb5c2b734e0ce"), &mut buf)?
.0;
// Full paths - that's the default.
let mut recorder = tree::Recorder::default();
gix_traverse::tree::breadthfirst(
db.find_tree_iter(&commit.tree_id().expect("a tree is available in a commit"), &mut buf2)?
.0,
tree::breadthfirst::State::default(),
&db,
&mut recorder,
)?;
use gix_object::tree::EntryKind::*;
use gix_traverse::tree::recorder::Entry;
// Level by level: all top-level entries first, then the contents of d/e/f,
// and finally the deepest entry f/d/x.
assert_eq!(
recorder.records,
vec![
Entry {
mode: Blob.into(),
filepath: "a".into(),
oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391")
},
Entry {
mode: Blob.into(),
filepath: "b".into(),
oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391")
},
Entry {
mode: Blob.into(),
filepath: "c".into(),
oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391")
},
Entry {
mode: Tree.into(),
filepath: "d".into(),
oid: hex_to_id("496d6428b9cf92981dc9495211e6e1120fb6f2ba")
},
Entry {
mode: Tree.into(),
filepath: "e".into(),
oid: hex_to_id("4277b6e69d25e5efa77c455340557b384a4c018a")
},
Entry {
mode: Tree.into(),
filepath: "f".into(),
oid: hex_to_id("70fb16fc77b03e16acb4a5b1a6caf79ba302919a")
},
Entry {
mode: Blob.into(),
filepath: "d/a".into(),
oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391")
},
Entry {
mode: Blob.into(),
filepath: "e/b".into(),
oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391")
},
Entry {
mode: Blob.into(),
filepath: "f/c".into(),
oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391")
},
Entry {
mode: Tree.into(),
filepath: "f/d".into(),
oid: hex_to_id("5805b676e247eb9a8046ad0c4d249cd2fb2513df")
},
Entry {
mode: Blob.into(),
filepath: "f/z".into(),
oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391")
},
Entry {
mode: Blob.into(),
filepath: "f/d/x".into(),
oid: hex_to_id("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391")
}
]
);
Ok(())
}
/// With `Location::FileName` only the last path component is recorded,
/// so names repeat across directory levels.
#[test]
fn filename_only() -> crate::Result<()> {
let db = odb()?;
let mut buf = Vec::new();
let mut buf2 = Vec::new();
let mut commit = db
.find_commit_iter(&hex_to_id("85df34aa34848b8138b2b3dcff5fb5c2b734e0ce"), &mut buf)?
.0;
let mut recorder = tree::Recorder::default().track_location(Some(Location::FileName));
gix_traverse::tree::breadthfirst(
db.find_tree_iter(&commit.tree_id().expect("a tree is available in a commit"), &mut buf2)?
.0,
tree::breadthfirst::State::default(),
&db,
&mut recorder,
)?;
assert_eq!(
recorder.records.into_iter().map(|e| e.filepath).collect::<Vec<_>>(),
["a", "b", "c", "d", "e", "f", "a", "b", "c", "d", "z", "x"]
.into_iter()
.map(BString::from)
.collect::<Vec<_>>()
);
Ok(())
}
/// With location tracking disabled entirely, every recorded path is empty.
#[test]
fn no_location() -> crate::Result<()> {
let db = odb()?;
let mut buf = Vec::new();
let mut buf2 = Vec::new();
let mut commit = db
.find_commit_iter(&hex_to_id("85df34aa34848b8138b2b3dcff5fb5c2b734e0ce"), &mut buf)?
.0;
let mut recorder = tree::Recorder::default().track_location(None);
gix_traverse::tree::breadthfirst(
db.find_tree_iter(&commit.tree_id().expect("a tree is available in a commit"), &mut buf2)?
.0,
tree::breadthfirst::State::default(),
&db,
&mut recorder,
)?;
for path in recorder.records.into_iter().map(|e| e.filepath) {
assert_eq!(path, "", "path should be empty as it's not tracked at all");
}
Ok(())
}
}

View File

@@ -0,0 +1,93 @@
use gix_hash::ObjectId;
use std::path::PathBuf;
pub use gix_testtools::Result;
/// Convert a hexadecimal hash into its corresponding `ObjectId` or _panic_.
///
/// # Panics
/// If `hex` is not a valid 40-character hexadecimal SHA-1 hash.
pub fn hex_to_id(hex: &str) -> ObjectId {
ObjectId::from_hex(hex.as_bytes()).expect("40 bytes hex")
}
/// Get the path to a fixture directory from a script that creates a single repository.
///
/// # Errors
/// Fails if the fixture script cannot be located or executed.
pub fn fixture(script_name: &str) -> Result<PathBuf> {
gix_testtools::scripted_fixture_read_only_standalone(script_name)
}
/// Get an object database handle from a fixture script that creates a single repository.
///
/// # Errors
/// Fails if the fixture cannot be created or the object database cannot be opened.
pub fn fixture_odb(script_name: &str) -> Result<gix_odb::Handle> {
    let objects_dir = fixture(script_name)?.join(".git").join("objects");
    let handle = gix_odb::at(objects_dir)?;
    Ok(handle)
}
/// Get a fixture path and object database for a named sub-repository within a fixture.
///
/// # Errors
/// Fails if the fixture cannot be created or the object database cannot be opened.
pub fn named_fixture(script_name: &str, repo_name: &str) -> Result<(PathBuf, gix_odb::Handle)> {
    let repo_dir = fixture(script_name)?.join(repo_name);
    let odb = gix_odb::at(repo_dir.join(".git").join("objects"))?;
    Ok((repo_dir, odb))
}
/// Load a commit graph if available for the given object store.
///
/// Returns `None` if no (readable) commit-graph exists - this is a best-effort lookup.
pub fn commit_graph(store: &gix_odb::Store) -> Option<gix_commitgraph::Graph> {
    let info_dir = store.path().join("info");
    gix_commitgraph::at(info_dir).ok()
}
/// Execute `git log --oneline --graph --decorate --all` in the given repository
/// and return the output as a string. Useful for snapshot testing.
///
/// Delegates to [`git_graph_internal`] without timestamps.
pub fn git_graph(repo_dir: impl AsRef<std::path::Path>) -> Result<String> {
git_graph_internal(repo_dir, false)
}
/// Like `git_graph`, but includes commit timestamps (Unix epoch seconds).
/// Use this for tests where commit ordering depends on time.
///
/// Delegates to [`git_graph_internal`] with timestamps enabled.
pub fn git_graph_with_time(repo_dir: impl AsRef<std::path::Path>) -> Result<String> {
git_graph_internal(repo_dir, true)
}
/// Shared implementation of [`git_graph`] and [`git_graph_with_time`]: runs
/// `git log --oneline --graph --decorate --all` and returns its stdout.
fn git_graph_internal(repo_dir: impl AsRef<std::path::Path>, with_time: bool) -> Result<String> {
    use gix_object::bstr::{ByteSlice, ByteVec};
    // `%ct` inserts the committer timestamp (Unix epoch seconds) when requested.
    let format = match with_time {
        true => "--pretty=format:%H %ct%d %s",
        false => "--pretty=format:%H %d %s",
    };
    let out = std::process::Command::new(gix_path::env::exe_invocation())
        .current_dir(repo_dir)
        .args(["log", "--oneline", "--graph", "--decorate", "--all", format])
        .output()?;
    if out.status.success() {
        Ok(out.stdout.into_string_lossy())
    } else {
        Err(format!("git log failed: {err}", err = out.stderr.to_str_lossy()).into())
    }
}
/// Parse commit names to IDs from git log output.
/// Returns a map of commit message (first word) to ObjectId.
pub fn parse_commit_names(repo_path: &std::path::Path) -> Result<std::collections::HashMap<String, ObjectId>> {
let output = std::process::Command::new("git")
.current_dir(repo_path)
.args(["log", "--all", "--format=%H %s"])
.output()?;
let mut commits = std::collections::HashMap::new();
for line in String::from_utf8_lossy(&output.stdout).lines() {
let mut parts = line.split_whitespace();
if let (Some(hash), Some(name)) = (parts.next(), parts.next()) {
commits.insert(name.to_string(), hex_to_id(hash));
}
}
Ok(commits)
}
/// Run `git rev-list` with the given arguments and return the resulting commit IDs.
/// Useful for verifying traversal results against git's baseline behavior.
pub fn git_rev_list(repo_path: &std::path::Path, args: &[&str]) -> Result<Vec<ObjectId>> {
let output = std::process::Command::new("git")
.current_dir(repo_path)
.arg("rev-list")
.args(args)
.output()?;
Ok(String::from_utf8_lossy(&output.stdout)
.lines()
.map(|s| hex_to_id(s.trim()))
.collect())
}