create src

This commit is contained in:
awfixer
2026-03-11 02:04:19 -07:00
commit 52f7a22bf2
2595 changed files with 402870 additions and 0 deletions

1532
src-commitgraph/CHANGELOG.md Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,49 @@
lints.workspace = true
[package]
name = "src-commitgraph"
version = "0.34.0"
repository = "https://github.com/GitoxideLabs/gitoxide"
documentation = "https://git-scm.com/docs/commit-graph"
license = "MIT OR Apache-2.0"
description = "Read-only access to the git commitgraph file format"
authors = ["Conor Davis <gitoxide@conor.fastmail.fm>", "Sebastian Thiel <sebastian.thiel@icloud.com>"]
edition = "2021"
include = ["src/**/*", "LICENSE-*"]
rust-version = "1.82"
[lib]
doctest = false
[features]
## Enable support for the SHA-1 hash by enabling the respective feature in the `src-hash` crate.
sha1 = ["src-hash/sha1"]
## Data structures implement `serde::Serialize` and `serde::Deserialize`
serde = ["dep:serde", "src-hash/serde", "bstr/serde"]
[dependencies]
src-hash = { version = "^0.22.1", path = "../src-hash" }
src-chunk = { version = "^0.7.0", path = "../src-chunk" }
src-error = { version = "^0.2.0", path = "../src-error" }
bstr = { version = "1.12.0", default-features = false, features = ["std"] }
memmap2 = "0.9.10"
nonempty = "0.12.0"
serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] }
document-features = { version = "0.2.0", optional = true }
[dev-dependencies]
src-testtools = { path = "../tests/tools" }
src-date = { path = "../src-date" }
src-hash = { path = "../src-hash", features = ["sha1", "sha256"] }
[package.metadata.docs.rs]
all-features = true
features = ["sha1", "document-features"]
[package.metadata.cargo-machete]
ignored = [
# Needed for `bstr/serde` feature forwarding, even though no direct `bstr` paths are referenced.
"bstr",
]

View File

@@ -0,0 +1 @@
../LICENSE-APACHE

1
src-commitgraph/LICENSE-MIT Symbolic link
View File

@@ -0,0 +1 @@
../LICENSE-MIT

9
src-commitgraph/fuzz/.gitignore vendored Normal file
View File

@@ -0,0 +1,9 @@
target
corpus
artifacts
coverage
# These usually involve a lot of local CPU time, keep them.
!artifacts
!corpus

View File

@@ -0,0 +1,37 @@
[package]
name = "src-commitgraph-fuzz"
version = "0.0.0"
publish = false
edition = "2021"
[package.metadata]
cargo-fuzz = true
[package.metadata.cargo-machete]
ignored = [
# Kept for fuzz-input modeling support in this fuzz package.
"arbitrary",
]
[dependencies]
anyhow = "1.0.76"
arbitrary = { version = "1.3.2", features = ["derive"] }
libfuzzer-sys = "0.4"
memmap2 = "0.9.0"
[dependencies.src-commitgraph]
path = ".."
features = ["sha1"]
# Prevent this from interfering with workspaces
[workspace]
members = ["."]
[profile.release]
debug = 1
[[bin]]
name = "fuzz_file"
path = "fuzz_targets/fuzz_file.rs"
test = false
doc = false

View File

@@ -0,0 +1,29 @@
#![no_main]
use anyhow::Result;
use gix_commitgraph::File;
use libfuzzer_sys::fuzz_target;
use std::hint::black_box;
fn fuzz(data: &[u8]) -> Result<()> {
let data = {
let mut d = memmap2::MmapMut::map_anon(data.len())?;
d.copy_from_slice(data);
d.make_read_only()?
};
let file = File::new(data, "does not matter".into()).map_err(|e| e.into_inner())?;
_ = black_box(file.iter_base_graph_ids().count());
_ = black_box(file.iter_commits().count());
_ = black_box(file.iter_ids().count());
let _ = black_box(file.checksum());
let _ = black_box(file.verify_checksum());
let _ = black_box(file.object_hash());
Ok(())
}
fuzz_target!(|data: &[u8]| {
_ = black_box(fuzz(data));
});

View File

@@ -0,0 +1,104 @@
use crate::{file, file::Commit, File, Graph, Position};
/// Access
impl Graph {
    /// Returns the commit at the given position `pos`.
    ///
    /// # Panics
    /// If `pos` is greater or equal to [`num_commits()`][Graph::num_commits()].
    pub fn commit_at(&self, pos: Position) -> Commit<'_> {
        let located = self.lookup_by_pos(pos);
        located.file.commit_at(located.pos)
    }
    /// The kind of hash used in this `Graph`.
    ///
    /// Note that it is always conforming to the hash used in the owning repository.
    pub fn object_hash(&self) -> gix_hash::Kind {
        self.files.first().object_hash()
    }
    /// Returns the commit matching the given `id`.
    pub fn commit_by_id(&self, id: impl AsRef<gix_hash::oid>) -> Option<Commit<'_>> {
        self.lookup_by_id(id.as_ref())
            .map(|located| located.file.commit_at(located.file_pos))
    }
    /// Returns the `hash` at the given position `pos`.
    ///
    /// # Panics
    /// If `pos` is greater or equal to [`num_commits()`][Graph::num_commits()].
    pub fn id_at(&self, pos: Position) -> &gix_hash::oid {
        let located = self.lookup_by_pos(pos);
        located.file.id_at(located.pos)
    }
    /// Iterate over commits in unsorted order.
    pub fn iter_commits(&self) -> impl Iterator<Item = Commit<'_>> {
        self.files.iter().flat_map(|file| file.iter_commits())
    }
    /// Iterate over commit IDs in unsorted order.
    pub fn iter_ids(&self) -> impl Iterator<Item = &gix_hash::oid> {
        self.files.iter().flat_map(|file| file.iter_ids())
    }
    /// Translate the given `id` to its position in the file.
    pub fn lookup(&self, id: impl AsRef<gix_hash::oid>) -> Option<Position> {
        self.lookup_by_id(id.as_ref()).map(|located| located.graph_pos)
    }
    /// Returns the number of commits stored in this file.
    pub fn num_commits(&self) -> u32 {
        self.files.iter().fold(0, |count, file| count + file.num_commits())
    }
}
/// Access fundamentals
impl Graph {
    /// Locate `id` in any of our files, translating its per-file position into
    /// a graph-wide one by adding the commit counts of all prior files.
    fn lookup_by_id(&self, id: &gix_hash::oid) -> Option<LookupByIdResult<'_>> {
        let mut commits_in_prior_files = 0;
        for file in &self.files {
            match file.lookup(id) {
                Some(lex_pos) => {
                    return Some(LookupByIdResult {
                        file,
                        file_pos: lex_pos,
                        graph_pos: Position(commits_in_prior_files + lex_pos.0),
                    })
                }
                None => commits_in_prior_files += file.num_commits(),
            }
        }
        None
    }
    /// Find the file holding the commit at graph-wide position `pos` and its
    /// position within that file.
    ///
    /// # Panics
    /// If `pos` lies beyond the last commit of the last file.
    fn lookup_by_pos(&self, pos: Position) -> LookupByPositionResult<'_> {
        let mut remaining = pos.0;
        for (file_index, file) in self.files.iter().enumerate() {
            if let Some(rest) = remaining.checked_sub(file.num_commits()) {
                remaining = rest;
            } else {
                return LookupByPositionResult {
                    file,
                    _file_index: file_index,
                    pos: file::Position(remaining),
                };
            }
        }
        panic!("graph position too large: {}", pos.0);
    }
}
#[derive(Clone)]
struct LookupByIdResult<'a> {
    // The file that contains the commit.
    pub file: &'a File,
    // Position across the entire (possibly multi-file) graph.
    pub graph_pos: Position,
    // Lexicographical position within `file` only.
    pub file_pos: file::Position,
}
#[derive(Clone)]
struct LookupByPositionResult<'a> {
    // The file that contains the commit at the queried graph position.
    pub file: &'a File,
    // Index of `file` within the graph's file list; intentionally unused (underscore).
    pub _file_index: usize,
    // Lexicographical position within `file`, not graph-wide.
    pub pos: file::Position,
}

View File

@@ -0,0 +1,140 @@
use std::{
fmt::{Debug, Formatter},
path::Path,
};
use crate::{
file::{self, commit::Commit, COMMIT_DATA_ENTRY_SIZE_SANS_HASH},
File,
};
/// Access
impl File {
    /// The number of base graphs that this file depends on.
    pub fn base_graph_count(&self) -> u8 {
        self.base_graph_count
    }

    /// Returns the commit data for the commit located at the given lexicographical position.
    ///
    /// `pos` must range from 0 to `self.num_commits()`.
    ///
    /// # Panics
    ///
    /// Panics if `pos` is out of bounds.
    pub fn commit_at(&self, pos: file::Position) -> Commit<'_> {
        Commit::new(self, pos)
    }

    /// The kind of hash used in this File.
    ///
    /// Note that it is always conforming to the hash used in the owning repository.
    pub fn object_hash(&self) -> gix_hash::Kind {
        self.object_hash
    }

    /// Returns an object id at the given index in our list of (sorted) hashes.
    /// The position ranges from 0 to `self.num_commits()`
    ///
    /// # Panics
    ///
    /// Panics if `pos` is out of bounds.
    // copied from src-odb/src/pack/index/ext
    pub fn id_at(&self, pos: file::Position) -> &gix_hash::oid {
        assert!(
            pos.0 < self.num_commits(),
            "expected lexicographical position less than {}, got {}",
            self.num_commits(),
            pos.0
        );
        let pos: usize = pos
            .0
            .try_into()
            .expect("an architecture able to hold 32 bits of integer");
        let start = self.oid_lookup_offset + (pos * self.hash_len);
        gix_hash::oid::from_bytes_unchecked(&self.data[start..][..self.hash_len])
    }

    /// Return an iterator over all object hashes stored in the base graph.
    pub fn iter_base_graph_ids(&self) -> impl Iterator<Item = &gix_hash::oid> {
        // When there is no BASE chunk, `init` guarantees `base_graph_count == 0`,
        // so the slice below is empty and the `0` offset default is never read from.
        let start = self.base_graphs_list_offset.unwrap_or(0);
        let base_graphs_list = &self.data[start..][..self.hash_len * usize::from(self.base_graph_count)];
        base_graphs_list
            .chunks_exact(self.hash_len)
            .map(gix_hash::oid::from_bytes_unchecked)
    }

    /// return an iterator over all commits in this file.
    pub fn iter_commits(&self) -> impl Iterator<Item = Commit<'_>> {
        (0..self.num_commits()).map(move |i| self.commit_at(file::Position(i)))
    }

    /// Return an iterator over all object hashes stored in this file.
    pub fn iter_ids(&self) -> impl Iterator<Item = &gix_hash::oid> {
        (0..self.num_commits()).map(move |i| self.id_at(file::Position(i)))
    }

    /// Translate the given object hash to its position within this file, if present.
    // copied from src-odb/src/pack/index/ext
    pub fn lookup(&self, id: impl AsRef<gix_hash::oid>) -> Option<file::Position> {
        self.lookup_inner(id.as_ref())
    }

    /// Binary search for `id` within the fan-out bucket selected by its first byte.
    fn lookup_inner(&self, id: &gix_hash::oid) -> Option<file::Position> {
        let first_byte = usize::from(id.first_byte());
        let mut upper_bound = self.fan[first_byte];
        let mut lower_bound = if first_byte != 0 { self.fan[first_byte - 1] } else { 0 };
        while lower_bound < upper_bound {
            // Overflow-safe midpoint: `(lower + upper) / 2` can wrap for fan values close
            // to `u32::MAX`, which can occur in corrupt or malicious (fuzzed) files.
            let mid = lower_bound + (upper_bound - lower_bound) / 2;
            let mid_sha = self.id_at(file::Position(mid));
            use std::cmp::Ordering::*;
            match id.cmp(mid_sha) {
                Less => upper_bound = mid,
                Equal => return Some(file::Position(mid)),
                Greater => lower_bound = mid + 1,
            }
        }
        None
    }

    /// Returns the number of commits in this graph file.
    ///
    /// The maximum valid `file::Position` that can be used with this file is one less than
    /// `num_commits()`.
    pub fn num_commits(&self) -> u32 {
        // The last fan-out entry is the cumulative count over all 256 first-byte buckets.
        self.fan[255]
    }

    /// Returns the path to this file.
    pub fn path(&self) -> &Path {
        &self.path
    }
}
impl File {
    /// Returns the byte slice for the given commit in this file's Commit Data (CDAT) chunk.
    ///
    /// # Panics
    ///
    /// Panics if `pos` is not less than [`num_commits()`][File::num_commits()].
    pub(crate) fn commit_data_bytes(&self, pos: file::Position) -> &[u8] {
        assert!(
            pos.0 < self.num_commits(),
            "expected lexicographical position less than {}, got {}",
            self.num_commits(),
            pos.0
        );
        let pos: usize = pos
            .0
            .try_into()
            .expect("an architecture able to hold 32 bits of integer");
        // Each entry is the root-tree hash followed by fixed-size commit metadata.
        let entry_size = self.hash_len + COMMIT_DATA_ENTRY_SIZE_SANS_HASH;
        let start = self.commit_data_offset + (pos * entry_size);
        &self.data[start..][..entry_size]
    }
    /// Returns the byte slice for this file's entire Extra Edge List (EDGE) chunk,
    /// or `None` if the optional EDGE chunk is not present in this file.
    pub(crate) fn extra_edges_data(&self) -> Option<&[u8]> {
        Some(&self.data[self.extra_edges_list_range.clone()?])
    }
}
impl Debug for File {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // `{:?}` on the path already renders it quoted and escaped; previously the
        // literal supplied an extra pair of quotes around `{:?}` of `path.display()`,
        // producing doubled quotes like `File(""…"")`.
        write!(f, "File({:?})", self.path)
    }
}

View File

@@ -0,0 +1,256 @@
//! Low-level operations on individual commits.
use crate::{
file::{self, EXTENDED_EDGES_MASK, LAST_EXTENDED_EDGE_MASK, NO_PARENT},
File, Position,
};
use gix_error::{message, Message};
use std::{
fmt::{Debug, Formatter},
slice::Chunks,
};
/// A commit as stored in a [`File`].
#[derive(Copy, Clone)]
pub struct Commit<'a> {
    file: &'a File,
    pos: file::Position,
    // We can parse the below fields lazily if needed.
    // Committer timestamp in seconds since the Unix epoch (low 34 bits of the CDAT word).
    commit_timestamp: u64,
    // Topological generation number as stored in the file.
    generation: u32,
    // Raw parent edges; `parent2` may point into the extra-edge (EDGE) list.
    parent1: ParentEdge,
    parent2: ParentEdge,
    // Tree this commit points to, borrowed from the file's commit-data chunk.
    root_tree_id: &'a gix_hash::oid,
}
/// Read a big-endian `u32` from the first four bytes of `b`.
///
/// Panics if `b` is not exactly four bytes long.
#[inline]
fn read_u32(b: &[u8]) -> u32 {
    let raw: [u8; 4] = b.try_into().unwrap();
    u32::from_be_bytes(raw)
}
impl<'a> Commit<'a> {
    /// Eagerly decode the fixed-size commit-data entry at `pos` into an in-memory `Commit`.
    pub(crate) fn new(file: &'a File, pos: file::Position) -> Self {
        let bytes = file.commit_data_bytes(pos);
        Commit {
            file,
            pos,
            // Layout: root tree hash, then two 4-byte parent edges, then 8 packed bytes.
            root_tree_id: gix_hash::oid::from_bytes_unchecked(&bytes[..file.hash_len]),
            parent1: ParentEdge::from_raw(read_u32(&bytes[file.hash_len..][..4])),
            parent2: ParentEdge::from_raw(read_u32(&bytes[file.hash_len + 4..][..4])),
            // TODO: Add support for corrected commit date offset overflow.
            // See https://github.com/git/git/commit/e8b63005c48696a26f976f5f9b0ccaf1983e439d and
            // https://github.com/git/git/commit/f90fca638e99a031dce8e3aca72427b2f9b4bb38 for more details and hints at a test.
            // The final 8 bytes pack both fields: the top 30 bits of the first 4 bytes hold
            // the generation number (hence `>> 2`), and the low 34 bits of the full 8-byte
            // word hold the committer timestamp (hence the 0x0003_ffff_ffff mask).
            generation: read_u32(&bytes[file.hash_len + 8..][..4]) >> 2,
            commit_timestamp: u64::from_be_bytes(bytes[file.hash_len + 8..][..8].try_into().unwrap())
                & 0x0003_ffff_ffff,
        }
    }
    /// Returns the committer timestamp of this commit.
    ///
    /// The value is the number of seconds since 1970-01-01 00:00:00 UTC.
    pub fn committer_timestamp(&self) -> u64 {
        self.commit_timestamp
    }
    /// Returns the generation number of this commit.
    ///
    /// Commits without parents have generation number 1. Commits with parents have a generation
    /// number that is the max of their parents' generation numbers + 1.
    pub fn generation(&self) -> u32 {
        self.generation
    }
    /// Returns an iterator over the parent positions for lookup in the owning [Graph][crate::Graph].
    pub fn iter_parents(self) -> Parents<'a> {
        // I didn't find a combinator approach that a) was as strict as ParentIterator, b) supported
        // fuse-after-first-error behavior, and b) was significantly shorter or more understandable
        // than ParentIterator. So here we are.
        Parents {
            commit_data: self,
            state: ParentIteratorState::First,
        }
    }
    /// Returns the hash of this commit.
    pub fn id(&self) -> &'a gix_hash::oid {
        // The ID is not stored in the commit data itself; look it up in the OIDL chunk.
        self.file.id_at(self.pos)
    }
    /// Returns the first parent of this commit.
    pub fn parent1(&self) -> Result<Option<Position>, Message> {
        self.iter_parents().next().transpose()
    }
    /// Returns the position at which this commit is stored in the parent [File].
    pub fn position(&self) -> file::Position {
        self.pos
    }
    /// Return the hash of the tree this commit points to.
    pub fn root_tree_id(&self) -> &gix_hash::oid {
        self.root_tree_id
    }
}
impl Debug for Commit<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // `self.id()` reads from the owning file's OID lookup chunk, so debug-formatting
        // touches the underlying memory map.
        write!(
            f,
            "Commit {{ id: {}, lex_pos: {}, generation: {}, root_tree_id: {}, parent1: {:?}, parent2: {:?} }}",
            self.id(),
            self.pos,
            self.generation(),
            self.root_tree_id(),
            self.parent1,
            self.parent2,
        )
    }
}
impl Eq for Commit<'_> {}
// Two `Commit`s are equal when they come from the same `File` *instance* (pointer
// identity, not file content) and sit at the same lexicographical position.
impl PartialEq for Commit<'_> {
    fn eq(&self, other: &Self) -> bool {
        std::ptr::eq(self.file, other.file) && self.pos == other.pos
    }
}
/// An iterator over parents of a [`Commit`].
pub struct Parents<'a> {
    // The commit whose parents are iterated (cheap, `Commit` is `Copy`).
    commit_data: Commit<'a>,
    // Which parent slot comes next; errors and completion both collapse to `Exhausted`.
    state: ParentIteratorState<'a>,
}
impl Iterator for Parents<'_> {
    type Item = Result<Position, Message>;
    // A fused state machine: `First` -> `Second` -> (`Extra` for octopus merges) ->
    // `Exhausted`. The state is moved out up-front, so any path that does not explicitly
    // restore it (all error returns) leaves the iterator exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        let state = std::mem::replace(&mut self.state, ParentIteratorState::Exhausted);
        match state {
            ParentIteratorState::First => match self.commit_data.parent1 {
                // A second parent without a first one is a format violation.
                ParentEdge::None => match self.commit_data.parent2 {
                    ParentEdge::None => None,
                    _ => Some(Err(message!(
                        "commit {} has a second parent but not a first parent",
                        self.commit_data.id()
                    ))),
                },
                ParentEdge::GraphPosition(pos) => {
                    self.state = ParentIteratorState::Second;
                    Some(Ok(pos))
                }
                // Only the second parent slot may refer into the extra-edge list.
                ParentEdge::ExtraEdgeIndex(_) => Some(Err(message!(
                    "commit {}'s first parent is an extra edge index, which is invalid",
                    self.commit_data.id(),
                ))),
            },
            ParentIteratorState::Second => match self.commit_data.parent2 {
                ParentEdge::None => None,
                ParentEdge::GraphPosition(pos) => Some(Ok(pos)),
                ParentEdge::ExtraEdgeIndex(extra_edge_index) => {
                    if let Some(extra_edges_list) = self.commit_data.file.extra_edges_data() {
                        // EDGE entries are 4 bytes each, so the index scales by 4.
                        let start_offset: usize = extra_edge_index
                            .try_into()
                            .expect("an architecture able to hold 32 bits of integer");
                        let start_offset = start_offset
                            .checked_mul(4)
                            .expect("an extended edge index small enough to fit in usize");
                        if let Some(tail) = extra_edges_list.get(start_offset..) {
                            self.state = ParentIteratorState::Extra(tail.chunks(4));
                            // This recursive call is what blocks me from replacing ParentIterator
                            // with a std::iter::from_fn closure.
                            self.next()
                        } else {
                            Some(Err(message!(
                                "commit {}'s extra edges overflows the commit-graph file's extra edges list",
                                self.commit_data.id()
                            )))
                        }
                    } else {
                        Some(Err(message!(
                            "commit {} has extra edges, but commit-graph file has no extra edges list",
                            self.commit_data.id()
                        )))
                    }
                }
            },
            ParentIteratorState::Extra(mut chunks) => {
                if let Some(chunk) = chunks.next() {
                    let extra_edge = read_u32(chunk);
                    match ExtraEdge::from_raw(extra_edge) {
                        // Only `Internal` edges continue iteration; `Last` terminates it
                        // by not restoring the `Extra` state.
                        ExtraEdge::Internal(pos) => {
                            self.state = ParentIteratorState::Extra(chunks);
                            Some(Ok(pos))
                        }
                        ExtraEdge::Last(pos) => Some(Ok(pos)),
                    }
                } else {
                    // The list ended without a `Last`-marked entry.
                    Some(Err(message!(
                        "commit {}'s extra edges overflows the commit-graph file's extra edges list",
                        self.commit_data.id()
                    )))
                }
            }
            ParentIteratorState::Exhausted => None,
        }
    }
    // Bounds are exact for plain edges; extra-edge cases only provide a lower bound
    // (a commit using the extra-edge list has more than two parents).
    fn size_hint(&self) -> (usize, Option<usize>) {
        match (&self.state, self.commit_data.parent1, self.commit_data.parent2) {
            (ParentIteratorState::First, ParentEdge::None, ParentEdge::None) => (0, Some(0)),
            (ParentIteratorState::First, ParentEdge::None, _) => (1, Some(1)),
            (ParentIteratorState::First, ParentEdge::GraphPosition(_), ParentEdge::None) => (1, Some(1)),
            (ParentIteratorState::First, ParentEdge::GraphPosition(_), ParentEdge::GraphPosition(_)) => (2, Some(2)),
            (ParentIteratorState::First, ParentEdge::GraphPosition(_), ParentEdge::ExtraEdgeIndex(_)) => (3, None),
            (ParentIteratorState::First, ParentEdge::ExtraEdgeIndex(_), _) => (1, Some(1)),
            (ParentIteratorState::Second, _, ParentEdge::None) => (0, Some(0)),
            (ParentIteratorState::Second, _, ParentEdge::GraphPosition(_)) => (1, Some(1)),
            (ParentIteratorState::Second, _, ParentEdge::ExtraEdgeIndex(_)) => (2, None),
            (ParentIteratorState::Extra(_), _, _) => (1, None),
            (ParentIteratorState::Exhausted, _, _) => (0, Some(0)),
        }
    }
}
#[derive(Debug)]
enum ParentIteratorState<'a> {
    // The first parent has not been yielded yet.
    First,
    // The first parent was yielded; the second parent (or the extra-edge list) is next.
    Second,
    // Iterating the remaining parents of an octopus merge from the EDGE chunk.
    Extra(Chunks<'a, u8>),
    // Iteration finished, either normally or after an error.
    Exhausted,
}
#[derive(Clone, Copy, Debug)]
enum ParentEdge {
    None,
    GraphPosition(Position),
    ExtraEdgeIndex(u32),
}

impl ParentEdge {
    /// Decode a raw parent word from the commit-data chunk: the `NO_PARENT` sentinel,
    /// a direct graph position, or (high bit set) an index into the extra-edge list.
    pub fn from_raw(raw: u32) -> ParentEdge {
        match raw {
            NO_PARENT => ParentEdge::None,
            raw if raw & EXTENDED_EDGES_MASK != 0 => ParentEdge::ExtraEdgeIndex(raw & !EXTENDED_EDGES_MASK),
            raw => ParentEdge::GraphPosition(Position(raw)),
        }
    }
}
enum ExtraEdge {
    Internal(Position),
    Last(Position),
}

impl ExtraEdge {
    /// Decode one 4-byte entry of the EDGE chunk; the high bit marks the final
    /// parent of the list.
    pub fn from_raw(raw: u32) -> Self {
        if raw & LAST_EXTENDED_EDGE_MASK == 0 {
            Self::Internal(Position(raw))
        } else {
            Self::Last(Position(raw & !LAST_EXTENDED_EDGE_MASK))
        }
    }
}

View File

@@ -0,0 +1,203 @@
use std::path::{Path, PathBuf};
use gix_error::{message, ErrorExt, Exn, Message, ResultExt};
use crate::{
file::{
BASE_GRAPHS_LIST_CHUNK_ID, COMMIT_DATA_CHUNK_ID, COMMIT_DATA_ENTRY_SIZE_SANS_HASH,
EXTENDED_EDGES_LIST_CHUNK_ID, FAN_LEN, HEADER_LEN, OID_FAN_CHUNK_ID, OID_LOOKUP_CHUNK_ID, SIGNATURE,
},
File,
};
// The smallest possible commit-graph file: header, a chunk index with the three
// mandatory chunks, the fan-out table, and a trailing checksum of the shortest
// supported hash kind.
const MIN_FILE_SIZE: usize = HEADER_LEN
    + gix_chunk::file::Index::size_for_entries(3 /*OIDF, OIDL, CDAT*/)
    + FAN_LEN * 4 /* FANOUT TABLE CHUNK OIDF */
    + gix_hash::Kind::shortest().len_in_bytes();
impl File {
    /// Try to parse the commit graph file at `path`.
    pub fn at(path: impl AsRef<Path>) -> Result<File, Exn<Message>> {
        Self::try_from(path.as_ref())
    }
    /// A lower-level constructor which constructs a new instance directly from the mapping in `data`,
    /// assuming that it originated from `path`.
    ///
    /// Note that `path` is only used for verification of the hash its basename contains, but otherwise
    /// is not of importance.
    pub fn new(data: memmap2::Mmap, path: PathBuf) -> Result<File, Exn<Message>> {
        let data_size = data.len();
        if data_size < MIN_FILE_SIZE {
            return Err(message("Commit-graph file too small even for an empty graph").raise());
        }
        // Fixed 8-byte header: 4-byte signature, then version, hash kind, chunk count
        // and base-graph count, one byte each.
        let mut ofs = 0;
        if &data[ofs..ofs + SIGNATURE.len()] != SIGNATURE {
            return Err(message("Commit-graph file does not start with expected signature").raise());
        }
        ofs += SIGNATURE.len();
        match data[ofs] {
            1 => (),
            x => {
                return Err(message!("Unsupported commit-graph file version: {x}").raise());
            }
        }
        ofs += 1;
        let object_hash = gix_hash::Kind::try_from(data[ofs])
            .map_err(|v| message!("Commit-graph file uses unsupported hash version: {v}").raise())?;
        ofs += 1;
        let chunk_count = data[ofs];
        // Can assert chunk_count >= MIN_CHUNKS here, but later OIDF+OIDL+CDAT presence checks make
        // it redundant.
        ofs += 1;
        let base_graph_count = data[ofs];
        ofs += 1;
        let chunks = gix_chunk::file::Index::from_bytes(&data, ofs, u32::from(chunk_count))
            .or_raise(|| message!("Couldn't read commit-graph file with {chunk_count} chunks at offset {ofs}"))?;
        // Optional BASE chunk: hashes of the graph files this one builds upon. Its entry
        // count must agree with the header's `base_graph_count`.
        let base_graphs_list_offset = chunks
            .validated_usize_offset_by_id(BASE_GRAPHS_LIST_CHUNK_ID, |chunk_range| {
                let chunk_size = chunk_range.len();
                if chunk_size % object_hash.len_in_bytes() != 0 {
                    return Err(message!("Commit-graph chunk {BASE_GRAPHS_LIST_CHUNK_ID:?} has invalid size: {msg}",
                        msg = format!(
                            "chunk size {} is not a multiple of {}",
                            chunk_size,
                            object_hash.len_in_bytes()
                        ),
                    ).raise());
                }
                let chunk_base_graph_count: u32 = (chunk_size / object_hash.len_in_bytes())
                    .try_into()
                    .expect("base graph count to fit in 32-bits");
                if chunk_base_graph_count != u32::from(base_graph_count) {
                    return Err(message!("Commit-graph {BASE_GRAPHS_LIST_CHUNK_ID:?} chunk contains {chunk_base_graph_count} base graphs, but commit-graph file header claims {base_graph_count} base graphs").raise())
                }
                Ok(chunk_range.start)
            })
            .ok()
            .transpose()?;
        // Mandatory CDAT chunk: fixed-size per-commit records (tree hash, parents, metadata).
        let (commit_data_offset, commit_data_count): (_, u32) = chunks
            .validated_usize_offset_by_id(COMMIT_DATA_CHUNK_ID, |chunk_range| {
                let chunk_size = chunk_range.len();
                let entry_size = object_hash.len_in_bytes() + COMMIT_DATA_ENTRY_SIZE_SANS_HASH;
                if chunk_size % entry_size != 0 {
                    return Err(message!("Commit-graph chunk {COMMIT_DATA_CHUNK_ID:?} has invalid size: chunk size {chunk_size} is not a multiple of {entry_size}").raise())
                }
                Ok((
                    chunk_range.start,
                    (chunk_size / entry_size)
                        .try_into()
                        .expect("number of commits in CDAT chunk to fit in 32 bits"),
                ))
            })??;
        // Mandatory OIDF chunk: the fan-out table must be exactly 256 u32 entries.
        let fan_offset = chunks
            .validated_usize_offset_by_id(OID_FAN_CHUNK_ID, |chunk_range| {
                let chunk_size = chunk_range.len();
                let expected_size = 4 * FAN_LEN;
                if chunk_size != expected_size {
                    return Err(message!("Commit-graph chunk {OID_FAN_CHUNK_ID:?} has invalid size: expected chunk length {expected_size}, got {chunk_size}").raise())
                }
                Ok(chunk_range.start)
            })?
            .or_raise(|| message("Error getting offset for OID fan chunk"))?;
        // Mandatory OIDL chunk: the sorted list of all commit hashes in this file.
        let (oid_lookup_offset, oid_lookup_count): (_, u32) = chunks
            .validated_usize_offset_by_id(OID_LOOKUP_CHUNK_ID, |chunk_range| {
                let chunk_size = chunk_range.len();
                if chunk_size % object_hash.len_in_bytes() != 0 {
                    return Err(message!("Commit-graph chunk {OID_LOOKUP_CHUNK_ID:?} has invalid size: chunk size {chunk_size} is not a multiple of {hash_len}", hash_len = object_hash.len_in_bytes()).raise())
                }
                Ok((
                    chunk_range.start,
                    (chunk_size / object_hash.len_in_bytes())
                        .try_into()
                        .expect("number of commits in OIDL chunk to fit in 32 bits"),
                ))
            })?
            .or_raise(|| message("Error getting offset for OID lookup chunk"))?;
        // Optional EDGE chunk, present for commits with more than two parents.
        let extra_edges_list_range = chunks.usize_offset_by_id(EXTENDED_EDGES_LIST_CHUNK_ID).ok();
        // Everything after the last chunk must be exactly one trailing checksum hash.
        let trailer = &data[chunks.highest_offset() as usize..];
        if trailer.len() != object_hash.len_in_bytes() {
            return Err(message!(
                "Expected commit-graph trailer to contain {} bytes, got {}",
                object_hash.len_in_bytes(),
                trailer.len()
            )
            .raise());
        }
        if base_graph_count > 0 && base_graphs_list_offset.is_none() {
            return Err(message!("Chunk named {BASE_GRAPHS_LIST_CHUNK_ID:?} was not found in chunk file index").into());
        }
        // Cross-check that OIDF, OIDL and CDAT all agree on the number of commits.
        let (fan, _) = read_fan(&data[fan_offset..]);
        if oid_lookup_count != fan[255] {
            return Err(message!("Commit-graph {OID_FAN_CHUNK_ID:?} chunk contains {chunk1_commits} commits, but {OID_LOOKUP_CHUNK_ID:?} chunk contains {chunk2_commits} commits",
                chunk1_commits = fan[255],
                chunk2_commits = oid_lookup_count,
            ).raise());
        }
        if commit_data_count != fan[255] {
            return Err(
                message!("Commit-graph {OID_FAN_CHUNK_ID:?} chunk contains {chunk1_commits} commits, but {COMMIT_DATA_CHUNK_ID:?} chunk contains {chunk2_commits} commits",
                    chunk1_commits = fan[255],
                    chunk2_commits = commit_data_count,
                ).raise(),
            );
        }
        Ok(File {
            base_graph_count,
            base_graphs_list_offset,
            commit_data_offset,
            data,
            extra_edges_list_range,
            fan,
            oid_lookup_offset,
            path,
            hash_len: object_hash.len_in_bytes(),
            object_hash,
        })
    }
}
impl TryFrom<&Path> for File {
    type Error = Exn<Message>;
    /// Open and memory-map the commit-graph file at `path`, then parse it via [`File::new()`].
    fn try_from(path: &Path) -> Result<Self, Self::Error> {
        let data = std::fs::File::open(path)
            .and_then(|file| {
                // SAFETY: we have to take the risk of somebody changing the file underneath. Git never writes into the same file.
                #[allow(unsafe_code)]
                unsafe {
                    // Copy-on-write mapping, so even if the file changes we never write back.
                    memmap2::MmapOptions::new().map_copy_read_only(&file)
                }
            })
            .or_raise(|| message!("Could not open commit-graph file at '{path}'", path = path.display()))?;
        Self::new(data, path.to_owned())
    }
}
// Copied from src-odb/pack/index/init.rs
/// Decode the 256-entry big-endian fan-out table from the start of `d`, returning the
/// table together with the number of bytes consumed.
///
/// Panics if `d` holds fewer than `FAN_LEN * 4` bytes.
fn read_fan(d: &[u8]) -> ([u32; FAN_LEN], usize) {
    assert!(d.len() >= FAN_LEN * 4);
    let mut fan = [0; FAN_LEN];
    for (entry, raw) in fan.iter_mut().zip(d.chunks_exact(4)) {
        *entry = u32::from_be_bytes(raw.try_into().unwrap());
    }
    (fan, FAN_LEN * 4)
}

View File

@@ -0,0 +1,46 @@
//! Operations on a single commit-graph file.
use std::fmt::{Display, Formatter};
pub use self::commit::Commit;
mod access;
pub mod commit;
mod init;
pub mod verify;
// Size of a commit-data entry excluding the root-tree hash: two 4-byte parent edges
// plus the 8-byte packed generation/commit-time word.
const COMMIT_DATA_ENTRY_SIZE_SANS_HASH: usize = 16;
// One cumulative count per possible first byte of an object hash.
pub(crate) const FAN_LEN: usize = 256;
// Signature (4) + version (1) + hash kind (1) + chunk count (1) + base graph count (1).
const HEADER_LEN: usize = 8;
const SIGNATURE: &[u8] = b"CGPH";
type ChunkId = gix_chunk::Id;
// Chunk identifiers as defined by git's commit-graph file format.
const BASE_GRAPHS_LIST_CHUNK_ID: ChunkId = *b"BASE";
const COMMIT_DATA_CHUNK_ID: ChunkId = *b"CDAT";
const EXTENDED_EDGES_LIST_CHUNK_ID: ChunkId = *b"EDGE";
const OID_FAN_CHUNK_ID: ChunkId = *b"OIDF";
const OID_LOOKUP_CHUNK_ID: ChunkId = *b"OIDL";
// Note that git's commit-graph-format.txt as of v2.28.0 gives an incorrect value 0x0700_0000 for
// NO_PARENT. Fixed in https://github.com/git/git/commit/4d515253afcef985e94400adbfed7044959f9121 .
const NO_PARENT: u32 = 0x7000_0000;
// High bit of the second parent word marks an index into the EDGE chunk instead of a position.
const EXTENDED_EDGES_MASK: u32 = 0x8000_0000;
// High bit of an EDGE entry marks the final parent of an edge list.
const LAST_EXTENDED_EDGE_MASK: u32 = 0x8000_0000;
/// The position of a given commit within a graph file, starting at 0.
///
/// Commits within a graph file are sorted in lexicographical order by OID; a commit's lexicographical position
/// is its position in this ordering. If a commit graph spans multiple files, each file's commits
/// start at lexicographical position 0, so it is unique across a single file but is not unique across
/// the whole commit graph. Each commit also has a graph position ([`Position`][crate::Position]),
/// which is unique across the whole commit graph.
/// In order to avoid accidentally mixing lexicographical positions with graph positions, distinct types are used for each.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct Position(pub u32);

impl Display for Position {
    /// Delegate to the inner `u32`, so width/alignment flags apply to the number itself.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        Display::fmt(&self.0, f)
    }
}

View File

@@ -0,0 +1,133 @@
//! Auxiliary types used in commit graph file verification methods.
use std::{
cmp::{max, min},
collections::HashMap,
path::Path,
};
use gix_error::{message, ErrorExt, Exn, Message, ResultExt};
use crate::{file, File, GENERATION_NUMBER_INFINITY, GENERATION_NUMBER_MAX};
/// The positive result of [`File::traverse()`] providing some statistical information.
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub struct Outcome {
    /// The largest encountered [`file::Commit`] generation number.
    pub max_generation: u32,
    /// The smallest encountered [`file::Commit`] generation number.
    ///
    /// This is `0` when the file contains no commits at all.
    pub min_generation: u32,
    /// The largest number of parents in a single [`file::Commit`].
    pub max_parents: u32,
    /// The total number of [`commits`][file::Commit]s seen in the iteration.
    pub num_commits: u32,
    /// A mapping of `N -> number of commits with N parents`.
    pub parent_counts: HashMap<u32, u32>,
}
/// Verification
impl File {
    /// Returns the trailing checksum over the entire content of this file.
    pub fn checksum(&self) -> &gix_hash::oid {
        gix_hash::oid::from_bytes_unchecked(&self.data[self.data.len() - self.hash_len..])
    }
    /// Traverse all [commits][file::Commit] stored in this file and call `processor(commit) -> Result<(), Error>` on it.
    ///
    /// If the `processor` fails, the iteration will be stopped and the entire call results in the respective error.
    pub fn traverse<'a, Processor>(&'a self, mut processor: Processor) -> Result<Outcome, Exn<Message>>
    where
        Processor: FnMut(&file::Commit<'a>) -> Result<(), Exn>,
    {
        self.verify_checksum()?;
        verify_split_chain_filename_hash(&self.path, self.checksum())?;
        let null_id = self.object_hash().null_ref();
        // NOTE(review): `max_parents` is initialized here but never updated in the loop
        // below — presumably callers derive it from `parent_counts`; confirm before
        // relying on this field from a `File`-level traversal.
        let mut stats = Outcome {
            max_generation: 0,
            max_parents: 0,
            min_generation: GENERATION_NUMBER_INFINITY,
            num_commits: self.num_commits(),
            parent_counts: HashMap::new(),
        };
        // TODO: Verify self.fan values as we go.
        // Commit IDs must be strictly increasing; seeding the comparison with the null
        // ID also rejects a leading null commit ID (equality triggers the inner check).
        let mut prev_id: &gix_hash::oid = null_id;
        for commit in self.iter_commits() {
            if commit.id() <= prev_id {
                if commit.id() == null_id {
                    return Err(message!(
                        "commit at file position {} has invalid ID {}",
                        commit.position(),
                        commit.id()
                    )
                    .raise());
                }
                return Err(message!(
                    "commit at file position {} with ID {} is out of order relative to its predecessor with ID {prev_id}",
                    commit.position(),
                    commit.id()
                )
                .raise());
            }
            if commit.root_tree_id() == null_id {
                return Err(message!(
                    "commit {} has invalid root tree ID {}",
                    commit.id(),
                    commit.root_tree_id()
                )
                .raise());
            }
            if commit.generation() > GENERATION_NUMBER_MAX {
                return Err(message!("commit {} has invalid generation {}", commit.id(), commit.generation()).raise());
            }
            processor(&commit).or_raise(|| message!("processor failed on commit {}", commit.id()))?;
            stats.max_generation = max(stats.max_generation, commit.generation());
            stats.min_generation = min(stats.min_generation, commit.generation());
            // Count parents by walking the parent edges; the first decoding error aborts.
            let parent_count = commit.iter_parents().try_fold(0u32, |acc, pos| pos.map(|_| acc + 1))?;
            *stats.parent_counts.entry(parent_count).or_insert(0) += 1;
            prev_id = commit.id();
        }
        // An empty file leaves `min_generation` at its sentinel value; report 0 instead.
        if stats.min_generation == GENERATION_NUMBER_INFINITY {
            stats.min_generation = 0;
        }
        Ok(stats)
    }
    /// Assure the [`checksum`][File::checksum()] matches the actual checksum over all content of this file, excluding the trailing
    /// checksum itself.
    ///
    /// Return the actual checksum on success or [`Exn<Message>`] if there is a mismatch.
    pub fn verify_checksum(&self) -> Result<gix_hash::ObjectId, Exn<Message>> {
        // Even though we could use gix_hash::bytes_of_file(…), this would require extending our
        // Error type to support io::Error. As we only gain progress, there probably isn't much value
        // as these files are usually small enough to process them in less than a second, even for the large ones.
        // But it's possible, once a progress instance is passed.
        let data_len_without_trailer = self.data.len() - self.hash_len;
        let mut hasher = gix_hash::hasher(self.object_hash());
        hasher.update(&self.data[..data_len_without_trailer]);
        let actual = hasher
            .try_finalize()
            .map_err(|e| message!("failed to hash commit graph file: {e}").raise())?;
        actual.verify(self.checksum()).map_err(|e| message!("{e}").raise())?;
        Ok(actual)
    }
}
/// If the given path's filename matches "graph-{hash}.graph", check that `hash` matches the
/// expected hash.
///
/// Paths not matching that pattern (or with non-UTF-8 names) are accepted unchanged.
fn verify_split_chain_filename_hash(path: &Path, expected: &gix_hash::oid) -> Result<(), Exn<Message>> {
    let embedded_hex = path
        .file_name()
        .and_then(std::ffi::OsStr::to_str)
        .and_then(|name| name.strip_suffix(".graph"))
        .and_then(|stem| stem.strip_prefix("graph-"));
    match embedded_hex {
        None => Ok(()),
        Some(hex) => match gix_hash::ObjectId::from_hex(hex.as_bytes()) {
            Ok(actual) if actual == expected => Ok(()),
            _ => Err(message!("commit-graph filename should be graph-{}.graph", expected.to_hex()).raise()),
        },
    }
}

106
src-commitgraph/src/init.rs Normal file
View File

@@ -0,0 +1,106 @@
use crate::{File, Graph, MAX_COMMITS};
use gix_error::{message, ErrorExt, Exn, Message, ResultExt};
use std::{
io::{BufRead, BufReader},
path::Path,
};
/// Instantiate a `Graph` from various sources.
impl Graph {
    /// Instantiate a commit graph from `path` which may be a directory containing graph files or the graph file itself.
    pub fn at(path: &Path) -> Result<Self, Exn<Message>> {
        Self::try_from(path)
    }
    /// Instantiate a commit graph from the directory containing all of its files.
    pub fn from_commit_graphs_dir(path: &Path) -> Result<Self, Exn<Message>> {
        let commit_graphs_dir = path;
        let chain_file_path = commit_graphs_dir.join("commit-graph-chain");
        let chain_file = std::fs::File::open(&chain_file_path).or_raise(|| {
            message!(
                "Could not open commit-graph chain file at '{}'",
                chain_file_path.display()
            )
        })?;
        let mut files = Vec::new();
        // The chain file lists one graph-file hash per line; open each referenced file in order.
        for line in BufReader::new(chain_file).lines() {
            let hash = line.or_raise(|| {
                message!(
                    "Could not read from commit-graph file at '{}'",
                    chain_file_path.display()
                )
            })?;
            let graph_file_path = commit_graphs_dir.join(format!("graph-{hash}.graph"));
            files.push(
                File::at(&graph_file_path)
                    .or_raise(|| message!("Could not open commit-graph file at '{}'", graph_file_path.display()))?,
            );
        }
        Ok(Self::new(files)?)
    }
    /// Instantiate a commit graph from a `.git/objects/info/commit-graph` or
    /// `.git/objects/info/commit-graphs/graph-*.graph` file.
    pub fn from_file(path: &Path) -> Result<Self, Exn<Message>> {
        let file = File::at(path).or_raise(|| message!("Could not open commit-graph file at '{}'", path.display()))?;
        Ok(Self::new(vec![file])?)
    }
    /// Instantiate a commit graph from an `.git/objects/info` directory.
    pub fn from_info_dir(info_dir: &Path) -> Result<Self, Exn<Message>> {
        // Prefer the single-file variant; fall back to a split commit-graph chain.
        Self::from_file(&info_dir.join("commit-graph"))
            .or_else(|_| Self::from_commit_graphs_dir(&info_dir.join("commit-graphs")))
    }
    /// Create a new commit graph from a list of `files`.
    ///
    /// Fails if `files` is empty, if the total number of commits exceeds `MAX_COMMITS`,
    /// or if the files do not all use the same object hash kind.
    pub fn new(files: Vec<File>) -> Result<Self, Message> {
        let files = nonempty::NonEmpty::from_vec(files)
            .ok_or_else(|| message!("Commit-graph must contain at least one file"))?;
        let num_commits: u64 = files.iter().map(|f| u64::from(f.num_commits())).sum();
        if num_commits > u64::from(MAX_COMMITS) {
            return Err(message!(
                "Commit-graph files contain {num_commits} commits altogether, but only {MAX_COMMITS} commits are allowed"
            ));
        }
        // Compare adjacent pairs: every file must agree on the object hash kind.
        let mut f1 = files.first();
        for f2 in files.tail() {
            if f1.object_hash() != f2.object_hash() {
                return Err(message!(
                    "Commit-graph files mismatch: '{path1}' uses hash {hash1:?}, but '{path2}' uses hash {hash2:?}",
                    path1 = f1.path().display(),
                    hash1 = f1.object_hash(),
                    path2 = f2.path().display(),
                    hash2 = f2.object_hash(),
                ));
            }
            f1 = f2;
        }
        Ok(Self { files })
    }
}
impl TryFrom<&Path> for Graph {
    type Error = Exn<Message>;

    fn try_from(path: &Path) -> Result<Self, Self::Error> {
        if path.is_file() {
            // Assume we are looking at `.git/objects/info/commit-graph` or
            // `.git/objects/info/commit-graphs/graph-*.graph`.
            return Self::from_file(path);
        }
        if path.is_dir() {
            // A chain file marks a `commit-graphs` split directory; otherwise treat it as `info/`.
            return if path.join("commit-graph-chain").is_file() {
                Self::from_commit_graphs_dir(path)
            } else {
                Self::from_info_dir(path)
            };
        }
        Err(message!(
            "Did not find any files that look like commit graphs at '{}'",
            path.display()
        )
        .raise())
    }
}

View File

@@ -0,0 +1,78 @@
//! Read, verify, and traverse git commit graphs.
//!
//! A [commit graph][Graph] is an index of commits in the git commit history.
//! The [Graph] stores commit data in a way that accelerates lookups considerably compared to
//! traversing the git history by usual means.
//!
//! As generating the full commit graph from scratch can take some time, git may write new commits
//! to separate [files][File] instead of overwriting the original file.
//! Eventually, git will merge these files together as the number of files grows.
//! ## Feature Flags
#![cfg_attr(
all(doc, feature = "document-features"),
doc = ::document_features::document_features!()
)]
#![cfg_attr(all(doc, feature = "document-features"), feature(doc_cfg))]
#![deny(missing_docs, rust_2018_idioms, unsafe_code)]
use gix_error::{Exn, Message};
use std::path::Path;
/// A single commit-graph file.
///
/// All operations on a `File` are local to that graph file. Since a commit graph can span multiple
/// files, all interesting graph operations belong on [`Graph`].
pub struct File {
    // How many base graph files this file builds upon in a split chain; a `u8`, so at most 255.
    base_graph_count: u8,
    // Byte offset into `data` of the base-graphs list, present only when this file has base graphs.
    base_graphs_list_offset: Option<usize>,
    // Byte offset into `data` where per-commit data begins.
    commit_data_offset: usize,
    // The entire graph file, memory-mapped for read-only access.
    data: memmap2::Mmap,
    // Byte range within `data` of the extra-edges list, when present.
    // NOTE(review): presumably holds parents beyond the second (octopus merges) — confirm against the format docs.
    extra_edges_list_range: Option<std::ops::Range<usize>>,
    // The fanout table over leading object-id bytes, used to narrow id lookups.
    fan: [u32; file::FAN_LEN],
    // Byte offset into `data` of the sorted object-id lookup table.
    oid_lookup_offset: usize,
    // The path this file was opened from, used for error reporting.
    path: std::path::PathBuf,
    // Length in bytes of a single hash in this file (derived from `object_hash`).
    hash_len: usize,
    // The kind of object hash (e.g. SHA-1) used throughout this file.
    object_hash: gix_hash::Kind,
}
/// A complete commit graph.
///
/// The data in the commit graph may come from a monolithic `objects/info/commit-graph` file, or it
/// may come from one or more `objects/info/commit-graphs/graph-*.graph` files. These files are
/// generated via `git commit-graph write ...` commands.
pub struct Graph {
    // At least one file, in chain order: `verify_integrity()` expects file N to list the
    // checksums of files 0..N as its base graphs, i.e. base files come first.
    files: nonempty::NonEmpty<File>,
}
/// Instantiate a commit graph from an `.git/objects/info` directory, or one of the various commit-graph files.
pub fn at(path: impl AsRef<Path>) -> Result<Graph, Exn<Message>> {
    // Thin convenience wrapper that resolves the borrow-like argument for `Graph::at()`.
    let path = path.as_ref();
    Graph::at(path)
}
mod access;
pub mod file;
///
pub mod init;
pub mod verify;
/// The number of generations that are considered 'infinite' commit history.
pub const GENERATION_NUMBER_INFINITY: u32 = 0xffff_ffff;
/// The largest valid generation number.
///
/// If a commit's real generation number is larger than this, the commit graph will cap the value to
/// this number.
/// The largest distinct generation number is `GENERATION_NUMBER_MAX - 1`.
pub const GENERATION_NUMBER_MAX: u32 = 0x3fff_ffff;
/// The maximum number of commits that can be stored in a commit graph.
// (1 << 30) + (1 << 29) + (1 << 28) - 1 == 0x6fff_ffff, i.e. just under 1.9 billion commits.
pub const MAX_COMMITS: u32 = (1 << 30) + (1 << 29) + (1 << 28) - 1;
/// A generalized position for use in [`Graph`].
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd, Hash)]
pub struct Position(pub u32);

impl std::fmt::Display for Position {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Delegate to the inner `u32` so width/fill/alignment flags on the formatter are honored.
        let Position(value) = self;
        value.fmt(f)
    }
}

View File

@@ -0,0 +1,141 @@
//! Auxiliary types used by graph verification methods.
use std::{
cmp::{max, min},
collections::BTreeMap,
};
use gix_error::{message, ErrorExt, Exn, Message, ResultExt};
use crate::{
file::{self},
Graph, Position, GENERATION_NUMBER_MAX,
};
/// Statistics gathered while verifying the integrity of the graph as returned by [`Graph::verify_integrity()`].
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub struct Outcome {
    /// The length of the longest path between any two commits in this graph.
    ///
    /// For example, this will be `Some(9)` for a commit graph containing 10 linear commits.
    /// This will be `Some(0)` for a commit graph containing 0 or 1 commits.
    /// If the longest path length is too large to fit in a [u32], then this will be [None].
    ///
    /// Note that this is derived from generation numbers and thus becomes [None] once any
    /// generation reaches the `GENERATION_NUMBER_MAX` cap.
    pub longest_path_length: Option<u32>,
    /// The total number of commits traversed.
    pub num_commits: u32,
    /// A mapping of `N -> number of commits with N parents`.
    pub parent_counts: BTreeMap<u32, u32>,
}
impl Graph {
    /// Traverse all commits in the graph and call `processor(&commit) -> Result<(), E>` on it while verifying checksums.
    ///
    /// When `processor` returns an error, the entire verification is stopped and the error returned.
    pub fn verify_integrity<E>(
        &self,
        mut processor: impl FnMut(&file::Commit<'_>) -> Result<(), E>,
    ) -> Result<Outcome, Exn<Message>>
    where
        E: std::error::Error + Send + Sync + 'static,
    {
        if self.files.len() > 256 {
            // A file in a split chain can only have up to 255 base files.
            return Err(message!(
                "Commit-graph should be composed of at most 256 files but actually contains {} files",
                self.files.len()
            )
            .raise());
        }

        let mut stats = Outcome {
            longest_path_length: None,
            num_commits: 0,
            parent_counts: BTreeMap::new(),
        };
        let mut max_generation = 0u32;

        // TODO: Detect duplicate commit IDs across different files. Not sure how to do this without
        // a separate loop, e.g. self.iter_sorted_ids().

        // Commit positions are global across the whole chain; track where each file's range starts.
        let mut file_start_pos = Position(0);
        for (file_index, file) in self.files.iter().enumerate() {
            // Chain invariant: file N must build on exactly the N files that precede it.
            if usize::from(file.base_graph_count()) != file_index {
                return Err(message!(
                    "'{}' should have {} base graphs, but claims {} base graphs",
                    file.path().display(),
                    file_index,
                    file.base_graph_count()
                )
                .raise());
            }
            // Each base-graph ID recorded in this file must equal the checksum of the
            // corresponding earlier file in the chain.
            for (base_graph_index, (expected, actual)) in self
                .files
                .iter()
                .take(file_index)
                .map(crate::File::checksum)
                .zip(file.iter_base_graph_ids())
                .enumerate()
            {
                if actual != expected {
                    return Err(message!(
                        "'{}' base graph at index {} should have ID {} but is {}",
                        file.path().display(),
                        base_graph_index,
                        expected,
                        actual
                    )
                    .raise());
                }
            }

            // This file covers global positions [file_start_pos, next_file_start_pos).
            let next_file_start_pos = Position(file_start_pos.0 + file.num_commits());
            let file_stats = file.traverse(|commit| {
                let mut max_parent_generation = 0u32;
                for parent_pos in commit.iter_parents() {
                    let parent_pos = parent_pos.map_err(|err| err.raise_erased())?;
                    // Parents may only refer to commits in this file or in an earlier one.
                    if parent_pos >= next_file_start_pos {
                        return Err(message!(
                            "Commit {} has parent position {parent_pos} that is out of range (should be in range 0-{})",
                            commit.id(),
                            Position(next_file_start_pos.0 - 1)
                        )
                        .raise_erased());
                    }
                    let parent = self.commit_at(parent_pos);
                    max_parent_generation = max(max_parent_generation, parent.generation());
                }

                // If the max parent generation is GENERATION_NUMBER_MAX, then this commit's
                // generation should be GENERATION_NUMBER_MAX too.
                let expected_generation = min(max_parent_generation + 1, GENERATION_NUMBER_MAX);
                if commit.generation() != expected_generation {
                    return Err(message!(
                        "Commit {}'s generation should be {expected_generation} but is {}",
                        commit.id(),
                        commit.generation()
                    )
                    .raise_erased());
                }

                processor(commit).or_raise_erased(|| message!("processor failed on commit {id}", id = commit.id()))?;

                Ok(())
            })?;

            // Fold this file's statistics into the graph-wide outcome.
            max_generation = max(max_generation, file_stats.max_generation);
            stats.num_commits += file_stats.num_commits;
            for (key, value) in file_stats.parent_counts.into_iter() {
                *stats.parent_counts.entry(key).or_insert(0) += value;
            }
            file_start_pos = next_file_start_pos;
        }

        // A capped generation number means the real longest path cannot be known from the graph.
        stats.longest_path_length = if max_generation < GENERATION_NUMBER_MAX {
            Some(max_generation.saturating_sub(1))
        } else {
            None
        };
        Ok(stats)
    }
}

View File

@@ -0,0 +1,113 @@
use crate::{check_common, graph_and_expected, graph_and_expected_named};
#[test]
fn single_parent() {
    let (cg, refs) = graph_and_expected("single_parent.sh", &["parent", "child"]);
    check_common(&cg, &refs);
    // Roots sit at generation 1; each parent link adds one.
    for (name, generation) in [("parent", 1), ("child", 2)] {
        assert_eq!(cg.commit_at(refs[name].pos()).generation(), generation);
    }
}
#[test]
fn single_commit_huge_dates_generation_v2_also_do_not_allow_huge_dates() {
    let (cg, refs) = graph_and_expected_named("single_commit_huge_dates.sh", "v2", &["HEAD"]);
    let head = &refs["HEAD"];
    // The graph stores a truncated timestamp even with generation-data version 2.
    let commit = cg.commit_by_id(head.id).expect("present");
    assert_eq!(
        commit.committer_timestamp(),
        1,
        "overflow happened, can't represent huge dates"
    );
    assert_eq!(
        head.time.seconds, 68719476737,
        "this is the value we would want to see, but it's not possible in V2 either, as that is just about generations"
    );
    assert_eq!(commit.generation(), 1, "generations are fine though");
}
#[test]
fn single_commit_huge_dates_overflow_v1() {
    let (cg, refs) = graph_and_expected_named("single_commit_huge_dates.sh", "v1", &["HEAD"]);
    let head = &refs["HEAD"];
    // With generation version 1 the on-disk timestamp field cannot hold this date.
    let commit = cg.commit_by_id(head.id).expect("present");
    assert_eq!(commit.committer_timestamp(), 1, "overflow happened");
    assert_eq!(
        head.time.seconds, 68719476737,
        "this is the value we would want to see, but it's not possible in V1"
    );
    assert_eq!(commit.generation(), 1, "generations are fine though");
}
#[test]
fn single_commit_future_64bit_dates_work() {
    let (cg, refs) = graph_and_expected_named("single_commit_huge_dates.sh", "max-date", &["HEAD"]);
    let info = &refs["HEAD"];
    let actual = cg.commit_by_id(info.id).expect("present");
    // Dates close to (but below) the graph's representable maximum must round-trip exactly.
    assert_eq!(
        actual.committer_timestamp(),
        info.time.seconds.try_into().expect("timestamps in bound"),
        "this is close to the highest representable value in the graph, like year 2500, so we are good for longer than I should care about"
    );
    assert_eq!(actual.generation(), 1);
}
#[test]
fn generation_numbers_overflow_is_handled_in_chained_graph() {
    let names = ["extra", "old-2", "future-2", "old-1", "future-1"];
    let (cg, mut refs) = graph_and_expected("generation_number_overflow.sh", &names);
    // Names are listed newest-first, so expected generations count down from 5 to 1.
    for (expected, name) in (1..=5).rev().zip(names) {
        let info = refs.remove(name).expect("present");
        assert_eq!(
            cg.commit_by_id(info.id).expect("present").generation(),
            expected,
            "actually, this test seems to have valid generation numbers from the get-go. How to repro the actual issue?"
        );
    }
}
#[test]
fn octopus_merges() {
    // Pairs of (ref name, expected generation number).
    let expectations = [
        ("root", 1),
        ("parent1", 2),
        ("parent2", 2),
        ("parent3", 2),
        ("parent4", 2),
        ("three_parents", 3),
        ("four_parents", 3),
    ];
    let names: Vec<_> = expectations.iter().map(|(name, _)| *name).collect();
    let (cg, refs) = graph_and_expected("octopus_merges.sh", &names);
    check_common(&cg, &refs);
    for (name, generation) in expectations {
        assert_eq!(cg.commit_at(refs[name].pos()).generation(), generation);
    }
}
#[test]
fn single_commit() {
    let (cg, refs) = graph_and_expected("single_commit.sh", &["commit"]);
    check_common(&cg, &refs);
    // A lone root commit is always at generation 1.
    let commit = cg.commit_at(refs["commit"].pos());
    assert_eq!(commit.generation(), 1);
}
#[test]
fn two_parents() {
    let (cg, refs) = graph_and_expected("two_parents.sh", &["parent1", "parent2", "child"]);
    check_common(&cg, &refs);
    // Both roots are generation 1; the merge is one generation above its parents.
    for (name, generation) in [("parent1", 1), ("parent2", 1), ("child", 2)] {
        assert_eq!(cg.commit_at(refs[name].pos()).generation(), generation);
    }
}

View File

@@ -0,0 +1,178 @@
use std::{
collections::{HashMap, HashSet},
hash::BuildHasher,
io::{BufRead, Cursor},
path::Path,
process::Command,
};
use gix_commitgraph::{Graph, Position as GraphPosition};
use gix_testtools::scripted_fixture_read_only;
mod access;
/// Validate that commit-graph `cg` is internally consistent and agrees with the `expected`
/// per-ref data gathered from `git show`: positions, ids, timestamps, tree ids, and parents.
pub fn check_common(cg: &Graph, expected: &HashMap<String, RefInfo, impl BuildHasher>) {
    cg.verify_integrity(|_| Ok::<_, gix_error::Message>(()))
        .expect("graph is valid");
    assert_eq!(
        usize::try_from(cg.num_commits()).expect("an architecture able to hold 32 bits of integer"),
        expected.len()
    );
    for ref_info in expected.values() {
        // The position <-> id mapping must be consistent in both directions.
        assert_eq!(cg.id_at(ref_info.pos()), ref_info.id(), "id_at({})", ref_info.pos());
        assert_eq!(
            cg.lookup(ref_info.id()),
            Some(ref_info.pos()),
            "lookup({})",
            ref_info.id()
        );
        // Resolve expected parent ids to their RefInfo entries so positions can be compared.
        let expected_parents: Vec<_> = ref_info
            .parent_ids()
            .map(|id| {
                expected
                    .values()
                    .find(|item| item.id() == id)
                    .expect("find RefInfo by id")
            })
            .collect();
        let commit = cg.commit_at(ref_info.pos());
        assert_eq!(commit.id(), ref_info.id());
        assert_eq!(
            commit.committer_timestamp(),
            ref_info.time.seconds.try_into().expect("timestamp in bounds")
        );
        assert_eq!(commit.root_tree_id(), ref_info.root_tree_id());
        // `parent1()` yields only the first parent; `iter_parents()` yields all of them in order.
        assert_eq!(
            commit.parent1().expect("failed to access commit's parent1"),
            expected_parents.iter().map(|x| x.pos()).next()
        );
        assert_eq!(
            commit
                .iter_parents()
                .collect::<std::result::Result<Vec<_>, _>>()
                .expect("failed to access commit's parents"),
            expected_parents.iter().map(|x| x.pos()).collect::<Vec<_>>()
        );
    }
    // The graph must contain exactly the expected set of commit ids — no more, no fewer.
    assert_eq!(
        cg.iter_ids().collect::<HashSet<_>>(),
        expected.values().map(RefInfo::id).collect::<HashSet<_>>()
    );
}
/// Like [`graph_and_expected_named()`], but for fixture scripts that create their repository
/// at the root of the fixture directory (no named subdirectory).
pub fn graph_and_expected(
    script_path: &str,
    refs: &[&'static str],
) -> (gix_commitgraph::Graph, HashMap<String, RefInfo>) {
    graph_and_expected_named(script_path, "", refs)
}
/// Run the fixture script at `script_path`, open the commit-graph of the repository in its
/// `name` subdirectory (empty string for the root), and return it along with the expected
/// data for each of `refs` as reported by `git`.
pub fn graph_and_expected_named(
    script_path: &str,
    name: &str,
    refs: &[&'static str],
) -> (gix_commitgraph::Graph, HashMap<String, RefInfo>) {
    let repo_dir = scripted_fixture_read_only(script_path)
        .expect("script succeeds all the time")
        .join(name);
    let expected = inspect_refs(&repo_dir, refs);

    let info_dir = repo_dir.join(".git").join("objects").join("info");
    let cg = Graph::from_info_dir(&info_dir).expect("graph present and valid");

    // Sanity-check that the graph's hash kind matches the ids the fixture produced.
    let any_ref = expected.values().next().expect("at least one ref");
    assert_eq!(
        cg.object_hash(),
        any_ref.id().kind(),
        "graph hash kind should match fixture object IDs"
    );
    (cg, expected)
}
/// Per-ref commit data collected from `git show`, serving as the expected values when
/// checking a commit-graph.
pub struct RefInfo {
    // The commit id the ref resolves to.
    id: gix_hash::ObjectId,
    // The committer timestamp of the commit.
    pub time: gix_date::Time,
    // Ids of all parent commits, in the order `git show` reported them.
    parent_ids: Vec<gix_hash::ObjectId>,
    // The commit's position in the graph (its index in the sorted id list).
    pos: GraphPosition,
    // The id of the commit's root tree.
    root_tree_id: gix_hash::ObjectId,
}
impl RefInfo {
    /// The id of this commit.
    pub fn id(&self) -> &gix_hash::oid {
        &self.id
    }

    /// The commit's position within the commit-graph.
    pub fn pos(&self) -> GraphPosition {
        self.pos
    }

    /// All parent ids of this commit, in order.
    pub fn parent_ids(&self) -> impl Iterator<Item = &gix_hash::oid> {
        self.parent_ids.iter().map(|id| id.as_ref())
    }

    /// The id of this commit's root tree.
    pub fn root_tree_id(&self) -> &gix_hash::oid {
        &self.root_tree_id
    }
}
/// Run `git show` in `repo_dir` to collect, for each of `refs`, its commit id, root tree id,
/// committer timestamp, and parent ids, keyed by ref name.
fn inspect_refs(repo_dir: impl AsRef<Path>, refs: &[&'static str]) -> HashMap<String, RefInfo> {
    let output = Command::new("git")
        .arg("-C")
        .arg(repo_dir.as_ref())
        .arg("show")
        .arg("--no-patch")
        .arg("--pretty=format:%S %H %T %ct %P")
        .args(refs)
        .arg("--")
        .env_remove("GIT_DIR")
        .output()
        .expect("failed to execute `git show`");
    // Output format (one line per ref, matching the pretty-format above):
    // <refname> <id> <tree_id> <commit_time> <parent_ids...>
    let mut infos: Vec<_> = Cursor::new(output.stdout)
        .lines()
        .map(|x| x.expect("failed to read `git show` output"))
        .map(|x| {
            let parts = x.trim_end().split(' ').collect::<Vec<_>>();
            (
                parts[0].to_string(),
                gix_hash::ObjectId::from_hex(parts[1].as_bytes()).expect("40 bytes hex"),
                gix_hash::ObjectId::from_hex(parts[2].as_bytes()).expect("40 bytes hex"),
                gix_date::Time::new(parts[3].parse().expect("valid stamp"), 0),
                parts[4..]
                    .iter()
                    .map(|x| gix_hash::ObjectId::from_hex(x.as_bytes()).expect("40 bytes hex"))
                    .collect(),
            )
        })
        .collect();
    // Sort by id so a commit's index equals its graph position (commit-graph files store
    // object ids in sorted order, which `check_common` asserts via `id_at`/`lookup`).
    infos.sort_by_key(|x| x.1);
    let get_pos = |id: &gix_hash::oid| -> GraphPosition {
        let pos: u32 = infos
            .binary_search_by_key(&id, |x| &x.1)
            .expect("sorted_ids to contain id")
            .try_into()
            .expect("graph position to fit in u32");
        GraphPosition(pos)
    };
    infos
        .iter()
        .cloned()
        .map(|(name, id, root_tree_id, time, parent_ids)| {
            (
                name,
                RefInfo {
                    id,
                    parent_ids,
                    root_tree_id,
                    time,
                    pos: get_pos(&id),
                },
            )
        })
        .collect()
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,65 @@
#!/usr/bin/env bash
set -eu -o pipefail

# Advance a deterministic timestamp by one minute per call and export it as both the
# committer and author date, so commit hashes are reproducible across runs.
function tick() {
  if test -z "${tick+set}"
  then
    tick=1112911993
  else
    tick=$(($tick + 60))
  fi
  GIT_COMMITTER_DATE="$tick -0700"
  GIT_AUTHOR_DATE="$tick -0700"
  export GIT_COMMITTER_DATE GIT_AUTHOR_DATE
}

# Fallback used when `git tag` fails: write the tag ref file by hand (noclobber guards
# against overwriting an existing tag).
function force_tag() {
  local name head_oid common_dir
  name=${1:?argument the tag name}
  # This should only be needed with 32-bit `git`, so fail otherwise.
  word_size="$(
    git --version --build-options |
      awk '$1 == "sizeof-size_t:" { print $2 }'
  )"
  ((word_size == 4))
  # Manually create the tag.
  head_oid="$(git rev-parse HEAD)"
  common_dir="$(git rev-parse --git-common-dir)"
  (set -o noclobber; echo "$head_oid" > "$common_dir/refs/tags/$name")
}

# Create a commit whose message doubles as its tag name. An optional second argument
# pins the committer date; otherwise the deterministic clock advances.
function tagged_commit() {
  local message=${1:?first argument is the commit message and tag name}
  local date=${2:-}
  local file="$message.t"
  echo "$1" > "$file"
  git add -- "$file"
  if [ -n "$date" ]; then
    export GIT_COMMITTER_DATE="$date"
  else
    tick
  fi
  git commit -m "$message"
  git tag -- "$message" || force_tag "$message"
}

tick

# adapted from git/t/t5318 'lower layers have overflow chunk'
UNIX_EPOCH_ZERO="@0 +0000"
FUTURE_DATE="@4147483646 +0000"

git init
git config commitGraph.generationVersion 2

# Interleave far-future and epoch-zero commit dates across several graph layers,
# writing a new layer after each pair, to exercise chained-graph generation handling.
tagged_commit future-1 "$FUTURE_DATE"
tagged_commit old-1 "$UNIX_EPOCH_ZERO"
git commit-graph write --reachable

tagged_commit future-2 "$FUTURE_DATE"
tagged_commit old-2 "$UNIX_EPOCH_ZERO"
git commit-graph write --reachable --split=no-merge

tagged_commit extra
# this makes sure it's actually in chain format.
git commit-graph write --reachable --split=no-merge

View File

@@ -0,0 +1,28 @@
#!/usr/bin/env bash
set -eu -o pipefail

# Build a history with one root, four sibling branches, and two merges with more than
# two parents each (3 and 4), then write the commit-graph and repack.
git init -q
git checkout -q --orphan root
git commit -q --allow-empty -m root
git checkout -q -b parent1 root
git commit -q --allow-empty -m parent1
git checkout -q -b parent2 root
git commit -q --allow-empty -m parent2
git checkout -q -b parent3 root
git commit -q --allow-empty -m parent3
git checkout -q -b parent4 root
git commit -q --allow-empty -m parent4

git checkout -q -b three_parents parent1
git merge -q -m three_parents --no-ff parent2 parent3 >/dev/null

git checkout -q -b four_parents parent2
git merge -q -m four_parents --no-ff parent1 parent3 parent4 >/dev/null

git commit-graph write --no-progress --reachable
git repack -adq

View File

@@ -0,0 +1,12 @@
#!/usr/bin/env bash
set -eu -o pipefail

# The goal with this repo is to have the smallest commit-graph file possible, in the hopes that an
# edge case around minimal or absent chunks would surface if mishandled.
# NOTE(review): the original comment was truncated mid-sentence; the completion above is a best guess.
git init -q
git checkout -q -b commit
git commit -q --allow-empty -m commit
git commit-graph write --no-progress --reachable
git repack -adq

View File

@@ -0,0 +1,20 @@
#!/usr/bin/env bash
set -eu -o pipefail
function setup_repo() {
local version=${1:?need generation version}
local time=${2:?timestamp seconds since unix epoch}
git init -q
# one past the max 32bit date git can represent
export GIT_COMMITTER_DATE="@${time} +0000"
git config commitGraph.generationVersion ${version}
git commit -q --allow-empty -m c1
git commit-graph write --no-progress --reachable
}
(mkdir v1 && cd v1 && setup_repo 1 68719476737) # the year 4000 something (overflows in graph)
(mkdir v2 && cd v2 && setup_repo 2 68719476737)
(mkdir max-date && cd max-date && setup_repo 1 17147483646) # the year 2500ish

View File

@@ -0,0 +1,13 @@
#!/usr/bin/env bash
set -eu -o pipefail
git init -q
git checkout -q -b parent
git commit -q --allow-empty -m parent
git checkout -q -b child parent
git commit -q --allow-empty -m child
git commit-graph write --no-progress --reachable
git repack -adq

View File

@@ -0,0 +1,16 @@
#!/usr/bin/env bash
set -eu -o pipefail
git init -q
git checkout -q -b commit1
git commit -q --allow-empty -m commit1
git checkout -q -b commit2 commit1
git commit -q --allow-empty -m commit2
git checkout -q -b commit3 commit2
git commit -q --allow-empty -m commit3
git show-ref -s commit1 | git commit-graph write --no-progress --split=no-merge --stdin-commits
git show-ref -s commit2 | git commit-graph write --no-progress --split=no-merge --stdin-commits
git show-ref -s commit3 | git commit-graph write --no-progress --split=no-merge --stdin-commits
git repack -adq

View File

@@ -0,0 +1,16 @@
#!/usr/bin/env bash
set -eu -o pipefail
git init -q
git checkout -q --orphan parent1
git commit -q --allow-empty -m parent1
git checkout -q --orphan parent2
git commit -q --allow-empty -m parent2
git checkout -q -b child parent1
git merge -q --allow-unrelated-histories --no-ff -m child parent2 >/dev/null
git commit-graph write --no-progress --reachable
git repack -adq