mirror of
https://github.com/awfixers-stuff/src.git
synced 2026-03-29 05:45:58 +00:00
create src
This commit is contained in:
1532
src-commitgraph/CHANGELOG.md
Normal file
1532
src-commitgraph/CHANGELOG.md
Normal file
File diff suppressed because it is too large
Load Diff
49
src-commitgraph/Cargo.toml
Normal file
49
src-commitgraph/Cargo.toml
Normal file
@@ -0,0 +1,49 @@
|
||||
lints.workspace = true
|
||||
|
||||
[package]
|
||||
name = "src-commitgraph"
|
||||
version = "0.34.0"
|
||||
repository = "https://github.com/GitoxideLabs/gitoxide"
|
||||
documentation = "https://git-scm.com/docs/commit-graph"
|
||||
license = "MIT OR Apache-2.0"
|
||||
description = "Read-only access to the git commitgraph file format"
|
||||
authors = ["Conor Davis <gitoxide@conor.fastmail.fm>", "Sebastian Thiel <sebastian.thiel@icloud.com>"]
|
||||
edition = "2021"
|
||||
include = ["src/**/*", "LICENSE-*"]
|
||||
rust-version = "1.82"
|
||||
|
||||
[lib]
|
||||
doctest = false
|
||||
|
||||
[features]
|
||||
## Enable support for the SHA-1 hash by enabling the respective feature in the `src-hash` crate.
|
||||
sha1 = ["src-hash/sha1"]
|
||||
## Data structures implement `serde::Serialize` and `serde::Deserialize`
|
||||
serde = ["dep:serde", "src-hash/serde", "bstr/serde"]
|
||||
|
||||
[dependencies]
|
||||
src-hash = { version = "^0.22.1", path = "../src-hash" }
|
||||
src-chunk = { version = "^0.7.0", path = "../src-chunk" }
|
||||
src-error = { version = "^0.2.0", path = "../src-error" }
|
||||
|
||||
bstr = { version = "1.12.0", default-features = false, features = ["std"] }
|
||||
memmap2 = "0.9.10"
|
||||
nonempty = "0.12.0"
|
||||
serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] }
|
||||
|
||||
document-features = { version = "0.2.0", optional = true }
|
||||
|
||||
[dev-dependencies]
|
||||
src-testtools = { path = "../tests/tools" }
|
||||
src-date = { path = "../src-date" }
|
||||
src-hash = { path = "../src-hash", features = ["sha1", "sha256"] }
|
||||
|
||||
[package.metadata.docs.rs]
|
||||
all-features = true
|
||||
features = ["sha1", "document-features"]
|
||||
|
||||
[package.metadata.cargo-machete]
|
||||
ignored = [
|
||||
# Needed for `bstr/serde` feature forwarding, even though no direct `bstr` paths are referenced.
|
||||
"bstr",
|
||||
]
|
||||
1
src-commitgraph/LICENSE-APACHE
Symbolic link
1
src-commitgraph/LICENSE-APACHE
Symbolic link
@@ -0,0 +1 @@
|
||||
../LICENSE-APACHE
|
||||
1
src-commitgraph/LICENSE-MIT
Symbolic link
1
src-commitgraph/LICENSE-MIT
Symbolic link
@@ -0,0 +1 @@
|
||||
../LICENSE-MIT
|
||||
9
src-commitgraph/fuzz/.gitignore
vendored
Normal file
9
src-commitgraph/fuzz/.gitignore
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
target
|
||||
corpus
|
||||
artifacts
|
||||
coverage
|
||||
|
||||
# These usually involve a lot of local CPU time, keep them.
|
||||
$artifacts
|
||||
$corpus
|
||||
|
||||
37
src-commitgraph/fuzz/Cargo.toml
Normal file
37
src-commitgraph/fuzz/Cargo.toml
Normal file
@@ -0,0 +1,37 @@
|
||||
[package]
|
||||
name = "src-commitgraph-fuzz"
|
||||
version = "0.0.0"
|
||||
publish = false
|
||||
edition = "2021"
|
||||
|
||||
[package.metadata]
|
||||
cargo-fuzz = true
|
||||
|
||||
[package.metadata.cargo-machete]
|
||||
ignored = [
|
||||
# Kept for fuzz-input modeling support in this fuzz package.
|
||||
"arbitrary",
|
||||
]
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.76"
|
||||
arbitrary = { version = "1.3.2", features = ["derive"] }
|
||||
libfuzzer-sys = "0.4"
|
||||
memmap2 = "0.9.0"
|
||||
|
||||
[dependencies.src-commitgraph]
|
||||
path = ".."
|
||||
features = ["sha1"]
|
||||
|
||||
# Prevent this from interfering with workspaces
|
||||
[workspace]
|
||||
members = ["."]
|
||||
|
||||
[profile.release]
|
||||
debug = 1
|
||||
|
||||
[[bin]]
|
||||
name = "fuzz_file"
|
||||
path = "fuzz_targets/fuzz_file.rs"
|
||||
test = false
|
||||
doc = false
|
||||
29
src-commitgraph/fuzz/fuzz_targets/fuzz_file.rs
Normal file
29
src-commitgraph/fuzz/fuzz_targets/fuzz_file.rs
Normal file
@@ -0,0 +1,29 @@
|
||||
#![no_main]
|
||||
|
||||
use anyhow::Result;
|
||||
use gix_commitgraph::File;
|
||||
use libfuzzer_sys::fuzz_target;
|
||||
use std::hint::black_box;
|
||||
|
||||
fn fuzz(data: &[u8]) -> Result<()> {
|
||||
let data = {
|
||||
let mut d = memmap2::MmapMut::map_anon(data.len())?;
|
||||
d.copy_from_slice(data);
|
||||
d.make_read_only()?
|
||||
};
|
||||
let file = File::new(data, "does not matter".into()).map_err(|e| e.into_inner())?;
|
||||
|
||||
_ = black_box(file.iter_base_graph_ids().count());
|
||||
_ = black_box(file.iter_commits().count());
|
||||
_ = black_box(file.iter_ids().count());
|
||||
|
||||
let _ = black_box(file.checksum());
|
||||
let _ = black_box(file.verify_checksum());
|
||||
let _ = black_box(file.object_hash());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fuzz_target!(|data: &[u8]| {
|
||||
_ = black_box(fuzz(data));
|
||||
});
|
||||
104
src-commitgraph/src/access.rs
Normal file
104
src-commitgraph/src/access.rs
Normal file
@@ -0,0 +1,104 @@
|
||||
use crate::{file, file::Commit, File, Graph, Position};
|
||||
|
||||
/// Access
|
||||
impl Graph {
|
||||
/// Returns the commit at the given position `pos`.
|
||||
///
|
||||
/// # Panics
|
||||
/// If `pos` is greater or equal to [`num_commits()`][Graph::num_commits()].
|
||||
pub fn commit_at(&self, pos: Position) -> Commit<'_> {
|
||||
let r = self.lookup_by_pos(pos);
|
||||
r.file.commit_at(r.pos)
|
||||
}
|
||||
|
||||
/// The kind of hash used in this `Graph`.
|
||||
///
|
||||
/// Note that it is always conforming to the hash used in the owning repository.
|
||||
pub fn object_hash(&self) -> gix_hash::Kind {
|
||||
self.files.first().object_hash()
|
||||
}
|
||||
|
||||
/// Returns the commit matching the given `id`.
|
||||
pub fn commit_by_id(&self, id: impl AsRef<gix_hash::oid>) -> Option<Commit<'_>> {
|
||||
let r = self.lookup_by_id(id.as_ref())?;
|
||||
Some(r.file.commit_at(r.file_pos))
|
||||
}
|
||||
|
||||
/// Returns the `hash` at the given position `pos`.
|
||||
///
|
||||
/// # Panics
|
||||
/// If `pos` is greater or equal to [`num_commits()`][Graph::num_commits()].
|
||||
pub fn id_at(&self, pos: Position) -> &gix_hash::oid {
|
||||
let r = self.lookup_by_pos(pos);
|
||||
r.file.id_at(r.pos)
|
||||
}
|
||||
|
||||
/// Iterate over commits in unsorted order.
|
||||
pub fn iter_commits(&self) -> impl Iterator<Item = Commit<'_>> {
|
||||
self.files.iter().flat_map(File::iter_commits)
|
||||
}
|
||||
|
||||
/// Iterate over commit IDs in unsorted order.
|
||||
pub fn iter_ids(&self) -> impl Iterator<Item = &gix_hash::oid> {
|
||||
self.files.iter().flat_map(File::iter_ids)
|
||||
}
|
||||
|
||||
/// Translate the given `id` to its position in the file.
|
||||
pub fn lookup(&self, id: impl AsRef<gix_hash::oid>) -> Option<Position> {
|
||||
Some(self.lookup_by_id(id.as_ref())?.graph_pos)
|
||||
}
|
||||
|
||||
/// Returns the number of commits stored in this file.
|
||||
pub fn num_commits(&self) -> u32 {
|
||||
self.files.iter().map(File::num_commits).sum()
|
||||
}
|
||||
}
|
||||
|
||||
/// Access fundamentals
|
||||
impl Graph {
|
||||
fn lookup_by_id(&self, id: &gix_hash::oid) -> Option<LookupByIdResult<'_>> {
|
||||
let mut current_file_start = 0;
|
||||
for file in &self.files {
|
||||
if let Some(lex_pos) = file.lookup(id) {
|
||||
return Some(LookupByIdResult {
|
||||
file,
|
||||
file_pos: lex_pos,
|
||||
graph_pos: Position(current_file_start + lex_pos.0),
|
||||
});
|
||||
}
|
||||
current_file_start += file.num_commits();
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn lookup_by_pos(&self, pos: Position) -> LookupByPositionResult<'_> {
|
||||
let mut remaining = pos.0;
|
||||
for (file_index, file) in self.files.iter().enumerate() {
|
||||
match remaining.checked_sub(file.num_commits()) {
|
||||
Some(v) => remaining = v,
|
||||
None => {
|
||||
return LookupByPositionResult {
|
||||
file,
|
||||
_file_index: file_index,
|
||||
pos: file::Position(remaining),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
panic!("graph position too large: {}", pos.0);
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct LookupByIdResult<'a> {
|
||||
pub file: &'a File,
|
||||
pub graph_pos: Position,
|
||||
pub file_pos: file::Position,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct LookupByPositionResult<'a> {
|
||||
pub file: &'a File,
|
||||
pub _file_index: usize,
|
||||
pub pos: file::Position,
|
||||
}
|
||||
140
src-commitgraph/src/file/access.rs
Normal file
140
src-commitgraph/src/file/access.rs
Normal file
@@ -0,0 +1,140 @@
|
||||
use std::{
|
||||
fmt::{Debug, Formatter},
|
||||
path::Path,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
file::{self, commit::Commit, COMMIT_DATA_ENTRY_SIZE_SANS_HASH},
|
||||
File,
|
||||
};
|
||||
|
||||
/// Access
|
||||
impl File {
|
||||
/// The number of base graphs that this file depends on.
|
||||
pub fn base_graph_count(&self) -> u8 {
|
||||
self.base_graph_count
|
||||
}
|
||||
|
||||
/// Returns the commit data for the commit located at the given lexicographical position.
|
||||
///
|
||||
/// `pos` must range from 0 to `self.num_commits()`.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if `pos` is out of bounds.
|
||||
pub fn commit_at(&self, pos: file::Position) -> Commit<'_> {
|
||||
Commit::new(self, pos)
|
||||
}
|
||||
|
||||
/// The kind of hash used in this File.
|
||||
///
|
||||
/// Note that it is always conforming to the hash used in the owning repository.
|
||||
pub fn object_hash(&self) -> gix_hash::Kind {
|
||||
self.object_hash
|
||||
}
|
||||
|
||||
/// Returns an object id at the given index in our list of (sorted) hashes.
|
||||
/// The position ranges from 0 to `self.num_commits()`
|
||||
// copied from src-odb/src/pack/index/ext
|
||||
pub fn id_at(&self, pos: file::Position) -> &gix_hash::oid {
|
||||
assert!(
|
||||
pos.0 < self.num_commits(),
|
||||
"expected lexicographical position less than {}, got {}",
|
||||
self.num_commits(),
|
||||
pos.0
|
||||
);
|
||||
let pos: usize = pos
|
||||
.0
|
||||
.try_into()
|
||||
.expect("an architecture able to hold 32 bits of integer");
|
||||
let start = self.oid_lookup_offset + (pos * self.hash_len);
|
||||
gix_hash::oid::from_bytes_unchecked(&self.data[start..][..self.hash_len])
|
||||
}
|
||||
|
||||
/// Return an iterator over all object hashes stored in the base graph.
|
||||
pub fn iter_base_graph_ids(&self) -> impl Iterator<Item = &gix_hash::oid> {
|
||||
let start = self.base_graphs_list_offset.unwrap_or(0);
|
||||
let base_graphs_list = &self.data[start..][..self.hash_len * usize::from(self.base_graph_count)];
|
||||
base_graphs_list
|
||||
.chunks_exact(self.hash_len)
|
||||
.map(gix_hash::oid::from_bytes_unchecked)
|
||||
}
|
||||
|
||||
/// return an iterator over all commits in this file.
|
||||
pub fn iter_commits(&self) -> impl Iterator<Item = Commit<'_>> {
|
||||
(0..self.num_commits()).map(move |i| self.commit_at(file::Position(i)))
|
||||
}
|
||||
|
||||
/// Return an iterator over all object hashes stored in this file.
|
||||
pub fn iter_ids(&self) -> impl Iterator<Item = &gix_hash::oid> {
|
||||
(0..self.num_commits()).map(move |i| self.id_at(file::Position(i)))
|
||||
}
|
||||
|
||||
/// Translate the given object hash to its position within this file, if present.
|
||||
// copied from src-odb/src/pack/index/ext
|
||||
pub fn lookup(&self, id: impl AsRef<gix_hash::oid>) -> Option<file::Position> {
|
||||
self.lookup_inner(id.as_ref())
|
||||
}
|
||||
|
||||
fn lookup_inner(&self, id: &gix_hash::oid) -> Option<file::Position> {
|
||||
let first_byte = usize::from(id.first_byte());
|
||||
let mut upper_bound = self.fan[first_byte];
|
||||
let mut lower_bound = if first_byte != 0 { self.fan[first_byte - 1] } else { 0 };
|
||||
|
||||
while lower_bound < upper_bound {
|
||||
let mid = (lower_bound + upper_bound) / 2;
|
||||
let mid_sha = self.id_at(file::Position(mid));
|
||||
|
||||
use std::cmp::Ordering::*;
|
||||
match id.cmp(mid_sha) {
|
||||
Less => upper_bound = mid,
|
||||
Equal => return Some(file::Position(mid)),
|
||||
Greater => lower_bound = mid + 1,
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Returns the number of commits in this graph file.
|
||||
///
|
||||
/// The maximum valid `file::Position` that can be used with this file is one less than
|
||||
/// `num_commits()`.
|
||||
pub fn num_commits(&self) -> u32 {
|
||||
self.fan[255]
|
||||
}
|
||||
|
||||
/// Returns the path to this file.
|
||||
pub fn path(&self) -> &Path {
|
||||
&self.path
|
||||
}
|
||||
}
|
||||
|
||||
impl File {
|
||||
/// Returns the byte slice for the given commit in this file's Commit Data (CDAT) chunk.
|
||||
pub(crate) fn commit_data_bytes(&self, pos: file::Position) -> &[u8] {
|
||||
assert!(
|
||||
pos.0 < self.num_commits(),
|
||||
"expected lexicographical position less than {}, got {}",
|
||||
self.num_commits(),
|
||||
pos.0
|
||||
);
|
||||
let pos: usize = pos
|
||||
.0
|
||||
.try_into()
|
||||
.expect("an architecture able to hold 32 bits of integer");
|
||||
let entry_size = self.hash_len + COMMIT_DATA_ENTRY_SIZE_SANS_HASH;
|
||||
let start = self.commit_data_offset + (pos * entry_size);
|
||||
&self.data[start..][..entry_size]
|
||||
}
|
||||
|
||||
/// Returns the byte slice for this file's entire Extra Edge List (EDGE) chunk.
|
||||
pub(crate) fn extra_edges_data(&self) -> Option<&[u8]> {
|
||||
Some(&self.data[self.extra_edges_list_range.clone()?])
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for File {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, r#"File("{:?}")"#, self.path.display())
|
||||
}
|
||||
}
|
||||
256
src-commitgraph/src/file/commit.rs
Normal file
256
src-commitgraph/src/file/commit.rs
Normal file
@@ -0,0 +1,256 @@
|
||||
//! Low-level operations on individual commits.
|
||||
use crate::{
|
||||
file::{self, EXTENDED_EDGES_MASK, LAST_EXTENDED_EDGE_MASK, NO_PARENT},
|
||||
File, Position,
|
||||
};
|
||||
use gix_error::{message, Message};
|
||||
use std::{
|
||||
fmt::{Debug, Formatter},
|
||||
slice::Chunks,
|
||||
};
|
||||
|
||||
/// A commit as stored in a [`File`].
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct Commit<'a> {
|
||||
file: &'a File,
|
||||
pos: file::Position,
|
||||
// We can parse the below fields lazily if needed.
|
||||
commit_timestamp: u64,
|
||||
generation: u32,
|
||||
parent1: ParentEdge,
|
||||
parent2: ParentEdge,
|
||||
root_tree_id: &'a gix_hash::oid,
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn read_u32(b: &[u8]) -> u32 {
|
||||
u32::from_be_bytes(b.try_into().unwrap())
|
||||
}
|
||||
|
||||
impl<'a> Commit<'a> {
|
||||
pub(crate) fn new(file: &'a File, pos: file::Position) -> Self {
|
||||
let bytes = file.commit_data_bytes(pos);
|
||||
Commit {
|
||||
file,
|
||||
pos,
|
||||
root_tree_id: gix_hash::oid::from_bytes_unchecked(&bytes[..file.hash_len]),
|
||||
parent1: ParentEdge::from_raw(read_u32(&bytes[file.hash_len..][..4])),
|
||||
parent2: ParentEdge::from_raw(read_u32(&bytes[file.hash_len + 4..][..4])),
|
||||
// TODO: Add support for corrected commit date offset overflow.
|
||||
// See https://github.com/git/git/commit/e8b63005c48696a26f976f5f9b0ccaf1983e439d and
|
||||
// https://github.com/git/git/commit/f90fca638e99a031dce8e3aca72427b2f9b4bb38 for more details and hints at a test.
|
||||
generation: read_u32(&bytes[file.hash_len + 8..][..4]) >> 2,
|
||||
commit_timestamp: u64::from_be_bytes(bytes[file.hash_len + 8..][..8].try_into().unwrap())
|
||||
& 0x0003_ffff_ffff,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the committer timestamp of this commit.
|
||||
///
|
||||
/// The value is the number of seconds since 1970-01-01 00:00:00 UTC.
|
||||
pub fn committer_timestamp(&self) -> u64 {
|
||||
self.commit_timestamp
|
||||
}
|
||||
|
||||
/// Returns the generation number of this commit.
|
||||
///
|
||||
/// Commits without parents have generation number 1. Commits with parents have a generation
|
||||
/// number that is the max of their parents' generation numbers + 1.
|
||||
pub fn generation(&self) -> u32 {
|
||||
self.generation
|
||||
}
|
||||
|
||||
/// Returns an iterator over the parent positions for lookup in the owning [Graph][crate::Graph].
|
||||
pub fn iter_parents(self) -> Parents<'a> {
|
||||
// I didn't find a combinator approach that a) was as strict as ParentIterator, b) supported
|
||||
// fuse-after-first-error behavior, and b) was significantly shorter or more understandable
|
||||
// than ParentIterator. So here we are.
|
||||
Parents {
|
||||
commit_data: self,
|
||||
state: ParentIteratorState::First,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the hash of this commit.
|
||||
pub fn id(&self) -> &'a gix_hash::oid {
|
||||
self.file.id_at(self.pos)
|
||||
}
|
||||
|
||||
/// Returns the first parent of this commit.
|
||||
pub fn parent1(&self) -> Result<Option<Position>, Message> {
|
||||
self.iter_parents().next().transpose()
|
||||
}
|
||||
|
||||
/// Returns the position at which this commit is stored in the parent [File].
|
||||
pub fn position(&self) -> file::Position {
|
||||
self.pos
|
||||
}
|
||||
|
||||
/// Return the hash of the tree this commit points to.
|
||||
pub fn root_tree_id(&self) -> &gix_hash::oid {
|
||||
self.root_tree_id
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for Commit<'_> {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"Commit {{ id: {}, lex_pos: {}, generation: {}, root_tree_id: {}, parent1: {:?}, parent2: {:?} }}",
|
||||
self.id(),
|
||||
self.pos,
|
||||
self.generation(),
|
||||
self.root_tree_id(),
|
||||
self.parent1,
|
||||
self.parent2,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl Eq for Commit<'_> {}
|
||||
|
||||
impl PartialEq for Commit<'_> {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
std::ptr::eq(self.file, other.file) && self.pos == other.pos
|
||||
}
|
||||
}
|
||||
|
||||
/// An iterator over parents of a [`Commit`].
|
||||
pub struct Parents<'a> {
|
||||
commit_data: Commit<'a>,
|
||||
state: ParentIteratorState<'a>,
|
||||
}
|
||||
|
||||
impl Iterator for Parents<'_> {
|
||||
type Item = Result<Position, Message>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let state = std::mem::replace(&mut self.state, ParentIteratorState::Exhausted);
|
||||
match state {
|
||||
ParentIteratorState::First => match self.commit_data.parent1 {
|
||||
ParentEdge::None => match self.commit_data.parent2 {
|
||||
ParentEdge::None => None,
|
||||
_ => Some(Err(message!(
|
||||
"commit {} has a second parent but not a first parent",
|
||||
self.commit_data.id()
|
||||
))),
|
||||
},
|
||||
ParentEdge::GraphPosition(pos) => {
|
||||
self.state = ParentIteratorState::Second;
|
||||
Some(Ok(pos))
|
||||
}
|
||||
ParentEdge::ExtraEdgeIndex(_) => Some(Err(message!(
|
||||
"commit {}'s first parent is an extra edge index, which is invalid",
|
||||
self.commit_data.id(),
|
||||
))),
|
||||
},
|
||||
ParentIteratorState::Second => match self.commit_data.parent2 {
|
||||
ParentEdge::None => None,
|
||||
ParentEdge::GraphPosition(pos) => Some(Ok(pos)),
|
||||
ParentEdge::ExtraEdgeIndex(extra_edge_index) => {
|
||||
if let Some(extra_edges_list) = self.commit_data.file.extra_edges_data() {
|
||||
let start_offset: usize = extra_edge_index
|
||||
.try_into()
|
||||
.expect("an architecture able to hold 32 bits of integer");
|
||||
let start_offset = start_offset
|
||||
.checked_mul(4)
|
||||
.expect("an extended edge index small enough to fit in usize");
|
||||
if let Some(tail) = extra_edges_list.get(start_offset..) {
|
||||
self.state = ParentIteratorState::Extra(tail.chunks(4));
|
||||
// This recursive call is what blocks me from replacing ParentIterator
|
||||
// with a std::iter::from_fn closure.
|
||||
self.next()
|
||||
} else {
|
||||
Some(Err(message!(
|
||||
"commit {}'s extra edges overflows the commit-graph file's extra edges list",
|
||||
self.commit_data.id()
|
||||
)))
|
||||
}
|
||||
} else {
|
||||
Some(Err(message!(
|
||||
"commit {} has extra edges, but commit-graph file has no extra edges list",
|
||||
self.commit_data.id()
|
||||
)))
|
||||
}
|
||||
}
|
||||
},
|
||||
ParentIteratorState::Extra(mut chunks) => {
|
||||
if let Some(chunk) = chunks.next() {
|
||||
let extra_edge = read_u32(chunk);
|
||||
match ExtraEdge::from_raw(extra_edge) {
|
||||
ExtraEdge::Internal(pos) => {
|
||||
self.state = ParentIteratorState::Extra(chunks);
|
||||
Some(Ok(pos))
|
||||
}
|
||||
ExtraEdge::Last(pos) => Some(Ok(pos)),
|
||||
}
|
||||
} else {
|
||||
Some(Err(message!(
|
||||
"commit {}'s extra edges overflows the commit-graph file's extra edges list",
|
||||
self.commit_data.id()
|
||||
)))
|
||||
}
|
||||
}
|
||||
ParentIteratorState::Exhausted => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
match (&self.state, self.commit_data.parent1, self.commit_data.parent2) {
|
||||
(ParentIteratorState::First, ParentEdge::None, ParentEdge::None) => (0, Some(0)),
|
||||
(ParentIteratorState::First, ParentEdge::None, _) => (1, Some(1)),
|
||||
(ParentIteratorState::First, ParentEdge::GraphPosition(_), ParentEdge::None) => (1, Some(1)),
|
||||
(ParentIteratorState::First, ParentEdge::GraphPosition(_), ParentEdge::GraphPosition(_)) => (2, Some(2)),
|
||||
(ParentIteratorState::First, ParentEdge::GraphPosition(_), ParentEdge::ExtraEdgeIndex(_)) => (3, None),
|
||||
(ParentIteratorState::First, ParentEdge::ExtraEdgeIndex(_), _) => (1, Some(1)),
|
||||
(ParentIteratorState::Second, _, ParentEdge::None) => (0, Some(0)),
|
||||
(ParentIteratorState::Second, _, ParentEdge::GraphPosition(_)) => (1, Some(1)),
|
||||
(ParentIteratorState::Second, _, ParentEdge::ExtraEdgeIndex(_)) => (2, None),
|
||||
(ParentIteratorState::Extra(_), _, _) => (1, None),
|
||||
(ParentIteratorState::Exhausted, _, _) => (0, Some(0)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
enum ParentIteratorState<'a> {
|
||||
First,
|
||||
Second,
|
||||
Extra(Chunks<'a, u8>),
|
||||
Exhausted,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
enum ParentEdge {
|
||||
None,
|
||||
GraphPosition(Position),
|
||||
ExtraEdgeIndex(u32),
|
||||
}
|
||||
|
||||
impl ParentEdge {
|
||||
pub fn from_raw(raw: u32) -> ParentEdge {
|
||||
if raw == NO_PARENT {
|
||||
return ParentEdge::None;
|
||||
}
|
||||
if raw & EXTENDED_EDGES_MASK != 0 {
|
||||
ParentEdge::ExtraEdgeIndex(raw & !EXTENDED_EDGES_MASK)
|
||||
} else {
|
||||
ParentEdge::GraphPosition(Position(raw))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
enum ExtraEdge {
|
||||
Internal(Position),
|
||||
Last(Position),
|
||||
}
|
||||
|
||||
impl ExtraEdge {
|
||||
pub fn from_raw(raw: u32) -> Self {
|
||||
if raw & LAST_EXTENDED_EDGE_MASK != 0 {
|
||||
Self::Last(Position(raw & !LAST_EXTENDED_EDGE_MASK))
|
||||
} else {
|
||||
Self::Internal(Position(raw))
|
||||
}
|
||||
}
|
||||
}
|
||||
203
src-commitgraph/src/file/init.rs
Normal file
203
src-commitgraph/src/file/init.rs
Normal file
@@ -0,0 +1,203 @@
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use gix_error::{message, ErrorExt, Exn, Message, ResultExt};
|
||||
|
||||
use crate::{
|
||||
file::{
|
||||
BASE_GRAPHS_LIST_CHUNK_ID, COMMIT_DATA_CHUNK_ID, COMMIT_DATA_ENTRY_SIZE_SANS_HASH,
|
||||
EXTENDED_EDGES_LIST_CHUNK_ID, FAN_LEN, HEADER_LEN, OID_FAN_CHUNK_ID, OID_LOOKUP_CHUNK_ID, SIGNATURE,
|
||||
},
|
||||
File,
|
||||
};
|
||||
|
||||
const MIN_FILE_SIZE: usize = HEADER_LEN
|
||||
+ gix_chunk::file::Index::size_for_entries(3 /*OIDF, OIDL, CDAT*/)
|
||||
+ FAN_LEN * 4 /* FANOUT TABLE CHUNK OIDF */
|
||||
+ gix_hash::Kind::shortest().len_in_bytes();
|
||||
|
||||
impl File {
|
||||
/// Try to parse the commit graph file at `path`.
|
||||
pub fn at(path: impl AsRef<Path>) -> Result<File, Exn<Message>> {
|
||||
Self::try_from(path.as_ref())
|
||||
}
|
||||
|
||||
/// A lower-level constructor which constructs a new instance directly from the mapping in `data`,
|
||||
/// assuming that it originated from `path`.
|
||||
///
|
||||
/// Note that `path` is only used for verification of the hash its basename contains, but otherwise
|
||||
/// is not of importance.
|
||||
pub fn new(data: memmap2::Mmap, path: PathBuf) -> Result<File, Exn<Message>> {
|
||||
let data_size = data.len();
|
||||
if data_size < MIN_FILE_SIZE {
|
||||
return Err(message("Commit-graph file too small even for an empty graph").raise());
|
||||
}
|
||||
|
||||
let mut ofs = 0;
|
||||
if &data[ofs..ofs + SIGNATURE.len()] != SIGNATURE {
|
||||
return Err(message("Commit-graph file does not start with expected signature").raise());
|
||||
}
|
||||
ofs += SIGNATURE.len();
|
||||
|
||||
match data[ofs] {
|
||||
1 => (),
|
||||
x => {
|
||||
return Err(message!("Unsupported commit-graph file version: {x}").raise());
|
||||
}
|
||||
}
|
||||
ofs += 1;
|
||||
|
||||
let object_hash = gix_hash::Kind::try_from(data[ofs])
|
||||
.map_err(|v| message!("Commit-graph file uses unsupported hash version: {v}").raise())?;
|
||||
ofs += 1;
|
||||
|
||||
let chunk_count = data[ofs];
|
||||
// Can assert chunk_count >= MIN_CHUNKS here, but later OIDF+OIDL+CDAT presence checks make
|
||||
// it redundant.
|
||||
ofs += 1;
|
||||
|
||||
let base_graph_count = data[ofs];
|
||||
ofs += 1;
|
||||
|
||||
let chunks = gix_chunk::file::Index::from_bytes(&data, ofs, u32::from(chunk_count))
|
||||
.or_raise(|| message!("Couldn't read commit-graph file with {chunk_count} chunks at offset {ofs}"))?;
|
||||
|
||||
let base_graphs_list_offset = chunks
|
||||
.validated_usize_offset_by_id(BASE_GRAPHS_LIST_CHUNK_ID, |chunk_range| {
|
||||
let chunk_size = chunk_range.len();
|
||||
if chunk_size % object_hash.len_in_bytes() != 0 {
|
||||
return Err(message!("Commit-graph chunk {BASE_GRAPHS_LIST_CHUNK_ID:?} has invalid size: {msg}",
|
||||
msg = format!(
|
||||
"chunk size {} is not a multiple of {}",
|
||||
chunk_size,
|
||||
object_hash.len_in_bytes()
|
||||
),
|
||||
).raise());
|
||||
}
|
||||
let chunk_base_graph_count: u32 = (chunk_size / object_hash.len_in_bytes())
|
||||
.try_into()
|
||||
.expect("base graph count to fit in 32-bits");
|
||||
if chunk_base_graph_count != u32::from(base_graph_count) {
|
||||
return Err(message!("Commit-graph {BASE_GRAPHS_LIST_CHUNK_ID:?} chunk contains {chunk_base_graph_count} base graphs, but commit-graph file header claims {base_graph_count} base graphs").raise())
|
||||
}
|
||||
Ok(chunk_range.start)
|
||||
})
|
||||
.ok()
|
||||
.transpose()?;
|
||||
|
||||
let (commit_data_offset, commit_data_count): (_, u32) = chunks
|
||||
.validated_usize_offset_by_id(COMMIT_DATA_CHUNK_ID, |chunk_range| {
|
||||
let chunk_size = chunk_range.len();
|
||||
|
||||
let entry_size = object_hash.len_in_bytes() + COMMIT_DATA_ENTRY_SIZE_SANS_HASH;
|
||||
if chunk_size % entry_size != 0 {
|
||||
return Err(message!("Commit-graph chunk {COMMIT_DATA_CHUNK_ID:?} has invalid size: chunk size {chunk_size} is not a multiple of {entry_size}").raise())
|
||||
}
|
||||
Ok((
|
||||
chunk_range.start,
|
||||
(chunk_size / entry_size)
|
||||
.try_into()
|
||||
.expect("number of commits in CDAT chunk to fit in 32 bits"),
|
||||
))
|
||||
})??;
|
||||
|
||||
let fan_offset = chunks
|
||||
.validated_usize_offset_by_id(OID_FAN_CHUNK_ID, |chunk_range| {
|
||||
let chunk_size = chunk_range.len();
|
||||
|
||||
let expected_size = 4 * FAN_LEN;
|
||||
if chunk_size != expected_size {
|
||||
return Err(message!("Commit-graph chunk {OID_FAN_CHUNK_ID:?} has invalid size: expected chunk length {expected_size}, got {chunk_size}").raise())
|
||||
}
|
||||
Ok(chunk_range.start)
|
||||
})?
|
||||
.or_raise(|| message("Error getting offset for OID fan chunk"))?;
|
||||
|
||||
let (oid_lookup_offset, oid_lookup_count): (_, u32) = chunks
|
||||
.validated_usize_offset_by_id(OID_LOOKUP_CHUNK_ID, |chunk_range| {
|
||||
let chunk_size = chunk_range.len();
|
||||
|
||||
if chunk_size % object_hash.len_in_bytes() != 0 {
|
||||
return Err(message!("Commit-graph chunk {OID_LOOKUP_CHUNK_ID:?} has invalid size: chunk size {chunk_size} is not a multiple of {hash_len}", hash_len = object_hash.len_in_bytes()).raise())
|
||||
}
|
||||
Ok((
|
||||
chunk_range.start,
|
||||
(chunk_size / object_hash.len_in_bytes())
|
||||
.try_into()
|
||||
.expect("number of commits in OIDL chunk to fit in 32 bits"),
|
||||
))
|
||||
})?
|
||||
.or_raise(|| message("Error getting offset for OID lookup chunk"))?;
|
||||
|
||||
let extra_edges_list_range = chunks.usize_offset_by_id(EXTENDED_EDGES_LIST_CHUNK_ID).ok();
|
||||
|
||||
let trailer = &data[chunks.highest_offset() as usize..];
|
||||
if trailer.len() != object_hash.len_in_bytes() {
|
||||
return Err(message!(
|
||||
"Expected commit-graph trailer to contain {} bytes, got {}",
|
||||
object_hash.len_in_bytes(),
|
||||
trailer.len()
|
||||
)
|
||||
.raise());
|
||||
}
|
||||
|
||||
if base_graph_count > 0 && base_graphs_list_offset.is_none() {
|
||||
return Err(message!("Chunk named {BASE_GRAPHS_LIST_CHUNK_ID:?} was not found in chunk file index").into());
|
||||
}
|
||||
|
||||
let (fan, _) = read_fan(&data[fan_offset..]);
|
||||
if oid_lookup_count != fan[255] {
|
||||
return Err(message!("Commit-graph {OID_FAN_CHUNK_ID:?} chunk contains {chunk1_commits} commits, but {OID_LOOKUP_CHUNK_ID:?} chunk contains {chunk2_commits} commits",
|
||||
chunk1_commits = fan[255],
|
||||
chunk2_commits = oid_lookup_count,
|
||||
).raise());
|
||||
}
|
||||
if commit_data_count != fan[255] {
|
||||
return Err(
|
||||
message!("Commit-graph {OID_FAN_CHUNK_ID:?} chunk contains {chunk1_commits} commits, but {COMMIT_DATA_CHUNK_ID:?} chunk contains {chunk2_commits} commits",
|
||||
chunk1_commits = fan[255],
|
||||
chunk2_commits = commit_data_count,
|
||||
).raise(),
|
||||
);
|
||||
}
|
||||
Ok(File {
|
||||
base_graph_count,
|
||||
base_graphs_list_offset,
|
||||
commit_data_offset,
|
||||
data,
|
||||
extra_edges_list_range,
|
||||
fan,
|
||||
oid_lookup_offset,
|
||||
path,
|
||||
hash_len: object_hash.len_in_bytes(),
|
||||
object_hash,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&Path> for File {
|
||||
type Error = Exn<Message>;
|
||||
|
||||
fn try_from(path: &Path) -> Result<Self, Self::Error> {
|
||||
let data = std::fs::File::open(path)
|
||||
.and_then(|file| {
|
||||
// SAFETY: we have to take the risk of somebody changing the file underneath. Git never writes into the same file.
|
||||
#[allow(unsafe_code)]
|
||||
unsafe {
|
||||
memmap2::MmapOptions::new().map_copy_read_only(&file)
|
||||
}
|
||||
})
|
||||
.or_raise(|| message!("Could not open commit-graph file at '{path}'", path = path.display()))?;
|
||||
Self::new(data, path.to_owned())
|
||||
}
|
||||
}
|
||||
|
||||
// Copied from src-odb/pack/index/init.rs
|
||||
fn read_fan(d: &[u8]) -> ([u32; FAN_LEN], usize) {
|
||||
assert!(d.len() >= FAN_LEN * 4);
|
||||
|
||||
let mut fan = [0; FAN_LEN];
|
||||
for (c, f) in d.chunks_exact(4).zip(fan.iter_mut()) {
|
||||
*f = u32::from_be_bytes(c.try_into().unwrap());
|
||||
}
|
||||
(fan, FAN_LEN * 4)
|
||||
}
|
||||
46
src-commitgraph/src/file/mod.rs
Normal file
46
src-commitgraph/src/file/mod.rs
Normal file
@@ -0,0 +1,46 @@
|
||||
//! Operations on a single commit-graph file.

use std::fmt::{Display, Formatter};

pub use self::commit::Commit;

mod access;
pub mod commit;
mod init;
pub mod verify;

/// Fixed-width portion of a commit entry, excluding the root-tree hash.
const COMMIT_DATA_ENTRY_SIZE_SANS_HASH: usize = 16;
/// One fan-out entry per possible first byte of an object id.
pub(crate) const FAN_LEN: usize = 256;
/// Length of the fixed file header in bytes.
const HEADER_LEN: usize = 8;

/// Magic bytes identifying a commit-graph file.
const SIGNATURE: &[u8] = b"CGPH";

// NOTE(review): Cargo.toml declares these dependencies as `src-hash`/`src-chunk`/`src-error`,
// while the code imports `gix_*` — confirm the library targets keep the `gix_*` names.
type ChunkId = gix_chunk::Id;
const BASE_GRAPHS_LIST_CHUNK_ID: ChunkId = *b"BASE";
const COMMIT_DATA_CHUNK_ID: ChunkId = *b"CDAT";
const EXTENDED_EDGES_LIST_CHUNK_ID: ChunkId = *b"EDGE";
const OID_FAN_CHUNK_ID: ChunkId = *b"OIDF";
const OID_LOOKUP_CHUNK_ID: ChunkId = *b"OIDL";

// Note that git's commit-graph-format.txt as of v2.28.0 gives an incorrect value 0x0700_0000 for
// NO_PARENT. Fixed in https://github.com/git/git/commit/4d515253afcef985e94400adbfed7044959f9121 .
const NO_PARENT: u32 = 0x7000_0000;
/// Flag bit in the second-parent field indicating the parent list continues in the EDGE chunk.
const EXTENDED_EDGES_MASK: u32 = 0x8000_0000;
/// Flag bit marking the final entry of an extended-edge list.
const LAST_EXTENDED_EDGE_MASK: u32 = 0x8000_0000;
|
||||
|
||||
/// The position of a given commit within a graph file, starting at 0.
///
/// Commits within a graph file are sorted in lexicographical order by OID; a commit's lexicographical position
/// is its position in this ordering. If a commit graph spans multiple files, each file's commits
/// start at lexicographical position 0, so it is unique across a single file but is not unique across
/// the whole commit graph. Each commit also has a graph position ([`Position`][crate::Position]),
/// which is unique across the whole commit graph.
/// In order to avoid accidentally mixing lexicographical positions with graph positions, distinct types are used for each.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct Position(pub u32);

impl Display for Position {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // Delegate to the inner `u32` so formatting flags (width, fill, …) are honored.
        self.0.fmt(f)
    }
}
|
||||
133
src-commitgraph/src/file/verify.rs
Normal file
133
src-commitgraph/src/file/verify.rs
Normal file
@@ -0,0 +1,133 @@
|
||||
//! Auxiliary types used in commit graph file verification methods.
use std::{
    cmp::{max, min},
    collections::HashMap,
    path::Path,
};

use gix_error::{message, ErrorExt, Exn, Message, ResultExt};

use crate::{file, File, GENERATION_NUMBER_INFINITY, GENERATION_NUMBER_MAX};

/// The positive result of [`File::traverse()`] providing some statistical information.
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub struct Outcome {
    /// The largest encountered [`file::Commit`] generation number.
    pub max_generation: u32,
    /// The smallest encountered [`file::Commit`] generation number.
    ///
    /// This is `0` if the file contains no commits.
    pub min_generation: u32,
    /// The largest number of parents in a single [`file::Commit`].
    pub max_parents: u32,
    /// The total number of [`commits`][file::Commit]s seen in the iteration.
    pub num_commits: u32,
    /// A mapping of `N -> number of commits with N parents`.
    pub parent_counts: HashMap<u32, u32>,
}
|
||||
|
||||
/// Verification
|
||||
impl File {
|
||||
/// Returns the trailing checksum over the entire content of this file.
|
||||
pub fn checksum(&self) -> &gix_hash::oid {
|
||||
gix_hash::oid::from_bytes_unchecked(&self.data[self.data.len() - self.hash_len..])
|
||||
}
|
||||
|
||||
/// Traverse all [commits][file::Commit] stored in this file and call `processor(commit) -> Result<(), Error>` on it.
|
||||
///
|
||||
/// If the `processor` fails, the iteration will be stopped and the entire call results in the respective error.
|
||||
pub fn traverse<'a, Processor>(&'a self, mut processor: Processor) -> Result<Outcome, Exn<Message>>
|
||||
where
|
||||
Processor: FnMut(&file::Commit<'a>) -> Result<(), Exn>,
|
||||
{
|
||||
self.verify_checksum()?;
|
||||
verify_split_chain_filename_hash(&self.path, self.checksum())?;
|
||||
|
||||
let null_id = self.object_hash().null_ref();
|
||||
|
||||
let mut stats = Outcome {
|
||||
max_generation: 0,
|
||||
max_parents: 0,
|
||||
min_generation: GENERATION_NUMBER_INFINITY,
|
||||
num_commits: self.num_commits(),
|
||||
parent_counts: HashMap::new(),
|
||||
};
|
||||
|
||||
// TODO: Verify self.fan values as we go.
|
||||
let mut prev_id: &gix_hash::oid = null_id;
|
||||
for commit in self.iter_commits() {
|
||||
if commit.id() <= prev_id {
|
||||
if commit.id() == null_id {
|
||||
return Err(message!(
|
||||
"commit at file position {} has invalid ID {}",
|
||||
commit.position(),
|
||||
commit.id()
|
||||
)
|
||||
.raise());
|
||||
}
|
||||
return Err(message!(
|
||||
"commit at file position {} with ID {} is out of order relative to its predecessor with ID {prev_id}",
|
||||
commit.position(),
|
||||
commit.id()
|
||||
)
|
||||
.raise());
|
||||
}
|
||||
if commit.root_tree_id() == null_id {
|
||||
return Err(message!(
|
||||
"commit {} has invalid root tree ID {}",
|
||||
commit.id(),
|
||||
commit.root_tree_id()
|
||||
)
|
||||
.raise());
|
||||
}
|
||||
if commit.generation() > GENERATION_NUMBER_MAX {
|
||||
return Err(message!("commit {} has invalid generation {}", commit.id(), commit.generation()).raise());
|
||||
}
|
||||
|
||||
processor(&commit).or_raise(|| message!("processor failed on commit {}", commit.id()))?;
|
||||
|
||||
stats.max_generation = max(stats.max_generation, commit.generation());
|
||||
stats.min_generation = min(stats.min_generation, commit.generation());
|
||||
let parent_count = commit.iter_parents().try_fold(0u32, |acc, pos| pos.map(|_| acc + 1))?;
|
||||
*stats.parent_counts.entry(parent_count).or_insert(0) += 1;
|
||||
prev_id = commit.id();
|
||||
}
|
||||
|
||||
if stats.min_generation == GENERATION_NUMBER_INFINITY {
|
||||
stats.min_generation = 0;
|
||||
}
|
||||
|
||||
Ok(stats)
|
||||
}
|
||||
|
||||
/// Assure the [`checksum`][File::checksum()] matches the actual checksum over all content of this file, excluding the trailing
|
||||
/// checksum itself.
|
||||
///
|
||||
/// Return the actual checksum on success or [`Exn<Message>`] if there is a mismatch.
|
||||
pub fn verify_checksum(&self) -> Result<gix_hash::ObjectId, Exn<Message>> {
|
||||
// Even though we could use gix_hash::bytes_of_file(…), this would require extending our
|
||||
// Error type to support io::Error. As we only gain progress, there probably isn't much value
|
||||
// as these files are usually small enough to process them in less than a second, even for the large ones.
|
||||
// But it's possible, once a progress instance is passed.
|
||||
let data_len_without_trailer = self.data.len() - self.hash_len;
|
||||
let mut hasher = gix_hash::hasher(self.object_hash());
|
||||
hasher.update(&self.data[..data_len_without_trailer]);
|
||||
let actual = hasher
|
||||
.try_finalize()
|
||||
.map_err(|e| message!("failed to hash commit graph file: {e}").raise())?;
|
||||
actual.verify(self.checksum()).map_err(|e| message!("{e}").raise())?;
|
||||
Ok(actual)
|
||||
}
|
||||
}
|
||||
|
||||
/// If the given path's filename matches "graph-{hash}.graph", check that `hash` matches the
|
||||
/// expected hash.
|
||||
fn verify_split_chain_filename_hash(path: &Path, expected: &gix_hash::oid) -> Result<(), Exn<Message>> {
|
||||
path.file_name()
|
||||
.and_then(std::ffi::OsStr::to_str)
|
||||
.and_then(|filename| filename.strip_suffix(".graph"))
|
||||
.and_then(|stem| stem.strip_prefix("graph-"))
|
||||
.map_or(Ok(()), |hex| match gix_hash::ObjectId::from_hex(hex.as_bytes()) {
|
||||
Ok(actual) if actual == expected => Ok(()),
|
||||
_ => Err(message!("commit-graph filename should be graph-{}.graph", expected.to_hex()).raise()),
|
||||
})
|
||||
}
|
||||
106
src-commitgraph/src/init.rs
Normal file
106
src-commitgraph/src/init.rs
Normal file
@@ -0,0 +1,106 @@
|
||||
use crate::{File, Graph, MAX_COMMITS};
use gix_error::{message, ErrorExt, Exn, Message, ResultExt};
use std::{
    io::{BufRead, BufReader},
    path::Path,
};

/// Instantiate a `Graph` from various sources.
impl Graph {
    /// Instantiate a commit graph from `path` which may be a directory containing graph files or the graph file itself.
    pub fn at(path: &Path) -> Result<Self, Exn<Message>> {
        Self::try_from(path)
    }

    /// Instantiate a commit graph from the directory containing all of its files.
    pub fn from_commit_graphs_dir(path: &Path) -> Result<Self, Exn<Message>> {
        let commit_graphs_dir = path;
        let chain_file_path = commit_graphs_dir.join("commit-graph-chain");
        let chain_file = std::fs::File::open(&chain_file_path).or_raise(|| {
            message!(
                "Could not open commit-graph chain file at '{}'",
                chain_file_path.display()
            )
        })?;
        let mut files = Vec::new();
        // The chain file lists one graph-file hash per line; file order in the chain matters.
        for line in BufReader::new(chain_file).lines() {
            let hash = line.or_raise(|| {
                message!(
                    "Could not read from commit-graph file at '{}'",
                    chain_file_path.display()
                )
            })?;
            let graph_file_path = commit_graphs_dir.join(format!("graph-{hash}.graph"));
            files.push(
                File::at(&graph_file_path)
                    .or_raise(|| message!("Could not open commit-graph file at '{}'", graph_file_path.display()))?,
            );
        }
        Ok(Self::new(files)?)
    }

    /// Instantiate a commit graph from a `.git/objects/info/commit-graph` or
    /// `.git/objects/info/commit-graphs/graph-*.graph` file.
    pub fn from_file(path: &Path) -> Result<Self, Exn<Message>> {
        let file = File::at(path).or_raise(|| message!("Could not open commit-graph file at '{}'", path.display()))?;
        Ok(Self::new(vec![file])?)
    }

    /// Instantiate a commit graph from an `.git/objects/info` directory.
    pub fn from_info_dir(info_dir: &Path) -> Result<Self, Exn<Message>> {
        // Prefer the monolithic `commit-graph` file; fall back to a split chain if it is absent or unusable.
        Self::from_file(&info_dir.join("commit-graph"))
            .or_else(|_| Self::from_commit_graphs_dir(&info_dir.join("commit-graphs")))
    }

    /// Create a new commit graph from a list of `files`.
    ///
    /// Fails if `files` is empty, if the total number of commits exceeds [`MAX_COMMITS`],
    /// or if the files do not all agree on the object hash kind.
    pub fn new(files: Vec<File>) -> Result<Self, Message> {
        let files = nonempty::NonEmpty::from_vec(files)
            .ok_or_else(|| message!("Commit-graph must contain at least one file"))?;
        // Sum in u64 to avoid overflow before the limit check.
        let num_commits: u64 = files.iter().map(|f| u64::from(f.num_commits())).sum();
        if num_commits > u64::from(MAX_COMMITS) {
            return Err(message!(
                "Commit-graph files contain {num_commits} commits altogether, but only {MAX_COMMITS} commits are allowed"
            ));
        }

        // All files must use the same object hash - compare each file to its predecessor.
        let mut f1 = files.first();
        for f2 in files.tail() {
            if f1.object_hash() != f2.object_hash() {
                return Err(message!(
                    "Commit-graph files mismatch: '{path1}' uses hash {hash1:?}, but '{path2}' uses hash {hash2:?}",
                    path1 = f1.path().display(),
                    hash1 = f1.object_hash(),
                    path2 = f2.path().display(),
                    hash2 = f2.object_hash(),
                ));
            }
            f1 = f2;
        }

        Ok(Self { files })
    }
}
|
||||
|
||||
impl TryFrom<&Path> for Graph {
|
||||
type Error = Exn<Message>;
|
||||
|
||||
fn try_from(path: &Path) -> Result<Self, Self::Error> {
|
||||
if path.is_file() {
|
||||
// Assume we are looking at `.git/objects/info/commit-graph` or
|
||||
// `.git/objects/info/commit-graphs/graph-*.graph`.
|
||||
Self::from_file(path)
|
||||
} else if path.is_dir() {
|
||||
if path.join("commit-graph-chain").is_file() {
|
||||
Self::from_commit_graphs_dir(path)
|
||||
} else {
|
||||
Self::from_info_dir(path)
|
||||
}
|
||||
} else {
|
||||
Err(message!(
|
||||
"Did not find any files that look like commit graphs at '{}'",
|
||||
path.display()
|
||||
)
|
||||
.raise())
|
||||
}
|
||||
}
|
||||
}
|
||||
78
src-commitgraph/src/lib.rs
Normal file
78
src-commitgraph/src/lib.rs
Normal file
@@ -0,0 +1,78 @@
|
||||
//! Read, verify, and traverse git commit graphs.
//!
//! A [commit graph][Graph] is an index of commits in the git commit history.
//! The [Graph] stores commit data in a way that accelerates lookups considerably compared to
//! traversing the git history by usual means.
//!
//! As generating the full commit graph from scratch can take some time, git may write new commits
//! to separate [files][File] instead of overwriting the original file.
//! Eventually, git will merge these files together as the number of files grows.
//! ## Feature Flags
#![cfg_attr(
    all(doc, feature = "document-features"),
    doc = ::document_features::document_features!()
)]
#![cfg_attr(all(doc, feature = "document-features"), feature(doc_cfg))]
#![deny(missing_docs, rust_2018_idioms, unsafe_code)]

use gix_error::{Exn, Message};
use std::path::Path;

/// A single commit-graph file.
///
/// All operations on a `File` are local to that graph file. Since a commit graph can span multiple
/// files, all interesting graph operations belong on [`Graph`].
pub struct File {
    // Number of base graph files this file builds upon (0 for a standalone file).
    base_graph_count: u8,
    // Byte offset of the BASE chunk's data, if present.
    base_graphs_list_offset: Option<usize>,
    // Byte offset of the CDAT chunk's data.
    commit_data_offset: usize,
    // The whole file, memory-mapped read-only.
    data: memmap2::Mmap,
    // Byte range of the EDGE chunk's data, if present.
    extra_edges_list_range: Option<std::ops::Range<usize>>,
    // Fan-out table decoded from the OIDF chunk.
    fan: [u32; file::FAN_LEN],
    // Byte offset of the OIDL chunk's data.
    oid_lookup_offset: usize,
    // Path this file was opened from, used in error messages and filename verification.
    path: std::path::PathBuf,
    // Length in bytes of a single object hash, derived from `object_hash`.
    hash_len: usize,
    // The kind of object hash used throughout this file.
    object_hash: gix_hash::Kind,
}

/// A complete commit graph.
///
/// The data in the commit graph may come from a monolithic `objects/info/commit-graph` file, or it
/// may come from one or more `objects/info/commit-graphs/graph-*.graph` files. These files are
/// generated via `git commit-graph write ...` commands.
pub struct Graph {
    files: nonempty::NonEmpty<File>,
}

/// Instantiate a commit graph from an `.git/objects/info` directory, or one of the various commit-graph files.
pub fn at(path: impl AsRef<Path>) -> Result<Graph, Exn<Message>> {
    Graph::at(path.as_ref())
}

mod access;
pub mod file;
/// Instantiation of a [`Graph`] from files or directories on disk.
pub mod init;
pub mod verify;

/// The number of generations that are considered 'infinite' commit history.
pub const GENERATION_NUMBER_INFINITY: u32 = 0xffff_ffff;
/// The largest valid generation number.
///
/// If a commit's real generation number is larger than this, the commit graph will cap the value to
/// this number.
/// The largest distinct generation number is `GENERATION_NUMBER_MAX - 1`.
pub const GENERATION_NUMBER_MAX: u32 = 0x3fff_ffff;

/// The maximum number of commits that can be stored in a commit graph.
pub const MAX_COMMITS: u32 = (1 << 30) + (1 << 29) + (1 << 28) - 1;

/// A generalized position for use in [`Graph`].
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd, Hash)]
pub struct Position(pub u32);

impl std::fmt::Display for Position {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Delegate to the inner `u32` so formatting flags are honored.
        self.0.fmt(f)
    }
}
|
||||
141
src-commitgraph/src/verify.rs
Normal file
141
src-commitgraph/src/verify.rs
Normal file
@@ -0,0 +1,141 @@
|
||||
//! Auxiliary types used by graph verification methods.
use std::{
    cmp::{max, min},
    collections::BTreeMap,
};

use gix_error::{message, ErrorExt, Exn, Message, ResultExt};

use crate::{
    file::{self},
    Graph, Position, GENERATION_NUMBER_MAX,
};

/// Statistics gathered while verifying the integrity of the graph as returned by [`Graph::verify_integrity()`].
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub struct Outcome {
    /// The length of the longest path between any two commits in this graph.
    ///
    /// For example, this will be `Some(9)` for a commit graph containing 10 linear commits.
    /// This will be `Some(0)` for a commit graph containing 0 or 1 commits.
    /// If the longest path length is too large to fit in a [u32], then this will be [None].
    pub longest_path_length: Option<u32>,
    /// The total number of commits traversed.
    pub num_commits: u32,
    /// A mapping of `N -> number of commits with N parents`, aggregated over all files of the graph.
    pub parent_counts: BTreeMap<u32, u32>,
}
|
||||
|
||||
impl Graph {
    /// Traverse all commits in the graph and call `processor(&commit) -> Result<(), E>` on it while verifying checksums.
    ///
    /// When `processor` returns an error, the entire verification is stopped and the error returned.
    pub fn verify_integrity<E>(
        &self,
        mut processor: impl FnMut(&file::Commit<'_>) -> Result<(), E>,
    ) -> Result<Outcome, Exn<Message>>
    where
        E: std::error::Error + Send + Sync + 'static,
    {
        if self.files.len() > 256 {
            // A file in a split chain can only have up to 255 base files.
            return Err(message!(
                "Commit-graph should be composed of at most 256 files but actually contains {} files",
                self.files.len()
            )
            .raise());
        }

        let mut stats = Outcome {
            longest_path_length: None,
            num_commits: 0,
            parent_counts: BTreeMap::new(),
        };
        let mut max_generation = 0u32;

        // TODO: Detect duplicate commit IDs across different files. Not sure how to do this without
        // a separate loop, e.g. self.iter_sorted_ids().

        // Graph position at which the current file's commits begin.
        let mut file_start_pos = Position(0);
        for (file_index, file) in self.files.iter().enumerate() {
            // Each file must declare exactly its predecessors in the chain as base graphs.
            if usize::from(file.base_graph_count()) != file_index {
                return Err(message!(
                    "'{}' should have {} base graphs, but claims {} base graphs",
                    file.path().display(),
                    file_index,
                    file.base_graph_count()
                )
                .raise());
            }

            // The base-graph IDs recorded in this file must match the checksums of all earlier files, in order.
            for (base_graph_index, (expected, actual)) in self
                .files
                .iter()
                .take(file_index)
                .map(crate::File::checksum)
                .zip(file.iter_base_graph_ids())
                .enumerate()
            {
                if actual != expected {
                    return Err(message!(
                        "'{}' base graph at index {} should have ID {} but is {}",
                        file.path().display(),
                        base_graph_index,
                        expected,
                        actual
                    )
                    .raise());
                }
            }

            let next_file_start_pos = Position(file_start_pos.0 + file.num_commits());
            let file_stats = file.traverse(|commit| {
                let mut max_parent_generation = 0u32;
                for parent_pos in commit.iter_parents() {
                    let parent_pos = parent_pos.map_err(|err| err.raise_erased())?;
                    // Parents may only live in this file or one of its base files.
                    if parent_pos >= next_file_start_pos {
                        return Err(message!(
                            "Commit {} has parent position {parent_pos} that is out of range (should be in range 0-{})",
                            commit.id(),
                            Position(next_file_start_pos.0 - 1)
                        )
                        .raise_erased());
                    }
                    let parent = self.commit_at(parent_pos);
                    max_parent_generation = max(max_parent_generation, parent.generation());
                }

                // If the max parent generation is GENERATION_NUMBER_MAX, then this commit's
                // generation should be GENERATION_NUMBER_MAX too.
                let expected_generation = min(max_parent_generation + 1, GENERATION_NUMBER_MAX);
                if commit.generation() != expected_generation {
                    return Err(message!(
                        "Commit {}'s generation should be {expected_generation} but is {}",
                        commit.id(),
                        commit.generation()
                    )
                    .raise_erased());
                }

                processor(commit).or_raise_erased(|| message!("processor failed on commit {id}", id = commit.id()))?;

                Ok(())
            })?;

            // Aggregate this file's statistics into the graph-wide outcome.
            max_generation = max(max_generation, file_stats.max_generation);
            stats.num_commits += file_stats.num_commits;
            for (key, value) in file_stats.parent_counts.into_iter() {
                *stats.parent_counts.entry(key).or_insert(0) += value;
            }
            file_start_pos = next_file_start_pos;
        }

        // If any generation number hit the cap, the true longest path cannot be known.
        stats.longest_path_length = if max_generation < GENERATION_NUMBER_MAX {
            Some(max_generation.saturating_sub(1))
        } else {
            None
        };
        Ok(stats)
    }
}
|
||||
113
src-commitgraph/tests/access/mod.rs
Normal file
113
src-commitgraph/tests/access/mod.rs
Normal file
@@ -0,0 +1,113 @@
|
||||
use crate::{check_common, graph_and_expected, graph_and_expected_named};

#[test]
fn single_parent() {
    let (cg, refs) = graph_and_expected("single_parent.sh", &["parent", "child"]);
    check_common(&cg, &refs);

    assert_eq!(cg.commit_at(refs["parent"].pos()).generation(), 1);
    assert_eq!(cg.commit_at(refs["child"].pos()).generation(), 2);
}

#[test]
fn single_commit_huge_dates_generation_v2_also_do_not_allow_huge_dates() {
    let (cg, refs) = graph_and_expected_named("single_commit_huge_dates.sh", "v2", &["HEAD"]);
    let info = &refs["HEAD"];
    let actual = cg.commit_by_id(info.id).expect("present");
    assert_eq!(
        actual.committer_timestamp(),
        1,
        "overflow happened, can't represent huge dates"
    );
    assert_eq!(
        info.time.seconds, 68719476737,
        "this is the value we would want to see, but it's not possible in V2 either, as that is just about generations"
    );
    assert_eq!(actual.generation(), 1, "generations are fine though");
}

#[test]
fn single_commit_huge_dates_overflow_v1() {
    let (cg, refs) = graph_and_expected_named("single_commit_huge_dates.sh", "v1", &["HEAD"]);
    let info = &refs["HEAD"];
    let actual = cg.commit_by_id(info.id).expect("present");
    assert_eq!(actual.committer_timestamp(), 1, "overflow happened");
    assert_eq!(
        info.time.seconds, 68719476737,
        "this is the value we would want to see, but it's not possible in V1"
    );
    assert_eq!(actual.generation(), 1, "generations are fine though");
}

#[test]
fn single_commit_future_64bit_dates_work() {
    let (cg, refs) = graph_and_expected_named("single_commit_huge_dates.sh", "max-date", &["HEAD"]);
    let info = &refs["HEAD"];
    let actual = cg.commit_by_id(info.id).expect("present");
    assert_eq!(
        actual.committer_timestamp(),
        info.time.seconds.try_into().expect("timestamps in bound"),
        "this is close the highest representable value in the graph, like year 2500, so we are good for longer than I should care about"
    );
    assert_eq!(actual.generation(), 1);
}

#[test]
fn generation_numbers_overflow_is_handled_in_chained_graph() {
    // Refs listed in reverse topological order; expected generations count down from 5.
    let names = ["extra", "old-2", "future-2", "old-1", "future-1"];
    let (cg, mut refs) = graph_and_expected("generation_number_overflow.sh", &names);
    for (r, expected) in names
        .iter()
        .map(|n| refs.remove(n.to_owned()).expect("present"))
        .zip((1..=5).rev())
    {
        assert_eq!(
            cg.commit_by_id(r.id).expect("present").generation(),
            expected,
            "actually, this test seems to have valid generation numbers from the get-go. How to repro the actual issue?"
        );
    }
}

#[test]
fn octopus_merges() {
    let (cg, refs) = graph_and_expected(
        "octopus_merges.sh",
        &[
            "root",
            "parent1",
            "parent2",
            "parent3",
            "parent4",
            "three_parents",
            "four_parents",
        ],
    );
    check_common(&cg, &refs);

    assert_eq!(cg.commit_at(refs["root"].pos()).generation(), 1);
    assert_eq!(cg.commit_at(refs["parent1"].pos()).generation(), 2);
    assert_eq!(cg.commit_at(refs["parent2"].pos()).generation(), 2);
    assert_eq!(cg.commit_at(refs["parent3"].pos()).generation(), 2);
    assert_eq!(cg.commit_at(refs["parent4"].pos()).generation(), 2);
    assert_eq!(cg.commit_at(refs["three_parents"].pos()).generation(), 3);
    assert_eq!(cg.commit_at(refs["four_parents"].pos()).generation(), 3);
}

#[test]
fn single_commit() {
    let (cg, refs) = graph_and_expected("single_commit.sh", &["commit"]);
    check_common(&cg, &refs);

    assert_eq!(cg.commit_at(refs["commit"].pos()).generation(), 1);
}

#[test]
fn two_parents() {
    let (cg, refs) = graph_and_expected("two_parents.sh", &["parent1", "parent2", "child"]);
    check_common(&cg, &refs);

    assert_eq!(cg.commit_at(refs["parent1"].pos()).generation(), 1);
    assert_eq!(cg.commit_at(refs["parent2"].pos()).generation(), 1);
    assert_eq!(cg.commit_at(refs["child"].pos()).generation(), 2);
}
|
||||
178
src-commitgraph/tests/commitgraph.rs
Normal file
178
src-commitgraph/tests/commitgraph.rs
Normal file
@@ -0,0 +1,178 @@
|
||||
use std::{
    collections::{HashMap, HashSet},
    hash::BuildHasher,
    io::{BufRead, Cursor},
    path::Path,
    process::Command,
};

use gix_commitgraph::{Graph, Position as GraphPosition};
use gix_testtools::scripted_fixture_read_only;

mod access;

/// Assert that `cg` is internally consistent and agrees with the `expected` ref data
/// gathered from `git show` for every commit of the fixture.
pub fn check_common(cg: &Graph, expected: &HashMap<String, RefInfo, impl BuildHasher>) {
    cg.verify_integrity(|_| Ok::<_, gix_error::Message>(()))
        .expect("graph is valid");
    assert_eq!(
        usize::try_from(cg.num_commits()).expect("an architecture able to hold 32 bits of integer"),
        expected.len()
    );
    for ref_info in expected.values() {
        assert_eq!(cg.id_at(ref_info.pos()), ref_info.id(), "id_at({})", ref_info.pos());
        assert_eq!(
            cg.lookup(ref_info.id()),
            Some(ref_info.pos()),
            "lookup({})",
            ref_info.id()
        );

        let expected_parents: Vec<_> = ref_info
            .parent_ids()
            .map(|id| {
                expected
                    .values()
                    .find(|item| item.id() == id)
                    .expect("find RefInfo by id")
            })
            .collect();

        let commit = cg.commit_at(ref_info.pos());
        assert_eq!(commit.id(), ref_info.id());
        assert_eq!(
            commit.committer_timestamp(),
            ref_info.time.seconds.try_into().expect("timestamp in bounds")
        );
        assert_eq!(commit.root_tree_id(), ref_info.root_tree_id());
        assert_eq!(
            commit.parent1().expect("failed to access commit's parent1"),
            expected_parents.iter().map(|x| x.pos()).next()
        );
        assert_eq!(
            commit
                .iter_parents()
                .collect::<std::result::Result<Vec<_>, _>>()
                .expect("failed to access commit's parents"),
            expected_parents.iter().map(|x| x.pos()).collect::<Vec<_>>()
        );
    }

    assert_eq!(
        cg.iter_ids().collect::<HashSet<_>>(),
        expected.values().map(RefInfo::id).collect::<HashSet<_>>()
    );
}

/// Run the fixture script at `script_path` and load its commit graph along with ref data for `refs`.
pub fn graph_and_expected(
    script_path: &str,
    refs: &[&'static str],
) -> (gix_commitgraph::Graph, HashMap<String, RefInfo>) {
    graph_and_expected_named(script_path, "", refs)
}

/// Like [`graph_and_expected`], but reads the repository from the `name` subdirectory of the fixture output.
pub fn graph_and_expected_named(
    script_path: &str,
    name: &str,
    refs: &[&'static str],
) -> (gix_commitgraph::Graph, HashMap<String, RefInfo>) {
    let repo_dir = scripted_fixture_read_only(script_path)
        .expect("script succeeds all the time")
        .join(name);
    let expected = inspect_refs(&repo_dir, refs);
    let cg =
        Graph::from_info_dir(&repo_dir.join(".git").join("objects").join("info")).expect("graph present and valid");
    let object_hash = cg.object_hash();
    let any_ref = expected.values().next().expect("at least one ref");
    assert_eq!(
        object_hash,
        any_ref.id().kind(),
        "graph hash kind should match fixture object IDs"
    );

    (cg, expected)
}

/// Reference data for a single fixture commit, as reported by `git show`.
pub struct RefInfo {
    id: gix_hash::ObjectId,
    pub time: gix_date::Time,
    parent_ids: Vec<gix_hash::ObjectId>,
    pos: GraphPosition,
    root_tree_id: gix_hash::ObjectId,
}

impl RefInfo {
    pub fn id(&self) -> &gix_hash::oid {
        &self.id
    }

    pub fn pos(&self) -> GraphPosition {
        self.pos
    }

    pub fn parent_ids(&self) -> impl Iterator<Item = &gix_hash::oid> {
        self.parent_ids.iter().map(AsRef::as_ref)
    }

    pub fn root_tree_id(&self) -> &gix_hash::oid {
        &self.root_tree_id
    }
}

/// Collect [`RefInfo`]s for `refs` by shelling out to `git show` in `repo_dir`.
fn inspect_refs(repo_dir: impl AsRef<Path>, refs: &[&'static str]) -> HashMap<String, RefInfo> {
    let output = Command::new("git")
        .arg("-C")
        .arg(repo_dir.as_ref())
        .arg("show")
        .arg("--no-patch")
        .arg("--pretty=format:%S %H %T %ct %P")
        .args(refs)
        .arg("--")
        .env_remove("GIT_DIR")
        .output()
        .expect("failed to execute `git show`");
    // Output format: <refname> <id> <tree_id> <commit_time> <parent_ids>
    let mut infos: Vec<_> = Cursor::new(output.stdout)
        .lines()
        .map(|x| x.expect("failed to read `git show` output"))
        .map(|x| {
            let parts = x.trim_end().split(' ').collect::<Vec<_>>();
            (
                parts[0].to_string(),
                gix_hash::ObjectId::from_hex(parts[1].as_bytes()).expect("40 bytes hex"),
                gix_hash::ObjectId::from_hex(parts[2].as_bytes()).expect("40 bytes hex"),
                gix_date::Time::new(parts[3].parse().expect("valid stamp"), 0),
                parts[4..]
                    .iter()
                    .map(|x| gix_hash::ObjectId::from_hex(x.as_bytes()).expect("40 bytes hex"))
                    .collect(),
            )
        })
        .collect();
    // Sort by id so that binary search below reproduces lexicographical graph positions.
    infos.sort_by_key(|x| x.1);

    let get_pos = |id: &gix_hash::oid| -> GraphPosition {
        let pos: u32 = infos
            .binary_search_by_key(&id, |x| &x.1)
            .expect("sorted_ids to contain id")
            .try_into()
            .expect("graph position to fit in u32");
        GraphPosition(pos)
    };

    infos
        .iter()
        .cloned()
        .map(|(name, id, root_tree_id, time, parent_ids)| {
            (
                name,
                RefInfo {
                    id,
                    parent_ids,
                    root_tree_id,
                    time,
                    pos: get_pos(&id),
                },
            )
        })
        .collect()
}
|
||||
BIN
src-commitgraph/tests/fixtures/generated-archives/generation_number_overflow.tar
vendored
Normal file
BIN
src-commitgraph/tests/fixtures/generated-archives/generation_number_overflow.tar
vendored
Normal file
Binary file not shown.
BIN
src-commitgraph/tests/fixtures/generated-archives/generation_number_overflow_sha256.tar
vendored
Normal file
BIN
src-commitgraph/tests/fixtures/generated-archives/generation_number_overflow_sha256.tar
vendored
Normal file
Binary file not shown.
BIN
src-commitgraph/tests/fixtures/generated-archives/octopus_merges.tar
vendored
Normal file
BIN
src-commitgraph/tests/fixtures/generated-archives/octopus_merges.tar
vendored
Normal file
Binary file not shown.
BIN
src-commitgraph/tests/fixtures/generated-archives/octopus_merges_sha256.tar
vendored
Normal file
BIN
src-commitgraph/tests/fixtures/generated-archives/octopus_merges_sha256.tar
vendored
Normal file
Binary file not shown.
BIN
src-commitgraph/tests/fixtures/generated-archives/single_commit.tar
vendored
Normal file
BIN
src-commitgraph/tests/fixtures/generated-archives/single_commit.tar
vendored
Normal file
Binary file not shown.
BIN
src-commitgraph/tests/fixtures/generated-archives/single_commit_huge_dates.tar
vendored
Normal file
BIN
src-commitgraph/tests/fixtures/generated-archives/single_commit_huge_dates.tar
vendored
Normal file
Binary file not shown.
BIN
src-commitgraph/tests/fixtures/generated-archives/single_commit_huge_dates_sha256.tar
vendored
Normal file
BIN
src-commitgraph/tests/fixtures/generated-archives/single_commit_huge_dates_sha256.tar
vendored
Normal file
Binary file not shown.
BIN
src-commitgraph/tests/fixtures/generated-archives/single_commit_sha256.tar
vendored
Normal file
BIN
src-commitgraph/tests/fixtures/generated-archives/single_commit_sha256.tar
vendored
Normal file
Binary file not shown.
BIN
src-commitgraph/tests/fixtures/generated-archives/single_parent.tar
vendored
Normal file
BIN
src-commitgraph/tests/fixtures/generated-archives/single_parent.tar
vendored
Normal file
Binary file not shown.
BIN
src-commitgraph/tests/fixtures/generated-archives/single_parent_huge_dates.tar
vendored
Normal file
BIN
src-commitgraph/tests/fixtures/generated-archives/single_parent_huge_dates.tar
vendored
Normal file
Binary file not shown.
BIN
src-commitgraph/tests/fixtures/generated-archives/single_parent_sha256.tar
vendored
Normal file
BIN
src-commitgraph/tests/fixtures/generated-archives/single_parent_sha256.tar
vendored
Normal file
Binary file not shown.
BIN
src-commitgraph/tests/fixtures/generated-archives/two_parents.tar
vendored
Normal file
BIN
src-commitgraph/tests/fixtures/generated-archives/two_parents.tar
vendored
Normal file
Binary file not shown.
BIN
src-commitgraph/tests/fixtures/generated-archives/two_parents_sha256.tar
vendored
Normal file
BIN
src-commitgraph/tests/fixtures/generated-archives/two_parents_sha256.tar
vendored
Normal file
Binary file not shown.
65
src-commitgraph/tests/fixtures/generation_number_overflow.sh
Executable file
65
src-commitgraph/tests/fixtures/generation_number_overflow.sh
Executable file
@@ -0,0 +1,65 @@
|
||||
#!/usr/bin/env bash
|
||||
set -eu -o pipefail
|
||||
|
||||
function tick() {
|
||||
if test -z "${tick+set}"
|
||||
then
|
||||
tick=1112911993
|
||||
else
|
||||
tick=$(($tick + 60))
|
||||
fi
|
||||
GIT_COMMITTER_DATE="$tick -0700"
|
||||
GIT_AUTHOR_DATE="$tick -0700"
|
||||
export GIT_COMMITTER_DATE GIT_AUTHOR_DATE
|
||||
}
|
||||
|
||||
function force_tag() {
|
||||
local name head_oid common_dir
|
||||
name=${1:?argument the tag name}
|
||||
|
||||
# This should only be needed with 32-bit `git`, so fail otherwise.
|
||||
word_size="$(
|
||||
git --version --build-options |
|
||||
awk '$1 == "sizeof-size_t:" { print $2 }'
|
||||
)"
|
||||
((word_size == 4))
|
||||
|
||||
# Manually create the tag.
|
||||
head_oid="$(git rev-parse HEAD)"
|
||||
common_dir="$(git rev-parse --git-common-dir)"
|
||||
(set -o noclobber; echo "$head_oid" > "$common_dir/refs/tags/$name")
|
||||
}
|
||||
|
||||
function tagged_commit() {
|
||||
local message=${1:?first argument is the commit message and tag name}
|
||||
local date=${2:-}
|
||||
local file="$message.t"
|
||||
echo "$1" > "$file"
|
||||
git add -- "$file"
|
||||
if [ -n "$date" ]; then
|
||||
export GIT_COMMITTER_DATE="$date"
|
||||
else
|
||||
tick
|
||||
fi
|
||||
git commit -m "$message"
|
||||
git tag -- "$message" || force_tag "$message"
|
||||
}
|
||||
|
||||
tick
|
||||
|
||||
# adapted from git/t/t5318 'lower layers have overflow chunk'
|
||||
UNIX_EPOCH_ZERO="@0 +0000"
|
||||
FUTURE_DATE="@4147483646 +0000"
|
||||
|
||||
git init
|
||||
git config commitGraph.generationVersion 2
|
||||
|
||||
tagged_commit future-1 "$FUTURE_DATE"
|
||||
tagged_commit old-1 "$UNIX_EPOCH_ZERO"
|
||||
git commit-graph write --reachable
|
||||
tagged_commit future-2 "$FUTURE_DATE"
|
||||
tagged_commit old-2 "$UNIX_EPOCH_ZERO"
|
||||
git commit-graph write --reachable --split=no-merge
|
||||
tagged_commit extra
|
||||
# this makes sure it's actually in chain format.
|
||||
git commit-graph write --reachable --split=no-merge
|
||||
28
src-commitgraph/tests/fixtures/octopus_merges.sh
Executable file
28
src-commitgraph/tests/fixtures/octopus_merges.sh
Executable file
@@ -0,0 +1,28 @@
|
||||
#!/usr/bin/env bash
|
||||
set -eu -o pipefail
|
||||
|
||||
git init -q
|
||||
|
||||
git checkout -q --orphan root
|
||||
git commit -q --allow-empty -m root
|
||||
|
||||
git checkout -q -b parent1 root
|
||||
git commit -q --allow-empty -m parent1
|
||||
|
||||
git checkout -q -b parent2 root
|
||||
git commit -q --allow-empty -m parent2
|
||||
|
||||
git checkout -q -b parent3 root
|
||||
git commit -q --allow-empty -m parent3
|
||||
|
||||
git checkout -q -b parent4 root
|
||||
git commit -q --allow-empty -m parent4
|
||||
|
||||
git checkout -q -b three_parents parent1
|
||||
git merge -q -m three_parents --no-ff parent2 parent3 >/dev/null
|
||||
|
||||
git checkout -q -b four_parents parent2
|
||||
git merge -q -m four_parents --no-ff parent1 parent3 parent4 >/dev/null
|
||||
|
||||
git commit-graph write --no-progress --reachable
|
||||
git repack -adq
|
||||
12
src-commitgraph/tests/fixtures/single_commit.sh
Executable file
12
src-commitgraph/tests/fixtures/single_commit.sh
Executable file
@@ -0,0 +1,12 @@
|
||||
#!/usr/bin/env bash
|
||||
set -eu -o pipefail
|
||||
|
||||
# The goal with this repo is to have the smallest commit-graph file possible, in the hopes that an
|
||||
|
||||
git init -q
|
||||
|
||||
git checkout -q -b commit
|
||||
git commit -q --allow-empty -m commit
|
||||
|
||||
git commit-graph write --no-progress --reachable
|
||||
git repack -adq
|
||||
20
src-commitgraph/tests/fixtures/single_commit_huge_dates.sh
Executable file
20
src-commitgraph/tests/fixtures/single_commit_huge_dates.sh
Executable file
@@ -0,0 +1,20 @@
|
||||
#!/usr/bin/env bash
|
||||
set -eu -o pipefail
|
||||
|
||||
function setup_repo() {
|
||||
local version=${1:?need generation version}
|
||||
local time=${2:?timestamp seconds since unix epoch}
|
||||
git init -q
|
||||
|
||||
# one past the max 32bit date git can represent
|
||||
export GIT_COMMITTER_DATE="@${time} +0000"
|
||||
git config commitGraph.generationVersion ${version}
|
||||
|
||||
git commit -q --allow-empty -m c1
|
||||
|
||||
git commit-graph write --no-progress --reachable
|
||||
}
|
||||
|
||||
(mkdir v1 && cd v1 && setup_repo 1 68719476737) # the year 4000 something (overflows in graph)
|
||||
(mkdir v2 && cd v2 && setup_repo 2 68719476737)
|
||||
(mkdir max-date && cd max-date && setup_repo 1 17147483646) # the year 2500ish
|
||||
13
src-commitgraph/tests/fixtures/single_parent.sh
Executable file
13
src-commitgraph/tests/fixtures/single_parent.sh
Executable file
@@ -0,0 +1,13 @@
|
||||
#!/usr/bin/env bash
|
||||
set -eu -o pipefail
|
||||
|
||||
git init -q
|
||||
|
||||
git checkout -q -b parent
|
||||
git commit -q --allow-empty -m parent
|
||||
|
||||
git checkout -q -b child parent
|
||||
git commit -q --allow-empty -m child
|
||||
|
||||
git commit-graph write --no-progress --reachable
|
||||
git repack -adq
|
||||
16
src-commitgraph/tests/fixtures/split_chain.sh
Executable file
16
src-commitgraph/tests/fixtures/split_chain.sh
Executable file
@@ -0,0 +1,16 @@
|
||||
#!/usr/bin/env bash
|
||||
set -eu -o pipefail
|
||||
|
||||
git init -q
|
||||
|
||||
git checkout -q -b commit1
|
||||
git commit -q --allow-empty -m commit1
|
||||
git checkout -q -b commit2 commit1
|
||||
git commit -q --allow-empty -m commit2
|
||||
git checkout -q -b commit3 commit2
|
||||
git commit -q --allow-empty -m commit3
|
||||
|
||||
git show-ref -s commit1 | git commit-graph write --no-progress --split=no-merge --stdin-commits
|
||||
git show-ref -s commit2 | git commit-graph write --no-progress --split=no-merge --stdin-commits
|
||||
git show-ref -s commit3 | git commit-graph write --no-progress --split=no-merge --stdin-commits
|
||||
git repack -adq
|
||||
16
src-commitgraph/tests/fixtures/two_parents.sh
Executable file
16
src-commitgraph/tests/fixtures/two_parents.sh
Executable file
@@ -0,0 +1,16 @@
|
||||
#!/usr/bin/env bash
|
||||
set -eu -o pipefail
|
||||
|
||||
git init -q
|
||||
|
||||
git checkout -q --orphan parent1
|
||||
git commit -q --allow-empty -m parent1
|
||||
|
||||
git checkout -q --orphan parent2
|
||||
git commit -q --allow-empty -m parent2
|
||||
|
||||
git checkout -q -b child parent1
|
||||
git merge -q --allow-unrelated-histories --no-ff -m child parent2 >/dev/null
|
||||
|
||||
git commit-graph write --no-progress --reachable
|
||||
git repack -adq
|
||||
Reference in New Issue
Block a user