create src

This commit is contained in:
awfixer
2026-03-11 02:04:19 -07:00
commit 52f7a22bf2
2595 changed files with 402870 additions and 0 deletions

1047
src-status/CHANGELOG.md Normal file

File diff suppressed because it is too large Load Diff

51
src-status/Cargo.toml Normal file
View File

@@ -0,0 +1,51 @@
# Lint configuration is inherited from the workspace.
lints.workspace = true
[package]
name = "src-status"
version = "0.27.0"
repository = "https://github.com/GitoxideLabs/gitoxide"
license = "MIT OR Apache-2.0"
description = "A crate of the gitoxide project dealing with 'git status'-like functionality"
authors = ["Sebastian Thiel <sebastian.thiel@icloud.com>", "Pascal Kuthe <pascal.kuthe@semimod.de>"]
edition = "2021"
# Only sources and license files are part of the published package.
include = ["src/**/*", "LICENSE-*"]
rust-version = "1.82"
# Disable automatic discovery of integration test targets.
autotests = false
[lib]
# Documentation examples are not compiled or run as tests.
doctest = false
[features]
## Enable support for the SHA-1 hash by enabling the respective feature in the `src-hash` crate.
sha1 = ["src-hash/sha1"]
## Add support for tracking rewrites along with checking for worktree modifications.
worktree-rewrites = ["dep:src-dir", "dep:src-diff"]
[dependencies]
# Sibling crates, each referenced by both version and relative path.
src-index = { version = "^0.48.0", path = "../src-index" }
src-fs = { version = "^0.19.1", path = "../src-fs" }
src-hash = { version = "^0.22.1", path = "../src-hash" }
src-object = { version = "^0.57.0", path = "../src-object" }
src-path = { version = "^0.11.1", path = "../src-path" }
src-features = { version = "^0.46.1", path = "../src-features", features = ["progress"] }
src-filter = { version = "^0.27.0", path = "../src-filter" }
src-worktree = { version = "^0.49.0", path = "../src-worktree", default-features = false, features = ["attributes"] }
src-pathspec = { version = "^0.16.0", path = "../src-pathspec" }
# The following two are optional and only enabled by the `worktree-rewrites` feature.
src-dir = { version = "^0.22.0", path = "../src-dir", optional = true }
src-diff = { version = "^0.60.0", path = "../src-diff", default-features = false, features = ["blob"], optional = true }
thiserror = "2.0.18"
filetime = "0.2.27"
bstr = { version = "1.12.0", default-features = false }
# Optional; listed among the features enabled for docs.rs builds below.
document-features = { version = "0.2.0", optional = true }
# Extra dependency for targets that lack native 64-bit atomics.
[target.'cfg(not(target_has_atomic = "64"))'.dependencies]
portable-atomic = "1"
[dev-dependencies]
# Tests always build `src-hash` with SHA-1 support.
src-hash = { path = "../src-hash", features = ["sha1"] }
[package.metadata.docs.rs]
features = ["sha1", "document-features", "worktree-rewrites"]

1
src-status/LICENSE-APACHE Symbolic link
View File

@@ -0,0 +1 @@
../LICENSE-APACHE

1
src-status/LICENSE-MIT Symbolic link
View File

@@ -0,0 +1 @@
../LICENSE-MIT

View File

@@ -0,0 +1,684 @@
use std::{
io,
path::Path,
slice::Chunks,
sync::atomic::{AtomicUsize, Ordering},
};
use bstr::BStr;
use filetime::FileTime;
use gix_features::parallel::{in_parallel_if, Reduce};
use gix_filter::pipeline::convert::ToGitOutcome;
use gix_object::FindExt;
use crate::index_as_worktree::types::ConflictIndexEntry;
use crate::{
index_as_worktree::{
traits,
traits::{read_data::Stream, CompareBlobs, SubmoduleStatus},
types::{Error, Options},
Change, Conflict, Context, EntryStatus, Outcome, VisitEntry,
},
is_dir_to_mode, AtomicU64, SymlinkCheck,
};
/// Calculates the changes that need to be applied to an `index` to match the state of the `worktree` and makes them
/// observable in `collector`, along with information produced by `compare` which gets to see blobs that may have changes, and
/// `submodule` which can take a look at submodules in detail to produce status information (BASE version if it's conflicting).
/// `options` are used to configure the operation.
///
/// Note `worktree` must be the root path of the worktree, not a path inside of the worktree.
///
/// Note that `index` may require changes to be up-to-date with the working tree and avoid expensive computations by updating
/// respective entries with stat information from the worktree, and its timestamp is adjusted to the current time for which it
/// will be considered fresh. All changes that would be applied to the index are delegated to the caller, which receives these
/// as [`EntryStatus`].
/// The `pathspec` is used to determine which index entries to check for status in the first place.
///
/// `objects` is used to access the version of an object in the object database for direct comparison.
///
/// `progress` is initialized with the number of entries to check, and its counter is incremented once per processed entry.
///
/// **It's important to note that the `index` should have its [timestamp updated](gix_index::State::set_timestamp()) with a timestamp
/// from just before making this call *if* [entries were updated](Outcome::entries_to_update)**
///
/// ### Note
///
/// Technically, this function does more as it also provides additional information, like whether a file has conflicts,
/// and files that were added with `git add` are shown as a special case as well. It also provides updates to entry filesystem
/// stats like `git status` would if it had to determine the hash.
/// If that happened, the index should be written back after updating the entries with these updated stats, see [Outcome::skipped].
///
/// Thus, some care has to be taken to do the right thing when letting the index match the worktree by evaluating the changes observed
/// by the `collector`.
///
/// ### Errors
///
/// The first error produced while processing an entry is returned once it reaches the reducer; entries seen before
/// it were already passed to `collector`.
#[allow(clippy::too_many_arguments)]
pub fn index_as_worktree<'index, T, U, Find, E>(
index: &'index gix_index::State,
worktree: &Path,
collector: &mut impl VisitEntry<'index, ContentChange = T, SubmoduleStatus = U>,
compare: impl CompareBlobs<Output = T> + Send + Clone,
submodule: impl SubmoduleStatus<Output = U, Error = E> + Send + Clone,
objects: Find,
progress: &mut dyn gix_features::progress::Progress,
Context {
pathspec,
stack,
filter,
should_interrupt,
}: Context<'_>,
options: Options,
) -> Result<Outcome, Error>
where
T: Send,
U: Send,
E: std::error::Error + Send + Sync + 'static,
Find: gix_object::Find + Send + Clone,
{
// The order is absolutely critical here: we use the old timestamp to detect racy index entries
// (modified at or after the last index update). During the index update we then set the size of
// those entries to 0 (see below) to ensure they keep showing up as racy, and reset the timestamp.
let timestamp = index.timestamp();
let (chunk_size, thread_limit, _) = gix_features::parallel::optimize_chunk_size_and_thread_limit(
500, // just like git
index.entries().len().into(),
options.thread_limit,
None,
);
// Restrict the work to entries below the common prefix of the pathspec,
// or fall back to all entries if no such range is available.
let range = index
.prefixed_entries_range(pathspec.common_prefix())
.unwrap_or(0..index.entries().len());
let (entries, path_backing) = (index.entries(), index.path_backing());
// At this point `num_entries` is the total amount of index entries, which is needed to compute how
// many were skipped by the common prefix. It is reassigned to the amount of entries to process below.
let mut num_entries = entries.len();
let entry_index_offset = range.start;
let entries = &entries[range];
let _span = gix_features::trace::detail!("gix_status::index_as_worktree",
num_entries = entries.len(),
chunk_size = chunk_size,
thread_limit = ?thread_limit);
let entries_skipped_by_common_prefix = num_entries - entries.len();
// Statistics counters which are shared by reference with each `State` and read into the `Outcome` at the end.
let (skipped_by_pathspec, skipped_by_entry_flags, symlink_metadata_calls, entries_to_update) = Default::default();
let (worktree_bytes, worktree_reads, odb_bytes, odb_reads, racy_clean) = Default::default();
num_entries = entries.len();
progress.init(entries.len().into(), gix_features::progress::count("files"));
let count = progress.counter();
// Produces the state handed to the worker closure below: a `State` along with the comparison,
// submodule, object-access and pathspec handles.
// NOTE(review): presumably invoked once per worker thread - confirm against `in_parallel_if`.
let new_state = {
let options = &options;
let (skipped_by_pathspec, skipped_by_entry_flags) = (&skipped_by_pathspec, &skipped_by_entry_flags);
let (symlink_metadata_calls, entries_to_update) = (&symlink_metadata_calls, &entries_to_update);
let (racy_clean, worktree_bytes) = (&racy_clean, &worktree_bytes);
let (worktree_reads, odb_bytes, odb_reads) = (&worktree_reads, &odb_bytes, &odb_reads);
move |_| {
(
State {
buf: Vec::new(),
buf2: Vec::new(),
attr_stack: stack,
path_stack: SymlinkCheck::new(worktree.into()),
timestamp,
path_backing,
filter,
options,
skipped_by_pathspec,
skipped_by_entry_flags,
symlink_metadata_calls,
entries_to_update,
racy_clean,
worktree_reads,
worktree_bytes,
odb_reads,
odb_bytes,
},
compare,
submodule,
objects,
pathspec,
)
}
};
in_parallel_if(
|| true, // TODO: heuristic: when is parallelization not worth it? Git says 500 items per thread, but to 20 threads, we can be more fine-grained though.
// Each item is a chunk of entries along with the absolute index of its first entry.
gix_features::interrupt::Iter::new(
OffsetIter {
inner: entries.chunks(chunk_size),
offset: entry_index_offset,
},
should_interrupt,
),
thread_limit,
new_state,
|(entry_offset, chunk_entries), (state, blobdiff, submdule, objects, pathspec)| {
let all_entries = index.entries();
let mut out = Vec::new();
let mut idx = 0;
// `idx` is advanced manually as a conflict may consume more than one entry at a time.
while let Some(entry) = chunk_entries.get(idx) {
let absolute_entry_index = entry_offset + idx;
// A chunk may start in the middle of a conflict whose first entry is part of the previous chunk.
// That conflict is handled where it starts, so its remaining entries must be skipped here.
if idx == 0 && entry.stage_raw() != 0 {
let offset = entry_offset.checked_sub(1).and_then(|prev_idx| {
let prev_entry = &all_entries[prev_idx];
let entry_path = entry.path_in(state.path_backing);
if prev_entry.stage_raw() == 0 || prev_entry.path_in(state.path_backing) != entry_path {
// prev_entry (in previous chunk) does not belong to our conflict
return None;
}
Conflict::try_from_entry(all_entries, state.path_backing, absolute_entry_index, entry_path)
.map(|(_conflict, offset, _entries)| offset)
});
if let Some(entries_to_skip_as_conflict_originates_in_previous_chunk) = offset {
// skip current entry as it's done, along with following conflict entries
idx += entries_to_skip_as_conflict_originates_in_previous_chunk + 1;
continue;
}
}
// `process` may advance `idx` further to skip the remaining entries of a conflict that starts here.
let res = state.process(
all_entries,
entry,
absolute_entry_index,
pathspec,
blobdiff,
submdule,
objects,
&mut idx,
);
idx += 1;
count.fetch_add(1, Ordering::Relaxed);
if let Some(res) = res {
out.push(res);
}
}
out
},
// Forwards each result to the `collector`, or fails with the first error.
ReduceChange {
collector,
entries: index.entries(),
},
)?;
Ok(Outcome {
entries_to_process: num_entries,
entries_processed: count.load(Ordering::Relaxed),
entries_skipped_by_common_prefix,
entries_skipped_by_pathspec: skipped_by_pathspec.load(Ordering::Relaxed),
entries_skipped_by_entry_flags: skipped_by_entry_flags.load(Ordering::Relaxed),
entries_to_update: entries_to_update.load(Ordering::Relaxed),
symlink_metadata_calls: symlink_metadata_calls.load(Ordering::Relaxed),
racy_clean: racy_clean.load(Ordering::Relaxed),
worktree_files_read: worktree_reads.load(Ordering::Relaxed),
worktree_bytes: worktree_bytes.load(Ordering::Relaxed),
odb_objects_read: odb_reads.load(Ordering::Relaxed),
odb_bytes: odb_bytes.load(Ordering::Relaxed),
})
}
struct State<'a, 'b> {
buf: Vec<u8>,
buf2: Vec<u8>,
timestamp: FileTime,
/// This is the cheap stack that only assure that we don't go through symlinks.
/// It's always used to get the path to perform an lstat on.
path_stack: SymlinkCheck,
/// This is the expensive stack that will need to check for `.gitattributes` files each time
/// it changes directory. It's only used when we know we have to read a worktree file, which in turn
/// requires attributes to drive the filter configuration.
attr_stack: gix_worktree::Stack,
filter: gix_filter::Pipeline,
path_backing: &'b gix_index::PathStorageRef,
options: &'a Options,
skipped_by_pathspec: &'a AtomicUsize,
skipped_by_entry_flags: &'a AtomicUsize,
symlink_metadata_calls: &'a AtomicUsize,
entries_to_update: &'a AtomicUsize,
racy_clean: &'a AtomicUsize,
worktree_bytes: &'a AtomicU64,
worktree_reads: &'a AtomicUsize,
odb_bytes: &'a AtomicU64,
odb_reads: &'a AtomicUsize,
}
type StatusResult<'index, T, U> = Result<(&'index gix_index::Entry, usize, &'index BStr, EntryStatus<T, U>), Error>;
impl<'index> State<'_, 'index> {
/// Determine the status of a single `entry`, which is located at `entry_index` within `entries`.
///
/// Returns `None` if the entry is skipped due to its flags, if it is excluded by `pathspec`,
/// or if no change was detected.
/// Conflicting entries (those with a non-zero stage) are reported as a single conflict, and
/// `outer_entry_index` is advanced so that the loop of the caller skips the remaining entries of that conflict.
#[allow(clippy::too_many_arguments)]
fn process<T, U, Find, E>(
&mut self,
entries: &'index [gix_index::Entry],
entry: &'index gix_index::Entry,
entry_index: usize,
pathspec: &mut gix_pathspec::Search,
diff: &mut impl CompareBlobs<Output = T>,
submodule: &mut impl SubmoduleStatus<Output = U, Error = E>,
objects: &Find,
outer_entry_index: &mut usize,
) -> Option<StatusResult<'index, T, U>>
where
E: std::error::Error + Send + Sync + 'static,
Find: gix_object::Find,
{
// Entries carrying any of these flags are not compared to the worktree at all.
if entry.flags.intersects(
gix_index::entry::Flags::UPTODATE
| gix_index::entry::Flags::SKIP_WORKTREE
| gix_index::entry::Flags::ASSUME_VALID
| gix_index::entry::Flags::FSMONITOR_VALID,
) {
self.skipped_by_entry_flags.fetch_add(1, Ordering::Relaxed);
return None;
}
let path = entry.path_in(self.path_backing);
// An entry is excluded if no pattern matches it, or if the matching pattern is an exclusion.
let is_excluded = pathspec
.pattern_matching_relative_path(
path,
Some(entry.mode.is_submodule()),
&mut |relative_path, case, is_dir, out| {
self.attr_stack
.set_case(case)
.at_entry(relative_path, Some(is_dir_to_mode(is_dir)), objects)
.is_ok_and(|platform| platform.matching_attributes(out))
},
)
.is_none_or(|m| m.is_excluded());
if is_excluded {
self.skipped_by_pathspec.fetch_add(1, Ordering::Relaxed);
return None;
}
let status = if entry.stage_raw() != 0 {
Ok(
Conflict::try_from_entry(entries, self.path_backing, entry_index, path).map(
|(conflict, offset, entries)| {
*outer_entry_index += offset; // let the outer loop skip over entries related to the conflict
EntryStatus::Conflict {
summary: conflict,
entries: Box::new({
// Convert the up to three borrowed entries into their owned representation.
let mut a: [Option<ConflictIndexEntry>; 3] = Default::default();
let src = entries.into_iter().map(|e| e.map(ConflictIndexEntry::from));
for (a, b) in a.iter_mut().zip(src) {
*a = b;
}
a
}),
}
},
),
)
} else {
self.compute_status(entry, path, diff, submodule, objects)
};
match status {
Ok(None) => None,
Ok(Some(status)) => Some(Ok((entry, entry_index, path, status))),
Err(err) => Some(Err(err)),
}
}
/// Compute the status of `entry` at `rela_path` by comparing it with what is found in the worktree,
/// or return `Ok(None)` if the entry is unchanged.
///
/// # On how racy-git is handled here
///
/// Basically the racy detection is a safety mechanism that ensures we can always just compare the stat
/// information between index and worktree and if they match we don't need to look at the content.
/// This usually just works but if a file updates quickly we could run into the following situation:
///
/// * save file version `A` from disk into worktree (git add)
/// * file is changed so fast that the mtime doesn't change - *we only look at seconds by default*
/// * file contents change but file-size stays the same, so `"foo" -> "bar"` has the same size but different content
///
/// Now both `mtime` and `size`, and all other stat information, are the same but the file has actually changed.
/// This case is called *racily clean*. *The file should show up as changed but due to a data race it doesn't.*
/// This is the racy git problem.
///
/// To solve this we do the following trick: Whenever we modify the index, which includes `git status`, we save the
/// current timestamp before the modification starts. This timestamp fundamentally represents a checkpoint of sorts.
/// We "promise" ourselves that after the modification finishes all entries modified before this timestamp have the
/// racy git problem resolved.
///
/// So now when we modify the index we must resolve the racy git problem somehow. To do that we only need to look at
/// unchanged entries. Changed entries are not interesting since they are already showing up as changed anyway so there
/// isn't really a race-condition to worry about. This also explains why removing the `return` here doesn't have an apparent effect.
/// This entire branch here is just the optimization of "don't even look at index entries where the stat hasn't changed".
/// If we don't have this optimization the result shouldn't change, our status implementation will just be super slow :D
///
/// We calculate whether this change is `racy_clean`, so if the last `timestamp` is before or the same as the `mtime` of the entry
/// which is what `new_stat.is_racy(..)` does in the branch, and only if we are sure that there is no race condition
/// do we `return` early. If we don't `return` early we just do a full content comparison below,
/// which always yields the correct result, there is no race condition there.
///
/// If a file showed up as racily clean and didn't change then we don't need to do anything. After this status check is
/// complete the file won't show up as racily clean anymore, since its mtime is now before the new timestamp.
/// However, if the file did actually change then we really ran into one of those rare race conditions. In that case we,
/// and git does the same, set the size of the file in the index to 0. This will always make the file show up as changed.
/// This adds the need to treat all files of size 0 in the index as changed. This is not quite right of course because 0 sized files
/// could be entirely valid and unchanged. Therefore this only applies if the oid doesn't match the oid of an empty file,
/// which is a constant.
///
/// Adapted from [here](https://github.com/GitoxideLabs/gitoxide/pull/805#discussion_r1164676777).
fn compute_status<T, U, Find, E>(
&mut self,
entry: &gix_index::Entry,
rela_path: &BStr,
diff: &mut impl CompareBlobs<Output = T>,
submodule: &mut impl SubmoduleStatus<Output = U, Error = E>,
objects: &Find,
) -> Result<Option<EntryStatus<T, U>>, Error>
where
E: std::error::Error + Send + Sync + 'static,
Find: gix_object::Find,
{
// If the path can't be verified due to a symlink step error or because it isn't found,
// the entry counts as removed.
let worktree_path = match self.path_stack.verified_path(gix_path::from_bstr(rela_path).as_ref()) {
Ok(path) => path,
Err(err) if crate::stack::is_symlink_step_error(&err) => return Ok(Some(Change::Removed.into())),
Err(err) if gix_fs::io_err::is_not_found(err.kind(), err.raw_os_error()) => {
return Ok(Some(Change::Removed.into()))
}
Err(err) => return Err(Error::Io(err.into())),
};
self.symlink_metadata_calls.fetch_add(1, Ordering::Relaxed);
let metadata = match gix_index::fs::Metadata::from_path_no_follow(worktree_path) {
Ok(metadata) if metadata.is_dir() => {
// index entries are normally only for files/symlinks
// if a file turned into a directory it was removed
// the only exception here are submodules which are
// part of the index despite being directories
if entry.mode.is_submodule() {
let status = submodule
.status(entry, rela_path)
.map_err(|err| Error::SubmoduleStatus {
rela_path: rela_path.into(),
source: Box::new(err),
})?;
return Ok(status.map(|status| Change::SubmoduleModification(status).into()));
} else {
return Ok(Some(Change::Removed.into()));
}
}
Ok(metadata) => metadata,
Err(err) if gix_fs::io_err::is_not_found(err.kind(), err.raw_os_error()) => {
return Ok(Some(Change::Removed.into()))
}
Err(err) => {
return Err(Error::Io(err.into()));
}
};
// The file exists on disk, so entries flagged as intent-to-add are reported as such without looking at their content.
if entry.flags.contains(gix_index::entry::Flags::INTENT_TO_ADD) {
return Ok(Some(EntryStatus::IntentToAdd));
}
let new_stat = gix_index::entry::Stat::from_fs(&metadata)?;
// A change of the entry type is reported right away, whereas a change of the executable bit
// is remembered and reported along with a possible content change.
let executable_bit_changed =
match entry
.mode
.change_to_match_fs(&metadata, self.options.fs.symlink, self.options.fs.executable_bit)
{
Some(gix_index::entry::mode::Change::Type { new_mode }) => {
return Ok(Some(
Change::Type {
worktree_mode: new_mode,
}
.into(),
))
}
Some(gix_index::entry::mode::Change::ExecutableBit) => true,
None => false,
};
// We implement racy-git. See racy-git.txt in the git documentation for detailed documentation.
//
// A file is racy if:
// 1. Its `mtime` is at or after the last index timestamp and its entry stat information
//    matches the on-disk file, but the file contents are actually modified
// 2. Its size is 0 (set after detecting a file was racy previously)
//
// The first case is detected below by checking the timestamp if the file is marked unmodified.
// The second case is usually detected either because the on-disk file is not empty, hence
// the basic stat match fails, or by checking whether the size doesn't fit the oid.
let mut racy_clean = false;
if !executable_bit_changed
&& new_stat.matches(&entry.stat, self.options.stat)
// TODO: find a test for the following line or remove it. Is this more often hit with smudge/clean filters?
&& (!entry.id.is_empty_blob() || entry.stat.size == 0)
{
racy_clean = new_stat.is_racy(self.timestamp, self.options.stat);
if !racy_clean {
// The stat information matches and the entry isn't racy, so it's unchanged.
return Ok(None);
} else {
self.racy_clean.fetch_add(1, Ordering::Relaxed);
}
}
// From here on the content has to be compared.
self.buf.clear();
self.buf2.clear();
let file_size_bytes = if cfg!(windows) && metadata.is_symlink() {
// symlinks on Windows seem to have a length of zero, so just pretend
// they have the correct length to avoid short-cutting, and enforce a full buffer check.
u64::from(entry.stat.size)
} else {
metadata.len()
};
let fetch_data = ReadDataImpl {
buf: &mut self.buf,
path: worktree_path,
rela_path,
entry,
file_len: file_size_bytes,
filter: &mut self.filter,
attr_stack: &mut self.attr_stack,
core_symlinks:
// If this is legitimately a symlink, then pretend symlinks are enabled as the option seems stale.
// Otherwise, respect the option.
if metadata.is_symlink()
&& entry.mode.to_tree_entry_mode().map(|m| m.kind()) == Some(gix_object::tree::EntryKind::Link)
{
true
} else {
self.options.fs.symlink
},
id: &entry.id,
objects,
worktree_reads: self.worktree_reads,
worktree_bytes: self.worktree_bytes,
odb_reads: self.odb_reads,
odb_bytes: self.odb_bytes,
};
let content_change = diff.compare_blobs(entry, file_size_bytes, fetch_data, &mut self.buf2)?;
if content_change.is_some() || executable_bit_changed {
// If the content changed, this file was only racily clean! Ask for the size to be set to 0
// so we keep detecting this as the file is updated.
let set_entry_stat_size_zero = content_change.is_some() && racy_clean;
Ok(Some(
Change::Modification {
executable_bit_changed,
content_change,
set_entry_stat_size_zero,
}
.into(),
))
} else {
// The content is unchanged, but the stat information of the entry should be refreshed.
self.entries_to_update.fetch_add(1, Ordering::Relaxed);
Ok(Some(EntryStatus::NeedsUpdate(new_stat)))
}
}
}
/// A reducer that hands every successfully computed entry status to the `collector`.
struct ReduceChange<'a, 'index, T: VisitEntry<'index>> {
    /// The visitor receiving each status.
    collector: &'a mut T,
    /// All entries of the index, passed along with every visited entry.
    entries: &'index [gix_index::Entry],
}

impl<'index, T, U, C: VisitEntry<'index, ContentChange = T, SubmoduleStatus = U>> Reduce
    for ReduceChange<'_, 'index, C>
{
    type Input = Vec<StatusResult<'index, T, U>>;
    type FeedProduce = ();
    type Output = ();
    type Error = Error;

    /// Visit all `items` in order, stopping at the first one that is an error.
    fn feed(&mut self, items: Self::Input) -> Result<Self::FeedProduce, Self::Error> {
        items.into_iter().try_for_each(|result| {
            let (entry, entry_index, path, status) = result?;
            self.collector
                .visit_entry(self.entries, entry, entry_index, path, status);
            Ok(())
        })
    }

    /// There is nothing to produce as all results were already passed to the collector.
    fn finalize(self) -> Result<Self::Output, Self::Error> {
        Ok(())
    }
}
struct ReadDataImpl<'a, Find>
where
Find: gix_object::Find,
{
buf: &'a mut Vec<u8>,
path: &'a Path,
rela_path: &'a BStr,
file_len: u64,
entry: &'a gix_index::Entry,
filter: &'a mut gix_filter::Pipeline,
attr_stack: &'a mut gix_worktree::Stack,
core_symlinks: bool,
id: &'a gix_hash::oid,
objects: Find,
worktree_bytes: &'a AtomicU64,
worktree_reads: &'a AtomicUsize,
odb_bytes: &'a AtomicU64,
odb_reads: &'a AtomicUsize,
}
impl<'a, Find> traits::ReadData<'a> for ReadDataImpl<'a, Find>
where
Find: gix_object::Find,
{
fn read_blob(self) -> Result<&'a [u8], Error> {
Ok(self.objects.find_blob(self.id, self.buf).map(|b| {
self.odb_reads.fetch_add(1, Ordering::Relaxed);
self.odb_bytes.fetch_add(b.data.len() as u64, Ordering::Relaxed);
b.data
})?)
}
fn stream_worktree_file(self) -> Result<Stream<'a>, Error> {
self.buf.clear();
// symlinks are only stored as actual symlinks if the FS supports it otherwise they are just
// normal files with their content equal to the linked path (so can be read normally)
//
let is_symlink = self.entry.mode == gix_index::entry::Mode::SYMLINK;
// TODO: what to do about precompose unicode and ignore_case for symlinks
let out = if is_symlink && self.core_symlinks {
let symlink_path = gix_path::to_unix_separators_on_windows(gix_path::into_bstr(
std::fs::read_link(self.path).map_err(gix_hash::io::Error::from)?,
));
self.buf.extend_from_slice(&symlink_path);
self.worktree_bytes.fetch_add(self.buf.len() as u64, Ordering::Relaxed);
Stream {
inner: ToGitOutcome::Buffer(self.buf),
bytes: None,
len: None,
}
} else {
self.buf.clear();
let platform = self
.attr_stack
.at_entry(self.rela_path, Some(self.entry.mode), &self.objects)
.map_err(gix_hash::io::Error::from)?;
let file = std::fs::File::open(self.path).map_err(gix_hash::io::Error::from)?;
let out = self
.filter
.convert_to_git(
file,
self.path,
&mut |_path, attrs| {
platform.matching_attributes(attrs);
},
&mut |buf| Ok(self.objects.find_blob(self.id, buf).map(|_| Some(()))?),
)
.map_err(|err| Error::Io(io::Error::other(err).into()))?;
let len = match out {
ToGitOutcome::Unchanged(_) => Some(self.file_len),
ToGitOutcome::Process(_) | ToGitOutcome::Buffer(_) => None,
};
Stream {
inner: out,
bytes: Some(self.worktree_bytes),
len,
}
};
self.worktree_reads.fetch_add(1, Ordering::Relaxed);
Ok(out)
}
}
/// An iterator over chunks of a slice that pairs each chunk with the index of its first item,
/// counted from the initial `offset`.
struct OffsetIter<'a, T> {
    /// The chunks to yield.
    inner: Chunks<'a, T>,
    /// The index of the first item of the chunk that is yielded next.
    offset: usize,
}

impl<'a, T> Iterator for OffsetIter<'a, T> {
    type Item = (usize, &'a [T]);

    fn next(&mut self) -> Option<Self::Item> {
        self.inner.next().map(|chunk| {
            let chunk_start = self.offset;
            self.offset += chunk.len();
            (chunk_start, chunk)
        })
    }
}
impl Conflict {
    /// Given `entries` and `path_backing`, both values obtained from an [index](gix_index::State), use `start_index` and enumerate
    /// all conflict stages that still match `entry_path` to produce a conflict description.
    /// Also return the amount of extra-entries that were part of the conflict declaration (not counting the entry at `start_index`).
    ///
    /// At most three entries, starting at `start_index`, are examined, and each entry with a non-zero stage
    /// and a path equal to `entry_path` is placed into the returned array at position `stage - 1`.
    ///
    /// If for some reason entry at `start_index` isn't in conflicting state, `None` is returned.
    ///
    /// Return `(Self, num_consumed_entries, three_possibly_entries)`.
    pub fn try_from_entry<'entry>(
        entries: &'entry [gix_index::Entry],
        path_backing: &gix_index::PathStorageRef,
        start_index: usize,
        entry_path: &BStr,
    ) -> Option<(Self, usize, [Option<&'entry gix_index::Entry>; 3])> {
        use Conflict::*;
        let mut mask = None::<u8>;
        let mut seen: [Option<&gix_index::Entry>; 3] = Default::default();
        let mut num_matching_entries = 0_usize;
        for (stage, entry) in (start_index..(start_index + 3).min(entries.len())).filter_map(|idx| {
            let entry = &entries[idx];
            let stage = entry.stage_raw();
            (stage > 0 && entry.path_in(path_backing) == entry_path).then_some((stage, entry))
        }) {
            // This could be `1 << (stage - 1)` but let's be specific.
            *mask.get_or_insert(0) |= match stage {
                1 => 0b001,
                2 => 0b010,
                3 => 0b100,
                _ => 0,
            };
            // The stage only determines the slot of the entry. It must not be used to derive the amount
            // of consumed entries, as stages may be missing: a conflict consisting only of stages 2 and 3
            // spans two entries, not three.
            seen[stage as usize - 1] = Some(entry);
            num_matching_entries += 1;
        }
        mask.map(|mask| {
            (
                match mask {
                    0b001 => BothDeleted,
                    0b010 => AddedByUs,
                    0b011 => DeletedByThem,
                    0b100 => AddedByThem,
                    0b101 => DeletedByUs,
                    0b110 => BothAdded,
                    0b111 => BothModified,
                    _ => unreachable!("BUG: bitshifts and typical entry layout doesn't allow for more"),
                },
                // `mask` is only set if at least one entry matched, so this can't underflow.
                // The entry at `start_index` itself isn't counted.
                num_matching_entries - 1,
                seen,
            )
        })
    }
}

View File

@@ -0,0 +1,11 @@
//! Changes between an index and a worktree.
/// Types for errors, options and the changes that are reported, all of which are re-exported below.
mod types;
pub use types::{Change, Conflict, ConflictIndexEntry, Context, EntryStatus, Error, Options, Outcome, VisitEntry};
/// A [`VisitEntry`] implementation that records everything it sees.
mod recorder;
pub use recorder::{Record, Recorder};
/// The implementation of the status computation.
pub(super) mod function;
/// Traits to customize how blobs are compared, how the status of submodules is determined, and how data is read.
pub mod traits;

View File

@@ -0,0 +1,47 @@
use bstr::BStr;
use gix_index as index;
use crate::index_as_worktree::{EntryStatus, VisitEntry};
/// A record of a change.
///
/// It's created either if there is a conflict or a change, or both.
/// All fields are exactly what was passed to [`VisitEntry::visit_entry()`] when it was recorded by a [`Recorder`].
#[derive(Debug, Clone)]
pub struct Record<'index, T, U> {
/// The index entry that is changed.
pub entry: &'index index::Entry,
/// The index of the `entry` relative to all entries in the input index.
pub entry_index: usize,
/// The path to the entry.
pub relative_path: &'index BStr,
/// The status information itself.
pub status: EntryStatus<T, U>,
}
/// A [`VisitEntry`] implementation for convenience, which stores each non-trivial change it sees in a `Vec`.
#[derive(Debug, Default)]
pub struct Recorder<'index, T = (), U = ()> {
    /// The changes in the order they were visited. Index entries without conflicts or changes are excluded.
    pub records: Vec<Record<'index, T, U>>,
}

impl<'index, T: Send, U: Send> VisitEntry<'index> for Recorder<'index, T, U> {
    type ContentChange = T;
    type SubmoduleStatus = U;

    /// Append a [`Record`] made of the given values, without looking at `_entries`.
    fn visit_entry(
        &mut self,
        _entries: &'index [index::Entry],
        entry: &'index index::Entry,
        entry_index: usize,
        relative_path: &'index BStr,
        status: EntryStatus<Self::ContentChange, Self::SubmoduleStatus>,
    ) {
        let record = Record {
            entry,
            entry_index,
            relative_path,
            status,
        };
        self.records.push(record);
    }
}

View File

@@ -0,0 +1,169 @@
use std::{io::Read, sync::atomic::AtomicBool};
use bstr::BStr;
use gix_hash::ObjectId;
use gix_index as index;
use index::Entry;
use crate::index_as_worktree::Error;
/// Compares the content of two blobs in some way.
pub trait CompareBlobs {
/// Output data produced by [`compare_blobs()`][CompareBlobs::compare_blobs()].
type Output;
/// Providing the underlying index `entry`, allow comparing a file in the worktree of size `worktree_blob_size`
/// and allow streaming its bytes using `data`.
/// If this function returns `None` the `entry` and the worktree blob are assumed to be identical.
/// Use `data` to obtain the data for the blob referred to by `entry`, allowing comparisons of the data itself.
/// `buf` can be used to store additional data, and it can be assumed to be a cleared buffer.
// NOTE(review): the `'b` lifetime parameter isn't used in this signature, but it is repeated by the implementations.
fn compare_blobs<'a, 'b>(
&mut self,
entry: &gix_index::Entry,
worktree_blob_size: u64,
data: impl ReadData<'a>,
buf: &mut Vec<u8>,
) -> Result<Option<Self::Output>, Error>;
}
/// Determine the status of a submodule, which always indicates that it changed if present.
pub trait SubmoduleStatus {
/// The status result, describing in which way the submodule changed.
type Output;
/// A custom error that may occur while computing the submodule status.
type Error: std::error::Error + Send + Sync + 'static;
/// Compute the status of the submodule at `entry` and `rela_path`, or return `None` if no change was detected.
///
/// `rela_path` is the path of `entry`.
/// NOTE(review): presumably relative to the root of the worktree - confirm with the caller.
fn status(&mut self, entry: &gix_index::Entry, rela_path: &BStr) -> Result<Option<Self::Output>, Self::Error>;
}
/// Lazy borrowed access to worktree or blob data, with streaming support for worktree files.
///
/// Both methods consume the instance, so only one of them can be called, and only once.
pub trait ReadData<'a> {
/// Returns the contents of this blob.
///
/// This potentially performs IO and other expensive operations
/// and should only be called when necessary.
fn read_blob(self) -> Result<&'a [u8], Error>;
/// Stream a worktree file in such a manner that its content matches what would be put into git.
fn stream_worktree_file(self) -> Result<read_data::Stream<'a>, Error>;
}
/// Types related to reading worktree data.
pub mod read_data {
    use std::sync::atomic::Ordering;
    use gix_filter::pipeline::convert::ToGitOutcome;
    use crate::AtomicU64;

    /// A stream with worktree file data.
    pub struct Stream<'a> {
        /// The data itself, which is either buffered or has to be read.
        pub(crate) inner: ToGitOutcome<'a, std::fs::File>,
        /// If set, the counter to which the amount of bytes obtained from this instance is added.
        pub(crate) bytes: Option<&'a AtomicU64>,
        /// The size of the stream in bytes, if known.
        pub(crate) len: Option<u64>,
    }

    impl<'a> Stream<'a> {
        /// Return the underlying byte-buffer if there is one.
        ///
        /// If `None`, read from this instance like a stream.
        /// Note that this method should only be called once to assure proper accounting of the amount of bytes read.
        pub fn as_bytes(&self) -> Option<&'a [u8]> {
            let buffer = self.inner.as_bytes()?;
            if let Some(counter) = self.bytes {
                counter.fetch_add(buffer.len() as u64, Ordering::Relaxed);
            }
            Some(buffer)
        }

        /// Return the size of the stream in bytes if it is known in advance.
        pub fn size(&self) -> Option<u64> {
            self.len
        }
    }

    impl std::io::Read for Stream<'_> {
        /// Read from the underlying data and add the amount of bytes read to the counter, if there is one.
        fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
            let num_bytes_read = self.inner.read(buf)?;
            if let Some(counter) = self.bytes {
                counter.fetch_add(num_bytes_read as u64, Ordering::Relaxed);
            }
            Ok(num_bytes_read)
        }
    }
}
/// Compares two blobs by comparing their size and oid, and only looks at the file if
/// the size matches, therefore it's very fast.
#[derive(Clone)]
pub struct FastEq;

impl CompareBlobs for FastEq {
    type Output = ();

    // TODO: make all streaming IOPs interruptible.
    fn compare_blobs<'a, 'b>(
        &mut self,
        entry: &Entry,
        worktree_file_size: u64,
        data: impl ReadData<'a>,
        buf: &mut Vec<u8>,
    ) -> Result<Option<Self::Output>, Error> {
        // make sure to account for racily smudged entries here so that they don't always keep
        // showing up as modified even after their contents have changed again, to a potentially
        // unmodified state. That means that we want to ignore stat.size == 0 for non_empty_blobs.
        let size_differs = u64::from(entry.stat.size) != worktree_file_size;
        if size_differs && (entry.id.is_empty_blob() || entry.stat.size != 0) {
            return Ok(Some(()));
        }
        // The size doesn't tell, so the hash has to decide. The hash itself isn't of interest though.
        let changed_id = HashEq.compare_blobs(entry, worktree_file_size, data, buf)?;
        Ok(changed_id.map(|_id| ()))
    }
}
/// Compares files to blobs by *always* comparing their hashes.
///
/// Same as [`FastEq`] but does not contain a fast path for files with mismatched files and
/// therefore always returns an OID that can be reused later.
#[derive(Clone)]
pub struct HashEq;
impl CompareBlobs for HashEq {
type Output = ObjectId;
fn compare_blobs<'a, 'b>(
&mut self,
entry: &Entry,
_worktree_blob_size: u64,
data: impl ReadData<'a>,
buf: &mut Vec<u8>,
) -> Result<Option<Self::Output>, Error> {
let mut stream = data.stream_worktree_file()?;
match stream.as_bytes() {
Some(buffer) => {
let file_hash = gix_object::compute_hash(entry.id.kind(), gix_object::Kind::Blob, buffer)
.map_err(gix_hash::io::Error::from)?;
Ok((entry.id != file_hash).then_some(file_hash))
}
None => {
let file_hash = match stream.size() {
None => {
stream.read_to_end(buf).map_err(gix_hash::io::Error::from)?;
gix_object::compute_hash(entry.id.kind(), gix_object::Kind::Blob, buf)
.map_err(gix_hash::io::Error::from)?
}
Some(len) => gix_object::compute_stream_hash(
entry.id.kind(),
gix_object::Kind::Blob,
&mut stream,
len,
&mut gix_features::progress::Discard,
&AtomicBool::default(),
)?,
};
Ok((entry.id != file_hash).then_some(file_hash))
}
}
}
}

View File

@@ -0,0 +1,252 @@
use std::sync::atomic::AtomicBool;
use bstr::{BStr, BString};
use gix_index::entry;
/// The error returned by [`index_as_worktree()`](crate::index_as_worktree()).
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
#[error("Could not convert path to UTF8")]
IllformedUtf8,
#[error("The clock was off when reading file related metadata after updating a file on disk")]
Time(#[from] std::time::SystemTimeError),
#[error("IO error while writing blob or reading file metadata or changing filetype")]
Io(#[from] gix_hash::io::Error),
#[error("Failed to obtain blob from object database")]
Find(#[from] gix_object::find::existing_object::Error),
#[error("Could not determine status for submodule at '{rela_path}'")]
SubmoduleStatus {
// The path of the submodule, as used in the error message.
rela_path: BString,
// The underlying error, boxed to allow any error type.
source: Box<dyn std::error::Error + Send + Sync + 'static>,
},
}
/// Options that control how the index status with a worktree is computed.
#[derive(Clone, Default, Debug, PartialEq, Eq, Hash)]
pub struct Options {
/// Capabilities of the file system which affect the status computation.
pub fs: gix_fs::Capabilities,
/// If set, don't use more than this number of threads.
/// Otherwise, usually use as many threads as there are logical cores.
/// A value of 0 is interpreted as no-limit.
pub thread_limit: Option<usize>,
/// Options that control how stat comparisons are made when checking if a file is fresh.
pub stat: gix_index::entry::stat::Options,
}
/// The context for [`index_as_worktree()`](crate::index_as_worktree()).
#[derive(Clone)]
pub struct Context<'a> {
/// The pathspec to limit the amount of paths that are checked. Can be empty to allow all paths.
///
/// Note that these are expected to have a [common_prefix()](gix_pathspec::Search::common_prefix()) according
/// to the prefix of the repository to efficiently limit the scope of the paths we process.
pub pathspec: gix_pathspec::Search,
/// A stack pre-configured to allow accessing attributes for each entry, as required for `filter`
/// and possibly pathspecs.
pub stack: gix_worktree::Stack,
/// A filter to be able to perform conversions from and to the worktree format.
///
/// It is needed to potentially refresh the index with data read from the worktree, which needs to be converted back
/// to the form stored in Git.
///
/// Note that for this to be correct, the attribute `stack` must be configured correctly as well.
pub filter: gix_filter::Pipeline,
/// A flag to query to learn if cancellation is requested.
pub should_interrupt: &'a AtomicBool,
}
/// Statistics gathered while [`index_as_worktree()`](crate::index_as_worktree()) was running.
#[derive(Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd)]
pub struct Outcome {
    /// The total amount of entries that is to be processed.
    pub entries_to_process: usize,
    /// The amount of entries that were actually processed. If this isn't the entire set, the operation was interrupted.
    pub entries_processed: usize,
    /// The amount of entries that weren't even traversed (and thus not updated with stat) due to a common prefix
    /// in pathspecs. This is similar to the current working directory.
    pub entries_skipped_by_common_prefix: usize,
    /// The amount of entries that were skipped due to exclusion by *pathspecs*.
    pub entries_skipped_by_pathspec: usize,
    /// The amount of entries that were skipped as the entry flag indicated this.
    pub entries_skipped_by_entry_flags: usize,
    /// The amount of times symlink-metadata was queried for a file on disk.
    pub symlink_metadata_calls: usize,
    /// The amount of entries whose stats would need to be updated as their modification couldn't be determined
    /// without an expensive calculation.
    ///
    /// With these updates, this calculation will be avoided next time the status runs.
    /// Note that the stat updates are delegated to the caller.
    pub entries_to_update: usize,
    /// The amount of entries that were considered racy-clean - they will need thorough checking to see if they
    /// are truly clean, i.e. didn't change.
    pub racy_clean: usize,
    /// The amount of bytes read from the worktree in order to determine if an entry changed, across all files.
    pub worktree_bytes: u64,
    /// The amount of files read in full from the worktree (and into memory).
    pub worktree_files_read: usize,
    /// The amount of bytes read from the object database in order to determine if an entry changed, across all objects.
    pub odb_bytes: u64,
    /// The amount of objects read from the object database.
    pub odb_objects_read: usize,
}
impl Outcome {
    /// The sum of all entries that were skipped for any reason and thus weren't processed at all.
    pub fn skipped(&self) -> usize {
        let by_prefix = self.entries_skipped_by_common_prefix;
        let by_pathspec = self.entries_skipped_by_pathspec;
        let by_flags = self.entries_skipped_by_entry_flags;
        by_prefix + by_pathspec + by_flags
    }
}
/// How an index entry needs to be changed to obtain the destination worktree state, i.e. `entry.apply(this_change) == worktree-entry`.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug)]
pub enum Change<T = (), U = ()> {
/// The corresponding file does not exist in the worktree anymore.
Removed,
/// The type of file changed compared to the worktree.
///
/// Examples include when a symlink is now a regular file, or a regular file was replaced with a named pipe.
///
/// ### Deviation
///
/// A change to a non-file is marked as `modification` in Git, but that's related to the content which we can't evaluate.
/// Hence, a type-change is considered more appropriate.
Type {
/// The mode the worktree file would have if it was added to the index, and the mode that differs compared
/// to what's currently stored in the index.
worktree_mode: gix_index::entry::Mode,
},
/// This worktree file was modified in some form, like a permission change or content change or both,
/// as compared to this entry.
Modification {
/// Indicates that one of the stat changes was an executable bit change
/// which is a significant change itself.
executable_bit_changed: bool,
/// The output of the [`CompareBlobs`](crate::index_as_worktree::traits::CompareBlobs) run on this entry.
/// If there is no content change and only the executable bit
/// changed then this is `None`.
content_change: Option<T>,
/// If true, the caller is expected to set [entry.stat.size = 0](gix_index::entry::Stat::size) to assure this
/// otherwise racily clean entry can still be detected as dirty next time this is called, but this time without
/// reading it from disk to hash it. It's a performance optimization and not doing so won't change the correctness
/// of the operation.
set_entry_stat_size_zero: bool,
},
/// A submodule is initialized and checked out, and at least one of the following applies:
///
/// * the `HEAD` differs from the superproject's desired commit for `HEAD`
/// * the worktree has at least one modified file
/// * there is at least one untracked file
///
/// The exact nature of the modification is handled by the caller which may retain information per submodule or
/// re-compute details as needed when seeing this variant.
SubmoduleModification(U),
}
/// A reduced form of [`gix_index::Entry`] that carries no disk-metadata.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct ConflictIndexEntry {
    /// The object id for this entry's ODB representation (assuming it's up-to-date with it).
    pub id: gix_hash::ObjectId,
    /// Additional flags for use in algorithms and for efficiently storing stage information, primarily
    /// to obtain the [stage](entry::Flags::stage()).
    pub flags: entry::Flags,
    /// The kind of item this entry represents - it's not all blobs in the index anymore.
    pub mode: entry::Mode,
}
impl From<&gix_index::Entry> for ConflictIndexEntry {
    /// Copy `id`, `flags` and `mode` of `entry`, leaving its `stat` and all remaining fields behind.
    fn from(entry: &gix_index::Entry) -> Self {
        Self {
            id: entry.id,
            flags: entry.flags,
            mode: entry.mode,
        }
    }
}
/// Information about the status of an index entry as compared to the worktree.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum EntryStatus<T = (), U = ()> {
/// The entry is in a conflicting state, and we provide all related entries along with a summary.
Conflict {
/// An analysis of the conflict itself based on the observed index entries.
summary: Conflict,
/// The entries from stage 1 to stage 3, where stage 1 is at index 0 and stage 3 at index 2.
/// Note that when there are conflicts, there is no stage 0.
/// Further, all entries are looking at the same path.
entries: Box<[Option<ConflictIndexEntry>; 3]>,
},
/// There is no conflict and a change was discovered.
Change(Change<T, U>),
/// The entry didn't change, but its state caused extra work that can be avoided next time if its stats were updated to the
/// given stat.
NeedsUpdate(
/// The new stats which represent what's currently in the working tree. If these replace the current stats in the entry,
/// next time this operation runs we can determine the actual state much faster.
gix_index::entry::Stat,
),
/// An index entry that corresponds to an untracked worktree file marked with `git add --intent-to-add`.
///
/// This means it's not available in the object database yet even though now an entry exists that represents the worktree file.
/// The entry represents the promise of adding a new file, no matter the actual stat or content.
/// Effectively this means nothing changed.
/// This also means the file is still present, and that no detailed change checks were performed.
IntentToAdd,
}
impl<T, U> From<Change<T, U>> for EntryStatus<T, U> {
fn from(value: Change<T, U>) -> Self {
EntryStatus::Change(value)
}
}
/// Describes a conflicting entry as a comparison between 'our' version and 'their' version of it.
///
/// If one side isn't specified, it is assumed to have modified the entry. In general, there would be no conflict
/// if both parties ended up in the same state.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
pub enum Conflict {
/// Both deleted a different version of the entry.
BothDeleted,
/// We added, they modified, ending up in different states.
AddedByUs,
/// They deleted the entry, we modified it.
DeletedByThem,
/// They added the entry, we modified it, ending up in different states.
AddedByThem,
/// We deleted the entry, they modified it, ending up in different states.
DeletedByUs,
/// Both added the entry in different states.
BothAdded,
/// Both modified the entry, ending up in different states.
BothModified,
}
/// Observe the status of an entry by comparing an index entry to the worktree.
pub trait VisitEntry<'index> {
/// Data generated by comparing an entry with a file.
type ContentChange;
/// Data obtained when checking the submodule status.
type SubmoduleStatus;
/// Observe the `status` of `entry` at the repository-relative `rela_path` at `entry_index`
/// (useful for accessing `entry` and its surrounding entries in the complete list of `entries`).
fn visit_entry(
&mut self,
entries: &'index [gix_index::Entry],
entry: &'index gix_index::Entry,
entry_index: usize,
rela_path: &'index BStr,
status: EntryStatus<Self::ContentChange, Self::SubmoduleStatus>,
);
}

View File

@@ -0,0 +1,612 @@
//! Changes between the index and the worktree along with optional rename tracking.
mod types;
pub use types::{Context, DirwalkContext, Entry, Error, Options, Outcome, RewriteSource, Sorting, Summary, VisitEntry};
mod recorder;
pub use recorder::Recorder;
pub(super) mod function {
use std::{borrow::Cow, path::Path};
use bstr::ByteSlice;
use gix_worktree::stack::State;
use crate::{
index_as_worktree::traits::{CompareBlobs, SubmoduleStatus},
index_as_worktree_with_renames::{
function::rewrite::ModificationOrDirwalkEntry, Context, Entry, Error, Options, Outcome, RewriteSource,
VisitEntry,
},
is_dir_to_mode,
};
/// Similar to [`index_as_worktree(…)`](crate::index_as_worktree()), except that it will automatically
/// track renames if enabled, while additionally providing information about untracked files
/// (or more, depending on the configuration).
///
/// * `index`
/// - used for checking modifications, and also for knowing which files are tracked during
/// the working-dir traversal.
/// * `worktree`
/// - The root of the worktree, in a format that respects `core.precomposeUnicode`.
/// * `collector`
/// - A [`VisitEntry`] implementation that sees the results of this operation.
/// * `compare`
/// - An implementation to compare two blobs for equality, used during index modification checks.
/// * `submodule`
/// - An implementation to determine the status of a submodule when encountered during
/// index modification checks.
/// * `objects`
/// - A way to obtain objects from the git object database.
/// * `progress`
/// - A way to send progress information for the index modification checks.
/// * `ctx`
/// - Additional information that will be accessed during index modification checks and traversal.
/// * `options`
/// - a way to configure both paths of the operation.
///
/// The index modification check and the optional directory walk each run in their own thread and
/// send what they find through a channel. The calling thread receives these events and either passes
/// them to `collector` directly, or feeds them into the rewrite tracker first if rewrites are configured.
#[allow(clippy::too_many_arguments)]
pub fn index_as_worktree_with_renames<'index, T, U, Find, E>(
index: &'index gix_index::State,
worktree: &Path,
collector: &mut impl VisitEntry<'index, ContentChange = T, SubmoduleStatus = U>,
compare: impl CompareBlobs<Output = T> + Send + Clone,
submodule: impl SubmoduleStatus<Output = U, Error = E> + Send + Clone,
objects: Find,
progress: &mut dyn gix_features::progress::Progress,
mut ctx: Context<'_>,
options: Options<'_>,
) -> Result<Outcome, Error>
where
T: Send + Clone,
U: Send + Clone,
E: std::error::Error + Send + Sync + 'static,
Find: gix_object::Find + gix_object::FindHeader + Send + Clone,
{
gix_features::parallel::threads(|scope| -> Result<Outcome, Error> {
// Both producer threads send their findings through this channel, the current thread consumes them.
let (tx, rx) = std::sync::mpsc::channel();
// The directory walk is only spawned if dirwalk options are present.
let walk_outcome = options
.dirwalk
.map(|options| {
gix_features::parallel::build_thread()
.name("gix_status::dirwalk".into())
.spawn_scoped(scope, {
let tx = tx.clone();
let mut collect = dirwalk::Delegate {
tx,
should_interrupt: ctx.should_interrupt,
};
let dirwalk_ctx = ctx.dirwalk;
let objects = objects.clone();
// The walk only receives a stack for excludes if the attribute stack is in a state that includes ignore information.
let mut excludes = match ctx.resource_cache.attr_stack.state() {
State::CreateDirectoryAndAttributesStack { .. } | State::AttributesStack(_) => None,
State::AttributesAndIgnoreStack { .. } | State::IgnoreStack(_) => {
Some(ctx.resource_cache.attr_stack.clone())
}
};
// The attribute stack is only cloned if at least one pathspec pattern uses attributes.
let mut pathspec_attr_stack = ctx
.pathspec
.patterns()
.any(|p| !p.attributes.is_empty())
.then(|| ctx.resource_cache.attr_stack.clone());
let mut pathspec = ctx.pathspec.clone();
move || -> Result<_, Error> {
gix_dir::walk(
worktree,
gix_dir::walk::Context {
should_interrupt: Some(ctx.should_interrupt),
git_dir_realpath: dirwalk_ctx.git_dir_realpath,
current_dir: dirwalk_ctx.current_dir,
index,
ignore_case_index_lookup: dirwalk_ctx.ignore_case_index_lookup,
pathspec: &mut pathspec,
pathspec_attributes: &mut |relative_path, case, is_dir, out| {
let stack = pathspec_attr_stack
.as_mut()
.expect("can only be called if attributes are used in patterns");
stack
.set_case(case)
.at_entry(relative_path, Some(is_dir_to_mode(is_dir)), &objects)
.is_ok_and(|platform| platform.matching_attributes(out))
},
excludes: excludes.as_mut(),
objects: &objects,
explicit_traversal_root: Some(worktree),
},
options,
&mut collect,
)
.map_err(Error::DirWalk)
}
})
.map_err(Error::SpawnThread)
})
.transpose()?;
// Limit the entries to the range matching the common pathspec prefix, or use all entries if there is no such range.
let entries = &index.entries()[index
.prefixed_entries_range(ctx.pathspec.common_prefix())
.unwrap_or(0..index.entries().len())];
// Only needed with rewrite tracking, where it's used to compute the object ids of entries found by the directory walk.
let filter = options.rewrites.is_some().then(|| {
(
ctx.resource_cache.filter.worktree_filter.clone(),
ctx.resource_cache.attr_stack.clone(),
)
});
let tracked_modifications_outcome = gix_features::parallel::build_thread()
.name("gix_status::index_as_worktree".into())
.spawn_scoped(scope, {
// The original sender is moved into this delegate, the directory walk only holds a clone of it.
let mut collect = tracked_modifications::Delegate { tx };
let objects = objects.clone();
let stack = ctx.resource_cache.attr_stack.clone();
let filter = ctx.resource_cache.filter.worktree_filter.clone();
move || -> Result<_, Error> {
crate::index_as_worktree(
index,
worktree,
&mut collect,
compare,
submodule,
objects,
progress,
crate::index_as_worktree::Context {
pathspec: ctx.pathspec,
stack,
filter,
should_interrupt: ctx.should_interrupt,
},
options.tracked_file_modifications,
)
.map_err(Error::TrackedFileModifications)
}
})
.map_err(Error::SpawnThread)?;
let tracker = options
.rewrites
.map(gix_diff::rewrites::Tracker::<ModificationOrDirwalkEntry<'index, T, U>>::new)
.zip(filter);
let rewrite_outcome = match tracker {
// Rewrites are tracked: each event is offered to the tracker before it reaches the collector.
Some((mut tracker, (mut filter, mut attrs))) => {
// If sorting is requested, entries are buffered here instead of being passed to the collector right away.
let mut entries_for_sorting = options.sorting.map(|_| Vec::new());
let mut buf = Vec::new();
for event in rx {
let (change, location) = match event {
Event::IndexEntry(record) => {
let location = Cow::Borrowed(record.relative_path);
(ModificationOrDirwalkEntry::Modification(record), location)
}
Event::DirEntry(entry, collapsed_directory_status) => {
let location = Cow::Owned(entry.rela_path.clone());
(
ModificationOrDirwalkEntry::DirwalkEntry {
id: rewrite::calculate_worktree_id(
options.object_hash,
worktree,
entry.disk_kind,
entry.rela_path.as_bstr(),
&mut filter,
&mut attrs,
&objects,
&mut buf,
ctx.should_interrupt,
)?,
entry,
collapsed_directory_status,
},
location,
)
}
};
if let Some(v) = entries_for_sorting.as_mut() {
v.push((change, location));
} else if let Some(change) = tracker.try_push_change(change, location.as_ref()) {
// The tracker handed the change back, so it can be emitted immediately.
collector.visit_entry(rewrite::change_to_entry(change, entries));
}
}
// With sorting, changes are pushed to the tracker ordered by their location, and those it hands back are kept for later emission.
let mut entries_for_sorting = entries_for_sorting.map(|mut v| {
v.sort_by(|a, b| a.1.cmp(&b.1));
let mut remaining = Vec::new();
for (change, location) in v {
if let Some(change) = tracker.try_push_change(change, location.as_ref()) {
remaining.push(rewrite::change_to_entry(change, entries));
}
}
remaining
});
let outcome = tracker.emit(
|dest, src| {
match src {
// No source, hence this isn't a rewrite and the change is passed on unaltered.
None => {
let entry = rewrite::change_to_entry(dest.change, entries);
if let Some(v) = entries_for_sorting.as_mut() {
v.push(entry);
} else {
collector.visit_entry(entry);
}
}
Some(src) => {
let ModificationOrDirwalkEntry::DirwalkEntry {
id,
entry,
collapsed_directory_status,
} = dest.change
else {
unreachable!("BUG: only possible destinations are dirwalk entries (additions)");
};
let source = match src.change {
ModificationOrDirwalkEntry::Modification(record) => {
RewriteSource::RewriteFromIndex {
index_entries: entries,
source_entry: record.entry,
source_entry_index: record.entry_index,
source_rela_path: record.relative_path,
source_status: record.status.clone(),
}
}
ModificationOrDirwalkEntry::DirwalkEntry {
id,
entry,
collapsed_directory_status,
} => RewriteSource::CopyFromDirectoryEntry {
source_dirwalk_entry: entry.clone(),
source_dirwalk_entry_collapsed_directory_status:
*collapsed_directory_status,
source_dirwalk_entry_id: *id,
},
};
let entry = Entry::Rewrite {
source,
dirwalk_entry: entry,
dirwalk_entry_collapsed_directory_status: collapsed_directory_status,
dirwalk_entry_id: id,
diff: src.diff,
copy: src.kind == gix_diff::rewrites::tracker::visit::SourceKind::Copy,
};
if let Some(v) = entries_for_sorting.as_mut() {
v.push(entry);
} else {
collector.visit_entry(entry);
}
}
}
std::ops::ControlFlow::Continue(())
},
&mut ctx.resource_cache,
&objects,
|_cb| {
// NOTE: to make this work, we'd want to wait the index modification check to complete.
// Then it's possible to efficiently emit the tracked files along with what we already sent,
// i.e. untracked and ignored files.
gix_features::trace::debug!("full-tree copy tracking isn't currently supported");
Ok::<_, std::io::Error>(())
},
)?;
// Everything that was buffered for sorting is emitted now, ordered by destination path.
if let Some(mut v) = entries_for_sorting {
v.sort_by(|a, b| a.destination_rela_path().cmp(b.destination_rela_path()));
for entry in v {
collector.visit_entry(entry);
}
}
Some(outcome)
}
// No rewrite tracking: events are converted to entries and passed on, optionally sorted.
None => {
let mut entries_for_sorting = options.sorting.map(|_| Vec::new());
for event in rx {
let entry = match event {
Event::IndexEntry(record) => Entry::Modification {
entries,
entry: record.entry,
entry_index: record.entry_index,
rela_path: record.relative_path,
status: record.status,
},
Event::DirEntry(entry, collapsed_directory_status) => Entry::DirectoryContents {
entry,
collapsed_directory_status,
},
};
if let Some(v) = entries_for_sorting.as_mut() {
v.push(entry);
} else {
collector.visit_entry(entry);
}
}
if let Some(mut v) = entries_for_sorting {
v.sort_by(|a, b| a.destination_rela_path().cmp(b.destination_rela_path()));
for entry in v {
collector.visit_entry(entry);
}
}
None
}
};
// Wait for both producers and surface their errors, if any.
let walk_outcome = walk_outcome
.map(|handle| handle.join().expect("no panic"))
.transpose()?;
let tracked_modifications_outcome = tracked_modifications_outcome.join().expect("no panic")?;
Ok(Outcome {
dirwalk: walk_outcome.map(|t| t.0),
tracked_file_modification: tracked_modifications_outcome,
rewrites: rewrite_outcome,
})
})
}
/// A message sent through the channel by one of the two producers, to be received by the thread
/// that forwards results to the collector.
enum Event<'index, T, U> {
/// The status of a tracked entry, as sent by `tracked_modifications::Delegate`.
IndexEntry(crate::index_as_worktree::Record<'index, T, U>),
/// An entry found by the directory walk along with its collapsed directory status, as sent by `dirwalk::Delegate`.
DirEntry(gix_dir::Entry, Option<gix_dir::entry::Status>),
}
mod tracked_modifications {
    use bstr::BStr;
    use gix_index::Entry;
    use crate::{
        index_as_worktree::{EntryStatus, Record},
        index_as_worktree_with_renames::function::Event,
    };
    /// A visitor that forwards each observed entry status through a channel.
    pub(super) struct Delegate<'index, T, U> {
        /// The sending side of the channel that receives one [`Event::IndexEntry`] per visited entry.
        pub(super) tx: std::sync::mpsc::Sender<Event<'index, T, U>>,
    }
    impl<'index, T, U> crate::index_as_worktree::VisitEntry<'index> for Delegate<'index, T, U> {
        type ContentChange = T;
        type SubmoduleStatus = U;
        /// Bundle the given information into a [`Record`] and send it as [`Event::IndexEntry`].
        fn visit_entry(
            &mut self,
            _entries: &'index [Entry],
            entry: &'index Entry,
            entry_index: usize,
            rela_path: &'index BStr,
            status: EntryStatus<Self::ContentChange, Self::SubmoduleStatus>,
        ) {
            let record = Record {
                entry,
                entry_index,
                relative_path: rela_path,
                status,
            };
            // A failure to send is deliberately ignored.
            let _ = self.tx.send(Event::IndexEntry(record));
        }
    }
}
mod dirwalk {
    use std::sync::atomic::{AtomicBool, Ordering};
    use gix_dir::{entry::Status, walk::Action, EntryRef};
    use super::Event;
    /// A directory walk delegate that forwards entries through a channel and stops the walk on interrupt.
    pub(super) struct Delegate<'index, 'a, T, U> {
        /// The sending side of the channel that receives one [`Event::DirEntry`] per emitted entry.
        pub(super) tx: std::sync::mpsc::Sender<Event<'index, T, U>>,
        /// The flag checked after each entry to learn if the walk should stop.
        pub(super) should_interrupt: &'a AtomicBool,
    }
    impl<T, U> gix_dir::walk::Delegate for Delegate<'_, '_, T, U> {
        /// Send `entry` as owned [`Event::DirEntry`] unless it is untrackable, and break the walk
        /// if an interrupt was requested.
        fn emit(&mut self, entry: EntryRef<'_>, collapsed_directory_status: Option<Status>) -> Action {
            // Status never shows untracked entries of untrackable type
            let is_untrackable = entry.disk_kind == Some(gix_dir::entry::Kind::Untrackable);
            if !is_untrackable {
                // A failure to send is deliberately ignored.
                let _ = self
                    .tx
                    .send(Event::DirEntry(entry.to_owned(), collapsed_directory_status));
            }
            if self.should_interrupt.load(Ordering::Relaxed) {
                return std::ops::ControlFlow::Break(());
            }
            std::ops::ControlFlow::Continue(())
        }
    }
}
mod rewrite {
use crate::{
index_as_worktree::{Change, EntryStatus},
index_as_worktree_with_renames::{Entry, Error},
};
use bstr::BStr;
use gix_diff::{rewrites::tracker::ChangeKind, tree::visit::Relation};
use gix_dir::entry::Kind;
use gix_filter::pipeline::convert::ToGitOutcome;
use gix_hash::oid;
use gix_object::tree::EntryMode;
use std::io::ErrorKind;
use std::{io::Read, path::Path};
/// The change type that is fed into the rewrite tracker, which is either the status of a tracked
/// index entry or an entry found by the directory walk.
#[derive(Clone)]
pub enum ModificationOrDirwalkEntry<'index, T, U>
where
T: Clone,
U: Clone,
{
/// The status record of an entry in the index.
Modification(crate::index_as_worktree::Record<'index, T, U>),
/// An entry found by the directory walk.
DirwalkEntry {
/// The object id associated with `entry`, which serves as the id of this change.
id: gix_hash::ObjectId,
/// The directory walk entry itself.
entry: gix_dir::Entry,
/// The collapsed directory status as emitted by the directory walk alongside `entry`.
collapsed_directory_status: Option<gix_dir::entry::Status>,
},
}
impl<T, U> gix_diff::rewrites::tracker::Change for ModificationOrDirwalkEntry<'_, T, U>
where
T: Clone,
U: Clone,
{
fn id(&self) -> &oid {
match self {
ModificationOrDirwalkEntry::Modification(m) => &m.entry.id,
ModificationOrDirwalkEntry::DirwalkEntry { id, .. } => id,
}
}
fn relation(&self) -> Option<Relation> {
// TODO: figure out if index or worktree can provide containerization - worktree should be possible.
// index would take some processing.
None
}
fn kind(&self) -> ChangeKind {
match self {
ModificationOrDirwalkEntry::Modification(m) => match &m.status {
EntryStatus::Conflict { .. } | EntryStatus::IntentToAdd | EntryStatus::NeedsUpdate(_) => {
ChangeKind::Modification
}
EntryStatus::Change(c) => match c {
Change::Removed => ChangeKind::Deletion,
Change::Type { .. } | Change::Modification { .. } | Change::SubmoduleModification(_) => {
ChangeKind::Modification
}
},
},
ModificationOrDirwalkEntry::DirwalkEntry { .. } => ChangeKind::Addition,
}
}
fn entry_mode(&self) -> EntryMode {
match self {
ModificationOrDirwalkEntry::Modification(c) => c.entry.mode.to_tree_entry_mode(),
ModificationOrDirwalkEntry::DirwalkEntry { entry, .. } => entry.disk_kind.map(|kind| {
match kind {
Kind::Untrackable => {
// Trees are never tracked for rewrites, so we 'pretend'.
gix_object::tree::EntryKind::Tree
}
Kind::File => gix_object::tree::EntryKind::Blob,
Kind::Symlink => gix_object::tree::EntryKind::Link,
Kind::Repository | Kind::Directory => gix_object::tree::EntryKind::Tree,
}
.into()
}),
}
.unwrap_or(gix_object::tree::EntryKind::Blob.into())
}
fn id_and_entry_mode(&self) -> (&oid, EntryMode) {
(self.id(), self.entry_mode())
}
}
/// Note that for non-files, we always return a null-sha and assume that the rename-tracking
/// does nothing for these anyway.
#[allow(clippy::too_many_arguments)]
pub(super) fn calculate_worktree_id(
object_hash: gix_hash::Kind,
worktree_root: &Path,
disk_kind: Option<gix_dir::entry::Kind>,
rela_path: &BStr,
filter: &mut gix_filter::Pipeline,
attrs: &mut gix_worktree::Stack,
objects: &dyn gix_object::Find,
buf: &mut Vec<u8>,
should_interrupt: &std::sync::atomic::AtomicBool,
) -> Result<gix_hash::ObjectId, Error> {
let Some(kind) = disk_kind else {
return Ok(object_hash.null());
};
Ok(match kind {
Kind::Untrackable => {
// Go along with unreadable files, they are passed along without rename tracking.
return Ok(object_hash.null());
}
Kind::File => {
let platform = attrs
.at_entry(rela_path, None, objects)
.map_err(Error::SetAttributeContext)?;
let rela_path = gix_path::from_bstr(rela_path);
let file_path = worktree_root.join(rela_path.as_ref());
let file = match std::fs::File::open(&file_path) {
Ok(f) => f,
Err(err)
if matches!(
err.kind(),
ErrorKind::NotFound | ErrorKind::PermissionDenied | ErrorKind::Interrupted
) =>
{
gix_features::trace::debug!(
?file_path,
?err,
"ignoring worktree file as it can't be read for hashing"
);
return Ok(object_hash.null());
}
Err(err) => return Err(Error::OpenWorktreeFile(err)),
};
let out = filter.convert_to_git(
file,
rela_path.as_ref(),
&mut |_path, attrs| {
platform.matching_attributes(attrs);
},
&mut |_buf| Ok(None),
)?;
match out {
ToGitOutcome::Unchanged(mut file) => gix_object::compute_stream_hash(
object_hash,
gix_object::Kind::Blob,
&mut file,
file_path.metadata().map_err(Error::OpenWorktreeFile)?.len(),
&mut gix_features::progress::Discard,
should_interrupt,
)
.map_err(Error::HashFile)?,
ToGitOutcome::Buffer(buf) => gix_object::compute_hash(object_hash, gix_object::Kind::Blob, buf)
.map_err(|err| Error::HashFile(err.into()))?,
ToGitOutcome::Process(mut stream) => {
buf.clear();
stream.read_to_end(buf).map_err(|err| Error::HashFile(err.into()))?;
gix_object::compute_hash(object_hash, gix_object::Kind::Blob, buf)
.map_err(|err| Error::HashFile(err.into()))?
}
}
}
Kind::Symlink => {
let path = worktree_root.join(gix_path::from_bstr(rela_path));
let target = gix_path::into_bstr(std::fs::read_link(path).map_err(Error::ReadLink)?);
gix_object::compute_hash(object_hash, gix_object::Kind::Blob, &target)
.map_err(|err| Error::HashFile(err.into()))?
}
Kind::Directory | Kind::Repository => object_hash.null(),
})
}
#[inline]
pub(super) fn change_to_entry<'index, T, U>(
change: ModificationOrDirwalkEntry<'index, T, U>,
entries: &'index [gix_index::Entry],
) -> Entry<'index, T, U>
where
T: Clone,
U: Clone,
{
match change {
ModificationOrDirwalkEntry::Modification(r) => Entry::Modification {
entries,
entry: r.entry,
entry_index: r.entry_index,
rela_path: r.relative_path,
status: r.status,
},
ModificationOrDirwalkEntry::DirwalkEntry {
id: _,
entry,
collapsed_directory_status,
} => Entry::DirectoryContents {
entry,
collapsed_directory_status,
},
}
}
}
}

View File

@@ -0,0 +1,17 @@
use crate::index_as_worktree_with_renames::{Entry, VisitEntry};
/// Convenience implementation of [`VisitEntry`] that collects all changes into a `Vec`.
#[derive(Debug, Default)]
pub struct Recorder<'index, T = (), U = ()> {
/// The collected changes, in the order in which they were visited.
pub records: Vec<Entry<'index, T, U>>,
}
impl<'index, T: Send, U: Send> VisitEntry<'index> for Recorder<'index, T, U> {
type ContentChange = T;
type SubmoduleStatus = U;
/// Append `entry` to [`records`](Recorder::records).
fn visit_entry(&mut self, entry: Entry<'index, Self::ContentChange, Self::SubmoduleStatus>) {
self.records.push(entry);
}
}

View File

@@ -0,0 +1,375 @@
use std::sync::atomic::AtomicBool;
use bstr::{BStr, ByteSlice};
use crate::index_as_worktree::{Change, EntryStatus};
/// The error returned by [`index_as_worktree_with_renames()`](crate::index_as_worktree_with_renames()).
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
#[error(transparent)]
TrackedFileModifications(#[from] crate::index_as_worktree::Error),
#[error(transparent)]
DirWalk(gix_dir::walk::Error),
#[error(transparent)]
SpawnThread(std::io::Error),
#[error("Failed to change the context for querying gitattributes to the respective path")]
SetAttributeContext(std::io::Error),
#[error("Could not open worktree file for reading")]
OpenWorktreeFile(std::io::Error),
#[error(transparent)]
HashFile(gix_hash::io::Error),
#[error("Could not read worktree link content")]
ReadLink(std::io::Error),
#[error(transparent)]
ConvertToGit(#[from] gix_filter::pipeline::convert::to_git::Error),
#[error(transparent)]
RewriteTracker(#[from] gix_diff::rewrites::tracker::emit::Error),
}
/// The way all output should be sorted.
#[derive(Clone, Copy, Default, Debug, Eq, PartialEq, PartialOrd, Ord, Hash)]
pub enum Sorting {
/// The entries are sorted by their path in a case-sensitive fashion. This is the default.
#[default]
ByPathCaseSensitive,
}
/// Provide additional information collected during the runtime of [`index_as_worktree_with_renames()`](crate::index_as_worktree_with_renames()).
#[derive(Clone, Debug, Default)]
pub struct Outcome {
/// The outcome of the modification check of tracked files.
pub tracked_file_modification: crate::index_as_worktree::Outcome,
/// The outcome of the directory walk, or `None` if its [options](Options::dirwalk) weren't present,
/// which means the dirwalk never ran.
pub dirwalk: Option<gix_dir::walk::Outcome>,
/// The result of the rewrite operation, if [rewrites were configured](Options::rewrites).
pub rewrites: Option<gix_diff::rewrites::Outcome>,
}
/// Either an index entry for renames or another directory entry in case of copies.
#[derive(Clone, Debug)]
pub enum RewriteSource<'index, ContentChange, SubmoduleStatus> {
/// The source originates in the index and is detected as missing in the working tree.
/// This can also happen for copies.
RewriteFromIndex {
/// All entries in the index.
index_entries: &'index [gix_index::Entry],
/// The entry that is the source of the rewrite, which means it was removed on disk,
/// equivalent to [Change::Removed](crate::index_as_worktree::Change::Removed).
///
/// Note that the [entry-id](gix_index::Entry::id) is the content-id of the source of the rewrite.
source_entry: &'index gix_index::Entry,
/// The index of the `source_entry` for lookup in `index_entries` - useful to look at neighbors.
source_entry_index: usize,
/// The repository-relative path of the `source_entry`.
source_rela_path: &'index BStr,
/// The computed status of the `source_entry`.
source_status: EntryStatus<ContentChange, SubmoduleStatus>,
},
/// This source originates in the directory tree and is always the source of copies.
CopyFromDirectoryEntry {
/// The source of the copy operation, which is also an entry of the directory walk.
///
/// Note that its [`rela_path`](gix_dir::EntryRef::rela_path) is the source of the rewrite.
source_dirwalk_entry: gix_dir::Entry,
/// `collapsed_directory_status` is `Some(dir_status)` if this `source_dirwalk_entry` was part of a directory with the given
/// `dir_status` that wasn't the same as the one of `source_dirwalk_entry` and if [gix_dir::walk::Options::emit_collapsed] was
/// [CollapsedEntriesEmissionMode::OnStatusMismatch](gix_dir::walk::CollapsedEntriesEmissionMode::OnStatusMismatch).
/// It will also be `Some(dir_status)` if that option was [CollapsedEntriesEmissionMode::All](gix_dir::walk::CollapsedEntriesEmissionMode::All).
source_dirwalk_entry_collapsed_directory_status: Option<gix_dir::entry::Status>,
/// The object id as it would appear if the entry was written to the object database.
/// It's the same as `dirwalk_entry_id`, or `diff` is `Some(_)` to indicate that the copy was determined by similarity.
source_dirwalk_entry_id: gix_hash::ObjectId,
},
}
/// An 'entry' in the sense of a merge of modified tracked files and results from a directory walk.
///
/// Use [`summary()`](Entry::summary) for a digest of the status, and [`source_rela_path()`](Entry::source_rela_path)
/// or [`destination_rela_path()`](Entry::destination_rela_path) to access paths independently of the variant.
#[derive(Clone, Debug)]
pub enum Entry<'index, ContentChange, SubmoduleStatus> {
/// A tracked file was modified, and index-specific information is passed.
Modification {
/// All entries in the index.
entries: &'index [gix_index::Entry],
/// The entry with modifications.
entry: &'index gix_index::Entry,
/// The index of the `entry` for lookup in `entries` - useful to look at neighbors.
entry_index: usize,
/// The repository-relative path of the entry.
rela_path: &'index BStr,
/// The computed status of the entry.
status: EntryStatus<ContentChange, SubmoduleStatus>,
},
/// An entry returned by the directory walk, without any relation to the index.
///
/// This can happen if ignored files are returned as well, or if rename-tracking is disabled.
DirectoryContents {
/// The entry found during the disk traversal.
entry: gix_dir::Entry,
/// `collapsed_directory_status` is `Some(dir_status)` if this `entry` was part of a directory with the given
/// `dir_status` that wasn't the same as the one of `entry` and if [gix_dir::walk::Options::emit_collapsed] was
/// [CollapsedEntriesEmissionMode::OnStatusMismatch](gix_dir::walk::CollapsedEntriesEmissionMode::OnStatusMismatch).
/// It will also be `Some(dir_status)` if that option was [CollapsedEntriesEmissionMode::All](gix_dir::walk::CollapsedEntriesEmissionMode::All).
collapsed_directory_status: Option<gix_dir::entry::Status>,
},
// NOTE(review): the note below says the source is always the index, yet `RewriteSource::CopyFromDirectoryEntry`
// describes sources that originate in the directory walk - confirm which statement is current.
/// The rewrite tracking discovered a match between a deleted and added file, and considers them equal enough,
/// depending on the tracker settings.
///
/// Note that the source of the rewrite is always the index as it detects the absence of entries, something that
/// can't be done during a directory walk.
Rewrite {
/// The source of the rewrite operation.
source: RewriteSource<'index, ContentChange, SubmoduleStatus>,
/// The untracked entry found during the disk traversal, the destination of the rewrite.
///
/// Note that its [`rela_path`](gix_dir::EntryRef::rela_path) is the destination of the rewrite, and the current
/// location of the entry.
dirwalk_entry: gix_dir::Entry,
/// `collapsed_directory_status` is `Some(dir_status)` if this `dirwalk_entry` was part of a directory with the given
/// `dir_status` that wasn't the same as the one of `dirwalk_entry` and if [gix_dir::walk::Options::emit_collapsed] was
/// [CollapsedEntriesEmissionMode::OnStatusMismatch](gix_dir::walk::CollapsedEntriesEmissionMode::OnStatusMismatch).
/// It will also be `Some(dir_status)` if that option was [CollapsedEntriesEmissionMode::All](gix_dir::walk::CollapsedEntriesEmissionMode::All).
dirwalk_entry_collapsed_directory_status: Option<gix_dir::entry::Status>,
/// The object id as it would appear if the entry was written to the object database, specifically hashed in order to determine equality.
/// Note that it doesn't (necessarily) exist in the object database, and may be [null](gix_hash::ObjectId::null) if no hashing
/// was performed.
dirwalk_entry_id: gix_hash::ObjectId,
/// It's `None` if the 'source.id' is equal to `dirwalk_entry_id`, as identity made an actual diff computation unnecessary.
/// Otherwise, and if enabled, it's `Some(stats)` to indicate how similar both entries were.
diff: Option<gix_diff::blob::DiffLineStats>,
/// If true, this rewrite is created by copy, and 'source.id' is pointing to its source.
/// Otherwise, it's a rename, and 'source.id' points to a deleted object,
/// as renames are tracked as deletions and additions of the same or similar content.
copy: bool,
},
}
/// An easy to grasp summary of the changes of the worktree compared to the index.
///
/// It's produced by [`Entry::summary()`].
#[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd, Ord, Hash)]
pub enum Summary {
/// An entry exists in the index but doesn't in the worktree.
Removed,
/// A file exists in the worktree but doesn't have a corresponding entry in the index.
///
/// In a `git status`, this would be an untracked file.
Added,
/// A file or submodule was modified, compared to the state recorded in the index.
/// On Unix, the change of executable bit also counts as modification.
///
/// If the modification is a submodule, it could also stem from various other factors, like
/// having modified or untracked files, or changes in the index.
Modified,
/// The type of the entry in the worktree changed compared to the index.
///
/// This can happen if a file in the worktree now is a directory, or a symlink, for example.
TypeChange,
/// A match between an entry in the index and a differently named file in the worktree was detected,
/// considering the index the source of a rename operation, and the worktree file the destination.
///
/// Note that the renamed file may also have been modified, but is considered similar enough.
///
/// To obtain this state, rewrite-tracking must have been enabled, as otherwise the source would be
/// considered `Removed` and the destination would be considered `Added`.
Renamed,
// NOTE(review): for copies the source may still exist in the worktree, in which case only the destination
// would show up as `Added` without copy-tracking - confirm the last paragraph below.
/// A match between an entry in the index and a differently named file in the worktree was detected,
/// considering the index the source of the copy of a worktree file.
///
/// Note that the copied file may also have been modified, but is considered similar enough.
///
/// To obtain this state, rewrite-and-copy-tracking must have been enabled, as otherwise the source would be
/// considered `Removed` and the destination would be considered `Added`.
Copied,
/// An index entry that was created with `git add --intent-to-add` for an otherwise untracked
/// worktree file.
///
/// This means it's not available in the object database yet even though now an entry exists
/// that represents the worktree file.
/// The entry represents the promise of adding a new file, no matter the actual stat or content.
/// Effectively this means nothing changed.
/// This also means the file is still present, and that no detailed change checks were performed.
IntentToAdd,
/// Describes a conflicting entry in the index, which also means that
/// no further comparison to the worktree file was performed.
///
/// As this variant only describes the state of the index, the corresponding worktree file may
/// or may not exist.
Conflict,
}
/// Access
impl<ContentChange, SubmoduleStatus> RewriteSource<'_, ContentChange, SubmoduleStatus> {
/// The repository-relative path of this source.
pub fn rela_path(&self) -> &BStr {
match self {
RewriteSource::RewriteFromIndex { source_rela_path, .. } => source_rela_path,
RewriteSource::CopyFromDirectoryEntry {
source_dirwalk_entry, ..
} => source_dirwalk_entry.rela_path.as_bstr(),
}
}
}
/// Access
impl<ContentChange, SubmoduleStatus> Entry<'_, ContentChange, SubmoduleStatus> {
/// Return a summary of the entry as digest of its status, or `None` if this entry is
/// created from the directory walk and is *not untracked*, or if it is merely to communicate
/// a needed update to the index entry.
pub fn summary(&self) -> Option<Summary> {
Some(match self {
Entry::Modification {
status: EntryStatus::Conflict { .. },
..
} => Summary::Conflict,
Entry::Modification {
status: EntryStatus::IntentToAdd,
..
} => Summary::IntentToAdd,
Entry::Modification {
status: EntryStatus::NeedsUpdate(_),
..
} => return None,
Entry::Modification {
status: EntryStatus::Change(change),
..
} => match change {
Change::SubmoduleModification(_) | Change::Modification { .. } => Summary::Modified,
Change::Type { .. } => Summary::TypeChange,
Change::Removed => Summary::Removed,
},
Entry::DirectoryContents { entry, .. } => {
if matches!(entry.status, gix_dir::entry::Status::Untracked) {
Summary::Added
} else {
return None;
}
}
Entry::Rewrite { copy, .. } => {
if *copy {
Summary::Copied
} else {
Summary::Renamed
}
}
})
}
/// The repository-relative path at which the source of a rewrite is located.
///
/// If this isn't a rewrite, the path is the location of the entry itself.
pub fn source_rela_path(&self) -> &BStr {
match self {
Entry::Modification { rela_path, .. } => rela_path,
Entry::DirectoryContents { entry, .. } => entry.rela_path.as_bstr(),
Entry::Rewrite { source, .. } => source.rela_path(),
}
}
/// The repository-relative path at which the destination of a rewrite is located.
///
/// If this isn't a rewrite, the path is the location of the entry itself.
pub fn destination_rela_path(&self) -> &BStr {
match self {
Entry::Modification { rela_path, .. } => rela_path,
Entry::DirectoryContents { entry, .. } => entry.rela_path.as_bstr(),
Entry::Rewrite { dirwalk_entry, .. } => dirwalk_entry.rela_path.as_bstr(),
}
}
}
/// Options for use in [index_as_worktree_with_renames()](crate::index_as_worktree_with_renames()).
#[derive(Clone, Default)]
pub struct Options<'a> {
/// The way all output should be sorted.
///
/// If `None`, and depending on the `rewrites` field, output will be immediate but the output order
/// isn't determined, and may differ between two runs. `rewrites` also depends on the order of entries that
/// are presented to it, hence for deterministic results, sorting needs to be enabled.
///
/// If `Some(_)`, all entries are collected beforehand, so they can be sorted before outputting any of them
/// to the user.
///
/// If immediate output of entries in any order is desired, this should be `None`,
/// along with `rewrites` being `None` as well.
pub sorting: Option<Sorting>,
/// The kind of hash to create when hashing worktree entries.
pub object_hash: gix_hash::Kind,
/// Options to configure how modifications to tracked files should be obtained.
pub tracked_file_modifications: crate::index_as_worktree::Options,
/// Options to control the directory walk that informs about untracked files.
///
/// Note that we forcefully disable emission of tracked files to avoid any overlap
/// between emissions to indicate modifications, and those that are obtained by
/// the directory walk.
///
/// If `None`, the directory walk portion will not run at all, yielding data similar
/// to a bare [index_as_worktree()](crate::index_as_worktree()) call.
pub dirwalk: Option<gix_dir::walk::Options<'a>>,
/// The configuration for the rewrite tracking. Note that if set, the [`dirwalk`](Self::dirwalk) should be configured
/// to *not* collapse untracked and ignored entries, as rewrite tracking is on a file-by-file basis.
/// Also note that when `Some(_)`, it will collect certain changes depending on the exact configuration, which typically increases
/// the latency until the first entries are received. Some entries are never candidates for renames, which means
/// they are forwarded to the caller right away.
///
/// If `None`, no tracking will occur, which means that all output becomes visible to the delegate immediately.
pub rewrites: Option<gix_diff::Rewrites>,
}
/// The context for [`index_as_worktree_with_renames()`](crate::index_as_worktree_with_renames()).
pub struct Context<'a> {
/// The pathspec to limit the amount of paths that are checked. Can be empty to allow all paths.
///
/// Note that these are expected to have a [common_prefix()](gix_pathspec::Search::common_prefix()) according
/// to the prefix of the repository to efficiently limit the scope of the paths we process, both for the
/// index modifications as well as for the directory walk.
pub pathspec: gix_pathspec::Search,
/// A fully-configured platform capable of producing diffable buffers similar to what Git would do, for use
/// with rewrite tracking.
///
/// Note that it contains resources that are additionally used here:
///
/// * `attr_stack`
/// - A stack pre-configured to allow accessing attributes for each entry, as required for `filter`
/// and possibly pathspecs.
/// It *may* also allow accessing `.gitignore` information for use in the directory walk.
/// If no excludes information is present, the directory walk will identify ignored files as untracked, which
/// might be desirable under certain circumstances.
/// * `filter`
/// - A filter to be able to perform conversions from and to the worktree format.
/// It is needed to potentially refresh the index with data read from the worktree, which needs to be converted back
/// to the form stored in Git.
pub resource_cache: gix_diff::blob::Platform,
/// A flag to query to learn if cancellation is requested.
pub should_interrupt: &'a AtomicBool,
/// The context for the directory walk.
pub dirwalk: DirwalkContext<'a>,
}
/// All information that is required to perform a [dirwalk](gix_dir::walk()).
pub struct DirwalkContext<'a> {
/// The `git_dir` of the parent repository, after a call to [`gix_path::realpath()`].
///
/// It's used to help us differentiate our own `.git` directory from nested unrelated repositories,
/// which is needed if `core.worktree` is used to nest the `.git` directory deeper within.
pub git_dir_realpath: &'a std::path::Path,
/// The current working directory as returned by `gix_fs::current_dir()` to assure it respects `core.precomposeUnicode`.
/// It's used to produce the realpath of the git-dir of a repository candidate to assure it's not our own repository.
pub current_dir: &'a std::path::Path,
/// A utility to look up index entries faster, and deal with ignore-case handling.
///
/// Must be set if [`ignore_case`](gix_dir::walk::Options::ignore_case) is `true`, or else some entries won't be found if their case is different.
///
/// [Read more in `src-dir`](gix_dir::walk::Context::ignore_case_index_lookup).
pub ignore_case_index_lookup: Option<&'a gix_index::AccelerateLookup<'a>>,
}
/// Observe the status of an entry by comparing an index entry to the worktree, along
/// with potential directory walk results.
pub trait VisitEntry<'a> {
/// Data generated by comparing an entry with a file.
type ContentChange;
/// Data obtained when checking the submodule status.
type SubmoduleStatus;
/// Observe `entry`, which is either the modification of a tracked file, an entry of the directory walk,
/// or a rewrite that relates a source to an entry of the directory walk.
///
/// For modifications, `entry` also carries the index of the index-entry along with the complete list
/// of index entries, useful for accessing its neighbors.
fn visit_entry(&mut self, entry: Entry<'a, Self::ContentChange, Self::SubmoduleStatus>);
}

62
src-status/src/lib.rs Normal file
View File

@@ -0,0 +1,62 @@
//! This crate includes the various diffs `git` can do between different representations
//! of the repository state, like comparisons between…
//!
//! * index and working tree
//! * *tree and index*
//!
//! …while also being able to check if the working tree is dirty, quickly, by instructing the operation to stop once the first
//! change was found.
//!
//! ### Tree-Index Status
//!
//! This status is not implemented in this crate as there is no direct tree-to-index comparison. Instead, one creates an index from a tree
//! and then diffs two indices with `gix_diff::index(index_from_tree, usually_dot_git_index)`. This adds about 15% to the runtime
//! and comes at the cost of another index in memory.
//! Once there are generators, implementing depth-first tree iteration should become trivial, but for now it's very hard if one
//! wants to return referenced state of the iterator (which is not possible).
//!
//! ### Difference to `src-diff`
//!
//! Technically, `status` is just another form of diff between different kinds of sides, i.e. an index and a working tree.
//! This is the difference to `src-diff`, which only compares items of the same kind.
//!
//! ### Feature Flags
#![cfg_attr(
all(doc, feature = "document-features"),
doc = ::document_features::document_features!()
)]
#![cfg_attr(all(doc, feature = "document-features"), feature(doc_cfg))]
#![deny(missing_docs, rust_2018_idioms, unsafe_code)]
#[cfg(target_has_atomic = "64")]
use std::sync::atomic::AtomicU64;
#[cfg(not(target_has_atomic = "64"))]
use portable_atomic::AtomicU64;
pub mod index_as_worktree;
pub use index_as_worktree::function::index_as_worktree;
#[cfg(feature = "worktree-rewrites")]
pub mod index_as_worktree_with_renames;
#[cfg(feature = "worktree-rewrites")]
pub use index_as_worktree_with_renames::function::index_as_worktree_with_renames;
/// A stack that validates, without modifying anything on disk, that paths do not lead through a symlink.
///
/// It can efficiently validate paths when these are queried in sort-order, which leads to each component
/// to only be checked once.
pub struct SymlinkCheck {
/// The underlying path stack, which supports querying additional information, like the stack root.
pub inner: gix_fs::Stack,
}
mod stack;
/// Map the directory-flag of a worktree entry to an index mode: `DIR` for directories,
/// and `FILE` for everything else.
fn is_dir_to_mode(is_dir: bool) -> gix_index::entry::Mode {
    use gix_index::entry::Mode;

    if !is_dir {
        return Mode::FILE;
    }
    Mode::DIR
}

89
src-status/src/stack.rs Normal file
View File

@@ -0,0 +1,89 @@
use std::{
borrow::Cow,
path::{Path, PathBuf},
};
use bstr::BStr;
use gix_fs::{stack::ToNormalPathComponents, Stack};
use crate::SymlinkCheck;
/// The marker error that the stack delegate wraps into a `std::io::Error` when a path component
/// that isn't the last one turns out to be a symlink.
///
/// Use `is_symlink_step_error()` to detect it in an IO error.
#[derive(Debug, thiserror::Error)]
#[error("Cannot step through symlink to perform an lstat")]
struct CannotStepThroughSymlink;
/// Return `true` if the error wrapped by `err` is the marker which indicates that a path
/// would have led through a symlink.
pub(crate) fn is_symlink_step_error(err: &std::io::Error) -> bool {
    err.get_ref()
        .is_some_and(|wrapped| wrapped.is::<CannotStepThroughSymlink>())
}
impl SymlinkCheck {
/// Create a new stack that starts operating at `root`.
pub fn new(root: PathBuf) -> Self {
Self {
inner: gix_fs::Stack::new(root),
}
}
/// Return a valid filesystem path located in our root by appending `relative_path`, which is guaranteed to
/// not pass through a symbolic link. That way the caller can be sure to not be misled by an attacker that
/// tries to make us reach outside of the repository.
///
/// Note that the file pointed to by `relative_path` may still be a symbolic link, or not exist at all,
/// and that an error may also be produced if directories on the path leading to the leaf
/// component of `relative_path` are missing.
///
/// ### Note
///
/// On windows, no verification is performed, instead only the combined path is provided as usual.
pub fn verified_path(&mut self, relative_path: impl ToNormalPathComponents) -> std::io::Result<&Path> {
self.inner.make_relative_path_current(relative_path, &mut Delegate)?;
Ok(self.inner.current())
}
/// Like [`Self::verified_path()`], but do not fail if there is no directory entry at `relative_path` or on the way
/// to `relative_path`. Instead.
/// For convenience, this incarnation is tuned to be easy to use with Git paths, i.e. slash-separated `BString` path.
pub fn verified_path_allow_nonexisting(&mut self, relative_path: &BStr) -> std::io::Result<Cow<'_, Path>> {
let rela_path = gix_path::try_from_bstr(relative_path).map_err(std::io::Error::other)?;
if let Err(err) = self.verified_path(rela_path.as_ref()) {
if err.kind() == std::io::ErrorKind::NotFound {
Ok(Cow::Owned(self.inner.root().join(rela_path)))
} else {
Err(err)
}
} else {
Ok(Cow::Borrowed(self.inner.current()))
}
}
}
/// The stack delegate that rejects paths leading through a symlink.
struct Delegate;

impl gix_fs::stack::Delegate for Delegate {
    /// Directories need no handling of their own, all checks happen in `push()`.
    fn push_directory(&mut self, _stack: &Stack) -> std::io::Result<()> {
        Ok(())
    }

    /// Fail if the current path of `stack` is a symlink, unless it is the last component
    /// which may be anything. On windows, nothing is checked.
    #[cfg_attr(windows, allow(unused_variables))]
    fn push(&mut self, is_last_component: bool, stack: &Stack) -> std::io::Result<()> {
        #[cfg(not(windows))]
        {
            // The short-circuit avoids the `lstat` call for the last component.
            let steps_through_symlink = !is_last_component && stack.current().symlink_metadata()?.is_symlink();
            if steps_through_symlink {
                return Err(std::io::Error::other(CannotStepThroughSymlink));
            }
        }
        Ok(())
    }

    fn pop_directory(&mut self) {}
}

View File

@@ -0,0 +1,38 @@
lints.workspace = true
[package]
name = "src-status-tests"
version = "0.0.0"
repository = "https://github.com/GitoxideLabs/gitoxide"
license = "MIT OR Apache-2.0"
description = "A crate to drive src-status tests with different features"
authors = ["Sebastian Thiel <sebastian.thiel@icloud.com>", "Pascal Kuthe <pascal.kuthe@semimod.de>"]
edition = "2021"
publish = false
rust-version = "1.82"
[[test]]
name = "status"
path = "status/mod.rs"
[features]
src-features-parallel = ["src-features/parallel"]
[dev-dependencies]
src-status = { path = "..", features = ["worktree-rewrites"] }
src-testtools = { path = "../../tests/tools" }
src-index = { path = "../../src-index" }
src-fs = { path = "../../src-fs" }
src-diff = { path = "../../src-diff" }
src-filter = { path = "../../src-filter" }
src-path = { path = "../../src-path" }
src-dir = { path = "../../src-dir" }
src-odb = { path = "../../src-odb" }
src-hash = { path = "../../src-hash" }
src-object = { path = "../../src-object" }
src-features = { path = "../../src-features", features = ["parallel"] }
src-pathspec = { path = "../../src-pathspec" }
src-worktree = { path = "../../src-worktree" }
filetime = "0.2.27"
bstr = { version = "1.12.0", default-features = false }
pretty_assertions = "1.4.0"

View File

@@ -0,0 +1,67 @@
#!/usr/bin/env bash
# Creates one repository per merge-conflict scenario, each in a directory named after the scenario.
# Every `git merge ... || :` is allowed to fail, as `|| :` keeps `set -e` from aborting the script.
# NOTE(review): the initial branch is assumed to be called `main` - confirm the test environment configures that.
set -eu -o pipefail
# `file` is renamed to a different name on each of the two branches before merging them.
(git init both-deleted && cd both-deleted
echo test > file
git add file && git commit -m file &&
git branch alt && git mv file added-by-them
git commit -m "file renamed in added-by-them" && git checkout alt
git mv file added-by-us
git commit -m "file renamed in added-by-us"
git reset --hard alt
git merge main || :
)
# `file` is modified on `main` and removed on `side`, which is the branch that performs the merge.
# Note that the second `git init` runs in the repository that was just created.
(git init deleted-by-us && cd deleted-by-us
git init
>file && git add file && git commit -m "initial"
echo change >> file && git commit -am "modify"
git checkout -b side HEAD^
git rm file
git commit -m delete
git merge main || :
)
# `file` is removed on `conflict` and modified on `main`, which is the branch that performs the merge.
(git init deleted-by-them && cd deleted-by-them
echo "This is some content." > file
git add file
git commit -m "Initial commit"
git checkout -b conflict
git rm file
git commit -m "Delete file in feature branch"
git checkout main
echo "Modified by main branch." >> file
git add file
git commit -m "Modified file in main branch"
git merge conflict || :
)
# `file` receives a different line on each of the two branches.
(git init both-modified && cd both-modified
git init
> file && git add file && git commit -m "init"
git checkout -b conflict
echo conflicting >> file && git commit -am "alt-change"
git checkout main
echo other >> file && git commit -am "change"
git merge conflict || :
)
# The file `both-added` is created with different content on each branch, and the file `deleted-by-them`
# is removed on `second_branch` while being changed on `main`.
(git init both-added && cd both-added
git init
set -x
echo init >> deleted-by-them && git add . && git commit -m "init"
git checkout -b second_branch
git rm deleted-by-them
git commit -m "deleted-by-them deleted on second_branch"
echo second > both-added && git add . && git commit -m second
git checkout main
echo on_second > deleted-by-them && git commit -am "on second"
echo main > both-added && git add . && git commit -m main
git merge second_branch || :
)

View File

@@ -0,0 +1,6 @@
status_unchanged.tar
status_changed.tar
symlink_stack.tar
status_nonfile.tar
status_unchanged_filter.tar
unreadable_untracked.tar

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,12 @@
#!/usr/bin/env bash
# Creates a repository whose only tracked file is overwritten after the commit with different content of the same length.
set -eu -o pipefail
git init -q
echo -n "foo" > content
git add -A
git commit -m "Commit"
# file size should not be changed by this
echo -n "bar" > content

View File

@@ -0,0 +1,27 @@
#!/usr/bin/env bash
# Creates a repository with files, an executable and a symlink, commits them, and then changes
# modes, content and the type of tracked entries in the worktree.
set -eu -o pipefail
git init -q
touch empty
echo -n "content" > executable
chmod +x executable
mkdir dir
echo -n "other content" > dir/content
echo -n "other content" > dir/content2
mkdir dir/sub-dir
(cd dir/sub-dir && ln -sf ../content symlink)
git add -A
git update-index --chmod=+x executable # For Windows.
git commit -m "Commit"
# Only the executable bit changes here.
chmod +x dir/content
# Only the content changes here.
echo "new content" > dir/content2
# Both the executable bit and the content change here.
chmod -x executable
echo -n "foo" > executable
# The tracked file is replaced by a symlink.
rm empty
ln -sf dir/content empty
git reset

View File

@@ -0,0 +1,18 @@
#!/usr/bin/env bash
# Creates a repository in which `content` was changed differently on `feat` and `main` before merging `feat` into `main`.
# The merge is allowed to fail as `|| :` keeps `set -e` from aborting the script.
# NOTE(review): the initial branch is assumed to be called `main` - confirm the test environment configures that.
set -eu -o pipefail
git init -q
echo base > content
git add -A
git commit -m "base"
git checkout -b feat
echo feat > content
git commit -am "feat"
git checkout main
echo base-change > content
git commit -am "new base"
git merge feat || :

View File

@@ -0,0 +1,9 @@
#!/usr/bin/env bash
# Creates a repository without commits whose only file is added with `--intent-to-add`.
set -eu -o pipefail
git init -q
touch content
echo -n "content" > content
git add --intent-to-add -A

View File

@@ -0,0 +1,54 @@
#!/usr/bin/env bash
# Creates the base repository `changed-and-untracked`, along with two copies of it that receive additional changes.
set -eu -o pipefail
# A tracked file with a modification, next to untracked files and an empty untracked directory.
git init -q changed-and-untracked
(cd changed-and-untracked
touch empty
echo "content" > executable
chmod +x executable
mkdir dir
echo "other content" > dir/content
echo "different content" > dir/content2
git add -A
git update-index --chmod=+x executable # For Windows.
git commit -m "Commit"
echo "change" >> executable
mkdir dir/empty
>dir/untracked
>untracked
git status
)
# Same as above, but tracked files are moved or copied, some of them with an added line.
cp -R changed-and-untracked changed-and-untracked-and-renamed
(cd changed-and-untracked-and-renamed
# it has a local change compared to the indexed version, hence it's rewritten
mv executable rewritten-executable
cp dir/content content-copy
cp dir/content content-copy-with-rewrite
echo change >> content-copy-with-rewrite
mv dir/content plainly-renamed-content
mv dir/content2 content-with-rewrite
echo change >> content-with-rewrite
)
# The tracked directory `dir`, which then also contains a nested file, is replaced by an empty file of the same name.
cp -R changed-and-untracked replace-dir-with-file
(cd replace-dir-with-file
git checkout executable
rm untracked dir/untracked
mkdir dir/sub
touch dir/sub/nested
git add dir && git commit -m "add file in sub-directory"
rm -Rf dir/
touch dir
)

View File

@@ -0,0 +1,19 @@
#!/usr/bin/env bash
# Creates two repositories that contain a named pipe: once as untracked entry, and once in place of a tracked file.
set -eu -o pipefail
git init -q untracked
(cd untracked
touch file && git add file && git commit -m "just to get an index for the test-suite"
mkfifo pipe
git status
)
git init -q tracked-swapped
(cd tracked-swapped
touch file && git add file && git commit -m "it starts out as trackable file"
# The tracked file is replaced by a named pipe.
rm file && mkfifo file
git status
)

View File

@@ -0,0 +1,19 @@
#!/usr/bin/env bash
# Creates a repository whose tracked files and symlink are all deleted from the worktree after the commit.
set -eu -o pipefail
git init -q
touch empty
echo -n "content" > executable
chmod +x executable
mkdir dir
echo -n "other content" > dir/content
mkdir dir/sub-dir
(cd dir/sub-dir && ln -sf ../content symlink)
git add -A
git update-index --chmod=+x executable # For Windows.
git commit -m "Commit"
rm -rf ./empty ./executable ./dir/content ./dir/sub-dir/symlink
git reset

View File

@@ -0,0 +1,48 @@
#!/usr/bin/env bash
# Creates `module1` with two commits for use as submodule, the repository `no-change` which adds it as `m1`,
# and copies of `no-change` in which the submodule directory is altered in different ways.
# NOTE(review): the initial branch is assumed to be called `main` - confirm the test environment configures that.
set -eu -o pipefail
git init -q module1
(cd module1
touch this
git add .
git commit -q -m c1
echo hello >> this
git commit -q -am c2
)
git init no-change
(cd no-change
git submodule add ../module1 m1
git commit -m "add module 1"
)
# The submodule directory is removed.
cp -R no-change deleted-dir
(cd deleted-dir
rm -Rf m1
)
# The submodule directory is replaced by an empty file.
cp -R no-change type-change
(cd type-change
rm -Rf m1
touch m1
)
# The submodule directory is replaced by an empty directory.
cp -R no-change empty-dir-no-change
(cd empty-dir-no-change
rm -Rf m1
mkdir m1
)
# The submodule is moved to its first commit on `main` and removed on `other`, then `main` is merged into `other`.
# The merge is allowed to fail as `|| :` keeps `set -e` from aborting the script.
cp -R no-change conflict
(cd conflict
(cd m1
git checkout @~1
)
git commit -am "change submodule head"
git checkout -b other @~1
git rm -rf m1
git commit -m "removed submodule"
git merge main || :
)

View File

@@ -0,0 +1,14 @@
#!/usr/bin/env bash
# Creates a repository in which the tracked directory `tracked` is replaced by a symlink to the
# untracked directory `target`, which contains a file of the same name but with different content.
set -eu -o pipefail
git init -q
mkdir tracked target
echo "content" > tracked/file
echo "other" > target/file
git add tracked/file
git commit -q -m init
rm -rf tracked
ln -s target tracked

View File

@@ -0,0 +1,21 @@
#!/usr/bin/env bash
# Creates a repository whose tracked files are only touched after the commit, without altering their content.
set -eu -o pipefail
git init -q
touch empty
echo -n "content" > executable
chmod +x executable
mkdir dir
echo -n "other content" > dir/content
mkdir dir/sub-dir
(cd dir/sub-dir && ln -sf ../content symlink)
git add -A
git update-index --chmod=+x executable # For Windows.
git commit -m "Commit"
touch ./empty ./executable ./dir/content ./dir/sub-dir/symlink
git reset # ensure index timestamp is large enough to not mark everything racy

View File

@@ -0,0 +1,23 @@
#!/usr/bin/env bash
# Creates a repository whose tracked files are removed and checked out again after enabling `core.autocrlf`.
set -eu -o pipefail
git init -q
touch empty
echo -n "content" >executable
chmod +x executable
mkdir dir
echo "other content" >dir/content
seq 5 >dir/content2
mkdir dir/sub-dir
(cd dir/sub-dir && ln -sf ../content symlink)
git add -A
git update-index --chmod=+x executable # For Windows.
git commit -m "Commit"
# Remove all tracked files so the forced checkout below recreates them with `core.autocrlf` in effect.
git ls-files | xargs rm
git config core.autocrlf true
git checkout -f HEAD

View File

@@ -0,0 +1,20 @@
#!/usr/bin/env bash
# Creates the directory `base` with files, directories and symlinks to both, along with `symlink-base`,
# a symlink to `base` itself. No Git repository is involved.
set -eu -o pipefail
mkdir base;
(cd base
touch file
mkdir dir
touch dir/file-in-dir
(cd dir
ln -s file-in-dir filelink
mkdir subdir
ln -s subdir dirlink
)
ln -s file root-filelink
ln -s dir root-dirlink
)
ln -s base symlink-base

View File

@@ -0,0 +1,11 @@
#!/usr/bin/env bash
# Creates a repository with one tracked file, and an untracked file that has all of its permissions removed.
set -eu -o pipefail
git init
>tracked
git add tracked && git commit -m "init"
>unreadable
chmod 000 unreadable
git status

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,473 @@
use bstr::ByteSlice;
use gix_diff::{blob::pipeline::WorktreeRoots, rewrites::CopySource};
use gix_index::entry;
use gix_status::{
index_as_worktree::{traits::FastEq, Change, EntryStatus},
index_as_worktree_with_renames,
index_as_worktree_with_renames::{Context, DirwalkContext, Entry, Options, Outcome, Recorder, Sorting, Summary},
};
use pretty_assertions::assert_eq;
use crate::{fixture_path, fixture_path_rw_slow};
// Runs against the `changed-and-untracked-and-renamed` repository of the `status_many.sh` fixture, with
// tracking of renames and copies enabled, and validates both the produced entries and the rewrite statistics.
#[test]
fn changed_and_untracked_and_renamed() {
let expectations_with_dirwalk = [
// Not always will we match the right source to destinations, there is ambiguity.
Expectation::Rewrite {
source_rela_path: "dir/content",
dest_rela_path: "content-copy",
dest_dirwalk_status: gix_dir::entry::Status::Untracked,
diff: None,
copy: false,
},
Expectation::DirwalkEntry {
rela_path: "content-copy-with-rewrite",
status: gix_dir::entry::Status::Untracked,
disk_kind: Some(gix_dir::entry::Kind::File),
},
Expectation::Rewrite {
source_rela_path: "dir/content2",
dest_rela_path: "content-with-rewrite",
dest_dirwalk_status: gix_dir::entry::Status::Untracked,
diff: Some(gix_diff::blob::DiffLineStats {
removals: 0,
insertions: 1,
before: 1,
after: 2,
similarity: 0.72,
}),
copy: false,
},
Expectation::Rewrite {
source_rela_path: "empty",
dest_rela_path: "dir/untracked",
dest_dirwalk_status: gix_dir::entry::Status::Untracked,
diff: None,
copy: true,
},
// This is just detected as untracked, related to how the rename-tracker matches pairs
Expectation::DirwalkEntry {
rela_path: "plainly-renamed-content",
status: gix_dir::entry::Status::Untracked,
disk_kind: Some(gix_dir::entry::Kind::File),
},
Expectation::Rewrite {
source_rela_path: "executable",
dest_rela_path: "rewritten-executable",
dest_dirwalk_status: gix_dir::entry::Status::Untracked,
diff: Some(gix_diff::blob::DiffLineStats {
removals: 0,
insertions: 1,
before: 1,
after: 2,
similarity: 0.53333336,
}),
copy: false,
},
Expectation::Rewrite {
source_rela_path: "empty",
dest_rela_path: "untracked",
dest_dirwalk_status: gix_dir::entry::Status::Untracked,
diff: None,
copy: true,
},
];
// Renames and copies both use a similarity threshold of 30%, and empty files are tracked as well,
// which is why the empty untracked files can be expected as copies of `empty` above.
let rewrites = gix_diff::Rewrites {
copies: Some(gix_diff::rewrites::Copies {
source: CopySource::FromSetOfModifiedFiles,
percentage: Some(0.3),
}),
percentage: Some(0.3),
limit: 0,
track_empty: true,
};
let out = fixture_filtered_detailed(
"status_many.sh",
"changed-and-untracked-and-renamed",
&[],
&expectations_with_dirwalk,
Some(rewrites),
Some(Default::default()),
Fixture::ReadOnly,
);
// The outcome reports the options it ran with, along with the amount of similarity checks.
assert_eq!(
out.rewrites,
Some(gix_diff::rewrites::Outcome {
options: rewrites,
num_similarity_checks: 11,
num_similarity_checks_skipped_for_rename_tracking_due_to_limit: 0,
num_similarity_checks_skipped_for_copy_tracking_due_to_limit: 0,
})
);
}
// The `untracked` repository of the `status_nonfile.sh` fixture contains an untracked named pipe, and with the
// dirwalk enabled, no entry at all is expected.
#[test]
#[cfg(unix)]
fn nonfile_untracked_are_not_visible() {
fixture_filtered_detailed(
"status_nonfile.sh",
"untracked",
&[],
&[],
None,
Some(Default::default()),
Fixture::ReadOnly,
);
}
// The `tracked-swapped` repository of the `status_nonfile.sh` fixture replaces the tracked `file` with a named pipe,
// which is expected to show up as type change with a worktree mode of `FILE`.
#[test]
#[cfg(unix)]
fn tracked_changed_to_non_file() {
fixture_filtered_detailed(
"status_nonfile.sh",
"tracked-swapped",
&[],
&[Expectation::Modification {
rela_path: "file",
status: Change::Type {
worktree_mode: entry::Mode::FILE,
}
.into(),
}],
None,
Some(Default::default()),
Fixture::ReadOnly,
);
}
// Runs three times against the `changed-and-untracked` repository of the `status_many.sh` fixture:
// without dirwalk and rewrites, with dirwalk only, and with both dirwalk and rewrites.
#[test]
fn changed_and_untracked() {
// First run: neither dirwalk nor rewrites are configured, so only the tracked modification is expected.
let out = fixture_filtered_detailed(
"status_many.sh",
"changed-and-untracked",
&[],
&[Expectation::Modification {
rela_path: "executable",
status: EntryStatus::Change(Change::Modification {
executable_bit_changed: false,
content_change: Some(()),
set_entry_stat_size_zero: false,
}),
}],
None,
None,
Fixture::ReadOnly,
);
assert_eq!(out.tracked_file_modification.entries_processed, 4);
assert_eq!(
out.dirwalk, None,
"we didn't configure the dirwalk, so it's just like a modification check"
);
assert_eq!(out.rewrites, None, "rewrite checking isn't configured either");
// With the dirwalk enabled, the untracked files are expected in addition to the modification.
let expectations_with_dirwalk = [
Expectation::DirwalkEntry {
rela_path: "dir/untracked",
status: gix_dir::entry::Status::Untracked,
disk_kind: Some(gix_dir::entry::Kind::File),
},
Expectation::Modification {
rela_path: "executable",
status: EntryStatus::Change(Change::Modification {
executable_bit_changed: false,
content_change: Some(()),
set_entry_stat_size_zero: false,
}),
},
Expectation::DirwalkEntry {
rela_path: "untracked",
status: gix_dir::entry::Status::Untracked,
disk_kind: Some(gix_dir::entry::Kind::File),
},
];
// Second run: the dirwalk is configured with default options, rewrites remain disabled.
let out = fixture_filtered_detailed(
"status_many.sh",
"changed-and-untracked",
&[],
&expectations_with_dirwalk,
None,
Some(gix_dir::walk::Options::default()),
Fixture::ReadOnly,
);
let dirwalk = out.dirwalk.expect("configured thus has output");
assert_eq!(
dirwalk,
gix_dir::walk::Outcome {
read_dir_calls: 3,
returned_entries: 2,
seen_entries: 8,
}
);
assert_eq!(out.rewrites, None, "rewrites are still not configured");
// Third run: rewrites are configured with default options as well, with the same entries being expected.
let out = fixture_filtered_detailed(
"status_many.sh",
"changed-and-untracked",
&[],
&expectations_with_dirwalk,
Some(Default::default()),
Some(gix_dir::walk::Options::default()),
Fixture::ReadOnly,
);
let rewrites = out.rewrites.expect("configured thus has output");
assert_eq!(
rewrites,
gix_diff::rewrites::Outcome::default(),
"there actually is no candidates pairs as there are no deletions"
);
}
/// Runs the status over a freshly executed, writable `unreadable_untracked.sh` fixture
/// and expects the path `unreadable` to be reported as an untracked file.
#[cfg(unix)]
#[test]
fn unreadable_untracked() {
    let unreadable = Expectation::DirwalkEntry {
        rela_path: "unreadable",
        status: gix_dir::entry::Status::Untracked,
        disk_kind: Some(gix_dir::entry::Kind::File),
    };
    let out = fixture_filtered_detailed(
        "unreadable_untracked.sh",
        "",
        &[],
        &[unreadable],
        Some(gix_diff::Rewrites::default()),
        Some(gix_dir::walk::Options::default()),
        Fixture::WritableExecuted,
    );
    assert_eq!(
        out.dirwalk.expect("configured thus has output"),
        gix_dir::walk::Outcome {
            read_dir_calls: 1,
            returned_entries: 1,
            seen_entries: 3,
        }
    );
}
/// Selects how `fixture_filtered_detailed` obtains the worktree for a fixture script.
enum Fixture {
    /// Use the directory returned by `fixture_path()`; no temporary directory is kept.
    ReadOnly,
    /// Use the temporary directory returned by `fixture_path_rw_slow()`, which is kept
    /// alive for the duration of the status run.
    WritableExecuted,
}
/// Run `index_as_worktree_with_renames()` on a scripted fixture and assert on what was recorded.
///
/// * `script` and `subdir` select the fixture script and the directory within its output
///   that serves as worktree; its `.git` directory provides the index and the object database.
/// * `pathspecs` are converted with `to_pathspecs()` and used as pathspec search.
/// * `expected` is compared against the recorded entries, both in full and by summary.
/// * `rewrites` and `dirwalk` are passed through to the status options unchanged.
/// * `fixture` decides between the read-only fixture and a writable temporary copy.
///
/// Returns the outcome of the status run with a few counters of `tracked_file_modification`
/// reset to zero. Panics if the setup fails or if the records don't match `expected`.
fn fixture_filtered_detailed(
    script: &str,
    subdir: &str,
    pathspecs: &[&str],
    expected: &[Expectation<'_>],
    rewrites: Option<gix_diff::Rewrites>,
    dirwalk: Option<gix_dir::walk::Options>,
    fixture: Fixture,
) -> Outcome {
    // `_keep_alive` holds the temporary directory (if any) until this function returns.
    let (worktree, _keep_alive) = match fixture {
        Fixture::ReadOnly => (fixture_path(script).join(subdir), None),
        Fixture::WritableExecuted => {
            let tmp = fixture_path_rw_slow(script);
            (tmp.path().join(subdir), Some(tmp))
        }
    };
    let git_dir = worktree.join(".git");
    let index_path = git_dir.join("index");
    let index = gix_index::File::at(index_path, gix_hash::Kind::Sha1, false, Default::default()).unwrap();

    let pathspec = gix_pathspec::Search::from_specs(
        crate::index_as_worktree::to_pathspecs(pathspecs),
        None,
        std::path::Path::new(""),
    )
    .expect("valid specs can be normalized");

    let attr_stack = gix_worktree::Stack::from_state_and_ignore_case(
        worktree.clone(),
        false,
        gix_worktree::stack::State::AttributesAndIgnoreStack {
            attributes: Default::default(),
            ignore: Default::default(),
        },
        &index,
        index.path_backing(),
    );
    let fs_caps = gix_fs::Capabilities::probe(&git_dir);

    // The blob pipeline reads the new side from the worktree; there is no root for the old side.
    let blob_pipeline = gix_diff::blob::Pipeline::new(
        WorktreeRoots {
            old_root: None,
            new_root: Some(worktree.clone()),
        },
        gix_filter::Pipeline::new(Default::default(), Default::default()),
        vec![],
        gix_diff::blob::pipeline::Options {
            large_file_threshold_bytes: 0,
            fs: fs_caps,
        },
    );
    let resource_cache = gix_diff::blob::Platform::new(
        Default::default(),
        blob_pipeline,
        gix_diff::blob::pipeline::Mode::ToGit,
        attr_stack,
    );

    let git_dir_real = gix_path::realpath(&git_dir).unwrap();
    let cwd = gix_fs::current_dir(fs_caps.precompose_unicode).unwrap();
    let context = Context {
        pathspec,
        resource_cache,
        should_interrupt: &Default::default(),
        dirwalk: DirwalkContext {
            git_dir_realpath: &git_dir_real,
            current_dir: &cwd,
            ignore_case_index_lookup: None,
        },
    };
    let options = Options {
        object_hash: gix_hash::Kind::Sha1,
        tracked_file_modifications: gix_status::index_as_worktree::Options {
            fs: fs_caps,
            stat: crate::index_as_worktree::TEST_OPTIONS,
            ..Default::default()
        },
        dirwalk,
        sorting: Some(Sorting::ByPathCaseSensitive),
        rewrites,
    };

    let mut recorder = Recorder::default();
    let objects = gix_odb::at(git_dir.join("objects")).unwrap().into_arc().unwrap();
    let mut outcome = index_as_worktree_with_renames(
        &index,
        &worktree,
        &mut recorder,
        FastEq,
        crate::index_as_worktree::SubmoduleStatusMock { dirty: false },
        objects,
        &mut gix_features::progress::Discard,
        context,
        options,
    )
    .unwrap();

    assert_eq!(records_to_expectations(&recorder.records), expected);
    assert_summary(&recorder.records, expected);

    // Reset these counters so callers can compare the outcome against fixed values
    // (presumably they aren't stable across runs or platforms - TODO confirm).
    let stats = &mut outcome.tracked_file_modification;
    stats.worktree_bytes = 0;
    stats.worktree_files_read = 0;
    stats.entries_to_update = 0;
    stats.racy_clean = 0;
    outcome
}
/// Assert that the summaries of `entries` equal the summaries of `expected`, pairwise and in order.
///
/// Modifications with `NeedsUpdate` status are dropped from `entries` before comparing,
/// and the number of remaining entries must equal the number of expectations.
fn assert_summary(entries: &[Entry<(), ()>], expected: &[Expectation]) {
    /// `true` for entries that only signal that the index entry needs an update.
    fn is_needs_update(entry: &Entry<'_, (), ()>) -> bool {
        matches!(
            entry,
            Entry::Modification {
                status: EntryStatus::NeedsUpdate(..),
                ..
            }
        )
    }

    let relevant: Vec<_> = entries.iter().filter(|entry| !is_needs_update(entry)).collect();
    assert_eq!(relevant.len(), expected.len());
    for (actual, wanted) in relevant.into_iter().zip(expected.iter()) {
        assert_eq!(actual.summary(), wanted.summary());
    }
}
/// Convert recorded status entries into comparable `Expectation`s, keeping their order.
///
/// Modifications with `NeedsUpdate` status are left out. All paths must be valid UTF-8,
/// otherwise this panics.
fn records_to_expectations<'a>(recs: &'a [Entry<'_, (), ()>]) -> Vec<Expectation<'a>> {
    recs.iter()
        .filter_map(|rec| match rec {
            Entry::Modification {
                status: EntryStatus::NeedsUpdate(..),
                ..
            } => None,
            Entry::Modification { rela_path, status, .. } => Some(Expectation::Modification {
                rela_path: rela_path.to_str().unwrap(),
                status: status.clone(),
            }),
            Entry::DirectoryContents { entry, .. } => Some(Expectation::DirwalkEntry {
                rela_path: entry.rela_path.to_str().unwrap(),
                status: entry.status,
                disk_kind: entry.disk_kind,
            }),
            Entry::Rewrite {
                source,
                dirwalk_entry,
                diff,
                copy,
                ..
            } => Some(Expectation::Rewrite {
                source_rela_path: source.rela_path().to_str().unwrap(),
                dest_rela_path: dirwalk_entry.rela_path.to_str().unwrap(),
                dest_dirwalk_status: dirwalk_entry.status,
                diff: *diff,
                copy: *copy,
            }),
        })
        .collect()
}
/// A simplified, comparable form of a recorded status entry, as produced by
/// `records_to_expectations()`.
#[derive(Debug, Clone, PartialEq)]
enum Expectation<'a> {
    /// Counterpart of `Entry::Modification`.
    Modification {
        /// The path of the modified entry.
        rela_path: &'a str,
        /// The status recorded for the entry.
        status: EntryStatus<(), ()>,
    },
    /// Counterpart of `Entry::DirectoryContents`.
    DirwalkEntry {
        /// The path of the directory walk entry.
        rela_path: &'a str,
        /// The status of the directory walk entry.
        status: gix_dir::entry::Status,
        /// The kind of the directory walk entry on disk, if it is known.
        disk_kind: Option<gix_dir::entry::Kind>,
    },
    /// Counterpart of `Entry::Rewrite`.
    Rewrite {
        /// The path of the rewrite source.
        source_rela_path: &'a str,
        /// The path of the directory walk entry that is the destination of the rewrite.
        dest_rela_path: &'a str,
        /// The status of the destination's directory walk entry.
        dest_dirwalk_status: gix_dir::entry::Status,
        /// The line statistics recorded for the rewrite, if any.
        diff: Option<gix_diff::blob::DiffLineStats>,
        /// The `copy` flag of the rewrite; `summary()` maps `true` to `Summary::Copied`
        /// and `false` to `Summary::Renamed`.
        copy: bool,
    },
}
impl Expectation<'_> {
    /// Return the summary for this expectation, or `None` if there is none.
    ///
    /// `None` is returned for modifications with `NeedsUpdate` status and for
    /// dirwalk entries whose status isn't `Untracked`.
    pub fn summary(&self) -> Option<Summary> {
        let summary = match self {
            Expectation::Modification { status, .. } => match status {
                EntryStatus::NeedsUpdate(_) => return None,
                EntryStatus::Conflict { .. } => Summary::Conflict,
                EntryStatus::IntentToAdd => Summary::IntentToAdd,
                EntryStatus::Change(Change::Removed) => Summary::Removed,
                EntryStatus::Change(Change::Type { .. }) => Summary::TypeChange,
                EntryStatus::Change(Change::Modification { .. } | Change::SubmoduleModification(_)) => {
                    Summary::Modified
                }
            },
            Expectation::DirwalkEntry { status, .. } => match status {
                gix_dir::entry::Status::Untracked => Summary::Added,
                _ => return None,
            },
            Expectation::Rewrite { copy: true, .. } => Summary::Copied,
            Expectation::Rewrite { copy: false, .. } => Summary::Renamed,
        };
        Some(summary)
    }
}

View File

@@ -0,0 +1,27 @@
use gix_testtools::Creation;
pub use gix_testtools::Result;
mod index_as_worktree;
mod index_as_worktree_with_renames;
mod stack;
/// Return the directory of the read-only fixture created by the script `name`.
///
/// The extension of `name` is set to `sh` before the script is looked up.
/// Panics if the fixture can't be provided.
pub fn fixture_path(name: &str) -> std::path::PathBuf {
    let script = std::path::Path::new(name).with_extension("sh");
    gix_testtools::scripted_fixture_read_only_standalone(script).expect("script works")
}
/// Return a writable temporary directory with the fixture created by the script `name`,
/// requesting `Creation::Execute` and passing no arguments to the script.
///
/// The extension of `name` is set to `sh` before the script is looked up.
/// Panics if the fixture can't be provided.
pub fn fixture_path_rw_slow(name: &str) -> gix_testtools::tempfile::TempDir {
    let script = std::path::Path::new(name).with_extension("sh");
    let no_args = None::<String>;
    gix_testtools::scripted_fixture_writable_with_args_standalone_single_archive(script, no_args, Creation::Execute)
        .expect("script works")
}
/// Parse `hex` into an object id, panicking if it isn't valid.
fn hex_to_id(hex: &str) -> gix_hash::ObjectId {
    let parsed = gix_hash::ObjectId::from_hex(hex.as_bytes());
    parsed.expect("40 bytes hex")
}

View File

@@ -0,0 +1,91 @@
/// Return a symlink check rooted at the `base` directory of the `symlink_stack.sh` fixture.
fn stack() -> gix_status::SymlinkCheck {
    const DEFAULT_ROOT: &str = "base";
    stack_in(DEFAULT_ROOT)
}
/// Return a symlink check rooted at `dir` within the `symlink_stack.sh` fixture.
///
/// Panics if the fixture can't be provided.
fn stack_in(dir: &str) -> gix_status::SymlinkCheck {
    let fixture_root =
        gix_testtools::scripted_fixture_read_only_standalone("symlink_stack.sh").expect("valid script");
    gix_status::SymlinkCheck::new(fixture_root.join(dir))
}
/// Paths whose leading directories aren't symlinks are verified successfully, for both
/// the `base` and the `symlink-base` root, and the item they point to is of the expected kind.
#[test]
fn paths_not_going_through_symlink_directories_are_ok_and_point_to_correct_item() -> crate::Result {
    type Check = fn(&std::fs::Metadata) -> bool;
    let cases: [(&str, Check); 8] = [
        ("root-filelink", is_symlink),
        ("root-dirlink", is_symlinked_dir),
        ("file", is_file),
        ("dir/file-in-dir", is_file),
        ("dir", is_dir),
        ("dir/subdir", is_dir),
        ("dir/filelink", is_symlink),
        ("dir/dirlink", is_symlinked_dir),
    ];
    for root in ["base", "symlink-base"] {
        let mut check = stack_in(root);
        for (rela_path, expectation) in cases {
            let path = check.verified_path(rela_path)?;
            // Don't follow symlinks so that the link itself can be inspected.
            let metadata = path.symlink_metadata()?;
            assert!(expectation(&metadata), "{rela_path:?} expectation failed");
        }
    }
    Ok(())
}
/// Verifying a path succeeds even if its last component doesn't exist on disk.
#[test]
fn leaf_file_does_not_have_to_exist() -> crate::Result {
    let mut check = stack();
    let path = check.verified_path("dir/does-not-exist")?;
    assert!(!path.exists());
    Ok(())
}
/// Outside of Windows, a missing intermediate directory makes verification fail with `NotFound`.
#[test]
#[cfg(not(windows))]
fn intermediate_directories_have_to_exist_or_not_found_error() -> crate::Result {
    let mut check = stack();
    let err = check.verified_path("nonexisting-dir/file").unwrap_err();
    assert_eq!(err.kind(), std::io::ErrorKind::NotFound);
    Ok(())
}
/// On Windows, verification succeeds even if an intermediate directory doesn't exist.
#[test]
#[cfg(windows)]
fn intermediate_directories_do_not_have_exist_for_success() -> crate::Result {
    let mut check = stack();
    let verified = check.verified_path("nonexisting-dir/file");
    assert!(verified.is_ok());
    Ok(())
}
/// Paths with a symlinked directory among their leading components are rejected with an
/// error of kind `Other`, whether the symlink sits at the root or in a subdirectory.
#[test]
#[cfg_attr(
    windows,
    ignore = "on windows, symlinks appear to be files or dirs, is_symlink() doesn't work"
)]
fn paths_leading_through_symlinks_are_rejected() {
    let mut stack = stack();
    // Each case names the symlinked directory it passes through, so that a failure
    // message points at the symlink that was actually under test.
    for (rela_path, symlink) in [
        ("root-dirlink/file-in-dir", "root-dirlink"),
        ("dir/dirlink/nothing", "dir/dirlink"),
    ] {
        assert_eq!(
            stack.verified_path(rela_path).unwrap_err().kind(),
            std::io::ErrorKind::Other,
            "{symlink} is a symlink to a directory"
        );
    }
}
/// Return `true` if `m` describes a symbolic link.
fn is_symlink(m: &std::fs::Metadata) -> bool {
    m.file_type().is_symlink()
}
/// Return `true` if `m` describes a symbolic link.
///
/// NOTE(review): this performs the same check as `is_symlink()` and doesn't look at the
/// link target; the caller in this file obtains `m` via `symlink_metadata()`, which
/// describes the link itself.
fn is_symlinked_dir(m: &std::fs::Metadata) -> bool {
    m.file_type().is_symlink()
}
/// Return `true` if `m` describes a regular file.
fn is_file(m: &std::fs::Metadata) -> bool {
    m.file_type().is_file()
}
/// Return `true` if `m` describes a directory.
fn is_dir(m: &std::fs::Metadata) -> bool {
    m.file_type().is_dir()
}