create src

This commit is contained in:
awfixer
2026-03-11 02:04:19 -07:00
commit 52f7a22bf2
2595 changed files with 402870 additions and 0 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,39 @@
# Inherit the lint configuration defined at the workspace root.
lints.workspace = true
[package]
name = "src-worktree-state"
version = "0.27.0"
repository = "https://github.com/GitoxideLabs/gitoxide"
license = "MIT OR Apache-2.0"
description = "A crate of the gitoxide project implementing setting the worktree to a particular state"
authors = ["Sebastian Thiel <sebastian.thiel@icloud.com>"]
edition = "2021"
# Only ship sources and license texts in the published package.
include = ["src/**/*", "LICENSE-*"]
rust-version = "1.82"
# Tests are not auto-discovered; they live in a dedicated tests crate.
autotests = false
[lib]
# Do not run documentation tests for this crate.
doctest = false
[features]
## Enable support for the SHA-1 hash by forwarding the feature to dependencies.
sha1 = ["src-filter/sha1", "src-index/sha1", "src-object/sha1", "src-worktree/sha1"]
[dependencies]
src-worktree = { version = "^0.49.0", path = "../src-worktree", default-features = false, features = ["attributes"] }
src-index = { version = "^0.48.0", path = "../src-index" }
src-fs = { version = "^0.19.1", path = "../src-fs" }
src-object = { version = "^0.57.0", path = "../src-object" }
src-path = { version = "^0.11.1", path = "../src-path" }
src-features = { version = "^0.46.1", path = "../src-features" }
src-filter = { version = "^0.27.0", path = "../src-filter" }
io-close = "0.3.7"
thiserror = "2.0.18"
bstr = { version = "1.12.0", default-features = false }
[dev-dependencies]
# Enable the sha1 feature when testing this very crate.
src-worktree-state = { path = ".", features = ["sha1"] }
[package.metadata.docs.rs]
features = ["sha1"]

View File

@@ -0,0 +1 @@
../LICENSE-APACHE

View File

@@ -0,0 +1 @@
../LICENSE-MIT

View File

@@ -0,0 +1,365 @@
use std::{
collections::BTreeSet,
sync::atomic::{AtomicUsize, Ordering},
};
use bstr::{BStr, BString};
use gix_worktree::Stack;
use crate::{checkout, checkout::entry};
mod reduce {
    use crate::checkout;

    /// Aggregates the per-chunk [`Outcome`](super::Outcome)s of a parallel checkout into a single total.
    pub struct Reduce<'entry> {
        /// The running total over all chunks fed so far.
        pub aggregate: super::Outcome<'entry>,
    }
    impl<'entry> gix_features::parallel::Reduce for Reduce<'entry> {
        type Input = Result<super::Outcome<'entry>, checkout::Error>;
        type FeedProduce = ();
        type Output = super::Outcome<'entry>;
        type Error = checkout::Error;
        fn feed(&mut self, item: Self::Input) -> Result<Self::FeedProduce, Self::Error> {
            // Destructure exhaustively so adding a field to `Outcome` forces an update here.
            let super::Outcome {
                bytes_written: chunk_bytes,
                files: chunk_files,
                delayed_symlinks: chunk_symlinks,
                errors: chunk_errors,
                collisions: chunk_collisions,
                delayed_paths_unknown: chunk_unknown,
                delayed_paths_unprocessed: chunk_unprocessed,
            } = item?;
            let total = &mut self.aggregate;
            total.bytes_written += chunk_bytes;
            total.files += chunk_files;
            total.delayed_symlinks.extend(chunk_symlinks);
            total.errors.extend(chunk_errors);
            total.collisions.extend(chunk_collisions);
            total.delayed_paths_unknown.extend(chunk_unknown);
            total.delayed_paths_unprocessed.extend(chunk_unprocessed);
            Ok(())
        }
        fn finalize(self) -> Result<Self::Output, Self::Error> {
            Ok(self.aggregate)
        }
    }
}
pub use reduce::Reduce;
use crate::checkout::entry::DelayedFilteredStream;
/// The accumulated result of checking out one chunk of index entries.
#[derive(Default)]
pub struct Outcome<'a> {
    /// Paths that could not be checked out as something else already existed on disk.
    pub collisions: Vec<checkout::Collision>,
    /// Errors that were recorded instead of aborting the operation (see `Options::keep_going`).
    pub errors: Vec<checkout::ErrorRecord>,
    /// Symlink entries whose creation is deferred until after all files were written.
    pub delayed_symlinks: Vec<(&'a mut gix_index::Entry, &'a BStr)>,
    /// All (immediately) written bytes.
    pub bytes_written: u64,
    /// The amount of files we processed.
    pub files: usize,
    /// Relative paths that the process listed as 'delayed' even though we never passed them.
    pub delayed_paths_unknown: Vec<BString>,
    /// All paths that were left unprocessed, because they were never listed by the process even though we passed them.
    pub delayed_paths_unprocessed: Vec<BString>,
}
/// Mutable state a chunk-processing thread needs to check out entries.
#[derive(Clone)]
pub struct Context<Find: Clone> {
    /// Access to the object database to obtain blob contents.
    pub objects: Find,
    /// A stack to resolve destination paths, also used to query attributes for the filter pipeline.
    pub path_cache: Stack,
    /// The filter pipeline to convert blob content into its worktree representation.
    pub filters: gix_filter::Pipeline,
    /// A reusable buffer for object data.
    pub buf: Vec<u8>,
    /// Options that influence how each entry is written.
    pub options: Options,
}
/// The copyable subset of the checkout options that each chunk needs.
#[derive(Clone, Copy)]
pub struct Options {
    /// Capabilities of the destination file system.
    pub fs: gix_fs::Capabilities,
    /// If `true`, the destination is expected to contain no files yet, enabling exclusive file creation.
    pub destination_is_initially_empty: bool,
    /// If `true`, entries already present on disk will be overwritten.
    pub overwrite_existing: bool,
    /// If `true`, record errors instead of aborting on the first one.
    pub keep_going: bool,
    /// Control whether long-running filter processes may delay their results.
    pub filter_process_delay: gix_filter::driver::apply::Delay,
}
impl From<&checkout::Options> for Options {
    /// Extract the per-chunk subset from the full set of checkout options.
    fn from(full: &checkout::Options) -> Self {
        Self {
            fs: full.fs,
            destination_is_initially_empty: full.destination_is_initially_empty,
            overwrite_existing: full.overwrite_existing,
            keep_going: full.keep_going,
            filter_process_delay: full.filter_process_delay,
        }
    }
}
/// Check out all `entries_with_paths` of one chunk, pushing the results of long-running filters into
/// `delayed_filter_results` for later processing and collecting everything else into the returned [`Outcome`].
///
/// `files` and `bytes` are shared progress counters; `ctx` provides object access, filters and options.
pub fn process<'entry, Find>(
    entries_with_paths: impl Iterator<Item = (&'entry mut gix_index::Entry, &'entry BStr)>,
    files: &AtomicUsize,
    bytes: &AtomicUsize,
    delayed_filter_results: &mut Vec<DelayedFilteredStream<'entry>>,
    ctx: &mut Context<Find>,
) -> Result<Outcome<'entry>, checkout::Error>
where
    Find: gix_object::Find + Clone,
{
    let mut delayed_symlinks = Vec::new();
    let mut collisions = Vec::new();
    let mut errors = Vec::new();
    let mut bytes_written = 0;
    let mut files_in_chunk = 0;
    for (entry, entry_path) in entries_with_paths {
        // TODO: write test for that
        // Skip-worktree entries count as processed without touching disk.
        if entry.flags.contains(gix_index::entry::Flags::SKIP_WORKTREE) {
            files.fetch_add(1, Ordering::Relaxed);
            files_in_chunk += 1;
            continue;
        }
        // Symlinks always have to be delayed on windows as they have to point to something that exists on creation.
        // And even if not, there is a distinction between file and directory symlinks, hence we have to check what the target is
        // before creating it.
        // And to keep things sane, we just do the same on non-windows as well which is similar to what git does and adds some safety
        // around writing through symlinks (even though we handle this).
        // This also means that we prefer content in files over symlinks in case of collisions, which probably is for the better, too.
        if entry.mode == gix_index::entry::Mode::SYMLINK {
            delayed_symlinks.push((entry, entry_path));
            continue;
        }
        match checkout_entry_handle_result(entry, entry_path, &mut errors, &mut collisions, files, bytes, ctx)? {
            entry::Outcome::Written { bytes } => {
                bytes_written += bytes as u64;
                files_in_chunk += 1;
            }
            // Entries claimed by a delaying filter are neither counted nor written yet.
            entry::Outcome::Delayed(delayed) => delayed_filter_results.push(delayed),
        }
    }
    Ok(Outcome {
        bytes_written,
        files: files_in_chunk,
        errors,
        collisions,
        delayed_symlinks,
        delayed_paths_unknown: Vec::new(),
        delayed_paths_unprocessed: Vec::new(),
    })
}
pub fn process_delayed_filter_results<Find>(
mut delayed_filter_results: Vec<DelayedFilteredStream<'_>>,
files: &AtomicUsize,
bytes: &AtomicUsize,
out: &mut Outcome<'_>,
ctx: &mut Context<Find>,
) -> Result<(), checkout::Error>
where
Find: gix_object::Find + Clone,
{
let Options {
destination_is_initially_empty,
overwrite_existing,
keep_going,
..
} = ctx.options;
let mut bytes_written = 0;
let mut delayed_files = 0;
// Sort by path for fast lookups
delayed_filter_results.sort_by(|a, b| a.entry_path.cmp(b.entry_path));
// We process each key and do as the filter process tells us, while collecting data about the overall progress.
let keys: BTreeSet<_> = delayed_filter_results.iter().map(|d| d.key.clone()).collect();
let mut unknown_paths = Vec::new();
for key in keys {
loop {
let rela_paths = ctx.filters.driver_state_mut().list_delayed_paths(&key)?;
if rela_paths.is_empty() {
break;
}
for rela_path in rela_paths {
let delayed = match delayed_filter_results.binary_search_by(|d| d.entry_path.cmp(rela_path.as_ref())) {
Ok(idx) => &mut delayed_filter_results[idx],
Err(_) => {
if keep_going {
unknown_paths.push(rela_path);
continue;
} else {
return Err(checkout::Error::FilterPathUnknown { rela_path });
}
}
};
let mut read = std::io::BufReader::with_capacity(
512 * 1024,
ctx.filters.driver_state_mut().fetch_delayed(
&key,
rela_path.as_ref(),
gix_filter::driver::Operation::Smudge,
)?,
);
let (file, set_executable_after_creation) = match entry::open_file(
&std::mem::take(&mut delayed.validated_file_path), // mark it as seen, relevant for `unprocessed_paths`
destination_is_initially_empty,
overwrite_existing,
delayed.needs_executable_bit,
delayed.entry.mode,
) {
Ok(res) => res,
Err(err) => {
if !is_collision(&err, delayed.entry_path, &mut out.collisions, files) {
handle_error(err, delayed.entry_path, files, &mut out.errors, ctx.options.keep_going)?;
}
std::io::copy(&mut read, &mut std::io::sink())?;
continue;
}
};
let mut write = WriteWithProgress {
inner: std::io::BufWriter::with_capacity(512 * 1024, file),
progress: bytes,
};
let actual_bytes = std::io::copy(&mut read, &mut write)?;
bytes_written += actual_bytes;
entry::finalize_entry(
delayed.entry,
write.inner.into_inner().map_err(std::io::IntoInnerError::into_error)?,
actual_bytes,
set_executable_after_creation,
)?;
delayed_files += 1;
files.fetch_add(1, Ordering::Relaxed);
}
}
}
let unprocessed_paths = delayed_filter_results
.into_iter()
.filter_map(|d| (!d.validated_file_path.as_os_str().is_empty()).then(|| d.entry_path.to_owned()))
.collect();
if !keep_going && !unknown_paths.is_empty() {
return Err(checkout::Error::FilterPathsUnprocessed {
rela_paths: unprocessed_paths,
});
}
out.delayed_paths_unknown = unknown_paths;
out.delayed_paths_unprocessed = unprocessed_paths;
out.bytes_written += bytes_written;
out.files += delayed_files;
Ok(())
}
/// A writer which feeds the amount of bytes written into a shared progress counter.
pub struct WriteWithProgress<'a, T> {
    /// The writer that actually receives the data.
    pub inner: T,
    /// The counter incremented by the amount of bytes written.
    pub progress: &'a AtomicUsize,
}
impl<T> std::io::Write for WriteWithProgress<'_, T>
where
    T: std::io::Write,
{
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        // Count whatever the inner writer accepted, then pass the amount through.
        self.inner.write(buf).map(|byte_count| {
            self.progress
                .fetch_add(byte_count as gix_features::progress::Step, Ordering::SeqCst);
            byte_count
        })
    }
    fn flush(&mut self) -> std::io::Result<()> {
        self.inner.flush()
    }
}
/// Check out a single `entry` at `entry_path`, converting collision- and IO-errors into records in
/// `collisions` or `errors` (while counting the entry in `files`) instead of failing, depending on
/// `options.keep_going`.
pub fn checkout_entry_handle_result<'entry, Find>(
    entry: &'entry mut gix_index::Entry,
    entry_path: &'entry BStr,
    errors: &mut Vec<checkout::ErrorRecord>,
    collisions: &mut Vec<checkout::Collision>,
    files: &AtomicUsize,
    bytes: &AtomicUsize,
    Context {
        objects,
        path_cache,
        filters,
        buf,
        options,
    }: &mut Context<Find>,
) -> Result<entry::Outcome<'entry>, checkout::Error>
where
    Find: gix_object::Find + Clone,
{
    let res = entry::checkout(
        entry,
        entry_path,
        entry::Context {
            objects,
            path_cache,
            filters,
            buf,
        },
        *options,
    );
    match res {
        Ok(out) => {
            // Delayed outcomes have no byte count yet; they are accounted for once actually written.
            if let Some(num) = out.as_bytes() {
                bytes.fetch_add(num, Ordering::Relaxed);
                files.fetch_add(1, Ordering::Relaxed);
            }
            Ok(out)
        }
        // Collisions are recorded as such and don't fail the entry.
        Err(checkout::Error::Io(err)) if is_collision(&err, entry_path, collisions, files) => {
            Ok(entry::Outcome::Written { bytes: 0 })
        }
        // Everything else is recorded (with `keep_going`) or propagated.
        Err(err) => handle_error(err, entry_path, files, errors, options.keep_going)
            .map(|()| entry::Outcome::Written { bytes: 0 }),
    }
}
/// Either record `err` for `entry_path` in `errors` and count the entry as processed in `files`
/// (when `keep_going` is set), or fail by returning the error unchanged.
fn handle_error<E>(
    err: E,
    entry_path: &BStr,
    files: &AtomicUsize,
    errors: &mut Vec<checkout::ErrorRecord>,
    keep_going: bool,
) -> Result<(), E>
where
    E: std::error::Error + Send + Sync + 'static,
{
    if !keep_going {
        return Err(err);
    }
    errors.push(checkout::ErrorRecord {
        path: entry_path.into(),
        error: Box::new(err),
    });
    files.fetch_add(1, Ordering::Relaxed);
    Ok(())
}
/// Return `true` if `err` denotes a collision with something already present at `entry_path`,
/// recording it in `collisions` and counting the entry as processed in `files`.
fn is_collision(
    err: &std::io::Error,
    entry_path: &BStr,
    collisions: &mut Vec<checkout::Collision>,
    files: &AtomicUsize,
) -> bool {
    let collided = gix_fs::symlink::is_collision_error(err);
    if collided {
        // A file existed or was blocked by a directory, which shouldn't be possible unless
        // we are on a file insensitive file system.
        gix_features::trace::error!("{entry_path}: collided ({:?})", err.kind());
        collisions.push(checkout::Collision {
            path: entry_path.into(),
            error_kind: err.kind(),
        });
        files.fetch_add(1, Ordering::Relaxed);
    }
    collided
}

View File

@@ -0,0 +1,404 @@
use std::{
borrow::Cow,
io::Write,
path::{Path, PathBuf},
};
use bstr::BStr;
use gix_filter::{driver::apply::MaybeDelayed, pipeline::convert::ToWorktreeOutcome};
use gix_index::{entry::Stat, Entry};
use gix_object::FindExt;
use gix_worktree::Stack;
use io_close::Close;
/// Borrowed state needed to check out a single index entry.
pub struct Context<'a, Find> {
    /// Access to the object database to obtain blob contents.
    pub objects: &'a mut Find,
    /// The stack to resolve destination paths and query their attributes.
    pub path_cache: &'a mut Stack,
    /// The filter pipeline to convert blobs into their worktree representation.
    pub filters: &'a mut gix_filter::Pipeline,
    /// A reusable buffer for blob data.
    pub buf: &'a mut Vec<u8>,
}
/// A delayed result of a long-running filter process, which is made available as stream.
pub struct DelayedFilteredStream<'a> {
    /// The key identifying the driver program.
    pub key: gix_filter::driver::Key,
    /// Whether the file is supposed to be executable.
    pub needs_executable_bit: bool,
    /// The validated path on disk at which the file should be placed.
    /// It is emptied (via `std::mem::take`) once the delayed stream was fetched, marking the path as processed.
    pub validated_file_path: PathBuf,
    /// The entry to adjust with the file we will write.
    pub entry: &'a mut gix_index::Entry,
    /// The relative path at which the entry resides (for use when querying the delayed entry).
    pub entry_path: &'a BStr,
}
/// The result of checking out a single entry.
pub enum Outcome<'a> {
    /// The file was written.
    Written {
        /// The amount of written bytes.
        bytes: usize,
    },
    /// The file will be ready later, once the delaying filter process produced its content.
    Delayed(DelayedFilteredStream<'a>),
}
impl Outcome<'_> {
    /// Return the amount of (in-memory) bytes written if available, or `None` for delayed entries.
    pub fn as_bytes(&self) -> Option<usize> {
        if let Outcome::Written { bytes } = self {
            Some(*bytes)
        } else {
            None
        }
    }
}
/// Check out a single `entry` at `entry_path` into the worktree: blobs are written through the
/// filter pipeline, symlinks are created (or written as plain files if the filesystem can't do
/// symlinks), while sparse directories and submodules are skipped with a warning.
/// Returns the amount of bytes written, or a [`DelayedFilteredStream`] if a long-running filter
/// claimed the entry.
#[cfg_attr(not(unix), allow(unused_variables))]
pub fn checkout<'entry, Find>(
    entry: &'entry mut Entry,
    entry_path: &'entry BStr,
    Context {
        objects,
        filters,
        path_cache,
        buf,
    }: Context<'_, Find>,
    crate::checkout::chunk::Options {
        fs: gix_fs::Capabilities {
            symlink,
            executable_bit,
            ..
        },
        destination_is_initially_empty,
        overwrite_existing,
        filter_process_delay,
        ..
    }: crate::checkout::chunk::Options,
) -> Result<Outcome<'entry>, crate::checkout::Error>
where
    Find: gix_object::Find,
{
    // The entry path must be convertible to a platform path to be placed on disk.
    let dest_relative = gix_path::try_from_bstr(entry_path).map_err(|_| crate::checkout::Error::IllformedUtf8 {
        path: entry_path.to_owned(),
    })?;
    let path_cache = path_cache.at_path(dest_relative.as_ref(), Some(entry.mode), &*objects)?;
    let dest = path_cache.path();
    let object_size = match entry.mode {
        gix_index::entry::Mode::FILE | gix_index::entry::Mode::FILE_EXECUTABLE => {
            let obj = (*objects)
                .find_blob(&entry.id, buf)
                .map_err(|err| crate::checkout::Error::Find {
                    err,
                    path: dest.to_path_buf(),
                })?;
            // Apply worktree filters (eol conversion, smudge, …) before writing.
            let filtered = filters.convert_to_worktree(
                obj.data,
                entry_path,
                &mut |_, attrs| {
                    path_cache.matching_attributes(attrs);
                },
                filter_process_delay,
            )?;
            let (num_bytes, file, set_executable_after_creation) = match filtered {
                ToWorktreeOutcome::Unchanged(buf) | ToWorktreeOutcome::Buffer(buf) => {
                    let (mut file, flag) = open_file(
                        dest,
                        destination_is_initially_empty,
                        overwrite_existing,
                        executable_bit,
                        entry.mode,
                    )?;
                    file.write_all(buf)?;
                    (buf.len(), file, flag)
                }
                ToWorktreeOutcome::Process(MaybeDelayed::Immediate(mut filtered)) => {
                    let (mut file, flag) = open_file(
                        dest,
                        destination_is_initially_empty,
                        overwrite_existing,
                        executable_bit,
                        entry.mode,
                    )?;
                    let num_bytes = std::io::copy(&mut filtered, &mut file)? as usize;
                    (num_bytes, file, flag)
                }
                ToWorktreeOutcome::Process(MaybeDelayed::Delayed(key)) => {
                    // The filter process will provide the content later; hand everything needed
                    // to write the file back to the caller.
                    // NOTE(review): `needs_executable_bit` is hard-coded `false` here even for
                    // FILE_EXECUTABLE entries — confirm whether delayed executables lose their bit.
                    return Ok(Outcome::Delayed(DelayedFilteredStream {
                        key,
                        needs_executable_bit: false,
                        validated_file_path: dest.to_owned(),
                        entry,
                        entry_path,
                    }))
                }
            };
            // For possibly existing, overwritten files, we must change the file mode explicitly.
            finalize_entry(entry, file, num_bytes as u64, set_executable_after_creation)?;
            num_bytes
        }
        gix_index::entry::Mode::SYMLINK => {
            let obj = (*objects)
                .find_blob(&entry.id, buf)
                .map_err(|err| crate::checkout::Error::Find {
                    err,
                    path: dest.to_path_buf(),
                })?;
            if symlink {
                // The blob content is the link target; it must be a valid platform path.
                #[cfg_attr(not(windows), allow(unused_mut))]
                let mut symlink_destination = Cow::Borrowed(
                    gix_path::try_from_byte_slice(obj.data)
                        .map_err(|_| crate::checkout::Error::IllformedUtf8 { path: obj.data.into() })?,
                );
                #[cfg(windows)]
                {
                    symlink_destination = gix_path::to_native_path_on_windows(gix_path::into_bstr(symlink_destination))
                }
                try_op_or_unlink(dest, overwrite_existing, |p| {
                    gix_fs::symlink::create(symlink_destination.as_ref(), p)
                })?;
            } else {
                // No symlink support: write the link target as the content of a regular file, like Git does.
                let mut file = try_op_or_unlink(dest, overwrite_existing, |p| {
                    open_options(p, destination_is_initially_empty, overwrite_existing).open(dest)
                })?;
                file.write_all(obj.data)?;
                file.close()?;
            }
            entry.stat = Stat::from_fs(&gix_index::fs::Metadata::from_path_no_follow(dest)?)?;
            obj.data.len()
        }
        gix_index::entry::Mode::DIR => {
            gix_features::trace::warn!(
                "Skipped sparse directory at '{entry_path}' ({id}) as it cannot yet be handled",
                id = entry.id
            );
            0
        }
        gix_index::entry::Mode::COMMIT => {
            gix_features::trace::warn!(
                "Skipped submodule at '{entry_path}' ({id}) as it cannot yet be handled",
                id = entry.id
            );
            0
        }
        _ => unreachable!(),
    };
    Ok(Outcome::Written { bytes: object_size })
}
/// Run `op` on `path`; when `overwrite_existing` is set and the operation failed because something
/// already exists at `path`, remove whatever is there and retry once.
///
/// Note that this works only because we assume to not race ourselves when symlinks are involved, and we do this by
/// delaying symlink creation to the end and will always do that sequentially.
/// It's still possible to fall for a race if other actors create symlinks in our path, but that's nothing to defend against.
fn try_op_or_unlink<T>(
    path: &Path,
    overwrite_existing: bool,
    op: impl Fn(&Path) -> std::io::Result<T>,
) -> std::io::Result<T> {
    if !overwrite_existing {
        return op(path);
    }
    match op(path) {
        Err(err) if gix_fs::symlink::is_collision_error(&err) => {
            try_unlink_path_recursively(path, &std::fs::symlink_metadata(path)?)?;
            op(path)
        }
        other => other,
    }
}
/// Remove whatever exists at `path` — directories recursively, symlinks without following them,
/// anything else as a plain file — based on the non-following metadata in `path_meta`.
fn try_unlink_path_recursively(path: &Path, path_meta: &std::fs::Metadata) -> std::io::Result<()> {
    let kind = path_meta.file_type();
    if kind.is_symlink() {
        gix_fs::symlink::remove(path)
    } else if kind.is_dir() {
        std::fs::remove_dir_all(path)
    } else {
        std::fs::remove_file(path)
    }
}
#[cfg(not(debug_assertions))]
fn debug_assert_dest_is_no_symlink(_path: &Path) {}
/// This is a debug assertion as we expect the machinery calling this to prevent this possibility in the first place.
///
/// Fix: use `symlink_metadata()` instead of `metadata()` — the latter follows the link, and the
/// metadata of a link's *target* never reports being a symlink, which made this check a no-op.
#[cfg(debug_assertions)]
fn debug_assert_dest_is_no_symlink(path: &Path) {
    if let Ok(meta) = path.symlink_metadata() {
        debug_assert!(
            !meta.file_type().is_symlink(),
            "BUG: should not ever allow to overwrite/write-into the target of a symbolic link: {}",
            path.display()
        );
    }
}
/// Build open options that never follow symlinks, always write and truncate.
/// With an initially empty destination (and no overwriting allowed) creation is exclusive,
/// so pre-existing files surface as collisions.
fn open_options(path: &Path, destination_is_initially_empty: bool, overwrite_existing: bool) -> std::fs::OpenOptions {
    if overwrite_existing || !destination_is_initially_empty {
        debug_assert_dest_is_no_symlink(path);
    }
    // Equivalent to the original flags via De Morgan: create(!empty || overwrite) == create(!create_exclusively).
    let create_exclusively = destination_is_initially_empty && !overwrite_existing;
    let mut options = gix_features::fs::open_options_no_follow();
    options
        .create_new(create_exclusively)
        .create(!create_exclusively)
        .write(true)
        .truncate(true);
    options
}
/// Open the file at `path` for writing, returning it along with a flag telling the caller to set
/// the executable bit *after* writing — needed when the file possibly existed already and thus
/// couldn't be created with the executable mode directly.
pub(crate) fn open_file(
    path: &Path,
    destination_is_initially_empty: bool,
    overwrite_existing: bool,
    fs_supports_executable_bit: bool,
    entry_mode: gix_index::entry::Mode,
) -> std::io::Result<(std::fs::File, bool)> {
    #[cfg_attr(windows, allow(unused_mut))]
    let mut options = open_options(path, destination_is_initially_empty, overwrite_existing);
    let needs_executable_bit = fs_supports_executable_bit && entry_mode == gix_index::entry::Mode::FILE_EXECUTABLE;
    #[cfg(unix)]
    let set_executable_after_creation = if needs_executable_bit && destination_is_initially_empty {
        use std::os::unix::fs::OpenOptionsExt;
        // Note that these only work if the file was newly created, but won't if it's already
        // existing, possibly without the executable bit set. Thus we do this only if the file is new.
        options.mode(0o777);
        false
    } else {
        needs_executable_bit
    };
    // not supported on windows
    #[cfg(windows)]
    let set_executable_after_creation = needs_executable_bit;
    try_op_or_unlink(path, overwrite_existing, |p| options.open(p)).map(|f| (f, set_executable_after_creation))
}
/// Close `file` and store its stats in `entry`, possibly setting `file` executable.
///
/// `desired_bytes` is the amount of bytes Git thinks the file should have after writing;
/// any excess beyond it is truncated away.
pub(crate) fn finalize_entry(
    entry: &mut gix_index::Entry,
    file: std::fs::File,
    desired_bytes: u64,
    #[cfg_attr(windows, allow(unused_variables))] set_executable_after_creation: bool,
) -> Result<(), crate::checkout::Error> {
    // For possibly existing, overwritten files, we must change the file mode explicitly.
    #[cfg(unix)]
    if set_executable_after_creation {
        set_executable(&file)?;
    }
    let md = &gix_index::fs::Metadata::from_file(&file)?;
    // A last sanity check: if the file wasn't truncated upon opening, which is good in case something
    // goes wrong during writing, not everything is lost, then after writing the file is smaller than it was
    // before, it needs truncation. We do that here.
    let needs_truncation = md.len() > desired_bytes;
    if needs_truncation {
        file.set_len(desired_bytes)?;
    }
    // NOTE: we don't call `file.sync_all()` here knowing that some filesystems don't handle this well.
    // revisit this once there is a bug to fix.
    entry.stat = Stat::from_fs(md)?;
    file.close()?;
    Ok(())
}
/// Use `fstat` and `fchmod` on a file descriptor to make a regular file executable.
///
/// See `let_readers_execute` for the exact details of how the mode is transformed.
#[cfg(unix)]
fn set_executable(file: &std::fs::File) -> Result<(), std::io::Error> {
    use std::os::unix::fs::{MetadataExt, PermissionsExt};
    let current_mode = file.metadata()?.mode();
    let executable_mode = let_readers_execute(current_mode);
    file.set_permissions(std::fs::Permissions::from_mode(executable_mode))
}
/// Given the st_mode of a regular file, compute the mode with executable bits safely added.
///
/// Whoever already has a read bit gains the matching execute bit; the umask is not consulted.
/// Set-user-ID and set-group-ID bits are unset for safety. The sticky bit is also unset.
///
/// Only permission bits are returned, not the file type, suitable for chmod or fchmod.
#[cfg(any(unix, test))]
fn let_readers_execute(mode: u32) -> u32 {
    assert_eq!(mode & 0o170000, 0o100000, "bug in caller if not from a regular file");
    let permissions = mode & 0o777; // Drop file type and non-rwx bits (setuid, setgid, sticky).
    permissions | ((permissions & 0o444) >> 2) // Mirror each read bit into its execute bit.
}
#[cfg(test)]
mod tests {
    /// Table-driven check of the mode transformation, covering plain modes as well as
    /// setuid/setgid/sticky combinations that must be stripped.
    #[test]
    fn let_readers_execute() {
        let cases = [
            // Common cases:
            (0o100755, 0o755),
            (0o100644, 0o755),
            (0o100750, 0o750),
            (0o100640, 0o750),
            (0o100700, 0o700),
            (0o100600, 0o700),
            (0o100775, 0o775),
            (0o100664, 0o775),
            (0o100770, 0o770),
            (0o100660, 0o770),
            (0o100764, 0o775),
            (0o100760, 0o770),
            // Less common:
            (0o100674, 0o775),
            (0o100670, 0o770),
            (0o100000, 0o000),
            (0o100400, 0o500),
            (0o100440, 0o550),
            (0o100444, 0o555),
            (0o100462, 0o572),
            (0o100242, 0o252),
            (0o100167, 0o177),
            // With set-user-ID, set-group-ID, and sticky bits:
            (0o104755, 0o755),
            (0o104644, 0o755),
            (0o102755, 0o755),
            (0o102644, 0o755),
            (0o101755, 0o755),
            (0o101644, 0o755),
            (0o106755, 0o755),
            (0o106644, 0o755),
            (0o104750, 0o750),
            (0o104640, 0o750),
            (0o102750, 0o750),
            (0o102640, 0o750),
            (0o101750, 0o750),
            (0o101640, 0o750),
            (0o106750, 0o750),
            (0o106640, 0o750),
            (0o107644, 0o755),
            (0o107000, 0o000),
            (0o106400, 0o500),
            (0o102462, 0o572),
        ];
        for (st_mode, expected) in cases {
            let actual = super::let_readers_execute(st_mode);
            assert_eq!(
                actual, expected,
                "{st_mode:06o} should become {expected:04o}, became {actual:04o}"
            );
        }
    }
    // The caller-side contract: only regular-file modes are accepted.
    #[test]
    #[should_panic]
    fn let_readers_execute_panics_on_directory() {
        super::let_readers_execute(0o040644);
    }
    #[test]
    #[should_panic]
    fn let_readers_execute_should_panic_on_symlink() {
        super::let_readers_execute(0o120644);
    }
}

View File

@@ -0,0 +1,154 @@
use std::sync::atomic::AtomicBool;
use gix_features::{interrupt, parallel::in_parallel_with_finalize};
use gix_worktree::{stack, Stack};
use crate::checkout::chunk;
/// Checkout the entire `index` into `dir`, and resolve objects found in index entries with `objects` to write their content to their
/// respective path in `dir`.
/// Use `files` to count each fully checked out file, and count the amount written `bytes`. If `should_interrupt` is `true`, the
/// operation will abort.
/// `options` provide a lot of context on how to perform the operation.
///
/// ### Handling the return value
///
/// Note that interruption still produces an `Ok(…)` value, so the caller should look at `should_interrupt` to communicate the outcome.
///
#[allow(clippy::too_many_arguments)]
pub fn checkout<Find>(
    index: &mut gix_index::State,
    dir: impl Into<std::path::PathBuf>,
    objects: Find,
    files: &dyn gix_features::progress::Count,
    bytes: &dyn gix_features::progress::Count,
    should_interrupt: &AtomicBool,
    options: crate::checkout::Options,
) -> Result<crate::checkout::Outcome, crate::checkout::Error>
where
    Find: gix_object::Find + Send + Clone,
{
    // Move the path backing out of the index so entries and paths can be borrowed independently,
    // and make sure it is put back regardless of how the actual work turned out.
    let path_backing = index.take_path_backing();
    let outcome = checkout_inner(index, &path_backing, dir, objects, files, bytes, should_interrupt, options);
    index.return_path_backing(path_backing);
    outcome
}
/// The single- or multi-threaded checkout implementation, operating on an `index` whose path
/// backing was already taken out and is passed separately as `paths`.
#[allow(clippy::too_many_arguments)]
fn checkout_inner<Find>(
    index: &mut gix_index::State,
    paths: &gix_index::PathStorage,
    dir: impl Into<std::path::PathBuf>,
    objects: Find,
    files: &dyn gix_features::progress::Count,
    bytes: &dyn gix_features::progress::Count,
    should_interrupt: &AtomicBool,
    mut options: crate::checkout::Options,
) -> Result<crate::checkout::Outcome, crate::checkout::Error>
where
    Find: gix_object::Find + Send + Clone,
{
    let num_files = files.counter();
    let num_bytes = bytes.counter();
    let dir = dir.into();
    // Decide on chunking and thread count based on the amount of entries and the configured limit.
    let (chunk_size, thread_limit, num_threads) = gix_features::parallel::optimize_chunk_size_and_thread_limit(
        100,
        index.entries().len().into(),
        options.thread_limit,
        None,
    );
    let mut ctx = chunk::Context {
        buf: Vec::new(),
        options: (&options).into(),
        path_cache: Stack::from_state_and_ignore_case(
            dir,
            options.fs.ignore_case,
            stack::State::for_checkout(
                options.overwrite_existing,
                options.validate,
                std::mem::take(&mut options.attributes),
            ),
            index,
            paths,
        ),
        filters: options.filters,
        objects,
    };
    let chunk::Outcome {
        mut collisions,
        mut errors,
        mut bytes_written,
        files: files_updated,
        delayed_symlinks,
        delayed_paths_unknown,
        delayed_paths_unprocessed,
    } = if num_threads == 1 {
        // Single-threaded: process all entries here, then drain the delayed filter results.
        let entries_with_paths = interrupt::Iter::new(index.entries_mut_with_paths_in(paths), should_interrupt);
        let mut delayed_filter_results = Vec::new();
        let mut out = chunk::process(
            entries_with_paths,
            &num_files,
            &num_bytes,
            &mut delayed_filter_results,
            &mut ctx,
        )?;
        chunk::process_delayed_filter_results(delayed_filter_results, &num_files, &num_bytes, &mut out, &mut ctx)?;
        out
    } else {
        let entries_with_paths = interrupt::Iter::new(index.entries_mut_with_paths_in(paths), should_interrupt);
        in_parallel_with_finalize(
            gix_features::iter::Chunks {
                inner: entries_with_paths,
                size: chunk_size,
            },
            thread_limit,
            {
                // Each worker thread gets its own delayed-results buffer and a clone of the context.
                let ctx = ctx.clone();
                move |_| (Vec::new(), ctx)
            },
            |chunk, (delayed_filter_results, ctx)| {
                chunk::process(chunk.into_iter(), &num_files, &num_bytes, delayed_filter_results, ctx)
            },
            |(delayed_filter_results, mut ctx)| {
                // Per-thread finalizer: drain that thread's delayed filter results into a fresh outcome.
                let mut out = chunk::Outcome::default();
                chunk::process_delayed_filter_results(
                    delayed_filter_results,
                    &num_files,
                    &num_bytes,
                    &mut out,
                    &mut ctx,
                )?;
                Ok(out)
            },
            chunk::Reduce {
                aggregate: Default::default(),
            },
        )?
    };
    // Symlinks are deferred to the very end and created sequentially — see `chunk::process()` for why.
    for (entry, entry_path) in delayed_symlinks {
        bytes_written += chunk::checkout_entry_handle_result(
            entry,
            entry_path,
            &mut errors,
            &mut collisions,
            &num_files,
            &num_bytes,
            &mut ctx,
        )?
        .as_bytes()
        .expect("only symlinks are delayed here, they are never filtered (or delayed again)")
        as u64;
    }
    Ok(crate::checkout::Outcome {
        files_updated,
        collisions,
        errors,
        bytes_written,
        delayed_paths_unknown,
        delayed_paths_unprocessed,
    })
}

View File

@@ -0,0 +1,103 @@
use bstr::BString;
use gix_index::entry::stat;
/// Information about a path that failed to checkout as something else was already present.
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct Collision {
    /// The path that collided with something already present on disk.
    pub path: BString,
    /// The io error we encountered when checking out `path`.
    pub error_kind: std::io::ErrorKind,
}
/// A path that encountered an IO error.
#[derive(Debug)]
pub struct ErrorRecord {
    /// The path that encountered the error.
    pub path: BString,
    /// The error itself.
    pub error: Box<dyn std::error::Error + Send + Sync + 'static>,
}
/// The outcome of checking out an entire index.
#[derive(Debug, Default)]
pub struct Outcome {
    /// The amount of files updated, or created.
    pub files_updated: usize,
    /// The amount of bytes written to disk.
    pub bytes_written: u64,
    /// The encountered collisions, which can happen on a case-insensitive filesystem.
    pub collisions: Vec<Collision>,
    /// Other errors that happened during checkout.
    pub errors: Vec<ErrorRecord>,
    /// Relative paths that the process listed as 'delayed' even though we never passed them.
    pub delayed_paths_unknown: Vec<BString>,
    /// All paths that were left unprocessed, because they were never listed by the process even though we passed them.
    pub delayed_paths_unprocessed: Vec<BString>,
}
/// Options to further configure the checkout operation.
#[derive(Clone, Default)]
pub struct Options {
    /// Capabilities of the destination file system.
    pub fs: gix_fs::Capabilities,
    /// Options to configure how to validate path components.
    pub validate: gix_worktree::validate::path::component::Options,
    /// If set, don't use more than this amount of threads.
    /// Otherwise, usually use as many threads as there are logical cores.
    /// A value of 0 is interpreted as no-limit.
    pub thread_limit: Option<usize>,
    /// If true, we assume no file to exist in the target directory, and want exclusive access to it.
    /// This should be enabled when cloning to avoid checks for freshness of files. This also enables
    /// detection of collisions based on whether exclusive file creation succeeds or fails.
    pub destination_is_initially_empty: bool,
    /// If true, default false, worktree entries on disk will be overwritten with content from the index
    /// even if they appear to be changed. When creating directories that clash with existing worktree entries,
    /// these will try to delete the existing entry.
    /// This is similar in behaviour to `git checkout --force`.
    pub overwrite_existing: bool,
    /// If true, default false, try to checkout as much as possible and don't abort on first error which isn't
    /// due to a conflict.
    /// The checkout operation will never fail, but count the encountered errors instead along with their paths.
    pub keep_going: bool,
    /// Control how stat comparisons are made when checking if a file is fresh.
    pub stat_options: stat::Options,
    /// A stack of attributes to use with the filesystem cache to use as driver for filters.
    pub attributes: gix_worktree::stack::state::Attributes,
    /// The filter pipeline to use for applying mandatory filters before writing to the worktree.
    pub filters: gix_filter::Pipeline,
    /// Control how long-running processes may use the 'delay' capability.
    pub filter_process_delay: gix_filter::driver::apply::Delay,
}
/// The error returned by the [checkout()][crate::checkout()] function.
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
#[error("Could not convert path to UTF8: {}", .path)]
IllformedUtf8 { path: BString },
#[error("The clock was off when reading file related metadata after updating a file on disk")]
Time(#[from] std::time::SystemTimeError),
#[error("IO error while writing blob or reading file metadata or changing filetype")]
Io(#[from] std::io::Error),
#[error("object for checkout at {} could not be retrieved from object database", .path.display())]
Find {
#[source]
err: gix_object::find::existing_object::Error,
path: std::path::PathBuf,
},
#[error(transparent)]
Filter(#[from] gix_filter::pipeline::convert::to_worktree::Error),
#[error(transparent)]
FilterListDelayed(#[from] gix_filter::driver::delayed::list::Error),
#[error(transparent)]
FilterFetchDelayed(#[from] gix_filter::driver::delayed::fetch::Error),
#[error("The entry at path '{rela_path}' was listed as delayed by the filter process, but we never passed it")]
FilterPathUnknown { rela_path: BString },
#[error("The following paths were delayed and apparently forgotten to be processed by the filter driver: ")]
FilterPathsUnprocessed { rela_paths: Vec<BString> },
}
// Chunk-wise processing of index entries, enabling parallel checkout.
mod chunk;
// Checkout of a single index entry.
mod entry;
// The top-level `checkout()` entry point.
pub(crate) mod function;

View File

@@ -0,0 +1,6 @@
//! A crate to help setting the worktree to a particular state.
#![deny(missing_docs, rust_2018_idioms, unsafe_code)]
/// Types and functions to produce a worktree from an index.
pub mod checkout;
pub use checkout::function::checkout;

View File

@@ -0,0 +1,34 @@
# Test-only companion crate: hosts the `worktree` integration tests for
# `src-worktree-state` so they can be run with different feature toggles.
lints.workspace = true
[package]
name = "src-worktree-state-tests"
version = "0.0.0"
repository = "https://github.com/GitoxideLabs/gitoxide"
license = "MIT OR Apache-2.0"
description = "A crate for running tests with feature toggles on src-worktree-state"
authors = ["Sebastian Thiel <sebastian.thiel@icloud.com>"]
edition = "2021"
publish = false
rust-version = "1.82"
# The single integration-test binary; `worktree.rs` declares the test modules.
[[test]]
name = "worktree"
path = "worktree.rs"
# Forwarding feature to exercise the multi-threaded code paths in tests.
[features]
src-features-parallel = ["src-features/parallel"]
[dev-dependencies]
src-worktree-state = { path = ".." }
src-object = { path = "../../src-object" }
src-discover = { path = "../../src-discover" }
src-filter = { path = "../../src-filter" }
src-index = { path = "../../src-index" }
src-hash = { path = "../../src-hash", features = ["sha256"] }
src-fs = { path = "../../src-fs" }
src-features = { path = "../../src-features" }
src-testtools = { path = "../../tests/tools" }
src-odb = { path = "../../src-odb" }
symlink = "0.1.0"
walkdir = "2.3.2"

View File

@@ -0,0 +1,7 @@
make_ignore_and_attributes_setup.tar
make_mixed_without_submodules.tar
make_mixed.tar
make_mixed_without_submodules_and_symlinks.tar
make_attributes_baseline.tar
make_dangerous_symlink.tar
make_ignorecase_collisions.tar

View File

@@ -0,0 +1,24 @@
#!/usr/bin/env bash
set -eu -o pipefail
# Fixture: an index where symlink entries (FAKE-DIR, FAKE-FILE) point at sibling
# paths whose names collide case-insensitively with real files/directories,
# to probe writes-through-symlink attacks during checkout.
git init -q
# Every symlink is dangerous as it might either link to another directory and thus redirect
# all writes in the path, or it might point to a file and opening the symlink actually opens
# the target.
# We handle this by either validating symlinks specifically or create symlinks
# NOTE(review): the sentence above appears truncated in the original — confirm intended strategy.
empty_oid=$(git hash-object -w --stdin </dev/null)
fake_dir_target=$(echo -n 'A-dir' | git hash-object -w --stdin)
fake_file_target=$(echo -n 'A-file' | git hash-object -w --stdin)
# Stage entries directly; mode 120000 marks symlinks whose blob content is the target.
git update-index --index-info <<EOF
100644 $empty_oid A-dir/a
100644 $empty_oid A-file
120000 $fake_dir_target FAKE-DIR
120000 $fake_file_target FAKE-FILE
100644 $empty_oid fake-dir/b
100644 $empty_oid fake-file
EOF
git commit -m "init"
git checkout -f HEAD;

View File

@@ -0,0 +1,12 @@
#!/usr/bin/env bash
set -eu -o pipefail
# Fixture: a single symlink entry whose target does not exist, to verify that
# checkout can materialize dangling symlinks.
git init -q
target_oid=$(echo -n "non-existing-target" | git hash-object -w --stdin)
# mode 120000 marks a symlink; the blob content is the link target.
git update-index --index-info <<EOF
120000 $target_oid dangling
EOF
git commit -m "dangling symlink in index"

View File

@@ -0,0 +1,13 @@
#!/usr/bin/env bash
set -eu -o pipefail
# Fixture: a dangling symlink whose target contains characters that are invalid
# in Windows file names, to verify it can still be checked out.
git init -q
# On Windows, the target is an invalid file name.
qmarks_oid=$(echo -n "???" | git hash-object -w --stdin)
git update-index --index-info <<EOF
120000 $qmarks_oid dangling-qmarks-symlink
EOF
git commit -m "dangling symlinks with Windows invalid target in index"

View File

@@ -0,0 +1,13 @@
#!/usr/bin/env bash
set -eu -o pipefail
# Fixture: a dangling symlink whose target is a reserved DOS device name,
# to verify it can still be checked out on Windows.
git init -q
# On Windows, the target is a reserved legacy DOS device name.
con_oid=$(echo -n "CON" | git hash-object -w --stdin)
git update-index --index-info <<EOF
120000 $con_oid dangling-con-symlink
EOF
git commit -m "dangling symlinks with Windows reserved target in index"

View File

@@ -0,0 +1,12 @@
#!/usr/bin/env bash
set -eu -o pipefail
# Fixture: a symlink entry pointing at `.`, i.e. a symlink to a directory,
# to verify such links are created usable by checkout.
git init -q
target_oid=$(echo -n "." | git hash-object -w --stdin)
git update-index --index-info <<EOF
120000 $target_oid symlink
EOF
git commit -m "symlink in index, points to directory"

View File

@@ -0,0 +1,27 @@
#!/usr/bin/env bash
set -eu -o pipefail
# Fixture: stage paths that differ only by case (FILE_X/file_x, X/x, D/d) plus
# symlinks, to provoke collisions on case-insensitive filesystems. The
# `FILE_?` pattern additionally routes those entries through the `arrow` filter.
git init -q
empty_oid=$(git hash-object -w --stdin </dev/null)
content_oid=$(echo "content" | git hash-object -w --stdin)
symlink_target=$(echo -n 'X' | git hash-object -w --stdin)
echo "FILE_? filter=arrow" > .gitattributes
git add -A
git update-index --index-info <<EOF
100644 $content_oid FILE_X
100644 $content_oid FILE_x
100644 $content_oid file_X
100644 $content_oid file_x
100644 $empty_oid D/B
100644 $empty_oid D/C
100644 $empty_oid d
100644 $empty_oid X
120000 $symlink_target x
120000 $symlink_target link-to-X
EOF
git commit -m "init"
git checkout -f HEAD;

View File

@@ -0,0 +1,33 @@
#!/usr/bin/env bash
set -eu -o pipefail
# Fixture: a repository mixing an empty file, an executable, a symlink, filter
# attributes, and two submodule entries (`m1` and `modules/m1`).
git init -q
touch empty
echo -n "content" > executable
chmod +x executable
mkdir dir
echo "other content" > dir/content
# Route everything through the `arrow` filter except the executable and the attributes file.
echo "* filter=arrow" > .gitattributes
echo "executable -filter" >> .gitattributes
echo ".gitattributes -filter" >> .gitattributes
mkdir dir/sub-dir
(cd dir/sub-dir && ln -sf ../content symlink)
git add -A
git update-index --chmod=+x executable # For Windows.
git commit -m "Commit"
# Create a nested repository and register it twice as a submodule.
git init module1
(cd module1
echo hello-from-submodule > f1
mkdir dir
: >dir/f2
git add . && git commit -m "init submodule"
)
git submodule add ./module1 m1
git submodule add ./module1 modules/m1

View File

@@ -0,0 +1,21 @@
#!/usr/bin/env bash
set -eu -o pipefail
# Fixture: like make_mixed but without submodules — empty file, executable,
# symlink, and filter attributes only.
git init -q
touch empty
echo -n "content" > executable
chmod +x executable
mkdir dir
echo "other content" > dir/content
# Route everything through the `arrow` filter except the executable and the attributes file.
echo "* filter=arrow" > .gitattributes
echo "executable -filter" >> .gitattributes
echo ".gitattributes -filter" >> .gitattributes
mkdir dir/sub-dir
(cd dir/sub-dir && ln -sf ../content symlink)
git add -A
git update-index --chmod=+x executable # For Windows.
git commit -m "Commit"

View File

@@ -0,0 +1,21 @@
#!/usr/bin/env bash
set -eu -o pipefail
# Fixture: like make_mixed_without_submodules but with a regular file in place
# of the symlink, for platforms/tests where symlinks must not be involved.
git init -q
touch empty
echo -n "content" > executable
chmod +x executable
mkdir dir
echo "other content" > dir/content
# Route everything through the `arrow` filter except the executable and the attributes file.
echo "* filter=arrow" > .gitattributes
echo "executable -filter" >> .gitattributes
echo ".gitattributes -filter" >> .gitattributes
mkdir dir/sub-dir
echo "even other content" > dir/sub-dir/file
git add -A
git update-index --chmod=+x executable # For Windows.
git commit -m "Commit"

View File

@@ -0,0 +1,39 @@
#!/usr/bin/env bash
set -eu -o pipefail
# Makes a repo carrying a tree structure representing the given path to a blob.
# File content is from stdin. Args are repo name, path, -x or +x, and tr sets.
#
# Git itself refuses to stage malicious paths like `../outside` or `.git/...`,
# so a harmless stand-in path is staged first and then rewritten to the
# malicious one by patching the raw index file with sed.
function make_repo() (
local repo="$1" path="$2" xbit="$3" set1="$4" set2="$5"
local dir dir_standin path_standin path_standin_pattern path_replacement
git init -- "$repo"
cd -- "$repo" # Temporary, as the function body is a ( ) subshell.
dir="${path%/*}"
# Translate characters per the given tr sets to build the safe stand-in path.
dir_standin="$(tr "$set1" "$set2" <<<"$dir")"
path_standin="$(tr "$set1" "$set2" <<<"$path")"
mkdir -p -- "$dir_standin"
cat >"$path_standin"
git add --chmod="$xbit" -- "$path_standin"
# Escape regex metacharacters, then rewrite the stand-in path inside the raw index.
path_standin_pattern="$(sed 's/[|.*^$\]/\\&/g' <<<"$path_standin")"
path_replacement="$(sed 's/[|&\]/\\&/g' <<<"$path")"
cp .git/index old_index
LC_ALL=C sed "s|$path_standin_pattern|$path_replacement|g" old_index >.git/index
git commit -m 'Initial commit'
)
# A path escaping the working tree via `..`.
make_repo traverse_dotdot_trees '../outside' -x '.' '@' \
<<<'A file outside the working tree, somehow.'
# An executable hook inside `.git` — would run on the next commit if written.
make_repo traverse_dotgit_trees '.git/hooks/pre-commit' +x '.' '@' <<'EOF'
#!/bin/sh
printf 'Vulnerable!\n'
date >vulnerable
EOF
# Same hook, but reached through an NTFS alternate-data-stream name for `.git`.
make_repo traverse_dotgit_stream '.git::$INDEX_ALLOCATION/hooks/pre-commit' +x ':' ',' <<'EOF'
#!/bin/sh
printf 'Vulnerable!\n'
date >vulnerable
EOF

View File

@@ -0,0 +1,747 @@
#[cfg(unix)]
use std::os::unix::prelude::MetadataExt;
use std::{
fs,
io::{ErrorKind, ErrorKind::AlreadyExists},
path::{Path, PathBuf},
sync::atomic::{AtomicBool, AtomicUsize, Ordering},
};
use gix_features::progress;
use gix_object::{bstr::ByteSlice, Data};
use gix_testtools::tempfile::TempDir;
use gix_worktree_state::checkout::Collision;
use std::sync::LazyLock;
use crate::fixture_path;
// Build the `arrow` example of the `src-filter` crate once per test run and cache
// the path to the produced binary; it acts as the external filter-driver process.
static DRIVER: LazyLock<PathBuf> = LazyLock::new(|| {
    let mut cargo = std::process::Command::new(env!("CARGO"));
    let res = cargo
        .args(["build", "-p=src-filter", "--example", "arrow"])
        .status()
        .expect("cargo should run fine");
    assert!(res.success(), "cargo invocation should be successful");
    // CARGO_TARGET_TMPDIR is `<target>/tmp`; going up one level yields the target
    // directory that contains `debug/examples`.
    let path = PathBuf::from(env!("CARGO_TARGET_TMPDIR"))
        .ancestors()
        .nth(1)
        .expect("first parent in target dir")
        .join("debug")
        .join("examples")
        .join(if cfg!(windows) { "arrow.exe" } else { "arrow" });
    assert!(path.is_file(), "Expecting driver to be located at {}", path.display());
    path
});
/// Path to the filter-driver executable, with forward slashes on Windows so it
/// can be embedded in a git filter-process command line.
fn driver_exe() -> String {
    let path = DRIVER.to_string_lossy().into_owned();
    if cfg!(windows) {
        path.replace('\\', "/")
    } else {
        path
    }
}
/// Fail the current test unless `dir` exists and contains no entries at all.
fn assure_is_empty(dir: impl AsRef<Path>) -> std::io::Result<()> {
    let num_entries = std::fs::read_dir(dir)?.count();
    assert_eq!(num_entries, 0);
    Ok(())
}
// Submodule entries are not recursed into: checkout creates an empty directory
// placeholder for each submodule path.
#[test]
fn submodules_are_instantiated_as_directories() -> crate::Result {
    let mut opts = opts_from_probe();
    opts.overwrite_existing = false;
    let (_source_tree, destination, _index, _outcome) = checkout_index_in_tmp_dir(opts.clone(), "make_mixed", None)?;
    for path in ["m1", "modules/m1"] {
        let sm = destination.path().join(path);
        assert!(sm.is_dir());
        assure_is_empty(sm)?;
    }
    Ok(())
}
// Without overwrite mode, paths that would be written through a previously
// created symlink are reported as collisions instead of being written.
#[test]
fn accidental_writes_through_symlinks_are_prevented_if_overwriting_is_forbidden() {
    let mut opts = opts_from_probe();
    // without overwrite mode, everything is safe.
    opts.overwrite_existing = false;
    let (source_tree, destination, _index, outcome) =
        checkout_index_in_tmp_dir(opts.clone(), "make_dangerous_symlink", None).unwrap();
    let source_files = dir_structure(&source_tree);
    let worktree_files = dir_structure(&destination);
    if opts.fs.ignore_case {
        // On case-insensitive filesystems the symlink names collide with the real
        // entries, so checkout reports collisions rather than writing through them.
        assert_eq!(
            stripped_prefix(&source_tree, &source_files),
            stripped_prefix(&destination, &worktree_files),
        );
        if multi_threaded() {
            // Which entry hits the collision first is racy across threads;
            // only the count is deterministic.
            assert_eq!(outcome.collisions.len(), 2);
        } else {
            assert_eq!(
                outcome.collisions,
                vec![
                    Collision {
                        path: "FAKE-DIR".into(),
                        error_kind: AlreadyExists
                    },
                    Collision {
                        path: "FAKE-FILE".into(),
                        error_kind: AlreadyExists
                    }
                ]
            );
        }
    } else {
        // Case-sensitive: no collisions, everything checks out verbatim.
        let expected = ["A-dir/a", "A-file", "FAKE-DIR", "FAKE-FILE", "fake-dir/b", "fake-file"];
        assert_eq!(stripped_prefix(&source_tree, &source_files), paths(expected));
        assert_eq!(stripped_prefix(&destination, &worktree_files), paths(expected));
        assert!(outcome.collisions.is_empty());
    }
}
// Even with overwrite enabled, checkout must never follow an already-created
// symlink when writing a colliding entry — it replaces the link instead.
#[test]
fn writes_through_symlinks_are_prevented_even_if_overwriting_is_allowed() {
    let mut opts = opts_from_probe();
    // with overwrite mode
    opts.overwrite_existing = true;
    let (source_tree, destination, _index, outcome) =
        checkout_index_in_tmp_dir(opts.clone(), "make_dangerous_symlink", None).unwrap();
    let source_files = dir_structure(&source_tree);
    let worktree_files = dir_structure(&destination);
    if opts.fs.ignore_case {
        // The later (lower-case) entries overwrite the earlier ones, so only
        // one casing of each name survives in the destination.
        assert_eq!(
            stripped_prefix(&source_tree, &source_files),
            paths(["A-dir/a", "A-file", "fake-dir/b", "fake-file"]),
        );
        assert_eq!(
            stripped_prefix(&destination, &worktree_files),
            paths([
                if cfg!(windows) { r"A-dir\a" } else { "A-dir/a" },
                "A-file",
                "FAKE-DIR",
                "FAKE-FILE"
            ]),
        );
        assert!(outcome.collisions.is_empty());
    } else {
        let expected = ["A-dir/a", "A-file", "FAKE-DIR", "FAKE-FILE", "fake-dir/b", "fake-file"];
        assert_eq!(stripped_prefix(&source_tree, &source_files), paths(expected));
        assert_eq!(stripped_prefix(&destination, &worktree_files), paths(expected));
        assert!(outcome.collisions.is_empty());
    }
}
// Exercises the long-running filter driver's 'delay' capability: delayed results
// must still be fetched and written, and the arrow filter + autocrlf applied.
#[test]
fn delayed_driver_process() -> crate::Result {
    let mut opts = opts_from_probe();
    opts.overwrite_existing = true;
    opts.filter_process_delay = gix_filter::driver::apply::Delay::Allow;
    opts.destination_is_initially_empty = false;
    setup_filter_pipeline(opts.filters.options_mut());
    let (_source, destination, _index, outcome) = checkout_index_in_tmp_dir_opts(
        opts,
        "make_mixed_without_submodules_and_symlinks",
        None,
        |_| true,
        |_| Ok(()),
    )?;
    assert_eq!(outcome.collisions.len(), 0);
    assert_eq!(outcome.errors.len(), 0);
    assert_eq!(outcome.files_updated, 5);
    let dest = destination.path();
    // `executable` carries the `-filter` attribute, so it is written verbatim.
    assert_eq!(
        std::fs::read(dest.join("executable"))?.as_bstr(),
        "content",
        "unfiltered"
    );
    // Filtered files gain the arrow prefix and CRLF line endings (autocrlf).
    assert_eq!(
        std::fs::read(dest.join("dir").join("content"))?.as_bstr(),
        "➡other content\r\n"
    );
    assert_eq!(
        std::fs::read(dest.join("dir").join("sub-dir").join("file"))?.as_bstr(),
        "➡even other content\r\n"
    );
    Ok(())
}
// With overwrite enabled, pre-existing files, directories and symlinks at entry
// paths must be replaced by the checked-out content — for both delay modes of
// the filter driver.
#[test]
fn overwriting_files_and_lone_directories_works() -> crate::Result {
    for delay in [
        gix_filter::driver::apply::Delay::Allow,
        gix_filter::driver::apply::Delay::Forbid,
    ] {
        let mut opts = opts_from_probe();
        assert!(opts.fs.symlink, "The probe must detect to be able to generate symlinks");
        opts.overwrite_existing = true;
        opts.filter_process_delay = delay;
        opts.destination_is_initially_empty = false;
        setup_filter_pipeline(opts.filters.options_mut());
        let (source, destination, _index, outcome) = checkout_index_in_tmp_dir_opts(
            opts.clone(),
            "make_mixed",
            None,
            |_| true,
            // Pre-populate the destination with conflicting filesystem entries of
            // the *wrong* kind at each entry path; checkout must replace them all.
            |d| {
                let empty = d.join("empty");
                symlink::symlink_dir(d.join(".."), &empty)?; // empty is symlink to the directory above
                std::fs::write(d.join("executable"), b"longer content foo bar")?; // executable is regular file and has different content
                let dir = d.join("dir");
                std::fs::create_dir(&dir)?;
                std::fs::create_dir(dir.join("content"))?; // 'content' is a directory now
                let dir = dir.join("sub-dir");
                std::fs::create_dir(&dir)?;
                symlink::symlink_dir(empty, dir.join("symlink"))?; // 'symlink' is a symlink to a directory.
                Ok(())
            },
        )?;
        assert!(outcome.collisions.is_empty());
        assert_eq!(
            stripped_prefix(&destination, &dir_structure(&destination)),
            paths(["dir/content", "dir/sub-dir/symlink", "empty", "executable"])
        );
        let meta = std::fs::symlink_metadata(destination.path().join("empty"))?;
        assert!(meta.is_file(), "'empty' is now a file");
        assert_eq!(meta.len(), 0, "'empty' is indeed empty");
        let exe = destination.path().join("executable");
        assert_eq!(std::fs::read(&exe)?, b"content", "'exe' has the correct content");
        let meta = std::fs::symlink_metadata(exe)?;
        assert!(meta.is_file());
        #[cfg(unix)]
        if opts.fs.executable_bit {
            // Checked-out executables must carry the x-bit while respecting the umask.
            let mode = meta.mode();
            assert_eq!(
                mode & 0o100,
                0o100,
                "executable bit set where supported ({:04o} & {:04o} = {:04o} should be {:04o})",
                mode,
                0o100,
                mode & 0o100,
                0o100
            );
            let umask_write = gix_testtools::umask() & 0o222;
            assert_eq!(
                mode & umask_write,
                0,
                "no excessive write bits are set ({:04o} & {:04o} = {:04o} should be {:04o})",
                mode,
                umask_write,
                mode & umask_write,
                0
            );
            assert_ne!(
                umask_write, 0,
                "test not meaningful unless runner umask restricts some writes"
            );
        }
        assert_eq!(
            std::fs::read(source.join("dir/content"))?.as_bstr(),
            "other content\n",
            "in the worktree, we have LF"
        );
        assert_eq!(
            std::fs::read(destination.path().join("dir/content"))?.as_bstr(),
            "➡other content\r\n",
            "autocrlf is enabled, so we get CRLF when checking out as the pipeline is active, and we have a filter"
        );
        let symlink = destination.path().join("dir/sub-dir/symlink");
        assert!(std::fs::symlink_metadata(&symlink)?.is_symlink());
        assert_eq!(
            std::fs::read(symlink)?.as_bstr(),
            "➡other content\r\n",
            "autocrlf is enabled"
        );
    }
    Ok(())
}
// With symlink support disabled, symlink entries are written as regular files
// whose content is the link target.
#[test]
fn symlinks_become_files_if_disabled() -> crate::Result {
    let mut opts = opts_from_probe();
    opts.fs.symlink = false;
    let (source_tree, destination, _index, outcome) =
        checkout_index_in_tmp_dir(opts.clone(), "make_mixed_without_submodules", None)?;
    assert_equality(&source_tree, &destination, opts.fs.symlink)?;
    assert!(outcome.collisions.is_empty());
    Ok(())
}
// A symlink entry pointing at a directory (here `.`) must be created as a real
// symlink that can be traversed like the directory it points to.
#[test]
fn symlinks_to_directories_are_usable() -> crate::Result {
    let opts = opts_from_probe();
    assert!(opts.fs.symlink, "The probe must detect to be able to generate symlinks");
    let (_source_tree, destination, _index, outcome) =
        checkout_index_in_tmp_dir(opts.clone(), "make_dir_symlink", None)?;
    let worktree_files = dir_structure(&destination);
    let worktree_files_stripped = stripped_prefix(&destination, &worktree_files);
    assert_eq!(worktree_files_stripped, paths(["symlink"]));
    let symlink_path = &worktree_files[0];
    // `symlink_metadata` sees the link itself, `metadata` follows it to the directory.
    assert!(symlink_path
        .symlink_metadata()
        .expect("symlink is on disk")
        .is_symlink());
    assert!(symlink_path
        .metadata()
        .expect("metadata accessible through symlink")
        .is_dir());
    assert_eq!(std::fs::read_link(symlink_path)?, Path::new("."));
    assert!(outcome.collisions.is_empty());
    Ok(())
}
// Dangling symlinks — including ones whose targets are invalid or reserved
// names on Windows — must still be creatable on disk.
#[test]
fn dangling_symlinks_can_be_created() -> crate::Result {
    let opts = opts_from_probe();
    assert!(opts.fs.symlink, "The probe must detect to be able to generate symlinks");
    for (fixture, symlink_name, target_name) in [
        ("make_dangling_symlink", "dangling", "non-existing-target"),
        (
            "make_dangling_symlink_to_windows_invalid",
            "dangling-qmarks-symlink",
            "???",
        ),
        (
            "make_dangling_symlink_to_windows_reserved",
            "dangling-con-symlink",
            "CON",
        ),
    ] {
        let (_source_tree, destination, _index, outcome) = checkout_index_in_tmp_dir(opts.clone(), fixture, None)?;
        let worktree_files = dir_structure(&destination);
        let worktree_files_stripped = stripped_prefix(&destination, &worktree_files);
        assert_eq!(worktree_files_stripped, paths([symlink_name]));
        let symlink_path = &worktree_files[0];
        assert!(symlink_path
            .symlink_metadata()
            .expect("dangling symlink is on disk")
            .is_symlink());
        assert_eq!(std::fs::read_link(symlink_path)?, Path::new(target_name));
        assert!(outcome.collisions.is_empty());
    }
    Ok(())
}
// Checkout must produce an equivalent worktree whether symlink support is
// enabled or not (disabled symlinks degrade to files, checked by assert_equality).
#[test]
fn allow_or_disallow_symlinks() -> crate::Result {
    let mut opts = opts_from_probe();
    for allowed in &[false, true] {
        opts.fs.symlink = *allowed;
        let (source_tree, destination, _index, outcome) =
            checkout_index_in_tmp_dir(opts.clone(), "make_mixed_without_submodules", None)?;
        assert_equality(&source_tree, &destination, opts.fs.symlink)?;
        assert!(outcome.collisions.is_empty());
    }
    Ok(())
}
// With keep-going enabled, object-lookup failures are collected as errors while
// the remaining entries are still checked out. The object database below only
// serves the first two requested objects and fails all later lookups.
#[test]
fn keep_going_collects_results() {
    let mut opts = opts_from_probe();
    opts.keep_going = true;
    let count = AtomicUsize::default();
    let (_source_tree, destination, _index, outcome) = checkout_index_in_tmp_dir_opts(
        opts,
        "make_mixed_without_submodules",
        None,
        // Returns `true` (= deny the object) once two objects were already served.
        |_id| {
            count
                .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |current| {
                    (current < 2).then_some(current + 1)
                })
                .is_err()
        },
        |_| Ok(()),
    )
    .unwrap();
    if multi_threaded() {
        assert_eq!(
            outcome.errors.len(),
            2,
            "content changes due to non-deterministic nature of racy threads"
        );
    } else {
        // Single-threaded checkout is deterministic, so the failing paths are fixed.
        assert_eq!(
            outcome
                .errors
                .iter()
                .map(|r| r.path.to_path_lossy().into_owned())
                .collect::<Vec<_>>(),
            paths([".gitattributes", "dir/content"])
        );
    }
    if multi_threaded() {
        let actual = dir_structure(&destination);
        assert!(
            (2..=3).contains(&actual.len()),
            "it's 3 most of the time, but can be 2 of the 'empty' file is missing as the object couldn't be accessed.\
            It's unclear why there isn't more, as it would keep going"
        );
    } else {
        assert_eq!(
            stripped_prefix(&destination, &dir_structure(&destination)),
            paths([
                if cfg!(unix) {
                    "dir/sub-dir/symlink"
                } else {
                    r"dir\sub-dir\symlink"
                },
                "empty",
                "executable",
            ]),
            "some files could not be created"
        );
    }
    assert!(outcome.collisions.is_empty());
}
// On a case-sensitive filesystem, names that differ only by case are distinct
// and must all check out without any collision being reported.
#[test]
fn no_case_related_collisions_on_case_sensitive_filesystem() {
    let opts = opts_from_probe();
    if opts.fs.ignore_case {
        // The probe decides the filesystem semantics; skip where not applicable.
        eprintln!("Skipping case-sensitive testing on what would be a case-insensitive file system");
        return;
    }
    let (source_tree, destination, index, outcome) =
        checkout_index_in_tmp_dir(opts.clone(), "make_ignorecase_collisions", None).unwrap();
    assert!(outcome.collisions.is_empty());
    let num_files = assert_equality(&source_tree, &destination, opts.fs.symlink).unwrap();
    assert_eq!(
        num_files,
        index.entries().len() - 1,
        "it checks out all files (minus 1 to account for .gitattributes which is skipped in the worktree in our tests)"
    );
    assert!(
        destination.path().join(".gitattributes").is_file(),
        "we do have attributes even though, dot files are ignored in `assert_equality`"
    );
}
// Paths containing `..` components must be rejected so checkout can never write
// outside the working tree; with keep-going, the offense becomes a collected error.
#[test]
fn safety_checks_dotdot_trees() {
    let mut opts = opts_from_probe();
    let err =
        checkout_index_in_tmp_dir(opts.clone(), "make_traverse_trees", Some("traverse_dotdot_trees")).unwrap_err();
    let expected_err_msg = "Input path \"../outside\" contains relative or absolute components";
    assert_eq!(err.source().expect("inner").to_string(), expected_err_msg);
    opts.keep_going = true;
    let (_source_tree, _destination, _index, outcome) =
        checkout_index_in_tmp_dir(opts, "make_traverse_trees", Some("traverse_dotdot_trees"))
            .expect("keep-going checks out as much as possible");
    assert_eq!(outcome.errors.len(), 1, "one path could not be checked out");
    assert_eq!(
        outcome.errors[0].error.source().expect("inner").to_string(),
        expected_err_msg
    );
}
// A tree path containing the `.git` name must be rejected outright, preventing
// e.g. hook injection into the repository directory during checkout.
#[test]
fn safety_checks_dotgit_trees() {
    // `opts` is not used again, so move it instead of cloning (clippy::redundant_clone).
    let opts = opts_from_probe();
    let err = checkout_index_in_tmp_dir(opts, "make_traverse_trees", Some("traverse_dotgit_trees")).unwrap_err();
    assert_eq!(
        err.source().expect("inner").to_string(),
        "The .git name may never be used"
    );
}
// On NTFS, `.git::$INDEX_ALLOCATION` is an alternate-data-stream spelling of
// `.git`; the safety check must detect it despite the disguised path.
#[test]
fn safety_checks_dotgit_ntfs_stream() {
    // `opts` is not used again, so move it instead of cloning (clippy::redundant_clone).
    let opts = opts_from_probe();
    let err = checkout_index_in_tmp_dir(opts, "make_traverse_trees", Some("traverse_dotgit_stream")).unwrap_err();
    assert_eq!(
        err.source().expect("inner").to_string(),
        "The .git name may never be used",
        "note how it is still discovered even though the path is `.git::$INDEX_ALLOCATION`"
    );
}
// On a case-insensitive filesystem, case-colliding entries must be reported as
// collisions even when content arrives late via a delayed filter driver.
#[test]
fn collisions_are_detected_on_a_case_insensitive_filesystem_even_with_delayed_filters() {
    let mut opts = opts_from_probe();
    if !opts.fs.ignore_case {
        eprintln!("Skipping case-insensitive testing on what would be a case-sensitive file system");
        return;
    }
    setup_filter_pipeline(opts.filters.options_mut());
    opts.filter_process_delay = gix_filter::driver::apply::Delay::Allow;
    let (source_tree, destination, _index, outcome) =
        checkout_index_in_tmp_dir(opts, "make_ignorecase_collisions", None).unwrap();
    let source_files = dir_structure(&source_tree);
    assert_eq!(
        stripped_prefix(&source_tree, &source_files),
        paths(["d", "file_x", "link-to-X", "x"]),
        "plenty of collisions prevent a checkout"
    );
    let dest_files = dir_structure(&destination);
    if multi_threaded() {
        assert!(
            (4..=6).contains(&dest_files.len()),
            "due to the clash happening at nearly any time, and keep-going is false, we get a variance of files"
        );
    } else {
        assert_eq!(
            stripped_prefix(&destination, &dest_files),
            paths(["D/B", "D/C", "FILE_X", "X", "link-to-X"]),
            "we checkout files in order and generally handle collision detection differently, hence the difference"
        );
    }
    // Colliding with an existing *directory* surfaces differently per platform.
    let error_kind = ErrorKind::AlreadyExists;
    #[cfg(windows)]
    let error_kind_dir = ErrorKind::PermissionDenied;
    #[cfg(not(windows))]
    let error_kind_dir = error_kind;
    if multi_threaded() {
        assert!(
            (5..=6).contains(&outcome.collisions.len()),
            "can only assert on number as it's racily creating files so unclear which one clashes, and due to keep-going = false there is variance"
        );
    } else {
        assert_eq!(
            outcome.collisions,
            vec![
                Collision {
                    path: "d".into(),
                    error_kind: error_kind_dir,
                },
                Collision {
                    path: "FILE_x".into(),
                    error_kind,
                },
                Collision {
                    path: "file_X".into(),
                    error_kind,
                },
                Collision {
                    path: "file_x".into(),
                    error_kind,
                },
                Collision {
                    path: "x".into(),
                    error_kind,
                },
            ],
            "these files couldn't be checked out"
        );
    }
}
/// `true` if the effective thread count for parallel operations exceeds one,
/// i.e. checkout will run with racy, non-deterministic ordering.
fn multi_threaded() -> bool {
    let threads = gix_features::parallel::num_threads(None);
    threads > 1
}
/// Assert that `destination` mirrors `source_tree` file-for-file and return the
/// number of compared files. When `allow_symlinks` is false, a symlink in the
/// source is expected to appear as a regular file containing the link target.
fn assert_equality(source_tree: &Path, destination: &TempDir, allow_symlinks: bool) -> crate::Result<usize> {
    let source_files = dir_structure(source_tree);
    let worktree_files = dir_structure(destination);
    assert_eq!(
        stripped_prefix(source_tree, &source_files),
        stripped_prefix(destination, &worktree_files),
    );
    let mut count = 0;
    for (source_file, worktree_file) in source_files.iter().zip(worktree_files.iter()) {
        count += 1;
        if !allow_symlinks && source_file.is_symlink() {
            // Symlink degraded to a file: its content must equal the source link target.
            assert!(!worktree_file.is_symlink());
            assert_eq!(fs::read(worktree_file)?.to_path()?, fs::read_link(source_file)?);
        } else {
            assert_eq!(fs::read(source_file)?, fs::read(worktree_file)?);
            // Only the owner rwx bits are compared — group/other bits depend on umask.
            #[cfg(unix)]
            assert_eq!(
                fs::symlink_metadata(source_file)?.mode() & 0o700,
                fs::symlink_metadata(worktree_file)?.mode() & 0o700,
                "permissions of source and checked out file are comparable"
            );
        }
    }
    Ok(count)
}
/// Collect all non-directory entries below `path`, sorted, without following
/// symlinks, and skipping dot-entries (except the root itself).
pub fn dir_structure<P: AsRef<std::path::Path>>(path: P) -> Vec<std::path::PathBuf> {
    let root = path.as_ref();
    let mut files: Vec<_> = walkdir::WalkDir::new(root)
        .follow_links(false)
        .into_iter()
        // Prune hidden entries entirely, but always descend into the root.
        .filter_entry(|entry| entry.path() == root || !entry.file_name().to_string_lossy().starts_with('.'))
        .flatten()
        .filter_map(|entry| {
            let is_non_dir = entry
                .path()
                .symlink_metadata()
                .map_or(false, |meta| !meta.is_dir());
            is_non_dir.then(|| entry.path().to_path_buf())
        })
        .collect();
    files.sort();
    files
}
fn checkout_index_in_tmp_dir(
opts: gix_worktree_state::checkout::Options,
name: &str,
subdir_name: Option<&str>,
) -> crate::Result<(PathBuf, TempDir, gix_index::File, gix_worktree_state::checkout::Outcome)> {
checkout_index_in_tmp_dir_opts(opts, name, subdir_name, |_d| true, |_| Ok(()))
}
/// Run `gix_worktree_state::checkout()` for the fixture created by `script_name`
/// into a new temporary directory.
///
/// * `allow_return_object` decides per object id whether the object database may
///   serve it (returning `false` simulates a missing object).
/// * `prep_dest` may pre-populate the destination directory before checkout.
///
/// Returns the fixture path, the destination tempdir, the loaded index and the
/// checkout outcome.
fn checkout_index_in_tmp_dir_opts(
    opts: gix_worktree_state::checkout::Options,
    script_name: &str,
    subdir_name: Option<&str>,
    allow_return_object: impl FnMut(&gix_hash::oid) -> bool + Send + Clone,
    prep_dest: impl Fn(&Path) -> std::io::Result<()>,
) -> crate::Result<(PathBuf, TempDir, gix_index::File, gix_worktree_state::checkout::Outcome)> {
    let source_tree = {
        let root = fixture_path(script_name);
        if let Some(name) = subdir_name {
            root.join(name)
        } else {
            root
        }
    };
    let git_dir = source_tree.join(".git");
    let mut index = gix_index::File::at(git_dir.join("index"), gix_hash::Kind::Sha1, false, Default::default())?;
    let odb = gix_odb::at(git_dir.join("objects"))?.into_inner().into_arc()?;
    let destination = gix_testtools::tempfile::tempdir_in(std::env::current_dir()?)?;
    prep_dest(destination.path()).expect("preparation must succeed");
    // An object database wrapper that consults `allow` before forwarding lookups,
    // so tests can simulate objects that cannot be retrieved.
    // NOTE(review): `RefCell` is not `Sync`; presumably each worker thread gets
    // its own clone of this finder — confirm against the checkout implementation.
    #[derive(Clone)]
    struct MaybeFind<Allow: Clone, Find: Clone> {
        allow: std::cell::RefCell<Allow>,
        objects: Find,
    }
    impl<Allow, Find> gix_object::Find for MaybeFind<Allow, Find>
    where
        Allow: FnMut(&gix_hash::oid) -> bool + Send + Clone,
        Find: gix_object::Find + Send + Clone,
    {
        fn try_find<'a>(
            &self,
            id: &gix_hash::oid,
            buf: &'a mut Vec<u8>,
        ) -> Result<Option<Data<'a>>, gix_object::find::Error> {
            if (self.allow.borrow_mut())(id) {
                self.objects.try_find(id, buf)
            } else {
                // Pretend the object does not exist.
                Ok(None)
            }
        }
    }
    let db = MaybeFind {
        allow: allow_return_object.into(),
        objects: odb,
    };
    let outcome = gix_worktree_state::checkout(
        &mut index,
        destination.path(),
        db,
        &progress::Discard,
        &progress::Discard,
        &AtomicBool::default(),
        opts,
    )?;
    Ok((source_tree, destination, index, outcome))
}
/// Strip `prefix` from every path in `source_files`, silently dropping paths
/// that do not start with it.
fn stripped_prefix(prefix: impl AsRef<Path>, source_files: &[PathBuf]) -> Vec<&Path> {
    let prefix = prefix.as_ref();
    source_files
        .iter()
        .filter_map(|file| file.strip_prefix(prefix).ok())
        .collect()
}
/// Probe filesystem capabilities (symlinks, case sensitivity, …) using this
/// repository's own `.git` directory as the probe location.
fn probe_gitoxide_dir() -> crate::Result<gix_fs::Capabilities> {
    let (discovered, _trust) = gix_discover::upwards(".".as_ref())?;
    let (git_dir, _work_tree) = discovered.into_repository_and_work_tree_directories();
    Ok(gix_fs::Capabilities::probe(&git_dir))
}
/// Build checkout options from the (once-probed, then cached) filesystem
/// capabilities, assuming an empty destination and the default thread count.
fn opts_from_probe() -> gix_worktree_state::checkout::Options {
    // Probe only once per test run — probing touches the filesystem.
    static CAPABILITIES: LazyLock<gix_fs::Capabilities> = LazyLock::new(|| probe_gitoxide_dir().unwrap());
    gix_worktree_state::checkout::Options {
        fs: *CAPABILITIES,
        destination_is_initially_empty: true,
        thread_limit: gix_features::parallel::num_threads(None).into(),
        ..Default::default()
    }
}
/// Convert string literals into owned `PathBuf`s for concise expected-value lists.
fn paths<'a>(p: impl IntoIterator<Item = &'a str>) -> Vec<PathBuf> {
    let mut out = Vec::new();
    for item in p {
        out.push(PathBuf::from(item));
    }
    out
}
/// Configure the filter pipeline for tests: enable autocrlf and register the
/// external `arrow` example binary as a required long-running filter process.
fn setup_filter_pipeline(opts: &mut gix_filter::pipeline::Options) {
    opts.eol_config.auto_crlf = gix_filter::eol::AutoCrlf::Enabled;
    opts.drivers = vec![gix_filter::Driver {
        name: "arrow".into(),
        clean: None,
        smudge: None,
        // Only the process (long-running) protocol is configured; no one-shot filters.
        process: Some((driver_exe() + " process").into()),
        required: true,
    }];
}
// A pre-existing file longer than the checked-out blob must end up with exactly
// the blob's content, i.e. the file is truncated rather than partially overwritten.
// NOTE(review): `overwrite_existing` is false here while the destination is
// pre-populated — presumably truncation happens on the regular write path; confirm.
#[test]
fn checkout_truncates_existing_longer_files() -> crate::Result {
    let mut opts = opts_from_probe();
    opts.overwrite_existing = false;
    opts.destination_is_initially_empty = false;
    // Use existing fixture and modify one file to be longer
    let (_source_tree, destination, _index, _outcome) = checkout_index_in_tmp_dir_opts(
        opts.clone(),
        "make_mixed_without_submodules_and_symlinks",
        None,
        |_| true,
        |dest| {
            // Create a longer version of the "executable" file before checkout
            let file_path = dest.join("executable");
            std::fs::create_dir_all(dest)?;
            std::fs::write(
                &file_path,
                b"This is much longer content that should be truncated to match git's version",
            )?;
            Ok(())
        },
    )?;
    let file_path = destination.path().join("executable");
    let final_content = std::fs::read(&file_path)?;
    assert_eq!(
        final_content[..].as_bstr(),
        "content",
        "File content should match git's version"
    );
    Ok(())
}

View File

@@ -0,0 +1,11 @@
mod checkout;
use std::path::{Path, PathBuf};
pub type Result<T = ()> = std::result::Result<T, Box<dyn std::error::Error>>;
/// Run (or reuse the cached result of) the fixture script `<name>.sh` and return
/// the directory containing the repository it created.
pub fn fixture_path(name: &str) -> PathBuf {
    // Return the expression directly instead of binding it to a temporary
    // (clippy::let_and_return).
    gix_testtools::scripted_fixture_read_only_standalone(Path::new(name).with_extension("sh"))
        .expect("script works")
}

View File

@@ -0,0 +1,2 @@
mod state;
use state::*;