From 3c8421f003bc3a5f2f51cee1b5cb6a526d5e0f38 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 24 Oct 2023 14:07:48 +0200 Subject: [PATCH] feat!: Add git-style metadata support. As opposed to the Rust standard library, this one will get the ctime from the file itself, instead of from the inode. That way, the index file written by `gix` will not continuously be expensively rewritten by `git`, and vice versa. --- Cargo.lock | 12 +-- gix-index/Cargo.toml | 4 + gix-index/src/entry/mode.rs | 10 +-- gix-index/src/entry/stat.rs | 23 +++-- gix-index/src/fs.rs | 166 ++++++++++++++++++++++++++++++++++++ gix-index/src/lib.rs | 2 + 6 files changed, 194 insertions(+), 23 deletions(-) create mode 100644 gix-index/src/fs.rs diff --git a/Cargo.lock b/Cargo.lock index 909ae597fd5..7d574a99c88 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1795,7 +1795,9 @@ dependencies = [ "gix-object 0.38.0", "gix-traverse 0.34.0", "itoa", + "libc", "memmap2 0.7.1", + "rustix 0.38.20", "serde", "smallvec", "thiserror", @@ -2082,7 +2084,7 @@ dependencies = [ "gix-config-value", "gix-testtools", "parking_lot", - "rustix 0.38.19", + "rustix 0.38.20", "serial_test", "thiserror", ] @@ -2933,7 +2935,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" dependencies = [ "hermit-abi", - "rustix 0.38.19", + "rustix 0.38.20", "windows-sys", ] @@ -3859,9 +3861,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.19" +version = "0.38.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "745ecfa778e66b2b63c88a61cb36e0eea109e803b0b86bf9879fbc77c70e86ed" +checksum = "67ce50cb2e16c2903e30d1cbccfd8387a74b9d4c938b6a4c5ec6cc7556f7a8a0" dependencies = [ "bitflags 2.4.0", "errno", @@ -4289,7 +4291,7 @@ dependencies = [ "cfg-if", "fastrand 2.0.1", "redox_syscall", - "rustix 0.38.19", + "rustix 0.38.20", "windows-sys", ] diff --git a/gix-index/Cargo.toml b/gix-index/Cargo.toml 
index 9320daa51b0..cd845b326b7 100644 --- a/gix-index/Cargo.toml +++ b/gix-index/Cargo.toml @@ -41,6 +41,10 @@ bitflags = "2" document-features = { version = "0.2.0", optional = true } +[target.'cfg(not(windows))'.dependencies] +rustix = { version = "0.38.20", default-features = false, features = ["std", "fs"] } +libc = { version = "0.2.149" } + [package.metadata.docs.rs] features = ["document-features", "serde"] rustdoc-args = ["--cfg", "docsrs"] diff --git a/gix-index/src/entry/mode.rs b/gix-index/src/entry/mode.rs index 0301df43800..583c295bc7b 100644 --- a/gix-index/src/entry/mode.rs +++ b/gix-index/src/entry/mode.rs @@ -37,7 +37,7 @@ impl Mode { /// can not be committed to git). pub fn change_to_match_fs( self, - stat: &std::fs::Metadata, + stat: &crate::fs::Metadata, has_symlinks: bool, executable_bit: bool, ) -> Option { @@ -46,15 +46,13 @@ impl Mode { Mode::SYMLINK if has_symlinks && !stat.is_symlink() => (), Mode::SYMLINK if !has_symlinks && !stat.is_file() => (), Mode::COMMIT | Mode::DIR if !stat.is_dir() => (), - Mode::FILE if executable_bit && gix_fs::is_executable(stat) => return Some(Change::ExecutableBit), - Mode::FILE_EXECUTABLE if executable_bit && !gix_fs::is_executable(stat) => { - return Some(Change::ExecutableBit) - } + Mode::FILE if executable_bit && stat.is_executable() => return Some(Change::ExecutableBit), + Mode::FILE_EXECUTABLE if executable_bit && !stat.is_executable() => return Some(Change::ExecutableBit), _ => return None, }; let new_mode = if stat.is_dir() { Mode::COMMIT - } else if executable_bit && gix_fs::is_executable(stat) { + } else if executable_bit && stat.is_executable() { Mode::FILE_EXECUTABLE } else { Mode::FILE diff --git a/gix-index/src/entry/stat.rs b/gix-index/src/entry/stat.rs index 5e60f8540be..9e279e784ea 100644 --- a/gix-index/src/entry/stat.rs +++ b/gix-index/src/entry/stat.rs @@ -76,11 +76,11 @@ impl Stat { } /// Creates stat information from the result of `symlink_metadata`. 
- pub fn from_fs(fstat: &std::fs::Metadata) -> Result { - let mtime = fstat.modified().unwrap_or(std::time::UNIX_EPOCH); - let ctime = fstat.created().unwrap_or(std::time::UNIX_EPOCH); + pub fn from_fs(stat: &crate::fs::Metadata) -> Result { + let mtime = stat.modified().unwrap_or(std::time::UNIX_EPOCH); + let ctime = stat.created().unwrap_or(std::time::UNIX_EPOCH); - #[cfg(not(unix))] + #[cfg(windows)] let res = Stat { mtime: mtime.try_into()?, ctime: ctime.try_into()?, @@ -89,11 +89,10 @@ impl Stat { uid: 0, gid: 0, // truncation to 32 bits is on purpose (git does the same). - size: fstat.len() as u32, + size: stat.len() as u32, }; - #[cfg(unix)] + #[cfg(not(windows))] let res = { - use std::os::unix::fs::MetadataExt; Stat { mtime: mtime.try_into().unwrap_or_default(), ctime: ctime.try_into().unwrap_or_default(), @@ -101,12 +100,12 @@ impl Stat { // that's what the linux syscalls returns // just rust upcasts to 64 bits for some reason? // numbers this large are impractical anyway (that's a lot of hard-drives). - dev: fstat.dev() as u32, - ino: fstat.ino() as u32, - uid: fstat.uid(), - gid: fstat.gid(), + dev: stat.dev() as u32, + ino: stat.ino() as u32, + uid: stat.uid(), + gid: stat.gid(), // truncation to 32 bits is on purpose (git does the same). - size: fstat.len() as u32, + size: stat.len() as u32, } }; diff --git a/gix-index/src/fs.rs b/gix-index/src/fs.rs new file mode 100644 index 00000000000..21422f9b804 --- /dev/null +++ b/gix-index/src/fs.rs @@ -0,0 +1,166 @@ +//! This module contains a `Metadata` implementation that must be used instead of `std::fs::Metadata` to assure +//! that the `ctime` information is populated exactly like the one in `git`, which wouldn't be the case on unix. +#![allow(clippy::useless_conversion)] // on macOS some conversions are required, but on linux usually not. +#![allow(clippy::unnecessary_cast)] + +// it's allowed for good measure, in case there are systems that use different types for that. 
+use std::path::Path; +use std::time::{Duration, SystemTime}; + +/// A structure to partially mirror [`std::fs::Metadata`]. +#[cfg(not(windows))] +pub struct Metadata(rustix::fs::Stat); + +#[cfg(windows)] +/// A structure to partially mirror [`std::fs::Metadata`]. +pub struct Metadata(std::fs::Metadata); + +/// Lifecycle +impl Metadata { + /// Obtain the metadata at `path` without following symlinks. + pub fn from_path_no_follow(path: &Path) -> Result { + #[cfg(not(windows))] + { + rustix::fs::lstat(path).map(Metadata).map_err(Into::into) + } + #[cfg(windows)] + path.symlink_metadata().map(Metadata) + } + + /// Obtain the metadata of the already opened `file`. + pub fn from_file(file: &std::fs::File) -> Result { + #[cfg(not(windows))] + { + rustix::fs::fstat(file).map(Metadata).map_err(Into::into) + } + #[cfg(windows)] + file.metadata().map(Metadata) + } +} + +/// Access +#[allow(clippy::len_without_is_empty)] +impl Metadata { + /// Return true if the metadata belongs to a directory + pub fn is_dir(&self) -> bool { + #[cfg(not(windows))] + { + (self.0.st_mode & libc::S_IFMT) == libc::S_IFDIR + } + #[cfg(windows)] + self.0.is_dir() + } + + /// Return the time at which the underlying file was modified. + pub fn modified(&self) -> Option { + #[cfg(not(windows))] + { + Some(system_time_from_secs_nanos( + self.0.st_mtime.try_into().ok()?, + self.0.st_mtime_nsec.try_into().ok()?, + )) + } + #[cfg(windows)] + self.0.modified().ok() + } + + /// Return the time at which the underlying file was created. + /// + /// Note that this differs from [`std::fs::Metadata::created()`] which would return + /// the inode birth time, which is notably different to what `git` does. + pub fn created(&self) -> Option { + #[cfg(not(windows))] + { + Some(system_time_from_secs_nanos( + self.0.st_ctime.try_into().ok()?, + self.0.st_ctime_nsec.try_into().ok()?, + )) + } + #[cfg(windows)] + self.0.created().ok() + } + + /// Return the size of the file in bytes. 
+ pub fn len(&self) -> u64 { + #[cfg(not(windows))] + { + self.0.st_size as u64 + } + #[cfg(windows)] + self.0.len() + } + + /// Return the device id on which the file is located, or 0 on windows. + pub fn dev(&self) -> u64 { + #[cfg(not(windows))] + { + self.0.st_dev as u64 + } + #[cfg(windows)] + 0 + } + + /// Return the inode id tracking the file, or 0 on windows. + pub fn ino(&self) -> u64 { + #[cfg(not(windows))] + { + self.0.st_ino as u64 + } + #[cfg(windows)] + 0 + } + + /// Return the user-id of the file or 0 on windows. + pub fn uid(&self) -> u32 { + #[cfg(not(windows))] + { + self.0.st_uid as u32 + } + #[cfg(windows)] + 0 + } + + /// Return the group-id of the file or 0 on windows. + pub fn gid(&self) -> u32 { + #[cfg(not(windows))] + { + self.0.st_gid as u32 + } + #[cfg(windows)] + 0 + } + + /// Return `true` if the file's executable bit is set, or `false` on windows. + pub fn is_executable(&self) -> bool { + #[cfg(not(windows))] + { + (self.0.st_mode & libc::S_IFMT) == libc::S_IFREG && self.0.st_mode & libc::S_IXUSR == libc::S_IXUSR + } + #[cfg(windows)] + gix_fs::is_executable(&self.0) + } + + /// Return `true` if the file is a symbolic link. + pub fn is_symlink(&self) -> bool { + #[cfg(not(windows))] + { + (self.0.st_mode & libc::S_IFMT) == libc::S_IFLNK + } + #[cfg(windows)] + self.0.is_symlink() + } + + /// Return `true` if this is a regular file, executable or not. + pub fn is_file(&self) -> bool { + #[cfg(not(windows))] + { + (self.0.st_mode & libc::S_IFMT) == libc::S_IFREG + } + #[cfg(windows)] + self.0.is_file() + } +} + +fn system_time_from_secs_nanos(secs: u64, nanos: u32) -> SystemTime { + std::time::UNIX_EPOCH + Duration::new(secs, nanos) +} diff --git a/gix-index/src/lib.rs b/gix-index/src/lib.rs index 55b332a8280..e54c4aaf1fa 100644 --- a/gix-index/src/lib.rs +++ b/gix-index/src/lib.rs @@ -33,6 +33,8 @@ pub mod verify; /// pub mod write; +pub mod fs; + /// All known versions of a git index file. 
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]