From 028635165ddd98322d8b902fe0714fe2d0699a3e Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Sat, 15 Feb 2025 17:57:36 -0500 Subject: [PATCH 1/2] doc: Fix `to_windows_separators` docstring, revise others The `gix_path::convert::to_windows_separators` documentation comment had inadvertently described the opposite of what this function does, saying it changed backslashed to slashes and that the effect was like a Unix path. This fixes the docstring so it saye `to_windows_separators` changes slashes to backslashes, and moves the text that likens it to paths on Unix to the `to_unix_separators` docstring, where it applies. This also makes a number of other more minor revisions for clarity and stylistic consistency to documentation comments on other functions in the `gix_path::convert` module. --- gix-path/src/convert.rs | 64 +++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 28 deletions(-) diff --git a/gix-path/src/convert.rs b/gix-path/src/convert.rs index 4f2fe990ee0..106c94ccf71 100644 --- a/gix-path/src/convert.rs +++ b/gix-path/src/convert.rs @@ -45,9 +45,9 @@ pub fn try_os_str_into_bstr(path: Cow<'_, OsStr>) -> Result, Utf8E } } -/// Convert the given path either into its raw bytes on unix or its UTF8 encoded counterpart on windows. +/// Convert the given path either into its raw bytes on Unix or its UTF-8 encoded counterpart on Windows. /// -/// On windows, if the source Path contains ill-formed, lone surrogates, the UTF-8 conversion will fail +/// On Windows, if the source `Path`` contains ill-formed, lone surrogates, the UTF-8 conversion will fail /// causing `Utf8Error` to be returned. pub fn try_into_bstr<'a>(path: impl Into>) -> Result, Utf8Error> { let path = path.into(); @@ -86,7 +86,7 @@ pub fn try_into_bstr<'a>(path: impl Into>) -> Result Ok(path_str) } -/// Similar to [`try_into_bstr()`] but **panics** if malformed surrogates are encountered on windows. +/// Similar to [`try_into_bstr()`] but **panics** if malformed surrogates are encountered on Windows. pub fn into_bstr<'a>(path: impl Into>) -> Cow<'a, BStr> { try_into_bstr(path).expect("prefix path doesn't contain ill-formed UTF-8") } @@ -101,11 +101,11 @@ pub fn join_bstr_unix_pathsep<'a, 'b>(base: impl Into>, path: impl base } -/// Given `input` bytes, produce a `Path` from them ignoring encoding entirely if on unix. +/// Given `input` bytes, produce a `Path` from them ignoring encoding entirely if on Unix. /// -/// On windows, the input is required to be valid UTF-8, which is guaranteed if we wrote it before. There are some potential -/// git versions and windows installation which produce mal-formed UTF-16 if certain emojies are in the path. It's as rare as -/// it sounds, but possible. +/// On Windows, the input is required to be valid UTF-8, which is guaranteed if we wrote it before. +/// There are some potential Git versions and Windows installations which produce malformed UTF-16 +/// if certain emojis are in the path. It's as rare as it sounds, but possible. pub fn try_from_byte_slice(input: &[u8]) -> Result<&Path, Utf8Error> { #[cfg(unix)] let p = { @@ -131,7 +131,7 @@ pub fn try_from_bstr<'a>(input: impl Into>) -> Result(input: impl Into>) -> Cow<'a, Path> { try_from_bstr(input).expect("prefix path doesn't contain ill-formed UTF-8") } @@ -205,7 +205,8 @@ pub fn to_native_separators<'a>(path: impl Into>) -> Cow<'a, BStr> p } -/// Convert paths with slashes to backslashes on windows and do nothing on unix, but **panics** if malformed surrogates are encountered on windows. +/// Convert paths with slashes to backslashes on Windows and do nothing on Unix, +/// but **panic** if unpaired surrogates are encountered on Windows. pub fn to_native_path_on_windows<'a>(path: impl Into>) -> Cow<'a, std::path::Path> { #[cfg(not(windows))] { @@ -217,7 +218,7 @@ pub fn to_native_path_on_windows<'a>(path: impl Into>) -> Cow<'a, } } -/// Replaces windows path separators with slashes, but only do so on windows. +/// Replace Windows path separators with slashes, but only do so on Windows. pub fn to_unix_separators_on_windows<'a>(path: impl Into>) -> Cow<'a, BStr> { #[cfg(windows)] { @@ -229,7 +230,7 @@ pub fn to_unix_separators_on_windows<'a>(path: impl Into>) -> Cow< } } -/// Replaces windows path separators with slashes, unconditionally. +/// Replace Windows path separators with slashes, which typically resembles a Unix path, unconditionally. /// /// **Note** Do not use these and prefer the conditional versions of this method. // TODO: use https://lib.rs/crates/path-slash to handle escapes @@ -237,7 +238,7 @@ pub fn to_unix_separators<'a>(path: impl Into>) -> Cow<'a, BStr> { replace(path, b'\\', b'/') } -/// Find backslashes and replace them with slashes, which typically resembles a unix path, unconditionally. +/// Find slashes and replace them with backslashes, unconditionally. /// /// **Note** Do not use these and prefer the conditional versions of this method. // TODO: use https://lib.rs/crates/path-slash to handle escapes @@ -245,19 +246,24 @@ pub fn to_windows_separators<'a>(path: impl Into>) -> Cow<'a, BStr replace(path, b'/', b'\\') } -/// Resolve relative components virtually without accessing the file system, e.g. turn `a/./b/c/.././..` into `a`, -/// without keeping intermediate `..` and `/a/../b/..` becomes `/`. -/// If the input path was relative and ends up being the `current_dir`, `.` is returned instead of the full path to `current_dir`. -/// Note that single `.` components as well as duplicate separators are left untouched. +/// Resolve relative components virtually, eliminating intermediate `..` without accessing the filesystem. /// -/// This is particularly useful when manipulating paths that are based on user input, and not resolving intermediate -/// symlinks keeps the path similar to what the user provided. If that's not desirable, use `[realpath()][crate::realpath()` -/// instead. +/// For example, this turns `a/./b/c/.././..` into `a`, and turns `/a/../b/..` into `/`. /// -/// Note that we might access the `current_dir` if we run out of path components to pop off, which is expected to be absolute -/// as typical return value of `std::env::current_dir()` or `gix_fs::current_dir(…)` when `core.precomposeUnicode` is known. -/// As a `current_dir` like `/c` can be exhausted by paths like `../../r`, `None` will be returned to indicate the inability -/// to produce a logically consistent path. +/// If the input path was relative and ends up being the `current_dir`, `.` is returned instead of +/// the full path to `current_dir`. +/// +/// Single `.` components as well as duplicate separators are left untouched. +/// +/// This is particularly useful when manipulating paths that are based on user input, and not +/// resolving intermediate symlinks keeps the path similar to what the user provided. If that's not +/// desirable, use `[realpath()][crate::realpath()` instead. +/// +/// Note that we might access the `current_dir` if we run out of path components to pop off, which +/// is expected to be absolute as typical return value of `std::env::current_dir()` or +/// `gix_fs::current_dir(…)` when `core.precomposeUnicode` is known. As a `current_dir` like `/c` +/// can be exhausted by paths like `../../r`, `None` will be returned to indicate the inability to +/// produce a logically consistent path. pub fn normalize<'a>(path: Cow<'a, Path>, current_dir: &Path) -> Option> { use std::path::Component::ParentDir; @@ -290,14 +296,16 @@ pub fn normalize<'a>(path: Cow<'a, Path>, current_dir: &Path) -> Option(relative_path: &'a Path, prefix: &Path) -> Cow<'a, Path> { if prefix.as_os_str().is_empty() { return Cow::Borrowed(relative_path); From 42875c9e4314bc2c92478393bc8dda4400fc397b Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Sat, 15 Feb 2025 18:03:56 -0500 Subject: [PATCH 2/2] Clarify `to_unix_separators{,_on_windows}` relationship `to_native_path_on_windows` already called `to_windows_separators` to do what would otherwise repeat logic of `to_windows_separators`, but `to_unix_separators_on_windows` repeated the code of `to_unix_separators`. Having `to_unix_separators_on_windows` call `to_unix_separators` instead of repeating its logic seems slightly easier to read, elucidates the relationship between the several functions, and might be slightly more maintainable. This is purely a refactoring, which should not change the effect. It should also not change performance, since the compiler should be able to inline the call in circumstnaces where that would help. --- gix-path/src/convert.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gix-path/src/convert.rs b/gix-path/src/convert.rs index 106c94ccf71..af81a5ee42a 100644 --- a/gix-path/src/convert.rs +++ b/gix-path/src/convert.rs @@ -222,7 +222,7 @@ pub fn to_native_path_on_windows<'a>(path: impl Into>) -> Cow<'a, pub fn to_unix_separators_on_windows<'a>(path: impl Into>) -> Cow<'a, BStr> { #[cfg(windows)] { - replace(path, b'\\', b'/') + to_unix_separators(path) } #[cfg(not(windows))] {