Skip to content

Commit

Permalink
find: New WalkEntry wrapper
Browse files Browse the repository at this point in the history
The new type wraps DirEntry when possible, but also lets us pass a valid
entry to matchers when walkdir returns a broken symlink error.  It also
implements a Metadata cache (part of uutils#430).
  • Loading branch information
tavianator committed Aug 8, 2024
1 parent a7bf906 commit 5a6576a
Show file tree
Hide file tree
Showing 27 changed files with 458 additions and 264 deletions.
5 changes: 2 additions & 3 deletions src/find/matchers/access.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@
// https://opensource.org/licenses/MIT.

use faccess::PathExt;
use walkdir::DirEntry;

use super::{Matcher, MatcherIO};
use super::{Matcher, MatcherIO, WalkEntry};

/// Matcher for -{read,writ,execut}able.
pub enum AccessMatcher {
Expand All @@ -17,7 +16,7 @@ pub enum AccessMatcher {
}

impl Matcher for AccessMatcher {
fn matches(&self, file_info: &DirEntry, _: &mut MatcherIO) -> bool {
fn matches(&self, file_info: &WalkEntry, _: &mut MatcherIO) -> bool {
let path = file_info.path();

match self {
Expand Down
8 changes: 3 additions & 5 deletions src/find/matchers/delete.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,11 @@
* file that was distributed with this source code.
*/

use std::fs::{self, FileType};
use std::fs;
use std::io::{self, stderr, Write};
use std::path::Path;

use walkdir::DirEntry;

use super::{Matcher, MatcherIO};
use super::{FileType, Matcher, MatcherIO, WalkEntry};

pub struct DeleteMatcher;

Expand All @@ -32,7 +30,7 @@ impl DeleteMatcher {
}

impl Matcher for DeleteMatcher {
fn matches(&self, file_info: &DirEntry, _: &mut MatcherIO) -> bool {
fn matches(&self, file_info: &WalkEntry, _: &mut MatcherIO) -> bool {
let path = file_info.path();
let path_str = path.to_string_lossy();

Expand Down
4 changes: 2 additions & 2 deletions src/find/matchers/empty.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use std::{
io::{stderr, Write},
};

use super::Matcher;
use super::{Matcher, MatcherIO, WalkEntry};

pub struct EmptyMatcher;

Expand All @@ -20,7 +20,7 @@ impl EmptyMatcher {
}

impl Matcher for EmptyMatcher {
fn matches(&self, file_info: &walkdir::DirEntry, _: &mut super::MatcherIO) -> bool {
fn matches(&self, file_info: &WalkEntry, _: &mut MatcherIO) -> bool {
if file_info.file_type().is_file() {
match file_info.metadata() {
Ok(meta) => meta.len() == 0,
Expand Down
309 changes: 309 additions & 0 deletions src/find/matchers/entry.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,309 @@
//! Paths encountered during a walk.
use std::cell::OnceCell;
use std::error::Error;
use std::ffi::OsStr;
use std::fmt::{self, Display, Formatter};
use std::fs::{self, Metadata};
use std::io::{self, ErrorKind};
#[cfg(unix)]
use std::os::unix::fs::FileTypeExt;
use std::path::{Path, PathBuf};

use walkdir::DirEntry;

/// Wrapper for a directory entry.
///
/// Backing storage for `WalkEntry`: either a path that was supplied
/// explicitly together with its depth, or an entry produced by walkdir.
#[derive(Debug)]
enum Entry {
/// Wraps an explicit path and depth.
Explicit(PathBuf, usize),
/// Wraps a WalkDir entry.
WalkDir(DirEntry),
}

/// The kind of file a walked path refers to.
///
/// `Unknown` is used when the type could not be classified as any of the
/// other variants.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum FileType {
    Unknown,
    Fifo,
    CharDevice,
    Directory,
    BlockDevice,
    Regular,
    Symlink,
    Socket,
}

impl FileType {
    /// Returns `true` only for [`FileType::Directory`].
    pub fn is_dir(self) -> bool {
        matches!(self, Self::Directory)
    }

    /// Returns `true` only for [`FileType::Regular`].
    pub fn is_file(self) -> bool {
        matches!(self, Self::Regular)
    }

    /// Returns `true` only for [`FileType::Symlink`].
    pub fn is_symlink(self) -> bool {
        matches!(self, Self::Symlink)
    }
}

impl From<fs::FileType> for FileType {
    /// Classifies a standard-library file type.
    ///
    /// Directory, regular file and symlink are checked first, in that order.
    /// On Unix the FIFO, character device, block device and socket kinds are
    /// checked next. Anything else becomes `FileType::Unknown`.
    fn from(t: fs::FileType) -> FileType {
        let portable = if t.is_dir() {
            Some(FileType::Directory)
        } else if t.is_file() {
            Some(FileType::Regular)
        } else if t.is_symlink() {
            Some(FileType::Symlink)
        } else {
            None
        };
        if let Some(kind) = portable {
            return kind;
        }

        #[cfg(unix)]
        {
            // Kinds that only exist through the Unix `FileTypeExt` extension,
            // listed in the order they are tested.
            let special = [
                (t.is_fifo(), FileType::Fifo),
                (t.is_char_device(), FileType::CharDevice),
                (t.is_block_device(), FileType::BlockDevice),
                (t.is_socket(), FileType::Socket),
            ];
            if let Some(&(_, kind)) = special.iter().find(|(hit, _)| *hit) {
                return kind;
            }
        }

        FileType::Unknown
    }
}

/// An error encountered while walking a file system.
///
/// Only the associated path (if any) and the raw OS error code (if any) are
/// stored. Converting back to `io::Error` rebuilds the error from the code,
/// or yields `ErrorKind::Other` when there is none.
#[derive(Clone, Debug)]
pub struct WalkError {
/// Path the error is associated with, when one is known.
path: Option<PathBuf>,
/// Raw OS error code, when the underlying error carried one.
raw: Option<i32>,
}

/// Returns `true` if `e` means the path does not exist: either
/// `ErrorKind::NotFound` or, on Unix, a raw `ENOTDIR` error code.
fn is_not_found(e: &io::Error) -> bool {
    // NotADirectory is nightly-only, so compare the raw errno instead
    #[cfg(unix)]
    let not_a_directory = e.raw_os_error() == Some(uucore::libc::ENOTDIR);
    #[cfg(not(unix))]
    let not_a_directory = false;

    e.kind() == ErrorKind::NotFound || not_a_directory
}

impl WalkError {
    /// Returns the `ErrorKind` of the `io::Error` this error converts to.
    pub fn kind(&self) -> ErrorKind {
        let ioe: io::Error = self.into();
        ioe.kind()
    }

    /// Check for ErrorKind::{NotFound,NotADirectory}.
    pub fn is_not_found(&self) -> bool {
        let ioe: io::Error = self.into();
        is_not_found(&ioe)
    }
}

impl Display for WalkError {
    /// Writes `<path>: <io error>` when a path is recorded, otherwise just
    /// the I/O error message.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let ioe: io::Error = self.into();
        match self.path.as_deref() {
            Some(path) => write!(f, "{}: {}", path.display(), ioe),
            None => write!(f, "{}", ioe),
        }
    }
}

// No methods are overridden: the default `Error` behaviour is used, on top of
// the `Debug` and `Display` implementations of `WalkError`.
impl Error for WalkError {}

impl From<io::Error> for WalkError {
fn from(e: io::Error) -> WalkError {
WalkError::from(&e)
}
}

impl From<&io::Error> for WalkError {
fn from(e: &io::Error) -> WalkError {
WalkError {
path: None,
raw: e.raw_os_error(),
}
}
}

impl From<walkdir::Error> for WalkError {
fn from(e: walkdir::Error) -> WalkError {
WalkError::from(&e)
}
}

impl From<&walkdir::Error> for WalkError {
fn from(e: &walkdir::Error) -> WalkError {
let path = e.path().map(|p| p.to_owned());
let raw = e.io_error().and_then(|e| e.raw_os_error());
WalkError { path, raw }
}
}

impl From<WalkError> for io::Error {
    /// Converts an owned `WalkError` by delegating to the by-reference conversion.
    fn from(err: WalkError) -> Self {
        let borrowed: &WalkError = &err;
        borrowed.into()
    }
}

impl From<&WalkError> for io::Error {
    /// Rebuilds an I/O error from the stored raw OS code, or produces an
    /// `ErrorKind::Other` error when no code was recorded.
    fn from(err: &WalkError) -> Self {
        match err.raw {
            Some(code) => io::Error::from_raw_os_error(code),
            None => io::Error::from(ErrorKind::Other),
        }
    }
}

/// A path encountered while walking a file system.
///
/// Wraps either a walkdir `DirEntry` or an explicit path, and caches the
/// result of the first metadata lookup.
#[derive(Debug)]
pub struct WalkEntry {
/// The wrapped path/dirent.
inner: Entry,
/// Whether to follow symlinks.
/// For explicit paths this makes the metadata lookup try the link target
/// first and fall back to the link itself if the target is not found.
follow: bool,
/// Cached metadata.
/// Filled on the first `metadata()` call; a failed lookup is cached too.
meta: OnceCell<Result<Metadata, WalkError>>,
}

impl WalkEntry {
    /// Creates an entry for an explicitly supplied path.
    ///
    /// `depth` is the value later reported by [`WalkEntry::depth`]. `follow`
    /// selects whether metadata lookups try to follow symlinks. The metadata
    /// cache starts out empty.
    pub fn new(path: impl Into<PathBuf>, depth: usize, follow: bool) -> Self {
        Self {
            inner: Entry::Explicit(path.into(), depth),
            follow,
            meta: OnceCell::new(),
        }
    }

    /// Returns the path of this entry.
    pub fn path(&self) -> &Path {
        match &self.inner {
            Entry::Explicit(path, _) => path.as_path(),
            Entry::WalkDir(ent) => ent.path(),
        }
    }

    /// Consumes the entry and returns its path.
    pub fn into_path(self) -> PathBuf {
        match self.inner {
            Entry::Explicit(path, _) => path,
            Entry::WalkDir(ent) => ent.into_path(),
        }
    }

    /// Returns the last component of the path.
    ///
    /// For explicit paths this is the final component whatever its kind. A
    /// path with no components at all yields the whole path.
    pub fn file_name(&self) -> &OsStr {
        match &self.inner {
            Entry::Explicit(path, _) => {
                // Path::file_name() only works if the last component is normal.
                // `Components` is double-ended, so take the final component
                // from the back instead of walking the whole path with last().
                path.components()
                    .next_back()
                    .map(|c| c.as_os_str())
                    .unwrap_or_else(|| path.as_os_str())
            }
            Entry::WalkDir(ent) => ent.file_name(),
        }
    }

    /// Returns the depth of this entry: the stored value for explicit paths,
    /// or the depth reported by walkdir.
    pub fn depth(&self) -> usize {
        match &self.inner {
            Entry::Explicit(_, depth) => *depth,
            Entry::WalkDir(ent) => ent.depth(),
        }
    }

    /// Get the metadata on a cache miss.
    ///
    /// When following symlinks, the target's metadata is tried first. If the
    /// target is not found, the lookup falls back to the metadata of the path
    /// itself; any other error is returned unchanged.
    fn get_metadata(&self) -> io::Result<Metadata> {
        if self.follow {
            match self.path().metadata() {
                Ok(meta) => return Ok(meta),
                Err(e) if !is_not_found(&e) => return Err(e),
                // Not found: fall through to the non-following lookup.
                _ => (),
            }
        }

        self.path().symlink_metadata()
    }

    /// Returns the metadata for this entry, computing it on first use.
    ///
    /// The outcome of the first lookup is cached, whether it succeeded or
    /// failed.
    ///
    /// # Errors
    ///
    /// Returns a clone of the cached [`WalkError`] if the lookup failed.
    pub fn metadata(&self) -> Result<&Metadata, WalkError> {
        let cached = self.meta.get_or_init(|| match &self.inner {
            Entry::Explicit(_, _) => self.get_metadata().map_err(WalkError::from),
            Entry::WalkDir(ent) => ent.metadata().map_err(WalkError::from),
        });
        cached.as_ref().map_err(WalkError::clone)
    }

    /// Returns the file type of this entry.
    ///
    /// For explicit paths the type comes from the (cached) metadata, and
    /// `FileType::Unknown` is returned if that lookup failed. For walkdir
    /// entries the type reported by walkdir is used.
    pub fn file_type(&self) -> FileType {
        match &self.inner {
            Entry::Explicit(_, _) => self
                .metadata()
                .map(|m| m.file_type().into())
                .unwrap_or(FileType::Unknown),
            Entry::WalkDir(ent) => ent.file_type().into(),
        }
    }

    /// Returns whether the path itself is a symbolic link.
    pub fn path_is_symlink(&self) -> bool {
        match &self.inner {
            Entry::Explicit(path, _) => {
                if self.follow {
                    // The cached metadata may describe the link target, so
                    // look at the link itself.
                    path.symlink_metadata()
                        .is_ok_and(|m| m.file_type().is_symlink())
                } else {
                    self.file_type().is_symlink()
                }
            }
            Entry::WalkDir(ent) => ent.path_is_symlink(),
        }
    }
}

impl From<DirEntry> for WalkEntry {
    /// Wraps a walkdir entry with an empty metadata cache.
    ///
    /// `follow` is set when the entry's path is a symlink but its reported
    /// file type is not a symlink. NOTE(review): presumably this means walkdir
    /// already followed the link; confirm against the walkdir docs.
    fn from(entry: DirEntry) -> WalkEntry {
        let follow = if entry.path_is_symlink() {
            !entry.file_type().is_symlink()
        } else {
            false
        };

        WalkEntry {
            inner: Entry::WalkDir(entry),
            follow,
            meta: OnceCell::new(),
        }
    }
}

impl TryFrom<walkdir::Result<DirEntry>> for WalkEntry {
type Error = WalkError;

fn try_from(result: walkdir::Result<DirEntry>) -> Result<WalkEntry, WalkError> {
match result {
Ok(entry) => Ok(entry.into()),
Err(e) => {
// Detect broken symlinks and replace them with explicit entries
match (e.path(), e.io_error()) {
(Some(path), Some(ioe)) if is_not_found(ioe) => {
if let Ok(meta) = path.symlink_metadata() {
return Ok(WalkEntry {
inner: Entry::Explicit(path.into(), e.depth()),
follow: false,
meta: Ok(meta).into(),
});
}
}
_ => {}
}

Err(e.into())
}
}
}
}
5 changes: 2 additions & 3 deletions src/find/matchers/exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@ use std::ffi::OsString;
use std::io::{stderr, Write};
use std::path::Path;
use std::process::Command;
use walkdir::DirEntry;

use super::{Matcher, MatcherIO};
use super::{Matcher, MatcherIO, WalkEntry};

enum Arg {
FileArg(Vec<OsString>),
Expand Down Expand Up @@ -52,7 +51,7 @@ impl SingleExecMatcher {
}

impl Matcher for SingleExecMatcher {
fn matches(&self, file_info: &DirEntry, _: &mut MatcherIO) -> bool {
fn matches(&self, file_info: &WalkEntry, _: &mut MatcherIO) -> bool {
let mut command = Command::new(&self.executable);
let path_to_file = if self.exec_in_parent_dir {
if let Some(f) = file_info.path().file_name() {
Expand Down
Loading

0 comments on commit 5a6576a

Please sign in to comment.