Skip to content

Commit d84eeea

Browse files
committed
Add file fingerprint to copy/remove only required files.
Updates the persistent volume using a fingerprint of all files in the project, skipping any cache directories by default. If a file has been added, or its modified date has changed, it is copied to the volume. If a file has been removed from the project, it is also removed from the volume. To avoid a massive command-line argument, we copy a file listing each removed path, one per line, to the container, and then remove those paths by running a script inside the container.
1 parent b11b7ba commit d84eeea

File tree

2 files changed

+222
-10
lines changed

2 files changed

+222
-10
lines changed

src/docker/remote.rs

Lines changed: 212 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
use std::io::Read;
1+
use std::collections::BTreeMap;
2+
use std::io::{self, BufRead, Read, Write};
23
use std::path::{Path, PathBuf};
34
use std::process::ExitStatus;
4-
use std::{env, fs};
5+
use std::{env, fs, time};
56

67
use super::engine::Engine;
78
use super::shared::*;
@@ -394,6 +395,205 @@ pub fn copy_volume_container_rust(
394395
Ok(())
395396
}
396397

398+
/// Project fingerprint: maps each file's path relative to the project
/// root to its last-modified time (stored with millisecond precision).
type FingerprintMap = BTreeMap<String, time::SystemTime>;
399+
400+
fn parse_project_fingerprint(path: &Path) -> Result<FingerprintMap> {
401+
let epoch = time::SystemTime::UNIX_EPOCH;
402+
let file = fs::OpenOptions::new().read(true).open(path)?;
403+
let reader = io::BufReader::new(file);
404+
let mut result = BTreeMap::new();
405+
for line in reader.lines() {
406+
let line = line?;
407+
let (timestamp, relpath) = line
408+
.split_once('\t')
409+
.ok_or_else(|| eyre::eyre!("unable to parse fingerprint line '{line}'"))?;
410+
let modified = epoch + time::Duration::from_millis(timestamp.parse::<u64>()?);
411+
result.insert(relpath.to_string(), modified);
412+
}
413+
414+
Ok(result)
415+
}
416+
417+
fn write_project_fingerprint(path: &Path, fingerprint: &FingerprintMap) -> Result<()> {
418+
let epoch = time::SystemTime::UNIX_EPOCH;
419+
let mut file = fs::OpenOptions::new().write(true).create(true).open(path)?;
420+
for (relpath, modified) in fingerprint {
421+
let timestamp = modified.duration_since(epoch)?.as_millis() as u64;
422+
writeln!(file, "{timestamp}\t{relpath}")?;
423+
}
424+
425+
Ok(())
426+
}
427+
428+
fn read_dir_fingerprint(
429+
home: &Path,
430+
path: &Path,
431+
map: &mut FingerprintMap,
432+
copy_cache: bool,
433+
) -> Result<()> {
434+
let epoch = time::SystemTime::UNIX_EPOCH;
435+
for entry in fs::read_dir(path)? {
436+
let file = entry?;
437+
let file_type = file.file_type()?;
438+
// only parse known files types: 0 or 1 of these tests can pass.
439+
if file_type.is_dir() {
440+
if copy_cache || !is_cachedir(&file) {
441+
read_dir_fingerprint(home, &path.join(file.file_name()), map, copy_cache)?;
442+
}
443+
} else if file_type.is_file() || file_type.is_symlink() {
444+
// we're mounting to the same location, so this should fine
445+
// we need to round the modified date to millis.
446+
let modified = file.metadata()?.modified()?;
447+
let millis = modified.duration_since(epoch)?.as_millis() as u64;
448+
let rounded = epoch + time::Duration::from_millis(millis);
449+
let relpath = file.path().strip_prefix(home)?.as_posix()?;
450+
map.insert(relpath, rounded);
451+
}
452+
}
453+
454+
Ok(())
455+
}
456+
457+
fn get_project_fingerprint(home: &Path, copy_cache: bool) -> Result<FingerprintMap> {
458+
let mut result = BTreeMap::new();
459+
read_dir_fingerprint(home, home, &mut result, copy_cache)?;
460+
Ok(result)
461+
}
462+
463+
/// Compares two fingerprints and reports what must be synchronized.
///
/// Returns `(changed, removed)`:
/// - `changed` holds every path in `current` that is missing from
///   `previous` or whose modification time differs (added or updated).
/// - `removed` holds every path in `previous` that is absent from
///   `current`.
///
/// Both lists borrow their paths from the respective map and follow the
/// map's (sorted) key order.
fn get_fingerprint_difference<'a, 'b>(
    previous: &'a FingerprintMap,
    current: &'b FingerprintMap,
) -> (Vec<&'b str>, Vec<&'a str>) {
    // look the keys up by reference: no per-entry `String` allocation.
    let changed: Vec<&'b str> = current
        .iter()
        .filter(|(relpath, modified)| previous.get(*relpath) != Some(*modified))
        .map(|(relpath, _)| relpath.as_str())
        .collect();
    let removed: Vec<&'a str> = previous
        .keys()
        .filter(|relpath| !current.contains_key(*relpath))
        .map(String::as_str)
        .collect();
    (changed, removed)
}
485+
486+
// copy files for a docker volume, for remote host support
487+
// provides a list of files relative to src.
488+
fn copy_volume_file_list(
489+
engine: &Engine,
490+
container: &str,
491+
src: &Path,
492+
dst: &Path,
493+
files: &[&str],
494+
verbose: bool,
495+
) -> Result<ExitStatus> {
496+
// SAFETY: safe, single-threaded execution.
497+
let tempdir = unsafe { temp::TempDir::new()? };
498+
let temppath = tempdir.path();
499+
for file in files {
500+
let src_path = src.join(file);
501+
let dst_path = temppath.join(file);
502+
fs::create_dir_all(dst_path.parent().expect("must have parent"))?;
503+
fs::copy(&src_path, &dst_path)?;
504+
}
505+
copy_volume_files(engine, container, temppath, dst, verbose)
506+
}
507+
508+
// removed files from a docker volume, for remote host support
509+
// provides a list of files relative to src.
510+
fn remove_volume_file_list(
511+
engine: &Engine,
512+
container: &str,
513+
dst: &Path,
514+
files: &[&str],
515+
verbose: bool,
516+
) -> Result<ExitStatus> {
517+
const PATH: &str = "/tmp/remove_list";
518+
let mut script = vec![];
519+
if verbose {
520+
script.push("set -x".to_string());
521+
}
522+
script.push(format!(
523+
"cat \"{PATH}\" | while read line; do
524+
rm -f \"${{line}}\"
525+
done
526+
527+
rm \"{PATH}\"
528+
"
529+
));
530+
531+
// SAFETY: safe, single-threaded execution.
532+
let mut tempfile = unsafe { temp::TempFile::new()? };
533+
for file in files {
534+
writeln!(tempfile.file(), "{}", dst.join(file).as_posix()?)?;
535+
}
536+
537+
// need to avoid having hundreds of files on the command, so
538+
// just provide a single file name.
539+
subcommand(engine, "cp")
540+
.arg(tempfile.path())
541+
.arg(format!("{container}:{PATH}"))
542+
.run_and_get_status(verbose, true)?;
543+
544+
subcommand(engine, "exec")
545+
.arg(container)
546+
.args(&["sh", "-c", &script.join("\n")])
547+
.run_and_get_status(verbose, true)
548+
.map_err(Into::into)
549+
}
550+
551+
fn copy_volume_container_project(
552+
engine: &Engine,
553+
container: &str,
554+
src: &Path,
555+
dst: &Path,
556+
volume: &VolumeId,
557+
copy_cache: bool,
558+
verbose: bool,
559+
) -> Result<()> {
560+
let copy_all = || {
561+
if copy_cache {
562+
copy_volume_files(engine, container, src, dst, verbose)
563+
} else {
564+
copy_volume_files_nocache(engine, container, src, dst, verbose)
565+
}
566+
};
567+
match volume {
568+
VolumeId::Keep(_) => {
569+
let parent = temp::dir()?;
570+
fs::create_dir_all(&parent)?;
571+
let fingerprint = parent.join(container);
572+
let current = get_project_fingerprint(src, copy_cache)?;
573+
if fingerprint.exists() {
574+
let previous = parse_project_fingerprint(&fingerprint)?;
575+
let (changed, removed) = get_fingerprint_difference(&previous, &current);
576+
write_project_fingerprint(&fingerprint, &current)?;
577+
578+
if !changed.is_empty() {
579+
copy_volume_file_list(engine, container, src, dst, &changed, verbose)?;
580+
}
581+
if !removed.is_empty() {
582+
remove_volume_file_list(engine, container, dst, &removed, verbose)?;
583+
}
584+
} else {
585+
write_project_fingerprint(&fingerprint, &current)?;
586+
copy_all()?;
587+
}
588+
}
589+
VolumeId::Discard(_) => {
590+
copy_all()?;
591+
}
592+
}
593+
594+
Ok(())
595+
}
596+
397597
fn run_and_get_status(engine: &Engine, args: &[&str], verbose: bool) -> Result<ExitStatus> {
398598
command(engine)
399599
.args(args)
@@ -645,7 +845,15 @@ pub(crate) fn run(
645845
} else {
646846
mount_prefix_path.join("project")
647847
};
648-
copy(&dirs.host_root, &mount_root)?;
848+
copy_volume_container_project(
849+
engine,
850+
&container,
851+
&dirs.host_root,
852+
&mount_root,
853+
&volume,
854+
copy_cache,
855+
verbose,
856+
)?;
649857

650858
let mut copied = vec![
651859
(&dirs.xargo, mount_prefix_path.join("xargo")),
@@ -692,7 +900,7 @@ pub(crate) fn run(
692900
let mut final_args = vec![];
693901
let mut iter = args.iter().cloned();
694902
let mut has_target_dir = false;
695-
let target_dir_string = target_dir.to_utf8()?.to_string();
903+
let target_dir_string = target_dir.as_posix()?;
696904
while let Some(arg) = iter.next() {
697905
if arg == "--target-dir" {
698906
has_target_dir = true;

src/temp.rs

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,12 @@ pub(crate) unsafe fn clean() {
3434

3535
/// # Safety
3636
/// Safe as long as we have single-threaded execution.
37-
unsafe fn push_tempfile() -> Result<&'static Path> {
37+
unsafe fn push_tempfile() -> Result<&'static mut tempfile::NamedTempFile> {
3838
let parent = dir()?;
3939
fs::create_dir_all(&parent).ok();
4040
let file = tempfile::NamedTempFile::new_in(&parent)?;
4141
FILES.push(file);
42-
Ok(FILES.last().unwrap().path())
42+
Ok(FILES.last_mut().unwrap())
4343
}
4444

4545
/// # Safety
@@ -50,20 +50,24 @@ unsafe fn pop_tempfile() -> Option<tempfile::NamedTempFile> {
5050

5151
#[derive(Debug)]
5252
pub struct TempFile {
53-
path: &'static Path,
53+
file: &'static mut tempfile::NamedTempFile,
5454
}
5555

5656
impl TempFile {
5757
/// # Safety
5858
/// Safe as long as we have single-threaded execution.
5959
pub unsafe fn new() -> Result<Self> {
6060
Ok(Self {
61-
path: push_tempfile()?,
61+
file: push_tempfile()?,
6262
})
6363
}
6464

65-
pub fn path(&self) -> &'static Path {
66-
self.path
65+
pub fn file(&mut self) -> &mut tempfile::NamedTempFile {
66+
self.file
67+
}
68+
69+
pub fn path(&self) -> &Path {
70+
self.file.path()
6771
}
6872
}
6973

0 commit comments

Comments
 (0)