From 00b19144c7b51e228fec0fec1aafba8b5276f940 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Sat, 19 Dec 2020 00:48:06 -0500 Subject: [PATCH] pack: Add support for using a common pack dir for multiple traces When packaging traces from CI, it's fairly common to have hundreds of traces that all basically share the exact same files. This can lead to some fairly large traces after packing. Of course, some file-systems support block-level deduplication and a compression library would certainly be able to dedup it back down as well, but it'd be faster to not create trace directories that big on disk in the first place. This adds a `--pack-dir` option to `rr pack `, which is used as the common pack dir for all traces. Rather than packing files into their own trace dirs, they will be packed into the `pack-dir`, with relative symlinks from the original trace directories to the pack dir. An unmodified rr will be able to replay these as long as the pack dir is moved along with the trace dirs. --- CMakeLists.txt | 2 +- src/PackCommand.cc | 34 +++++++++++++++++++++++++++++++--- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 31e291ae182..e92651c0482 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -94,7 +94,7 @@ endif() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAGS_COMMON} -Wstrict-prototypes -std=gnu11") # Define __STDC_LIMIT_MACROS so |#include | works as expected. # Define __STDC_FORMAT_MACROS so |#include | works as expected. 
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAGS_COMMON} -D__STDC_LIMIT_MACROS -D__STDC_FORMAT_MACROS -std=c++14") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAGS_COMMON} -D__STDC_LIMIT_MACROS -D__STDC_FORMAT_MACROS -std=c++17") # We support three build types: # DEBUG: suitable for debugging rr diff --git a/src/PackCommand.cc b/src/PackCommand.cc index 4e1178b2eac..43a376caa85 100644 --- a/src/PackCommand.cc +++ b/src/PackCommand.cc @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -67,6 +68,7 @@ struct PackFlags { /* If true, insert symlinks into the trace dir which point to the original * files, rather than copying the files themselves */ bool symlink; + std::string pack_dir; PackFlags() : symlink(false) {} @@ -99,6 +101,12 @@ bool operator<(const FsExtentsHash& h1, const FsExtentsHash& h2) { return memcmp(h1.bytes, h2.bytes, sizeof(h1)) < 0; } +struct PackDir { + string dir; + map mapped_files; + PackDir(string dir) : dir(dir) {} +}; + static bool name_comparator(const TraceReader::MappedData& d1, const TraceReader::MappedData d2) { return d1.file_name < d2.file_name; @@ -528,7 +536,8 @@ static map compute_canonical_symlink_map( * for all files with that hash. */ static map compute_canonical_mmapped_files( - const string& trace_dir) { + const string& trace_dir, + PackDir &pack_dir) { map file_info = gather_file_info(trace_dir); map hash_to_name; @@ -545,10 +554,24 @@ static map compute_canonical_mmapped_files( int name_index = 0; for (auto& p : hash_to_name) { + // Check if this in our common pack directory + auto it = pack_dir.mapped_files.find(p.first); + if (it != pack_dir.mapped_files.end()) { + LOG(debug) << "Found in common pack dir"; + p.second = symlink_into_trace(fs::relative(it->second, trace_dir), trace_dir, &name_index); + continue; + } + // Copy hardlinked files into the trace to avoid the possibility of someone // overwriting the original file. 
if (is_hardlink(p.second) || !is_in_trace_dir(p.second, trace_dir)) { - p.second = copy_into_trace(p.second, trace_dir, &name_index); + if (pack_dir.dir != "") { + // If a pack dir is specified, first copy into pack dir, then symlink into trace. + auto path = pack_dir.mapped_files[p.first] = copy_into_trace(p.second, pack_dir.dir, &name_index); + p.second = symlink_into_trace(fs::relative(path, trace_dir), trace_dir, &name_index); + } else { + p.second = copy_into_trace(p.second, trace_dir, &name_index); + } } } @@ -656,6 +679,7 @@ static int pack(const string& trace_dir, const PackFlags& flags) { dir = reader.dir(); } + PackDir pack_dir(flags.pack_dir); char buf[PATH_MAX]; char* ret = realpath(dir.c_str(), buf); if (!ret) { @@ -670,7 +694,7 @@ static int pack(const string& trace_dir, const PackFlags& flags) { delete_unnecessary_files(canonical_symlink_map, abspath); } else { map canonical_mmapped_files = - compute_canonical_mmapped_files(abspath); + compute_canonical_mmapped_files(abspath, pack_dir); rewrite_mmaps(canonical_mmapped_files, abspath); delete_unnecessary_files(canonical_mmapped_files, abspath); } @@ -685,6 +709,7 @@ static int pack(const string& trace_dir, const PackFlags& flags) { static bool parse_pack_arg(vector& args, PackFlags& flags) { static const OptionSpec options[] = { { 0, "symlink", NO_PARAMETER }, + { 1, "pack-dir", HAS_PARAMETER }, }; ParsedOption opt; auto args_copy = args; @@ -696,6 +721,9 @@ static bool parse_pack_arg(vector& args, PackFlags& flags) { case 0: flags.symlink = true; break; + case 1: + flags.pack_dir = opt.value; + break; default: DEBUG_ASSERT(0 && "Unknown pack option"); }