Skip to content

Commit 00b1914

Browse files
committed
pack: Add support for using a common pack dir for multiple traces
When packaging traces from CI, it's fairly common to have hundreds of traces that all basically share the exact same files. This can lead to some fairly large traces after packing. Of course, some file-systems support block-level deduplication and a compression library would certainly be able to dedup it back down as well, but it'd be faster to not create trace directories that big on disk in the first place. This adds a `--pack-dir` option to `rr pack <traces...>`, which is used as the common pack dir for all traces. Rather than packing files into their own trace dirs, they will be packed into the `pack-dir`, with relative symlinks from the original trace directories to the pack dir. An unmodified rr will be able to replay these as long as the pack dir is moved along with the trace dirs.
1 parent 5b7c1f4 commit 00b1914

File tree

2 files changed

+32
-4
lines changed

2 files changed

+32
-4
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ endif()
9494
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAGS_COMMON} -Wstrict-prototypes -std=gnu11")
9595
# Define __STDC_LIMIT_MACROS so |#include <stdint.h>| works as expected.
9696
# Define __STDC_FORMAT_MACROS so |#include <inttypes.h>| works as expected.
97-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAGS_COMMON} -D__STDC_LIMIT_MACROS -D__STDC_FORMAT_MACROS -std=c++14")
97+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAGS_COMMON} -D__STDC_LIMIT_MACROS -D__STDC_FORMAT_MACROS -std=c++17")
9898

9999
# We support three build types:
100100
# DEBUG: suitable for debugging rr

src/PackCommand.cc

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <unistd.h>
1414

1515
#include <algorithm>
16+
#include <filesystem>
1617
#include <limits>
1718
#include <map>
1819
#include <set>
@@ -67,6 +68,7 @@ struct PackFlags {
6768
/* If true, insert symlinks into the trace dir which point to the original
6869
* files, rather than copying the files themselves */
6970
bool symlink;
71+
std::string pack_dir;
7072

7173
PackFlags()
7274
: symlink(false) {}
@@ -99,6 +101,12 @@ bool operator<(const FsExtentsHash& h1, const FsExtentsHash& h2) {
99101
return memcmp(h1.bytes, h2.bytes, sizeof(h1)) < 0;
100102
}
101103

104+
struct PackDir {
105+
string dir;
106+
map<FileHash, string> mapped_files;
107+
PackDir(string dir) : dir(dir) {}
108+
};
109+
102110
static bool name_comparator(const TraceReader::MappedData& d1,
103111
const TraceReader::MappedData d2) {
104112
return d1.file_name < d2.file_name;
@@ -528,7 +536,8 @@ static map<string, string> compute_canonical_symlink_map(
528536
* for all files with that hash.
529537
*/
530538
static map<string, string> compute_canonical_mmapped_files(
531-
const string& trace_dir) {
539+
const string& trace_dir,
540+
PackDir &pack_dir) {
532541
map<string, FileHash> file_info = gather_file_info(trace_dir);
533542

534543
map<FileHash, string> hash_to_name;
@@ -545,10 +554,24 @@ static map<string, string> compute_canonical_mmapped_files(
545554

546555
int name_index = 0;
547556
for (auto& p : hash_to_name) {
557+
// Check if this is already in our common pack directory
558+
auto it = pack_dir.mapped_files.find(p.first);
559+
if (it != pack_dir.mapped_files.end()) {
560+
LOG(debug) << "Found in common pack dir";
561+
p.second = symlink_into_trace(fs::relative(it->second, trace_dir), trace_dir, &name_index);
562+
continue;
563+
}
564+
548565
// Copy hardlinked files into the trace to avoid the possibility of someone
549566
// overwriting the original file.
550567
if (is_hardlink(p.second) || !is_in_trace_dir(p.second, trace_dir)) {
551-
p.second = copy_into_trace(p.second, trace_dir, &name_index);
568+
if (pack_dir.dir != "") {
569+
// If a pack dir is specified, first copy into pack dir, then symlink into trace.
570+
auto path = pack_dir.mapped_files[p.first] = copy_into_trace(p.second, pack_dir.dir, &name_index);
571+
p.second = symlink_into_trace(fs::relative(path, trace_dir), trace_dir, &name_index);
572+
} else {
573+
p.second = copy_into_trace(p.second, trace_dir, &name_index);
574+
}
552575
}
553576
}
554577

@@ -656,6 +679,7 @@ static int pack(const string& trace_dir, const PackFlags& flags) {
656679
dir = reader.dir();
657680
}
658681

682+
PackDir pack_dir(flags.pack_dir);
659683
char buf[PATH_MAX];
660684
char* ret = realpath(dir.c_str(), buf);
661685
if (!ret) {
@@ -670,7 +694,7 @@ static int pack(const string& trace_dir, const PackFlags& flags) {
670694
delete_unnecessary_files(canonical_symlink_map, abspath);
671695
} else {
672696
map<string, string> canonical_mmapped_files =
673-
compute_canonical_mmapped_files(abspath);
697+
compute_canonical_mmapped_files(abspath, pack_dir);
674698
rewrite_mmaps(canonical_mmapped_files, abspath);
675699
delete_unnecessary_files(canonical_mmapped_files, abspath);
676700
}
@@ -685,6 +709,7 @@ static int pack(const string& trace_dir, const PackFlags& flags) {
685709
static bool parse_pack_arg(vector<string>& args, PackFlags& flags) {
686710
static const OptionSpec options[] = {
687711
{ 0, "symlink", NO_PARAMETER },
712+
{ 1, "pack-dir", HAS_PARAMETER },
688713
};
689714
ParsedOption opt;
690715
auto args_copy = args;
@@ -696,6 +721,9 @@ static bool parse_pack_arg(vector<string>& args, PackFlags& flags) {
696721
case 0:
697722
flags.symlink = true;
698723
break;
724+
case 1:
725+
flags.pack_dir = opt.value;
726+
break;
699727
default:
700728
DEBUG_ASSERT(0 && "Unknown pack option");
701729
}

0 commit comments

Comments
 (0)