Skip to content

Commit 94374fe

Browse files
authored
Merge pull request #10602 from benlangmuir/eng/blangmuir/validate-if-needed-swift-123542312-release/6.2
[🍒][llvm][cas] Add validate-if-needed to recover from invalid data
2 parents 900de58 + fe19246 commit 94374fe

26 files changed

+757
-34
lines changed

clang/include/clang/CAS/CASOptions.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,8 @@ class CASOptions : public CASConfiguration {
117117
/// default on-disk CAS, otherwise this is a noop.
118118
void ensurePersistentCAS();
119119

120+
void getResolvedCASPath(llvm::SmallVectorImpl<char> &Result) const;
121+
120122
private:
121123
/// Initialize Cached CAS and ActionCache.
122124
llvm::Error initCache() const;

clang/lib/CAS/CASOptions.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ llvm::Error CASOptions::initCache() const {
108108
}
109109

110110
SmallString<256> PathBuf;
111+
getResolvedCASPath(PathBuf);
111112
if (CASPath == "auto") {
112113
getDefaultOnDiskCASPath(PathBuf);
113114
CASPath = PathBuf;
@@ -119,3 +120,11 @@ llvm::Error CASOptions::initCache() const {
119120
std::tie(Cache.CAS, Cache.AC) = std::move(DBs);
120121
return llvm::Error::success();
121122
}
123+
124+
/// Write the CAS path that this configuration resolves to into \p Result.
///
/// When \c CASPath is the literal string "auto", \p Result is filled in by
/// \c getDefaultOnDiskCASPath(); otherwise \p Result is overwritten with a
/// copy of \c CASPath. This function only reads \c CASPath.
///
/// \param Result output buffer that receives the path.
void CASOptions::getResolvedCASPath(SmallVectorImpl<char> &Result) const {
125+
if (CASPath == "auto") {
126+
getDefaultOnDiskCASPath(Result);
127+
} else {
128+
Result.assign(CASPath.begin(), CASPath.end());
129+
}
130+
}

clang/test/CAS/depscan-cas-log.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// Ensure both the first clang process and the daemon have logging enabled.
2+
// It's hard to check this exhaustively, but in practice if the daemon does not
3+
// enable logging there are currently zero records in the log.
4+
5+
// RUN: rm -rf %t && mkdir %t
6+
// RUN: env LLVM_CACHE_CAS_PATH=%t/cas LLVM_CAS_LOG=1 LLVM_CAS_DISABLE_VALIDATION=1 %clang \
7+
// RUN: -cc1depscan -fdepscan=daemon -fdepscan-include-tree -o - \
8+
// RUN: -cc1-args -cc1 -triple x86_64-apple-macosx11.0.0 -emit-obj %s -o %t/t.o -fcas-path %t/cas
9+
// RUN: FileCheck %s --input-file %t/cas/v1.log
10+
11+
// CHECK: [[PID1:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}v8.index'
12+
// CHECK: [[PID1]] {{[0-9]*}}: create subtrie
13+
14+
// CHECK: [[PID2:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}v8.index'
15+
// Even a minimal compilation involves at least 9 records for the cache key.
16+
// CHECK-COUNT-9: [[PID2]] {{[0-9]*}}: create record
17+
18+
// CHECK: [[PID1]] {{[0-9]*}}: close mmap '{{.*}}v8.index'

clang/test/CAS/validate-once.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// RUN: rm -rf %t
2+
3+
// RUN: llvm-cas --cas %t/cas --ingest %s
4+
// RUN: mv %t/cas/v1.1/v8.data %t/cas/v1.1/v8.data.bak
5+
6+
// RUN: %clang -cc1depscand -execute %{clang-daemon-dir}/%basename_t -cas-args -fcas-path %t/cas -- \
7+
// RUN: %clang -target x86_64-apple-macos11 -I %S/Inputs \
8+
// RUN: -Xclang -fcas-path -Xclang %t/cas \
9+
// RUN: -fdepscan=daemon -fdepscan-daemon=%{clang-daemon-dir}/%basename_t -fsyntax-only -x c %s
10+
11+
// RUN: ls %t/cas/corrupt.0.v1.1
12+
13+
// RUN: llvm-cas --cas %t/cas --validate-if-needed | FileCheck %s -check-prefix=SKIPPED
14+
// SKIPPED: validation skipped
15+
16+
#include "test.h"
17+
18+
int func(void);

clang/tools/driver/cc1depscanProtocol.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "llvm/Support/Process.h"
1818
#include "llvm/Support/Signals.h"
1919
#include "llvm/Support/StringSaver.h"
20+
#include <cstdlib>
2021

2122
#if LLVM_ON_UNIX
2223
#include <sys/socket.h> // FIXME: Unix-only. Not portable.
@@ -186,10 +187,20 @@ Expected<ScanDaemon> ScanDaemon::launchDaemon(StringRef BasePath,
186187
return llvm::errorCodeToError(std::error_code(EC, std::generic_category()));
187188
#endif
188189

190+
static constexpr const char *PassThroughEnv[] = {
191+
"LLVM_CAS_LOG",
192+
"LLVM_CAS_DISABLE_VALIDATION",
193+
};
194+
SmallVector<const char *> EnvP;
195+
for (const char *Name : PassThroughEnv)
196+
if (const char *Value = getenv(Name))
197+
EnvP.push_back(Saver.save(llvm::Twine(Name) + "=" + Value).data());
198+
EnvP.push_back(nullptr);
199+
189200
::pid_t Pid;
190201
int EC = ::posix_spawn(&Pid, Args[0], /*file_actions=*/nullptr, &Attrs,
191202
const_cast<char **>(LaunchArgs.data()),
192-
/*envp=*/nullptr);
203+
const_cast<char **>(EnvP.data()));
193204
if (EC)
194205
return llvm::errorCodeToError(std::error_code(EC, std::generic_category()));
195206

clang/tools/driver/cc1depscan_main.cpp

Lines changed: 63 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,11 @@
2828
#include "clang/Tooling/DependencyScanning/ScanAndUpdateArgs.h"
2929
#include "llvm/ADT/ArrayRef.h"
3030
#include "llvm/ADT/ScopeExit.h"
31+
#include "llvm/ADT/SmallVector.h"
3132
#include "llvm/ADT/Statistic.h"
3233
#include "llvm/Bitstream/BitstreamReader.h"
3334
#include "llvm/CAS/ActionCache.h"
35+
#include "llvm/CAS/BuiltinUnifiedCASDatabases.h"
3436
#include "llvm/CAS/CASProvidingFileSystem.h"
3537
#include "llvm/CAS/CachingOnDiskFileSystem.h"
3638
#include "llvm/CAS/HierarchicalTreeBuilder.h"
@@ -41,6 +43,7 @@
4143
#include "llvm/Support/Compiler.h"
4244
#include "llvm/Support/Error.h"
4345
#include "llvm/Support/ErrorHandling.h"
46+
#include "llvm/Support/FileSystem.h"
4447
#include "llvm/Support/ManagedStatic.h"
4548
#include "llvm/Support/Path.h"
4649
#include "llvm/Support/PrefixMapper.h"
@@ -52,6 +55,7 @@
5255
#include "llvm/Support/raw_ostream.h"
5356
#include <cstdio>
5457
#include <mutex>
58+
#include <optional>
5559
#include <shared_mutex>
5660

5761
#if LLVM_ON_UNIX
@@ -631,8 +635,8 @@ namespace {
631635
struct ScanServer {
632636
const char *Argv0 = nullptr;
633637
SmallString<128> BasePath;
634-
/// List of cas options.
635-
ArrayRef<const char *> CASArgs;
638+
CASOptions CASOpts;
639+
bool ProduceIncludeTree = true;
636640
int PidFD = -1;
637641
int ListenSocket = -1;
638642
/// \p std::nullopt means it runs indefinitely.
@@ -641,7 +645,7 @@ struct ScanServer {
641645

642646
~ScanServer() { shutdown(); }
643647

644-
void start(bool Exclusive);
648+
void start(bool Exclusive, ArrayRef<const char *> CASArgs);
645649
int listen();
646650

647651
/// Tear down the socket and bind file immediately but wait till all existing
@@ -706,13 +710,13 @@ int cc1depscand_main(ArrayRef<const char *> Argv, const char *Argv0,
706710
// particular "build session", to shutdown, then have it stay alive until the
707711
// session is finished.
708712
bool LongRunning = false;
709-
713+
ArrayRef<const char *> CASArgs;
710714
for (const auto *A = Argv.begin() + 2; A != Argv.end(); ++A) {
711715
StringRef Arg(*A);
712716
if (Arg == "-long-running")
713717
LongRunning = true;
714718
else if (Arg == "-cas-args") {
715-
Server.CASArgs = ArrayRef(A + 1, Argv.end());
719+
CASArgs = ArrayRef(A + 1, Argv.end());
716720
break;
717721
}
718722
}
@@ -723,7 +727,7 @@ int cc1depscand_main(ArrayRef<const char *> Argv, const char *Argv0,
723727
reportError(Twine("cannot create basedir: ") + EC.message());
724728

725729
if (Command == "-serve") {
726-
Server.start(/*Exclusive*/ true);
730+
Server.start(/*Exclusive*/ true, CASArgs);
727731
return Server.listen();
728732

729733
} else if (Command == "-execute") {
@@ -734,7 +738,7 @@ int cc1depscand_main(ArrayRef<const char *> Argv, const char *Argv0,
734738
}
735739

736740
// Make sure to start the server before executing the command.
737-
Server.start(/*Exclusive*/ true);
741+
Server.start(/*Exclusive*/ true, CASArgs);
738742
std::thread ServerThread([&Server]() { Server.listen(); });
739743

740744
setenv("CLANG_CACHE_SCAN_DAEMON_SOCKET_PATH", Server.BasePath.c_str(),
@@ -785,11 +789,61 @@ int cc1depscand_main(ArrayRef<const char *> Argv, const char *Argv0,
785789
openAndReplaceFD(1, LogOutPath);
786790
openAndReplaceFD(2, LogErrPath);
787791

788-
Server.start(/*Exclusive*/ false);
792+
Server.start(/*Exclusive*/ false, CASArgs);
789793
return Server.listen();
790794
}
791795

792-
void ScanServer::start(bool Exclusive) {
796+
/// Look for an \c llvm-cas executable near the currently running binary.
///
/// Two locations are probed, in order:
///   1. \c llvm-cas in the same directory as the main executable (as reported
///      by \c fs::getMainExecutable).
///   2. If that directory is named "bin": \c local/bin/llvm-cas under its
///      parent directory (e.g. <prefix>/bin -> <prefix>/local/bin/llvm-cas).
///
/// \param Argv0 forwarded to \c fs::getMainExecutable.
/// \param Storage buffer that holds the candidate path; the returned
/// \c StringRef points into it, so it must stay alive while the result is used.
///
/// \returns the first candidate for which \c fs::exists is true, or
/// \c std::nullopt if neither exists (or the directory is not named "bin").
static std::optional<StringRef>
797+
findLLVMCasBinary(const char *Argv0, llvm::SmallVectorImpl<char> &Storage) {
798+
using namespace llvm::sys;
799+
std::string Path = fs::getMainExecutable(Argv0, (void *)cc1depscan_main);
800+
Storage.assign(Path.begin(), Path.end());
801+
// Replace the executable's file name with "llvm-cas".
path::remove_filename(Storage);
802+
path::append(Storage, "llvm-cas");
803+
StringRef PathStr(Storage.data(), Storage.size());
804+
if (fs::exists(PathStr))
805+
return PathStr;
806+
// Look for a corresponding usr/local/bin/llvm-cas
807+
PathStr = path::parent_path(PathStr);
808+
if (path::filename(PathStr) != "bin")
809+
return std::nullopt;
810+
PathStr = path::parent_path(PathStr);
811+
// Keep only the prefix above "bin" in Storage, then append the fallback
// location to it.
Storage.truncate(PathStr.size());
812+
path::append(Storage, "local", "bin", "llvm-cas");
813+
PathStr = StringRef{Storage.data(), Storage.size()};
814+
if (fs::exists(PathStr))
815+
return PathStr;
816+
return std::nullopt;
817+
}
818+
819+
void ScanServer::start(bool Exclusive, ArrayRef<const char *> CASArgs) {
820+
// Parse CAS options and validate if needed.
821+
DiagnosticsEngine Diags(new DiagnosticIDs(), new DiagnosticOptions());
822+
823+
const OptTable &Opts = clang::driver::getDriverOptTable();
824+
unsigned MissingArgIndex, MissingArgCount;
825+
auto ParsedCASArgs =
826+
Opts.ParseArgs(CASArgs, MissingArgIndex, MissingArgCount);
827+
CompilerInvocation::ParseCASArgs(CASOpts, ParsedCASArgs, Diags);
828+
CASOpts.ensurePersistentCAS();
829+
ProduceIncludeTree =
830+
ParsedCASArgs.hasArg(driver::options::OPT_fdepscan_include_tree);
831+
832+
static std::once_flag ValidateOnce;
833+
std::call_once(ValidateOnce, [&] {
834+
if (getenv("LLVM_CAS_DISABLE_VALIDATION"))
835+
return;
836+
if (CASOpts.CASPath.empty() || !CASOpts.PluginPath.empty())
837+
return;
838+
SmallString<64> LLVMCasStorage;
839+
SmallString<64> CASPath;
840+
CASOpts.getResolvedCASPath(CASPath);
841+
ExitOnErr(llvm::cas::validateOnDiskUnifiedCASDatabasesIfNeeded(
842+
CASPath, /*CheckHash=*/true,
843+
/*AllowRecovery=*/true,
844+
/*Force=*/false, findLLVMCasBinary(Argv0, LLVMCasStorage)));
845+
});
846+
793847
// Check the pidfile.
794848
SmallString<128> PidPath;
795849
(BasePath + ".pid").toVector(PidPath);
@@ -828,16 +882,6 @@ int ScanServer::listen() {
828882
llvm::DefaultThreadPool Pool;
829883

830884
DiagnosticsEngine Diags(new DiagnosticIDs(), new DiagnosticOptions());
831-
CASOptions CASOpts;
832-
const OptTable &Opts = clang::driver::getDriverOptTable();
833-
unsigned MissingArgIndex, MissingArgCount;
834-
auto ParsedCASArgs =
835-
Opts.ParseArgs(CASArgs, MissingArgIndex, MissingArgCount);
836-
CompilerInvocation::ParseCASArgs(CASOpts, ParsedCASArgs, Diags);
837-
CASOpts.ensurePersistentCAS();
838-
bool ProduceIncludeTree =
839-
ParsedCASArgs.hasArg(driver::options::OPT_fdepscan_include_tree);
840-
841885
std::shared_ptr<llvm::cas::ObjectStore> CAS;
842886
std::shared_ptr<llvm::cas::ActionCache> Cache;
843887
std::tie(CAS, Cache) = CASOpts.getOrCreateDatabases(Diags);

llvm/include/llvm/CAS/ActionCache.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,9 @@ class ActionCache {
114114
Globally, std::move(Callback), CancelObj);
115115
}
116116

117+
/// Validate the ActionCache contents.
118+
virtual Error validate() const = 0;
119+
117120
virtual ~ActionCache() = default;
118121

119122
protected:

llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,39 @@ class ObjectStore;
2121
Expected<std::pair<std::unique_ptr<ObjectStore>, std::unique_ptr<ActionCache>>>
2222
createOnDiskUnifiedCASDatabases(StringRef Path);
2323

24+
/// Represents the result of validating the contents using
25+
/// \c validateOnDiskUnifiedCASDatabasesIfNeeded.
26+
///
27+
/// Note: invalid results are handled as an \c Error.
28+
enum class ValidationResult {
29+
/// The data is already valid.
30+
Valid,
31+
/// The data was invalid, but was recovered.
32+
Recovered,
33+
/// Validation was skipped, as it was not needed.
34+
Skipped,
35+
};
36+
37+
/// Validate the data in \p Path, if needed to ensure correctness.
38+
///
39+
/// \param Path directory for the on-disk database.
40+
/// \param CheckHash Whether to validate hashes match the data.
41+
/// \param AllowRecovery Whether to automatically recover from invalid data by
42+
/// marking the files for garbage collection.
43+
/// \param ForceValidation Whether to force validation to occur even if it
44+
/// should not be necessary.
45+
/// \param LLVMCasBinary If provided, validation is performed out-of-process
46+
/// using the given \c llvm-cas executable which protects against crashes
47+
/// during validation. Otherwise validation is performed in-process.
48+
///
49+
/// \returns \c Valid if the data is already valid, \c Recovered if data
50+
/// was invalid but has been cleared, \c Skipped if validation is not needed,
51+
/// or an \c Error if validation cannot be performed or if the data is left
52+
/// in an invalid state because \p AllowRecovery is false.
53+
Expected<ValidationResult> validateOnDiskUnifiedCASDatabasesIfNeeded(
54+
StringRef Path, bool CheckHash, bool AllowRecovery, bool ForceValidation,
55+
std::optional<StringRef> LLVMCasBinary);
56+
2457
} // namespace llvm::cas
2558

2659
#endif // LLVM_CAS_BUILTINUNIFIEDCASDATABASES_H

llvm/include/llvm/CAS/OnDiskCASLogger.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@ class OnDiskCASLogger {
6262
void log_MappedFileRegionBumpPtr_allocate(void *Region, TrieOffset Off,
6363
size_t Size);
6464
void log_UnifiedOnDiskCache_collectGarbage(StringRef Path);
65+
void log_UnifiedOnDiskCache_validateIfNeeded(
66+
StringRef Path, uint64_t BootTime, uint64_t ValidationTime,
67+
bool CheckHash, bool AllowRecovery, bool Force,
68+
std::optional<StringRef> LLVMCas, StringRef ValidationError, bool Skipped,
69+
bool Recovered);
6570
void log_TempFile_create(StringRef Name);
6671
void log_TempFile_keep(StringRef TmpName, StringRef Name, std::error_code EC);
6772
void log_TempFile_remove(StringRef TmpName, std::error_code EC);

llvm/include/llvm/CAS/OnDiskKeyValueDB.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,9 @@ class OnDiskKeyValueDB {
6060
StringRef ValueName, size_t ValueSize,
6161
std::shared_ptr<OnDiskCASLogger> Logger = nullptr);
6262

63+
using CheckValueT = function_ref<Error(FileOffset Offset, ArrayRef<char>)>;
64+
Error validate(CheckValueT CheckValue) const;
65+
6366
private:
6467
OnDiskKeyValueDB(size_t ValueSize, OnDiskHashMappedTrie Cache)
6568
: ValueSize(ValueSize), Cache(std::move(Cache)) {}

llvm/include/llvm/CAS/UnifiedOnDiskCache.h

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#ifndef LLVM_CAS_UNIFIEDONDISKCACHE_H
1010
#define LLVM_CAS_UNIFIEDONDISKCACHE_H
1111

12+
#include "llvm/CAS/BuiltinUnifiedCASDatabases.h"
1213
#include "llvm/CAS/OnDiskGraphDB.h"
1314

1415
namespace llvm::cas::ondisk {
@@ -82,6 +83,34 @@ class UnifiedOnDiskCache {
8283
OnDiskGraphDB::FaultInPolicy FaultInPolicy =
8384
OnDiskGraphDB::FaultInPolicy::FullTree);
8485

86+
/// Validate the data in \p Path, if needed to ensure correctness.
87+
///
88+
/// Note: if invalid data is detected and \p AllowRecovery is true, then
89+
/// recovery requires exclusive access to the CAS and it is an error to
90+
/// attempt recovery if there is concurrent use of the CAS.
91+
///
92+
/// \param Path directory for the on-disk database.
93+
/// \param HashName Identifier name for the hashing algorithm that is going to
94+
/// be used.
95+
/// \param HashByteSize Size for the object digest hash bytes.
96+
/// \param CheckHash Whether to validate hashes match the data.
97+
/// \param AllowRecovery Whether to automatically recover from invalid data by
98+
/// marking the files for garbage collection.
99+
/// \param ForceValidation Whether to force validation to occur even if it
100+
/// should not be necessary.
101+
/// \param LLVMCasBinary If provided, validation is performed out-of-process
102+
/// using the given \c llvm-cas executable which protects against crashes
103+
/// during validation. Otherwise validation is performed in-process.
104+
///
105+
/// \returns \c Valid if the data is already valid, \c Recovered if data
106+
/// was invalid but has been cleared, \c Skipped if validation is not needed,
107+
/// or an \c Error if validation cannot be performed or if the data is left
108+
/// in an invalid state because \p AllowRecovery is false.
109+
static Expected<ValidationResult>
110+
validateIfNeeded(StringRef Path, StringRef HashName, unsigned HashByteSize,
111+
bool CheckHash, bool AllowRecovery, bool ForceValidation,
112+
std::optional<StringRef> LLVMCasBinary);
113+
85114
/// This is called implicitly at destruction time, so it is not required for a
86115
/// client to call this. After calling \p close the only method that is valid
87116
/// to call is \p needsGarbaseCollection.
@@ -124,6 +153,8 @@ class UnifiedOnDiskCache {
124153

125154
~UnifiedOnDiskCache();
126155

156+
Error validateActionCache();
157+
127158
private:
128159
UnifiedOnDiskCache();
129160

0 commit comments

Comments
 (0)