Skip to content

Commit 911efd8

Browse files
committed
Merged PR 728101: BlobLifetimeManager supports multiple universes/namespaces, reading from the change feed, and checkpointing
As preparation for checkpointing, which requires that garbage collection happen for the whole storage account at once, it makes sense to first support garbage collecting multiple namespaces/universes in a single garbage collection run. The idea is that instead of accessing the DB directly, there is an IAccessor, which limits the view of the database to only a given namespace. In practice, this means that each accessor will have a unique set of RocksDb column families that it accesses. Other than that, the logic to create/manage the database stays the same. Another change is that we can now update our view of the world in subsequent runs by reading Azure Storage's change feed. This is extremely important, since otherwise nothing works: on the first run, since we touch everything, nothing is evictable; and on the second run, so much time has passed that, without updating our view of things, we might be deleting blobs with new references. Finally, after both of these changes, I also implemented checkpointing. The checkpoint and all its data will live in different containers in the 0th shard of the cache, as different-sized caches _are different caches_, regardless of whether they share accounts. Ideally, we would never run into this, since we're the ones resharding, but even today we already have that problem, since some of our tests are not using all 100 accounts we've provisioned.
1 parent b64183f commit 911efd8

37 files changed

+2184
-625
lines changed

Public/Src/Cache/ContentStore/BuildXL.Cache.ContentStore.dsc

+1
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ export function getAzureBlobStorageSdkPackagesWithoutNetStandard() : (Managed.Ma
167167
importFrom("Azure.Storage.Common").pkg,
168168
importFrom("Azure.Core").pkg,
169169
importFrom("Azure.Storage.Blobs.Batch").pkg,
170+
importFrom("Azure.Storage.Blobs.ChangeFeed").pkg,
170171
];
171172
}
172173

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// Licensed under the MIT License.
3+
4+
using System;
5+
using System.Text.RegularExpressions;
6+
7+
namespace BuildXL.Cache.ContentStore.Distributed.Blob
8+
{
9+
/// <summary>
/// Fully-qualified location of a blob: the storage account, the container and the path inside the container.
/// Values are obtained from the Azure Blob change feed and uniquely identify a blob within the cache.
/// </summary>
public readonly record struct AbsoluteBlobPath(BlobCacheStorageAccountName Account, BlobCacheContainerName Container, BlobPath Path)
{
    // Captures the container name and the blob path out of a change feed event subject. The pattern is not
    // anchored, so it matches wherever this segment appears inside the subject.
    private static readonly Regex BlobChangeFeedEventSubjectRegex = new(@"/blobServices/default/containers/(?<container>[^/]+)/blobs/(?<path>.+)", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.CultureInvariant);

    /// <summary>
    /// Builds an <see cref="AbsoluteBlobPath"/> from the subject of a change feed event.
    /// </summary>
    /// <param name="account">Storage account the event belongs to; it is stored as-is in the result.</param>
    /// <param name="subject">Event subject containing <c>/blobServices/default/containers/{container}/blobs/{path}</c>.</param>
    /// <exception cref="ArgumentException">The subject does not contain the expected segment.</exception>
    public static AbsoluteBlobPath ParseFromChangeEventSubject(BlobCacheStorageAccountName account, string subject)
    {
        var parsed = BlobChangeFeedEventSubjectRegex.Match(subject);
        if (parsed.Success)
        {
            var containerName = BlobCacheContainerName.Parse(parsed.Groups["container"].Value);

            // The captured path is passed with relative: false, exactly as the change feed reported it.
            var blobPath = new BlobPath(parsed.Groups["path"].Value, relative: false);

            return new AbsoluteBlobPath(account, containerName, blobPath);
        }

        throw new ArgumentException($"Failed to match {nameof(BlobChangeFeedEventSubjectRegex)} to {subject}", nameof(subject));
    }
}
30+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// Licensed under the MIT License.
3+
4+
namespace BuildXL.Cache.ContentStore.Distributed.Blob
5+
{
6+
/// <summary>
/// Identifies one namespace of a blob cache by its universe and namespace names. Every namespace is
/// garbage-collected as a cache of its own, separately from all other namespaces.
/// </summary>
public readonly record struct BlobNamespaceId(string Universe, string Namespace)
{
    /// <summary>
    /// Renders the identifier as the universe and the namespace joined by a single dash.
    /// </summary>
    public override string ToString()
    {
        return string.Concat(Universe, "-", Namespace);
    }
}
14+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// Licensed under the MIT License.
3+
4+
using System;
5+
using System.Collections.Generic;
6+
using System.Linq;
7+
using BuildXL.Cache.ContentStore.Interfaces.Secrets;
8+
9+
#nullable enable
10+
11+
namespace BuildXL.Cache.ContentStore.Distributed.Blob
12+
{
13+
/// <summary>
/// A <see cref="StaticBlobCacheSecretsProvider"/> whose credentials are read once, at construction time, from an
/// environment variable holding a comma-separated list of storage connection strings.
/// </summary>
public class EnvironmentVariableCacheSecretsProvider : StaticBlobCacheSecretsProvider
{
    /// <summary>
    /// Creates the provider from the connection strings stored in <paramref name="environmentVariableName"/>.
    /// </summary>
    /// <param name="environmentVariableName">Name of the environment variable that holds the connection strings.</param>
    /// <exception cref="ArgumentException">The variable is unset or contains no connection strings.</exception>
    public EnvironmentVariableCacheSecretsProvider(string environmentVariableName)
        : base(ExtractCredsFromEnvironmentVariable(environmentVariableName))
    {
    }

    /// <summary>
    /// Reads the given environment variable, splits it on commas and builds one credential per connection string,
    /// keyed by the account name reported by the credential.
    /// </summary>
    /// <param name="environmentVariableName">Name of the environment variable that holds the connection strings.</param>
    /// <returns>Credentials indexed by storage account name.</returns>
    /// <exception cref="ArgumentException">
    /// The variable is unset, empty, or contains only separators and whitespace.
    /// </exception>
    public static Dictionary<BlobCacheStorageAccountName, AzureStorageCredentials> ExtractCredsFromEnvironmentVariable(string environmentVariableName)
    {
        var connectionStringsString = Environment.GetEnvironmentVariable(environmentVariableName);

        // Surrounding whitespace and empty entries (e.g. from a trailing comma or "a, b" formatting) are dropped
        // so they are not turned into bogus credentials that fail later with an unrelated error.
        var connectionStrings = (connectionStringsString ?? string.Empty)
            .Split(',')
            .Select(connString => connString.Trim())
            .Where(connString => connString.Length > 0)
            .ToArray();

        if (connectionStrings.Length == 0)
        {
            throw new ArgumentException($"Connection strings for the L3 cache must be provided via the {environmentVariableName} environment variable " +
                $"in the format of comma-separated strings.");
        }

        var creds = connectionStrings.Select(connString => new AzureStorageCredentials(new PlainTextSecret(connString))).ToArray();
        return creds.ToDictionary(
            cred => BlobCacheStorageAccountName.Parse(cred.GetAccountName()),
            cred => cred);
    }
}
36+
}

Public/Src/Cache/ContentStore/Distributed/Blob/IBlobCacheSecretsProvider.cs

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
// Copyright (c) Microsoft Corporation.
22
// Licensed under the MIT License.
33

4+
using System.Collections.Generic;
45
using System.Threading.Tasks;
56
using BuildXL.Cache.ContentStore.Interfaces.Secrets;
67
using BuildXL.Cache.ContentStore.Tracing.Internal;
8+
using BuildXL.Utilities.Collections;
79

810
#nullable enable
911

@@ -19,6 +21,5 @@ public interface IBlobCacheSecretsProvider
1921
/// </summary>
2022
public Task<AzureStorageCredentials> RetrieveBlobCredentialsAsync(
2123
OperationContext context,
22-
BlobCacheStorageAccountName account,
23-
BlobCacheContainerName container);
24+
BlobCacheStorageAccountName account);
2425
}

Public/Src/Cache/ContentStore/Distributed/Blob/ShardedBlobCacheTopology.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ internal static BlobCacheContainerName[] GenerateContainerNames(string universe,
116116
}).ToArray();
117117
}
118118

119-
internal static (string Metadata, string Content) GenerateMatrix(ShardingScheme scheme)
119+
public static (string Metadata, string Content) GenerateMatrix(ShardingScheme scheme)
120120
{
121121
// The matrix here ensures that metadata does not overlap across sharding schemes. Basically, whenever we add
122122
// or remove shards (or change the sharding algorithm), we will get a new salt. This salt will force us to use
@@ -206,7 +206,7 @@ private Task<Result<BlobContainerClient>> CreateClientAsync(OperationContext con
206206
Tracer,
207207
async context =>
208208
{
209-
var credentials = await _configuration.SecretsProvider.RetrieveBlobCredentialsAsync(context, account, container);
209+
var credentials = await _configuration.SecretsProvider.RetrieveBlobCredentialsAsync(context, account);
210210

211211
BlobClientOptions blobClientOptions = new(BlobClientOptions.ServiceVersion.V2021_02_12)
212212
{

Public/Src/Cache/ContentStore/Distributed/Blob/ShardingScheme.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ public enum ShardingAlgorithm
3333
/// <summary>
3434
/// Specifies a sharding scheme.
3535
/// </summary>
36-
public record ShardingScheme(ShardingAlgorithm Scheme, List<BlobCacheStorageAccountName> Accounts)
36+
public record ShardingScheme(ShardingAlgorithm Scheme, IReadOnlyList<BlobCacheStorageAccountName> Accounts)
3737
{
3838
public IShardingScheme<int, BlobCacheStorageAccountName> Create()
3939
{

Public/Src/Cache/ContentStore/Distributed/Blob/StaticBlobCacheSecretsProvider.cs

+9-3
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
#nullable enable
55
using System.Collections.Generic;
6+
using System.Linq;
67
using System.Threading.Tasks;
78
using BuildXL.Cache.ContentStore.Interfaces.Secrets;
89
using BuildXL.Cache.ContentStore.Tracing;
@@ -17,23 +18,28 @@ public class StaticBlobCacheSecretsProvider : IBlobCacheSecretsProvider
1718
{
1819
protected static Tracer Tracer { get; } = new(nameof(StaticBlobCacheSecretsProvider));
1920

21+
public IReadOnlyList<BlobCacheStorageAccountName> ConfiguredAccounts => _accounts;
22+
2023
private readonly AzureStorageCredentials? _fallback;
2124
private readonly IReadOnlyDictionary<BlobCacheStorageAccountName, AzureStorageCredentials> _credentials = new Dictionary<BlobCacheStorageAccountName, AzureStorageCredentials>();
25+
private readonly IReadOnlyList<BlobCacheStorageAccountName> _accounts;
2226

2327
public StaticBlobCacheSecretsProvider(IReadOnlyDictionary<BlobCacheStorageAccountName, AzureStorageCredentials> credentials, AzureStorageCredentials? fallback = null)
2428
{
2529
_credentials = credentials;
30+
_accounts = _credentials.Keys.ToArray();
2631
_fallback = fallback;
2732
}
2833

2934
public StaticBlobCacheSecretsProvider(AzureStorageCredentials fallback)
3035
{
3136
_fallback = fallback;
37+
_accounts = _credentials.Keys.ToArray();
3238
}
3339

34-
public Task<AzureStorageCredentials> RetrieveBlobCredentialsAsync(OperationContext context, BlobCacheStorageAccountName account, BlobCacheContainerName container)
40+
public Task<AzureStorageCredentials> RetrieveBlobCredentialsAsync(OperationContext context, BlobCacheStorageAccountName account)
3541
{
36-
Tracer.Info(context, $"Fetching credentials. Account=[{account}] Container=[{container}]");
42+
Tracer.Info(context, $"Fetching credentials. Account=[{account}]");
3743

3844
if (_credentials.TryGetValue(account, out var credentials))
3945
{
@@ -45,6 +51,6 @@ public Task<AzureStorageCredentials> RetrieveBlobCredentialsAsync(OperationConte
4551
return Task.FromResult(_fallback);
4652
}
4753

48-
throw new KeyNotFoundException($"Credentials are unavailable for storage account {account} and container {container}");
54+
throw new KeyNotFoundException($"Credentials are unavailable for storage account {account}");
4955
}
5056
}

Public/Src/Cache/ContentStore/Distributed/NuCache/CheckpointManager.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ public sealed class CheckpointManager : StartupShutdownComponentBase
7777

7878
/// <inheritdoc />
7979
public CheckpointManager(
80-
ContentLocationDatabase database,
80+
ICheckpointable database,
8181
ICheckpointRegistry checkpointRegistry,
8282
CentralStorage storage,
8383
CheckpointManagerConfiguration configuration,

Public/Src/Cache/ContentStore/Interfaces/Secrets/AzureStorageCredentials.cs

+23
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
using System.Text.RegularExpressions;
66
using Azure;
77
using Azure.Storage.Blobs;
8+
using Azure.Storage.Blobs.ChangeFeed;
89
using Microsoft.WindowsAzure.Storage;
910
using Microsoft.WindowsAzure.Storage.Auth;
1011
using Microsoft.WindowsAzure.Storage.Blob;
@@ -130,6 +131,28 @@ public BlobServiceClient CreateBlobServiceClient(BlobClientOptions? blobClientOp
130131
};
131132
}
132133

134+
/// <summary>
/// Creates a <see cref="BlobChangeFeedClient"/> authenticated with the secret held by these credentials.
/// </summary>
/// <param name="blobClientOptions">Blob client options; when null, options pinned to service version 2021-02-12 are used.</param>
/// <param name="changeFeedClientOptions">Change feed client options; when null, a default instance is used.</param>
/// <exception cref="NotImplementedException">The secret is neither a plain-text secret nor an updating SAS token.</exception>
public BlobChangeFeedClient CreateBlobChangeFeedClient(BlobClientOptions? blobClientOptions = null, BlobChangeFeedClientOptions? changeFeedClientOptions = null)
{
    // The service version is pinned because tests run against the Azurite emulator, which does not currently
    // support anything newer. The emulator is not being upgraded because its build process is awkward and no
    // prebuilt binaries are provided.
    var effectiveBlobOptions = blobClientOptions ?? new BlobClientOptions(BlobClientOptions.ServiceVersion.V2021_02_12);
    var effectiveChangeFeedOptions = changeFeedClientOptions ?? new BlobChangeFeedClientOptions();

    if (_secret is PlainTextSecret plainText)
    {
        // Plain-text secrets are passed to the client as a connection string.
        return new BlobChangeFeedClient(connectionString: plainText.Secret, effectiveBlobOptions, effectiveChangeFeedOptions);
    }

    if (_secret is UpdatingSasToken sasToken)
    {
        // SAS tokens carry only the account name, so the blob endpoint URI is built from it.
        var serviceUri = new Uri($"https://{sasToken.Token.StorageAccount}.blob.core.windows.net/");
        return new BlobChangeFeedClient(
            serviceUri: serviceUri,
            credential: CreateV12StorageCredentialsFromSasToken(sasToken),
            effectiveBlobOptions,
            effectiveChangeFeedOptions);
    }

    throw new NotImplementedException($"Unknown secret type `{_secret.GetType()}`");
}
155+
133156
/// <nodoc />
134157
public BlobContainerClient CreateContainerClient(string containerName, BlobClientOptions? blobClientOptions = null)
135158
{

Public/Src/Cache/LifetimeManager/App/BuildXL.Cache.BlobLifetimeManager.App.dsc

+5-4
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
namespace App {
55
@@public
6-
export const exe = !BuildXLSdk.Flags.isMicrosoftInternal ? undefined : BuildXLSdk.executable({
6+
export const exe = BuildXLSdk.executable({
77
assemblyName: "BuildXL.Cache.BlobLifetimeManager",
88
sources: globR(d`.`,"*.cs"),
99
references: [
@@ -12,10 +12,11 @@ namespace App {
1212

1313
importFrom("BuildXL.Cache.ContentStore").Distributed.dll,
1414
importFrom("BuildXL.Cache.ContentStore").Interfaces.dll,
15-
importFrom("BuildXL.Cache.ContentStore").Hashing.dll,
1615
importFrom("BuildXL.Cache.ContentStore").Library.dll,
17-
importFrom("BuildXL.Cache.ContentStore").UtilitiesCore.dll,
18-
importFrom("BuildXL.Cache.MemoizationStore").Interfaces.dll,
16+
17+
importFrom("BuildXL.Utilities").dll,
18+
19+
...importFrom("BuildXL.Cache.ContentStore").getAzureBlobStorageSdkPackages(true),
1920
],
2021
tools: {
2122
csc: {

0 commit comments

Comments
 (0)