Skip to content

Commit b64183f

Browse files
author
Ignacio Alonso Battaglia
committed
Merged PR 730455: Migrate to GRPC format in MachineLocation
Add a flag to use the host in MachinoLocation _Equals_ function. This allows us to migrate to the GRPC format without breaking the match between the old and the new format. **NOTE:** We should revert this changes after migration.
1 parent 1a2d4f2 commit b64183f

File tree

12 files changed

+73
-16
lines changed

12 files changed

+73
-16
lines changed

Public/Src/Cache/ContentStore/Distributed/ContentLocations/MachineLocation.cs

+25-2
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,12 @@ namespace BuildXL.Cache.ContentStore.Distributed
1616
/// </summary>
1717
public readonly record struct MachineLocation
1818
{
19-
public const string GrpcUriSchemePrefix = "grpc://";
19+
private const string GrpcUriSchemePrefix = "grpc://";
20+
21+
// TODO: This is a temporary solution while we migrate to the new format
22+
// When set to true the old and the new format will be considered equal
23+
// Work item to remove this https://dev.azure.com/mseng/1ES/_workitems/edit/2095358
24+
public static bool OnlyUseHostToCompare = false;
2025

2126
public static MachineLocation Invalid { get; } = new(string.Empty);
2227

@@ -45,12 +50,24 @@ public bool Equals(MachineLocation other)
4550
return other.Path is null;
4651
}
4752

53+
if (OnlyUseHostToCompare)
54+
{
55+
return ExtractHost().Equals(other.ExtractHost(), StringComparison.InvariantCultureIgnoreCase);
56+
}
57+
4858
return Path.Equals(other.Path, StringComparison.InvariantCultureIgnoreCase);
4959
}
5060

5161
public override int GetHashCode()
5262
{
53-
return Path is not null ? StringComparer.InvariantCultureIgnoreCase.GetHashCode(Path) : 42;
63+
if (Path is null)
64+
{
65+
return 42;
66+
}
67+
// TODO: This is a temporary solution while we migrate to the new format
68+
// Same machine has same hash code
69+
var host = ExtractHost();
70+
return StringComparer.InvariantCultureIgnoreCase.GetHashCode(host);
5471
}
5572

5673
/// <inheritdoc />
@@ -64,6 +81,12 @@ public static MachineLocation Create(string machineName, int port)
6481
return new MachineLocation($"{MachineLocation.GrpcUriSchemePrefix}{machineName}:{port}/");
6582
}
6683

84+
private string ExtractHost()
85+
{
86+
var (extractedHost, _) = ExtractHostInfo();
87+
return extractedHost;
88+
}
89+
6790
public (string host, int? port) ExtractHostInfo()
6891
{
6992
if (Path.StartsWith(GrpcUriSchemePrefix))

Public/Src/Cache/ContentStore/Distributed/NuCache/ClusterStateManagement/ClusterStateMachine.cs

+12-3
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,10 @@ public static (ClusterStateMachine NextState, MachineRecord[] Result) HeartbeatM
2525
var priorMachineRecords = new MachineRecord[request.MachineIds.Count];
2626
foreach (var entry in request.MachineIds.AsIndexed())
2727
{
28+
MachineLocation? newLocation = request.MachineLocations != null ? request.MachineLocations[entry.Index] : null;
29+
2830
(state, priorMachineRecords[entry.Index]) =
29-
state.Heartbeat(entry.Item, nowUtc, request.MachineState).ThrowIfFailure();
31+
state.Heartbeat(entry.Item, nowUtc, request.MachineState, newLocation).ThrowIfFailure();
3032
}
3133

3234
return (state, priorMachineRecords);
@@ -209,7 +211,7 @@ private ClusterStateMachine ForceRegisterMachineWithState(
209211
}
210212
}
211213

212-
public Result<(ClusterStateMachine Next, MachineRecord Previous)> Heartbeat(MachineId machineId, DateTime nowUtc, MachineState state)
214+
public Result<(ClusterStateMachine Next, MachineRecord Previous)> Heartbeat(MachineId machineId, DateTime nowUtc, MachineState state, MachineLocation? machineLocation = null)
213215
{
214216
if (!machineId.Valid)
215217
{
@@ -222,7 +224,14 @@ private ClusterStateMachine ForceRegisterMachineWithState(
222224
MachineRecord? previous = null;
223225
foreach (var record in Records)
224226
{
225-
if (record.Id == machineId)
227+
if (record.Id == machineId && machineLocation != null)
228+
{
229+
// TODO: This is a hack to allow the machine location to be updated https://dev.azure.com/mseng/1ES/_workitems/edit/2095358
230+
// Update the machine location in case it has changed due to migration to a different format (e.g. GRPC format)
231+
records.Add(record.Heartbeat(nowUtc, state, (MachineLocation)machineLocation));
232+
previous = record;
233+
}
234+
else if (record.Id == machineId)
226235
{
227236
records.Add(record.Heartbeat(nowUtc, state));
228237
previous = record;

Public/Src/Cache/ContentStore/Distributed/NuCache/ClusterStateManagement/ClusterStateManager.cs

+2-1
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ public Task<Result<MachineState>> HeartbeatAsync(
165165
}
166166

167167
var localMachineIds = ClusterState.LocalMachineMappings.Select(machineMapping => machineMapping.Id).ToArray();
168+
var localMachineLocations = ClusterState.LocalMachineMappings.Select(machineMapping => machineMapping.Location).ToArray();
168169

169170
if (_configuration.ReadOnly || localMachineIds.Length == 0)
170171
{
@@ -177,7 +178,7 @@ public Task<Result<MachineState>> HeartbeatAsync(
177178
}
178179
else
179180
{
180-
var heartbeatResponse = await _storage.HeartbeatAsync(context, new IClusterStateStorage.HeartbeatInput(localMachineIds, machineState)).ThrowIfFailureAsync();
181+
var heartbeatResponse = await _storage.HeartbeatAsync(context, new IClusterStateStorage.HeartbeatInput(localMachineIds, machineState, localMachineLocations)).ThrowIfFailureAsync();
181182
Contract.Assert(heartbeatResponse.PriorRecords.Length == localMachineIds.Length, "Mismatch between number of requested heartbeats and actual heartbeats. This should never happen.");
182183

183184
ClusterState.Update(context, heartbeatResponse.State, configuration: _configuration.RecomputeConfiguration, nowUtc: _clock.UtcNow).ThrowIfFailure();

Public/Src/Cache/ContentStore/Distributed/NuCache/ClusterStateManagement/IClusterStateStorage.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ public record RegisterMachineOutput(ClusterStateMachine State, MachineMapping[]
3939
public Task<Result<RegisterMachineOutput>> RegisterMachinesAsync(OperationContext context, RegisterMachineInput request);
4040

4141
[ProtoContract(ImplicitFields = ImplicitFields.AllPublic)]
42-
public record HeartbeatInput(IReadOnlyList<MachineId> MachineIds, MachineState MachineState)
42+
public record HeartbeatInput(IReadOnlyList<MachineId> MachineIds, MachineState MachineState, IReadOnlyList<MachineLocation> MachineLocations = null)
4343
{
4444
/// <summary>
4545
/// This parameterless constructor exists only to allow ProtoBuf.NET initialization

Public/Src/Cache/ContentStore/Distributed/NuCache/ClusterStateManagement/MachineRecord.cs

+10
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,16 @@ internal MachineRecord Heartbeat(DateTime nowUtc, MachineState nextState)
4040
return this with { State = nextState, LastHeartbeatTimeUtc = nowUtc };
4141
}
4242

43+
internal MachineRecord Heartbeat(DateTime nowUtc, MachineState nextState, MachineLocation machineLocation)
44+
{
45+
if (nextState == MachineState.Unknown)
46+
{
47+
return this with { LastHeartbeatTimeUtc = nowUtc, Location = machineLocation};
48+
}
49+
50+
return this with { State = nextState, LastHeartbeatTimeUtc = nowUtc, Location = machineLocation };
51+
}
52+
4353
public bool IsOpen()
4454
{
4555
return State == MachineState.Open;

Public/Src/Cache/ContentStore/Distributed/NuCache/TransitioningContentLocationStore.cs

+1
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ protected override async Task<BoolResult> StartupCoreAsync(OperationContext cont
7575
if (LocalLocationStore.ClusterState.TryResolveMachineId(LocalMachineLocation, out var localMachineId))
7676
{
7777
LocalMachineId = localMachineId;
78+
Tracer.Info(context, $"Resolved MachineId {LocalMachineId} for {LocalMachineLocation}");
7879
}
7980
else if (_configuration.DistributedContentConsumerOnly)
8081
{

Public/Src/Cache/ContentStore/Distributed/Stores/DistributedContentStoreSettings.cs

+6
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
using System;
55
using System.Collections.Generic;
6+
using System.Runtime.Serialization;
67
using BuildXL.Cache.ContentStore.Distributed.NuCache.CopyScheduling;
78
using BuildXL.Cache.ContentStore.Distributed.Sessions;
89
using BuildXL.Cache.ContentStore.Interfaces.Distributed;
@@ -106,6 +107,11 @@ public sealed class DistributedContentStoreSettings
106107
/// </summary>
107108
public int ProactiveCopyMaxRetries { get; set; } = 0;
108109

110+
/// <summary>
111+
/// Whether to use the value of the host inside Equals function in MachineLocation
112+
/// </summary>
113+
public bool UseHostInMachineLocationEquals { get; set; } = false;
114+
109115
/// <summary>
110116
/// Defines pinning behavior
111117
/// </summary>

Public/Src/Cache/ContentStore/Distributed/Utilities/GrpcFileCopier.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ public MachineLocation GetLocalMachineLocation(AbsolutePath cacheRoot)
206206
{
207207
if (_configuration.UseUniversalLocations)
208208
{
209-
return new MachineLocation($"{MachineLocation.GrpcUriSchemePrefix}{_localMachineName}:{_configuration.GrpcPort}/");
209+
return MachineLocation.Create(_localMachineName, _configuration.GrpcPort);
210210
}
211211

212212
if (!cacheRoot.IsLocal)

Public/Src/Cache/ContentStore/DistributedTest/ContentLocation/NuCache/MachineListTests.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,7 @@ public MachineListFactory(ITestClock clock, int amountMachines, int designatedLo
271271

272272
this.MachineIds = Enumerable.Range(1, amountMachines).Select(n => (ushort)n).ToArray();
273273

274-
var machineMappings = MachineIds.Select(m => new MachineMapping(new MachineId(m), new MachineLocation(m.ToString()))).ToArray();
274+
var machineMappings = MachineIds.Select(m => new MachineMapping(new MachineId(m), new MachineLocation($"grpc://{m}:123"))).ToArray();
275275
var clusterState = new ClusterState(primaryMachineId: default, machineMappings);
276276
foreach (var mapping in machineMappings)
277277
{

Public/Src/Cache/ContentStore/DistributedTest/Stores/DistributedContentCopierTests.cs

+7-7
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ public async Task CopyFromInRingMachines()
5555
new OperationContext(context),
5656
hashWithLocations,
5757
handleCopyAsync: tpl => Task.FromResult(new PutResult(hash, 42)),
58-
inRingMachines: new MachineLocation[] { new MachineLocation("") });
58+
inRingMachines: new MachineLocation[] { new MachineLocation(@"grpc://fun.com:123") });
5959

6060
result.ShouldBeSuccess();
6161
}
@@ -99,7 +99,7 @@ public async Task CopyFailsForWrongCopySize()
9999
var hashWithLocations = new ContentHashWithSizeAndLocations(
100100
hash,
101101
size: 42,
102-
new MachineLocation[] {new MachineLocation("")});
102+
new MachineLocation[] {new MachineLocation(@"grpc://fun.com:123") });
103103

104104
mockFileCopier.CopyToAsyncResult = CopyFileResult.SuccessWithSize(41);
105105
var result = await distributedCopier.TryCopyAndPutAsync(
@@ -127,7 +127,7 @@ public async Task CopyFailsForWrongHash()
127127
var hashWithLocations = new ContentHashWithSizeAndLocations(
128128
hash,
129129
size: 42,
130-
new MachineLocation[] {new MachineLocation("")});
130+
new MachineLocation[] {new MachineLocation(@"grpc://fun.com:123") });
131131

132132
mockFileCopier.CopyToAsyncResult = CopyFileResult.SuccessWithSize(42);
133133
var result = await distributedCopier.TryCopyAndPutAsync(
@@ -152,7 +152,7 @@ public async Task CopyRetries(int retries)
152152
var (distributedCopier, mockFileCopier) = CreateMocks(FileSystem, directory.Path,TimeSpan.Zero, retries);
153153
await using var _ = await distributedCopier.StartupWithAutoShutdownAsync(context);
154154

155-
var machineLocations = new MachineLocation[] {new MachineLocation("")};
155+
var machineLocations = new MachineLocation[] {new MachineLocation(@"grpc://fun.com:123") };
156156

157157
var hash = ContentHash.Random();
158158
var hashWithLocations = new ContentHashWithSizeAndLocations(
@@ -190,7 +190,7 @@ public async Task CopyRetriesWithRestrictions(int retries)
190190
maxRetryCount: retries + 1);
191191
await using var _ = await distributedCopier.StartupWithAutoShutdownAsync(context);
192192

193-
var empty = new MachineLocation("");
193+
var empty = new MachineLocation(@"grpc://fun.com:123");
194194
var machineLocations = new [] { empty, empty, empty, empty, empty };
195195

196196
var hash = ContentHash.Random();
@@ -245,7 +245,7 @@ public async Task CopyWithDestinationPathError(int retries)
245245
var (distributedCopier, mockFileCopier) = CreateMocks(FileSystem, directory.Path, TimeSpan.FromMilliseconds((10)), retries);
246246
await using var _ = await distributedCopier.StartupWithAutoShutdownAsync(context);
247247

248-
var machineLocations = new MachineLocation[] { new MachineLocation(""), new MachineLocation("") };
248+
var machineLocations = new MachineLocation[] { new MachineLocation(@"grpc://fun.com:123"), new MachineLocation(@"grpc://fun.com:123") };
249249

250250
var hash = ContentHash.Random();
251251
var hashWithLocations = new ContentHashWithSizeAndLocations(
@@ -306,7 +306,7 @@ public class MockFileCopier : IRemoteFileCopier
306306
#pragma warning restore 649
307307
public CopyFileResult[] CustomResults;
308308

309-
public MachineLocation GetLocalMachineLocation(AbsolutePath cacheRoot) => new MachineLocation("");
309+
public MachineLocation GetLocalMachineLocation(AbsolutePath cacheRoot) => new MachineLocation(@"grpc://fun.com:123");
310310

311311
/// <inheritdoc />
312312
public Task<CopyFileResult> CopyToAsync(OperationContext context, ContentLocation sourceLocation, Stream destinationStream, CopyOptions options)

Public/Src/Cache/DistributedCache.Host/Configuration/DistributedContentSettings.cs

+3
Original file line numberDiff line numberDiff line change
@@ -896,6 +896,9 @@ public bool UseGrpcDotNetForCopies()
896896
[DataMember]
897897
public bool UseUniversalLocations { get; set; }
898898

899+
[DataMember]
900+
public bool UseHostInMachineLocationEquals { get; set; } = false;
901+
899902
/// <summary>
900903
/// Include domain name in machine location.
901904
/// </summary>

Public/Src/Cache/DistributedCache.Host/Service/Internal/DistributedContentStoreFactory.cs

+4
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,9 @@ public DistributedContentStoreFactory(DistributedCacheServiceArguments arguments
8484
ContentLocationStoreConfiguration = CreateContentLocationStoreConfiguration();
8585
_distributedContentStoreSettings = CreateDistributedStoreSettings(_arguments, ContentLocationStoreConfiguration);
8686

87+
// TODO: This is a temporary solution while we migrate to the new format
88+
MachineLocation.OnlyUseHostToCompare = _distributedContentStoreSettings.UseHostInMachineLocationEquals;
89+
8790
// Tracing configuration before creating anything.
8891
if (arguments.TraceConfiguration)
8992
{
@@ -369,6 +372,7 @@ private static DistributedContentStoreSettings CreateDistributedStoreSettings(
369372
{
370373

371374
PinConfiguration = pinConfiguration,
375+
UseHostInMachineLocationEquals = distributedSettings.UseHostInMachineLocationEquals,
372376
ProactiveCopyMode = (ProactiveCopyMode)Enum.Parse(typeof(ProactiveCopyMode), distributedSettings.ProactiveCopyMode),
373377
PushProactiveCopies = distributedSettings.PushProactiveCopies,
374378
ProactiveCopyOnPut = distributedSettings.ProactiveCopyOnPut,

0 commit comments

Comments
 (0)