Skip to content
Open
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
2e6eab6
.
SimonCropp Nov 25, 2025
61be3e1
.
SimonCropp Nov 25, 2025
9547497
.
SimonCropp Nov 25, 2025
4fcfe7e
.
SimonCropp Nov 25, 2025
9cbf604
.
SimonCropp Nov 25, 2025
8ea715c
.
SimonCropp Nov 25, 2025
4500695
Update CrossChunkMatcher.cs
SimonCropp Nov 25, 2025
d5bd5b2
.
SimonCropp Nov 25, 2025
448915d
Update CrossChunkMatcher.cs
SimonCropp Nov 25, 2025
621a50e
.
SimonCropp Nov 25, 2025
d089ed1
Update DirectoryReplacements_StringBuilder.cs
SimonCropp Nov 25, 2025
d55ab13
.
SimonCropp Nov 25, 2025
6eec98f
Update CrossChunkMatcher.cs
SimonCropp Nov 25, 2025
bb269f9
.
SimonCropp Nov 25, 2025
d28a0fc
Update DateScrubber.cs
SimonCropp Nov 25, 2025
d89326b
Update src/Verify/Serialization/Scrubbers/CrossChunkMatcher.cs
SimonCropp Nov 29, 2025
e5b7655
Update src/Verify/Serialization/Scrubbers/DirectoryReplacements_Strin…
SimonCropp Nov 29, 2025
45cff61
Update src/Verify/Serialization/Scrubbers/DirectoryReplacements_Strin…
SimonCropp Nov 29, 2025
0eddccb
.
SimonCropp Nov 29, 2025
cc75e9c
.
SimonCropp Nov 29, 2025
19e4e72
Merge branch 'main' into CrossChunkMatcher2
SimonCropp Nov 29, 2025
6b6bbb0
Update CrossChunkMatcher.cs
SimonCropp Nov 30, 2025
2e84ea2
Update CrossChunkMatcher.cs
SimonCropp Nov 30, 2025
b1c5d9a
.
SimonCropp Nov 30, 2025
87f4760
.
SimonCropp Nov 30, 2025
e8e1263
Update CrossChunkMatcher.cs
SimonCropp Nov 30, 2025
33020a6
Update CrossChunkMatcherBenchmarks.cs
SimonCropp Nov 30, 2025
7003276
Update CrossChunkMatcherBenchmarks.cs
SimonCropp Nov 30, 2025
29b7f60
Merge branch 'main' into CrossChunkMatcher2
SimonCropp Nov 30, 2025
54105a1
Merge branch 'main' into CrossChunkMatcher2
SimonCropp Nov 30, 2025
6bc2108
Update CrossChunkMatcher.cs
SimonCropp Nov 30, 2025
f584fac
Merge branch 'main' into CrossChunkMatcher2
SimonCropp Nov 30, 2025
9c5d0f6
Update CrossChunkMatcher.cs
SimonCropp Nov 30, 2025
d2efaa3
Update CrossChunkMatcher.cs
SimonCropp Nov 30, 2025
7ce4241
Update CrossChunkMatcher.cs
SimonCropp Nov 30, 2025
b2711cc
.
SimonCropp Nov 30, 2025
e7b2874
Update CrossChunkMatcherBenchmarks.cs
SimonCropp Dec 1, 2025
90dc4a5
Update CrossChunkMatcher.cs
SimonCropp Dec 1, 2025
81dbc48
Update CrossChunkMatcher.cs
SimonCropp Dec 1, 2025
d3c9213
Update CrossChunkMatcher.cs
SimonCropp Dec 1, 2025
b5d8496
Update CrossChunkMatcher.cs
SimonCropp Dec 1, 2025
86973c9
Update CrossChunkMatcher.cs
SimonCropp Dec 1, 2025
4295148
Update DateScrubber.cs
SimonCropp Dec 1, 2025
b50e2ef
Update Extensions.cs
SimonCropp Dec 1, 2025
26cfb5d
Update Extensions.cs
SimonCropp Dec 1, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 109 additions & 0 deletions src/Verify/Serialization/Scrubbers/CrossChunkMatcher.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
/// <summary>
/// Helper for matching and replacing patterns in a StringBuilder that may span across chunk boundaries.
/// </summary>
static class CrossChunkMatcher
{
    // Largest maxLength for which the scratch buffers are stack-allocated.
    // Above this the buffers come from the heap so a long pattern cannot exhaust the stack.
    const int StackallocThreshold = 256;

    /// <summary>
    /// Finds all matches in a StringBuilder (handling patterns spanning chunk boundaries) and applies replacements.
    /// </summary>
    /// <remarks>
    /// Matches are collected first and applied afterwards. When collected matches overlap, the one that
    /// starts first is kept (the longer one when two start at the same position) and the others are dropped,
    /// so that no region of the builder is overwritten twice.
    /// </remarks>
    /// <param name="builder">The StringBuilder to search and modify</param>
    /// <param name="maxLength">Maximum length of the patterns to search for. Must be at least 1.</param>
    /// <param name="context">User context passed to callbacks</param>
    /// <param name="onCrossChunk">Called for each potential cross-chunk match position</param>
    /// <param name="onWithinChunk">Called for each position within a chunk</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="maxLength"/> is less than 1.</exception>
    public static void ReplaceAll<TContext>(
        StringBuilder builder,
        int maxLength,
        TContext context,
        CrossChunkHandler<TContext> onCrossChunk,
        WithinChunkHandler<TContext> onWithinChunk)
    {
        if (maxLength < 1)
        {
            // maxLength - 1 is used as a buffer size below; reject values that would make it negative.
            throw new ArgumentOutOfRangeException(nameof(maxLength), maxLength, "maxLength must be at least 1.");
        }

        // A match that straddles a boundary can leave at most maxLength - 1 chars before the boundary.
        var carryoverCapacity = maxLength - 1;

        Span<char> buffer = maxLength <= StackallocThreshold
            ? stackalloc char[maxLength]
            : new char[maxLength];
        Span<char> carryoverBuffer = maxLength <= StackallocThreshold
            ? stackalloc char[carryoverCapacity]
            : new char[carryoverCapacity];

        var carryoverLength = 0;
        // Absolute index (within the whole builder) of the first char of the current chunk.
        var absolutePosition = 0;
        List<Match> matches = [];
        Action<Match> addMatch = matches.Add;

        foreach (var chunk in builder.GetChunks())
        {
            if (chunk.Length == 0)
            {
                // An empty chunk contributes no text; keep the carryover intact for the next chunk.
                continue;
            }

            var chunkSpan = chunk.Span;

            // Check for matches that start in the text before this chunk and end inside it
            for (var carryoverIndex = 0; carryoverIndex < carryoverLength; carryoverIndex++)
            {
                var remainingInCarryover = carryoverLength - carryoverIndex;
                var startPosition = absolutePosition - remainingInCarryover;

                onCrossChunk(
                    builder,
                    carryoverBuffer,
                    buffer,
                    carryoverIndex,
                    remainingInCarryover,
                    chunkSpan,
                    startPosition,
                    context,
                    addMatch);
            }

            // Process matches entirely within this chunk
            var chunkIndex = 0;
            while (chunkIndex < chunk.Length)
            {
                var absoluteIndex = absolutePosition + chunkIndex;
                var skipAhead = onWithinChunk(chunk, chunkSpan, chunkIndex, absoluteIndex, context, addMatch);
                // Always advance by at least one position so the loop terminates.
                chunkIndex += skipAhead > 0 ? skipAhead : 1;
            }

            // Keep the last (maxLength - 1) chars of ALL text seen so far, not just of this chunk.
            // A chunk shorter than the carryover capacity must be appended to the existing tail,
            // otherwise a pattern spanning three or more chunks would never be offered to onCrossChunk.
            if (chunk.Length >= carryoverCapacity)
            {
                chunkSpan.Slice(chunk.Length - carryoverCapacity).CopyTo(carryoverBuffer);
                carryoverLength = carryoverCapacity;
            }
            else
            {
                var keep = Math.Min(carryoverLength, carryoverCapacity - chunk.Length);
                // Source and destination overlap here; Span.CopyTo copies correctly in that case.
                carryoverBuffer.Slice(carryoverLength - keep, keep).CopyTo(carryoverBuffer);
                chunkSpan.CopyTo(carryoverBuffer.Slice(keep));
                carryoverLength = keep + chunk.Length;
            }

            absolutePosition += chunk.Length;
        }

        ApplyMatches(builder, matches);
    }

    /// <summary>
    /// Drops overlapping matches (leftmost wins, then longest) and applies the survivors to the builder.
    /// </summary>
    static void ApplyMatches(StringBuilder builder, List<Match> matches)
    {
        if (matches.Count == 0)
        {
            return;
        }

        matches.Sort(
            static (left, right) =>
            {
                var byIndex = left.Index.CompareTo(right.Index);
                return byIndex != 0 ? byIndex : right.Length.CompareTo(left.Length);
            });

        // Cross-chunk and within-chunk scanning can report matches that cover the same characters.
        // Applying both would overwrite a region twice, so keep only non-overlapping matches.
        List<Match> accepted = new(matches.Count);
        var consumedUpTo = 0;
        foreach (var match in matches)
        {
            if (match.Index < consumedUpTo)
            {
                continue;
            }

            accepted.Add(match);
            consumedUpTo = match.Index + match.Length;
        }

        // Apply in descending position order so earlier indices stay valid while the builder changes length
        for (var index = accepted.Count - 1; index >= 0; index--)
        {
            var match = accepted[index];
            builder.Overwrite(match.Value, match.Index, match.Length);
        }
    }

    /// <summary>
    /// Callback for processing potential cross-chunk matches.
    /// </summary>
    /// <param name="builder">The StringBuilder being searched</param>
    /// <param name="carryoverBuffer">Holds the text that immediately precedes the current chunk</param>
    /// <param name="buffer">Scratch space of maxLength chars for combining carryover and chunk text</param>
    /// <param name="carryoverIndex">Index within <paramref name="carryoverBuffer"/> of the candidate start</param>
    /// <param name="remainingInCarryover">Number of valid carryover chars from <paramref name="carryoverIndex"/> onwards</param>
    /// <param name="currentChunkSpan">The text of the current chunk</param>
    /// <param name="absoluteStartPosition">Index of the candidate start within the whole builder</param>
    /// <param name="context">User context supplied to ReplaceAll</param>
    /// <param name="addMatch">Records a match to be applied once scanning has finished</param>
    public delegate void CrossChunkHandler<TContext>(
        StringBuilder builder,
        Span<char> carryoverBuffer,
        Span<char> buffer,
        int carryoverIndex,
        int remainingInCarryover,
        CharSpan currentChunkSpan,
        int absoluteStartPosition,
        TContext context,
        Action<Match> addMatch);

    /// <summary>
    /// Callback for processing positions within a chunk.
    /// </summary>
    /// <param name="chunk">The current chunk</param>
    /// <param name="chunkSpan">The text of the current chunk</param>
    /// <param name="chunkIndex">Index within the chunk of the candidate start</param>
    /// <param name="absoluteIndex">Index of the candidate start within the whole builder</param>
    /// <param name="context">User context supplied to ReplaceAll</param>
    /// <param name="addMatch">Records a match to be applied once scanning has finished</param>
    /// <returns>Number of positions to skip ahead (0 or 1 for normal iteration, more to skip past a match)</returns>
    public delegate int WithinChunkHandler<TContext>(
        ReadOnlyMemory<char> chunk,
        CharSpan chunkSpan,
        int chunkIndex,
        int absoluteIndex,
        TContext context,
        Action<Match> addMatch);
}


/// <summary>
/// A replacement to apply to a StringBuilder: <see cref="Length"/> chars starting at
/// <see cref="Index"/> are to be replaced with <see cref="Value"/>.
/// </summary>
readonly struct Match
{
    /// <summary>Position of the matched text.</summary>
    public readonly int Index;

    /// <summary>Number of chars covered by the matched text.</summary>
    public readonly int Length;

    /// <summary>Text that replaces the matched text.</summary>
    public readonly string Value;

    public Match(int index, int length, string value)
    {
        Index = index;
        Length = length;
        Value = value;
    }
}
117 changes: 72 additions & 45 deletions src/Verify/Serialization/Scrubbers/DateScrubber.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// ReSharper disable ReturnValueOfPureMethodIsNotUsed
// ReSharper disable ReturnValueOfPureMethodIsNotUsed
static class DateScrubber
{
delegate bool TryConvert(
Expand Down Expand Up @@ -192,7 +192,7 @@ static bool TryGetFormatWithUpperMillisecondsTrimmed(string format, [NotNullWhen
return false;
}

static void ReplaceInner(StringBuilder builder, string format, Counter counter, Culture culture, TryConvert tryConvertDate)
static void ReplaceInner(StringBuilder builder, string format, Counter counter, Culture culture, TryConvert tryConvert)
{
if (!counter.ScrubDateTimes)
{
Expand All @@ -206,69 +206,96 @@ static void ReplaceInner(StringBuilder builder, string format, Counter counter,
return;
}

if (min == max)
{
ReplaceFixedLength(builder, format, counter, culture, tryConvertDate, max);

return;
}
var context = new MatchContext(format, counter, culture, tryConvert, max, min);

ReplaceVariableLength(builder, format, counter, culture, tryConvertDate, max, min);
CrossChunkMatcher.ReplaceAll(
builder,
maxLength: max,
context,
OnCrossChunk,
OnWithinChunk);
}

static void ReplaceVariableLength(StringBuilder builder, string format, Counter counter, Culture culture, TryConvert tryConvertDate, int longest, int shortest)
static void OnCrossChunk(
StringBuilder builder,
Span<char> carryoverBuffer,
Span<char> buffer,
int carryoverIndex,
int remainingInCarryover,
CharSpan currentChunkSpan,
int absoluteStartPosition,
MatchContext context,
Action<Match> addMatch)
{
var value = builder.AsSpan();
var builderIndex = 0;
for (var index = 0; index <= value.Length; index++)
// Try lengths from longest to shortest (greedy matching)
for (var length = context.MaxLength; length >= context.MinLength; length--)
{
var found = false;
for (var length = longest; length >= shortest; length--)
var neededFromCurrent = length - remainingInCarryover;

if (neededFromCurrent <= 0 ||
neededFromCurrent > currentChunkSpan.Length)
{
var end = index + length;
if (end > value.Length)
{
continue;
}

var slice = value.Slice(index, length);
if (tryConvertDate(slice, format, counter, culture, out var convert))
{
builder.Overwrite(convert, builderIndex, length);
builderIndex += convert.Length;
index += length - 1;
found = true;
break;
}
continue;
}

if (found)
// Combine carryover and current chunk
carryoverBuffer.Slice(carryoverIndex, remainingInCarryover).CopyTo(buffer);
currentChunkSpan[..neededFromCurrent].CopyTo(buffer[remainingInCarryover..]);

var slice = buffer[..length];

if (!context.TryConvert(slice, context.Format, context.Counter, context.Culture, out var convert))
{
continue;
}

builderIndex++;
addMatch(new(absoluteStartPosition, length, convert));
// Found match at this position
return;
}
}

static void ReplaceFixedLength(StringBuilder builder, string format, Counter counter, Culture culture, TryConvert tryConvertDate, int length)
static int OnWithinChunk(
ReadOnlyMemory<char> chunk,
CharSpan chunkSpan,
int chunkIndex,
int absoluteIndex,
MatchContext context,
Action<Match> addMatch)
{
var value = builder.AsSpan();
var builderIndex = 0;
var increment = length - 1;
for (var index = 0; index <= value.Length - length; index++)
// Try lengths from longest to shortest (greedy matching)
for (var length = context.MaxLength; length >= context.MinLength; length--)
{
var slice = value.Slice(index, length);
if (tryConvertDate(slice, format, counter, culture, out var convert))
if (chunkIndex + length > chunk.Length)
{
builder.Overwrite(convert, builderIndex, length);
builderIndex += convert.Length;
index += increment;
continue;
}
else

var slice = chunkSpan.Slice(chunkIndex, length);

if (context.TryConvert(slice, context.Format, context.Counter, context.Culture, out var convert))
{
builderIndex++;
addMatch(new(absoluteIndex, length, convert));
return length; // Skip past match
}
}

return 1;
}

/// <summary>
/// Immutable bundle of the values the date-matching callbacks need for a single scrub pass.
/// </summary>
sealed class MatchContext
{
    public MatchContext(
        string format,
        Counter counter,
        Culture culture,
        TryConvert tryConvert,
        int maxLength,
        int minLength)
    {
        Format = format;
        Counter = counter;
        Culture = culture;
        TryConvert = tryConvert;
        MaxLength = maxLength;
        MinLength = minLength;
    }

    /// <summary>Format string handed to <see cref="TryConvert"/>.</summary>
    public string Format { get; }

    /// <summary>Counter handed to <see cref="TryConvert"/>.</summary>
    public Counter Counter { get; }

    /// <summary>Culture handed to <see cref="TryConvert"/>.</summary>
    public Culture Culture { get; }

    /// <summary>Delegate that attempts to convert a candidate slice into its replacement text.</summary>
    public TryConvert TryConvert { get; }

    /// <summary>Longest candidate length to try.</summary>
    public int MaxLength { get; }

    /// <summary>Shortest candidate length to try.</summary>
    public int MinLength { get; }
}
}
}
Loading