diff --git a/src/Benchmarks/CrossChunkMatcherBenchmarks.cs b/src/Benchmarks/CrossChunkMatcherBenchmarks.cs
new file mode 100644
index 000000000..4d0a93ded
--- /dev/null
+++ b/src/Benchmarks/CrossChunkMatcherBenchmarks.cs
@@ -0,0 +1,168 @@
+/// <summary>
+/// Benchmarks <c>CrossChunkMatcher.ReplaceAll</c> on builders created from one string and on
+/// builders assembled from many small appends, at three input sizes.
+/// </summary>
+[MemoryDiagnoser]
+[SimpleJob(warmupCount: 3, iterationCount: 5)]
+public class CrossChunkMatcherBenchmarks
+{
+    // NOTE(review): every marker literal arrived as an empty string (markup looks stripped), so
+    // StartsWith("") succeeded at every position. The markers below are reconstructed from the
+    // hard-coded match lengths (5, 9 and 16) - confirm them against the original file.
+    const string OpenTag = "<tag>";
+    const string CloseTag = "</tag>";
+    const string MissingTag = "<missing>";
+    const string ComplexPattern = OpenTag + "world" + CloseTag;
+
+    StringBuilder singleChunkSmall = null!;
+    StringBuilder singleChunkMedium = null!;
+    StringBuilder singleChunkLarge = null!;
+    StringBuilder multiChunkSmall = null!;
+    StringBuilder multiChunkMedium = null!;
+    StringBuilder multiChunkLarge = null!;
+
+    [GlobalSetup]
+    public void Setup()
+    {
+        // Single-chunk builders (created from string - fast path)
+        singleChunkSmall = new(BuildText(10));
+        singleChunkMedium = new(BuildText(100));
+        singleChunkLarge = new(BuildText(1000));
+
+        // Multi-chunk builders (built incrementally to force multiple chunks)
+        multiChunkSmall = BuildMultiChunk(10);
+        multiChunkMedium = BuildMultiChunk(100);
+        multiChunkLarge = BuildMultiChunk(1000);
+    }
+
+    /// <summary>Returns the two sample lines repeated <paramref name="iterations"/> times.</summary>
+    static string BuildText(int iterations)
+    {
+        var builder = new StringBuilder();
+        for (var i = 0; i < iterations; i++)
+        {
+            builder.AppendLine("Hello " + OpenTag + "world" + CloseTag + " this is");
+            builder.AppendLine("a test with some " + OpenTag + "patterns" + CloseTag + " to match.");
+        }
+
+        return builder.ToString();
+    }
+
+    /// <summary>Builds the same text as <see cref="BuildText"/> through many small appends.</summary>
+    static StringBuilder BuildMultiChunk(int iterations)
+    {
+        var builder = new StringBuilder();
+        for (var i = 0; i < iterations; i++)
+        {
+            // Each Append can create separate chunks
+            builder.Append("Hello ");
+            builder.Append(OpenTag);
+            builder.Append("world");
+            builder.Append(CloseTag);
+            builder.AppendLine(" this is");
+            builder.Append("a test with some ");
+            builder.Append(OpenTag);
+            builder.Append("patterns");
+            builder.Append(CloseTag);
+            builder.AppendLine(" to match.");
+        }
+
+        return builder;
+    }
+
+    // Single-chunk benchmarks (fast path)
+    [Benchmark]
+    public void SingleChunk_Small() => ReplaceTags(FromString(singleChunkSmall));
+
+    [Benchmark]
+    public void SingleChunk_Medium() => ReplaceTags(FromString(singleChunkMedium));
+
+    [Benchmark]
+    public void SingleChunk_Large() => ReplaceTags(FromString(singleChunkLarge));
+
+    // Multi-chunk benchmarks (complex path)
+    [Benchmark]
+    public void MultiChunk_Small() => ReplaceTags(FromChunks(multiChunkSmall));
+
+    [Benchmark]
+    public void MultiChunk_Medium() => ReplaceTags(FromChunks(multiChunkMedium));
+
+    [Benchmark]
+    public void MultiChunk_Large() => ReplaceTags(FromChunks(multiChunkLarge));
+
+    // Edge cases
+    [Benchmark]
+    public void SingleChunk_NoMatches() => ReplaceMissing(FromString(singleChunkMedium));
+
+    [Benchmark]
+    public void MultiChunk_NoMatches() => ReplaceMissing(FromChunks(multiChunkMedium));
+
+    [Benchmark]
+    public void SingleChunk_ComplexPattern() => ReplaceComplex(FromString(singleChunkMedium));
+
+    [Benchmark]
+    public void MultiChunk_ComplexPattern() => ReplaceComplex(FromChunks(multiChunkMedium));
+
+    // ReplaceAll mutates the builder it is given, so every invocation works on a fresh copy.
+    static StringBuilder FromString(StringBuilder source) => new(source.ToString());
+
+    // Copies the source chunk by chunk into a new builder.
+    static StringBuilder FromChunks(StringBuilder source)
+    {
+        var builder = new StringBuilder();
+        foreach (var chunk in source.GetChunks())
+        {
+            builder.Append(chunk.Span);
+        }
+
+        return builder;
+    }
+
+    // Replaces each opening tag; the pattern occurs twice per sample iteration.
+    static void ReplaceTags(StringBuilder builder) =>
+        CrossChunkMatcher.ReplaceAll(
+            builder,
+            maxLength: 20,
+            context: (string?)null,
+            matcher: static (content, _, _) =>
+            {
+                if (content.StartsWith(OpenTag))
+                {
+                    return new MatchResult(OpenTag.Length, "[REPLACED]");
+                }
+
+                return null;
+            });
+
+    // Searches for a marker that the sample text never contains.
+    static void ReplaceMissing(StringBuilder builder) =>
+        CrossChunkMatcher.ReplaceAll(
+            builder,
+            maxLength: 20,
+            context: (string?)null,
+            matcher: static (content, _, _) =>
+            {
+                if (content.StartsWith(MissingTag))
+                {
+                    return new MatchResult(MissingTag.Length, "[REPLACED]");
+                }
+
+                return null;
+            });
+
+    // Replaces a whole tagged word using a larger window (maxLength 100).
+    static void ReplaceComplex(StringBuilder builder) =>
+        CrossChunkMatcher.ReplaceAll(
+            builder,
+            maxLength: 100,
+            context: (string?)null,
+            matcher: static (content, _, _) =>
+            {
+                if (content.StartsWith(ComplexPattern))
+                {
+                    return new MatchResult(ComplexPattern.Length, "[COMPLEX]");
+                }
+
+                return null;
+            });
+}
\ No newline at end of file
diff --git a/src/Benchmarks/GlobalUsings.cs b/src/Benchmarks/GlobalUsings.cs
index 73aad3a34..34862b298 100644 --- a/src/Benchmarks/GlobalUsings.cs +++ b/src/Benchmarks/GlobalUsings.cs @@ -1,3 +1,4 @@ -global using BenchmarkDotNet.Attributes; +global using System.Text; +global using BenchmarkDotNet.Attributes; global using BenchmarkDotNet.Running; global using VerifyTests; diff --git a/src/Verify.Tests/Serialization/CrossChunkMatcherTests.ConsecutiveMatches.verified.txt b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.ConsecutiveMatches.verified.txt new file mode 100644 index 000000000..0ecaa2c2b --- /dev/null +++ b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.ConsecutiveMatches.verified.txt @@ -0,0 +1 @@ +XYXYXY \ No newline at end of file diff --git a/src/Verify.Tests/Serialization/CrossChunkMatcherTests.CrossChunkMatch.verified.txt b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.CrossChunkMatch.verified.txt new file mode 100644 index 000000000..a0f371f92 --- /dev/null +++ b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.CrossChunkMatch.verified.txt @@ -0,0 +1,5 @@ +{ + MatchFound: true, + Position: 8000, + Context: aaaaaaaaaaFOUNDbbbbbbbbbb +} \ No newline at end of file diff --git a/src/Verify.Tests/Serialization/CrossChunkMatcherTests.DifferentReplacementLengths.verified.txt b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.DifferentReplacementLengths.verified.txt new file mode 100644 index 000000000..86f45cb10 --- /dev/null +++ b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.DifferentReplacementLengths.verified.txt @@ -0,0 +1 @@ +replaced x r y \ No newline at end of file diff --git a/src/Verify.Tests/Serialization/CrossChunkMatcherTests.EmptyStringBuilder.verified.txt b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.EmptyStringBuilder.verified.txt new file mode 100644 index 000000000..c1b8d743e --- /dev/null +++ b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.EmptyStringBuilder.verified.txt @@ -0,0 +1 @@ +emptyString \ No newline at end of file diff --git 
a/src/Verify.Tests/Serialization/CrossChunkMatcherTests.LargeStringBuilderWithMultipleChunks.verified.txt b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.LargeStringBuilderWithMultipleChunks.verified.txt new file mode 100644 index 000000000..ff04588eb --- /dev/null +++ b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.LargeStringBuilderWithMultipleChunks.verified.txt @@ -0,0 +1,3 @@ +{ + MatchCount: 5 +} \ No newline at end of file diff --git a/src/Verify.Tests/Serialization/CrossChunkMatcherTests.MatchAtEnd.verified.txt b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.MatchAtEnd.verified.txt new file mode 100644 index 000000000..71f7a8811 --- /dev/null +++ b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.MatchAtEnd.verified.txt @@ -0,0 +1 @@ +StringResult \ No newline at end of file diff --git a/src/Verify.Tests/Serialization/CrossChunkMatcherTests.MatchAtStart.verified.txt b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.MatchAtStart.verified.txt new file mode 100644 index 000000000..db9e677fd --- /dev/null +++ b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.MatchAtStart.verified.txt @@ -0,0 +1 @@ +ResultString \ No newline at end of file diff --git a/src/Verify.Tests/Serialization/CrossChunkMatcherTests.MultipleMatchesAcrossChunks.verified.txt b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.MultipleMatchesAcrossChunks.verified.txt new file mode 100644 index 000000000..e2efd5cec --- /dev/null +++ b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.MultipleMatchesAcrossChunks.verified.txt @@ -0,0 +1,3 @@ +{ + MatchCount: 2 +} \ No newline at end of file diff --git a/src/Verify.Tests/Serialization/CrossChunkMatcherTests.MultipleWithinChunkMatches.verified.txt b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.MultipleWithinChunkMatches.verified.txt new file mode 100644 index 000000000..e72b1133a --- /dev/null +++ 
b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.MultipleWithinChunkMatches.verified.txt @@ -0,0 +1 @@ +bar bar bar baz bar \ No newline at end of file diff --git a/src/Verify.Tests/Serialization/CrossChunkMatcherTests.NoMatches.verified.txt b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.NoMatches.verified.txt new file mode 100644 index 000000000..05eac029f --- /dev/null +++ b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.NoMatches.verified.txt @@ -0,0 +1 @@ +Hello World \ No newline at end of file diff --git a/src/Verify.Tests/Serialization/CrossChunkMatcherTests.OverlappingMatches.verified.txt b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.OverlappingMatches.verified.txt new file mode 100644 index 000000000..64b11214f --- /dev/null +++ b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.OverlappingMatches.verified.txt @@ -0,0 +1 @@ +bbbb \ No newline at end of file diff --git a/src/Verify.Tests/Serialization/CrossChunkMatcherTests.PatternSpanningFourChunks.verified.txt b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.PatternSpanningFourChunks.verified.txt new file mode 100644 index 000000000..028df203b --- /dev/null +++ b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.PatternSpanningFourChunks.verified.txt @@ -0,0 +1,5 @@ +{ + MatchFound: true, + OriginalPatternExists: false, + Note: Pattern spanning 4 chunks should now be detected +} \ No newline at end of file diff --git a/src/Verify.Tests/Serialization/CrossChunkMatcherTests.PatternSpanningThreeChunks.verified.txt b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.PatternSpanningThreeChunks.verified.txt new file mode 100644 index 000000000..19bf65c6e --- /dev/null +++ b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.PatternSpanningThreeChunks.verified.txt @@ -0,0 +1,5 @@ +{ + MatchFound: true, + OriginalPatternExists: false, + Note: Pattern spanning 3 chunks should now be detected +} \ No newline at end of file diff --git 
a/src/Verify.Tests/Serialization/CrossChunkMatcherTests.PatternSpanningThreeChunks_AlternativeLayout.verified.txt b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.PatternSpanningThreeChunks_AlternativeLayout.verified.txt new file mode 100644 index 000000000..19bf65c6e --- /dev/null +++ b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.PatternSpanningThreeChunks_AlternativeLayout.verified.txt @@ -0,0 +1,5 @@ +{ + MatchFound: true, + OriginalPatternExists: false, + Note: Pattern spanning 3 chunks should now be detected +} \ No newline at end of file diff --git a/src/Verify.Tests/Serialization/CrossChunkMatcherTests.PatternSplitAcrossChunkBoundary.verified.txt b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.PatternSplitAcrossChunkBoundary.verified.txt new file mode 100644 index 000000000..8f036f145 --- /dev/null +++ b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.PatternSplitAcrossChunkBoundary.verified.txt @@ -0,0 +1,5 @@ +{ + MatchFound: true, + ExpectedPosition: 8000, + ActualPosition: 8000 +} \ No newline at end of file diff --git a/src/Verify.Tests/Serialization/CrossChunkMatcherTests.SimpleWithinChunkMatch.verified.txt b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.SimpleWithinChunkMatch.verified.txt new file mode 100644 index 000000000..61ae9fb31 --- /dev/null +++ b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.SimpleWithinChunkMatch.verified.txt @@ -0,0 +1 @@ +Hello Universe \ No newline at end of file diff --git a/src/Verify.Tests/Serialization/CrossChunkMatcherTests.SingleCharacterMatch.verified.txt b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.SingleCharacterMatch.verified.txt new file mode 100644 index 000000000..c701ded90 --- /dev/null +++ b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.SingleCharacterMatch.verified.txt @@ -0,0 +1 @@ +x b x c x \ No newline at end of file diff --git a/src/Verify.Tests/Serialization/CrossChunkMatcherTests.SkipAheadFunctionality.verified.txt 
b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.SkipAheadFunctionality.verified.txt
new file mode 100644
index 000000000..b03c43f9e
--- /dev/null
+++ b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.SkipAheadFunctionality.verified.txt
@@ -0,0 +1,8 @@
+[
+  0,
+  3,
+  6,
+  9,
+  12,
+  15
+]
\ No newline at end of file
diff --git a/src/Verify.Tests/Serialization/CrossChunkMatcherTests.VariableLengthMatches.verified.txt b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.VariableLengthMatches.verified.txt
new file mode 100644
index 000000000..5d16e226d
--- /dev/null
+++ b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.VariableLengthMatches.verified.txt
@@ -0,0 +1 @@
+animal animal animal animal
\ No newline at end of file
diff --git a/src/Verify.Tests/Serialization/CrossChunkMatcherTests.cs b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.cs
new file mode 100644
index 000000000..489933938
--- /dev/null
+++ b/src/Verify.Tests/Serialization/CrossChunkMatcherTests.cs
@@ -0,0 +1,340 @@
+public class CrossChunkMatcherTests
+{
+    [Fact]
+    public Task SimpleWithinChunkMatch()
+    {
+        var text = new StringBuilder("Hello World");
+
+        ReplaceLiteral(text, maxLength: 5, target: "World", replacement: "Universe");
+
+        return Verify(text.ToString());
+    }
+
+    [Fact]
+    public Task MultipleWithinChunkMatches()
+    {
+        var text = new StringBuilder("foo bar foo baz foo");
+
+        ReplaceLiteral(text, maxLength: 3, target: "foo", replacement: "bar");
+
+        return Verify(text.ToString());
+    }
+
+    [Fact]
+    public Task CrossChunkMatch()
+    {
+        // Create a StringBuilder with multiple chunks
+        var text = new StringBuilder();
+        text.Append(new string('a', 8000)); // First chunk
+        text.Append("MATCH");
+        text.Append(new string('b', 8000)); // Force into new chunk
+
+        ReplaceLiteral(text, maxLength: 5, target: "MATCH", replacement: "FOUND");
+
+        var result = text.ToString();
+        var position = result.IndexOf("FOUND", StringComparison.Ordinal);
+        var contextStart = Math.Max(0, position - 10);
+        var contextLength = Math.Min(25, result.Length - position + 10);
+
+        return Verify(new
+        {
+            MatchFound = position >= 0,
+            Position = position,
+            Context = result.Substring(contextStart, contextLength)
+        });
+    }
+
+    [Fact]
+    public Task NoMatches()
+    {
+        var text = new StringBuilder("Hello World");
+
+        ReplaceLiteral(text, maxLength: 10, target: "NotFound", replacement: "Replaced");
+
+        return Verify(text.ToString());
+    }
+
+    [Fact]
+    public Task EmptyStringBuilder()
+    {
+        var text = new StringBuilder();
+
+        ReplaceLiteral(text, maxLength: 5, target: "test", replacement: "replaced");
+
+        return Verify(text.ToString());
+    }
+
+    [Fact]
+    public Task MatchAtStart()
+    {
+        var text = new StringBuilder("TargetString");
+
+        ReplaceLiteral(text, maxLength: 6, target: "Target", replacement: "Result");
+
+        return Verify(text.ToString());
+    }
+
+    [Fact]
+    public Task MatchAtEnd()
+    {
+        var text = new StringBuilder("StringTarget");
+
+        ReplaceLiteral(text, maxLength: 6, target: "Target", replacement: "Result");
+
+        return Verify(text.ToString());
+    }
+
+    [Fact]
+    public Task OverlappingMatches()
+    {
+        var text = new StringBuilder("aaaa");
+
+        ReplaceLiteral(text, maxLength: 2, target: "aa", replacement: "bb");
+
+        return Verify(text.ToString());
+    }
+
+    [Fact]
+    public Task DifferentReplacementLengths()
+    {
+        var text = new StringBuilder("short x long y");
+
+        CrossChunkMatcher.ReplaceAll(
+            text,
+            maxLength: 5,
+            context: (Short: "short", Long: "long"),
+            matcher: static (content, _, words) =>
+            {
+                if (content.Length >= words.Short.Length &&
+                    content[..words.Short.Length].SequenceEqual(words.Short))
+                {
+                    return new(words.Short.Length, "replaced");
+                }
+
+                if (content.Length >= words.Long.Length &&
+                    content[..words.Long.Length].SequenceEqual(words.Long))
+                {
+                    return new(words.Long.Length, "r");
+                }
+
+                return null;
+            });
+
+        return Verify(text.ToString());
+    }
+
+    [Fact]
+    public Task SingleCharacterMatch()
+    {
+        var text = new StringBuilder("a b a c a");
+
+        CrossChunkMatcher.ReplaceAll(
+            text,
+            maxLength: 1,
+            context: 'a',
+            matcher: static (content, _, target) =>
+            {
+                if (content.Length >= 1 && content[0] == target)
+                {
+                    return new(1, "x");
+                }
+
+                return null;
+            });
+
+        return Verify(text.ToString());
+    }
+
+    [Fact]
+    public Task LargeStringBuilderWithMultipleChunks()
+    {
+        var text = new StringBuilder();
+        // Create multiple chunks with patterns
+        for (var i = 0; i < 5; i++)
+        {
+            text.Append(new string('x', 7000));
+            text.Append("PATTERN");
+        }
+
+        ReplaceLiteral(text, maxLength: 7, target: "PATTERN", replacement: "MATCH");
+
+        return Verify(new {MatchCount = CountOccurrences(text.ToString(), "MATCH")});
+    }
+
+    [Fact]
+    public Task PatternSplitAcrossChunkBoundary()
+    {
+        // Create a pattern that will be split across chunk boundary
+        // First chunk ends with "PAT", second chunk starts with "TERN"
+        var firstChunk = new string('a', 8000) + "PAT";
+        var secondChunk = "TERN" + new string('b', 8000);
+
+        var text = new StringBuilder();
+        text.Append(firstChunk);
+        text.Append(secondChunk);
+
+        ReplaceLiteral(text, maxLength: 7, target: "PATTERN", replacement: "SUCCESS");
+
+        var actualPosition = text.ToString().IndexOf("SUCCESS", StringComparison.Ordinal);
+
+        return Verify(new
+        {
+            MatchFound = actualPosition >= 0,
+            ExpectedPosition = firstChunk.Length - 3,
+            ActualPosition = actualPosition
+        });
+    }
+
+    [Fact]
+    public Task ConsecutiveMatches()
+    {
+        var text = new StringBuilder("ABABAB");
+
+        ReplaceLiteral(text, maxLength: 2, target: "AB", replacement: "XY");
+
+        return Verify(text.ToString());
+    }
+
+    [Fact]
+    public Task PatternSpanningThreeChunks()
+    {
+        // Pattern "ABCDEFGH" spans 3 chunks
+        var text = new StringBuilder();
+
+        // Chunk 1: ends with "ABC"
+        text.Append(new string('x', 8000) + "ABC");
+        // Chunk 2: contains "DEF"
+        text.Append("DEF");
+        // Chunk 3: starts with "GH"
+        text.Append("GH" + new string('y', 8000));
+
+        ReplaceLiteral(text, maxLength: 8, target: "ABCDEFGH", replacement: "FOUND!!!");
+
+        var result = text.ToString();
+
+        return Verify(new
+        {
+            MatchFound = result.Contains("FOUND!!!"),
+            OriginalPatternExists = result.Contains("ABCDEFGH"),
+            Note = "Pattern spanning 3 chunks should now be detected"
+        });
+    }
+
+    [Fact]
+    public Task PatternSpanningThreeChunks_AlternativeLayout()
+    {
+        // Another layout: AB | CDEF | GH
+        var text = new StringBuilder();
+
+        text.Append(new string('x', 8000) + "AB");
+        text.Append("CDEF");
+        text.Append("GH" + new string('y', 8000));
+
+        ReplaceLiteral(text, maxLength: 8, target: "ABCDEFGH", replacement: "FOUND!!!");
+
+        var result = text.ToString();
+
+        return Verify(new
+        {
+            MatchFound = result.Contains("FOUND!!!"),
+            OriginalPatternExists = result.Contains("ABCDEFGH"),
+            Note = "Pattern spanning 3 chunks should now be detected"
+        });
+    }
+
+    [Fact]
+    public Task PatternSpanningFourChunks()
+    {
+        // Extreme case: A | BC | DE | FGH
+        var text = new StringBuilder();
+
+        text.Append(new string('x', 8000) + "A");
+        text.Append("BC");
+        text.Append("DE");
+        text.Append("FGH" + new string('y', 8000));
+
+        ReplaceLiteral(text, maxLength: 8, target: "ABCDEFGH", replacement: "SUCCESS!");
+
+        var result = text.ToString();
+
+        return Verify(new
+        {
+            MatchFound = result.Contains("SUCCESS!"),
+            OriginalPatternExists = result.Contains("ABCDEFGH"),
+            Note = "Pattern spanning 4 chunks should now be detected"
+        });
+    }
+
+    [Fact]
+    public Task MultipleMatchesAcrossChunks()
+    {
+        var text = new StringBuilder();
+
+        // Multiple patterns across chunks
+        text.Append(new string('x', 7000) + "PAT");
+        text.Append("TERN1" + new string('y', 7000) + "PAT");
+        text.Append("TERN2" + new string('z', 7000));
+
+        ReplaceLiteral(text, maxLength: 7, target: "PATTERN", replacement: "MATCH");
+
+        return Verify(new {MatchCount = CountOccurrences(text.ToString(), "MATCH")});
+    }
+
+    [Fact]
+    public Task VariableLengthMatches()
+    {
+        var text = new StringBuilder("cat dog bird elephant");
+
+        CrossChunkMatcher.ReplaceAll(
+            text,
+            maxLength: 8,
+            context: new[] {"cat", "dog", "bird", "elephant"},
+            matcher: static (content, _, words) =>
+            {
+                foreach (var word in words)
+                {
+                    if (content.Length >= word.Length &&
+                        content[..word.Length].SequenceEqual(word))
+                    {
+                        return new(word.Length, "animal");
+                    }
+                }
+
+                return null;
+            });
+
+        return Verify(text.ToString());
+    }
+
+    // Runs ReplaceAll with a matcher that succeeds when the window starts with target.
+    static void ReplaceLiteral(StringBuilder builder, int maxLength, string target, string replacement) =>
+        CrossChunkMatcher.ReplaceAll(
+            builder,
+            maxLength,
+            context: (Target: target, Replacement: replacement),
+            matcher: static (content, _, context) =>
+            {
+                var literal = context.Target;
+                if (content.Length >= literal.Length &&
+                    content[..literal.Length].SequenceEqual(literal))
+                {
+                    return new(literal.Length, context.Replacement);
+                }
+
+                return null;
+            });
+
+    // Counts non-overlapping ordinal occurrences of value in text.
+    static int CountOccurrences(string text, string value)
+    {
+        var count = 0;
+        var index = 0;
+        while ((index = text.IndexOf(value, index, StringComparison.Ordinal)) != -1)
+        {
+            count++;
+            index += value.Length;
+        }
+
+        return count;
+    }
+}
\ No newline at end of file
diff --git a/src/Verify/Extensions.cs b/src/Verify/Extensions.cs
index afc379d1c..dc046164c 100644
--- a/src/Verify/Extensions.cs
+++ b/src/Verify/Extensions.cs
@@ -203,37 +203,12 @@ public static TValue GetOrAdd(this Dictionary dictio
         return dictionary[key] = factory(key);
     }
 
-    public static void ReplaceTokens(this StringBuilder builder, IReadOnlyCollection> replaces)
-    {
-        foreach (var replace in replaces)
-        {
-            if (builder.Length >= replace.Key.Length)
-            {
-                builder.Replace(replace.Key, replace.Value);
-            }
-        }
-    }
-
     public static void Overwrite(this StringBuilder builder, string value, int index, int length)
     {
         builder.Remove(index, length);
         builder.Insert(index, value);
     }
 
-    public static int Count(this StringBuilder builder, char ch)
-    {
-        var count = 0;
-        for (var index = 0; index < builder.Length; index++)
-        {
-            if (builder[index] == ch)
-            {
-                count++;
-            }
-        }
-
-        return count;
-    }
-
     public static bool IsException(this Type type) =>
         type.IsAssignableTo();
 }
\ No newline at end of file
diff --git a/src/Verify/Serialization/Scrubbers/CrossChunkMatcher.cs b/src/Verify/Serialization/Scrubbers/CrossChunkMatcher.cs
new file mode 100644
index 000000000..90242fd04
--- /dev/null
+++ b/src/Verify/Serialization/Scrubbers/CrossChunkMatcher.cs
@@ -0,0 +1,263 @@
+
+#if !NET6_0_OR_GREATER
+using ChunkEnumerator = Polyfills.Polyfill.ChunkEnumerator;
+#else
+using ChunkEnumerator = System.Text.StringBuilder.ChunkEnumerator;
+#endif
+
+/// <summary>
+/// Helper for matching and replacing patterns in StringBuilder that may span across chunk boundaries.
+/// </summary>
+static class CrossChunkMatcher
+{
+    ///
+    /// Finds all matches in a StringBuilder (handling patterns spanning chunk boundaries) and applies replacements.
+    ///
+    /// <param name="builder">The StringBuilder to search and modify</param>
+    /// <param name="maxLength">Maximum pattern length to search for</param>
+    /// <param name="context">User context passed to callbacks</param>
+    /// <param name="matcher">Called for each potential match position with accumulated buffer</param>
+    /// <exception cref="ArgumentException"><paramref name="maxLength"/> is zero or negative.</exception>
+    public static void ReplaceAll<TContext>(
+        StringBuilder builder,
+        int maxLength,
+        TContext context,
+        MatchHandler<TContext> matcher)
+    {
+        if (maxLength <= 0)
+        {
+            throw new ArgumentException("maxLength must be positive", nameof(maxLength));
+        }
+
+        // Fast path for single chunk
+        if (TryGetSingleChunk(builder, out var chunk))
+        {
+            ReplaceAllSingleChunk(builder, chunk, maxLength, context, matcher);
+            return;
+        }
+
+        // Multi-chunk path
+        ReplaceAllMultiChunk(builder, builder.GetChunks(), maxLength, context, matcher);
+    }
+
+#if NET8_0_OR_GREATER
+    // Accessors for the StringBuilder fields named in the attributes; they let the
+    // single-chunk check read the builder's buffer without enumerating or copying it.
+    [UnsafeAccessor(UnsafeAccessorKind.Field, Name = "m_ChunkPrevious")]
+    static extern ref StringBuilder? GetChunkPrevious(StringBuilder builder);
+
+    [UnsafeAccessor(UnsafeAccessorKind.Field, Name = "m_ChunkChars")]
+    static extern ref char[] GetChunkChars(StringBuilder builder);
+
+    [UnsafeAccessor(UnsafeAccessorKind.Field, Name = "m_ChunkLength")]
+    static extern ref int GetChunkLength(StringBuilder builder);
+
+    /// <summary>
+    /// Returns true, with the content in <paramref name="single"/>, when the builder has no previous chunk.
+    /// </summary>
+    static bool TryGetSingleChunk(StringBuilder builder, out CharSpan single)
+    {
+        if (GetChunkPrevious(builder) != null)
+        {
+            single = default;
+            return false;
+        }
+
+        single = new(GetChunkChars(builder), 0, GetChunkLength(builder));
+        return true;
+    }
+#else
+    /// <summary>
+    /// Returns true, with the content in <paramref name="single"/>, when enumerating the builder yields exactly one chunk.
+    /// </summary>
+    static bool TryGetSingleChunk(StringBuilder builder, out CharSpan single)
+    {
+        var enumerator = builder.GetChunks().GetEnumerator();
+        if (enumerator.MoveNext())
+        {
+            single = enumerator.Current.Span;
+            if (!enumerator.MoveNext())
+            {
+                return true;
+            }
+        }
+
+        single = default;
+        return false;
+    }
+#endif
+
+    /// <summary>
+    /// Scans one contiguous span, collects the matches, then applies them to the builder.
+    /// </summary>
+    static void ReplaceAllSingleChunk<TContext>(
+        StringBuilder builder,
+        CharSpan span,
+        int maxLength,
+        TContext context,
+        MatchHandler<TContext> matcher)
+    {
+        List<Match> matches = [];
+
+        for (var i = 0; i < span.Length; i++)
+        {
+            if (ShouldSkipPosition(span[i]))
+            {
+                continue;
+            }
+
+            // Get window at current position
+            var windowLength = Math.Min(maxLength, span.Length - i);
+            var window = span.Slice(i, windowLength);
+
+            if (!TryMatch(window, i, context, matcher, out var match))
+            {
+                continue;
+            }
+
+            matches.Add(match);
+
+            // Skip past the match. A match of length 0 or 1 consumes only the current position;
+            // subtracting 1 from a zero length would step backwards and repeat this position forever.
+            if (match.Length > 1)
+            {
+                i += match.Length - 1;
+            }
+        }
+
+        ApplyMatches(builder, matches);
+    }
+
+    // Largest window that is placed on the stack; longer windows use a heap array instead.
+    const int MaxStackAllocLength = 256;
+
+    /// <summary>
+    /// Scans the builder chunk by chunk, collects the matches, then applies them to the builder.
+    /// Windows that extend past the end of the current chunk are assembled by <see cref="FillBuffer"/>.
+    /// </summary>
+    static void ReplaceAllMultiChunk<TContext>(
+        StringBuilder builder,
+        ChunkEnumerator chunks,
+        int maxLength,
+        TContext context,
+        MatchHandler<TContext> matcher)
+    {
+        // Scratch space for windows that straddle a chunk boundary
+        Span<char> buffer = maxLength <= MaxStackAllocLength
+            ? stackalloc char[maxLength]
+            : new char[maxLength];
+        List<Match> matches = [];
+        var position = 0;
+        // Characters at the start of the upcoming chunk(s) that belong to the previous match
+        var carriedSkip = 0;
+
+        foreach (var chunk in chunks)
+        {
+            var span = chunk.Span;
+
+            if (carriedSkip >= span.Length)
+            {
+                // The previous match covers this whole chunk
+                carriedSkip -= span.Length;
+                position += span.Length;
+                continue;
+            }
+
+            var chunkIndex = carriedSkip;
+            carriedSkip = 0;
+
+            for (; chunkIndex < span.Length; chunkIndex++)
+            {
+                if (ShouldSkipPosition(span[chunkIndex]))
+                {
+                    continue;
+                }
+
+                var absolutePosition = position + chunkIndex;
+
+                // Build content window starting at current position. When the full window lies
+                // inside this chunk it is sliced directly; only boundary windows are copied.
+                CharSpan window = span.Length - chunkIndex >= maxLength
+                    ? span.Slice(chunkIndex, maxLength)
+                    : FillBuffer(builder, absolutePosition, buffer);
+
+                if (!TryMatch(window, absolutePosition, context, matcher, out var match))
+                {
+                    continue;
+                }
+
+                matches.Add(match);
+
+                // Skip past the match
+                var skipAmount = match.Length - 1;
+                if (skipAmount <= 0)
+                {
+                    continue;
+                }
+
+                var remaining = span.Length - chunkIndex - 1;
+                if (skipAmount > remaining)
+                {
+                    // The match runs into the following chunk(s): remember how much of them it
+                    // covers so scanning resumes after the match instead of inside it.
+                    carriedSkip = skipAmount - remaining;
+                    break;
+                }
+
+                chunkIndex += skipAmount;
+            }
+
+            position += span.Length;
+        }
+
+        ApplyMatches(builder, matches);
+    }
+
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    static bool ShouldSkipPosition(char ch) => ch is '\n' or '\r';
+
+    /// <summary>
+    /// Invokes <paramref name="matcher"/> on the window and converts a non-null result into a
+    /// <see cref="Match"/> anchored at <paramref name="position"/>.
+    /// </summary>
+    static bool TryMatch<TContext>(
+        CharSpan window,
+        int position,
+        TContext context,
+        MatchHandler<TContext> matcher,
+        out Match match)
+    {
+        var potentialMatch = matcher(window, position, context);
+
+        if (potentialMatch == null)
+        {
+            match = default;
+            return false;
+        }
+
+        var result = potentialMatch.Value;
+        match = new(position, result.Length, result.Replacement);
+        return true;
+    }
+
+    /// <summary>
+    /// Applies the collected matches to the builder.
+    /// </summary>
+    static void ApplyMatches(StringBuilder builder, List<Match> matches)
+    {
+        // Both scan loops record matches in ascending position order. Applying them last-to-first
+        // keeps the indexes of the matches that are still pending valid.
+        for (var i = matches.Count - 1; i >= 0; i--)
+        {
+            var match = matches[i];
+            builder.Overwrite(match.Value, match.Index, match.Length);
+        }
+    }
+
+    /// <summary>
+    /// Copies characters starting at absolute position <paramref name="start"/> into
+    /// <paramref name="buffer"/>, continuing across chunks until the buffer is full or the
+    /// builder ends, and returns the filled part of the buffer.
+    /// </summary>
+    static Span<char> FillBuffer(StringBuilder builder, int start, Span<char> buffer)
+    {
+        var bufferIndex = 0;
+        var currentPosition = 0;
+
+        foreach (var chunk in builder.GetChunks())
+        {
+            var chunkEnd = currentPosition + chunk.Length;
+
+            // Skip chunks before our start position
+            if (chunkEnd <= start)
+            {
+                currentPosition = chunkEnd;
+                continue;
+            }
+
+            // Determine where to start in this chunk
+            var chunkStart = start > currentPosition ? start - currentPosition : 0;
+
+            // Copy what we can from this chunk
+            var destinationSlice = buffer[bufferIndex..];
+            var toCopy = Math.Min(chunk.Length - chunkStart, destinationSlice.Length);
+
+            chunk.Span.Slice(chunkStart, toCopy).CopyTo(destinationSlice);
+            bufferIndex += toCopy;
+
+            // If buffer is full, we're done
+            if (bufferIndex >= buffer.Length)
+            {
+                break;
+            }
+
+            currentPosition = chunkEnd;
+        }
+
+        return buffer[..bufferIndex];
+    }
+
+    /// <summary>
+    /// Callback for checking if content matches and should be replaced.
+    /// </summary>
+    /// <param name="content">The current window content to check</param>
+    /// <param name="absolutePosition">Absolute position in the StringBuilder where this content starts</param>
+    /// <param name="context">User-provided context</param>
+    /// <returns>Match result indicating if a match was found and replacement details</returns>
+    public delegate MatchResult? MatchHandler<TContext>(
+        CharSpan content,
+        int absolutePosition,
+        TContext context);
+}
+
+///
+/// Result of a match check operation.
+///
+readonly struct MatchResult
+{
+    // Length reported by the matcher for the matched text
+    public readonly int Length;
+
+    // Text the matcher wants written in place of the match
+    public readonly string Replacement;
+
+    public MatchResult(int length, string replacement)
+    {
+        Length = length;
+        Replacement = replacement;
+    }
+}
+
+/// <summary>
+/// A match together with the index it was found at.
+/// </summary>
+readonly struct Match
+{
+    public readonly int Index;
+    public readonly int Length;
+    public readonly string Value;
+
+    public Match(int index, int length, string value)
+    {
+        Index = index;
+        Length = length;
+        Value = value;
+    }
+}
\ No newline at end of file
diff --git a/src/Verify/Serialization/Scrubbers/DateScrubber.cs b/src/Verify/Serialization/Scrubbers/DateScrubber.cs
index bfdafc9b7..56dccebc0 100644
--- a/src/Verify/Serialization/Scrubbers/DateScrubber.cs
+++ b/src/Verify/Serialization/Scrubbers/DateScrubber.cs
@@ -1,4 +1,4 @@
-// ReSharper disable ReturnValueOfPureMethodIsNotUsed
+// ReSharper disable ReturnValueOfPureMethodIsNotUsed
 static class DateScrubber
 {
     delegate bool TryConvert(
@@ -192,7 +192,7 @@ static bool TryGetFormatWithUpperMillisecondsTrimmed(string format, [NotNullWhen
         return false;
     }
 
-    static void ReplaceInner(StringBuilder builder, string format, Counter counter, Culture culture, TryConvert tryConvertDate)
+    static void ReplaceInner(StringBuilder builder, string format, Counter counter, Culture culture, TryConvert tryConvert)
     {
         if (!counter.ScrubDateTimes)
         {
@@ -206,69 +206,48 @@ static void ReplaceInner(StringBuilder builder, string format, Counter counter,
             return;
         }
 
-        if (min == max)
-        {
-            ReplaceFixedLength(builder, format, counter, culture, tryConvertDate, max);
-
-            return;
-        }
-
-        ReplaceVariableLength(builder, format, counter, culture, tryConvertDate, max, min);
-    }
+        var context = new MatchContext(format, counter, culture, tryConvert, max, min);
 
-    static void ReplaceVariableLength(StringBuilder builder, string format, Counter counter, Culture culture, TryConvert tryConvertDate, int longest, int shortest)
-    {
-        var value = builder.AsSpan();
-        var builderIndex = 0;
-        for (var index = 0; index <= value.Length; index++)
-        {
-            var found = false;
-            for (var length = longest; length >= shortest; length--)
+
CrossChunkMatcher.ReplaceAll(
+            builder,
+            maxLength: max,
+            context,
+            matcher: static (content, _, context) =>
             {
-                var end = index + length;
-                if (end > value.Length)
-                {
-                    continue;
-                }
-
-                var slice = value.Slice(index, length);
-                if (tryConvertDate(slice, format, counter, culture, out var convert))
+                // Try lengths from longest to shortest (greedy matching)
+                for (var length = context.MaxLength; length >= context.MinLength; length--)
                 {
-                    builder.Overwrite(convert, builderIndex, length);
-                    builderIndex += convert.Length;
-                    index += length - 1;
-                    found = true;
-                    break;
+                    // Not enough content for this length
+                    if (content.Length < length)
+                    {
+                        continue;
+                    }
+
+                    var slice = content[..length];
+
+                    if (context.TryConvert(slice, context.Format, context.Counter, context.Culture, out var converted))
+                    {
+                        return new MatchResult(length, converted);
+                    }
                 }
-            }
-
-            if (found)
-            {
-                continue;
-            }
 
-            builderIndex++;
-        }
+                return null;
+            });
     }
 
-    static void ReplaceFixedLength(StringBuilder builder, string format, Counter counter, Culture culture, TryConvert tryConvertDate, int length)
+    // Bundles everything the matcher lambda in ReplaceInner reads. The lambda is declared
+    // static, so it cannot capture locals; these values reach it through the context
+    // argument instead.
+    //   format, counter, culture, tryConvert: forwarded unchanged to the TryConvert call.
+    //   maxLength / minLength: the longest and shortest slice lengths the lambda tries,
+    //   in that order, at each position.
+    sealed class MatchContext(
+        string format,
+        Counter counter,
+        Culture culture,
+        TryConvert tryConvert,
+        int maxLength,
+        int minLength)
     {
-        var value = builder.AsSpan();
-        var builderIndex = 0;
-        var increment = length - 1;
-        for (var index = 0; index <= value.Length - length; index++)
-        {
-            var slice = value.Slice(index, length);
-            if (tryConvertDate(slice, format, counter, culture, out var convert))
-            {
-                builder.Overwrite(convert, builderIndex, length);
-                builderIndex += convert.Length;
-                index += increment;
-            }
-            else
-            {
-                builderIndex++;
-            }
-        }
+        public string Format { get; } = format;
+        public Counter Counter { get; } = counter;
+        public Culture Culture { get; } = culture;
+        public TryConvert TryConvert { get; } = tryConvert;
+        public int MaxLength { get; } = maxLength;
+        public int MinLength { get; } = minLength;
     }
 }
\ No newline at end of file
diff --git 
a/src/Verify/Serialization/Scrubbers/DirectoryReplacements_StringBuilder.cs b/src/Verify/Serialization/Scrubbers/DirectoryReplacements_StringBuilder.cs index a9d5551f9..2e86bed87 100644 --- a/src/Verify/Serialization/Scrubbers/DirectoryReplacements_StringBuilder.cs +++ b/src/Verify/Serialization/Scrubbers/DirectoryReplacements_StringBuilder.cs @@ -1,4 +1,4 @@ -static partial class DirectoryReplacements +static partial class DirectoryReplacements { public readonly struct Pair { @@ -32,190 +32,68 @@ public static void Replace(StringBuilder builder, List paths) throw new("Find should be distinct"); } #endif - if (builder.Length == 0) + if (builder.Length == 0 || paths.Count == 0) { return; } - var matches = FindMatches(builder, paths); + // pairs are ordered by length, so max length is the first one + var maxLength = paths[0].Find.Length; + var context = new MatchContext(builder, paths); - // Sort by position descending - var orderByDescending = matches.OrderByDescending(_ => _.Index); - - // Apply matches - foreach (var match in orderByDescending) - { - builder.Overwrite(match.Value, match.Index, match.Length); - } - } - - static List FindMatches(StringBuilder builder, List pairs) - { - if (pairs.Count == 0) - { - return []; - } - - var matches = new List(); - // Track matched positions - var matchedRanges = new List<(int Start, int End)>(); - var absolutePosition = 0; - - // pairs are ordered by length. 
so max length is the first one - var maxLength = pairs[0].Find.Length; - var carryoverSize = maxLength - 1; - - Span carryoverBuffer = stackalloc char[carryoverSize]; - Span combinedBuffer = stackalloc char[maxLength * 2]; - var carryoverLength = 0; - var previousChunkAbsoluteEnd = 0; - - foreach (var chunk in builder.GetChunks()) - { - var chunkSpan = chunk.Span; - - // Check for matches spanning from previous chunk to current chunk - if (carryoverLength > 0) - { - for (var carryoverIndex = 0; carryoverIndex < carryoverLength; carryoverIndex++) - { - foreach (var pair in pairs) - { - var remainingInCarryover = carryoverLength - carryoverIndex; - var neededFromCurrent = pair.Find.Length - remainingInCarryover; - - if (neededFromCurrent <= 0 || - neededFromCurrent > chunkSpan.Length) - { - continue; - } - - var combinedLength = remainingInCarryover + neededFromCurrent; - carryoverBuffer.Slice(carryoverIndex, remainingInCarryover).CopyTo(combinedBuffer); - chunkSpan[..neededFromCurrent].CopyTo(combinedBuffer[remainingInCarryover..]); - - var startPosition = previousChunkAbsoluteEnd - carryoverLength + carryoverIndex; - - // Check if this position overlaps with existing match - if (OverlapsExistingMatch(startPosition, pair.Find.Length, matchedRanges)) - { - continue; - } - - if (!TryMatchAtCrossChunk( - builder, - combinedBuffer[..combinedLength], - chunkSpan, - startPosition, - neededFromCurrent, - pair.Find, - out var matchLength)) - { - continue; - } - - matches.Add(new(startPosition, matchLength, pair.Replace)); - matchedRanges.Add((startPosition, startPosition + matchLength)); - // Found a match at this position, skip other pairs - break; - } - } - } - - // Process matches entirely within this chunk - for (var chunkIndex = 0; chunkIndex < chunk.Length; chunkIndex++) + CrossChunkMatcher.ReplaceAll( + builder, + maxLength: maxLength, + context, + matcher: static (content, absolutePosition, context) => { - var absoluteIndex = absolutePosition + chunkIndex; - // Skip 
if already matched - if (IsPositionMatched(absoluteIndex, matchedRanges)) + if (context.IsPositionMatched(absolutePosition)) { - continue; + return null; } - foreach (var pair in pairs) + foreach (var pair in context.Pairs) { - // Check if we have enough characters left in this chunk - if (chunkIndex + pair.Find.Length > chunk.Length) + // Not enough content for this pattern + if (content.Length < pair.Find.Length) { continue; } // Check if this would overlap with existing match - if (OverlapsExistingMatch(absoluteIndex, pair.Find.Length, matchedRanges)) + if (context.OverlapsExistingMatch(absolutePosition, pair.Find.Length)) { continue; } // Try to match at this position - if (!TryMatchAt(chunk, chunkIndex, pair.Find, out var matchLength)) + if (!TryMatchAt(context.Builder, content, absolutePosition, pair.Find, out var matchLength)) { continue; } - matches.Add(new(absoluteIndex, matchLength, pair.Replace)); - matchedRanges.Add((absoluteIndex, absoluteIndex + matchLength)); - // Skip past this match - chunkIndex += matchLength - 1; - // Found a match, skip other pairs at this position - break; + context.AddMatchedRange(absolutePosition, absolutePosition + matchLength); + return new MatchResult(matchLength, pair.Replace); } - } - - // Save last N chars for next iteration - carryoverLength = Math.Min(carryoverSize, chunk.Length); - chunkSpan.Slice(chunk.Length - carryoverLength, carryoverLength).CopyTo(carryoverBuffer); - - previousChunkAbsoluteEnd = absolutePosition + chunk.Length; - absolutePosition += chunk.Length; - } - - return matches; - } - - static bool IsPositionMatched(int position, List<(int Start, int End)> matchedRanges) - { - foreach (var (start, end) in matchedRanges) - { - if (position >= start && position < end) - { - return true; - } - } - - return false; - } - - static bool OverlapsExistingMatch(int start, int length, List<(int Start, int End)> matchedRanges) - { - var end = start + length; - foreach (var range in matchedRanges) - { - // Check if 
ranges overlap - if (start < range.End && end > range.Start) - { - return true; - } - } - return false; + return null; + }); } - static bool TryMatchAtCrossChunk( + static bool TryMatchAt( StringBuilder builder, - CharSpan combinedSpan, - CharSpan currentChunkSpan, - int absoluteStartPosition, - int neededFromCurrent, + CharSpan content, + int absolutePosition, string find, out int matchLength) { matchLength = 0; // Check preceding character - if (absoluteStartPosition > 0) + if (absolutePosition > 0) { - var preceding = builder[absoluteStartPosition - 1]; + var preceding = builder[absolutePosition - 1]; if (char.IsLetterOrDigit(preceding)) { return false; @@ -223,66 +101,34 @@ static bool TryMatchAtCrossChunk( } // Check if the path matches - if (!IsPathMatchAt(combinedSpan, 0, find)) + if (!IsPathMatchAt(content, 0, find)) { return false; } matchLength = find.Length; - // Check trailing character (it's in the current chunk) - if (neededFromCurrent < currentChunkSpan.Length) - { - var trailing = currentChunkSpan[neededFromCurrent]; - - // Invalid if trailing is letter or digit - if (char.IsLetterOrDigit(trailing)) - { - return false; - } - - // Greedy: include trailing separator - if (trailing is '/' or '\\') - { - matchLength++; - } - } - - return true; - } - - static bool TryMatchAt(ReadOnlyMemory chunk, int chunkPos, string find, out int matchLength) - { - var span = chunk.Span; - matchLength = 0; - - // Check preceding character - if (chunkPos > 0) + // Check trailing character + var trailingPosition = absolutePosition + find.Length; + if (trailingPosition >= builder.Length) { - var preceding = span[chunkPos - 1]; - if (char.IsLetterOrDigit(preceding)) - { - return false; - } + return true; } - // Check if the path matches - if (!IsPathMatchAt(span, chunkPos, find)) + // Check if we have the trailing character in our content window + // or need to look it up in the builder + char trailing; + if (find.Length < content.Length) { - return false; + // Trailing 
char is in our window
+            trailing = content[find.Length];
         }
-
-        // Check trailing character
-        matchLength = find.Length;
-        var trailingPos = chunkPos + find.Length;
-
-        if (trailingPos >= span.Length)
+        else
         {
-            return true;
+            // Need to look up in builder
+            trailing = builder[trailingPosition];
         }
 
-        var trailing = span[trailingPos];
-
         // Invalid if trailing is letter or digit
         if (char.IsLetterOrDigit(trailing))
         {
@@ -298,15 +144,15 @@ static bool TryMatchAt(ReadOnlyMemory chunk, int chunkPos, string find, ou
         return true;
     }
 
-    static bool IsPathMatchAt(CharSpan chunk, int chunkPos, string find)
+    static bool IsPathMatchAt(CharSpan content, int contentPos, string find)
     {
         for (var i = 0; i < find.Length; i++)
         {
-            var chunkChar = chunk[chunkPos + i];
+            var contentChar = content[contentPos + i];
             var findChar = find[i];
 
             // Treat / and \ as equivalent
-            if (chunkChar is '/' or '\\')
+            if (contentChar is '/' or '\\')
             {
                 if (findChar != '/')
                 {
@@ -316,7 +162,7 @@ static bool IsPathMatchAt(CharSpan chunk, int chunkPos, string find)
                 continue;
             }
 
-            if (chunkChar != findChar)
+            if (contentChar != findChar)
             {
                 return false;
             }
@@ -325,10 +171,32 @@ static bool IsPathMatchAt(CharSpan chunk, int chunkPos, string find)
         return true;
     }
 
-    readonly struct Match(int index, int length, string value)
+    /// <summary>
+    /// State handed to the static matcher callback: the builder being scanned, the
+    /// find/replace pairs, and the ranges already claimed by earlier matches.
+    /// </summary>
+    sealed class MatchContext(StringBuilder builder, List<Pair> pairs)
     {
-        public readonly int Index = index;
-        public readonly int Length = length;
-        public readonly string Value = value;
+        public StringBuilder Builder { get; } = builder;
+        public List<Pair> Pairs { get; } = pairs;
+        readonly List<(int Start, int End)> matchedRanges = [];
+
+        /// <summary>Records the half-open range [start, end) as matched.</summary>
+        public void AddMatchedRange(int start, int end) =>
+            matchedRanges.Add((start, end));
+
+        /// <summary>Returns true when <paramref name="position"/> lies inside a recorded range.</summary>
+        public bool IsPositionMatched(int position)
+        {
+            foreach (var (start, end) in matchedRanges)
+            {
+                if (position >= start && position < end)
+                {
+                    return true;
+                }
+            }
+
+            return false;
+        }
+
+        /// <summary>
+        /// Returns true when the range starting at <paramref name="start"/> with the given
+        /// <paramref name="length"/> intersects any recorded range.
+        /// </summary>
+        public bool OverlapsExistingMatch(int start, int length)
+        {
+            var end = start + length;
+
+            // Plain loop instead of LINQ Any: this runs for every candidate pair at every
+            // scanned position, and a lambda capturing start/end would allocate on each call.
+            foreach (var range in matchedRanges)
+            {
+                if (start < range.End && end > range.Start)
+                {
+                    return true;
+                }
+            }
+
+            return false;
+        }
     }
 }
\ No newline at end of file
diff --git a/src/Verify/Serialization/Scrubbers/GuidScrubber.cs b/src/Verify/Serialization/Scrubbers/GuidScrubber.cs
index 8582410a0..e6f84d314 100644
--- a/src/Verify/Serialization/Scrubbers/GuidScrubber.cs
+++ b/src/Verify/Serialization/Scrubbers/GuidScrubber.cs
@@ -1,4 +1,4 @@
-static class GuidScrubber
+static class GuidScrubber
 {
     public static void ReplaceGuids(StringBuilder builder, Counter counter)
     {
@@ -13,119 +13,44 @@ public static void ReplaceGuids(StringBuilder builder, Counter counter)
             return;
         }
 
-        var matches = FindMatches(builder, counter);
-
-        // Sort by position descending
-        var orderByDescending = matches.OrderByDescending(_ => _.Index);
-
-        // Apply matches
-        foreach (var match in orderByDescending)
-        {
-            builder.Overwrite(match.Value, match.Index, 36);
-        }
-    }
-
-    static List FindMatches(StringBuilder builder, Counter counter)
-    {
-        var absolutePosition = 0;
-        var matches = new List();
-        Span carryoverBuffer = stackalloc char[35];
-        Span buffer = stackalloc char[36];
-        var carryoverLength = 0;
-        var previousChunkAbsoluteEnd = 0;
-
-        foreach (var chunk in builder.GetChunks())
-        {
-            var chunkSpan = chunk.Span;
-
-            // Check for GUIDs spanning from previous chunk to current chunk
-            if (carryoverLength > 0)
+        CrossChunkMatcher.ReplaceAll(
+            builder,
+            maxLength: 36,
+            context: (Builder: builder, Counter: counter),
+            matcher: static (content, absolutePosition, context) =>
             {
-                // Check each possible starting position in the carryover
-                for (var carryoverIndex = 0; carryoverIndex < carryoverLength; carryoverIndex++)
+                // Need at least 36 characters for a GUID
+                if (content.Length < 36)
                 {
-                    var remainingInCarryover = carryoverLength - carryoverIndex;
-                    var neededFromCurrent = 36 - remainingInCarryover;
-
-                    if (neededFromCurrent <= 0 ||
-                        chunkSpan.Length < neededFromCurrent)
-                    {
-                        continue;
-                    }
-
-                    carryoverBuffer.Slice(carryoverIndex,
remainingInCarryover).CopyTo(buffer); - chunkSpan[..neededFromCurrent].CopyTo(buffer[remainingInCarryover..]); - - // Check boundary characters - var startPosition = previousChunkAbsoluteEnd - carryoverLength + carryoverIndex; - - var hasValidStart = startPosition == 0 || - !IsInvalidStartingChar(builder[startPosition - 1]); - - if (!hasValidStart) - { - continue; - } - - var hasValidEnd = neededFromCurrent >= chunkSpan.Length || - !IsInvalidEndingChar(chunkSpan[neededFromCurrent]); - - if (!hasValidEnd) - { - continue; - } - - if (!Guid.TryParseExact(buffer, "D", out var guid)) - { - continue; - } - - var convert = counter.Convert(guid); - matches.Add(new(startPosition, convert)); + return null; } - } - // Process GUIDs entirely within this chunk - if (chunk.Length >= 36) - { - for (var chunkIndex = 0; chunkIndex < chunk.Length; chunkIndex++) + // Validate start boundary (check character before the potential GUID) + if (absolutePosition > 0 && + IsInvalidStartingChar(context.Builder[absolutePosition - 1])) { - var end = chunkIndex + 36; - if (end > chunk.Length) - { - break; - } - - var value = chunkSpan; - if ((chunkIndex != 0 && IsInvalidStartingChar(value[chunkIndex - 1])) || - (end != value.Length && IsInvalidEndingChar(value[end]))) - { - continue; - } - - var slice = value.Slice(chunkIndex, 36); - - if (!Guid.TryParseExact(slice, "D", out var guid)) - { - continue; - } - - var convert = counter.Convert(guid); - var startReplaceIndex = absolutePosition + chunkIndex; - matches.Add(new(startReplaceIndex, convert)); - chunkIndex += 35; + return null; } - } - // Save last 35 chars for next iteration - carryoverLength = Math.Min(35, chunk.Length); - chunkSpan.Slice(chunk.Length - carryoverLength, carryoverLength).CopyTo(carryoverBuffer); + // Validate end boundary (check character after the potential GUID) + var endPosition = absolutePosition + 36; + if (endPosition < context.Builder.Length && + IsInvalidEndingChar(context.Builder[endPosition])) + { + return null; 
+ } - previousChunkAbsoluteEnd = absolutePosition + chunk.Length; - absolutePosition += chunk.Length; - } + // Try to parse as GUID + var slice = content.Slice(0, 36); + if (!Guid.TryParseExact(slice, "D", out var guid)) + { + return null; + } - return matches; + // Convert and return match + var converted = context.Counter.Convert(guid); + return new MatchResult(36, converted); + }); } static bool IsInvalidEndingChar(char ch) => @@ -141,10 +66,4 @@ static bool IsInvalidStartingChar(char ch) => IsInvalidChar(ch) && ch != '{' && ch != '('; - - internal readonly struct Match(int index, string value) - { - public readonly int Index = index; - public readonly string Value = value; - } } \ No newline at end of file diff --git a/src/VerifyCore.slnf b/src/VerifyCore.slnf index a57366b13..4115b5115 100644 --- a/src/VerifyCore.slnf +++ b/src/VerifyCore.slnf @@ -3,6 +3,7 @@ "path": "Verify.slnx", "projects": [ "DisableScrubbersTests\\DisableScrubbersTests.csproj", + "Benchmarks\\Benchmarks.csproj", "RawTempUsage\\RawTempUsage.csproj", "TargetLibrary\\TargetLibrary.csproj", "Verify.NUnit\\Verify.NUnit.csproj",