diff --git a/src/libraries/System.Memory/tests/Span/StringSearchValues.cs b/src/libraries/System.Memory/tests/Span/StringSearchValues.cs index 6e9c7a3451c25a..60dbf961ad2fc5 100644 --- a/src/libraries/System.Memory/tests/Span/StringSearchValues.cs +++ b/src/libraries/System.Memory/tests/Span/StringSearchValues.cs @@ -357,9 +357,61 @@ public static void IndexOfAny_InvalidUtf16() // These should hit the Aho-Corasick implementation [InlineData("a", "b")] [InlineData("ab", "c")] - // Simple Teddy cases + // Two-string specialization with same length values (short strings) + [InlineData("ab", "cd")] + [InlineData("ab", "xy")] + // Two-string specialization with same length values (medium strings) + [InlineData("foo", "bar")] + [InlineData("hello", "world")] + [InlineData("test1", "test2")] + // Two-string specialization with same length values (longer strings for vector testing) + [InlineData("abcdefghij", "klmnopqrst")] + [InlineData("abcdefghijklmno", "123456789abcdef")] + [InlineData("helloworld1234", "goodbyeworld56")] + // Two-string specialization with same first character + [InlineData("abc", "axy")] + [InlineData("hello", "happy")] + [InlineData("ab", "ac")] + [InlineData("test", "toad")] + // Two-string specialization with different first characters + [InlineData("abc", "xyz")] + [InlineData("foo", "baz")] [InlineData("abc", "cde")] [InlineData("abc", "cd")] + // Two-string specialization with different length values (short/medium) + [InlineData("ab", "abc")] + [InlineData("ab", "abcd")] + [InlineData("foo", "foobar")] + [InlineData("hello", "hi")] + [InlineData("test", "testing123")] + [InlineData("xy", "xyz123")] + [InlineData("abcdefgh", "ab")] + // Two-string specialization with different length values (longer strings) + [InlineData("ab", "abcdefghijklmnop")] + [InlineData("abcdefghijklmnop", "xy")] + [InlineData("hello", "helloworld12345")] + // Two-string specialization with special characters + [InlineData("ab", "!@")] + [InlineData("a!", "b@")] + 
[InlineData("foo!", "bar?")] + [InlineData("test%", "data#")] + // Two-string specialization with numbers + [InlineData("12", "34")] + [InlineData("123", "456")] + [InlineData("abc123", "def456")] + [InlineData("12ab", "34cd")] + // Two-string specialization with mixed case (will be tested case-sensitive and case-insensitive) + [InlineData("Ab", "Cd")] + [InlineData("Hello", "World")] + [InlineData("ABC", "XYZ")] + // Two-string specialization edge cases + [InlineData("ab", "bc")] + [InlineData("ab", "c!")] + [InlineData("abc", "bc")] + // Simple Teddy cases (3+ strings to ensure Teddy is used, not two-string specialization) + [InlineData("abc", "cde", "efg")] + [InlineData("abc", "cd", "ef")] + [InlineData("ab", "cd", "ef")] // Teddy where all starting chars are letters, but not all other characters are [InlineData("ab", "de%", "ghi", "jkl!")] [InlineData("abc", "def%", "ghi", "jkl!")] @@ -370,13 +422,9 @@ public static void IndexOfAny_InvalidUtf16() [InlineData("12", "45b", "789")] [InlineData("123", "456", "789")] [InlineData("123", "456a", "789b")] - // We'll expand these values to all case permutations - [InlineData("ab", "bc")] - [InlineData("ab", "c!")] + // 3+ strings to test Teddy; only the first set is expanded to all case permutations (the second would produce more than 8) [InlineData("ab", "c!", "!%")] - // These won't be expanded as they would produce more than 8 permutations [InlineData("ab", "bc", "c!")] - [InlineData("abc", "bc")] // Rabin-Karp where one of the values is longer than what the implementation can match (17) [InlineData("abc", "a012345678012345678")] // Rabin-Karp where all of the values are longer than what the implementation can match (17) diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index 836920e1c83952..80252690802240 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ 
b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -479,6 +479,7 @@ + diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/CharacterFrequencyHelper.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/CharacterFrequencyHelper.cs index 012250e301315b..017e1ea02eecbf 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/CharacterFrequencyHelper.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/CharacterFrequencyHelper.cs @@ -80,6 +80,43 @@ public static void GetSingleStringMultiCharacterOffsets(string value, bool ignor } } + /// + /// For a two-string search, we only need one additional anchor character per string (the first character is always used). + /// + public static int GetSecondCharacterOffset(string value, bool ignoreCase) => + GetSecondCharacterOffset(value, ignoreCase, value.Length); + + /// + /// For a two-string search with different length values, we constrain the offset to be within maxOffset. + /// This allows the inner loop to use the same offset bounds for both values. + /// + /// The string to search for anchor characters. + /// Whether the search is case-insensitive. + /// + /// The exclusive upper bound for the search range (must be at least 2). + /// When maxOffset is 2, only position 1 is considered for the second anchor since position 0 is always the first anchor. + /// + public static int GetSecondCharacterOffset(string value, bool ignoreCase, int maxOffset) + { + Debug.Assert(value.Length > 1); + // maxOffset must be at least 2 so there's at least one position (index 1) to consider for the second anchor. 
+ Debug.Assert(maxOffset >= 2 && maxOffset <= value.Length); + Debug.Assert(!ignoreCase || char.IsAscii(value[0])); + + int ch2Offset = IndexOfAsciiCharWithLowestFrequency(value.AsSpan(0, maxOffset), ignoreCase); + + if (ch2Offset < 0) + { + // We have fewer than 2 ASCII chars in the value (within maxOffset). + Debug.Assert(!ignoreCase); + + // We don't have a frequency table for non-ASCII characters, pick the last one within range. + ch2Offset = maxOffset - 1; + } + + return ch2Offset; + } + private static int IndexOfAsciiCharWithLowestFrequency(ReadOnlySpan span, bool ignoreCase, int excludeIndex = -1) { float minFrequency = float.MaxValue; @@ -123,5 +160,66 @@ private static int IndexOfAsciiCharWithLowestFrequency(ReadOnlySpan span, return minIndex; } + + /// + /// For a two-string search, finds the best shared second character offset that minimizes + /// the combined character frequency across both values at that offset. + /// This reduces the number of vector loads in the inner loop from 3 to 2. + /// + /// First search string. + /// Second search string. + /// Whether the search is case-insensitive. + /// The exclusive upper bound for the offset (typically min(value0.Length, value1.Length)). + /// The offset (1 to maxOffset-1) with the lowest combined frequency. + public static int GetSharedSecondCharacterOffset(string value0, string value1, bool ignoreCase, int maxOffset) + { + Debug.Assert(value0.Length > 1); + Debug.Assert(value1.Length > 1); + Debug.Assert(maxOffset >= 2 && maxOffset <= Math.Min(value0.Length, value1.Length)); + + float minCombinedFrequency = float.MaxValue; + // Default to the last valid offset within range (same as single-string case when no ASCII chars found). + // This provides better filtering by using a character further from position 0. 
+ int bestOffset = maxOffset - 1; + + // Search for the offset with lowest combined frequency across both values + for (int i = 1; i < maxOffset; i++) + { + char c0 = value0[i]; + char c1 = value1[i]; + + // We need both characters at this offset to be ASCII for frequency comparison + if (!char.IsAscii(c0) || !char.IsAscii(c1)) + { + continue; + } + + float freq0 = AsciiFrequency[c0]; + float freq1 = AsciiFrequency[c1]; + + if (ignoreCase) + { + freq0 += AsciiFrequency[c0 ^ 0x20]; + freq1 += AsciiFrequency[c1 ^ 0x20]; + } + + // Penalize early positions (same as single-value logic) + if (i <= 2) + { + freq0 *= 1.5f; + freq1 *= 1.5f; + } + + float combinedFrequency = freq0 + freq1; + + if (combinedFrequency < minCombinedFrequency) + { + minCombinedFrequency = combinedFrequency; + bestOffset = i; + } + } + + return bestOffset; + } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/StringSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/StringSearchValues.cs index 05db4d2f96bf73..191e12868a7f24 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/StringSearchValues.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/StringSearchValues.cs @@ -127,6 +127,13 @@ private static SearchValues CreateFromNormalizedValues( return CreateForSingleValue(values[0], uniqueValues, ignoreCase, allAscii, asciiLettersOnly); } + if (values.Length == 2 && + Vector128.IsHardwareAccelerated && + TryCreateForTwoValues(values, uniqueValues, ignoreCase, allAscii, asciiLettersOnly, nonAsciiAffectedByCaseConversion) is { } twoValuesSearchValues) + { + return twoValuesSearchValues; + } + if ((Ssse3.IsSupported || AdvSimd.Arm64.IsSupported) && TryGetTeddyAcceleratedValues(values, uniqueValues, ignoreCase, allAscii, asciiLettersOnly, nonAsciiAffectedByCaseConversion, minLength) is { } searchValues) { @@ -407,6 +414,100 @@ private static SearchValues CreateForSingleValue( : 
new SingleStringSearchValuesFallback(value, uniqueValues); } + private static SearchValues? TryCreateForTwoValues( + ReadOnlySpan values, + HashSet uniqueValues, + bool ignoreCase, + bool allAscii, + bool asciiLettersOnly, + bool nonAsciiAffectedByCaseConversion) + { + Debug.Assert(values.Length == 2); + + // Packed implementation requires SSE2 or ARM64 SIMD + if (!Sse2.IsSupported && !AdvSimd.Arm64.IsSupported) + { + return null; + } + + string value0 = values[0]; + string value1 = values[1]; + + // Both values must have at least 2 characters + if (value0.Length < 2 || value1.Length < 2) + { + return null; + } + + // Constrain offsets to be within the shorter value's length so both can be used + // with the same bounds in the vectorized inner loop. + int minLength = Math.Min(value0.Length, value1.Length); + + // For case-insensitive matching with non-ASCII affected by case conversion, + // we need ASCII anchor characters within the minLength range of both values + if (nonAsciiAffectedByCaseConversion) + { + // Check that both values have at least 2 ASCII characters within minLength (for anchor points) + if (!char.IsAscii(value0[0]) || !char.IsAscii(value1[0])) + { + return null; + } + + // Need at least one more ASCII character in each value within the minLength range for the second anchor + if (!value0.AsSpan(1, minLength - 1).ContainsAnyInRange((char)0, (char)127) || + !value1.AsSpan(1, minLength - 1).ContainsAnyInRange((char)0, (char)127)) + { + return null; + } + } + + // Get the shared second character offset with lowest combined frequency across both values. + // Using a shared offset reduces vector loads in the inner loop from 3 to 2. 
+ int ch2Offset = CharacterFrequencyHelper.GetSharedSecondCharacterOffset(value0, value1, ignoreCase, minLength); + + // Packed implementation requires all anchor characters to be packable (fit in a byte with certain constraints) + if (!CanUsePackedImpl(value0[0]) || !CanUsePackedImpl(value0[ch2Offset]) || + !CanUsePackedImpl(value1[0]) || !CanUsePackedImpl(value1[ch2Offset])) + { + return null; + } + + if (!ignoreCase) + { + return new TwoStringSearchValuesPackedThreeChars(uniqueValues, value0, value1, ch2Offset); + } + + // For case-insensitive search, ensure anchor characters are ASCII + if (!char.IsAscii(value0[0]) || !char.IsAscii(value0[ch2Offset]) || + !char.IsAscii(value1[0]) || !char.IsAscii(value1[ch2Offset])) + { + return null; + } + + if (asciiLettersOnly) + { + return new TwoStringSearchValuesPackedThreeChars(uniqueValues, value0, value1, ch2Offset); + } + + if (allAscii) + { + return new TwoStringSearchValuesPackedThreeChars(uniqueValues, value0, value1, ch2Offset); + } + + if (nonAsciiAffectedByCaseConversion) + { + return new TwoStringSearchValuesPackedThreeChars(uniqueValues, value0, value1, ch2Offset); + } + + return new TwoStringSearchValuesPackedThreeChars(uniqueValues, value0, value1, ch2Offset); + } + + // Unlike with PackedSpanHelpers (Sse2 only), we are also using this approach on ARM64. + // We use PackUnsignedSaturate on X86 and UnzipEven on ARM, so the set of allowed characters differs slightly (we can't use it for \0 and \xFF on X86). + private static bool CanUsePackedImpl(char c) => + PackedSpanHelpers.PackedIndexOfIsSupported ? PackedSpanHelpers.CanUsePackedIndexOf(c) : + (AdvSimd.Arm64.IsSupported && c <= byte.MaxValue); + private static SearchValues? TryCreateSingleValuesThreeChars( string value, HashSet? uniqueValues, @@ -454,12 +555,6 @@ private static SearchValues CreateSingleValuesThreeChars(uniqueValues, value, ch2Offset, ch3Offset); - - // Unlike with PackedSpanHelpers (Sse2 only), we are also using this approach on ARM64. 
- // We use PackUnsignedSaturate on X86 and UnzipEven on ARM, so the set of allowed characters differs slightly (we can't use it for \0 and \xFF on X86). - static bool CanUsePackedImpl(char c) => - PackedSpanHelpers.PackedIndexOfIsSupported ? PackedSpanHelpers.CanUsePackedIndexOf(c) : - (AdvSimd.Arm64.IsSupported && c <= byte.MaxValue); } private static void AnalyzeValues( diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/TwoStringSearchValuesPackedThreeChars.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/TwoStringSearchValuesPackedThreeChars.cs new file mode 100644 index 00000000000000..497359d6c02bcd --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/TwoStringSearchValuesPackedThreeChars.cs @@ -0,0 +1,471 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Collections.Generic; +using System.Diagnostics; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.X86; +using static System.Buffers.StringSearchValuesHelper; + +namespace System.Buffers +{ + /// + /// Specialized implementation for exactly two strings. + /// Uses packed byte comparisons similar to the single-string packed variant + /// to process twice as many characters per iteration. + /// + /// + /// This implementation packs two consecutive Vector<ushort> inputs into one Vector<byte>, + /// allowing it to compare 16/32/64 character positions per iteration. + /// It uses a shared second character offset for both values (4 comparisons total: v0Ch1, v0Ch2, v1Ch1, v1Ch2), + /// but only requires 2 vector loads per iteration (input at offset 0 and input at the shared offset). + /// This reduces memory bandwidth compared to using separate offsets per value. 
+ /// The ThreeChars suffix in the type name is retained for consistency with the single-string variant and to reflect + /// the algorithm family it belongs to; it does not mean that this type uses three anchor characters per string. + /// + internal sealed class TwoStringSearchValuesPackedThreeChars : StringSearchValuesBase + where TCaseSensitivity : struct, ICaseSensitivity + { + private const byte CaseConversionMask = unchecked((byte)~0x20); + + private readonly string _value0; + private readonly string _value1; + private readonly nint _minusValueTailLength; + private readonly int _minValueLength; + + // First character anchors (at offset 0) for each value + private readonly byte _v0Ch1; + private readonly byte _v1Ch1; + + // Second character anchors at shared offset + private readonly nuint _ch2ByteOffset; + private readonly byte _v0Ch2; + private readonly byte _v1Ch2; + + private static bool IgnoreCase => typeof(TCaseSensitivity) != typeof(CaseSensitive); + + public TwoStringSearchValuesPackedThreeChars(HashSet uniqueValues, string value0, string value1, int ch2Offset) + : base(uniqueValues) + { + Debug.Assert(Sse2.IsSupported || AdvSimd.Arm64.IsSupported); + Debug.Assert(value0.Length > 1); + Debug.Assert(value1.Length > 1); + Debug.Assert(ch2Offset > 0); + Debug.Assert(ch2Offset < Math.Min(value0.Length, value1.Length)); + Debug.Assert(value0[0] <= byte.MaxValue && value0[ch2Offset] <= byte.MaxValue); + Debug.Assert(value1[0] <= byte.MaxValue && value1[ch2Offset] <= byte.MaxValue); + + _value0 = value0; + _value1 = value1; + + int minLength = Math.Min(value0.Length, value1.Length); + _minValueLength = minLength; + + // We need to reserve space for reading at ch2Offset from the starting position, + // and for verifying the full value (minLength - 1 chars after the starting position for the shorter value) + _minusValueTailLength = -Math.Max(minLength - 1, ch2Offset); + + _v0Ch1 = (byte)value0[0]; + _v1Ch1 = (byte)value1[0]; + _v0Ch2 = (byte)value0[ch2Offset]; 
+ _v1Ch2 = (byte)value1[ch2Offset]; + + if (IgnoreCase) + { + _v0Ch1 &= CaseConversionMask; + _v1Ch1 &= CaseConversionMask; + _v0Ch2 &= CaseConversionMask; + _v1Ch2 &= CaseConversionMask; + } + + _ch2ByteOffset = (nuint)ch2Offset * sizeof(char); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal override int IndexOfAnyMultiString(ReadOnlySpan span) => + IndexOf(ref MemoryMarshal.GetReference(span), span.Length); + + private int IndexOf(ref char searchSpace, int searchSpaceLength) + { + ref char searchSpaceStart = ref searchSpace; + + // Calculate how many positions we can safely search (accounting for max offset needed) + nint searchSpaceMinusValueTailLength = searchSpaceLength + _minusValueTailLength; + + nuint ch2ByteOffset = _ch2ByteOffset; + + // Packed variant processes Vector.Count characters at a time + if (Vector512.IsHardwareAccelerated && Avx512BW.IsSupported && searchSpaceMinusValueTailLength - Vector512.Count >= 0) + { + Vector512 v0Ch1Vec = Vector512.Create(_v0Ch1); + Vector512 v0Ch2Vec = Vector512.Create(_v0Ch2); + Vector512 v1Ch1Vec = Vector512.Create(_v1Ch1); + Vector512 v1Ch2Vec = Vector512.Create(_v1Ch2); + + ref char lastSearchSpace = ref Unsafe.Add(ref searchSpace, searchSpaceMinusValueTailLength - Vector512.Count); + + while (true) + { + ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref searchSpace, Vector512.Count); + ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref searchSpace, Vector512.Count + (int)(ch2ByteOffset / sizeof(char))); + + Vector512 result = GetComparisonResult(ref searchSpace, ch2ByteOffset, v0Ch1Vec, v0Ch2Vec, v1Ch1Vec, v1Ch2Vec); + + if (result != Vector512.Zero) + { + goto CandidateFound512; + } + + LoopFooter512: + searchSpace = ref Unsafe.Add(ref searchSpace, Vector512.Count); + + if (Unsafe.IsAddressGreaterThan(ref searchSpace, ref lastSearchSpace)) + { + if (Unsafe.AreSame(ref searchSpace, ref Unsafe.Add(ref lastSearchSpace, Vector512.Count))) + { + return -1; + } + + 
searchSpace = ref lastSearchSpace; + } + + continue; + + CandidateFound512: + if (TryMatch(ref searchSpaceStart, searchSpaceLength, ref searchSpace, PackedSpanHelpers.FixUpPackedVector512Result(result).ExtractMostSignificantBits(), out int offset)) + { + return offset; + } + goto LoopFooter512; + } + } + else if (Vector256.IsHardwareAccelerated && Avx2.IsSupported && searchSpaceMinusValueTailLength - Vector256.Count >= 0) + { + Vector256 v0Ch1Vec = Vector256.Create(_v0Ch1); + Vector256 v0Ch2Vec = Vector256.Create(_v0Ch2); + Vector256 v1Ch1Vec = Vector256.Create(_v1Ch1); + Vector256 v1Ch2Vec = Vector256.Create(_v1Ch2); + + ref char lastSearchSpace = ref Unsafe.Add(ref searchSpace, searchSpaceMinusValueTailLength - Vector256.Count); + + while (true) + { + ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref searchSpace, Vector256.Count); + ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref searchSpace, Vector256.Count + (int)(ch2ByteOffset / sizeof(char))); + + Vector256 result = GetComparisonResult(ref searchSpace, ch2ByteOffset, v0Ch1Vec, v0Ch2Vec, v1Ch1Vec, v1Ch2Vec); + + if (result != Vector256.Zero) + { + goto CandidateFound256; + } + + LoopFooter256: + searchSpace = ref Unsafe.Add(ref searchSpace, Vector256.Count); + + if (Unsafe.IsAddressGreaterThan(ref searchSpace, ref lastSearchSpace)) + { + if (Unsafe.AreSame(ref searchSpace, ref Unsafe.Add(ref lastSearchSpace, Vector256.Count))) + { + return -1; + } + + searchSpace = ref lastSearchSpace; + } + + continue; + + CandidateFound256: + if (TryMatch(ref searchSpaceStart, searchSpaceLength, ref searchSpace, PackedSpanHelpers.FixUpPackedVector256Result(result).ExtractMostSignificantBits(), out int offset)) + { + return offset; + } + goto LoopFooter256; + } + } + else if ((Sse2.IsSupported || AdvSimd.Arm64.IsSupported) && searchSpaceMinusValueTailLength - Vector128.Count >= 0) + { + Vector128 v0Ch1Vec = Vector128.Create(_v0Ch1); + Vector128 v0Ch2Vec = Vector128.Create(_v0Ch2); + Vector128 
v1Ch1Vec = Vector128.Create(_v1Ch1); + Vector128 v1Ch2Vec = Vector128.Create(_v1Ch2); + + ref char lastSearchSpace = ref Unsafe.Add(ref searchSpace, searchSpaceMinusValueTailLength - Vector128.Count); + + while (true) + { + ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref searchSpace, Vector128.Count); + ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref searchSpace, Vector128.Count + (int)(ch2ByteOffset / sizeof(char))); + + Vector128 result = GetComparisonResult(ref searchSpace, ch2ByteOffset, v0Ch1Vec, v0Ch2Vec, v1Ch1Vec, v1Ch2Vec); + + if (result != Vector128.Zero) + { + goto CandidateFound128; + } + + LoopFooter128: + searchSpace = ref Unsafe.Add(ref searchSpace, Vector128.Count); + + if (Unsafe.IsAddressGreaterThan(ref searchSpace, ref lastSearchSpace)) + { + if (Unsafe.AreSame(ref searchSpace, ref Unsafe.Add(ref lastSearchSpace, Vector128.Count))) + { + return -1; + } + + searchSpace = ref lastSearchSpace; + } + + continue; + + CandidateFound128: + if (TryMatch(ref searchSpaceStart, searchSpaceLength, ref searchSpace, result.ExtractMostSignificantBits(), out int offset)) + { + return offset; + } + goto LoopFooter128; + } + } + + // Fallback: Iterate through all valid starting positions + nint shortInputEnd = searchSpaceLength - _minValueLength + 1; + for (nint i = 0; i < shortInputEnd; i++) + { + ref char cur = ref Unsafe.Add(ref searchSpace, i); + + if (StartsWith(ref cur, searchSpaceLength - (int)i, _value0) || + StartsWith(ref cur, searchSpaceLength - (int)i, _value1)) + { + return (int)i; + } + } + + return -1; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(Sse2))] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + private static Vector128 GetComparisonResult( + ref char searchSpace, + nuint ch2ByteOffset, + Vector128 v0Ch1, Vector128 v0Ch2, + Vector128 v1Ch1, Vector128 v1Ch2) + { + if (typeof(TCaseSensitivity) == typeof(CaseSensitive)) + { + // Load packed input at position 0 and 
at the shared second character offset + Vector128 input0 = LoadPacked128(ref searchSpace, 0); + Vector128 inputCh2 = LoadPacked128(ref searchSpace, ch2ByteOffset); + + // Compare first characters of both values + Vector128 cmpV0Ch1 = Vector128.Equals(v0Ch1, input0); + Vector128 cmpV1Ch1 = Vector128.Equals(v1Ch1, input0); + + // Compare second characters at the shared offset + Vector128 cmpV0Ch2 = Vector128.Equals(v0Ch2, inputCh2); + Vector128 cmpV1Ch2 = Vector128.Equals(v1Ch2, inputCh2); + + // A position matches if (value0's ch1 AND ch2 match) OR (value1's ch1 AND ch2 match) + Vector128 matchV0 = cmpV0Ch1 & cmpV0Ch2; + Vector128 matchV1 = cmpV1Ch1 & cmpV1Ch2; + + return matchV0 | matchV1; + } + else + { + Vector128 caseConversion = Vector128.Create(CaseConversionMask); + + // Load packed input at position 0 and at the shared second character offset, applying case conversion + Vector128 input0 = LoadPacked128(ref searchSpace, 0) & caseConversion; + Vector128 inputCh2 = LoadPacked128(ref searchSpace, ch2ByteOffset) & caseConversion; + + // Compare first characters of both values + Vector128 cmpV0Ch1 = Vector128.Equals(v0Ch1, input0); + Vector128 cmpV1Ch1 = Vector128.Equals(v1Ch1, input0); + + // Compare second characters at the shared offset + Vector128 cmpV0Ch2 = Vector128.Equals(v0Ch2, inputCh2); + Vector128 cmpV1Ch2 = Vector128.Equals(v1Ch2, inputCh2); + + // A position matches if (value0's ch1 AND ch2 match) OR (value1's ch1 AND ch2 match) + Vector128 matchV0 = cmpV0Ch1 & cmpV0Ch2; + Vector128 matchV1 = cmpV1Ch1 & cmpV1Ch2; + + return matchV0 | matchV1; + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(Avx2))] + private static Vector256 GetComparisonResult( + ref char searchSpace, + nuint ch2ByteOffset, + Vector256 v0Ch1, Vector256 v0Ch2, + Vector256 v1Ch1, Vector256 v1Ch2) + { + if (typeof(TCaseSensitivity) == typeof(CaseSensitive)) + { + Vector256 input0 = LoadPacked256(ref searchSpace, 0); + Vector256 inputCh2 = 
LoadPacked256(ref searchSpace, ch2ByteOffset); + + Vector256 cmpV0Ch1 = Vector256.Equals(v0Ch1, input0); + Vector256 cmpV1Ch1 = Vector256.Equals(v1Ch1, input0); + Vector256 cmpV0Ch2 = Vector256.Equals(v0Ch2, inputCh2); + Vector256 cmpV1Ch2 = Vector256.Equals(v1Ch2, inputCh2); + + Vector256 matchV0 = cmpV0Ch1 & cmpV0Ch2; + Vector256 matchV1 = cmpV1Ch1 & cmpV1Ch2; + + return matchV0 | matchV1; + } + else + { + Vector256 caseConversion = Vector256.Create(CaseConversionMask); + + Vector256 input0 = LoadPacked256(ref searchSpace, 0) & caseConversion; + Vector256 inputCh2 = LoadPacked256(ref searchSpace, ch2ByteOffset) & caseConversion; + + Vector256 cmpV0Ch1 = Vector256.Equals(v0Ch1, input0); + Vector256 cmpV1Ch1 = Vector256.Equals(v1Ch1, input0); + Vector256 cmpV0Ch2 = Vector256.Equals(v0Ch2, inputCh2); + Vector256 cmpV1Ch2 = Vector256.Equals(v1Ch2, inputCh2); + + Vector256 matchV0 = cmpV0Ch1 & cmpV0Ch2; + Vector256 matchV1 = cmpV1Ch1 & cmpV1Ch2; + + return matchV0 | matchV1; + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(Avx512BW))] + private static Vector512 GetComparisonResult( + ref char searchSpace, + nuint ch2ByteOffset, + Vector512 v0Ch1, Vector512 v0Ch2, + Vector512 v1Ch1, Vector512 v1Ch2) + { + if (typeof(TCaseSensitivity) == typeof(CaseSensitive)) + { + Vector512 input0 = LoadPacked512(ref searchSpace, 0); + Vector512 inputCh2 = LoadPacked512(ref searchSpace, ch2ByteOffset); + + Vector512 cmpV0Ch1 = Vector512.Equals(v0Ch1, input0); + Vector512 cmpV1Ch1 = Vector512.Equals(v1Ch1, input0); + Vector512 cmpV0Ch2 = Vector512.Equals(v0Ch2, inputCh2); + Vector512 cmpV1Ch2 = Vector512.Equals(v1Ch2, inputCh2); + + Vector512 matchV0 = cmpV0Ch1 & cmpV0Ch2; + Vector512 matchV1 = cmpV1Ch1 & cmpV1Ch2; + + return matchV0 | matchV1; + } + else + { + Vector512 caseConversion = Vector512.Create(CaseConversionMask); + + Vector512 input0 = LoadPacked512(ref searchSpace, 0) & caseConversion; + Vector512 inputCh2 = LoadPacked512(ref 
searchSpace, ch2ByteOffset) & caseConversion; + + Vector512 cmpV0Ch1 = Vector512.Equals(v0Ch1, input0); + Vector512 cmpV1Ch1 = Vector512.Equals(v1Ch1, input0); + Vector512 cmpV0Ch2 = Vector512.Equals(v0Ch2, inputCh2); + Vector512 cmpV1Ch2 = Vector512.Equals(v1Ch2, inputCh2); + + Vector512 matchV0 = cmpV0Ch1 & cmpV0Ch2; + Vector512 matchV1 = cmpV1Ch1 & cmpV1Ch2; + + return matchV0 | matchV1; + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private bool TryMatch(ref char searchSpaceStart, int searchSpaceLength, ref char searchSpace, uint mask, out int offsetFromStart) + { + do + { + int bitPos = BitOperations.TrailingZeroCount(mask); + + ref char matchRef = ref Unsafe.Add(ref searchSpace, bitPos); + int lengthRemaining = searchSpaceLength - (int)((nuint)Unsafe.ByteOffset(ref searchSpaceStart, ref matchRef) / sizeof(char)); + + ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref matchRef, Math.Min(_value0.Length, lengthRemaining)); + + // Check both values - return the one that matches + if (StartsWith(ref matchRef, lengthRemaining, _value0) || + StartsWith(ref matchRef, lengthRemaining, _value1)) + { + offsetFromStart = (int)((nuint)Unsafe.ByteOffset(ref searchSpaceStart, ref matchRef) / sizeof(char)); + return true; + } + + mask = BitOperations.ResetLowestSetBit(mask); + } + while (mask != 0); + + offsetFromStart = 0; + return false; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private bool TryMatch(ref char searchSpaceStart, int searchSpaceLength, ref char searchSpace, ulong mask, out int offsetFromStart) + { + do + { + int bitPos = BitOperations.TrailingZeroCount(mask); + + ref char matchRef = ref Unsafe.Add(ref searchSpace, bitPos); + int lengthRemaining = searchSpaceLength - (int)((nuint)Unsafe.ByteOffset(ref searchSpaceStart, ref matchRef) / sizeof(char)); + + ValidateReadPosition(ref searchSpaceStart, searchSpaceLength, ref matchRef, Math.Min(_value0.Length, lengthRemaining)); + + // Check both values - return the 
one that matches + if (StartsWith(ref matchRef, lengthRemaining, _value0) || + StartsWith(ref matchRef, lengthRemaining, _value1)) + { + offsetFromStart = (int)((nuint)Unsafe.ByteOffset(ref searchSpaceStart, ref matchRef) / sizeof(char)); + return true; + } + + mask = BitOperations.ResetLowestSetBit(mask); + } + while (mask != 0); + + offsetFromStart = 0; + return false; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(Sse2))] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + private static Vector128 LoadPacked128(ref char searchSpace, nuint byteOffset) + { + Vector128 input0 = Vector128.LoadUnsafe(ref Unsafe.AddByteOffset(ref searchSpace, byteOffset)); + Vector128 input1 = Vector128.LoadUnsafe(ref Unsafe.AddByteOffset(ref searchSpace, byteOffset + (uint)Vector128.Count)); + + return Sse2.IsSupported + ? Sse2.PackUnsignedSaturate(input0.AsInt16(), input1.AsInt16()) + : AdvSimd.Arm64.UnzipEven(input0.AsByte(), input1.AsByte()); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(Avx2))] + private static Vector256 LoadPacked256(ref char searchSpace, nuint byteOffset) => + Avx2.PackUnsignedSaturate( + Vector256.LoadUnsafe(ref Unsafe.AddByteOffset(ref searchSpace, byteOffset)).AsInt16(), + Vector256.LoadUnsafe(ref Unsafe.AddByteOffset(ref searchSpace, byteOffset + (uint)Vector256.Count)).AsInt16()); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(Avx512BW))] + private static Vector512 LoadPacked512(ref char searchSpace, nuint byteOffset) => + Avx512BW.PackUnsignedSaturate( + Vector512.LoadUnsafe(ref Unsafe.AddByteOffset(ref searchSpace, byteOffset)).AsInt16(), + Vector512.LoadUnsafe(ref Unsafe.AddByteOffset(ref searchSpace, byteOffset + (uint)Vector512.Count)).AsInt16()); + } +} diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs 
b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs index 8edbf03d5aec04..a97bce55d4c42a 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs @@ -183,18 +183,15 @@ private RegexFindOptimizations(RegexNode root, RegexOptions options, bool isLead return; } - // TODO: While some benchmarks benefit from this significantly, others regressed a bit (in particular those with few - // matches). Before enabling this, we need to investigate the performance impact on real-world scenarios, - // and see if there are ways to reduce the impact. - //if (RegexPrefixAnalyzer.FindPrefixes(root, ignoreCase: false) is { Length: > 1 } caseSensitivePrefixes) - //{ - // LeadingPrefixes = caseSensitivePrefixes; - // FindMode = FindNextStartingPositionMode.LeadingStrings_LeftToRight; + if (RegexPrefixAnalyzer.FindPrefixes(root, ignoreCase: false) is { Length: > 1 } caseSensitivePrefixes) + { + LeadingPrefixes = caseSensitivePrefixes; + FindMode = FindNextStartingPositionMode.LeadingStrings_LeftToRight; #if SYSTEM_TEXT_REGULAREXPRESSIONS - // LeadingStrings = SearchValues.Create(LeadingPrefixes, StringComparison.Ordinal); + LeadingStrings = SearchValues.Create(LeadingPrefixes, StringComparison.Ordinal); #endif - // return; - //} + return; + } } // Build up a list of all of the sets that are a fixed distance from the start of the expression.