@@ -1699,6 +1699,85 @@ fuzzy_match_find(Arena *arena, String8 needle, String8 haystack)
16991699 scratch_end (scratch );
17001700 return result ;
17011701}
1702+
1703+ internal ScoredFuzzyMatchRangeList
1704+ scored_fuzzy_match_find (Arena * arena , String8 needle , String8 haystack )
1705+ {
1706+ Temp scratch = scratch_begin (0 , 0 );
1707+ // We're going to implement a very simple scoring mechanism similar to that described in
1708+ // https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/.
1709+ #define scored_fuzzy_match_unmatched -1
1710+ #define scored_fuzzy_match_consecutive 5
1711+ #define scored_fuzzy_match_unmatched_leading -3
1712+ ScoredFuzzyMatchRangeList invalid = {0 };
1713+ ScoredFuzzyMatchRangeList result = {0 };
1714+ // Simplify to a single needle which has common delimiters removed.
1715+ String8List needles = str8_split (scratch .arena , needle , (U8 * )" " , 1 , 0 );
1716+ needle = str8_list_join (scratch .arena , & needles , 0 );
1717+ if (needle .size == 0 )
1718+ {
1719+ scratch_end (scratch );
1720+ return invalid ;
1721+ }
1722+ String8 tmp_str = str8 (needle .str , 1 );
1723+ U64 find_pos = 0 ;
1724+ find_pos = str8_find_needle (haystack , find_pos , tmp_str , StringMatchFlag_CaseInsensitive );
1725+ if (find_pos >= haystack .size )
1726+ {
1727+ scratch_end (scratch );
1728+ return invalid ;
1729+ }
1730+ // Leading character penalty.
1731+ // Only go to a max of 3 based on the article.
1732+ result .score += Min (find_pos , 3 ) * scored_fuzzy_match_unmatched_leading ;
1733+ // We also want to deduct for additional unmatched characters between start and find_pos.
1734+ if (find_pos > 3 )
1735+ {
1736+ result .score += (find_pos - 3 ) * scored_fuzzy_match_unmatched ;
1737+ }
1738+ Rng1U64 range = r1u64 (find_pos , find_pos + 1 );
1739+ FuzzyMatchRangeNode * n = push_array (arena , FuzzyMatchRangeNode , 1 );
1740+ n -> range = range ;
1741+ SLLQueuePush (result .list .first , result .list .last , n );
1742+ result .list .count += 1 ;
1743+ // Match the rest.
1744+ U64 prev_found = find_pos ;
1745+ U64 search_start = 0 ;
1746+ find_pos += 1 ;
1747+ for (U64 idx = 1 ; idx < needle .size ; ++ idx )
1748+ {
1749+ tmp_str = str8 (needle .str + idx , 1 );
1750+ search_start = find_pos ;
1751+ find_pos = str8_find_needle (haystack , find_pos , tmp_str , StringMatchFlag_CaseInsensitive );
1752+ if (find_pos >= haystack .size )
1753+ {
1754+ scratch_end (scratch );
1755+ return invalid ;
1756+ }
1757+ // Compute consecutive bonus.
1758+ if (prev_found + 1 == find_pos )
1759+ {
1760+ result .score += scored_fuzzy_match_consecutive ;
1761+ // We can reuse the existing node and simply extend it.
1762+ result .list .last -> range .max = find_pos + 1 ;
1763+ }
1764+ else
1765+ {
1766+ result .score += (find_pos - search_start ) * scored_fuzzy_match_unmatched ;
1767+ Rng1U64 range = r1u64 (find_pos , find_pos + 1 );
1768+ FuzzyMatchRangeNode * n = push_array (arena , FuzzyMatchRangeNode , 1 );
1769+ n -> range = range ;
1770+ SLLQueuePush (result .list .first , result .list .last , n );
1771+ result .list .count += 1 ;
1772+ }
1773+ prev_found = find_pos ;
1774+ find_pos += 1 ;
1775+ }
1776+ // Compute final unmatched characters.
1777+ result .score += (haystack .size - find_pos ) * scored_fuzzy_match_unmatched ;
1778+ scratch_end (scratch );
1779+ return result ;
1780+ }
////////////////////////////////
//~ NOTE(allen): Serialization Helpers
17041783
0 commit comments