-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtypolice.go
More file actions
123 lines (99 loc) · 2.83 KB
/
typolice.go
File metadata and controls
123 lines (99 loc) · 2.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
package typolice
import (
_ "embed"
"encoding/json"
"sort"
"strings"
"github.com/cloudflare/ahocorasick"
)
// formatVariantsJSON holds the raw contents of data/format_variants.json,
// embedded into the binary at build time.
//
//go:embed data/format_variants.json
var formatVariantsJSON []byte
// awsServiceNamesJSON holds the raw contents of data/aws_service_names.json,
// embedded into the binary at build time.
//
//go:embed data/aws_service_names.json
var awsServiceNamesJSON []byte
// Built-in dictionaries, decoded from the embedded JSON by init.
// Each one maps a preferred spelling to the list of typos it replaces.
var (
// baseFormatVariants is decoded from data/format_variants.json.
baseFormatVariants map[string][]string
// baseAWSVariants is decoded from data/aws_service_names.json.
baseAWSVariants map[string][]string
)
// init decodes the embedded JSON dictionaries into the package-level maps.
// mustLoadMap panics on malformed JSON, so a broken embedded file fails
// during package initialization rather than on first use.
func init() {
baseFormatVariants = mustLoadMap("data/format_variants.json", formatVariantsJSON)
baseAWSVariants = mustLoadMap("data/aws_service_names.json", awsServiceNamesJSON)
}
// Run checks text against the built-in dictionaries only and returns one
// CheckResult per typo occurrence. It is equivalent to calling
// RunWithOverlays with no overlays.
func Run(text string) []CheckResult {
return RunWithOverlays(text)
}
// RunWithOverlays checks text against the built-in dictionaries plus any
// caller-supplied overlays and returns one CheckResult per typo occurrence.
//
// Each overlay maps a preferred spelling to the typos it should replace.
// Dictionaries are consulted in this order: AWS service names, format
// variants, then the overlays in the order given. When the same typo appears
// in more than one dictionary, the last dictionary in that order decides the
// preferred spelling, so overlays take precedence over the built-ins.
func RunWithOverlays(text string, overlays ...map[string][]string) []CheckResult {
	dictionaries := append(
		[]map[string][]string{baseAWSVariants, baseFormatVariants},
		overlays...,
	)
	return reportPreferredCorrections(dictionaries, text)
}
// CheckResult describes one occurrence of a known typo found in the checked text.
type CheckResult struct {
// Typo is the non-preferred spelling, exactly as listed in the dictionary.
Typo string
// Preferred is the spelling the dictionary recommends in place of Typo.
Preferred string
}
// reportPreferredCorrections scans text for every typo known to dictionaries
// and returns one CheckResult per occurrence.
//
// Each dictionary maps a preferred spelling to the typos it replaces.
// Dictionaries are applied in order, so a typo that is defined again in a
// later dictionary takes that later dictionary's preferred spelling. If a
// single dictionary lists the same typo under several preferred spellings,
// the lexicographically greatest preferred spelling wins; keys are visited in
// sorted order so the outcome does not depend on Go's randomized map
// iteration. Empty typo strings are ignored.
//
// Results are ordered by byte offset in text, then by typo for occurrences
// that start at the same offset. The returned slice is never nil.
func reportPreferredCorrections(dictionaries []map[string][]string, text string) []CheckResult {
	typosToPreferred := make(map[string]string)
	var invalidPatterns []string
	for _, dict := range dictionaries {
		// Sort the keys first: ranging over the map directly would make the
		// winner among duplicate typos vary from run to run.
		preferredKeys := make([]string, 0, len(dict))
		for preferred := range dict {
			preferredKeys = append(preferredKeys, preferred)
		}
		sort.Strings(preferredKeys)

		for _, preferred := range preferredKeys {
			for _, typo := range dict[preferred] {
				if typo == "" {
					// findAllOccurrences never reports an empty pattern, so
					// keep it out of the matcher altogether.
					continue
				}
				// Register each pattern once, but always let the most recent
				// definition decide the preferred spelling.
				if _, seen := typosToPreferred[typo]; !seen {
					invalidPatterns = append(invalidPatterns, typo)
				}
				typosToPreferred[typo] = preferred
			}
		}
	}
	if len(invalidPatterns) == 0 || text == "" {
		// Nothing can match; skip building the matcher.
		return []CheckResult{}
	}

	matcher := ahocorasick.NewStringMatcher(invalidPatterns)

	type occurrence struct {
		position int
		result   CheckResult
	}
	var occurrences []occurrence
	// Match yields indexes into invalidPatterns for the patterns present in
	// text; the offsets of each one are then located individually.
	for _, matchIdx := range matcher.Match([]byte(text)) {
		typo := invalidPatterns[matchIdx]
		result := CheckResult{Typo: typo, Preferred: typosToPreferred[typo]}
		for _, pos := range findAllOccurrences(text, typo) {
			occurrences = append(occurrences, occurrence{position: pos, result: result})
		}
	}

	sort.SliceStable(occurrences, func(i, j int) bool {
		if occurrences[i].position == occurrences[j].position {
			return occurrences[i].result.Typo < occurrences[j].result.Typo
		}
		return occurrences[i].position < occurrences[j].position
	})

	results := make([]CheckResult, 0, len(occurrences))
	for _, occ := range occurrences {
		results = append(results, occ.result)
	}
	return results
}
// mustLoadMap decodes data as a JSON object whose values are string arrays
// and returns the resulting map.
//
// name identifies the source in the panic message only. The function panics
// if data cannot be decoded into a map[string][]string.
func mustLoadMap(name string, data []byte) map[string][]string {
	var decoded map[string][]string
	err := json.Unmarshal(data, &decoded)
	if err == nil {
		return decoded
	}
	panic("typolice: failed to load " + name + ": " + err.Error())
}
// findAllOccurrences returns the byte offset of every occurrence of substr in
// text, in ascending order.
//
// Overlapping occurrences are all reported, because the search resumes one
// byte after the start of each hit. The result is nil when substr is empty
// or does not occur in text.
func findAllOccurrences(text, substr string) []int {
	if len(substr) == 0 {
		return nil
	}
	var offsets []int
	for from := 0; ; {
		rel := strings.Index(text[from:], substr)
		if rel < 0 {
			return offsets
		}
		hit := from + rel
		offsets = append(offsets, hit)
		// Advance by a single byte, not len(substr), so overlaps are found.
		from = hit + 1
	}
}