-
Notifications
You must be signed in to change notification settings - Fork 74
/
aho_corasick.rs
98 lines (91 loc) · 3.22 KB
/
aho_corasick.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
use std::cell::RefCell;
use std::collections::BTreeMap;
use std::collections::VecDeque;
use std::rc::{Rc, Weak};
#[derive(Default)]
struct ACNode {
trans: BTreeMap<char, Rc<RefCell<ACNode>>>,
suffix: Weak<RefCell<ACNode>>, // the suffix(fail) link
lengths: Vec<usize>, // lengths of matched patterns ended at this node
}
#[derive(Default)]
pub struct AhoCorasick {
root: Rc<RefCell<ACNode>>,
}
impl AhoCorasick {
pub fn new(words: &[&str]) -> Self {
let root = Rc::new(RefCell::new(ACNode::default()));
for word in words {
let mut cur = Rc::clone(&root);
for c in word.chars() {
cur = Rc::clone(Rc::clone(&cur).borrow_mut().trans.entry(c).or_default());
}
cur.borrow_mut().lengths.push(word.len());
}
Self::build_suffix(Rc::clone(&root));
Self { root }
}
fn build_suffix(root: Rc<RefCell<ACNode>>) {
let mut q = VecDeque::new();
q.push_back(Rc::clone(&root));
while let Some(parent) = q.pop_front() {
let parent = parent.borrow();
for (c, child) in &parent.trans {
q.push_back(Rc::clone(child));
let mut child = child.borrow_mut();
let mut suffix = parent.suffix.upgrade();
loop {
match &suffix {
None => {
child.lengths.extend(root.borrow().lengths.clone());
child.suffix = Rc::downgrade(&root);
break;
}
Some(node) => {
if node.borrow().trans.contains_key(c) {
let node = &node.borrow().trans[c];
child.lengths.extend(node.borrow().lengths.clone());
child.suffix = Rc::downgrade(node);
break;
} else {
suffix = suffix.unwrap().borrow().suffix.upgrade();
}
}
}
}
}
}
}
pub fn search<'a>(&self, s: &'a str) -> Vec<&'a str> {
let mut ans = vec![];
let mut cur = Rc::clone(&self.root);
for (i, c) in s.chars().enumerate() {
loop {
if let Some(child) = Rc::clone(&cur).borrow().trans.get(&c) {
cur = Rc::clone(child);
break;
}
let suffix = cur.borrow().suffix.clone();
match suffix.upgrade() {
Some(node) => cur = node,
None => break,
}
}
for &len in &cur.borrow().lengths {
ans.push(&s[i + 1 - len..=i]);
}
}
ans
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_aho_corasick() {
let dict = ["abc", "abcd", "xyz", "acxy", "efg", "123", "678", "6543"];
let ac = AhoCorasick::new(&dict);
let res = ac.search("ababcxyzacxy12678acxy6543");
assert_eq!(res, ["abc", "xyz", "acxy", "678", "acxy", "6543",]);
}
}