Skip to content

Commit 6cf07d7

Browse files
committed
substring_search: implement BoyerMoore for string
1 parent a4c92f3 commit 6cf07d7

File tree

1 file changed

+71
-2
lines changed

1 file changed

+71
-2
lines changed

src/substring_search/mod.rs

+71-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
use std::iter;
22

3+
// FIXME: only supports extended-ASCII
4+
/// searches for the pattern in the input text using the
5+
/// KMP algorithm.
36
pub struct KMP<'a> {
47
r: usize,
58
dfa: Vec<Vec<usize>>,
@@ -33,7 +36,7 @@ impl<'a> KMP<'a> {
3336
self.dfa[c][j] = self.dfa[c][x]; // copy mismatch cases
3437
}
3538
self.dfa[self.pat.char_at(j) as usize][j] = j+1; // set match case
36-
x = self.dfa[self.pat.char_at(j) as usize][x];
39+
x = self.dfa[self.pat.char_at(j) as usize][x]; // update restart state
3740
}
3841
}
3942

@@ -54,11 +57,77 @@ impl<'a> KMP<'a> {
5457
}
5558
}
5659

57-
5860
#[test]
fn test_kmp() {
    let needle = "abracadabra";
    let haystack = "abacadabrabracabracadabrabrabracad";
    let matcher = KMP::new(needle);

    // A reported hit is only accepted if the haystack really starts with
    // the needle at that offset; a miss fails the test.
    let hit_is_valid = match matcher.search(haystack) {
        Some(offset) => haystack[offset..].starts_with(needle),
        None => false,
    };
    assert!(hit_is_valid);

    // A text that shares no characters with the needle yields no match.
    assert_eq!(matcher.search("zzzzz"), None);
}
68+
69+
70+
/// Searches for a pattern in an input text using the bad-character rule
/// part of the Boyer-Moore algorithm.
///
/// Matching is performed on the UTF-8 bytes of the pattern and the text, so
/// any `&str` is supported: the 256-entry table covers every possible byte
/// value. Reported positions are byte offsets into the text.
pub struct BoyerMoore<'a> {
    /// Alphabet size: the number of distinct byte values.
    r: usize,
    /// `right[b]` is the rightmost index of byte `b` in the pattern,
    /// or -1 when `b` does not occur in the pattern.
    right: Vec<isize>,
    /// The pattern to search for.
    pat: &'a str
}

impl<'a> BoyerMoore<'a> {
    /// Builds a searcher for `pat`, precomputing the bad-character table.
    pub fn new(pat: &'a str) -> BoyerMoore<'a> {
        let mut ret = BoyerMoore {
            r: 256,
            right: Vec::new(),
            pat: pat
        };
        ret.init();
        ret
    }

    /// Fills `right` with the rightmost occurrence of every pattern byte.
    fn init(&mut self) {
        self.right = vec![-1; self.r];
        for (j, &b) in self.pat.as_bytes().iter().enumerate() {
            // Later occurrences overwrite earlier ones, leaving the rightmost.
            self.right[b as usize] = j as isize;
        }
    }

    /// Returns the byte offset of the first occurrence of the pattern in
    /// `txt`, or `None` if the pattern does not occur.
    ///
    /// An empty pattern matches at offset 0.
    pub fn search(&self, txt: &str) -> Option<usize> {
        let pat = self.pat.as_bytes();
        let txt = txt.as_bytes();
        let m = pat.len();
        let n = txt.len();
        if n < m {
            // Also guards the `n - m` subtraction below against underflow.
            return None;
        }
        let mut i = 0;
        while i <= n - m {
            // Compare right to left and stop at the first mismatching index.
            match (0 .. m).rev().find(|&j| pat[j] != txt[i + j]) {
                None => return Some(i),
                Some(j) => {
                    // Align the mismatched text byte with its rightmost
                    // occurrence in the pattern; always advance by at
                    // least one so the scan makes progress.
                    let skip = j as isize - self.right[txt[i + j] as usize];
                    i += if skip < 1 { 1 } else { skip as usize };
                }
            }
        }
        None
    }
}
125+
126+
#[test]
fn test_boyer_moore() {
    let needle = "abracadabra";
    let haystack = "abacadabrabracabracadabrabrabracad";
    let matcher = BoyerMoore::new(needle);

    // A reported hit is only accepted if the haystack really starts with
    // the needle at that offset; a miss fails the test.
    let hit_is_valid = match matcher.search(haystack) {
        Some(offset) => haystack[offset..].starts_with(needle),
        None => false,
    };
    assert!(hit_is_valid);

    // A text that shares no characters with the needle yields no match.
    assert_eq!(matcher.search("zzzzz"), None);
}

0 commit comments

Comments
 (0)