1
1
use std:: iter;
2
2
3
+ // FIXME: only supports extended-ASCII
4
+ /// searches for the pattern in the input text using the
5
+ /// KMP algorithm.
3
6
pub struct KMP < ' a > {
4
7
r : usize ,
5
8
dfa : Vec < Vec < usize > > ,
@@ -33,7 +36,7 @@ impl<'a> KMP<'a> {
33
36
self . dfa [ c] [ j] = self . dfa [ c] [ x] ; // copy mismatch cases
34
37
}
35
38
self . dfa [ self . pat . char_at ( j) as usize ] [ j] = j+1 ; // set match case
36
- x = self . dfa [ self . pat . char_at ( j) as usize ] [ x] ;
39
+ x = self . dfa [ self . pat . char_at ( j) as usize ] [ x] ; // update restart state
37
40
}
38
41
}
39
42
@@ -54,11 +57,77 @@ impl<'a> KMP<'a> {
54
57
}
55
58
}
56
59
57
-
58
60
/// Checks that a KMP search lands on a real occurrence of the pattern and
/// yields `None` for a text that does not contain it.
#[test]
fn test_kmp() {
    let needle = "abracadabra";
    let haystack = "abacadabrabracabracadabrabrabracad";
    let matcher = KMP::new(needle);

    // A hit only counts when the text really continues with the pattern
    // at the reported offset.
    let lands_on_pattern = match matcher.search(haystack) {
        Some(pos) => haystack[pos..].starts_with(needle),
        None => false,
    };
    assert!(lands_on_pattern);

    assert_eq!(matcher.search("zzzzz"), None);
}
68
+
69
+
70
/// Searches for a pattern in an input text using the bad-character rule
/// of the Boyer-Moore algorithm.
///
/// Pattern and text are compared as UTF-8 bytes, so any `&str` can be used
/// for either of them. Offsets returned by [`BoyerMoore::search`] are byte
/// offsets into the text.
pub struct BoyerMoore<'a> {
    /// Size of the alphabet; 256 gives one slot per possible byte value.
    r: usize,
    /// `right[b]` is the index of the rightmost occurrence of byte `b` in
    /// the pattern, or -1 when `b` does not occur in it.
    right: Vec<isize>,
    /// The pattern being searched for.
    pat: &'a str,
}

impl<'a> BoyerMoore<'a> {
    /// Builds a searcher for `pat` and precomputes its bad-character table.
    // The separate `'b` lifetime is kept so the signature stays exactly
    // what existing callers were compiled against.
    pub fn new<'b>(pat: &'b str) -> BoyerMoore<'b> {
        let mut ret = BoyerMoore {
            r: 256,
            right: Vec::new(),
            pat,
        };
        ret.init();
        ret
    }

    /// Fills `right` with the rightmost position of every byte of the pattern.
    fn init(&mut self) {
        let mut right = vec![-1isize; self.r];
        for (j, b) in self.pat.bytes().enumerate() {
            // Later occurrences overwrite earlier ones, which leaves the
            // rightmost position in the table.
            right[usize::from(b)] = j as isize;
        }
        self.right = right;
    }

    /// Returns the byte offset of the first occurrence of the pattern in
    /// `txt`, or `None` when the pattern does not occur.
    ///
    /// An empty pattern matches at offset 0. Because UTF-8 is
    /// self-synchronizing, a byte-wise match of a valid pattern always
    /// starts on a character boundary, so `&txt[offset..]` is safe to take.
    pub fn search(&self, txt: &str) -> Option<usize> {
        let pat = self.pat.as_bytes();
        let text = txt.as_bytes();
        let m = pat.len();
        let n = text.len();
        if n < m {
            return None;
        }

        let mut i = 0;
        while i <= n - m {
            // Compare right-to-left and stop at the first mismatching position.
            match (0..m).rev().find(|&j| pat[j] != text[i + j]) {
                None => return Some(i),
                Some(j) => {
                    // Line up the mismatching text byte with its rightmost
                    // occurrence in the pattern. When that occurrence lies to
                    // the right of `j` the shift would be non-positive, so
                    // advance by one position instead.
                    let skip = j as isize - self.right[usize::from(text[i + j])];
                    i += skip.max(1) as usize;
                }
            }
        }
        None
    }
}
125
+
126
/// Checks that a Boyer-Moore search lands on a real occurrence of the
/// pattern and yields `None` for a text that does not contain it.
#[test]
fn test_boyer_moore() {
    let needle = "abracadabra";
    let haystack = "abacadabrabracabracadabrabrabracad";
    let matcher = BoyerMoore::new(needle);

    // A hit only counts when the text really continues with the pattern
    // at the reported offset.
    let lands_on_pattern = match matcher.search(haystack) {
        Some(pos) => haystack[pos..].starts_with(needle),
        None => false,
    };
    assert!(lands_on_pattern);

    assert_eq!(matcher.search("zzzzz"), None);
}
0 commit comments