forked from ocaml/ocaml
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathstrmatch.ml
397 lines (338 loc) · 11.3 KB
/
strmatch.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
(**************************************************************************)
(* *)
(* OCaml *)
(* *)
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 1996 Institut National de Recherche en Informatique et *)
(* en Automatique. *)
(* *)
(* All rights reserved. This file is distributed under the terms of *)
(* the GNU Lesser General Public License version 2.1, with the *)
(* special exception on linking described in the file LICENSE. *)
(* *)
(**************************************************************************)
(* Translation of string matching from closed lambda to C-- *)
open Lambda
open Cmm
module V = Backend_var
module VP = Backend_var.With_provenance
module type I = sig
val string_block_length : Cmm.expression -> Cmm.expression
val transl_switch :
Debuginfo.t -> Cmm.expression -> int -> int ->
(int * Cmm.expression) list -> Cmm.expression ->
Cmm.expression
end
module Make(I:I) = struct
(* Debug *)
let dbg = false
let mask =
let open Nativeint in
sub (shift_left one 8) one
let pat_as_string p =
let rec digits k n p =
if n <= 0 then k
else
let d = Nativeint.to_int (Nativeint.logand mask p) in
let d = Char.escaped (Char.chr d) in
digits (d::k) (n-1) (Nativeint.shift_right_logical p 8) in
let ds = digits [] Arch.size_addr p in
let ds =
if Arch.big_endian then ds else List.rev ds in
String.concat "" ds
let do_pp_cases chan cases =
List.iter
(fun (ps,_) ->
Printf.fprintf chan " [%s]\n"
(String.concat "; " (List.map pat_as_string ps)))
cases
let pp_cases chan tag cases =
Printf.eprintf "%s:\n" tag ;
do_pp_cases chan cases
let pp_match chan tag idxs cases =
Printf.eprintf
"%s: idx=[%s]\n" tag
(String.concat "; " (List.map Int.to_string idxs)) ;
do_pp_cases chan cases
(* Utilities *)
let gen_cell_id () = V.create_local "cell"
let gen_size_id () = V.create_local "size"
let mk_let_cell id str ind body =
let dbg = Debuginfo.none in
let cell =
Cop(Cload (Word_int, Asttypes.Mutable),
[Cop(Cadda,[str;Cconst_int(Arch.size_int*ind, dbg)], dbg)],
dbg) in
Clet(id, cell, body)
let mk_let_size id str body =
let size = I.string_block_length str in
Clet(id, size, body)
let mk_cmp_gen cmp_op id nat ifso ifnot =
let dbg = Debuginfo.none in
let test =
Cop (Ccmpi cmp_op, [ Cvar id; Cconst_natint (nat, dbg) ], dbg)
in
Cifthenelse (test, dbg, ifso, dbg, ifnot, dbg)
let mk_lt = mk_cmp_gen Clt
let mk_eq = mk_cmp_gen Ceq
module IntArg =
struct
type t = int
let compare (x:int) (y:int) =
if x < y then -1
else if x > y then 1
else 0
end
let interval m0 n =
let rec do_rec m =
if m >= n then []
else m::do_rec (m+1) in
do_rec m0
(*****************************************************)
(* Compile strings to a lists of words [native ints] *)
(*****************************************************)
let pat_of_string str =
let len = String.length str in
let n = len / Arch.size_addr + 1 in
let get_byte i =
if i < len then int_of_char str.[i]
else if i < n * Arch.size_addr - 1 then 0
else n * Arch.size_addr - 1 - len in
let mk_word ind =
let w = ref 0n in
let imin = ind * Arch.size_addr
and imax = (ind + 1) * Arch.size_addr - 1 in
if Arch.big_endian then
for i = imin to imax do
w := Nativeint.logor (Nativeint.shift_left !w 8)
(Nativeint.of_int (get_byte i));
done
else
for i = imax downto imin do
w := Nativeint.logor (Nativeint.shift_left !w 8)
(Nativeint.of_int (get_byte i));
done;
!w in
let rec mk_words ind =
if ind >= n then []
else mk_word ind::mk_words (ind+1) in
mk_words 0
(*****************************)
(* Discriminating heuristics *)
(*****************************)
module IntSet = Set.Make(IntArg)
module NativeSet = Set.Make(Nativeint)
let rec add_one sets ps = match sets,ps with
| [],[] -> []
| set::sets,p::ps ->
let sets = add_one sets ps in
NativeSet.add p set::sets
| _,_ -> assert false
let count_arities cases = match cases with
| [] -> assert false
| (ps,_)::_ ->
let sets =
List.fold_left
(fun sets (ps,_) -> add_one sets ps)
(List.map (fun _ -> NativeSet.empty) ps) cases in
List.map NativeSet.cardinal sets
let count_arities_first cases =
let set =
List.fold_left
(fun set case -> match case with
| (p::_,_) -> NativeSet.add p set
| _ -> assert false)
NativeSet.empty cases in
NativeSet.cardinal set
let count_arities_length cases =
let set =
List.fold_left
(fun set (ps,_) -> IntSet.add (List.length ps) set)
IntSet.empty cases in
IntSet.cardinal set
let best_col =
let rec do_rec kbest best k = function
| [] -> kbest
| x::xs ->
if x < best then
do_rec k x (k+1) xs
else
do_rec kbest best (k+1) xs in
let smallest = do_rec (-1) max_int 0 in
fun cases ->
let ars = count_arities cases in
smallest ars
let swap_list =
let rec do_rec k xs = match xs with
| [] -> assert false
| x::xs ->
if k <= 0 then [],x,xs
else
let xs,mid,ys = do_rec (k-1) xs in
x::xs,mid,ys in
fun k xs ->
let xs,x,ys = do_rec k xs in
x::xs @ ys
let swap k idxs cases =
if k = 0 then idxs,cases
else
let idxs = swap_list k idxs
and cases =
List.map
(fun (ps,act) -> swap_list k ps,act)
cases in
if dbg then begin
pp_match stderr "SWAP" idxs cases
end ;
idxs,cases
let best_first idxs cases = match idxs with
| []|[_] -> idxs,cases (* optimisation: one column only *)
| _ ->
let k = best_col cases in
swap k idxs cases
(************************************)
(* Divide according to first column *)
(************************************)
module Divide(O:Set.OrderedType) = struct
module OMap = Map.Make(O)
let divide cases =
let env =
List.fold_left
(fun env (p,psact) ->
let old =
try OMap.find p env
with Not_found -> [] in
OMap.add p ((psact)::old) env)
OMap.empty cases in
let r = OMap.fold (fun key v k -> (key,v)::k) env [] in
List.rev r (* Now sorted *)
end
(***************)
(* Compilation *)
(***************)
(* Group by cell *)
module DivideNative = Divide(Nativeint)
let by_cell cases =
DivideNative.divide
(List.map
(fun case -> match case with
| (p::ps),act -> p,(ps,act)
| [],_ -> assert false)
cases)
(* Split into two halves *)
let rec do_split idx env = match env with
| [] -> assert false
| (midkey,_ as x)::rem ->
if idx <= 0 then [],midkey,env
else
let lt,midkey,ge = do_split (idx-1) rem in
x::lt,midkey,ge
let split_env len env = do_split (len/2) env
(* Switch according to one cell *)
(*
Emit the switch, here as a comparison tree.
Argument compile_rec is to be called to compile the rest of patterns,
as match_on_cell can be called in two different contexts :
from do_compile_pats and top_compile below.
*)
let match_oncell compile_rec str default idx env =
let id = gen_cell_id () in
let rec comp_rec env =
let len = List.length env in
if len <= 3 then
List.fold_right
(fun (key,cases) ifnot ->
mk_eq id key
(compile_rec str default cases)
ifnot)
env default
else
let lt,midkey,ge = split_env len env in
mk_lt id midkey (comp_rec lt) (comp_rec ge) in
mk_let_cell (VP.create id) str idx (comp_rec env)
(*
Recursive 'list of cells' compile function:
- choose the matched cell and switch on it
- notice: patterns (and idx) all have the same length
*)
let rec do_compile_pats idxs str default cases =
if dbg then begin
pp_match stderr "COMPILE" idxs cases
end ;
match idxs with
| [] ->
begin match cases with
| [] -> default
| (_,e)::_ -> e
end
| _::_ ->
let idxs,cases = best_first idxs cases in
begin match idxs with
| [] -> assert false
| idx::idxs ->
match_oncell
(do_compile_pats idxs) str default idx (by_cell cases)
end
(* Group by size *)
module DivideInt = Divide(IntArg)
let by_size cases =
DivideInt.divide
(List.map
(fun (ps,_ as case) -> List.length ps,case)
cases)
(*
Switch according to pattern size
Argument from_ind is the starting index, it can be zero
or one (when the switch on the cell 0 has already been performed.
In that latter case pattern len is string length-1 and is corrected.
*)
let compile_by_size dbg from_ind str default cases =
let size_cases =
List.map
(fun (len,cases) ->
let len = len+from_ind in
let act =
do_compile_pats
(interval from_ind len)
str default cases in
(len,act))
(by_size cases) in
let id = gen_size_id () in
let switch = I.transl_switch dbg (Cvar id) 1 max_int size_cases default in
mk_let_size (VP.create id) str switch
(*
Compilation entry point: we choose to switch
either on size or on first cell, using the
'least discriminant' heuristics.
*)
let top_compile debuginfo str default cases =
let a_len = count_arities_length cases
and a_fst = count_arities_first cases in
if a_len <= a_fst then begin
if dbg then pp_cases stderr "SIZE" cases ;
compile_by_size debuginfo 0 str default cases
end else begin
if dbg then pp_cases stderr "FIRST COL" cases ;
let compile_size_rest str default cases =
compile_by_size debuginfo 1 str default cases in
match_oncell compile_size_rest str default 0 (by_cell cases)
end
(* Module entry point *)
let catch dbg arg k = match arg with
| Cexit (_e,[]) -> k arg
| _ ->
let e = next_raise_count () in
ccatch (e,[],k (Cexit (e,[])),arg,dbg)
let compile dbg str default cases =
(* We do not attempt to really optimise default=None *)
let cases,default = match cases,default with
| (_,e)::cases,None
| cases,Some e -> cases,e
| [],None -> assert false in
let cases =
List.rev_map
(fun (s,act) -> pat_of_string s,act)
cases in
catch dbg default (fun default -> top_compile dbg str default cases)
end