cmsc330examples/ocaml/dfas.ml at master · kmicinski/cmsc330examples · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
(*
 * DFAs in OCaml
 *)

(* Let's make a module for representing deterministic finite automata
   in OCaml.  We're going to see that the mathematical definition of a
   DFA, given by Wikipedia, is going to lead to a very simple
   implementation of DFAs in our setting.

   First, let's think back to what DFAs were, when we introduced them
   formally. Wikipedia says:

     A deterministic finite automaton M is a 5-tuple, (Q, Σ, δ, q0,
     F), consisting of
      - a finite set of states (Q)
      - a finite set of input symbols called the alphabet (Σ)
      - a transition function (δ : Q × Σ → Q)
      - a start state (q0 ∈ Q)
      - a set of accepting states (F ⊆ Q)

   Now, let's translate this into OCaml!
*)

(* We're going to say states are just numbers.  This means that our
   set q is going to just be a set of integers.  We're just going to
   represent this as a list of integers.  For example, in [this
   machine](https://en.wikipedia.org/wiki/Deterministic_finite_automaton#/media/File:DFA_example_multiplies_of_3.svg),
   the set of states is just going to be represented (by us) using the
   list [0;1;2]
*)

(* The following line creates a type *alias*.  It tells the ocaml
   compiler to, whenever it sees `state`, treat that type as int.  Why
   not just use `int` everywhere?  Well, what happens later if we want
   to refactor our code?  We might want to change the definition of
   this type to (e.g.) string.
*)
type state = int

(* For our symbols, we're going to just use ocaml's built in char type *)
type symbol = char

(* To represent a transition function, we're actually going to represent a
   table.  The table is going to tell us where to go on a given input
   state q, and a given symbol s.

   ----------------|--------------|----------------
   | Current state | input symbol | Next state    |
   |---------------|--------------|---------------|
   |      0        |      '1'     |       1       |
   |      0        |      '0'     |       0       |
   |      1        |      '1'     |       1       |
   |      1        |      '0'     |       0       |
   |---------------|--------------|---------------|

   We're going to represent this as a list of *tuples* in OCaml, which
   generalize pairs.  Remember that a pair type is something of the
   form `'a * 'b` where 'a and 'b are any type (we call them type
   variables).  A triple is created simliarly: 'a * 'b * 'c
*)

type transition = int * symbol * int

(* Now we can literally translate the wikipedia definition of what a
   state machine is:
*)
type dfa_attempt = state list * symbol list * state * transition list * state list

(* Here's an example dfa *)
let d : dfa_attempt =
  ([0;1],           (* State list *)
   ['0';'1'],       (* Alphabet *)
   0,               (* Start state *)
   [(0,'0',0);      (* transition 1 *)
    (0,'1',1);      (* transition 2 *)
    (1,'0',0);      (* transition 3 *)
    (1,'1',1)],     (* transition 4 *)
   [1])             (* Accepting states *)

(* This is all fine and well, but to access the set of states, we have
   to break apart the dfa.  It will help to write some accessor functions. *)
let states (s:dfa_attempt) = match s with
  | (s,_,_,_,_) -> s (* We use wildcards here, because we don't care about the
                        other components. *)

let transitions ((_,_,_,t,_):dfa_attempt) = t

(* Instead, there's another tool I can use.  I can use the record
   notation.  Records are similar to C structs: they allow me to group
   common information, and then name the fields with sensible labels
   so that I can use them in my programs later.

   https://realworldocaml.org/v1/en/html/records.html

   Let's define our DFA type using the record notation.
*)

type dfa =
  {
    states : state list;
    sigma : symbol list;
    start : state;
    transitions : transition list;
    accepting : state list;
  }

(* Here's an example DFA *)

let d : dfa =
  { states = [0;1];
    sigma = ['0';'1'];
    start = 0;
    transitions =
      [(0,'0',0);
       (0,'1',1);
       (1,'0',0);
       (1,'1',1)];
    accepting = [1]
  }

(* To dereference a record, I use the .field notation *)
let states (dfa : dfa) = dfa.states

(* This is a function that takes in a DFA as input, and adds a transition. *)
let addTransition t dfa = { dfa with transitions = t::dfa.transitions }

(* Now we're going to define a function that lets us *run* a DFA on an input
   string.  This is going to be our trickiest example yet, so make
   sure you think through it to see what each piece is doing.
*)

(* We're going to define two helper functions. *)

(* `explode` takes a string `s`, and turns it into its individual
   characters.  This way we can run the DFA on the string "101"
   without explicitly writing ['1';'0';'1']
*)

let explode s =
  (* Note that we define expl as a *helper* function.  Helper functions are very
     useful in functional programming, because they help us build larger
     programs from programs that operate on smaller items.  Not ethat
     the definition of `expl` is only visible *inside* of `explode`.
     It's a local function (local to explode), so you can't call it
     outside.  *)
  let rec expl i l =
    if i < 0 then l else
      expl (i - 1) (s.[i] :: l) in  (* s.[i] returns the ith element of s as a char *)
  expl (String.length s - 1) [];;   (* String.length s returns the length of s      *)

(* Let's reflect on how `explode "110"` is working.  First, explode
   will call `expl 2 []`.  Let's calculate that using the equation for
   it!

   expl 2 [] =
     if (2 < 0) then [] else
       expl 1 ("110".[2] :: [])
     =                               (because if test false)
     expl 1 ("110".[2] :: [])
     =                               (definition of "110".[2] and ::)
     expl 1 (['0'])
     =
     if (1 < 0) then ['0'] else
       expl 0 ("110".[1] :: ['0'])
     =                               (because if test false, and defn of .[1] and ::)
     expl 0 (['1','0'])
     =
     if (0 < 0) then ['1','0'] else
       expl -1 ("110".[0] :: ['1','0'])
     =
     expl -1 (['0','1','0'])         (because if test false, and defn of .[0] and ::)
     =
     if (0 < 0) then ['1','1','0'] else
       expl -1 ("110".[-1] :: ['1','1','0'])
     =                               (because if test true)
     ['1','1','0']

   This is how evaluation is happening in OCaml.  You unwind function
   definitions until you get to a case where the recursion stops, or
   "bottoms out."

   This function is actually *tail* recursive.  Which is something we'll talk
   about next lecture.
*)

(* Here's another helper function, that checks whether a list contains an element *)
let rec contains e l =
  match l with
  | [] -> false
  | hd::tl -> if hd = e then true else contains e tl

(*
   Now, on to checking DFA acceptance.

   First, let's think about how we might run a DFA on paper, or in
   Ruby.  If I did it, I might keep a (mutable) variable that keeps
   track of what state I'm currently at, and then updates the state
   depending on that.

   Instead of doing that, I'm just going to write a function that tells me
   what state to go to *next* on an input.  I'm going to call this function
   `transition state input`.  In the formal definition of DFAs it's
   simply called δ.  But in our implementation of DFAs, it's a
   `transition list`.  We need to write a helper function that takes that
   transition list and turns it into a transition function.

   Let's say my input string is "110".

   How do I run the DFA?  Well, I:
    - Start at the beginning state, dfa.start
    - Call `transition dfa.start 1` and move to some state q2
    - Call `transition q2 1` and move to some state q3
    - Call `transition q3 0` and move to some state q4

   And how do I know if the DFA is accepting for that state or not?
   Well, I simply check to see if that state is contained in the set
   of accepting states.

   Now, the procedure I wrote above doesn't necessarily look obviously
   recursive.  But what happens when I write it like this:

        (transition
           (transition
              (transition dfa.start '1')
              '1')
           '0')

*)

let checkAccepts str dfa =
  (* Get the list of symbols. *)
  let symbols = explode str in

  (* If I'm at state {state}, where do I go on {symbol}? *)
  let transition state symbol =
    let rec find_state l =
      match l with
      | (s1,sym,s2)::tl ->
        if (s1 = state && symbol = sym) then
          (* I've found the place in the table where I've got what I'm
             looking for.

             E.g., in
             ----------------|--------------|----------------
             | Current state | input symbol | Next state    |
             |---------------|--------------|---------------|
             |      0        |      '1'     |      *1*      |  <-- here
             |      0        |      '0'     |       0       |
             |      1        |      '1'     |       1       |
             |      1        |      '0'     |       0       |
             |---------------|--------------|---------------|

             If I called `transition 0 '1'`, I would be at the place
             marked `here`, and would return 1.  In OCaml this is
             represented as a list of triples, so I return the third
             element
          *)
          s2
        else
          (* Otherwise I continue my search.  This is the case where,
             in the above example, I look for `transition 1 '0'`, but
             I'm at "here." I know I haven't found the place in the
             lookup table yet, so I keep going *)
          find_state tl
      | _ -> failwith "no next state"
    in
    find_state dfa.transitions
  in

  (* Now I'm going to define `run_dfa`, which is going to do the
     following:

         (transition
           (transition
              (transition dfa.start '1')               ( **line 3** )
              '1')
           '0')

     But it's going to work with any string, which is the list in
     `symbols`.  Now, I'm going to do a trick: I'm going to recurse on
     the *reversed* list.  To do this I'm going to define a helper.

  *)
  let final_state =
    let rec h symbol_list =
      match symbol_list with
      (* Case where list contains only one element *)
      | [hd] -> (transition dfa.start hd)              (* Corresponds to line 3 above *)
      | hd::tl -> (transition (h tl) hd)
      | _ -> failwith "empty list of symbols"          (* assume at least one symbol *)
    in
    h (List.rev symbols)                              (* I use the List library here. *)
  in

  (* Now I simply check to see if the final state is contained in the
     set of accepting states. *)
  if (contains final_state dfa.accepting) then
    true
  else
    false

(* Now, let's reflect very carefully on how I did this.  First, I
   wrote out the recursion I wanted to happen.  Then I thought about
   how I could write a function that preserves that structure.  Occasionally,
   I'll find that I can't obviously do it.  For example, consider how
   the recursion would have worked if I had **not** reversed the
   list.  I would have gotten the wrong result!

   This is a common pattern in functional programming: write an example
   equation you'd like to write, and then figure out how to get it to
   work.  It's really very similar in mechanics to the algebra you
   likely did in high school.  You sit down with the equations, and
   you figure out how to write a program that solves them.

   The part you need to practice at, and the part you get *better* at,
   is writing programs that are small and obviously correct.
   As an example, here's another way I could have written the last few
   lines of that function.  I could have defined a function:

   let rec search_from current_state symbol_list =
     match symbol_list with
       | [] -> current_state
       | sym::tl -> search_from (transition current_state sym) tl
   in
   let end_state = search_from dfa.start symbols in
   if (contains end_state dfa.accepting)
   then
     true
   else
    false

   Now I don't have to reverse the list, I accumulate the current
   state into the function.

   Note that this is very similar to if I had written a function in Ruby that
   did something like:

   current_state = init
   for i in str { |x| ... update current_state }

   I've transformed what was (in Ruby) a program variable, and I've
   taken that to pass it along through the program.  This technique is
   sometimes called "threading the state through" the program.  And in
   fact, any program where I use some notion of state, I can
   systematically rewrite to thread the state through functions.

   Here's another example:
*)

(* In ruby:
   x = 0
   [1,2,3].each { |y| x += y }

   But in OCaml:
*)

let sum lst =
  let rec helper currentSum lst = match lst with
    | [] -> currentSum
    | hd::tl -> helper (currentSum + hd) tl
  in
  helper 0 lst