-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAbstractInterpretation.lhs
323 lines (253 loc) · 11.3 KB
/
AbstractInterpretation.lhs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
> {-# LANGUAGE DeriveFunctor #-}
> {-# LANGUAGE FlexibleInstances #-}
> {-# LANGUAGE NoMonomorphismRestriction #-}
> {-# LANGUAGE TypeSynonymInstances #-}
This is a modern "translation" of Cousot and Cousot's original
paper on abstract interpretation (1977). I created it for myself, to
see how well I understood its ideas, but you may also find it
useful. The structure closely follows the structure of the original
paper, so it should be possible to read this translation and the
original paper in parallel. Primarily it's useful for loading
into ghci and typing `:info Foo` whenever you need to recall what
the meaning of Foo is.
> module AbstractInterpretation where
>
> import Control.Applicative
> import Control.Monad
> import Data.Monoid
Section 3.1 on page 238, Syntax of a program.
A program (flowchart) is built from a set of nodes, each with
successor and predecessor nodes.
> data Connections self neighbor = Connections {
> nPred :: self -> [neighbor],
> nSucc :: self -> [neighbor] }
In the original paper, both self and neighbor are of type `Node`.
> data Program node ident expr bexpr prim = Program {
> connections :: Connections node node,
> classify :: node -> NodeType node ident expr bexpr,
> upcast :: bexpr -> expr,
> entryNodes :: [Entry node],
> val :: expr -> Env ident prim -> Value prim, -- defined in section 3.2
> valBexpr :: bexpr -> Env ident prim -> CL Bool -- defined in section 3.2
> }
There are 5 types of nodes:
> data NodeType node ident expr bexpr
> = NSEntry (Entry node) -- todo: rename to NEntry
> | NSAssignment (Assignment node ident expr)
> | NSTest (Test node bexpr)
> | NSJunction (Junction node)
> | NSExit (Exit node)
Entry nodes have 1 successor.
> data Entry node = Entry {
> entrySelf :: node,
> entrySuccessor :: node }
>
> entryConnections :: Connections (Entry node) node
> entryConnections = Connections pred succ where
> pred _ = []
> succ n = [entrySuccessor n]
Page 239
Assignment nodes assign the value of an expression (expr) to an
identifier (ident). They have one predecessor and one successor.
> data Assignment node ident expr = Assignment {
> assignmentId :: ident,
> assignmentExpression :: expr,
> assignmentPredecessor :: node,
> assignmentSuccessor :: node }
>
> assignmentConnections :: Connections (Assignment node ident expr) node
> assignmentConnections = Connections pred succ where
> pred n = [assignmentPredecessor n]
> succ n = [assignmentSuccessor n]
Test nodes evaluate a boolean expression. They have one predecessor and
two successors. The successor is chosen based on the value of the boolean.
> data Test node bexpr = Test {
> testSelf :: node, -- todo: add self function for other types
> testPredecessor :: node,
> nSuccT :: node,
> nSuccF :: node,
> testExpression :: bexpr }
>
> testConnections :: Connections (Test node bexpr) node
> testConnections = Connections pred succ where
> pred n = [testPredecessor n]
> succ n = [nSuccT n, nSuccF n]
Junction nodes have one successor and more than one predecessor.
> data Junction node = Junction {
> junctionPredecessors :: [node],
> junctionSuccessor :: node }
>
> junctionConnections :: Connections (Junction node) node
> junctionConnections = Connections pred succ where
> pred = junctionPredecessors
> succ n = [junctionSuccessor n]
Exit node has one predecessor and no successor.
> data Exit node = Exit {
> exitPredecessor :: node }
>
> exitConnections :: Connections (Exit node) node
> exitConnections = Connections pred succ where
> pred = (:[]) . exitPredecessor
> succ _ = []
An arc is a pair of nodes connected in the flow chart.
> data Arc node = Arc {
> origin :: node,
> end :: node }
> deriving (Eq)
>
> aSucc :: Connections node node -> node -> [Arc node]
> aSucc conn n = map (Arc n) (nSucc conn n)
>
> aPred :: Connections node node -> node -> [Arc node]
> aPred conn n = map (flip Arc n) (nPred conn n)
>
> aSuccT :: Test node bexpr -> Arc node
> aSuccT n = Arc (testSelf n) (nSuccT n)
>
> aSuccF :: Test node bexpr -> Arc node
> aSuccF n = Arc (testSelf n) (nSuccF n)
Section 3.2, page 239, Semantics of programs
If S is a set, we denote CL S the complete lattice by adding
bottom and top to it.
> data CL a = Top | Bottom | Middle a
> deriving (Eq, Functor) -- not sure that Eq is appropriate
>
> instance Applicative CL where
> pure = Middle
> Top <*> _ = Top
> _ <*> Top = Top
> Bottom <*> _ = Bottom
> _ <*> Bottom = Bottom
> Middle f <*> Middle x = Middle (f x)
>
> instance Monad CL where
> return = Middle
> Top >>= _ = Top
> Bottom >>= _ = Bottom
> Middle x >>= f = f x
Values is the complete lattice of booleans and other primitives.
> type WithBool prim = Either Bool prim
> type Value prim = CL (WithBool prim)
Environments map identifiers to values.
> type Env ident prim = CL ident -> Value prim
val and valBexpr were defined in Program.
Every state is a pair of an instruction pointer and an environment.
> data State node ident prim = State {
> cs :: CL (Arc node),
> env :: Env ident prim }
cond evaluates boolean expressions.
> cond :: CL Bool -> CL a -> CL a -> CL a
> cond Top _ _ = Top
> cond Bottom _ _ = Bottom
> cond (Middle True) e _ = e
> cond (Middle False) _ e = e
> subst :: (Eq ident) => Env ident prim -> Value prim -> CL ident -> CL ident -> Value prim
> subst e v x = \y -> cond (y =:= x) v (e y)
> (=:=) :: (Eq ident) => CL ident -> CL ident -> CL Bool
> a =:= b = (==) <$> a <*> b
The static transition function defines the behavior of each of
the nodes.
> nState :: (Eq ident) => Program node ident expr bexpr prim -> State node ident prim -> State node ident prim
> nState program s =
> let n = fmap end (cs s)
> e = env s in
> case fmap (classify program) n of
> Top -> State Top e
> Bottom -> State Bottom e
> Middle t ->
> case t of
> NSEntry _ -> error "no state exists before entry node"
> NSAssignment a ->
> let ident = assignmentId a
> expr = assignmentExpression a
> newEnv Top = Top
> newEnv Bottom = Bottom
> newEnv (Middle i) | i == ident = val program expr e
> | otherwise = e (Middle i) in
> State (fmap (flip Arc (assignmentSuccessor a)) n) newEnv
> NSTest t -> State (cond (valBexpr program (testExpression t) e) (Middle (aSuccT t)) (Middle (aSuccF t))) e
> NSJunction j -> State (fmap (flip Arc (junctionSuccessor j)) n) e
> NSExit _ -> s
The initial environment maps all values to bottom.
> bottomEnv :: Env ident prim
> bottomEnv _ = Bottom
> initialStates :: Program node ident expr bexpr prim -> [State node ident prim]
> initialStates program = map (\entry -> State (Middle (Arc (entrySelf entry) (entrySuccessor entry))) bottomEnv) (entryNodes program)
Page 240
> computationSequence :: (Eq ident) => Program node ident expr bexpr prim -> State node ident prim -> [State node ident prim]
> computationSequence program = iterate (nState program)
For terminating traces, eventually the states returned by computationSequence will all be equivalent, because exit nodes do not change the
state. This could also have been done by having nState return Maybe, terminating computationSequence at the first
Nothing, and getting the last value returned by computationSequence, but for some reason I haven't yet determined, the paper prefers to
define the initial to final state transition function as the least fixed point of nState.
Section 4, page 240, Static semantics of programs
The context for an arc is all possible environments for that arc. Because computationSequence is an infinite
list, the following definition of context will never return environments for any initial states but one.
> type Context ident prim = [Env ident prim]
> type ContextVector node ident prim = CL (Arc node) -> [Env ident prim]
> context :: (Eq ident, Eq node) => Program node ident expr bexpr prim -> ContextVector node ident prim
> context program q = do
> initialState <- initialStates program
> State q' e <- computationSequence program initialState
> guard $ q == q'
> return e
> envOn :: (Eq node) => CL (Arc node) -> State node ident prim -> [Env ident prim]
> envOn r s = if r == cs s then [env s] else []
> nContext :: (Eq ident, Eq node) => Program node ident expr bexpr prim -> CL (Arc node) -> ContextVector node ident prim -> [Env ident prim]
> nContext program rr cv =
> case rr of
> Top -> error "not implemented: nContext top"
> Bottom -> error "not implemented: nContext bottom"
> Middle r ->
> case classify program $ origin r of
> NSEntry _ -> [bottomEnv]
> _ -> do
> q <- aPred (connections program) (origin r)
> e <- cv (Middle q)
> envOn (Middle r) (nState program (State (Middle q) e))
Top of page 240, second column
> fCont :: (Eq ident, Eq node) => Program node ident expr bexpr prim -> ContextVector node ident prim -> ContextVector node ident prim
> fCont program cv = \r -> nContext program r cv
ContextVector is a complete lattice with union `mappend`.
> instance Monoid (ContextVector node ident prim) where
> mempty = const []
> cv1 `mappend` cv2 = \r -> cv1 r `mappend` cv2 r
Then lots of stuff that I don't quite understand.
Section 5 on page 240, Abstract interpretation of programs
> data AbstractInterpretation node aCont = AbstractInterpretation {
> joinContext :: aCont -> aCont -> aCont,
> lte :: aCont -> aCont -> Bool, -- x `lte` y <=> x `joinContext` y == y
> top :: aCont,
> bottom :: aCont,
> interpret :: CL (Arc node) -> Tilda node aCont -> aCont {- defined on page 241 -} }
Page 241
> type Tilda node a = CL (Arc node) -> a
> tildaJoinContext :: AbstractInterpretation node aCont -> Tilda node aCont -> Tilda node aCont -> Tilda node aCont
> tildaJoinContext ai cv' cv'' = \r -> joinContext ai (cv' r) (cv'' r)
> tildaLTE :: AbstractInterpretation node aCont -> Tilda node aCont -> Tilda node aCont -> Bool
> tildaLTE ai cv' cv'' = all (\r -> lte ai (cv' r) (cv'' r)) (allArcs ai) where
> allArcs _ = undefined -- todo; probably need to pass in a program
tildaLTE ai cv' cv'' implies that lte ai (interpret ai a cv') (interpret ai a cv'') for all a in arcs.
Section 5.3.1, page 241, static semantics of programs
> staticSemantics :: (Eq ident, Eq node) => Program node ident expr bexpr prim -> AbstractInterpretation node (Context ident prim)
> staticSemantics program = AbstractInterpretation mappend subset allEnvs [] (nContext program) where
> subset = undefined -- todo
> allEnvs = undefined -- todo; we probably need to extend type to include a top value
Section 5.3.2 Data Flow Analysis
> type BVect node expr = Arc node -> expr -> Bool
> avail ::
> Program node ident expr bexpr prim ->
> (node -> expr -> Bool) ->
> (node -> expr -> Bool) ->
> Arc node ->
> BVect node expr ->
> expr -> Bool
> avail program generated transparent r bv =
> let n = origin r in
> case classify program $ n of
> NSEntry _ -> \_ -> False
> _ -> \e -> generated n e || flip all (aPred (connections program) n) (\p -> bv p e && transparent n e)
^ not really sure about this one. It seems like there should be some transitive calculation going on here
(the paper uses the phrase "for all predecessors"), yet the definition does no such thing. Also,
the paper actually defines `type BVector node expr = expr -> Bool`, but I had to add the extra argument
`Arc node` in order to get it to type check