Skip to content

Commit 297358c

Browse files
committed
Python: remove EssaNodes
This commit removes SSA nodes from the data flow graph. Specifically, for a definition and use such as ```python x = expr y = x + 2 ``` we used to have flow from `expr` to an SSA variable representing x and from that SSA variable to the use of `x` in the definition of `y`. Now we instead have flow from `expr` to the control flow node for `x` at line 1 and from there to the control flow node for `x` at line 2. Specific changes: - `EssaNode` from the data flow layer no longer exists. - Several glue steps between `EssaNode`s and `CfgNode`s have been deleted. - Entry nodes are now admitted as `CfgNodes` in the data flow layer (they were filtered out before). - Entry nodes now have a new `toString` taking into account that the module name may be ambigous. - Some tests have been rewritten to accomodate the changes, but only `python/ql/test/experimental/dataflow/basic/maximalFlowsConfig.qll` should have semantic changes. - Comments have been updated - Test output has been updated, but apart from `python/ql/test/experimental/dataflow/basic/maximalFlows.expected` only `python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py` should have a semantic change. This is a bonus fix, probably meaning that something was never connected up correctly.
1 parent 298c6b5 commit 297358c

File tree

83 files changed

+1924
-1958
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

83 files changed

+1924
-1958
lines changed

python/ql/lib/semmle/python/Flow.qll

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,10 @@ class ControlFlowNode extends @py_flow_node {
126126
cached
127127
string toString() {
128128
Stages::AST::ref() and
129-
exists(Scope s | s.getEntryNode() = this | result = "Entry node for " + s.toString())
129+
// Since modules can have ambigous names, entry nodes can too, if we do not collate them.
130+
exists(Scope s | s.getEntryNode() = this |
131+
result = "Entry node for " + concat( | | s.toString(), ",")
132+
)
130133
or
131134
exists(Scope s | s.getANormalExit() = this | result = "Exit node for " + s.toString())
132135
or

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 23 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -281,28 +281,33 @@ class DataFlowExpr = Expr;
281281
/**
282282
* A module to compute local flow.
283283
*
284-
* Flow will generally go from control flow nodes into essa variables at definitions,
284+
* Flow will generally go from control flow nodes for expressions into
285+
* control flow nodes for variables at definitions,
285286
* and from there via use-use flow to other control flow nodes.
286287
*
287288
* Some syntaxtic constructs are handled separately.
288289
*/
289290
module LocalFlow {
290-
/** Holds if `nodeFrom` is the control flow node defining the essa variable `nodeTo`. */
291+
/** Holds if `nodeFrom` is the expression defining the value for the variable `nodeTo`. */
291292
predicate definitionFlowStep(Node nodeFrom, Node nodeTo) {
292293
// Definition
293294
// `x = f(42)`
294-
// nodeFrom is `f(42)`, cfg node
295-
// nodeTo is `x`, essa var
296-
nodeFrom.(CfgNode).getNode() =
297-
nodeTo.(EssaNode).getVar().getDefinition().(AssignmentDefinition).getValue()
295+
// nodeFrom is `f(42)`
296+
// nodeTo is `x`
297+
exists(AssignmentDefinition def |
298+
nodeFrom.(CfgNode).getNode() = def.getValue() and
299+
nodeTo.(CfgNode).getNode() = def.getDefiningNode()
300+
)
298301
or
299302
// With definition
300303
// `with f(42) as x:`
301-
// nodeFrom is `f(42)`, cfg node
302-
// nodeTo is `x`, essa var
303-
exists(With with, ControlFlowNode contextManager, ControlFlowNode var |
304+
// nodeFrom is `f(42)`
305+
// nodeTo is `x`
306+
exists(With with, ControlFlowNode contextManager, WithDefinition withDef, ControlFlowNode var |
307+
var = withDef.getDefiningNode()
308+
|
304309
nodeFrom.(CfgNode).getNode() = contextManager and
305-
nodeTo.(EssaNode).getVar().getDefinition().(WithDefinition).getDefiningNode() = var and
310+
nodeTo.(CfgNode).getNode() = var and
306311
// see `with_flow` in `python/ql/src/semmle/python/dataflow/Implementation.qll`
307312
with.getContextExpr() = contextManager.getNode() and
308313
with.getOptionalVars() = var.getNode() and
@@ -313,34 +318,6 @@ module LocalFlow {
313318
// * `foo = x.foo(); await foo.async_method(); foo.close()` and
314319
// * `async with x.foo() as foo: await foo.async_method()`.
315320
)
316-
or
317-
// Async with var definition
318-
// `async with f(42) as x:`
319-
// nodeFrom is `x`, cfg node
320-
// nodeTo is `x`, essa var
321-
//
322-
// This makes the cfg node the local source of the awaited value.
323-
//
324-
// We have this step in addition to the step above, to handle cases where the QL
325-
// modeling of `f(42)` requires a `.getAwaited()` step (in API graphs) when not
326-
// using `async with`, so you can do both:
327-
// * `foo = await x.foo(); await foo.async_method(); foo.close()` and
328-
// * `async with x.foo() as foo: await foo.async_method()`.
329-
exists(With with, ControlFlowNode var |
330-
nodeFrom.(CfgNode).getNode() = var and
331-
nodeTo.(EssaNode).getVar().getDefinition().(WithDefinition).getDefiningNode() = var and
332-
with.getOptionalVars() = var.getNode() and
333-
with.isAsync()
334-
)
335-
or
336-
// Parameter definition
337-
// `def foo(x):`
338-
// nodeFrom is `x`, cfgNode
339-
// nodeTo is `x`, essa var
340-
exists(ParameterDefinition pd |
341-
nodeFrom.(CfgNode).getNode() = pd.getDefiningNode() and
342-
nodeTo.(EssaNode).getVar() = pd.getVariable()
343-
)
344321
}
345322

346323
predicate expressionFlowStep(Node nodeFrom, Node nodeTo) {
@@ -372,9 +349,12 @@ module LocalFlow {
372349
// First use after definition
373350
// `y = 42`
374351
// `x = f(y)`
375-
// nodeFrom is `y` on first line, essa var
376-
// nodeTo is `y` on second line, cfg node
377-
defToFirstUse(nodeFrom.asVar(), nodeTo.asCfgNode())
352+
// nodeFrom is `y` on first line
353+
// nodeTo is `y` on second line
354+
exists(EssaDefinition def |
355+
nodeFrom.(CfgNode).getNode() = def.(EssaNodeDefinition).getDefiningNode() and
356+
AdjacentUses::firstUse(def, nodeTo.(CfgNode).getNode())
357+
)
378358
or
379359
// Next use after use
380360
// `x = f(y)`
@@ -565,11 +545,7 @@ predicate neverSkipInPathGraph(Node n) {
565545
// ```
566546
// we would end up saying that the path MUST not skip the x in `y = x`, which is just
567547
// annoying and doesn't help the path explanation become clearer.
568-
n.asVar() instanceof EssaDefinition and
569-
// For a parameter we have flow from ControlFlowNode to SSA node, and then onwards
570-
// with use-use flow, and since the CFN is already part of the path graph, we don't
571-
// want to force showing the SSA node as well.
572-
not n.asVar() instanceof ParameterDefinition
548+
n.asCfgNode() = any(EssaNodeDefinition def).getDefiningNode()
573549
}
574550

575551
/**
@@ -916,7 +892,7 @@ predicate subscriptReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
916892
predicate forReadStep(CfgNode nodeFrom, Content c, Node nodeTo) {
917893
exists(ForTarget target |
918894
nodeFrom.asExpr() = target.getSource() and
919-
nodeTo.asVar().(EssaNodeDefinition).getDefiningNode() = target
895+
nodeTo.asCfgNode() = target
920896
) and
921897
(
922898
c instanceof ListElementContent

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll

Lines changed: 4 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,13 @@ private import semmle.python.frameworks.data.ModelsAsData
2424
* The current implementation of these cross flows can be seen in `EssaTaintTracking`.
2525
*/
2626
newtype TNode =
27-
/** A node corresponding to an SSA variable. */
28-
TEssaNode(EssaVariable var) or
2927
/** A node corresponding to a control flow node. */
3028
TCfgNode(ControlFlowNode node) {
3129
isExpressionNode(node)
3230
or
3331
node.getNode() instanceof Pattern
32+
or
33+
node = any(ScopeEntryDefinition def).getDefiningNode()
3434
} or
3535
/**
3636
* A synthetic node representing the value of an object before a state change.
@@ -156,9 +156,6 @@ class Node extends TNode {
156156
this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
157157
}
158158

159-
/** Gets the ESSA variable corresponding to this node, if any. */
160-
EssaVariable asVar() { none() }
161-
162159
/** Gets the control-flow node corresponding to this node, if any. */
163160
ControlFlowNode asCfgNode() { none() }
164161

@@ -171,25 +168,6 @@ class Node extends TNode {
171168
LocalSourceNode getALocalSource() { result.flowsTo(this) }
172169
}
173170

174-
/** A data-flow node corresponding to an SSA variable. */
175-
class EssaNode extends Node, TEssaNode {
176-
EssaVariable var;
177-
178-
EssaNode() { this = TEssaNode(var) }
179-
180-
/** Gets the `EssaVariable` represented by this data-flow node. */
181-
EssaVariable getVar() { result = var }
182-
183-
override EssaVariable asVar() { result = var }
184-
185-
/** Gets a textual representation of this element. */
186-
override string toString() { result = var.toString() }
187-
188-
override Scope getScope() { result = var.getScope() }
189-
190-
override Location getLocation() { result = var.getLocation() }
191-
}
192-
193171
/** A data-flow node corresponding to a control-flow node. */
194172
class CfgNode extends Node, TCfgNode {
195173
ControlFlowNode node;
@@ -412,8 +390,8 @@ class ModuleVariableNode extends Node, TModuleVariableNode {
412390
}
413391

414392
/** Gets an `EssaNode` that corresponds to an assignment of this global variable. */
415-
EssaNode getAWrite() {
416-
result.getVar().getDefinition().(EssaNodeDefinition).definedBy(var, any(DefinitionNode defn))
393+
Node getAWrite() {
394+
any(EssaNodeDefinition def).definedBy(var, result.asCfgNode().(DefinitionNode))
417395
}
418396

419397
/** Gets the possible values of the variable at the end of import time */

python/ql/lib/semmle/python/dataflow/new/internal/ImportResolution.qll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ module ImportResolution {
112112
not allowedEssaImportStep(_, firstDef)
113113
|
114114
not LocalFlow::defToFirstUse(firstDef, _) and
115-
val.asVar() = firstDef
115+
val.asCfgNode() = firstDef.getDefinition().(EssaNodeDefinition).getDefiningNode()
116116
or
117117
exists(ControlFlowNode mid, ControlFlowNode end |
118118
LocalFlow::defToFirstUse(firstDef, mid) and
@@ -320,11 +320,11 @@ module ImportResolution {
320320
// name as a submodule, we always consider that this attribute _could_ be a
321321
// reference to the submodule, even if we don't know that the submodule has been
322322
// imported yet.
323-
exists(string submodule, Module package |
324-
submodule = result.asVar().getName() and
325-
SsaSource::init_module_submodule_defn(result.asVar().getSourceVariable(),
326-
package.getEntryNode()) and
327-
m = getModuleFromName(package.getPackageName() + "." + submodule)
323+
exists(string submodule, Module package, EssaVariable var |
324+
submodule = var.getName() and
325+
SsaSource::init_module_submodule_defn(var.getSourceVariable(), package.getEntryNode()) and
326+
m = getModuleFromName(package.getPackageName() + "." + submodule) and
327+
result.asCfgNode() = var.getDefinition().(EssaNodeDefinition).getDefiningNode()
328328
)
329329
}
330330

python/ql/lib/semmle/python/dataflow/new/internal/IterableUnpacking.qll

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -87,13 +87,13 @@
8787
* This is adequate as the route through `TIterableElement(sequence)` does not transfer precise content.
8888
*
8989
* 5. [Read] Content is read from `sequence` to its elements.
90-
* a) If the element is a plain variable, the target is the corresponding essa node.
90+
* a) If the element is a plain variable, the target is the corresponding control flow node.
9191
*
9292
* b) If the element is itself a sequence, with control-flow node `seq`, the target is `TIterableSequence(seq)`.
9393
*
9494
* c) If the element is a starred variable, with control-flow node `v`, the target is `TIterableElement(v)`.
9595
*
96-
* 6. [Store] Content is stored from `TIterableElement(v)` to the essa variable for `v`, with
96+
* 6. [Store] Content is stored from `TIterableElement(v)` to the control flow node for variable `v`, with
9797
* content type `ListElementContent`.
9898
*
9999
* 7. [Flow, Read, Store] Steps 2 through 7 are repeated for all recursive elements which are sequences.
@@ -313,7 +313,7 @@ predicate iterableUnpackingConvertingStoreStep(Node nodeFrom, Content c, Node no
313313
* Step 5
314314
* For a sequence node inside an iterable unpacking, data flows from the sequence to its elements. There are
315315
* three cases for what `toNode` should be:
316-
* a) If the element is a plain variable, `toNode` is the corresponding essa node.
316+
* a) If the element is a plain variable, `toNode` is the corresponding control flow node.
317317
*
318318
* b) If the element is itself a sequence, with control-flow node `seq`, `toNode` is `TIterableSequence(seq)`.
319319
*
@@ -351,20 +351,25 @@ predicate iterableUnpackingElementReadStep(Node nodeFrom, Content c, Node nodeTo
351351
nodeTo = TIterableElementNode(element)
352352
else
353353
// Step 5a
354-
nodeTo.asVar().getDefinition().(MultiAssignmentDefinition).getDefiningNode() = element
354+
exists(MultiAssignmentDefinition mad | element = mad.getDefiningNode() |
355+
nodeTo.(CfgNode).getNode() = element
356+
)
355357
)
356358
)
357359
}
358360

359361
/**
360362
* Step 6
361-
* Data flows from `TIterableElement(v)` to the essa variable for `v`, with
363+
* Data flows from `TIterableElement(v)` to the control flow node for variable `v`, with
362364
* content type `ListElementContent`.
363365
*/
364366
predicate iterableUnpackingStarredElementStoreStep(Node nodeFrom, Content c, Node nodeTo) {
365-
exists(ControlFlowNode starred | starred.getNode() instanceof Starred |
367+
exists(ControlFlowNode starred, MultiAssignmentDefinition mad |
368+
starred.getNode() instanceof Starred and
369+
starred = mad.getDefiningNode()
370+
|
366371
nodeFrom = TIterableElementNode(starred) and
367-
nodeTo.asVar().getDefinition().(MultiAssignmentDefinition).getDefiningNode() = starred and
372+
nodeTo.asCfgNode() = starred and
368373
c instanceof ListElementContent
369374
)
370375
}

python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ class LocalSourceNode extends Node {
7171
or
7272
// We include all scope entry definitions, as these act as the local source within the scope they
7373
// enter.
74-
this.asVar() instanceof ScopeEntryDefinition
74+
this.asCfgNode() = any(ScopeEntryDefinition def).getDefiningNode()
7575
}
7676

7777
/** Holds if this `LocalSourceNode` can flow to `nodeTo` in one or more local flow steps. */
@@ -165,7 +165,7 @@ class LocalSourceNodeNotModuleVariableNode extends LocalSourceNode {
165165
LocalSourceNodeNotModuleVariableNode() {
166166
this instanceof ExprNode
167167
or
168-
this.asVar() instanceof ScopeEntryDefinition
168+
this.asCfgNode() = any(ScopeEntryDefinition def).getDefiningNode()
169169
}
170170
}
171171

python/ql/lib/semmle/python/dataflow/new/internal/MatchUnpacking.qll

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,9 @@ predicate matchAsFlowStep(Node nodeFrom, Node nodeTo) {
8989
or
9090
// the interior pattern flows to the alias
9191
nodeFrom.(CfgNode).getNode().getNode() = subject.getPattern() and
92-
nodeTo.(EssaNode).getVar().getDefinition().(PatternAliasDefinition).getDefiningNode().getNode() =
93-
alias
92+
exists(PatternAliasDefinition pad | pad.getDefiningNode().getNode() = alias |
93+
nodeTo.(CfgNode).getNode() = pad.getDefiningNode()
94+
)
9495
)
9596
}
9697

@@ -123,13 +124,9 @@ predicate matchLiteralFlowStep(Node nodeFrom, Node nodeTo) {
123124
predicate matchCaptureFlowStep(Node nodeFrom, Node nodeTo) {
124125
exists(MatchCapturePattern capture, Name var | capture.getVariable() = var |
125126
nodeFrom.(CfgNode).getNode().getNode() = capture and
126-
nodeTo
127-
.(EssaNode)
128-
.getVar()
129-
.getDefinition()
130-
.(PatternCaptureDefinition)
131-
.getDefiningNode()
132-
.getNode() = var
127+
exists(PatternCaptureDefinition pcd | pcd.getDefiningNode().getNode() = var |
128+
nodeTo.(CfgNode).getNode() = pcd.getDefiningNode()
129+
)
133130
)
134131
}
135132

python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,8 +216,10 @@ predicate awaitStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
216216
*/
217217
predicate asyncWithStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
218218
exists(With with, ControlFlowNode contextManager, ControlFlowNode var |
219+
var = any(WithDefinition wd).getDefiningNode()
220+
|
219221
nodeFrom.(DataFlow::CfgNode).getNode() = contextManager and
220-
nodeTo.(DataFlow::EssaNode).getVar().getDefinition().(WithDefinition).getDefiningNode() = var and
222+
nodeTo.(DataFlow::CfgNode).getNode() = var and
221223
// see `with_flow` in `python/ql/src/semmle/python/dataflow/Implementation.qll`
222224
with.getContextExpr() = contextManager.getNode() and
223225
with.getOptionalVars() = var.getNode() and

python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,20 @@ predicate jumpStep(Node nodeFrom, Node nodeTo) {
5050
}
5151

5252
predicate capturedJumpStep(Node nodeFrom, Node nodeTo) {
53-
exists(SsaSourceVariable var, DefinitionNode def | var.hasDefiningNode(def) |
54-
nodeTo.asVar().(ScopeEntryDefinition).getSourceVariable() = var and
53+
// Jump into a capturing scope.
54+
//
55+
// var = expr
56+
// ...
57+
// def f():
58+
// ..var is used..
59+
//
60+
// nodeFrom is `expr`
61+
// nodeTo is entry node for `f`
62+
exists(ScopeEntryDefinition e, SsaSourceVariable var, DefinitionNode def |
63+
e.getSourceVariable() = var and
64+
var.hasDefiningNode(def)
65+
|
66+
nodeTo.asCfgNode() = e.getDefiningNode() and
5567
nodeFrom.asCfgNode() = def.getValue() and
5668
var.getScope().getScope*() = nodeFrom.getScope()
5769
)
@@ -228,8 +240,7 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input {
228240
|
229241
param = FlowSummary::SummaryComponent::parameter(apos) and
230242
DataFlowDispatch::parameterMatch(ppos, apos) and
231-
// pick the SsaNode rather than the CfgNode
232-
result.asVar().getDefinition().(ParameterDefinition).getParameter() = p and
243+
result.asCfgNode().getNode() = p and
233244
(
234245
exists(int i | ppos.isPositional(i) |
235246
p = callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getArg(i)

python/ql/lib/semmle/python/frameworks/Django.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2762,7 +2762,7 @@ module PrivateDjango {
27622762
this.asExpr() = list and
27632763
// we look for an assignment to the `MIDDLEWARE` setting
27642764
exists(DataFlow::Node mw |
2765-
mw.asVar().getName() = "MIDDLEWARE" and
2765+
mw.asExpr().(Name).getId() = "MIDDLEWARE" and
27662766
DataFlow::localFlow(this, mw)
27672767
|
27682768
// To only include results where CSRF protection matters, we only care about CSRF

0 commit comments

Comments
 (0)