Skip to content

Commit 515e8fd

Browse files
committed
Python: Use more general definitions
We now formulate local flow steps based on `DefinitionNode`s, instead of just `AssignmentDefinition`s. This collects all logic around definitions into `DefinitionNode` and avoids extra cases in the flow relations. It also allows a few more definitions needed for variable capture. These are currently excluded from `AssignmentDefinition` because they are not `TEssaNodeDefinition`s because they are not considered live. (`SsaComputeImpl::variableDefine` holds, but `Liveness::liveAtExit` does not). `DefinitionNode` includes parameter definitions from default values, which should be jump steps. So we split the definitions into local and jumps by looking at the enclosing callables. `DefinitionNode` did not include `with`-definitions, so that has been added. Like parameter definitions, this case of `DefinitionNode` must be excluded from `assignment_definition`. Finally, we incur a dataflow inconsistency for a non-sensical assignment in test code. This we simply accept. The code reads ```python len(unknown()) = "foo" ``` which makes the target of the flow edge a post-update node.
1 parent 27be5ba commit 515e8fd

File tree

4 files changed

+30
-38
lines changed

4 files changed

+30
-38
lines changed

python/ql/lib/semmle/python/Flow.qll

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -672,6 +672,8 @@ class DefinitionNode extends ControlFlowNode {
672672
exists(For for | for.getTarget().getAFlowNode() = this)
673673
or
674674
exists(Parameter param | this = param.asName().getAFlowNode() and exists(param.getDefault()))
675+
or
676+
exists(With with | this = with.getOptionalVars().getAFlowNode())
675677
}
676678

677679
/** flow node corresponding to the value assigned for the definition corresponding to this flow node */
@@ -840,7 +842,11 @@ private AstNode assigned_value(Expr lhs) {
840842
/* for lhs in seq: => `result` is the `for` node, representing the `iter(next(seq))` operation. */
841843
result.(For).getTarget() = lhs
842844
or
845+
// def f(a = 42): => `result` is 42
843846
exists(Parameter param | lhs = param.asName() and result = param.getDefault())
847+
or
848+
// with f(42) as x: => `result` is `f(42)`
849+
exists(With with | lhs = with.getOptionalVars() and result = with.getContextExpr())
844850
}
845851

846852
predicate nested_sequence_assign(

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 19 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -288,36 +288,22 @@ class DataFlowExpr = Expr;
288288
* Some syntaxtic constructs are handled separately.
289289
*/
290290
module LocalFlow {
291-
/** Holds if `nodeFrom` is the expression defining the value for the variable `nodeTo`. */
291+
/**
292+
* Holds if `nodeFrom` is the expression defining the value for the variable `nodeTo`.
293+
* This includes cases like
294+
* `x = f(42)`
295+
* and
296+
* `with f(42) as x:`
297+
* where `nodeFrom` is `f(42)` and `nodeTo` is `x`.
298+
*/
292299
predicate definitionFlowStep(Node nodeFrom, Node nodeTo) {
293-
// Definition
294-
// `x = f(42)`
295-
// nodeFrom is `f(42)`
296-
// nodeTo is `x`
297-
exists(AssignmentDefinition def |
298-
nodeFrom.(CfgNode).getNode() = def.getValue() and
299-
nodeTo.(CfgNode).getNode() = def.getDefiningNode()
300-
)
301-
or
302-
// With definition
303-
// `with f(42) as x:`
304-
// nodeFrom is `f(42)`
305-
// nodeTo is `x`
306-
exists(With with, ControlFlowNode contextManager, WithDefinition withDef, ControlFlowNode var |
307-
var = withDef.getDefiningNode()
308-
|
309-
nodeFrom.(CfgNode).getNode() = contextManager and
310-
nodeTo.(CfgNode).getNode() = var and
311-
// see `with_flow` in `python/ql/src/semmle/python/dataflow/Implementation.qll`
312-
with.getContextExpr() = contextManager.getNode() and
313-
with.getOptionalVars() = var.getNode() and
314-
contextManager.strictlyDominates(var)
315-
// note: we allow this for both `with` and `async with`, since some
316-
// implementations do `async def __aenter__(self): return self`, so you can do
317-
// both:
318-
// * `foo = x.foo(); await foo.async_method(); foo.close()` and
319-
// * `async with x.foo() as foo: await foo.async_method()`.
320-
)
300+
nodeFrom.(CfgNode).getNode() = nodeTo.(CfgNode).getNode().(DefinitionNode).getValue() and
301+
// This excludes steps that should be jump steps,
302+
// such as assignment of a parameters default value.
303+
// (Since the parameter will be in the scope of the function,
304+
// while the default value itself will be in the scope that _defines_ the
305+
// function.)
306+
nodeFrom.getEnclosingCallable() = nodeTo.getEnclosingCallable()
321307
}
322308

323309
predicate expressionFlowStep(Node nodeFrom, Node nodeTo) {
@@ -505,14 +491,10 @@ predicate runtimeJumpStep(Node nodeFrom, Node nodeTo) {
505491
// Setting the possible values of the variable at the end of import time
506492
nodeFrom = nodeTo.(ModuleVariableNode).getADefiningWrite()
507493
or
508-
// a parameter with a default value, since the parameter will be in the scope of the
509-
// function, while the default value itself will be in the scope that _defines_ the
510-
// function.
511-
exists(ParameterDefinition param |
512-
// note: we go to the _control-flow node_ of the parameter, and not the ESSA node of the parameter, since for type-tracking, the ESSA node is not a LocalSourceNode, so we would get in trouble.
513-
nodeFrom.asCfgNode() = param.getDefault() and
514-
nodeTo.asCfgNode() = param.getDefiningNode()
515-
)
494+
// Non-local definitions, such as assignment of a parameters default value.
495+
nodeFrom.(CfgNode).getNode() = nodeTo.(CfgNode).getNode().(DefinitionNode).getValue() and
496+
// This excludes steps that should be local.
497+
nodeFrom.getEnclosingCallable() != nodeTo.getEnclosingCallable()
516498
}
517499

518500
//--------

python/ql/lib/semmle/python/essa/SsaDefinitions.qll

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@ module SsaSource {
2525
// since parameter will be considered a DefinitionNode, if it has a default value,
2626
// we need to exclude it here since it is already covered by parameter_definition
2727
// (and points-to was unhappy that it was included in both)
28-
not parameter_definition(v, defn)
28+
not parameter_definition(v, defn) and
29+
// similarly for with-definitions
30+
not with_definition(v, defn)
2931
}
3032

3133
/** Holds if `v` is defined by assignment of the captured exception. */
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
postWithInFlow
2+
| test.py:16:1:16:14 | ControlFlowNode for len() | PostUpdateNode should not be the target of local flow. |

0 commit comments

Comments
 (0)