Skip to content

Commit c043f0c

Browse files
committed
Python: wip block flow from captured variables
I think a better approach is to first write a test identifying double flow, then see that disappear.
1 parent 59f9340 commit c043f0c

File tree

1 file changed

+71
-56
lines changed

1 file changed

+71
-56
lines changed

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 71 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -288,53 +288,65 @@ class DataFlowExpr = Expr;
288288
* Some syntaxtic constructs are handled separately.
289289
*/
290290
module LocalFlow {
291+
private predicate capturedVariableRead(Node n) {
292+
n.asExpr() = any(VariableCapture::CapturedVariable v).getALoad()
293+
}
294+
291295
/** Holds if `nodeFrom` is the expression defining the value for the variable `nodeTo`. */
292296
predicate definitionFlowStep(Node nodeFrom, Node nodeTo) {
293-
// Definition
294-
// `x = f(42)`
295-
// nodeFrom is `f(42)`
296-
// nodeTo is `x`
297-
exists(AssignmentDefinition def |
298-
nodeFrom.(CfgNode).getNode() = def.getValue() and
299-
nodeTo.(CfgNode).getNode() = def.getDefiningNode()
300-
)
301-
or
302-
// With definition
303-
// `with f(42) as x:`
304-
// nodeFrom is `f(42)`
305-
// nodeTo is `x`
306-
exists(With with, ControlFlowNode contextManager, WithDefinition withDef, ControlFlowNode var |
307-
var = withDef.getDefiningNode()
308-
|
309-
nodeFrom.(CfgNode).getNode() = contextManager and
310-
nodeTo.(CfgNode).getNode() = var and
311-
// see `with_flow` in `python/ql/src/semmle/python/dataflow/Implementation.qll`
312-
with.getContextExpr() = contextManager.getNode() and
313-
with.getOptionalVars() = var.getNode() and
314-
contextManager.strictlyDominates(var)
315-
// note: we allow this for both `with` and `async with`, since some
316-
// implementations do `async def __aenter__(self): return self`, so you can do
317-
// both:
318-
// * `foo = x.foo(); await foo.async_method(); foo.close()` and
319-
// * `async with x.foo() as foo: await foo.async_method()`.
297+
not capturedVariableRead(nodeTo) and
298+
(
299+
// Definition
300+
// `x = f(42)`
301+
// nodeFrom is `f(42)`
302+
// nodeTo is `x`
303+
exists(AssignmentDefinition def |
304+
nodeFrom.(CfgNode).getNode() = def.getValue() and
305+
nodeTo.(CfgNode).getNode() = def.getDefiningNode()
306+
)
307+
or
308+
// With definition
309+
// `with f(42) as x:`
310+
// nodeFrom is `f(42)`
311+
// nodeTo is `x`
312+
exists(
313+
With with, ControlFlowNode contextManager, WithDefinition withDef, ControlFlowNode var
314+
|
315+
var = withDef.getDefiningNode()
316+
|
317+
nodeFrom.(CfgNode).getNode() = contextManager and
318+
nodeTo.(CfgNode).getNode() = var and
319+
// see `with_flow` in `python/ql/src/semmle/python/dataflow/Implementation.qll`
320+
with.getContextExpr() = contextManager.getNode() and
321+
with.getOptionalVars() = var.getNode() and
322+
contextManager.strictlyDominates(var)
323+
// note: we allow this for both `with` and `async with`, since some
324+
// implementations do `async def __aenter__(self): return self`, so you can do
325+
// both:
326+
// * `foo = x.foo(); await foo.async_method(); foo.close()` and
327+
// * `async with x.foo() as foo: await foo.async_method()`.
328+
)
320329
)
321330
}
322331

323332
predicate expressionFlowStep(Node nodeFrom, Node nodeTo) {
324-
// If expressions
325-
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(IfExprNode).getAnOperand()
326-
or
327-
// Assignment expressions
328-
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(AssignmentExprNode).getValue()
329-
or
330-
// boolean inline expressions such as `x or y` or `x and y`
331-
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(BoolExprNode).getAnOperand()
332-
or
333-
// Flow inside an unpacking assignment
334-
iterableUnpackingFlowStep(nodeFrom, nodeTo)
335-
or
336-
// Flow inside a match statement
337-
matchFlowStep(nodeFrom, nodeTo)
333+
not capturedVariableRead(nodeTo) and
334+
(
335+
// If expressions
336+
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(IfExprNode).getAnOperand()
337+
or
338+
// Assignment expressions
339+
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(AssignmentExprNode).getValue()
340+
or
341+
// boolean inline expressions such as `x or y` or `x and y`
342+
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(BoolExprNode).getAnOperand()
343+
or
344+
// Flow inside an unpacking assignment
345+
iterableUnpackingFlowStep(nodeFrom, nodeTo)
346+
or
347+
// Flow inside a match statement
348+
matchFlowStep(nodeFrom, nodeTo)
349+
)
338350
}
339351

340352
predicate useToNextUse(NameNode nodeFrom, NameNode nodeTo) {
@@ -346,22 +358,25 @@ module LocalFlow {
346358
}
347359

348360
predicate useUseFlowStep(Node nodeFrom, Node nodeTo) {
349-
// First use after definition
350-
// `y = 42`
351-
// `x = f(y)`
352-
// nodeFrom is `y` on first line
353-
// nodeTo is `y` on second line
354-
exists(EssaDefinition def |
355-
nodeFrom.(CfgNode).getNode() = def.(EssaNodeDefinition).getDefiningNode() and
356-
AdjacentUses::firstUse(def, nodeTo.(CfgNode).getNode())
361+
// not capturedVariableRead(nodeTo) and
362+
(
363+
// First use after definition
364+
// `y = 42`
365+
// `x = f(y)`
366+
// nodeFrom is `y` on first line
367+
// nodeTo is `y` on second line
368+
exists(EssaDefinition def |
369+
nodeFrom.(CfgNode).getNode() = def.(EssaNodeDefinition).getDefiningNode() and
370+
AdjacentUses::firstUse(def, nodeTo.(CfgNode).getNode())
371+
)
372+
or
373+
// Next use after use
374+
// `x = f(y)`
375+
// `z = y + 1`
376+
// nodeFrom is 'y' on first line, cfg node
377+
// nodeTo is `y` on second line, cfg node
378+
useToNextUse(nodeFrom.asCfgNode(), nodeTo.asCfgNode())
357379
)
358-
or
359-
// Next use after use
360-
// `x = f(y)`
361-
// `z = y + 1`
362-
// nodeFrom is 'y' on first line, cfg node
363-
// nodeTo is `y` on second line, cfg node
364-
useToNextUse(nodeFrom.asCfgNode(), nodeTo.asCfgNode())
365380
}
366381

367382
predicate localFlowStep(Node nodeFrom, Node nodeTo) {

0 commit comments

Comments
 (0)