Skip to content

Commit 08ae04c

Browse files
committed
python: instantiate module for variable capture
This provides variable capture in standard situations: - nested functions - lambdas There are some deficiencies: - we do not yet handle objects capturing variables. - we do not handle variables captured via the `nonlocal` keyword. This should be solved at the AST level, though, and then it should "just work".
1 parent fa6244e commit 08ae04c

File tree

9 files changed

+315
-17
lines changed

9 files changed

+315
-17
lines changed

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ private import semmle.python.dataflow.new.internal.TypeTracker::CallGraphConstru
4343
newtype TParameterPosition =
4444
/** Used for `self` in methods, and `cls` in classmethods. */
4545
TSelfParameterPosition() or
46+
TLambdaSelfParameterPosition() or
4647
TPositionalParameterPosition(int index) {
4748
index = any(Parameter p).getPosition()
4849
or
@@ -79,6 +80,9 @@ class ParameterPosition extends TParameterPosition {
7980
/** Holds if this position represents a `self`/`cls` parameter. */
8081
predicate isSelf() { this = TSelfParameterPosition() }
8182

83+
/** Holds if this position represents a reference to a lambda itself. Only used for tracking flow through captured variables. */
84+
predicate isLambdaSelf() { this = TLambdaSelfParameterPosition() }
85+
8286
/** Holds if this position represents a positional parameter at (0-based) `index`. */
8387
predicate isPositional(int index) { this = TPositionalParameterPosition(index) }
8488

@@ -110,6 +114,8 @@ class ParameterPosition extends TParameterPosition {
110114
string toString() {
111115
this.isSelf() and result = "self"
112116
or
117+
this.isLambdaSelf() and result = "lambda self"
118+
or
113119
exists(int index | this.isPositional(index) and result = "position " + index)
114120
or
115121
exists(string name | this.isKeyword(name) and result = "keyword " + name)
@@ -130,6 +136,7 @@ class ParameterPosition extends TParameterPosition {
130136
newtype TArgumentPosition =
131137
/** Used for `self` in methods, and `cls` in classmethods. */
132138
TSelfArgumentPosition() or
139+
TLambdaSelfArgumentPosition() or
133140
TPositionalArgumentPosition(int index) {
134141
exists(any(CallNode c).getArg(index))
135142
or
@@ -154,6 +161,9 @@ class ArgumentPosition extends TArgumentPosition {
154161
/** Holds if this position represents a `self`/`cls` argument. */
155162
predicate isSelf() { this = TSelfArgumentPosition() }
156163

164+
/** Holds if this position represents a lambda `self` argument. Only used for tracking flow through captured variables. */
165+
predicate isLambdaSelf() { this = TLambdaSelfArgumentPosition() }
166+
157167
/** Holds if this position represents a positional argument at (0-based) `index`. */
158168
predicate isPositional(int index) { this = TPositionalArgumentPosition(index) }
159169

@@ -184,6 +194,8 @@ class ArgumentPosition extends TArgumentPosition {
184194
predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) {
185195
ppos.isSelf() and apos.isSelf()
186196
or
197+
ppos.isLambdaSelf() and apos.isLambdaSelf()
198+
or
187199
exists(int index | ppos.isPositional(index) and apos.isPositional(index))
188200
or
189201
exists(string name | ppos.isKeyword(name) and apos.isKeyword(name))
@@ -1507,6 +1519,36 @@ abstract class ParameterNodeImpl extends Node {
15071519
}
15081520
}
15091521

1522+
/**
1523+
* The value of a lambda itself at function entry, viewed as a node in a data
1524+
* flow graph.
1525+
*
1526+
* This is used for tracking flow through captured variables, and we use a
1527+
* separate node and parameter/argument positions in order to distinguish
1528+
* "lambda self" from "normal self", as lambdas may also access outer `self`
1529+
* variables (through variable capture).
1530+
*/
1531+
class LambdaSelfReferenceNode extends ParameterNodeImpl, TLambdaSelfReferenceNode {
1532+
private Function callable;
1533+
1534+
LambdaSelfReferenceNode() { this = TLambdaSelfReferenceNode(callable) }
1535+
1536+
final Function getCallable() { result = callable }
1537+
1538+
override Parameter getParameter() { none() }
1539+
1540+
override predicate isParameterOf(DataFlowCallable c, ParameterPosition pos) {
1541+
c = TFunction(callable) and
1542+
pos.isLambdaSelf()
1543+
}
1544+
1545+
override Scope getScope() { result = callable }
1546+
1547+
override Location getLocation() { result = callable.getLocation() }
1548+
1549+
override string toString() { result = "lambda self in " + callable }
1550+
}
1551+
15101552
/** A parameter for a library callable with a flow summary. */
15111553
class SummaryParameterNode extends ParameterNodeImpl, FlowSummaryNode {
15121554
SummaryParameterNode() {
@@ -1578,6 +1620,28 @@ private class SummaryPostUpdateNode extends FlowSummaryNode, PostUpdateNodeImpl
15781620
override Node getPreUpdateNode() { result = pre }
15791621
}
15801622

1623+
private class CapturePostUpdateNode extends PostUpdateNodeImpl, CaptureNode {
1624+
private CaptureNode pre;
1625+
1626+
CapturePostUpdateNode() {
1627+
VariableCapture::Flow::capturePostUpdateNode(this.getSynthesizedCaptureNode(),
1628+
pre.getSynthesizedCaptureNode())
1629+
}
1630+
1631+
override Node getPreUpdateNode() { result = pre }
1632+
}
1633+
1634+
private class CaptureArgumentNode extends CfgNode, ArgumentNode {
1635+
CallCfgNode callNode;
1636+
1637+
CaptureArgumentNode() { this = callNode.getFunction() }
1638+
1639+
override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) {
1640+
callNode.asCfgNode() = call.getNode() and
1641+
pos.isLambdaSelf()
1642+
}
1643+
}
1644+
15811645
/** Gets a viable run-time target for the call `call`. */
15821646
DataFlowCallable viableCallable(DataFlowCall call) {
15831647
call instanceof ExtractedDataFlowCall and

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 180 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,167 @@ module LocalFlow {
378378
}
379379
}
380380

381+
/** Provides logic related to captured variables. */
382+
module VariableCapture {
383+
private import codeql.dataflow.VariableCapture as Shared
384+
385+
class ExprCfgNode extends ControlFlowNode {
386+
ExprCfgNode() { isExpressionNode(this) }
387+
}
388+
389+
private predicate closureFlowStep(ExprCfgNode e1, ExprCfgNode e2) {
390+
// simpleAstFlowStep(e1, e2)
391+
// or
392+
exists(SsaVariable def |
393+
def.getAUse() = e2 and
394+
def.getAnUltimateDefinition().getDefinition().(DefinitionNode).getValue() = e1
395+
)
396+
}
397+
398+
private module CaptureInput implements Shared::InputSig {
399+
private import python as P
400+
401+
class Location = P::Location;
402+
403+
class BasicBlock extends P::BasicBlock {
404+
Callable getEnclosingCallable() { result = this.getScope() }
405+
406+
// TODO: check that this gives useful results
407+
Location getLocation() { result = super.getNode(0).getLocation() }
408+
}
409+
410+
BasicBlock getImmediateBasicBlockDominator(BasicBlock bb) {
411+
result = bb.getImmediateDominator()
412+
}
413+
414+
BasicBlock getABasicBlockSuccessor(BasicBlock bb) { result = bb.getASuccessor() }
415+
416+
class CapturedVariable extends LocalVariable {
417+
Function f;
418+
419+
CapturedVariable() {
420+
// exists(this.getScopeEntryDefinition())
421+
this.getScope() = f and
422+
(
423+
this.getALoad().getScope() != f
424+
or
425+
this.getAStore().getScope() != f
426+
)
427+
}
428+
429+
Callable getCallable() { result = f }
430+
431+
Location getLocation() { result = f.getLocation() }
432+
}
433+
434+
class CapturedParameter extends CapturedVariable {
435+
CapturedParameter() { this.isParameter() }
436+
437+
ControlFlowNode getCfgNode() { result.getNode().(Parameter) = this.getAnAccess() }
438+
}
439+
440+
class Expr extends ExprCfgNode {
441+
predicate hasCfgNode(BasicBlock bb, int i) { this = bb.getNode(i) }
442+
}
443+
444+
class VariableWrite extends ControlFlowNode {
445+
CapturedVariable v;
446+
447+
VariableWrite() { this = v.getAStore().getAFlowNode() }
448+
449+
CapturedVariable getVariable() { result = v }
450+
451+
predicate hasCfgNode(BasicBlock bb, int i) { this = bb.getNode(i) }
452+
}
453+
454+
class VariableRead extends Expr {
455+
CapturedVariable v;
456+
457+
VariableRead() { this = v.getALoad().getAFlowNode() }
458+
459+
CapturedVariable getVariable() { result = v }
460+
}
461+
462+
class ClosureExpr extends Expr {
463+
ClosureExpr() { this.getNode() instanceof CallableExpr }
464+
465+
predicate hasBody(Callable body) { body = this.getNode().(CallableExpr).getInnerScope() }
466+
467+
predicate hasAliasedAccess(Expr f) { closureFlowStep+(this, f) and not closureFlowStep(f, _) }
468+
}
469+
470+
// TODO: Some basic blocks will not have an enclosing callable
471+
// as their scope are not `Function`s. This leads to inconsistency failures.
472+
class Callable extends Scope {
473+
predicate isConstructor() { none() }
474+
}
475+
}
476+
477+
class CapturedVariable = CaptureInput::CapturedVariable;
478+
479+
class ClosureExpr = CaptureInput::ClosureExpr;
480+
481+
module Flow = Shared::Flow<CaptureInput>;
482+
483+
private Flow::ClosureNode asClosureNode(Node n) {
484+
result = n.(CaptureNode).getSynthesizedCaptureNode()
485+
or
486+
result.(Flow::ExprNode).getExpr() = n.(CfgNode).getNode()
487+
or
488+
result.(Flow::VariableWriteSourceNode).getVariableWrite() = n.(CfgNode).getNode()
489+
or
490+
result.(Flow::ExprPostUpdateNode).getExpr() =
491+
n.(PostUpdateNode).getPreUpdateNode().(CfgNode).getNode()
492+
or
493+
result.(Flow::ParameterNode).getParameter().getCfgNode() = n.(CfgNode).getNode()
494+
or
495+
result.(Flow::ThisParameterNode).getCallable() = n.(LambdaSelfReferenceNode).getCallable()
496+
}
497+
498+
predicate storeStep(Node nodeFrom, CapturedVariableContent c, Node nodeTo) {
499+
Flow::storeStep(asClosureNode(nodeFrom), c.getVariable(), asClosureNode(nodeTo))
500+
}
501+
502+
predicate readStep(Node nodeFrom, CapturedVariableContent c, Node nodeTo) {
503+
Flow::readStep(asClosureNode(nodeFrom), c.getVariable(), asClosureNode(nodeTo))
504+
}
505+
506+
predicate valueStep(Node nodeFrom, Node nodeTo) {
507+
Flow::localFlowStep(asClosureNode(nodeFrom), asClosureNode(nodeTo))
508+
}
509+
510+
// Note: Learn from JS, https://github.com/github/codeql/pull/14412
511+
// - JS: Capture flow
512+
// - JS: Disallow consecutive captured contents
513+
private module Debug {
514+
predicate flowStoreStep(
515+
Node nodeFrom, Flow::ClosureNode closureNodeFrom, CapturedVariable v,
516+
Flow::ClosureNode closureNodeTo, Node nodeTo
517+
) {
518+
closureNodeFrom = asClosureNode(nodeFrom) and
519+
closureNodeTo = asClosureNode(nodeTo) and
520+
Flow::storeStep(closureNodeFrom, v, closureNodeTo)
521+
}
522+
523+
predicate flowReadStep(
524+
Node nodeFrom, Flow::ClosureNode closureNodeFrom, CapturedVariable v,
525+
Flow::ClosureNode closureNodeTo, Node nodeTo
526+
) {
527+
closureNodeFrom = asClosureNode(nodeFrom) and
528+
closureNodeTo = asClosureNode(nodeTo) and
529+
Flow::readStep(closureNodeFrom, v, closureNodeTo)
530+
}
531+
532+
predicate flowValueStep(
533+
Node nodeFrom, Flow::ClosureNode closureNodeFrom, Flow::ClosureNode closureNodeTo, Node nodeTo
534+
) {
535+
closureNodeFrom = asClosureNode(nodeFrom) and
536+
closureNodeTo = asClosureNode(nodeTo) and
537+
Flow::localFlowStep(closureNodeFrom, closureNodeTo)
538+
}
539+
}
540+
}
541+
381542
//--------
382543
// Local flow
383544
//--------
@@ -471,6 +632,8 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
471632
simpleLocalFlowStepForTypetracking(nodeFrom, nodeTo)
472633
or
473634
summaryFlowSteps(nodeFrom, nodeTo)
635+
or
636+
variableCaptureFlowStep(nodeFrom, nodeTo)
474637
}
475638

476639
/**
@@ -494,6 +657,11 @@ predicate summaryFlowSteps(Node nodeFrom, Node nodeTo) {
494657
IncludePostUpdateFlow<PhaseDependentFlow<summaryLocalStep/2>::step/2>::step(nodeFrom, nodeTo)
495658
}
496659

660+
predicate variableCaptureFlowStep(Node nodeFrom, Node nodeTo) {
661+
IncludePostUpdateFlow<PhaseDependentFlow<VariableCapture::valueStep/2>::step/2>::step(nodeFrom,
662+
nodeTo)
663+
}
664+
497665
/** `ModuleVariable`s are accessed via jump steps at runtime. */
498666
predicate runtimeJumpStep(Node nodeFrom, Node nodeTo) {
499667
// Module variable read
@@ -557,7 +725,7 @@ predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { any() }
557725

558726
predicate typeStrongerThan(DataFlowType t1, DataFlowType t2) { none() }
559727

560-
predicate localMustFlowStep(Node node1, Node node2) { none() }
728+
predicate localMustFlowStep(Node nodeFrom, Node nodeTo) { none() }
561729

562730
/**
563731
* Gets the type of `node`.
@@ -661,6 +829,8 @@ predicate storeStep(Node nodeFrom, ContentSet c, Node nodeTo) {
661829
synthStarArgsElementParameterNodeStoreStep(nodeFrom, c, nodeTo)
662830
or
663831
synthDictSplatArgumentNodeStoreStep(nodeFrom, c, nodeTo)
832+
or
833+
VariableCapture::storeStep(nodeFrom, c, nodeTo)
664834
}
665835

666836
/**
@@ -864,6 +1034,8 @@ predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) {
8641034
nodeTo.(FlowSummaryNode).getSummaryNode())
8651035
or
8661036
synthDictSplatParameterNodeReadStep(nodeFrom, c, nodeTo)
1037+
or
1038+
VariableCapture::readStep(nodeFrom, c, nodeTo)
8671039
}
8681040

8691041
/** Data flows from a sequence to a subscript of the sequence. */
@@ -993,6 +1165,8 @@ predicate nodeIsHidden(Node n) {
9931165
n instanceof SynthDictSplatArgumentNode
9941166
or
9951167
n instanceof SynthDictSplatParameterNode
1168+
// or
1169+
// n instanceof CaptureNode
9961170
}
9971171

9981172
class LambdaCallKind = Unit;
@@ -1029,6 +1203,11 @@ predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preserves
10291203
*/
10301204
predicate allowParameterReturnInSelf(ParameterNode p) {
10311205
FlowSummaryImpl::Private::summaryAllowParameterReturnInSelf(p)
1206+
or
1207+
exists(Function f |
1208+
VariableCapture::Flow::heuristicAllowInstanceParameterReturnInSelf(f) and
1209+
p = TLambdaSelfReferenceNode(f)
1210+
)
10321211
}
10331212

10341213
/** An approximated `Content`. */

0 commit comments

Comments
 (0)