Skip to content

Commit 138a7ae

Browse files
authored
Merge pull request github#6349 from RasmusWL/more-modeling
Python: Improve various library modeling
2 parents bb9911e + c7146ac commit 138a7ae

File tree

30 files changed

+1413
-527
lines changed

30 files changed

+1413
-527
lines changed

python/.vscode/ql.code-snippets

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -180,8 +180,6 @@
180180
"",
181181
" /** A direct instantiation of `${TM_SELECTED_TEXT}`. */",
182182
" private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {",
183-
" override CallNode node;",
184-
"",
185183
" ClassInstantiation() { this = classRef().getACall() }",
186184
" }",
187185
"",
@@ -195,11 +193,55 @@
195193
"",
196194
" /** Gets a reference to an instance of `${TM_SELECTED_TEXT}`. */",
197195
" DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }",
196+
"",
197+
" /**",
198+
" * Taint propagation for `${TM_SELECTED_TEXT}`.",
199+
" */",
200+
" private class InstanceTaintSteps extends InstanceTaintStepsHelper {",
201+
" InstanceTaintSteps() { this = \"${TM_SELECTED_TEXT}\" }",
202+
" ",
203+
" override DataFlow::Node getInstance() { result = instance() }",
204+
" ",
205+
" override string getAttributeName() { none() }",
206+
" ",
207+
" override string getMethodName() { none() }",
208+
" ",
209+
" override string getAsyncMethodName() { none() }",
210+
" }",
211+
"",
212+
" /**",
213+
" * Extra taint propagation for `${TM_SELECTED_TEXT}`, not covered by `InstanceTaintSteps`.",
214+
" */",
215+
" private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {",
216+
" override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {",
217+
" // TODO",
218+
" none()",
219+
" }",
220+
" }",
198221
"}",
199222
],
200223
"description": "Type tracking class (select full class path before inserting)",
201224
},
202-
225+
"foo": {
226+
"scope": "ql",
227+
"prefix": "foo",
228+
"body": [
229+
" /**",
230+
" * Taint propagation for `$1`.",
231+
" */",
232+
" private class InstanceTaintSteps extends InstanceTaintStepsHelper {",
233+
" InstanceTaintSteps() { this = \"$1\" }",
234+
"",
235+
" override DataFlow::Node getInstance() { result = instance() }",
236+
"",
237+
" override string getAttributeName() { none() }",
238+
"",
239+
" override string getMethodName() { none() }",
240+
"",
241+
" override string getAsyncMethodName() { none() }",
242+
" }",
243+
],
244+
},
203245
"API graph .getMember chain": {
204246
"scope": "ql",
205247
"prefix": "api graph .getMember chain",

python/ql/test/experimental/dataflow/TestUtil/PrintNode.qll renamed to python/ql/lib/semmle/python/dataflow/new/internal/PrintNode.qll

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,20 @@
1-
import python
2-
import semmle.python.dataflow.new.DataFlow
1+
/**
2+
* INTERNAL: Do not use.
3+
*
4+
* Provides helper predicates for pretty-printing `DataFlow::Node`s.
5+
*
6+
* Since these have not been performance optimized, please only use them for
7+
* debug-queries or in tests.
8+
*/
9+
10+
private import python
11+
private import semmle.python.dataflow.new.DataFlow
312

13+
/**
14+
* INTERNAL: Do not use.
15+
*
16+
* Gets the pretty-printed version of the Expr `e`.
17+
*/
418
string prettyExpr(Expr e) {
519
not e instanceof Num and
620
not e instanceof StrConst and
@@ -27,15 +41,19 @@ string prettyExpr(Expr e) {
2741
}
2842

2943
/**
30-
* Gets pretty-printed version of the DataFlow::Node `node`
44+
* INTERNAL: Do not use.
45+
*
46+
* Gets the pretty-printed version of the DataFlow::Node `node`
3147
*/
3248
bindingset[node]
3349
string prettyNode(DataFlow::Node node) {
3450
if exists(node.asExpr()) then result = prettyExpr(node.asExpr()) else result = node.toString()
3551
}
3652

3753
/**
38-
* Gets pretty-printed version of the DataFlow::Node `node`, that is suitable for use
54+
* INTERNAL: Do not use.
55+
*
56+
* Gets the pretty-printed version of the DataFlow::Node `node`, that is suitable for use
3957
* with `TestUtilities.InlineExpectationsTest` (that is, no spaces unless required).
4058
*/
4159
bindingset[node]

python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll

Lines changed: 10 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,13 @@ private module Cached {
4646
or
4747
copyStep(nodeFrom, nodeTo)
4848
or
49-
forStep(nodeFrom, nodeTo)
49+
DataFlowPrivate::forReadStep(nodeFrom, _, nodeTo)
5050
or
51-
unpackingAssignmentStep(nodeFrom, nodeTo)
51+
DataFlowPrivate::iterableUnpackingReadStep(nodeFrom, _, nodeTo)
52+
or
53+
DataFlowPrivate::iterableUnpackingStoreStep(nodeFrom, _, nodeTo)
54+
or
55+
awaitStep(nodeFrom, nodeTo)
5256
}
5357
}
5458

@@ -201,26 +205,9 @@ predicate copyStep(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeTo) {
201205
}
202206

203207
/**
204-
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to `for`-iteration,
205-
* for example `for x in xs`, or `for x,y in points`.
206-
*/
207-
predicate forStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
208-
exists(EssaNodeDefinition defn, For for |
209-
for.getTarget().getAChildNode*() = defn.getDefiningNode().getNode() and
210-
nodeTo.getVar() = defn and
211-
nodeFrom.asExpr() = for.getIter()
212-
)
213-
}
214-
215-
/**
216-
* Holds if taint can flow from `nodeFrom` to `nodeTo` with a step related to iterable unpacking.
217-
* Only handles normal assignment (`x,y = calc_point()`), since `for x,y in points` is handled by `forStep`.
208+
* Holds if taint can flow from `nodeFrom` to `nodeTo` with an `await`-step,
209+
* such that the whole expression `await x` is tainted if `x` is tainted.
218210
*/
219-
predicate unpackingAssignmentStep(DataFlow::CfgNode nodeFrom, DataFlow::EssaNode nodeTo) {
220-
// `a, b = myiterable` or `head, *tail = myiterable` (only Python 3)
221-
exists(MultiAssignmentDefinition defn, Assign assign |
222-
assign.getATarget().contains(defn.getDefiningNode().getNode()) and
223-
nodeTo.getVar() = defn and
224-
nodeFrom.asExpr() = assign.getValue()
225-
)
211+
predicate awaitStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
212+
nodeTo.asExpr().(Await).getValue() = nodeFrom.asExpr()
226213
}

python/ql/lib/semmle/python/frameworks/Aiohttp.qll

Lines changed: 74 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ private import semmle.python.frameworks.internal.PoorMansFunctionResolution
1313
private import semmle.python.frameworks.internal.SelfRefMixin
1414
private import semmle.python.frameworks.Multidict
1515
private import semmle.python.frameworks.Yarl
16+
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
1617

1718
/**
1819
* INTERNAL: Do not use.
@@ -293,6 +294,65 @@ module AiohttpWebModel {
293294

294295
/** Gets a reference to an instance of `aiohttp.web.Request`. */
295296
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
297+
298+
/**
299+
* Taint propagation for `aiohttp.web.Request`.
300+
*/
301+
private class InstanceTaintSteps extends InstanceTaintStepsHelper {
302+
InstanceTaintSteps() { this = "aiohttp.web.Request" }
303+
304+
override DataFlow::Node getInstance() { result = instance() }
305+
306+
override string getAttributeName() {
307+
result in [
308+
"url", "rel_url", "forwarded", "host", "remote", "path", "path_qs", "raw_path", "query",
309+
"headers", "transport", "cookies", "content", "_payload", "content_type", "charset",
310+
"http_range", "if_modified_since", "if_unmodified_since", "if_range", "match_info"
311+
]
312+
}
313+
314+
override string getMethodName() { result in ["clone", "get_extra_info"] }
315+
316+
override string getAsyncMethodName() {
317+
result in ["read", "text", "json", "multipart", "post"]
318+
}
319+
}
320+
321+
/** An attribute read on an `aiohttp.web.Request` that is a `MultiDictProxy` instance. */
322+
class AiohttpRequestMultiDictProxyInstances extends Multidict::MultiDictProxy::InstanceSource {
323+
AiohttpRequestMultiDictProxyInstances() {
324+
this.(DataFlow::AttrRead).getObject() = Request::instance() and
325+
this.(DataFlow::AttrRead).getAttributeName() in ["query", "headers"]
326+
or
327+
// Handle the common case of `x = await request.post()`
328+
// but don't try to handle anything else, since we don't have an easy way to do this yet.
329+
// TODO: more complete handling of `await request.post()`
330+
exists(Await await, DataFlow::CallCfgNode call, DataFlow::AttrRead read |
331+
this.asExpr() = await
332+
|
333+
read.(DataFlow::AttrRead).getObject() = Request::instance() and
334+
read.(DataFlow::AttrRead).getAttributeName() = "post" and
335+
call.getFunction() = read and
336+
await.getValue() = call.asExpr()
337+
)
338+
}
339+
}
340+
341+
/** An attribute read on an `aiohttp.web.Request` that is a `yarl.URL` instance. */
342+
class AiohttpRequestYarlUrlInstances extends Yarl::Url::InstanceSource {
343+
AiohttpRequestYarlUrlInstances() {
344+
this.(DataFlow::AttrRead).getObject() = Request::instance() and
345+
this.(DataFlow::AttrRead).getAttributeName() in ["url", "rel_url"]
346+
}
347+
}
348+
349+
/** An attribute read on an `aiohttp.web.Request` that is a `aiohttp.StreamReader` instance. */
350+
class AiohttpRequestStreamReaderInstances extends StreamReader::InstanceSource {
351+
AiohttpRequestStreamReaderInstances() {
352+
this.(DataFlow::AttrRead).getObject() = Request::instance() and
353+
this.(DataFlow::AttrRead).getAttributeName() in ["content", "_payload"]
354+
}
355+
}
296356
}
297357

298358
/**
@@ -357,30 +417,20 @@ module AiohttpWebModel {
357417
/**
358418
* Taint propagation for `aiohttp.StreamReader`.
359419
*/
360-
private class AiohttpStreamReaderAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
361-
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
362-
// Methods
363-
//
364-
// TODO: When we have tools that make it easy, model these properly to handle
365-
// `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach
366-
// (since it allows us to at least capture the most common cases).
367-
nodeFrom = StreamReader::instance() and
368-
exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom |
369-
// normal methods
370-
attr.getAttributeName() in ["read_nowait"] and
371-
nodeTo.(DataFlow::CallCfgNode).getFunction() = attr
372-
or
373-
// async methods
374-
exists(Await await, DataFlow::CallCfgNode call |
375-
attr.getAttributeName() in [
376-
"read", "readany", "readexactly", "readline", "readchunk", "iter_chunked",
377-
"iter_any", "iter_chunks"
378-
] and
379-
call.getFunction() = attr and
380-
await.getValue() = call.asExpr() and
381-
nodeTo.asExpr() = await
382-
)
383-
)
420+
private class InstanceTaintSteps extends InstanceTaintStepsHelper {
421+
InstanceTaintSteps() { this = "aiohttp.StreamReader" }
422+
423+
override DataFlow::Node getInstance() { result = instance() }
424+
425+
override string getAttributeName() { none() }
426+
427+
override string getMethodName() { result in ["read_nowait"] }
428+
429+
override string getAsyncMethodName() {
430+
result in [
431+
"read", "readany", "readexactly", "readline", "readchunk", "iter_chunked", "iter_any",
432+
"iter_chunks"
433+
]
384434
}
385435
}
386436
}
@@ -431,80 +481,6 @@ module AiohttpWebModel {
431481
}
432482
}
433483

434-
/**
435-
* Taint propagation for `aiohttp.web.Request`.
436-
*
437-
* See https://docs.aiohttp.org/en/stable/web_reference.html#request-and-base-request
438-
*/
439-
private class AiohttpRequestAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
440-
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
441-
// Methods
442-
//
443-
// TODO: When we have tools that make it easy, model these properly to handle
444-
// `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach
445-
// (since it allows us to at least capture the most common cases).
446-
nodeFrom = Request::instance() and
447-
exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom |
448-
// normal methods
449-
attr.getAttributeName() in ["clone", "get_extra_info"] and
450-
nodeTo.(DataFlow::CallCfgNode).getFunction() = attr
451-
or
452-
// async methods
453-
exists(Await await, DataFlow::CallCfgNode call |
454-
attr.getAttributeName() in ["read", "text", "json", "multipart", "post"] and
455-
call.getFunction() = attr and
456-
await.getValue() = call.asExpr() and
457-
nodeTo.asExpr() = await
458-
)
459-
)
460-
or
461-
// Attributes
462-
nodeFrom = Request::instance() and
463-
nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom and
464-
nodeTo.(DataFlow::AttrRead).getAttributeName() in [
465-
"url", "rel_url", "forwarded", "host", "remote", "path", "path_qs", "raw_path", "query",
466-
"headers", "transport", "cookies", "content", "_payload", "content_type", "charset",
467-
"http_range", "if_modified_since", "if_unmodified_since", "if_range", "match_info"
468-
]
469-
}
470-
}
471-
472-
/** An attribute read on an `aiohttp.web.Request` that is a `MultiDictProxy` instance. */
473-
class AiohttpRequestMultiDictProxyInstances extends Multidict::MultiDictProxy::InstanceSource {
474-
AiohttpRequestMultiDictProxyInstances() {
475-
this.(DataFlow::AttrRead).getObject() = Request::instance() and
476-
this.(DataFlow::AttrRead).getAttributeName() in ["query", "headers"]
477-
or
478-
// Handle the common case of `x = await request.post()`
479-
// but don't try to handle anything else, since we don't have an easy way to do this yet.
480-
// TODO: more complete handling of `await request.post()`
481-
exists(Await await, DataFlow::CallCfgNode call, DataFlow::AttrRead read |
482-
this.asExpr() = await
483-
|
484-
read.(DataFlow::AttrRead).getObject() = Request::instance() and
485-
read.(DataFlow::AttrRead).getAttributeName() = "post" and
486-
call.getFunction() = read and
487-
await.getValue() = call.asExpr()
488-
)
489-
}
490-
}
491-
492-
/** An attribute read on an `aiohttp.web.Request` that is a `yarl.URL` instance. */
493-
class AiohttpRequestYarlUrlInstances extends Yarl::Url::InstanceSource {
494-
AiohttpRequestYarlUrlInstances() {
495-
this.(DataFlow::AttrRead).getObject() = Request::instance() and
496-
this.(DataFlow::AttrRead).getAttributeName() in ["url", "rel_url"]
497-
}
498-
}
499-
500-
/** An attribute read on an `aiohttp.web.Request` that is a `aiohttp.StreamReader` instance. */
501-
class AiohttpRequestStreamReaderInstances extends StreamReader::InstanceSource {
502-
AiohttpRequestStreamReaderInstances() {
503-
this.(DataFlow::AttrRead).getObject() = Request::instance() and
504-
this.(DataFlow::AttrRead).getAttributeName() in ["content", "_payload"]
505-
}
506-
}
507-
508484
// ---------------------------------------------------------------------------
509485
// aiohttp.web Response modeling
510486
// ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)