1
+ /**
2
+ * Provides predicates that track strings and regular expressions to where they are used.
3
+ * This is implemented using TypeTracking in two phases:
4
+ *
5
+ * 1: An exploratory analysis that just imprecisely tracks all strings and regular expressions
6
+ * to all places where regular expressions (as strings or as regular expression objects) can be used.
7
+ * The exploratory phase then ends with a backwards analysis from the uses that were reached.
8
+ * This is similar to the exploratory phase of the JavaScript global DataFlow library.
9
+ *
10
+ * 2: A precise type tracking analysis that tracks
11
+ * strings and regular expressions to the places where they are used.
12
+ * This phase keeps track of which strings and regular expressions end up in which places.
13
+ */
14
+
1
15
private import codeql.ruby.Regexp as RE
2
16
private import codeql.ruby.AST as Ast
3
17
private import codeql.ruby.CFG
@@ -11,41 +25,114 @@ private import codeql.ruby.dataflow.internal.DataFlowPrivate as DataFlowPrivate
11
25
private import codeql.ruby.TaintTracking
12
26
private import codeql.ruby.frameworks.core.String
13
27
14
- /**
15
- * Gets a node that has been tracked from the string constant `start` to some node.
16
- * This is used to figure out where `start` is evaluated as a regular expression against an input string,
17
- * or where `start` is compiled into a regular expression.
18
- */
19
- private DataFlow:: LocalSourceNode strToReg ( DataFlow:: Node start , TypeTracker t ) {
20
- t .start ( ) and
21
- start = result and
28
+ /** Gets a constant string value that may be used as a regular expression. */
29
+ DataFlow:: LocalSourceNode strStart ( ) {
22
30
result .asExpr ( ) =
23
31
any ( ExprCfgNode e |
24
32
e .getConstantValue ( ) .isString ( _) and
25
33
not e instanceof ExprNodes:: VariableReadAccessCfgNode and
26
34
not e instanceof ExprNodes:: ConstantReadAccessCfgNode
27
35
)
36
+ }
37
+
38
+ /** Gets a dataflow node for a regular expression literal. */
39
+ DataFlow:: LocalSourceNode regStart ( ) { result .asExpr ( ) .getExpr ( ) instanceof Ast:: RegExpLiteral }
40
+
41
+ /**
42
+ * Holds if the analysis should track flow from `nodeFrom` to `nodeTo` on top of the ordinary type-tracking steps.
43
+ * `nodeFrom` and `nodeTo` have types `fromType` and `toType` respectively.
44
+ * The types are either "string" or "reg".
45
+ */
46
+ predicate step (
47
+ DataFlow:: Node nodeFrom , DataFlow:: LocalSourceNode nodeTo , string fromType , string toType
48
+ ) {
49
+ fromType = toType and
50
+ fromType = "string" and
51
+ (
52
+ // include taint flow through `String` summaries
53
+ TaintTracking:: localTaintStep ( nodeFrom , nodeTo ) and
54
+ nodeFrom .( DataFlowPrivate:: SummaryNode ) .getSummarizedCallable ( ) instanceof
55
+ String:: SummarizedCallable
56
+ or
57
+ // string concatenations, and
58
+ exists ( CfgNodes:: ExprNodes:: OperationCfgNode op |
59
+ op = nodeTo .asExpr ( ) and
60
+ op .getAnOperand ( ) = nodeFrom .asExpr ( ) and
61
+ op .getExpr ( ) .( Ast:: BinaryOperation ) .getOperator ( ) = "+"
62
+ )
63
+ or
64
+ // string interpolations
65
+ nodeFrom .asExpr ( ) =
66
+ nodeTo .asExpr ( ) .( CfgNodes:: ExprNodes:: StringlikeLiteralCfgNode ) .getAComponent ( )
67
+ )
68
+ or
69
+ fromType = "string" and
70
+ toType = "reg" and
71
+ exists ( DataFlow:: CallNode call |
72
+ call = API:: getTopLevelMember ( "Regexp" ) .getAMethodCall ( [ "compile" , "new" ] ) and
73
+ nodeFrom = call .getArgument ( 0 ) and
74
+ nodeTo = call
75
+ )
76
+ }
77
+
78
+ /** Gets a node where string values that flow to the node are interpreted as regular expressions. */
79
+ DataFlow:: Node stringSink ( ) {
80
+ result instanceof RE:: RegExpInterpretation:: Range and
81
+ not exists ( DataFlow:: CallNode mce | mce .getMethodName ( ) = [ "match" , "match?" ] |
82
+ // receiver of https://ruby-doc.org/core-2.4.0/String.html#method-i-match
83
+ result = mce .getReceiver ( ) and
84
+ mce .getArgument ( 0 ) = trackRegexpType ( )
85
+ or
86
+ // first argument of https://ruby-doc.org/core-2.4.0/Regexp.html#method-i-match
87
+ result = mce .getArgument ( 0 ) and
88
+ mce .getReceiver ( ) = trackRegexpType ( )
89
+ )
90
+ }
91
+
92
+ /** Gets a node where regular expressions that flow to the node are used. */
93
+ DataFlow:: Node regSink ( ) { result = any ( RegexExecution exec ) .getRegex ( ) }
94
+
95
+ /** Gets a node that is reachable by type-tracking from any string or regular expression. */
96
+ DataFlow:: LocalSourceNode forward ( TypeTracker t ) {
97
+ t .start ( ) and
98
+ result = [ strStart ( ) , regStart ( ) ]
99
+ or
100
+ exists ( TypeTracker t2 | result = forward ( t2 ) .track ( t2 , t ) )
101
+ or
102
+ exists ( TypeTracker t2 | t2 = t .continue ( ) | step ( forward ( t2 ) .getALocalUse ( ) , result , _, _) )
103
+ }
104
+
105
+ /**
106
+ * Gets a node that is backwards reachable from any regular expression use,
107
+ * where that use is reachable by type-tracking from any string or regular expression.
108
+ */
109
+ DataFlow:: LocalSourceNode backwards ( TypeBackTracker t ) {
110
+ t .start ( ) and
111
+ result .flowsTo ( [ stringSink ( ) , regSink ( ) ] ) and
112
+ result = forward ( TypeTracker:: end ( ) )
28
113
or
29
- exists ( TypeTracker t2 | result = strToReg ( start , t2 ) .track ( t2 , t ) )
114
+ exists ( TypeBackTracker t2 | result = backwards ( t2 ) .backtrack ( t2 , t ) )
30
115
or
31
- exists ( TypeTracker t2 , DataFlow:: Node nodeFrom | t2 = t .continue ( ) |
32
- strToReg ( start , t2 ) .flowsTo ( nodeFrom ) and
33
- (
34
- // include taint flow through `String` summaries
35
- TaintTracking:: localTaintStep ( nodeFrom , result ) and
36
- nodeFrom .( DataFlowPrivate:: SummaryNode ) .getSummarizedCallable ( ) instanceof
37
- String:: SummarizedCallable
38
- or
39
- // string concatenations, and
40
- exists ( CfgNodes:: ExprNodes:: OperationCfgNode op |
41
- op = result .asExpr ( ) and
42
- op .getAnOperand ( ) = nodeFrom .asExpr ( ) and
43
- op .getExpr ( ) .( Ast:: BinaryOperation ) .getOperator ( ) = "+"
44
- )
45
- or
46
- // string interpolations
47
- nodeFrom .asExpr ( ) =
48
- result .asExpr ( ) .( CfgNodes:: ExprNodes:: StringlikeLiteralCfgNode ) .getAComponent ( )
116
+ exists ( TypeBackTracker t2 | t2 = t .continue ( ) | step ( result .getALocalUse ( ) , backwards ( t2 ) , _, _) )
117
+ }
118
+
119
+ /**
120
+ * Gets a node that has been tracked from the string constant `start` to some node.
121
+ * This is used to figure out where `start` is evaluated as a regular expression against an input string,
122
+ * or where `start` is compiled into a regular expression.
123
+ */
124
+ private DataFlow:: LocalSourceNode trackStrings ( DataFlow:: Node start , TypeTracker t ) {
125
+ result = backwards ( _) and
126
+ (
127
+ t .start ( ) and
128
+ start = result and
129
+ result = strStart ( )
130
+ or
131
+ exists ( TypeTracker t2 | result = trackStrings ( start , t2 ) .track ( t2 , t ) )
132
+ or
133
+ // an additional step from string to string
134
+ exists ( TypeTracker t2 | t2 = t .continue ( ) |
135
+ step ( trackStrings ( start , t2 ) .getALocalUse ( ) , result , "string" , "string" )
49
136
)
50
137
)
51
138
}
@@ -54,19 +141,18 @@ private DataFlow::LocalSourceNode strToReg(DataFlow::Node start, TypeTracker t)
54
141
* Gets a node that has been tracked from the regular expression `start` to some node.
55
142
* This is used to figure out where `start` is executed against an input string.
56
143
*/
57
- private DataFlow:: LocalSourceNode regToReg ( DataFlow:: Node start , TypeTracker t ) {
58
- t .start ( ) and
59
- start = result and
60
- result .asExpr ( ) .getExpr ( ) instanceof Ast:: RegExpLiteral
61
- or
62
- exists ( TypeTracker t2 | result = regToReg ( start , t2 ) .track ( t2 , t ) )
63
- or
64
- exists ( TypeTracker t2 |
65
- t2 = t .continue ( ) and
66
- exists ( DataFlow:: CallNode call |
67
- call = API:: getTopLevelMember ( "Regexp" ) .getAMethodCall ( [ "compile" , "new" ] ) and
68
- strToReg ( start , t2 ) .flowsTo ( call .getArgument ( 0 ) ) and
69
- result = call
144
+ private DataFlow:: LocalSourceNode trackRegs ( DataFlow:: Node start , TypeTracker t ) {
145
+ result = backwards ( _) and
146
+ (
147
+ t .start ( ) and
148
+ start = result and
149
+ result = regStart ( )
150
+ or
151
+ exists ( TypeTracker t2 | result = trackRegs ( start , t2 ) .track ( t2 , t ) )
152
+ or
153
+ // an additional step where a string is converted to a regular expression
154
+ exists ( TypeTracker t2 | t2 = t .continue ( ) |
155
+ step ( trackStrings ( start , t2 ) .getALocalUse ( ) , result , "string" , "reg" )
70
156
)
71
157
)
72
158
}
@@ -75,7 +161,7 @@ private DataFlow::LocalSourceNode regToReg(DataFlow::Node start, TypeTracker t)
75
161
private DataFlow:: LocalSourceNode trackRegexpType ( TypeTracker t ) {
76
162
t .start ( ) and
77
163
(
78
- result . asExpr ( ) . getExpr ( ) instanceof Ast :: RegExpLiteral or
164
+ result = regStart ( ) or
79
165
result = API:: getTopLevelMember ( "Regexp" ) .getAMethodCall ( [ "compile" , "new" ] )
80
166
)
81
167
or
@@ -85,25 +171,14 @@ private DataFlow::LocalSourceNode trackRegexpType(TypeTracker t) {
85
171
/** Gets a node that references a regular expression. */
86
172
DataFlow:: Node trackRegexpType ( ) { trackRegexpType ( TypeTracker:: end ( ) ) .flowsTo ( result ) }
87
173
88
- /** Gets a the value for the regular expression that is evaluated at `re`. */
174
+ /** Gets a node holding a value for the regular expression that is evaluated at `re`. */
89
175
cached
90
176
DataFlow:: Node regExpSource ( DataFlow:: Node re ) {
91
- exists ( DataFlow:: LocalSourceNode end | end = strToReg ( result , TypeTracker:: end ( ) ) |
92
- end .flowsTo ( re ) and
93
- re instanceof RE:: RegExpInterpretation:: Range and
94
- not exists ( DataFlow:: CallNode mce | mce .getMethodName ( ) = [ "match" , "match?" ] |
95
- // receiver of https://ruby-doc.org/core-2.4.0/String.html#method-i-match
96
- re = mce .getReceiver ( ) and
97
- mce .getArgument ( 0 ) = trackRegexpType ( )
98
- or
99
- // first argument of https://ruby-doc.org/core-2.4.0/Regexp.html#method-i-match
100
- re = mce .getArgument ( 0 ) and
101
- mce .getReceiver ( ) = trackRegexpType ( )
102
- )
177
+ exists ( DataFlow:: LocalSourceNode end | end = trackStrings ( result , TypeTracker:: end ( ) ) |
178
+ end .getALocalUse ( ) = re and re = stringSink ( )
103
179
)
104
180
or
105
- exists ( DataFlow:: LocalSourceNode end | end = regToReg ( result , TypeTracker:: end ( ) ) |
106
- end .flowsTo ( re ) and
107
- re = any ( RegexExecution exec ) .getRegex ( )
181
+ exists ( DataFlow:: LocalSourceNode end | end = trackRegs ( result , TypeTracker:: end ( ) ) |
182
+ end .getALocalUse ( ) = re and re = regSink ( )
108
183
)
109
184
}
0 commit comments