7
7
8
8
import regexp.RegExpTreeView // re-export
9
9
private import regexp.internal.ParseRegExp
10
- private import regexp.internal.RegExpConfiguration
11
- private import codeql.ruby.ast.Literal as Ast
10
+ private import regexp.internal.RegExpTracking as RegExpTracking
11
+ private import codeql.ruby.AST as Ast
12
+ private import codeql.ruby.CFG
12
13
private import codeql.ruby.DataFlow
13
14
private import codeql.ruby.ApiGraphs
15
+ private import codeql.ruby.Concepts
14
16
15
17
/**
16
18
* Provides utility predicates related to regular expressions.
@@ -63,7 +65,11 @@ private class RegExpLiteralPatternSource extends RegExpPatternSource {
63
65
private class StringRegExpPatternSource extends RegExpPatternSource {
64
66
private DataFlow:: Node parse ;
65
67
66
- StringRegExpPatternSource ( ) { this = regExpSource ( parse ) }
68
+ StringRegExpPatternSource ( ) {
69
+ this = regExpSource ( parse ) and
70
+ // `regExpSource()` tracks both strings and regex literals; narrow it down to strings here.
71
+ this .asExpr ( ) .getConstantValue ( ) .isString ( _)
72
+ }
67
73
68
74
override DataFlow:: Node getAParse ( ) { result = parse }
69
75
@@ -104,6 +110,7 @@ module RegExpInterpretation {
104
110
105
111
/**
106
112
* A node interpreted as a regular expression.
113
+ * Specifically, nodes where string values are interpreted as regular expressions.
107
114
*/
108
115
class StdLibRegExpInterpretation extends RegExpInterpretation:: Range {
109
116
StdLibRegExpInterpretation ( ) {
@@ -115,16 +122,100 @@ class StdLibRegExpInterpretation extends RegExpInterpretation::Range {
115
122
mce .getMethodName ( ) = [ "match" , "match?" ] and
116
123
this = mce .getArgument ( 0 ) and
117
124
// exclude https://ruby-doc.org/core-2.4.0/Regexp.html#method-i-match
118
- not mce .getReceiver ( ) = trackRegexpType ( )
125
+ not mce .getReceiver ( ) = RegExpTracking :: trackRegexpType ( )
119
126
)
120
127
}
121
128
}
122
129
130
+ /**
131
+ * Holds if `exec` is a node where `regexp` is interpreted as a regular expression and
132
+ * tested against the string value of `input`.
133
+ * `name` describes the regexp execution, typically the name of the method being called.
134
+ */
135
+ private predicate regexExecution (
136
+ DataFlow:: Node exec , DataFlow:: Node input , DataFlow:: Node regexp , string name
137
+ ) {
138
+ // `=~` or `!~`
139
+ exists ( CfgNodes:: ExprNodes:: BinaryOperationCfgNode op |
140
+ name = op .getOperator ( ) and
141
+ exec .asExpr ( ) = op and
142
+ (
143
+ op .getExpr ( ) instanceof Ast:: RegExpMatchExpr or
144
+ op .getExpr ( ) instanceof Ast:: NoRegExpMatchExpr
145
+ ) and
146
+ (
147
+ input .asExpr ( ) = op .getLeftOperand ( ) and regexp .asExpr ( ) = op .getRightOperand ( )
148
+ or
149
+ input .asExpr ( ) = op .getRightOperand ( ) and regexp .asExpr ( ) = op .getLeftOperand ( )
150
+ )
151
+ )
152
+ or
153
+ // Any of the methods on `String` that take a regexp.
154
+ exists ( DataFlow:: CallNode call | exec = call |
155
+ name = "String#" + call .getMethodName ( ) and
156
+ call .getMethodName ( ) =
157
+ [
158
+ "[]" , "gsub" , "gsub!" , "index" , "match" , "match?" , "partition" , "rindex" , "rpartition" ,
159
+ "scan" , "slice!" , "split" , "sub" , "sub!"
160
+ ] and
161
+ input = call .getReceiver ( ) and
162
+ regexp = call .getArgument ( 0 ) and
163
+ // exclude https://ruby-doc.org/core-2.4.0/Regexp.html#method-i-match; those calls are handled by the next case of this disjunction
164
+ // also see `StdLibRegExpInterpretation`
165
+ not (
166
+ call .getMethodName ( ) = [ "match" , "match?" ] and
167
+ call .getReceiver ( ) = RegExpTracking:: trackRegexpType ( )
168
+ )
169
+ )
170
+ or
171
+ // A call to `match` or `match?` where the regexp is the receiver.
172
+ exists ( DataFlow:: CallNode call | exec = call |
173
+ name = "Regexp#" + call .getMethodName ( ) and
174
+ call .getMethodName ( ) = [ "match" , "match?" ] and
175
+ regexp = call .getReceiver ( ) and
176
+ input = call .getArgument ( 0 )
177
+ )
178
+ or
179
+ // a `case` expression, matched via `when` or `in` branches
180
+ exists ( CfgNodes:: ExprNodes:: CaseExprCfgNode caseExpr |
181
+ exec .asExpr ( ) = caseExpr and
182
+ input .asExpr ( ) = caseExpr .getValue ( )
183
+ |
184
+ name = "case-when" and
185
+ regexp .asExpr ( ) = caseExpr .getBranch ( _) .( CfgNodes:: ExprNodes:: WhenClauseCfgNode ) .getPattern ( _)
186
+ or
187
+ name = "case-in" and
188
+ regexp .asExpr ( ) = caseExpr .getBranch ( _) .( CfgNodes:: ExprNodes:: InClauseCfgNode ) .getPattern ( )
189
+ )
190
+ }
191
+
192
+ /**
193
+ * An execution of a regular expression by the standard library.
194
+ */
195
+ private class StdRegexpExecution extends RegexExecution:: Range {
196
+ DataFlow:: Node regexp ;
197
+ DataFlow:: Node input ;
198
+ string name ;
199
+
200
+ StdRegexpExecution ( ) { regexExecution ( this , input , regexp , name ) }
201
+
202
+ override DataFlow:: Node getRegex ( ) { result = regexp }
203
+
204
+ override DataFlow:: Node getString ( ) { result = input }
205
+
206
+ override string getName ( ) { result = name }
207
+ }
208
+
123
209
/**
124
210
* Gets a node whose value may flow (inter-procedurally) to `re`, where it is interpreted
125
211
* as a part of a regular expression.
126
212
*/
127
213
cached
128
- DataFlow:: Node regExpSource ( DataFlow:: Node re ) {
129
- exists ( RegExpConfiguration c | c .hasFlow ( result , re ) )
214
+ DataFlow:: Node regExpSource ( DataFlow:: Node re ) { result = RegExpTracking:: regExpSource ( re ) }
215
+
216
+ /** Gets a parsed regular expression term that is executed at `exec`. */
217
+ RegExpTerm getTermForExecution ( RegexExecution exec ) {
218
+ exists ( RegExpPatternSource source | source = regExpSource ( exec .getRegex ( ) ) |
219
+ result = source .getRegExpTerm ( )
220
+ )
130
221
}
0 commit comments