Skip to content

Commit fc2fe6c

Browse files
authored
Merge pull request #4928 from esbena/js/rewrite-multi-sanitization
Approved by asgerf
2 parents 545451e + 8476879 commit fc2fe6c

File tree

4 files changed

+204
-92
lines changed

4 files changed

+204
-92
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
lgtm,codescanning
2+
* The query "Incomplete multi-character sanitization" (`js/incomplete-multi-character-sanitization`) has been improved to produce additional true positives and fewer false positives.

javascript/ql/src/Security/CWE-116/IncompleteMultiCharacterSanitization.ql

Lines changed: 138 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -12,69 +12,155 @@
1212
*/
1313

1414
import javascript
15-
import semmle.javascript.security.IncompleteBlacklistSanitizer
1615

17-
predicate isDangerous(RegExpTerm t) {
18-
// path traversals
19-
t.getAMatchedString() = ["..", "/..", "../"]
20-
or
21-
exists(RegExpTerm start |
22-
start = t.(RegExpSequence).getAChild() and
23-
start.getConstantValue() = "." and
24-
start.getSuccessor().getConstantValue() = "." and
25-
not [start.getPredecessor(), start.getSuccessor().getSuccessor()].getConstantValue() = "."
16+
/**
17+
* A regexp term that matches substrings that should be replaced with the empty string.
18+
*/
19+
class EmptyReplaceRegExpTerm extends RegExpTerm {
20+
EmptyReplaceRegExpTerm() {
21+
exists(StringReplaceCall replace |
22+
[replace.getRawReplacement(), replace.getCallback(1).getAReturn()].mayHaveStringValue("") and
23+
this = replace.getRegExp().getRoot().getAChild*()
24+
)
25+
}
26+
}
27+
28+
/**
29+
* A prefix that may be dangerous to sanitize explicitly.
30+
*
31+
* Note that this class exists solely as a (necessary) optimization for this query.
32+
*/
33+
class DangerousPrefix extends string {
34+
DangerousPrefix() {
35+
this = ["/..", "../"] or
36+
this = "<!--" or
37+
this = "<" + ["iframe", "script", "cript", "scrip", "style"]
38+
}
39+
}
40+
41+
/**
42+
* A substring of a prefix that may be dangerous to sanitize explicitly.
43+
*/
44+
class DangerousPrefixSubstring extends string {
45+
DangerousPrefixSubstring() {
46+
exists(DangerousPrefix s | this = s.substring([0 .. s.length()], [0 .. s.length()]))
47+
}
48+
}
49+
50+
/**
51+
* Gets a dangerous prefix that is in the prefix language of `t`.
52+
*/
53+
DangerousPrefix getADangerousMatchedPrefix(EmptyReplaceRegExpTerm t) {
54+
result = getADangerousMatchedPrefixSubstring(t) and
55+
not exists(EmptyReplaceRegExpTerm pred | pred = t.getPredecessor+() and not pred.isNullable())
56+
}
57+
58+
/**
59+
* Gets a substring of a dangerous prefix that is in the language starting at `t` (ignoring lookarounds).
60+
*
61+
* Note that the language of `t` is slightly restricted as not all RegExpTerm types are supported.
62+
*/
63+
DangerousPrefixSubstring getADangerousMatchedPrefixSubstring(EmptyReplaceRegExpTerm t) {
64+
exists(string left |
65+
t.isNullable() and left = ""
66+
or
67+
t.getAMatchedString() = left
68+
or
69+
(
70+
t instanceof RegExpOpt or
71+
t instanceof RegExpStar or
72+
t instanceof RegExpPlus or
73+
t instanceof RegExpGroup or
74+
t instanceof RegExpAlt
75+
) and
76+
left = getADangerousMatchedPrefixSubstring(t.getAChild())
77+
|
78+
result = left + getADangerousMatchedPrefixSubstring(t.getSuccessor()) or
79+
result = left
2680
)
27-
or
28-
// HTML comments
29-
t.getAMatchedString() = "<!--"
30-
or
31-
// HTML scripts
32-
t.getAMatchedString().regexpMatch("(?i)<script.*")
33-
or
34-
exists(RegExpSequence seq | seq = t |
35-
t.getChild(0).getConstantValue() = "<" and
36-
// the `cript|scrip` case has been observed in the wild, not sure what the goal of that pattern is...
37-
t.getChild(0)
38-
.getSuccessor+()
39-
.getAMatchedString()
40-
.regexpMatch("(?i)iframe|script|cript|scrip|style")
81+
}
82+
83+
/**
84+
* Holds if `t` may match the dangerous `prefix` and some suffix, indicating intent to prevent a vulnerablity of kind `kind`.
85+
*/
86+
predicate matchesDangerousPrefix(EmptyReplaceRegExpTerm t, string prefix, string kind) {
87+
prefix = getADangerousMatchedPrefix(t) and
88+
(
89+
kind = "path injection" and
90+
// upwards navigation
91+
prefix = ["/..", "../"] and
92+
not t.getSuccessor*().getAMatchedString().regexpMatch("(?is).*[a-z0-9_-].*") // explicit path name mentions make this an unlikely sanitizer
93+
or
94+
kind = "HTML element injection" and
95+
(
96+
// comments
97+
prefix = "<!--" and
98+
not t.getSuccessor*().getAMatchedString().regexpMatch("(?is).*[a-z0-9_].*") // explicit comment content mentions make this an unlikely sanitizer
99+
or
100+
// specific tags
101+
prefix = "<" + ["iframe", "script", "cript", "scrip", "style"] // the `cript|scrip` case has been observed in the wild several times
102+
)
41103
)
42104
or
43-
// HTML attributes
44-
exists(string dangerousPrefix | dangerousPrefix = ["ng-", "on"] |
45-
t.getAMatchedString().regexpMatch("(i?)" + dangerousPrefix + "[a-z]+")
105+
kind = "HTML attribute injection" and
106+
prefix =
107+
[
108+
// ordinary event handler prefix
109+
"on",
110+
// angular prefixes
111+
"ng-", "ng:", "data-ng-", "x-ng-"
112+
] and
113+
(
114+
// explicit matching: `onclick` and `ng-bind`
115+
t.getAMatchedString().regexpMatch("(?i)" + prefix + "[a-z]+")
46116
or
47-
exists(RegExpTerm start, RegExpTerm event | start = t.getAChild() |
48-
start.getConstantValue().regexpMatch("(?i)[^a-z]*" + dangerousPrefix) and
49-
event = start.getSuccessor() and
50-
exists(RegExpTerm quantified | quantified = event.(RegExpQuantifier).getChild(0) |
51-
quantified
52-
.(RegExpCharacterClass)
53-
.getAChild()
54-
.(RegExpCharacterRange)
55-
.isRange(["a", "A"], ["z", "Z"]) or
56-
[quantified, quantified.(RegExpRange).getAChild()].(RegExpCharacterClassEscape).getValue() =
57-
"w"
58-
)
117+
// regexp-based matching: `on[a-z]+`
118+
exists(EmptyReplaceRegExpTerm start | start = t.getAChild() |
119+
start.getConstantValue().regexpMatch("(?i)[^a-z]*" + prefix) and
120+
isCommonWordMatcher(start.getSuccessor())
59121
)
60122
)
61123
}
62124

63-
from StringReplaceCall replace, RegExpTerm regexp, RegExpTerm dangerous
125+
/**
126+
* Holds if `t` is a common pattern for matching words
127+
*/
128+
predicate isCommonWordMatcher(RegExpTerm t) {
129+
exists(RegExpTerm quantified | quantified = t.(RegExpQuantifier).getChild(0) |
130+
// [a-z]+ and similar
131+
quantified
132+
.(RegExpCharacterClass)
133+
.getAChild()
134+
.(RegExpCharacterRange)
135+
.isRange(["a", "A"], ["z", "Z"])
136+
or
137+
// \w+ or [\w]+
138+
[quantified, quantified.(RegExpCharacterClass).getAChild()]
139+
.(RegExpCharacterClassEscape)
140+
.getValue() = "w"
141+
)
142+
}
143+
144+
from
145+
StringReplaceCall replace, EmptyReplaceRegExpTerm regexp, EmptyReplaceRegExpTerm dangerous,
146+
string prefix, string kind
64147
where
65-
[replace.getRawReplacement(), replace.getCallback(1).getAReturn()].mayHaveStringValue("") and
66-
replace.isGlobal() and
67148
regexp = replace.getRegExp().getRoot() and
68149
dangerous.getRootTerm() = regexp and
69-
isDangerous(dangerous) and
70-
// avoid anchored terms
71-
not exists(RegExpAnchor a | a.getRootTerm() = regexp) and
72-
// avoid flagging wrappers
73-
not (
74-
dangerous instanceof RegExpAlt or
75-
dangerous instanceof RegExpGroup
150+
// skip leading optional elements
151+
not dangerous.isNullable() and
152+
// only warn about the longest match (presumably the most descriptive)
153+
prefix = max(string m | matchesDangerousPrefix(dangerous, m, kind) | m order by m.length()) and
154+
// only warn once per kind
155+
not exists(EmptyReplaceRegExpTerm other |
156+
other = dangerous.getAChild+() or other = dangerous.getPredecessor+()
157+
|
158+
matchesDangerousPrefix(other, _, kind) and
159+
not other.isNullable()
76160
) and
77161
// don't flag replace operations in a loop
78-
not replace.getReceiver().getALocalSource() = replace
79-
select replace, "The replaced string may still contain a substring that starts matching at $@.",
80-
dangerous, dangerous.toString()
162+
not replace.getAMethodCall*().flowsTo(replace.getReceiver()) and
163+
// avoid anchored terms
164+
not exists(RegExpAnchor a | regexp = a.getRootTerm())
165+
select replace, "This string may still contain $@, which may cause a " + kind + " vulnerability.",
166+
dangerous, prefix

0 commit comments

Comments
 (0)