diff --git a/README.md b/README.md index c891f7e1..4f35cb25 100644 --- a/README.md +++ b/README.md @@ -5,10 +5,14 @@ A library for performing fast, configurable cleansing of HTML coming from untrus Another way of saying that could be: It's an API that helps you make sure that clients don't supply malicious cargo code in the HTML they supply for their profile, comments, etc., that get persisted on the server. The term "malicious code" in regards to web applications usually mean "JavaScript." Mostly, Cascading Stylesheets are only considered malicious -when they invoke the JavaScript. However, there are many situations where "normal" HTML and CSS can be used in a malicious manner. +when they invoke JavaScript. However, there are many situations where "normal" HTML and CSS can be used in a malicious manner. -How to Use ----------- +More details on antisamy are available at: https://www.owasp.org/index.php/Category:OWASP_AntiSamy_Project. Particularly at: https://www.owasp.org/index.php/Category:OWASP_AntiSamy_Project#tab=How_do_I_get_started_3F. + +There is also a legacy developers guide at: https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/owaspantisamy/Developer%20Guide.pdf (not sure how long that will remain accessible). + +How to Import +------------- First, add the dependency from Maven: ```xml diff --git a/pom.xml b/pom.xml index eca7b299..8325a5de 100644 --- a/pom.xml +++ b/pom.xml @@ -55,56 +55,74 @@ org.apache.xmlgraphics batik-css 1.11 + + + + commons-logging + commons-logging + + - + net.sourceforge.nekohtml nekohtml 1.9.22 + + + + xerces + xercesImpl + + - - junit - junit - jar - test - 4.12 - - - commons-codec - commons-codec - 1.12 - test - - - commons-io - commons-io - 2.6 - test - org.apache.httpcomponents httpclient 4.5.7 + + + + commons-codec + commons-codec + + xerces xercesImpl 2.12.0 - - org.eclipse.jetty - jetty-server - 7.6.21.v20160908 - test - - org.eclipse.jetty - jetty-servlet - 7.6.21.v20160908 + commons-codec + commons-codec + 1.12 + + + + + junit + junit + 4.12 test + + + + + org.apache.maven.plugins + maven-dependency-plugin + 3.1.1 + + + + + org.apache.maven.plugins + maven-clean-plugin + 3.1.0 + org.apache.maven.plugins maven-compiler-plugin @@ -131,8 +149,14 @@ - + + org.apache.maven.plugins + maven-install-plugin + 2.5.2 + + + org.apache.maven.plugins maven-javadoc-plugin 3.0.1 @@ -142,18 +166,61 @@ jar - + + + org.apache.maven.plugins + maven-site-plugin + 3.7.1 + - maven-source-plugin - 3.0.1 - - - attach-sources - package - jar-no-fork - - - + org.apache.maven.plugins + maven-source-plugin + 3.0.1 + + + attach-sources + package + jar-no-fork + + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.22.1 + + + + + org.codehaus.mojo + versions-maven-plugin + 2.5 + + + + dependency-updates-report + plugin-updates-report + + + + + + org.apache.maven.plugins + maven-project-info-reports-plugin + 3.0.0 + + + + dependency-convergence + + + + + false + + + + diff --git a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java index 8050b5d7..dfb7f3c7 100644 --- a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java +++ b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java @@ -1284,7 +1284,6 @@ public void testGithubIssue23() throws ScanException, PolicyException { // However, the test above can't replicate this misbehavior. } - @Test public void testGithubIssue24() throws ScanException, PolicyException { @@ -1297,7 +1296,20 @@ public void testGithubIssue24() throws ScanException, PolicyException { assertThat(as.scan(test24, revisedPolicy, AntiSamy.SAX).getCleanHTML(), containsString(email)); assertThat(as.scan(test24, revisedPolicy, AntiSamy.DOM).getCleanHTML(), containsString(email)); } - + + @Test + public void testGithubIssue26() throws ScanException, PolicyException { + // Potential bypass (False positive) + String test26 = ""><img src=a onerror=alert(1)>"; + // Issue claims you end up with this: + // > + + assertThat(as.scan(test26, policy, AntiSamy.SAX).getCleanHTML(), not(containsString(""))); + assertThat(as.scan(test26, policy, AntiSamy.DOM).getCleanHTML(), not(containsString(""))); + + // But you actually end up with this: "><img src=a onerror=alert(1)> -- Which is as expected + } + @Test public void testGithubIssue27() throws ScanException, PolicyException { // This test doesn't cause an ArrayIndexOutOfBoundsException, as reported in this issue even though it @@ -1307,4 +1319,76 @@ public void testGithubIssue27() throws ScanException, PolicyException { assertThat(as.scan(test27, policy, AntiSamy.SAX).getCleanHTML(), containsString("test")); } +static final String test33 = "\n" + + "\n" + + " Test\n" + + "\n" + + "\n" + + "

Tricky Encoding

\n" + + "

NOT Sanitized by AntiSamy

\n" + + "
    \n" + + "
  1. X:x
  2. \n" + + "
  3. X:y
  4. \n" + + + "
  5. X:x
  6. \n" + + "
  7. X:y
  8. \n" + + + "
  9. X:x
  10. \n" + + "
  11. X:y
  12. \n" + + + "
  13. X:x
  14. \n" + + "
  15. X:y
  16. \n" + + "
\n" + + "

Tricky Encoding with Ampersand Encoding

\n" + + "

AntiSamy turns harmless payload into XSS by just decoding the encoded ampersands in the href attribute\n" + + "

    \n" + + "
  1. X&#x3A;x
  2. \n" + + "
  3. X&#x3A;x
  4. \n" + + + "
  5. X&#x3A;x
  6. \n" + + "
  7. X&#x3A;x
  8. \n" + + + "
  9. X&#x3A;x
  10. \n" + + "
  11. X&#x3A;x
  12. \n" + + "
\n" + + "

Original without ampersand encoding

\n" + + "\n" + + ""; + + @Test + public void testGithubIssue33a() throws ScanException, PolicyException { + + // Potential bypass + + // Issue claims you end up with this: + // javascript:x=alert and other similar problems (javascript:x=alert,x%281%29) but can't replicate that. + //System.out.println(as.scan(test33, policy, AntiSamy.SAX).getCleanHTML()); + + assertThat(as.scan(test33, policy, AntiSamy.SAX).getCleanHTML(), not(containsString("javascript:x=alert,x%281%29"))); + assertThat(as.scan(test33, policy, AntiSamy.DOM).getCleanHTML(), not(containsString("javascript:x=alert,x%281%29"))); + } + + + @Test + public void testGithubIssue34a() throws ScanException, PolicyException { + + // bypass stripNonValidXMLCharacters + // Issue indicates: "
Hello\\uD83D\\uDC95
" should be sanitized to: "
Hello
" + + String test34a = "
Hello\uD83D\uDC95
"; + assertEquals("
Hello
", as.scan(test34a, policy, AntiSamy.SAX).getCleanHTML()); + assertEquals("
Hello
", as.scan(test34a, policy, AntiSamy.DOM).getCleanHTML()); + } + + @Test + public void testGithubIssue34b() throws ScanException, PolicyException { + + // bypass stripNonValidXMLCharacters + // Issue indicates: "
Hello\\uD83D\\uDC95
" should be sanitized to: "
Hello
" + + String test34b = "\uD888"; + assertEquals("", as.scan(test34b, policy, AntiSamy.DOM).getCleanHTML()); + assertEquals("", as.scan(test34b, policy, AntiSamy.SAX).getCleanHTML()); + } + }