diff --git a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java index ba19b181..8050b5d7 100644 --- a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java +++ b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java @@ -27,8 +27,15 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import static org.hamcrest.CoreMatchers.containsString; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.not; + +import org.junit.Before; +import org.junit.Test; import java.io.IOException; import java.io.Reader; @@ -42,8 +49,7 @@ import java.util.regex.Pattern; import org.apache.commons.codec.binary.Base64; -import org.junit.Before; -import org.junit.Test; + import org.owasp.validator.html.AntiSamy; import org.owasp.validator.html.CleanResults; import org.owasp.validator.html.Policy; @@ -56,6 +62,12 @@ /** * This class tests AntiSamy functionality and the basic policy file which * should be immune to XSS and CSS phishing attacks. + * + * The test cases titled issue##() map to the issues identified in the original AntiSamy + * source code repo at: https://code.google.com/archive/p/owaspantisamy/issues. + * + * The test cases titled githubIssue##() map to the issues documented at: + * https://github.com/nahsra/antisamy/issues * * @author Arshan Dabirsiaghi */ @@ -90,9 +102,9 @@ public class AntiSamyTest { public void setUp() throws Exception { /* - * Load the policy. You may have to change the path to find the Policy - * file for your environment. - */ + * Load the policy. You may have to change the path to find the Policy + * file for your environment. + */ //get Policy instance from a URL. URL url = getClass().getResource("/antisamy.xml"); @@ -113,8 +125,8 @@ public void SAX() { } /* - * Test basic XSS cases. - */ + * Test basic XSS cases. + */ @Test public void scriptAttacks() throws ScanException, PolicyException { @@ -145,7 +157,6 @@ public void scriptAttacks() throws ScanException, PolicyException { as.scan("Google", policy, AntiSamy.DOM); as.scan("Google", policy, AntiSamy.SAX); - } @Test @@ -161,26 +172,21 @@ public void imgAttacks() throws ScanException, PolicyException { assertTrue(!as.scan("", policy, AntiSamy.SAX) .getCleanHTML().contains("", policy, AntiSamy.DOM).getCleanHTML().contains("", policy, AntiSamy.SAX).getCleanHTML().contains("", policy, AntiSamy.DOM).getCleanHTML().contains("alert")); assertTrue(!as.scan("", policy, AntiSamy.SAX).getCleanHTML().contains("alert")); - String s = as - .scan( + String s = as.scan( "", policy, AntiSamy.DOM).getCleanHTML(); assertTrue(s.length() == 0 || s.contains("&")); - s = as - .scan( - "", + s = as.scan( "", policy, AntiSamy.SAX).getCleanHTML(); assertTrue(s.length() == 0 || s.contains("&")); @@ -269,12 +275,10 @@ public void hrefAttacks() throws ScanException, PolicyException { assertTrue(!as.scan("", policy, AntiSamy.DOM).getCleanHTML().contains("", policy, AntiSamy.SAX).getCleanHTML().contains("", policy, AntiSamy.DOM).getCleanHTML().contains("", policy, AntiSamy.SAX).getCleanHTML().contains("", policy, AntiSamy.DOM).getCleanHTML().contains("style")); - assertTrue(!as - .scan( + assertTrue(!as.scan( "
", policy, AntiSamy.SAX).getCleanHTML().contains("style")); @@ -317,8 +319,8 @@ public void hrefAttacks() throws ScanException, PolicyException { } /* - * Test CSS protections. - */ + * Test CSS protections. + */ @Test public void cssAttacks() throws ScanException, PolicyException { @@ -334,20 +336,18 @@ public void cssAttacks() throws ScanException, PolicyException { assertTrue(!as.scan("", policy, AntiSamy.DOM).getCleanHTML().contains("z-index")); assertTrue(!as.scan("", policy, AntiSamy.SAX).getCleanHTML().contains("z-index")); - } /* - * Test a bunch of strings that have tweaked the XML parsing capabilities of - * NekoHTML. - */ + * Test a bunch of strings that have tweaked the XML parsing capabilities of + * NekoHTML. + */ @Test public void IllegalXML() throws PolicyException { for (String BASE64_BAD_XML_STRING : BASE64_BAD_XML_STRINGS) { try { - String testStr = new String(Base64.decodeBase64(BASE64_BAD_XML_STRING.getBytes())); as.scan(testStr, policy, AntiSamy.DOM); as.scan(testStr, policy, AntiSamy.SAX); @@ -389,10 +389,9 @@ public void IllegalXML() throws PolicyException { public void issue12() throws ScanException, PolicyException { /* - * issues 12 (and 36, which was similar). empty tags cause display - * problems/"formjacking" - */ - + * issues 12 (and 36, which was similar). empty tags cause display + * problems/"formjacking" + */ Pattern p = Pattern.compile(".*.*"); String s1 = as.scan("
hello world
", policy, AntiSamy.DOM).getCleanHTML(); @@ -594,27 +593,21 @@ public void issue38() throws ScanException, PolicyException { s = "foo@import 'x';bar"; as.scan(s, policy, AntiSamy.DOM); as.scan(s, policy, AntiSamy.SAX); - } @Test public void issue40() throws ScanException, PolicyException { - /* issue #40 - handling "; Policy revised = policy.cloneWithDirective(Policy.PRESERVE_SPACE, "true"); CleanResults cr = as.scan(s, revised, AntiSamy.DOM); - // System.out.println("here: " + cr.getCleanHTML()); assertTrue(cr.getCleanHTML().contains("print, projection, screen")); - // System.out.println(cr.getCleanHTML()); cr = as.scan(s, revised, AntiSamy.SAX); - // System.out.println(cr.getCleanHTML()); assertTrue(cr.getCleanHTML().contains("print, projection, screen")); - } @Test @@ -687,16 +680,13 @@ public void issue41() throws ScanException, PolicyException { assertTrue(!as.scan(s, revised2, AntiSamy.DOM).getCleanHTML().contains("" + ""; as.scan(s, policy, AntiSamy.DOM); assertEquals(as.scan(s, policy, AntiSamy.DOM).getNumberOfErrors(), 3); @@ -708,11 +698,10 @@ public void issue44() throws ScanException, PolicyException { @Test public void issue51() throws ScanException, PolicyException { - /* issue #51 - offsite urls with () are found to be invalid */ + /* issue #51 - offsite URLs with () are found to be invalid */ String s = "test"; CleanResults cr = as.scan(s, policy, AntiSamy.DOM); - // System.out.println(cr.getCleanHTML()); assertEquals(cr.getNumberOfErrors(), 0); cr = as.scan(s, policy, AntiSamy.SAX); @@ -763,7 +752,6 @@ public void issue61() throws ScanException, PolicyException { @Test public void issue69() throws ScanException, PolicyException { - /* issue #69 - char attribute should allow single char or entity ref */ String s = "
test
"; @@ -804,17 +792,16 @@ public void CDATAByPass() throws ScanException, PolicyException { assertTrue(crSax.contains("<script") && !crDom.contains("") && !crDom.contains("")); assertTrue(!crSax.contains("") && !crSax.contains("")); - sb = new StringBuilder(); + StringBuilder sb = new StringBuilder(); sb.append("foobar"); sb.append(""); @@ -945,10 +929,6 @@ public void issue112() throws ScanException, PolicyException { @Test public void nestedCdataAttacks() throws ScanException, PolicyException { - /* - * #112 - empty tag becomes self closing - */ - /* * Testing for nested CDATA attacks against the SAX parser. @@ -992,10 +972,8 @@ public void issue101InternationalCharacterSupport() throws ScanException, Policy public void iframeAsReportedByOndrej() throws ScanException, PolicyException { String html = ""; - Policy revised; - Tag tag = new Tag("iframe", Collections.emptyMap(), Policy.ACTION_VALIDATE); - revised = policy.addTagRule(tag); + Policy revised = policy.addTagRule(tag); String crDom = as.scan(html, revised, AntiSamy.DOM).getCleanHTML(); String crSax = as.scan(html, revised, AntiSamy.SAX).getCleanHTML(); @@ -1009,46 +987,34 @@ public void iframeAsReportedByOndrej() throws ScanException, PolicyException { * have an action set to "validate" (may be implicit) in the policy file. */ @Test - public void nofollowAnchors() { - - try { - - // if we have activated nofollowAnchors - String val = policy.getDirective(Policy.ANCHORS_NOFOLLOW); - - Policy revisedPolici = policy.cloneWithDirective(Policy.ANCHORS_NOFOLLOW, "true"); + public void nofollowAnchors() throws ScanException, PolicyException { - // adds when not present + // if we have activated nofollowAnchors + Policy revisedPolicy = policy.cloneWithDirective(Policy.ANCHORS_NOFOLLOW, "true"); - assertTrue(as.scan("link", revisedPolici, AntiSamy.DOM).getCleanHTML().contains("link")); - assertTrue(as.scan("link", revisedPolici, AntiSamy.SAX).getCleanHTML().contains("link")); + // adds when not present + assertTrue(as.scan("link", revisedPolicy, AntiSamy.DOM).getCleanHTML().contains("link")); + assertTrue(as.scan("link", revisedPolicy, AntiSamy.SAX).getCleanHTML().contains("link")); - // adds properly even with bad attr - assertTrue(as.scan("link", revisedPolici, AntiSamy.DOM).getCleanHTML().contains("link")); - assertTrue(as.scan("link", revisedPolici, AntiSamy.SAX).getCleanHTML().contains("link")); + // adds properly even with bad attr + assertTrue(as.scan("link", revisedPolicy, AntiSamy.DOM).getCleanHTML().contains("link")); + assertTrue(as.scan("link", revisedPolicy, AntiSamy.SAX).getCleanHTML().contains("link")); - // rel with bad value gets corrected - assertTrue(as.scan("link", revisedPolici, AntiSamy.DOM).getCleanHTML().contains("link")); - assertTrue(as.scan("link", revisedPolici, AntiSamy.SAX).getCleanHTML().contains("link")); + // rel with bad value gets corrected + assertTrue(as.scan("link", revisedPolicy, AntiSamy.DOM).getCleanHTML().contains("link")); + assertTrue(as.scan("link", revisedPolicy, AntiSamy.SAX).getCleanHTML().contains("link")); - // correct attribute doesnt get messed with - assertTrue(as.scan("link", policy, AntiSamy.DOM).getCleanHTML().contains("link")); - assertTrue(as.scan("link", policy, AntiSamy.SAX).getCleanHTML().contains("link")); + // correct attribute doesn't get messed with + assertTrue(as.scan("link", policy, AntiSamy.DOM).getCleanHTML().contains("link")); + assertTrue(as.scan("link", policy, AntiSamy.SAX).getCleanHTML().contains("link")); - // if two correct attributes, only one remaining after scan - assertTrue(as.scan("link", policy, AntiSamy.DOM).getCleanHTML().contains("link")); - assertTrue(as.scan("link", policy, AntiSamy.SAX).getCleanHTML().contains("link")); + // if two correct attributes, only one remaining after scan + assertTrue(as.scan("link", policy, AntiSamy.DOM).getCleanHTML().contains("link")); + assertTrue(as.scan("link", policy, AntiSamy.SAX).getCleanHTML().contains("link")); - // test if value is off - does it add? - - assertTrue(!as.scan("a href=\"blah\">link", policy, AntiSamy.DOM).getCleanHTML().contains("nofollow")); - assertTrue(!as.scan("a href=\"blah\">link", policy, AntiSamy.SAX).getCleanHTML().contains("nofollow")); - - policy.cloneWithDirective(Policy.ANCHORS_NOFOLLOW, val); - - } catch (Exception e) { - fail("Caught exception in testNofollowAnchors(): " + e.getMessage()); - } + // test if value is off - does it add? + assertTrue(!as.scan("a href=\"blah\">link", policy, AntiSamy.DOM).getCleanHTML().contains("nofollow")); + assertTrue(!as.scan("a href=\"blah\">link", policy, AntiSamy.SAX).getCleanHTML().contains("nofollow")); } @Test @@ -1060,11 +1026,11 @@ public void validateParamAsEmbed() throws ScanException, PolicyException { String input = ""; String expectedOutput = ""; CleanResults cr = as.scan(input, revised, AntiSamy.DOM); - assertTrue(cr.getCleanHTML().contains(expectedOutput)); + assertThat(cr.getCleanHTML(), containsString(expectedOutput)); String saxExpectedOutput = ""; cr = as.scan(input, revised, AntiSamy.SAX); - assertTrue(cr.getCleanHTML().equals(saxExpectedOutput)); + assertThat(cr.getCleanHTML(), equalTo(saxExpectedOutput)); // now what if someone sticks malicious URL in the value of the // value attribute in the param tag? remove that param tag @@ -1072,10 +1038,10 @@ public void validateParamAsEmbed() throws ScanException, PolicyException { expectedOutput = ""; saxExpectedOutput = ""; cr = as.scan(input, revised, AntiSamy.DOM); - assertTrue(cr.getCleanHTML().contains(expectedOutput)); + assertThat(cr.getCleanHTML(), containsString(expectedOutput)); cr = as.scan(input, revised, AntiSamy.SAX); - assertTrue(cr.getCleanHTML().equals(saxExpectedOutput)); + assertThat(cr.getCleanHTML(), equalTo(saxExpectedOutput)); // now what if someone sticks malicious URL in the value of the src // attribute in the embed tag? remove that embed tag @@ -1084,9 +1050,9 @@ public void validateParamAsEmbed() throws ScanException, PolicyException { saxExpectedOutput = ""; cr = as.scan(input, revised, AntiSamy.DOM); - assertTrue(cr.getCleanHTML().contains(expectedOutput)); + assertThat(cr.getCleanHTML(), containsString(expectedOutput)); CleanResults scan = as.scan(input, revised, AntiSamy.SAX); - assertTrue(scan.getCleanHTML().equals(saxExpectedOutput)); + assertThat(scan.getCleanHTML(), equalTo(saxExpectedOutput)); } @Test @@ -1192,7 +1158,7 @@ public void testOnsiteRegex() throws ScanException, PolicyException { void assertIsGoodOnsiteURL(String url) throws ScanException, PolicyException { String html = as.scan("X", policy, AntiSamy.DOM).getCleanHTML(); - assertTrue(html.contains("href=\"")); + assertThat(html, containsString("href=\"")); } @Test @@ -1217,11 +1183,9 @@ public void issue75() throws ScanException, PolicyException { as.scan("", pol, AntiSamy.SAX); } - @Test public void issue144() throws ScanException, PolicyException { String pinata = "pi\u00f1ata"; - System.out.println(pinata); CleanResults results = as.scan(pinata, policy, AntiSamy.DOM); String cleanHTML = results.getCleanHTML(); assertEquals(pinata, cleanHTML); @@ -1258,8 +1222,6 @@ public void testXSSInAntiSamy151() throws ScanException, PolicyException { String test = "whatever"; CleanResults results_sax = as.scan(test, policy, AntiSamy.SAX); - - CleanResults results_dom = as.scan(test, policy, AntiSamy.DOM); assertEquals( results_sax.getCleanHTML(), results_dom.getCleanHTML()); @@ -1270,8 +1232,6 @@ public void testXSSInAntiSamy151() throws ScanException, PolicyException { public void testAnotherXSS() throws ScanException, PolicyException { String test = "foo"; CleanResults results_sax = as.scan(test, policy, AntiSamy.SAX); - - CleanResults results_dom = as.scan(test, policy, AntiSamy.DOM); assertEquals( results_sax.getCleanHTML(), results_dom.getCleanHTML()); @@ -1281,8 +1241,8 @@ public void testAnotherXSS() throws ScanException, PolicyException { @Test public void testIssue2() throws ScanException, PolicyException { String test = ""; - assertFalse(as.scan(test, policy, AntiSamy.DOM).getCleanHTML().contains("alert")); - assertFalse(as.scan(test, policy, AntiSamy.SAX).getCleanHTML().contains("alert")); + assertThat(as.scan(test, policy, AntiSamy.DOM).getCleanHTML(), not(containsString("alert"))); + assertThat(as.scan(test, policy, AntiSamy.SAX).getCleanHTML(), not(containsString("alert"))); } /* @@ -1293,20 +1253,58 @@ public void testUnknownTags() throws ScanException, PolicyException { String test = "<%/onmouseover=prompt(1)>"; CleanResults saxResults = as.scan(test, policy, AntiSamy.SAX); CleanResults domResults = as.scan(test, policy, AntiSamy.DOM); - System.out.println("OnUnknown (SAX): " + saxResults.getCleanHTML()); - System.out.println("OnUnknown (DOM): " + domResults.getCleanHTML()); - assertFalse(saxResults.getCleanHTML().contains("<%/")); - assertFalse(domResults.getCleanHTML().contains("<%/")); + assertThat(saxResults.getCleanHTML(), not(containsString("<%/"))); + assertThat(domResults.getCleanHTML(), not(containsString("<%/"))); } @Test public void testStreamScan() throws ScanException, PolicyException, InterruptedException, ExecutionException { - Reader reader = new StringReader("whatever"); + String testImgSrcURL = "whatever" + testImgSrcURL + "onmouseover=\"alert('xss')\">"); Writer writer = new StringWriter(); as.scan(reader, writer, policy); String cleanHtml = writer.toString().trim(); - assertEquals("whatever", cleanHtml); + assertEquals("whatever" + testImgSrcURL + "/>", cleanHtml); + } + + @Test + public void testGithubIssue23() throws ScanException, PolicyException { + + // Antisamy Stripping nested lists and tables + String test23 = "
  • one
  • two
  • three
    • a
    • b
"; + // Issue claims you end up with this: + //
  • one
  • two
  • three
    • a
    • b
    + // Meaning the
  • a
  • b
  • elements were moved outside of the nested
      list they were in + + // The a.replaceAll("\\s","") is used to strip out all the whitespace in the CleanHTML so we can successfully find + // what we expect to find. + assertThat(as.scan(test23, policy, AntiSamy.SAX).getCleanHTML().replaceAll("\\s",""), containsString("
      • a
      • ")); + assertThat(as.scan(test23, policy, AntiSamy.DOM).getCleanHTML().replaceAll("\\s",""), containsString("
        • a
        • ")); + + // However, the test above can't replicate this misbehavior. + } + + + @Test + public void testGithubIssue24() throws ScanException, PolicyException { + + // if we have onUnknownTag set to encode, it still strips out the @ and everything else after the it + // DOM Parser actually rips out the entire value even with onUnknownTag set + TestPolicy revisedPolicy = policy.cloneWithDirective("onUnknownTag", "encode"); + + String email = "name@mail.com"; + String test24 = "firstname,lastname<" + email + ">"; + assertThat(as.scan(test24, revisedPolicy, AntiSamy.SAX).getCleanHTML(), containsString(email)); + assertThat(as.scan(test24, revisedPolicy, AntiSamy.DOM).getCleanHTML(), containsString(email)); + } + + @Test + public void testGithubIssue27() throws ScanException, PolicyException { + // This test doesn't cause an ArrayIndexOutOfBoundsException, as reported in this issue even though it + // replicates the test as described. + String test27 = "my &test"; + assertThat(as.scan(test27, policy, AntiSamy.DOM).getCleanHTML(), containsString("test")); + assertThat(as.scan(test27, policy, AntiSamy.SAX).getCleanHTML(), containsString("test")); } }