Skip to content

Commit

Permalink
Automatic tidy refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
spassarop committed Aug 17, 2024
1 parent 5dbb5da commit 16693f5
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 77 deletions.
106 changes: 48 additions & 58 deletions src/main/java/org/owasp/validator/html/scan/ASHTMLSerializer.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import java.io.IOException;
import java.io.Writer;
import java.util.Locale;

import org.apache.xml.serialize.ElementState;
import org.apache.xml.serialize.HTMLdtd;
import org.apache.xml.serialize.OutputFormat;
Expand Down Expand Up @@ -39,44 +38,38 @@ protected String getEntityRef(int charToPrint) {
}

/**
* Called to serialize a DOM element. Equivalent to calling {@link
* #startElement}, {@link #endElement} and serializing everything
* inbetween, but better optimized.
* Called to serialize a DOM element. Equivalent to calling {@link #startElement}, {@link
* #endElement} and serializing everything inbetween, but better optimized.
*/
@Override
protected void serializeElement(Element elem )
throws IOException
{
protected void serializeElement(Element elem) throws IOException {
Attr attr;
NamedNodeMap attrMap;
int i;
int i;
Node child;
ElementState state;
boolean preserveSpace;
String name;
String value;
String tagName;
boolean preserveSpace;
String name;
String value;
String tagName;

tagName = elem.getTagName();
state = getElementState();
if ( isDocumentState() ) {
if (isDocumentState()) {
// If this is the root element handle it differently.
// If the first root element in the document, serialize
// the document's DOCTYPE. Space preserving defaults
// to that of the output format.
if ( ! _started )
startDocument( tagName );
if (!_started) startDocument(tagName);
} else {
// For any other element, if first in parent, then
// close parent's opening tag and use the parnet's
// space preserving.
if ( state.empty )
_printer.printText( '>' );
if (state.empty) _printer.printText('>');
// Indent this element on a new line if the first
// content of the parent element or immediately
// following an element.
if ( _indenting && ! state.preserveSpace &&
( state.empty || state.afterElement ) )
if (_indenting && !state.preserveSpace && (state.empty || state.afterElement))
_printer.breakLine();
}
preserveSpace = state.preserveSpace;
Expand All @@ -85,8 +78,8 @@ protected void serializeElement(Element elem )
// This only happens in endElement().

// XHTML: element names are lower case, DOM will be different
_printer.printText( '<' );
_printer.printText( tagName );
_printer.printText('<');
_printer.printText(tagName);
_printer.indent();

// Lookup the element's attribute, but only print specified
Expand All @@ -95,79 +88,74 @@ protected void serializeElement(Element elem )
// separated with a space so the element can be broken on
// multiple lines.
attrMap = elem.getAttributes();
if ( attrMap != null ) {
for ( i = 0 ; i < attrMap.getLength() ; ++i ) {
attr = (Attr) attrMap.item( i );
if (attrMap != null) {
for (i = 0; i < attrMap.getLength(); ++i) {
attr = (Attr) attrMap.item(i);
name = attr.getName().toLowerCase(Locale.ENGLISH);
value = attr.getValue();
if ( attr.getSpecified() ) {
if (attr.getSpecified()) {
_printer.printSpace();
// HTML: Empty values print as attribute name, no value.
// HTML: URI attributes will print unescaped
if ( value == null ) {
if (value == null) {
value = "";
}
if ( !_format.getPreserveEmptyAttributes() && value.length() == 0 )
_printer.printText( name );
else if ( HTMLdtd.isURI( tagName, name ) ) {
_printer.printText( name );
_printer.printText( "=\"" );
_printer.printText( escapeURI( value ) );
_printer.printText( '"' );
} else if ( HTMLdtd.isBoolean( tagName, name ) )
_printer.printText( name );
if (!_format.getPreserveEmptyAttributes() && value.length() == 0)
_printer.printText(name);
else if (HTMLdtd.isURI(tagName, name)) {
_printer.printText(name);
_printer.printText("=\"");
_printer.printText(escapeURI(value));
_printer.printText('"');
} else if (HTMLdtd.isBoolean(tagName, name)) _printer.printText(name);
else {
_printer.printText( name );
_printer.printText( "=\"" );
printEscaped( value );
_printer.printText( '"' );
_printer.printText(name);
_printer.printText("=\"");
printEscaped(value);
_printer.printText('"');
}
}
}
}
if ( HTMLdtd.isPreserveSpace( tagName ) )
preserveSpace = true;
if (HTMLdtd.isPreserveSpace(tagName)) preserveSpace = true;

// If element has children, or if element is not an empty tag,
// serialize an opening tag.
if ( elem.hasChildNodes() || ! HTMLdtd.isEmptyTag( tagName ) ) {
if (elem.hasChildNodes() || !HTMLdtd.isEmptyTag(tagName)) {
// Enter an element state, and serialize the children
// one by one. Finally, end the element.
state = enterElementState( null, null, tagName, preserveSpace );
state = enterElementState(null, null, tagName, preserveSpace);

// Prevents line breaks inside A/TD
if ( tagName.equalsIgnoreCase( "A" ) || tagName.equalsIgnoreCase( "TD" ) ) {
if (tagName.equalsIgnoreCase("A") || tagName.equalsIgnoreCase("TD")) {
state.empty = false;
_printer.printText( '>' );
_printer.printText('>');
}

// Handle SCRIPT and STYLE specifically by changing the
// state of the current element to CDATA (XHTML) or
// unescaped (HTML).
if ( tagName.equalsIgnoreCase( "SCRIPT" ) ||
tagName.equalsIgnoreCase( "STYLE" ) ) {
if (tagName.equalsIgnoreCase("SCRIPT") || tagName.equalsIgnoreCase("STYLE")) {
// HTML: Print contents unescaped
state.unescaped = true;
}
child = elem.getFirstChild();
while ( child != null ) {
serializeNode( child );
while (child != null) {
serializeNode(child);
child = child.getNextSibling();
}
endElementIO( null, null, tagName );
endElementIO(null, null, tagName);
} else {
_printer.unindent();
// XHTML: Close empty tag with ' />' so it's XML and HTML compatible.
// HTML: Empty tags are defined as such in DTD no in document.
if (!elem.hasChildNodes() && isAllowedEmptyTag(tagName) && !requiresClosingTag(tagName))
_printer.printText( "/>" );
else
_printer.printText( '>' );
_printer.printText("/>");
else _printer.printText('>');
// After element but parent element is no longer empty.
state.afterElement = true;
state.empty = false;
if ( isDocumentState() )
_printer.flush();
if (isDocumentState()) _printer.flush();
}
}

Expand All @@ -185,11 +173,13 @@ public void endElementIO(String namespaceURI, String localName, String rawName)
if (state.empty && isAllowedEmptyTag(rawName) && !requiresClosingTag(rawName)) { //
_printer.printText("/>");
} else {
if(state.empty) _printer.printText('>');
if (state.empty) _printer.printText('>');
// This element is not empty and that last content was another element, so print a line break
// before that last element and this element's closing tag. [keith] Provided this is not an
// anchor. HTML: some elements do not print closing tag (e.g. LI)
if (rawName == null || !HTMLdtd.isOnlyOpening(rawName) || HTMLdtd.isOptionalClosing(rawName)) {
if (rawName == null
|| !HTMLdtd.isOnlyOpening(rawName)
|| HTMLdtd.isOptionalClosing(rawName)) {
if (_indenting && !state.preserveSpace && state.afterElement) _printer.breakLine();
// Must leave CData section first (Illegal in HTML, but still)
if (state.inCData) _printer.printText("]]>");
Expand Down Expand Up @@ -230,6 +220,6 @@ private boolean requiresClosingTag(String tagName) {
}

private boolean isAllowedEmptyTag(String tagName) {
return "head".equals(tagName) || allowedEmptyTags.matches( tagName);
return "head".equals(tagName) || allowedEmptyTags.matches(tagName);
}
}
40 changes: 21 additions & 19 deletions src/test/java/org/owasp/validator/html/test/AntiSamyTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@
import org.owasp.validator.html.model.Property;
import org.owasp.validator.html.model.Tag;
import org.owasp.validator.html.scan.Constants;
import org.owasp.validator.html.util.ErrorMessageUtil;

/**
* This class tests AntiSamy functionality and the basic policy file which should be immune to XSS
Expand Down Expand Up @@ -1582,20 +1581,26 @@ public void validateParamAsEmbed() throws ScanException, PolicyException {
.cloneWithDirective(Policy.FORMAT_OUTPUT, "false");

// let's start with a YouTube embed
String input = "<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\"></param><param name=\"allowFullScreen\" value=\"true\"></param><param name=\"allowscriptaccess\" value=\"always\"></param><embed src=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"></embed></object>";
String expectedOutput = "<object height=\"340\" width=\"560\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\"/><param name=\"allowFullScreen\" value=\"true\"/><param name=\"allowscriptaccess\" value=\"always\"/><embed allowfullscreen=\"true\" allowscriptaccess=\"always\" height=\"340\" src=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\" type=\"application/x-shockwave-flash\" width=\"560\"/></object>";
String input =
"<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\"></param><param name=\"allowFullScreen\" value=\"true\"></param><param name=\"allowscriptaccess\" value=\"always\"></param><embed src=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"></embed></object>";
String expectedOutput =
"<object height=\"340\" width=\"560\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\"/><param name=\"allowFullScreen\" value=\"true\"/><param name=\"allowscriptaccess\" value=\"always\"/><embed allowfullscreen=\"true\" allowscriptaccess=\"always\" height=\"340\" src=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\" type=\"application/x-shockwave-flash\" width=\"560\"/></object>";
CleanResults cr = as.scan(input, revised, AntiSamy.DOM);
assertThat(cr.getCleanHTML(), containsString(expectedOutput));

String saxExpectedOutput = "<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\"/><param name=\"allowFullScreen\" value=\"true\"/><param name=\"allowscriptaccess\" value=\"always\"/><embed src=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"/></object>";
String saxExpectedOutput =
"<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\"/><param name=\"allowFullScreen\" value=\"true\"/><param name=\"allowscriptaccess\" value=\"always\"/><embed src=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"/></object>";
cr = as.scan(input, revised, AntiSamy.SAX);
assertThat(cr.getCleanHTML(), equalTo(saxExpectedOutput));

// now what if someone sticks malicious URL in the value of the
// value attribute in the param tag? remove that param tag
input = "<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://supermaliciouscode.com/badstuff.swf\"></param><param name=\"allowFullScreen\" value=\"true\"></param><param name=\"allowscriptaccess\" value=\"always\"></param><embed src=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"></embed></object>";
expectedOutput = "<object height=\"340\" width=\"560\"><param name=\"allowFullScreen\" value=\"true\"/><param name=\"allowscriptaccess\" value=\"always\"/><embed allowfullscreen=\"true\" allowscriptaccess=\"always\" height=\"340\" src=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\" type=\"application/x-shockwave-flash\" width=\"560\"/></object>";
saxExpectedOutput = "<object width=\"560\" height=\"340\"><param name=\"allowFullScreen\" value=\"true\"/><param name=\"allowscriptaccess\" value=\"always\"/><embed src=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"/></object>";
input =
"<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://supermaliciouscode.com/badstuff.swf\"></param><param name=\"allowFullScreen\" value=\"true\"></param><param name=\"allowscriptaccess\" value=\"always\"></param><embed src=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"></embed></object>";
expectedOutput =
"<object height=\"340\" width=\"560\"><param name=\"allowFullScreen\" value=\"true\"/><param name=\"allowscriptaccess\" value=\"always\"/><embed allowfullscreen=\"true\" allowscriptaccess=\"always\" height=\"340\" src=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\" type=\"application/x-shockwave-flash\" width=\"560\"/></object>";
saxExpectedOutput =
"<object width=\"560\" height=\"340\"><param name=\"allowFullScreen\" value=\"true\"/><param name=\"allowscriptaccess\" value=\"always\"/><embed src=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"/></object>";
cr = as.scan(input, revised, AntiSamy.DOM);
assertThat(cr.getCleanHTML(), containsString(expectedOutput));

Expand All @@ -1604,9 +1609,12 @@ public void validateParamAsEmbed() throws ScanException, PolicyException {

// now what if someone sticks malicious URL in the value of the src
// attribute in the embed tag? remove that embed tag
input = "<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\"></param><param name=\"allowFullScreen\" value=\"true\"></param><param name=\"allowscriptaccess\" value=\"always\"></param><embed src=\"http://hereswhereikeepbadcode.com/ohnoscary.swf\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"></embed></object>";
expectedOutput = "<object height=\"340\" width=\"560\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\"/><param name=\"allowFullScreen\" value=\"true\"/><param name=\"allowscriptaccess\" value=\"always\"/></object>";
saxExpectedOutput = "<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\"/><param name=\"allowFullScreen\" value=\"true\"/><param name=\"allowscriptaccess\" value=\"always\"/></object>";
input =
"<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\"></param><param name=\"allowFullScreen\" value=\"true\"></param><param name=\"allowscriptaccess\" value=\"always\"></param><embed src=\"http://hereswhereikeepbadcode.com/ohnoscary.swf\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"></embed></object>";
expectedOutput =
"<object height=\"340\" width=\"560\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\"/><param name=\"allowFullScreen\" value=\"true\"/><param name=\"allowscriptaccess\" value=\"always\"/></object>";
saxExpectedOutput =
"<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\"/><param name=\"allowFullScreen\" value=\"true\"/><param name=\"allowscriptaccess\" value=\"always\"/></object>";

cr = as.scan(input, revised, AntiSamy.DOM);
assertThat(cr.getCleanHTML(), containsString(expectedOutput));
Expand Down Expand Up @@ -1806,9 +1814,7 @@ public void testXSSInAntiSamy151() throws ScanException, PolicyException {
assertEquals(
"whatever<img src=\"https://ssl.gstatic.com/codesite/ph/images/defaultlogo.png\"/>",
results_sax.getCleanHTML());
assertEquals(
results_sax.getCleanHTML(),
results_dom.getCleanHTML());
assertEquals(results_sax.getCleanHTML(), results_dom.getCleanHTML());
}

@Test
Expand Down Expand Up @@ -2716,11 +2722,7 @@ public void testGithubIssue484() throws ScanException, PolicyException {
CleanResults crSax = as.scan(s, policy, AntiSamy.SAX);
String domValue = crDom.getCleanHTML();
String saxValue = crSax.getCleanHTML();
assertEquals("<p>this is para data</p>\n"
+ "<br/>\n"
+ "<p>this is para data 2</p>", domValue);
assertEquals("<p>this is para data</p>\n"
+ "<br/>\n"
+ "<p>this is para data 2</p>", saxValue);
assertEquals("<p>this is para data</p>\n" + "<br/>\n" + "<p>this is para data 2</p>", domValue);
assertEquals("<p>this is para data</p>\n" + "<br/>\n" + "<p>this is para data 2</p>", saxValue);
}
}

0 comments on commit 16693f5

Please sign in to comment.