diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..4bb50dc
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+# Treat every file as text and normalize its line endings to LF
+* text eol=lf
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0db216b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+npm-debug.log
+node_modules
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..d63ba09
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,5 @@
+language: node_js
+node_js:
+ - 0.8
+ - 0.10
+ - 0.11
diff --git a/CHANGELOG b/CHANGELOG
deleted file mode 100644
index c262712..0000000
--- a/CHANGELOG
+++ /dev/null
@@ -1,38 +0,0 @@
-v1.8.0
- *
-
-v1.7.3
- * Renamed node-htmlparser.* to htmlparser.* and created shims for people still expecting node-htmlparser.*
-
-v1.7.2
- * Document position feature fixed to work correctly with chunked parsing
-
-v1.7.1
- * Document position feature disabled until it works correctly with chunked parsing
-
-v1.7.0
- * Empty tag checking switch to being case insensitive [fgnass]
- * Added feature to include document position (row, col) in element data [fgnass]
- * Added parser option "includeLocation" to enable document position data
-
-v1.6.4
- * Fixed 'prevElement' error [Swizec]
-
-v1.6.3
- * Updated to support being an npm package
- * Fixed DomUtils.testElement()
-
-v1.6.1
- * Optimized DomUtils by up to 2-3x
-
-v1.6.0
- * Added support for RSS/Atom feeds
-
-v1.5.0
- * Added DefaultHandler option "enforceEmptyTags" so that XML can be parsed correctly
-
-v1.4.2
- * Added tests for parsing XML with namespaces
-
-v1.4.1
- * Added minified version
diff --git a/README.md b/README.md
index 52467ba..9ed236d 100644
--- a/README.md
+++ b/README.md
@@ -1,186 +1,81 @@
-#NodeHtmlParser
-A forgiving HTML/XML/RSS parser written in JS for both the browser and NodeJS (yes, despite the name it works just fine in any modern browser). The parser can handle streams (chunked data) and supports custom handlers for writing custom DOMs/output.
+# htmlparser2 [npm package](https://npmjs.org/package/htmlparser2) [build status](http://travis-ci.org/fb55/htmlparser2) [dependency status](https://david-dm.org/fb55/htmlparser2)
-##Installing
+A forgiving HTML/XML/RSS parser written in JS for NodeJS. The parser can handle streams (chunked data) and supports custom handlers for writing custom DOMs/output.
- npm install htmlparser
-
-##Running Tests
-
-###Run tests under node:
- node runtests.js
-
-###Run tests in browser:
-View runtests.html in any browser
-
-##Usage In Node
- var htmlparser = require("htmlparser");
- var rawHtml = "Xyz
- , Style: "style" //Special tag
- , Tag: "tag" //Any tag that isn't special
-}
-
-function Parser (handler, options) {
- this._options = options ? options : { };
- if (this._options.includeLocation == undefined) {
- this._options.includeLocation = false; //Do not track element position in document by default
- }
-
- this.validateHandler(handler);
- this._handler = handler;
- this.reset();
-}
-
- //**"Static"**//
- //Regular expressions used for cleaning up and parsing (stateless)
- Parser._reTrim = /(^\s+|\s+$)/g; //Trim leading/trailing whitespace
- Parser._reTrimComment = /(^\!--|--$)/g; //Remove comment tag markup from comment contents
- Parser._reWhitespace = /\s/g; //Used to find any whitespace to split on
- Parser._reTagName = /^\s*(\/?)\s*([^\s\/]+)/; //Used to find the tag name for an element
-
- //Regular expressions used for parsing (stateful)
- Parser._reAttrib = //Find attributes in a tag
- /([^=<>\"\'\s]+)\s*=\s*"([^"]*)"|([^=<>\"\'\s]+)\s*=\s*'([^']*)'|([^=<>\"\'\s]+)\s*=\s*([^'"\s]+)|([^=<>\"\'\s\/]+)/g;
- Parser._reTags = /[\<\>]/g; //Find tag markers
-
- //**Public**//
- //Methods//
- //Parses a complete HTML and pushes it to the handler
- Parser.prototype.parseComplete = function Parser$parseComplete (data) {
- this.reset();
- this.parseChunk(data);
- this.done();
- }
-
- //Parses a piece of an HTML document
- Parser.prototype.parseChunk = function Parser$parseChunk (data) {
- if (this._done)
- this.handleError(new Error("Attempted to parse chunk after parsing already done"));
- this._buffer += data; //FIXME: this can be a bottleneck
- this.parseTags();
- }
-
- //Tells the parser that the HTML being parsed is complete
- Parser.prototype.done = function Parser$done () {
- if (this._done)
- return;
- this._done = true;
-
- //Push any unparsed text into a final element in the element list
- if (this._buffer.length) {
- var rawData = this._buffer;
- this._buffer = "";
- var element = {
- raw: rawData
- , data: (this._parseState == ElementType.Text) ? rawData : rawData.replace(Parser._reTrim, "")
- , type: this._parseState
- };
- if (this._parseState == ElementType.Tag || this._parseState == ElementType.Script || this._parseState == ElementType.Style)
- element.name = this.parseTagName(element.data);
- this.parseAttribs(element);
- this._elements.push(element);
- }
-
- this.writeHandler();
- this._handler.done();
- }
-
- //Resets the parser to a blank state, ready to parse a new HTML document
- Parser.prototype.reset = function Parser$reset () {
- this._buffer = "";
- this._done = false;
- this._elements = [];
- this._elementsCurrent = 0;
- this._current = 0;
- this._next = 0;
- this._location = {
- row: 0
- , col: 0
- , charOffset: 0
- , inBuffer: 0
- };
- this._parseState = ElementType.Text;
- this._prevTagSep = '';
- this._tagStack = [];
- this._handler.reset();
- }
-
- //**Private**//
- //Properties//
- Parser.prototype._options = null; //Parser options for how to behave
- Parser.prototype._handler = null; //Handler for parsed elements
- Parser.prototype._buffer = null; //Buffer of unparsed data
- Parser.prototype._done = false; //Flag indicating whether parsing is done
- Parser.prototype._elements = null; //Array of parsed elements
- Parser.prototype._elementsCurrent = 0; //Pointer to last element in _elements that has been processed
- Parser.prototype._current = 0; //Position in data that has already been parsed
- Parser.prototype._next = 0; //Position in data of the next tag marker (<>)
- Parser.prototype._location = null; //Position tracking for elements in a stream
- Parser.prototype._parseState = ElementType.Text; //Current type of element being parsed
- Parser.prototype._prevTagSep = ''; //Previous tag marker found
- //Stack of element types previously encountered; keeps track of when
- //parsing occurs inside a script/comment/style tag
- Parser.prototype._tagStack = null;
-
- //Methods//
- //Takes an array of elements and parses any found attributes
- Parser.prototype.parseTagAttribs = function Parser$parseTagAttribs (elements) {
- var idxEnd = elements.length;
- var idx = 0;
-
- while (idx < idxEnd) {
- var element = elements[idx++];
- if (element.type == ElementType.Tag || element.type == ElementType.Script || element.type == ElementType.style)
- this.parseAttribs(element);
- }
-
- return(elements);
- }
-
- //Takes an element and adds an "attribs" property for any element attributes found
- Parser.prototype.parseAttribs = function Parser$parseAttribs (element) {
- //Only parse attributes for tags
- if (element.type != ElementType.Script && element.type != ElementType.Style && element.type != ElementType.Tag)
- return;
-
- var tagName = element.data.split(Parser._reWhitespace, 1)[0];
- var attribRaw = element.data.substring(tagName.length);
- if (attribRaw.length < 1)
- return;
-
- var match;
- Parser._reAttrib.lastIndex = 0;
- while (match = Parser._reAttrib.exec(attribRaw)) {
- if (element.attribs == undefined)
- element.attribs = {};
-
- if (typeof match[1] == "string" && match[1].length) {
- element.attribs[match[1]] = match[2];
- } else if (typeof match[3] == "string" && match[3].length) {
- element.attribs[match[3].toString()] = match[4].toString();
- } else if (typeof match[5] == "string" && match[5].length) {
- element.attribs[match[5]] = match[6];
- } else if (typeof match[7] == "string" && match[7].length) {
- element.attribs[match[7]] = match[7];
- }
- }
- }
-
- //Extracts the base tag name from the data value of an element
- Parser.prototype.parseTagName = function Parser$parseTagName (data) {
- if (data == null || data == "")
- return("");
- var match = Parser._reTagName.exec(data);
- if (!match)
- return("");
- return((match[1] ? "/" : "") + match[2]);
- }
-
- //Parses through HTML text and returns an array of found elements
- //I admit, this function is rather large but splitting up had an noticeable impact on speed
- Parser.prototype.parseTags = function Parser$parseTags () {
- var bufferEnd = this._buffer.length - 1;
- while (Parser._reTags.test(this._buffer)) {
- this._next = Parser._reTags.lastIndex - 1;
- var tagSep = this._buffer.charAt(this._next); //The currently found tag marker
- var rawData = this._buffer.substring(this._current, this._next); //The next chunk of data to parse
-
- //A new element to eventually be appended to the element list
- var element = {
- raw: rawData
- , data: (this._parseState == ElementType.Text) ? rawData : rawData.replace(Parser._reTrim, "")
- , type: this._parseState
- };
-
- var elementName = this.parseTagName(element.data);
-
- //This section inspects the current tag stack and modifies the current
- //element if we're actually parsing a special area (script/comment/style tag)
- if (this._tagStack.length) { //We're parsing inside a script/comment/style tag
- if (this._tagStack[this._tagStack.length - 1] == ElementType.Script) { //We're currently in a script tag
- if (elementName == "/script") //Actually, we're no longer in a script tag, so pop it off the stack
- this._tagStack.pop();
- else { //Not a closing script tag
- if (element.raw.indexOf("!--") != 0) { //Make sure we're not in a comment
- //All data from here to script close is now a text element
- element.type = ElementType.Text;
- //If the previous element is text, append the current text to it
- if (this._elements.length && this._elements[this._elements.length - 1].type == ElementType.Text) {
- var prevElement = this._elements[this._elements.length - 1];
- prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep + element.raw;
- element.raw = element.data = ""; //This causes the current element to not be added to the element list
- }
- }
- }
- }
- else if (this._tagStack[this._tagStack.length - 1] == ElementType.Style) { //We're currently in a style tag
- if (elementName == "/style") //Actually, we're no longer in a style tag, so pop it off the stack
- this._tagStack.pop();
- else {
- if (element.raw.indexOf("!--") != 0) { //Make sure we're not in a comment
- //All data from here to style close is now a text element
- element.type = ElementType.Text;
- //If the previous element is text, append the current text to it
- if (this._elements.length && this._elements[this._elements.length - 1].type == ElementType.Text) {
- var prevElement = this._elements[this._elements.length - 1];
- if (element.raw != "") {
- prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep + element.raw;
- element.raw = element.data = ""; //This causes the current element to not be added to the element list
- } else { //Element is empty, so just append the last tag marker found
- prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep;
- }
- } else { //The previous element was not text
- if (element.raw != "") {
- element.raw = element.data = element.raw;
- }
- }
- }
- }
- }
- else if (this._tagStack[this._tagStack.length - 1] == ElementType.Comment) { //We're currently in a comment tag
- var rawLen = element.raw.length;
- if (element.raw.charAt(rawLen - 2) == "-" && element.raw.charAt(rawLen - 1) == "-" && tagSep == ">") {
- //Actually, we're no longer in a style tag, so pop it off the stack
- this._tagStack.pop();
- //If the previous element is a comment, append the current text to it
- if (this._elements.length && this._elements[this._elements.length - 1].type == ElementType.Comment) {
- var prevElement = this._elements[this._elements.length - 1];
- prevElement.raw = prevElement.data = (prevElement.raw + element.raw).replace(Parser._reTrimComment, "");
- element.raw = element.data = ""; //This causes the current element to not be added to the element list
- element.type = ElementType.Text;
- }
- else //Previous element not a comment
- element.type = ElementType.Comment; //Change the current element's type to a comment
- }
- else { //Still in a comment tag
- element.type = ElementType.Comment;
- //If the previous element is a comment, append the current text to it
- if (this._elements.length && this._elements[this._elements.length - 1].type == ElementType.Comment) {
- var prevElement = this._elements[this._elements.length - 1];
- prevElement.raw = prevElement.data = prevElement.raw + element.raw + tagSep;
- element.raw = element.data = ""; //This causes the current element to not be added to the element list
- element.type = ElementType.Text;
- }
- else
- element.raw = element.data = element.raw + tagSep;
- }
- }
- }
-
- //Processing of non-special tags
- if (element.type == ElementType.Tag) {
- element.name = elementName;
-
- if (element.raw.indexOf("!--") == 0) { //This tag is really comment
- element.type = ElementType.Comment;
- delete element["name"];
- var rawLen = element.raw.length;
- //Check if the comment is terminated in the current element
- if (element.raw.charAt(rawLen - 1) == "-" && element.raw.charAt(rawLen - 2) == "-" && tagSep == ">")
- element.raw = element.data = element.raw.replace(Parser._reTrimComment, "");
- else { //It's not so push the comment onto the tag stack
- element.raw += tagSep;
- this._tagStack.push(ElementType.Comment);
- }
- }
- else if (element.raw.indexOf("!") == 0 || element.raw.indexOf("?") == 0) {
- element.type = ElementType.Directive;
- //TODO: what about CDATA?
- }
- else if (element.name == "script") {
- element.type = ElementType.Script;
- //Special tag, push onto the tag stack if not terminated
- if (element.data.charAt(element.data.length - 1) != "/")
- this._tagStack.push(ElementType.Script);
- }
- else if (element.name == "/script")
- element.type = ElementType.Script;
- else if (element.name == "style") {
- element.type = ElementType.Style;
- //Special tag, push onto the tag stack if not terminated
- if (element.data.charAt(element.data.length - 1) != "/")
- this._tagStack.push(ElementType.Style);
- }
- else if (element.name == "/style")
- element.type = ElementType.Style;
- if (element.name && element.name.charAt(0) == "/")
- element.data = element.name;
- }
-
- //Add all tags and non-empty text elements to the element list
- if (element.raw != "" || element.type != ElementType.Text) {
- if (this._options.includeLocation && !element.location) {
- element.location = this.getLocation(element.type == ElementType.Tag);
- }
- this.parseAttribs(element);
- this._elements.push(element);
- //If tag self-terminates, add an explicit, separate closing tag
- if (
- element.type != ElementType.Text
- &&
- element.type != ElementType.Comment
- &&
- element.type != ElementType.Directive
- &&
- element.data.charAt(element.data.length - 1) == "/"
- )
- this._elements.push({
- raw: "/" + element.name
- , data: "/" + element.name
- , name: "/" + element.name
- , type: element.type
- });
- }
- this._parseState = (tagSep == "<") ? ElementType.Tag : ElementType.Text;
- this._current = this._next + 1;
- this._prevTagSep = tagSep;
- }
-
- if (this._options.includeLocation) {
- this.getLocation();
- this._location.row += this._location.inBuffer;
- this._location.inBuffer = 0;
- this._location.charOffset = 0;
- }
- this._buffer = (this._current <= bufferEnd) ? this._buffer.substring(this._current) : "";
- this._current = 0;
-
- this.writeHandler();
- }
-
- Parser.prototype.getLocation = function Parser$getLocation (startTag) {
- var c,
- l = this._location,
- end = this._current - (startTag ? 1 : 0),
- chunk = startTag && l.charOffset == 0 && this._current == 0;
-
- for (; l.charOffset < end; l.charOffset++) {
- c = this._buffer.charAt(l.charOffset);
- if (c == '\n') {
- l.inBuffer++;
- l.col = 0;
- } else if (c != '\r') {
- l.col++;
- }
- }
- return {
- line: l.row + l.inBuffer + 1
- , col: l.col + (chunk ? 0: 1)
- };
- }
-
- //Checks the handler to make it is an object with the right "interface"
- Parser.prototype.validateHandler = function Parser$validateHandler (handler) {
- if ((typeof handler) != "object")
- throw new Error("Handler is not an object");
- if ((typeof handler.reset) != "function")
- throw new Error("Handler method 'reset' is invalid");
- if ((typeof handler.done) != "function")
- throw new Error("Handler method 'done' is invalid");
- if ((typeof handler.writeTag) != "function")
- throw new Error("Handler method 'writeTag' is invalid");
- if ((typeof handler.writeText) != "function")
- throw new Error("Handler method 'writeText' is invalid");
- if ((typeof handler.writeComment) != "function")
- throw new Error("Handler method 'writeComment' is invalid");
- if ((typeof handler.writeDirective) != "function")
- throw new Error("Handler method 'writeDirective' is invalid");
- }
-
- //Writes parsed elements out to the handler
- Parser.prototype.writeHandler = function Parser$writeHandler (forceFlush) {
- forceFlush = !!forceFlush;
- if (this._tagStack.length && !forceFlush)
- return;
- while (this._elements.length) {
- var element = this._elements.shift();
- switch (element.type) {
- case ElementType.Comment:
- this._handler.writeComment(element);
- break;
- case ElementType.Directive:
- this._handler.writeDirective(element);
- break;
- case ElementType.Text:
- this._handler.writeText(element);
- break;
- default:
- this._handler.writeTag(element);
- break;
- }
- }
- }
-
- Parser.prototype.handleError = function Parser$handleError (error) {
- if ((typeof this._handler.error) == "function")
- this._handler.error(error);
- else
- throw error;
- }
-
-//TODO: make this a trully streamable handler
-function RssHandler (callback) {
- RssHandler.super_.call(this, callback, { ignoreWhitespace: true, verbose: false, enforceEmptyTags: false });
-}
-inherits(RssHandler, DefaultHandler);
-
- RssHandler.prototype.done = function RssHandler$done () {
- var feed = { };
- var feedRoot;
-
- var found = DomUtils.getElementsByTagName(function (value) { return(value == "rss" || value == "feed"); }, this.dom, false);
- if (found.length) {
- feedRoot = found[0];
- }
- if (feedRoot) {
- if (feedRoot.name == "rss") {
- feed.type = "rss";
- feedRoot = feedRoot.children[0]; //
- feed.id = "";
- try {
- feed.title = DomUtils.getElementsByTagName("title", feedRoot.children, false)[0].children[0].data;
- } catch (ex) { }
- try {
- feed.link = DomUtils.getElementsByTagName("link", feedRoot.children, false)[0].children[0].data;
- } catch (ex) { }
- try {
- feed.description = DomUtils.getElementsByTagName("description", feedRoot.children, false)[0].children[0].data;
- } catch (ex) { }
- try {
- feed.updated = new Date(DomUtils.getElementsByTagName("lastBuildDate", feedRoot.children, false)[0].children[0].data);
- } catch (ex) { }
- try {
- feed.author = DomUtils.getElementsByTagName("managingEditor", feedRoot.children, false)[0].children[0].data;
- } catch (ex) { }
- feed.items = [];
- DomUtils.getElementsByTagName("item", feedRoot.children).forEach(function (item, index, list) {
- var entry = {};
- try {
- entry.id = DomUtils.getElementsByTagName("guid", item.children, false)[0].children[0].data;
- } catch (ex) { }
- try {
- entry.title = DomUtils.getElementsByTagName("title", item.children, false)[0].children[0].data;
- } catch (ex) { }
- try {
- entry.link = DomUtils.getElementsByTagName("link", item.children, false)[0].children[0].data;
- } catch (ex) { }
- try {
- entry.description = DomUtils.getElementsByTagName("description", item.children, false)[0].children[0].data;
- } catch (ex) { }
- try {
- entry.pubDate = new Date(DomUtils.getElementsByTagName("pubDate", item.children, false)[0].children[0].data);
- } catch (ex) { }
- feed.items.push(entry);
- });
- } else {
- feed.type = "atom";
- try {
- feed.id = DomUtils.getElementsByTagName("id", feedRoot.children, false)[0].children[0].data;
- } catch (ex) { }
- try {
- feed.title = DomUtils.getElementsByTagName("title", feedRoot.children, false)[0].children[0].data;
- } catch (ex) { }
- try {
- feed.link = DomUtils.getElementsByTagName("link", feedRoot.children, false)[0].attribs.href;
- } catch (ex) { }
- try {
- feed.description = DomUtils.getElementsByTagName("subtitle", feedRoot.children, false)[0].children[0].data;
- } catch (ex) { }
- try {
- feed.updated = new Date(DomUtils.getElementsByTagName("updated", feedRoot.children, false)[0].children[0].data);
- } catch (ex) { }
- try {
- feed.author = DomUtils.getElementsByTagName("email", feedRoot.children, true)[0].children[0].data;
- } catch (ex) { }
- feed.items = [];
- DomUtils.getElementsByTagName("entry", feedRoot.children).forEach(function (item, index, list) {
- var entry = {};
- try {
- entry.id = DomUtils.getElementsByTagName("id", item.children, false)[0].children[0].data;
- } catch (ex) { }
- try {
- entry.title = DomUtils.getElementsByTagName("title", item.children, false)[0].children[0].data;
- } catch (ex) { }
- try {
- entry.link = DomUtils.getElementsByTagName("link", item.children, false)[0].attribs.href;
- } catch (ex) { }
- try {
- entry.description = DomUtils.getElementsByTagName("summary", item.children, false)[0].children[0].data;
- } catch (ex) { }
- try {
- entry.pubDate = new Date(DomUtils.getElementsByTagName("updated", item.children, false)[0].children[0].data);
- } catch (ex) { }
- feed.items.push(entry);
- });
- }
-
- this.dom = feed;
- }
- RssHandler.super_.prototype.done.call(this);
- }
-
-///////////////////////////////////////////////////
-
-function DefaultHandler (callback, options) {
- this.reset();
- this._options = options ? options : { };
- if (this._options.ignoreWhitespace == undefined)
- this._options.ignoreWhitespace = false; //Keep whitespace-only text nodes
- if (this._options.verbose == undefined)
- this._options.verbose = true; //Keep data property for tags and raw property for all
- if (this._options.enforceEmptyTags == undefined)
- this._options.enforceEmptyTags = true; //Don't allow children for HTML tags defined as empty in spec
- if ((typeof callback) == "function")
- this._callback = callback;
-}
-
- //**"Static"**//
- //HTML Tags that shouldn't contain child nodes
- DefaultHandler._emptyTags = {
- area: 1
- , base: 1
- , basefont: 1
- , br: 1
- , col: 1
- , frame: 1
- , hr: 1
- , img: 1
- , input: 1
- , isindex: 1
- , link: 1
- , meta: 1
- , param: 1
- , embed: 1
- }
- //Regex to detect whitespace only text nodes
- DefaultHandler.reWhitespace = /^\s*$/;
-
- //**Public**//
- //Properties//
- DefaultHandler.prototype.dom = null; //The hierarchical object containing the parsed HTML
- //Methods//
- //Resets the handler back to starting state
- DefaultHandler.prototype.reset = function DefaultHandler$reset() {
- this.dom = [];
- this._done = false;
- this._tagStack = [];
- this._tagStack.last = function DefaultHandler$_tagStack$last () {
- return(this.length ? this[this.length - 1] : null);
- }
- }
- //Signals the handler that parsing is done
- DefaultHandler.prototype.done = function DefaultHandler$done () {
- this._done = true;
- this.handleCallback(null);
- }
- DefaultHandler.prototype.writeTag = function DefaultHandler$writeTag (element) {
- this.handleElement(element);
- }
- DefaultHandler.prototype.writeText = function DefaultHandler$writeText (element) {
- if (this._options.ignoreWhitespace)
- if (DefaultHandler.reWhitespace.test(element.data))
- return;
- this.handleElement(element);
- }
- DefaultHandler.prototype.writeComment = function DefaultHandler$writeComment (element) {
- this.handleElement(element);
- }
- DefaultHandler.prototype.writeDirective = function DefaultHandler$writeDirective (element) {
- this.handleElement(element);
- }
- DefaultHandler.prototype.error = function DefaultHandler$error (error) {
- this.handleCallback(error);
- }
-
- //**Private**//
- //Properties//
- DefaultHandler.prototype._options = null; //Handler options for how to behave
- DefaultHandler.prototype._callback = null; //Callback to respond to when parsing done
- DefaultHandler.prototype._done = false; //Flag indicating whether handler has been notified of parsing completed
- DefaultHandler.prototype._tagStack = null; //List of parents to the currently element being processed
- //Methods//
- DefaultHandler.prototype.handleCallback = function DefaultHandler$handleCallback (error) {
- if ((typeof this._callback) != "function")
- if (error)
- throw error;
- else
- return;
- this._callback(error, this.dom);
- }
-
- DefaultHandler.prototype.isEmptyTag = function(element) {
- var name = element.name.toLowerCase();
- if (name.charAt(0) == '/') {
- name = name.substring(1);
- }
- return this._options.enforceEmptyTags && !!DefaultHandler._emptyTags[name];
- };
-
- DefaultHandler.prototype.handleElement = function DefaultHandler$handleElement (element) {
- if (this._done)
- this.handleCallback(new Error("Writing to the handler after done() called is not allowed without a reset()"));
- if (!this._options.verbose) {
-// element.raw = null; //FIXME: Not clean
- //FIXME: Serious performance problem using delete
- delete element.raw;
- if (element.type == "tag" || element.type == "script" || element.type == "style")
- delete element.data;
- }
- if (!this._tagStack.last()) { //There are no parent elements
- //If the element can be a container, add it to the tag stack and the top level list
- if (element.type != ElementType.Text && element.type != ElementType.Comment && element.type != ElementType.Directive) {
- if (element.name.charAt(0) != "/") { //Ignore closing tags that obviously don't have an opening tag
- this.dom.push(element);
- if (!this.isEmptyTag(element)) { //Don't add tags to the tag stack that can't have children
- this._tagStack.push(element);
- }
- }
- }
- else //Otherwise just add to the top level list
- this.dom.push(element);
- }
- else { //There are parent elements
- //If the element can be a container, add it as a child of the element
- //on top of the tag stack and then add it to the tag stack
- if (element.type != ElementType.Text && element.type != ElementType.Comment && element.type != ElementType.Directive) {
- if (element.name.charAt(0) == "/") {
- //This is a closing tag, scan the tagStack to find the matching opening tag
- //and pop the stack up to the opening tag's parent
- var baseName = element.name.substring(1);
- if (!this.isEmptyTag(element)) {
- var pos = this._tagStack.length - 1;
- while (pos > -1 && this._tagStack[pos--].name != baseName) { }
- if (pos > -1 || this._tagStack[0].name == baseName)
- while (pos < this._tagStack.length - 1)
- this._tagStack.pop();
- }
- }
- else { //This is not a closing tag
- if (!this._tagStack.last().children)
- this._tagStack.last().children = [];
- this._tagStack.last().children.push(element);
- if (!this.isEmptyTag(element)) //Don't add tags to the tag stack that can't have children
- this._tagStack.push(element);
- }
- }
- else { //This is not a container element
- if (!this._tagStack.last().children)
- this._tagStack.last().children = [];
- this._tagStack.last().children.push(element);
- }
- }
- }
-
- var DomUtils = {
- testElement: function DomUtils$testElement (options, element) {
- if (!element) {
- return false;
- }
-
- for (var key in options) {
- if (key == "tag_name") {
- if (element.type != "tag" && element.type != "script" && element.type != "style") {
- return false;
- }
- if (!options["tag_name"](element.name)) {
- return false;
- }
- } else if (key == "tag_type") {
- if (!options["tag_type"](element.type)) {
- return false;
- }
- } else if (key == "tag_contains") {
- if (element.type != "text" && element.type != "comment" && element.type != "directive") {
- return false;
- }
- if (!options["tag_contains"](element.data)) {
- return false;
- }
- } else {
- if (!element.attribs || !options[key](element.attribs[key])) {
- return false;
- }
- }
- }
-
- return true;
- }
-
- , getElements: function DomUtils$getElements (options, currentElement, recurse, limit) {
- recurse = (recurse === undefined || recurse === null) || !!recurse;
- limit = isNaN(parseInt(limit)) ? -1 : parseInt(limit);
-
- if (!currentElement) {
- return([]);
- }
-
- var found = [];
- var elementList;
-
- function getTest (checkVal) {
- return(function (value) { return(value == checkVal); });
- }
- for (var key in options) {
- if ((typeof options[key]) != "function") {
- options[key] = getTest(options[key]);
- }
- }
-
- if (DomUtils.testElement(options, currentElement)) {
- found.push(currentElement);
- }
-
- if (limit >= 0 && found.length >= limit) {
- return(found);
- }
-
- if (recurse && currentElement.children) {
- elementList = currentElement.children;
- } else if (currentElement instanceof Array) {
- elementList = currentElement;
- } else {
- return(found);
- }
-
- for (var i = 0; i < elementList.length; i++) {
- found = found.concat(DomUtils.getElements(options, elementList[i], recurse, limit));
- if (limit >= 0 && found.length >= limit) {
- break;
- }
- }
-
- return(found);
- }
-
- , getElementById: function DomUtils$getElementById (id, currentElement, recurse) {
- var result = DomUtils.getElements({ id: id }, currentElement, recurse, 1);
- return(result.length ? result[0] : null);
- }
-
- , getElementsByTagName: function DomUtils$getElementsByTagName (name, currentElement, recurse, limit) {
- return(DomUtils.getElements({ tag_name: name }, currentElement, recurse, limit));
- }
-
- , getElementsByTagType: function DomUtils$getElementsByTagType (type, currentElement, recurse, limit) {
- return(DomUtils.getElements({ tag_type: type }, currentElement, recurse, limit));
- }
- }
-
- function inherits (ctor, superCtor) {
- var tempCtor = function(){};
- tempCtor.prototype = superCtor.prototype;
- ctor.super_ = superCtor;
- ctor.prototype = new tempCtor();
- ctor.prototype.constructor = ctor;
- }
-
-exports.Parser = Parser;
-
-exports.DefaultHandler = DefaultHandler;
-
-exports.RssHandler = RssHandler;
-
-exports.ElementType = ElementType;
-
-exports.DomUtils = DomUtils;
-
-})();
diff --git a/lib/htmlparser.min.js b/lib/htmlparser.min.js
deleted file mode 100644
index 2e09f29..0000000
--- a/lib/htmlparser.min.js
+++ /dev/null
@@ -1,22 +0,0 @@
-/***********************************************
-Copyright 2010, 2011, Chris Winberry . All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to
-deal in the Software without restriction, including without limitation the
-rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-sell copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-IN THE SOFTWARE.
-***********************************************/
-/* v1.8.0 */
-(function(){function e(a,c){this._options=c?c:{};if(this._options.includeLocation==undefined)this._options.includeLocation=false;this.validateHandler(a);this._handler=a;this.reset()}function n(a){n.super_.call(this,a,{ignoreWhitespace:true,verbose:false,enforceEmptyTags:false})}function i(a,c){this.reset();this._options=c?c:{};if(this._options.ignoreWhitespace==undefined)this._options.ignoreWhitespace=false;if(this._options.verbose==undefined)this._options.verbose=true;if(this._options.enforceEmptyTags== undefined)this._options.enforceEmptyTags=true;if(typeof a=="function")this._callback=a}if(!(typeof require=="function"&&typeof exports=="object"&&typeof module=="object"&&typeof __filename=="string"&&typeof __dirname=="string")){if(this.Tautologistics){if(this.Tautologistics.NodeHtmlParser)return}else this.Tautologistics={};this.Tautologistics.NodeHtmlParser={};exports=this.Tautologistics.NodeHtmlParser}var d={Text:"text",Directive:"directive",Comment:"comment",Script:"script",Style:"style",Tag:"tag"}; e._reTrim=/(^\s+|\s+$)/g;e._reTrimComment=/(^\!--|--$)/g;e._reWhitespace=/\s/g;e._reTagName=/^\s*(\/?)\s*([^\s\/]+)/;e._reAttrib=/([^=<>\"\'\s]+)\s*=\s*"([^"]*)"|([^=<>\"\'\s]+)\s*=\s*'([^']*)'|([^=<>\"\'\s]+)\s*=\s*([^'"\s]+)|([^=<>\"\'\s\/]+)/g;e._reTags=/[\<\>]/g;e.prototype.parseComplete=function(a){this.reset();this.parseChunk(a);this.done()};e.prototype.parseChunk=function(a){this._done&&this.handleError(Error("Attempted to parse chunk after parsing already done"));this._buffer+=a;this.parseTags()}; e.prototype.done=function(){if(!this._done){this._done=true;if(this._buffer.length){var 
a=this._buffer;this._buffer="";a={raw:a,data:this._parseState==d.Text?a:a.replace(e._reTrim,""),type:this._parseState};if(this._parseState==d.Tag||this._parseState==d.Script||this._parseState==d.Style)a.name=this.parseTagName(a.data);this.parseAttribs(a);this._elements.push(a)}this.writeHandler();this._handler.done()}};e.prototype.reset=function(){this._buffer="";this._done=false;this._elements=[];this._next=this._current= this._elementsCurrent=0;this._location={row:0,col:0,charOffset:0,inBuffer:0};this._parseState=d.Text;this._prevTagSep="";this._tagStack=[];this._handler.reset()};e.prototype._options=null;e.prototype._handler=null;e.prototype._buffer=null;e.prototype._done=false;e.prototype._elements=null;e.prototype._elementsCurrent=0;e.prototype._current=0;e.prototype._next=0;e.prototype._location=null;e.prototype._parseState=d.Text;e.prototype._prevTagSep="";e.prototype._tagStack=null;e.prototype.parseTagAttribs= function(a){for(var c=a.length,b=0;b"){this._tagStack.pop(); if(this._elements.length&&this._elements[this._elements.length-1].type==d.Comment){g=this._elements[this._elements.length-1];g.raw=g.data=(g.raw+b.raw).replace(e._reTrimComment,"");b.raw=b.data="";b.type=d.Text}else b.type=d.Comment}else{b.type=d.Comment;if(this._elements.length&&this._elements[this._elements.length-1].type==d.Comment){g=this._elements[this._elements.length-1];g.raw=g.data=g.raw+b.raw+c;b.raw=b.data="";b.type=d.Text}else b.raw=b.data=b.raw+c}}if(b.type==d.Tag){b.name=h;if(b.raw.indexOf("!--")== 0){b.type=d.Comment;delete b.name;g=b.raw.length;if(b.raw.charAt(g-1)=="-"&&b.raw.charAt(g-2)=="-"&&c==">")b.raw=b.data=b.raw.replace(e._reTrimComment,"");else{b.raw+=c;this._tagStack.push(d.Comment)}}else if(b.raw.indexOf("!")==0||b.raw.indexOf("?")==0)b.type=d.Directive;else if(b.name=="script"){b.type=d.Script;b.data.charAt(b.data.length-1)!="/"&&this._tagStack.push(d.Script)}else if(b.name=="/script")b.type=d.Script;else 
if(b.name=="style"){b.type=d.Style;b.data.charAt(b.data.length-1)!="/"&& this._tagStack.push(d.Style)}else if(b.name=="/style")b.type=d.Style;if(b.name&&b.name.charAt(0)=="/")b.data=b.name}if(b.raw!=""||b.type!=d.Text){if(this._options.includeLocation&&!b.location)b.location=this.getLocation(b.type==d.Tag);this.parseAttribs(b);this._elements.push(b);b.type!=d.Text&&b.type!=d.Comment&&b.type!=d.Directive&&b.data.charAt(b.data.length-1)=="/"&&this._elements.push({raw:"/"+b.name,data:"/"+b.name,name:"/"+b.name,type:b.type})}this._parseState=c=="<"?d.Tag:d.Text;this._current= this._next+1;this._prevTagSep=c}if(this._options.includeLocation){this.getLocation();this._location.row+=this._location.inBuffer;this._location.inBuffer=0;this._location.charOffset=0}this._buffer=this._current<=a?this._buffer.substring(this._current):"";this._current=0;this.writeHandler()};e.prototype.getLocation=function(a){for(var c=this._location,b=this._current-(a?1:0),h=a&&c.charOffset==0&&this._current==0;c.charOffset-1&&this._tagStack[a--].name!=c;);if(a>-1||this._tagStack[0].name==c)for(;a=0&&l.length>=h)return l;if(b&&c.children)c=c.children;else if(c instanceof Array)c=c;else return l; for(m=0;m=0&&l.length>=h)break}return l},getElementById:function(a,c,b){a=f.getElements({id:a},c,b,1);return a.length?a[0]:null},getElementsByTagName:function(a,c,b,h){return f.getElements({tag_name:a},c,b,h)},getElementsByTagType:function(a,c,b,h){return f.getElements({tag_type:a},c,b,h)}};exports.Parser=e;exports.DefaultHandler=i;exports.RssHandler=n;exports.ElementType=d;exports.DomUtils=f})();
\ No newline at end of file
diff --git a/lib/index.js b/lib/index.js
new file mode 100644
index 0000000..9fa2761
--- /dev/null
+++ b/lib/index.js
@@ -0,0 +1,70 @@
+var Parser = require("./Parser.js"),
+ DomHandler = require("domhandler");
+
+function defineProp(name, value){
+ delete module.exports[name];
+ module.exports[name] = value;
+ return value;
+}
+
+module.exports = {
+ Parser: Parser,
+ Tokenizer: require("./Tokenizer.js"),
+ ElementType: require("domelementtype"),
+ DomHandler: DomHandler,
+ get FeedHandler(){
+ return defineProp("FeedHandler", require("./FeedHandler.js"));
+ },
+ get Stream(){
+ return defineProp("Stream", require("./Stream.js"));
+ },
+ get WritableStream(){
+ return defineProp("WritableStream", require("./WritableStream.js"));
+ },
+ get ProxyHandler(){
+ return defineProp("ProxyHandler", require("./ProxyHandler.js"));
+ },
+ get DomUtils(){
+ return defineProp("DomUtils", require("domutils"));
+ },
+ get CollectingHandler(){
+ return defineProp("CollectingHandler", require("./CollectingHandler.js"));
+ },
+ // For legacy support
+ DefaultHandler: DomHandler,
+ get RssHandler(){
+ return defineProp("RssHandler", this.FeedHandler);
+ },
+ //helper methods
+ parseDOM: function(data, options) {
+ var handler = new DomHandler(options);
+ var parser = new Parser(handler, options);
+ parser.end(data);
+ return handler.dom;
+ },
+ parseFeed: function(feed, options){
+ var handler = new module.exports.FeedHandler(options);
+ var parser = new Parser(handler, options);
+ parser.end(feed);
+ return handler.dom;
+ },
+ createDomStream: function(cb, options, elementCb){
+ var handler = new DomHandler(cb, options, elementCb);
+ return new Parser(handler, options);
+ },
+ // List of all events that the parser emits
+ EVENTS: { /* Format: eventname: number of arguments */
+ attribute: 2,
+ cdatastart: 0,
+ cdataend: 0,
+ text: 1,
+ processinginstruction: 2,
+ comment: 1,
+ commentend: 0,
+ closetag: 1,
+ opentag: 2,
+ opentagname: 1,
+ error: 1,
+ end: 0
+ }
+};
diff --git a/lib/node-htmlparser.js b/lib/node-htmlparser.js
deleted file mode 100644
index 1fc03ea..0000000
--- a/lib/node-htmlparser.js
+++ /dev/null
@@ -1,6 +0,0 @@
-var htmlparser = require("./htmlparser");
-exports.Parser = htmlparser.Parser;
-exports.DefaultHandler = htmlparser.DefaultHandler;
-exports.RssHandler = htmlparser.RssHandler;
-exports.ElementType = htmlparser.ElementType;
-exports.DomUtils = htmlparser.DomUtils;
diff --git a/lib/node-htmlparser.min.js b/lib/node-htmlparser.min.js
deleted file mode 100644
index 27d5eea..0000000
--- a/lib/node-htmlparser.min.js
+++ /dev/null
@@ -1,6 +0,0 @@
-var htmlparser = require("./htmlparser.min");
-exports.Parser = htmlparser.Parser;
-exports.DefaultHandler = htmlparser.DefaultHandler;
-exports.RssHandler = htmlparser.RssHandler;
-exports.ElementType = htmlparser.ElementType;
-exports.DomUtils = htmlparser.DomUtils;
diff --git a/package.json b/package.json
index b395c90..45d49d2 100644
--- a/package.json
+++ b/package.json
@@ -1,23 +1,33 @@
{
- "name": "htmlparser"
- , "description": "Forgiving HTML/XML/RSS Parser in JS for *both* Node and Browsers"
- , "version": "1.7.3"
- , "author": "Chris Winberry "
- , "contributors": []
- , "repository": {
- "type": "git"
- , "url": "git://github.com/tautologistics/node-htmlparser.git"
- }
- , "bugs": {
- "mail": "chris@winberry.net"
- , "web": "http://github.com/tautologistics/node-htmlparser/issues"
- }
- , "os": [ "linux", "darwin", "freebsd", "win32" ]
- , "directories": { "lib": "./lib/" }
- , "main": "./lib/htmlparser"
- , "engines": { "node": ">=0.1.33" }
- , "licenses": [{
- "type": "MIT"
- , "url": "http://github.com/tautologistics/node-htmlparser/raw/master/LICENSE"
- }]
+ "name": "htmlparser2",
+ "description": "Fast & forgiving HTML/XML/RSS parser",
+ "version": "3.4.0",
+ "author": "Felix Boehm ",
+ "keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
+ "contributors": ["Chris Winberry "],
+ "repository": {
+ "type": "git",
+ "url": "git://github.com/fb55/htmlparser2.git"
+ },
+ "bugs": {
+ "mail": "me@feedic.com",
+ "url": "http://github.com/fb55/htmlparser2/issues"
+ },
+ "directories": {
+ "lib": "lib/"
+ },
+ "main": "lib/index.js",
+ "scripts": {
+ "test": "mocha -R spec"
+ },
+ "dependencies": {
+ "domhandler": "2.2",
+ "domutils": "1.3",
+ "domelementtype": "1",
+ "readable-stream": "1.1"
+ },
+ "devDependencies": {
+ "mocha": "1"
+ },
+ "license": "MIT"
}
diff --git a/profile.js b/profile.js
deleted file mode 100644
index f9d0ef2..0000000
--- a/profile.js
+++ /dev/null
@@ -1,63 +0,0 @@
-//node --prof --prof_auto profile.js
-//deps/v8/tools/mac-tick-processor v8.log
-var sys = require("sys");
-var fs = require("fs");
-var http = require("http");
-var htmlparser = require("./lib/htmlparser");
-//var libxml = require('./libxmljs');
-
-var testNHP = true; //Should node-htmlparser be exercised?
-var testLXJS = false; //Should libxmljs be exercised?
-var testIterations = 100; //Number of test loops to run
-
-var testHost = "localhost"; //Host to fetch test HTML from
-var testPort = 80; //Port on host to fetch test HTML from
-var testPath = "/~chris/feed.xml"; //Path on host to fetch HTML from
-
-function getMillisecs () {
- return((new Date()).getTime());
-}
-
-function timeExecutions (loops, func) {
- var start = getMillisecs();
-
- while (loops--)
- func();
-
- return(getMillisecs() - start);
-}
-
-var html = "";
-http.createClient(testPort, testHost)
- .request("GET", testPath, { host: testHost })
- .addListener("response", function (response) {
- if (response.statusCode == "200") {
- response.setEncoding("utf8");
- response.addListener("data", function (chunk) {
- html += chunk;
- }).addListener("end", function() {
- var timeNodeHtmlParser = !testNHP ? 0 : timeExecutions(testIterations, function () {
- var handler = new htmlparser.DefaultHandler(function(err, dom) {
- if (err)
- sys.debug("Error: " + err);
- });
- var parser = new htmlparser.Parser(handler, { includeLocation: true });
- parser.parseComplete(html);
- })
-
- var timeLibXmlJs = !testLXJS ? 0 : timeExecutions(testIterations, function () {
- var dom = libxml.parseHtmlString(html);
- })
-
- if (testNHP)
- sys.debug("NodeHtmlParser: " + timeNodeHtmlParser);
- if (testLXJS)
- sys.debug("LibXmlJs: " + timeLibXmlJs);
- if (testNHP && testLXJS)
- sys.debug("Difference: " + ((timeNodeHtmlParser - timeLibXmlJs) / timeLibXmlJs) * 100);
- });
- }
- else
- sys.debug("Error: got response status " + response.statusCode);
- })
- .end();
diff --git a/runtests.html b/runtests.html
deleted file mode 100644
index e89702d..0000000
--- a/runtests.html
+++ /dev/null
@@ -1,108 +0,0 @@
-
-
-
-
- Node.js HTML Parser
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/runtests.js b/runtests.js
deleted file mode 100644
index e906fe4..0000000
--- a/runtests.js
+++ /dev/null
@@ -1,75 +0,0 @@
-/***********************************************
-Copyright 2010, Chris Winberry . All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to
-deal in the Software without restriction, including without limitation the
-rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-sell copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-IN THE SOFTWARE.
-***********************************************/
-
-var sys = require("sys");
-var fs = require("fs");
-var htmlparser = require("./lib/htmlparser");
-
-var testFolder = "./tests";
-var chunkSize = 5;
-
-var testFiles = fs.readdirSync(testFolder);
-var testCount = 0;
-var failedCount = 0;
-for (var i in testFiles) {
- testCount++;
- var fileParts = testFiles[i].split(".");
- fileParts.pop();
- var moduleName = fileParts.join(".");
- var test = require(testFolder + "/" + moduleName);
- var handlerCallback = function handlerCallback (error) {
- if (error)
- sys.puts("Handler error: " + error);
- }
- var handler = (test.type == "rss") ?
- new htmlparser.RssHandler(handlerCallback, test.options.handler)
- :
- new htmlparser.DefaultHandler(handlerCallback, test.options.handler)
- ;
- var parser = new htmlparser.Parser(handler, test.options.parser);
- parser.parseComplete(test.html);
- var resultComplete = handler.dom;
- var chunkPos = 0;
- parser.reset();
- while (chunkPos < test.html.length) {
- parser.parseChunk(test.html.substring(chunkPos, chunkPos + chunkSize));
- chunkPos += chunkSize;
- }
- parser.done();
- var resultChunk = handler.dom;
- var testResult =
- sys.inspect(resultComplete, false, null) === sys.inspect(test.expected, false, null)
- &&
- sys.inspect(resultChunk, false, null) === sys.inspect(test.expected, false, null)
- ;
- sys.puts("[" + test.name + "\]: " + (testResult ? "passed" : "FAILED"));
- if (!testResult) {
- failedCount++;
- sys.puts("== Complete ==");
- sys.puts(sys.inspect(resultComplete, false, null));
- sys.puts("== Chunked ==");
- sys.puts(sys.inspect(resultChunk, false, null));
- sys.puts("== Expected ==");
- sys.puts(sys.inspect(test.expected, false, null));
- }
-}
-sys.puts("Total tests: " + testCount);
-sys.puts("Failed tests: " + failedCount);
diff --git a/runtests.min.html b/runtests.min.html
deleted file mode 100644
index 73ea4c7..0000000
--- a/runtests.min.html
+++ /dev/null
@@ -1,108 +0,0 @@
-
-
-
-
- Node.js HTML Parser
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/runtests.min.js b/runtests.min.js
deleted file mode 100644
index df33736..0000000
--- a/runtests.min.js
+++ /dev/null
@@ -1,75 +0,0 @@
-/***********************************************
-Copyright 2010, Chris Winberry . All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to
-deal in the Software without restriction, including without limitation the
-rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-sell copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-IN THE SOFTWARE.
-***********************************************/
-
-var sys = require("sys");
-var fs = require("fs");
-var htmlparser = require("./lib/htmlparser.min");
-
-var testFolder = "./tests";
-var chunkSize = 5;
-
-var testFiles = fs.readdirSync(testFolder);
-var testCount = 0;
-var failedCount = 0;
-for (var i in testFiles) {
- testCount++;
- var fileParts = testFiles[i].split(".");
- fileParts.pop();
- var moduleName = fileParts.join(".");
- var test = require(testFolder + "/" + moduleName);
- var handlerCallback = function handlerCallback (error) {
- if (error)
- sys.puts("Handler error: " + error);
- }
- var handler = (test.type == "rss") ?
- new htmlparser.RssHandler(handlerCallback, test.options.handler)
- :
- new htmlparser.DefaultHandler(handlerCallback, test.options.handler)
- ;
- var parser = new htmlparser.Parser(handler, test.options.parser);
- parser.parseComplete(test.html);
- var resultComplete = handler.dom;
- var chunkPos = 0;
- parser.reset();
- while (chunkPos < test.html.length) {
- parser.parseChunk(test.html.substring(chunkPos, chunkPos + chunkSize));
- chunkPos += chunkSize;
- }
- parser.done();
- var resultChunk = handler.dom;
- var testResult =
- sys.inspect(resultComplete, false, null) === sys.inspect(test.expected, false, null)
- &&
- sys.inspect(resultChunk, false, null) === sys.inspect(test.expected, false, null)
- ;
- sys.puts("[" + test.name + "\]: " + (testResult ? "passed" : "FAILED"));
- if (!testResult) {
- failedCount++;
- sys.puts("== Complete ==");
- sys.puts(sys.inspect(resultComplete, false, null));
- sys.puts("== Chunked ==");
- sys.puts(sys.inspect(resultChunk, false, null));
- sys.puts("== Expected ==");
- sys.puts(sys.inspect(test.expected, false, null));
- }
-}
-sys.puts("Total tests: " + testCount);
-sys.puts("Failed tests: " + failedCount);
diff --git a/snippet.js b/snippet.js
deleted file mode 100644
index 9448ea3..0000000
--- a/snippet.js
+++ /dev/null
@@ -1,15 +0,0 @@
-//node --prof --prof_auto profile.js
-//deps/v8/tools/mac-tick-processor v8.log
-var sys = require("sys");
-var htmlparser = require("./htmlparser");
-
-var html = " text";
-
-var handler = new htmlparser.DefaultHandler(function(err, dom) {
- if (err)
- sys.debug("Error: " + err);
- else
- sys.debug(sys.inspect(dom, false, null));
-}, { enforceEmptyTags: true });
-var parser = new htmlparser.Parser(handler);
-parser.parseComplete(html);
diff --git a/test/.DS_Store b/test/.DS_Store
new file mode 100644
index 0000000..5008ddf
Binary files /dev/null and b/test/.DS_Store differ
diff --git a/test/01-events.js b/test/01-events.js
new file mode 100644
index 0000000..a3c7cf3
--- /dev/null
+++ b/test/01-events.js
@@ -0,0 +1,9 @@
+var helper = require("./test-helper.js");
+
+helper.mochaTest("Events", __dirname, function(test, cb){
+ helper.writeToParser(
+ helper.getEventCollector(cb),
+ test.options.parser,
+ test.html
+ );
+});
\ No newline at end of file
diff --git a/test/02-stream.js b/test/02-stream.js
new file mode 100644
index 0000000..3403980
--- /dev/null
+++ b/test/02-stream.js
@@ -0,0 +1,23 @@
+var helper = require("./test-helper.js"),
+ Stream = require("..").WritableStream,
+ fs = require("fs"),
+ path = require("path");
+
+helper.mochaTest("Stream", __dirname, function(test, cb){
+ var filePath = path.join(__dirname, "Documents", test.file);
+ fs.createReadStream(filePath).pipe(
+ new Stream(
+ helper.getEventCollector(function(err, events){
+ cb(err, events);
+
+ var handler = helper.getEventCollector(cb),
+ stream = new Stream(handler, test.options);
+
+ fs.readFile(filePath, function(err, data){
+ if(err) throw err;
+ else stream.end(data);
+ });
+ }
+ ), test.options)
+ ).on("error", cb);
+});
\ No newline at end of file
diff --git a/test/03-feed.js b/test/03-feed.js
new file mode 100644
index 0000000..8e78eb8
--- /dev/null
+++ b/test/03-feed.js
@@ -0,0 +1,19 @@
+//Runs tests for feeds
+
+var helper = require("./test-helper.js"),
+ FeedHandler = require("../lib/FeedHandler.js"),
+ fs = require("fs"),
+ path = require("path");
+
+helper.mochaTest("Feeds", __dirname, function(test, cb){
+ fs.readFile(
+ path.join(__dirname, "Documents", test.file),
+ function(err, file){
+ helper.writeToParser(
+ new FeedHandler(cb),
+ { xmlMode: true },
+ file.toString()
+ );
+ }
+ );
+});
\ No newline at end of file
diff --git a/test/Documents/Atom_Example.xml b/test/Documents/Atom_Example.xml
new file mode 100644
index 0000000..7349745
--- /dev/null
+++ b/test/Documents/Atom_Example.xml
@@ -0,0 +1,25 @@
+
+
+
+ Example Feed
+ A subtitle.
+
+
+ urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6
+ 2003-12-13T18:30:02Z
+
+ John Doe
+ johndoe@example.com
+
+
+
+ Atom-Powered Robots Run Amok
+
+
+
+ urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
+ 2003-12-13T18:30:02Z
+ Some text.
+
+
+
\ No newline at end of file
diff --git a/test/Documents/Attributes.html b/test/Documents/Attributes.html
new file mode 100644
index 0000000..f3bfa09
--- /dev/null
+++ b/test/Documents/Attributes.html
@@ -0,0 +1,16 @@
+
+
+
+ Attributes test
+
+
+
+ class="value0" title="value1"
+
+
+ class=value2 disabled
+
+
+ class="value4"title="value5"
+
+
\ No newline at end of file
diff --git a/test/Documents/Basic.html b/test/Documents/Basic.html
new file mode 100644
index 0000000..65957a2
--- /dev/null
+++ b/test/Documents/Basic.html
@@ -0,0 +1 @@
+The Title Hello world
\ No newline at end of file
diff --git a/test/Documents/RDF_Example.xml b/test/Documents/RDF_Example.xml
new file mode 100644
index 0000000..068da17
--- /dev/null
+++ b/test/Documents/RDF_Example.xml
@@ -0,0 +1,63 @@
+
+
+
+ craigslist | all community in SF bay area
+ http://sfbay.craigslist.org/ccc/
+
+ en-us
+ Copyright 2011 craigslist, inc.
+ webmaster@craigslist.org
+ webmaster@craigslist.org
+ http://sfbay.craigslist.org/ccc//
+ craigslist | all community in SF bay area
+ Collection
+ 2011-11-04T09:39:10-07:00
+ 4
+ hourly
+
+
+
+
+
+
+ -
+
+
+http://sfbay.craigslist.org/sby/muc/2681301534.html
+
+ We are pleased to announce our NEW LOCATION: 1199 N 5th st. San Jose, ca 95112. Please call ahead, by appointment only. Recently featured by Metro Newspaper in their 2011 Best of the Silicon Valley edition see it online here: http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html Guitar Set up (acoustic and electronic) $40!
+]]>
+ 2011-11-04T09:35:17-07:00
+ en-us
+ Copyright 2011 craigslist, inc.
+
+http://sfbay.craigslist.org/sby/muc/2681301534.html
+
+
+ text
+ 2011-11-04T09:35:17-07:00
+
+ -
+
+
+http://sfbay.craigslist.org/eby/rid/2685010755.html
+
+
+]]>
+ 2011-11-04T09:34:54-07:00
+ en-us
+ Copyright 2011 craigslist, inc.
+
+http://sfbay.craigslist.org/eby/rid/2685010755.html
+
+
+ text
+ 2011-11-04T09:34:54-07:00
+
+
\ No newline at end of file
diff --git a/test/Documents/RSS_Example.xml b/test/Documents/RSS_Example.xml
new file mode 100644
index 0000000..0d1fde8
--- /dev/null
+++ b/test/Documents/RSS_Example.xml
@@ -0,0 +1,48 @@
+
+
+
+
+ Liftoff News
+ http://liftoff.msfc.nasa.gov/
+ Liftoff to Space Exploration.
+ en-us
+ Tue, 10 Jun 2003 04:00:00 GMT
+
+ Tue, 10 Jun 2003 09:41:01 GMT
+ http://blogs.law.harvard.edu/tech/rss
+ Weblog Editor 2.0
+ editor@example.com
+ webmaster@example.com
+ -
+
+
Star City
+ http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp
+ How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm">Star City</a>.
+ Tue, 03 Jun 2003 09:39:21 GMT
+ http://liftoff.msfc.nasa.gov/2003/06/03.html#item573
+
+
+ -
+
Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a <a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm">partial eclipse of the Sun</a> on Saturday, May 31st.
+ Fri, 30 May 2003 11:06:42 GMT
+ http://liftoff.msfc.nasa.gov/2003/05/30.html#item572
+
+
+ -
+
The Engine That Does More
+ http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp
+ Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that.
+ Tue, 27 May 2003 08:37:32 GMT
+ http://liftoff.msfc.nasa.gov/2003/05/27.html#item571
+
+
+ -
+
Astronauts' Dirty Laundry
+ http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp
+ Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options.
+ Tue, 20 May 2003 08:56:02 GMT
+ http://liftoff.msfc.nasa.gov/2003/05/20.html#item570
+
+
+
+
\ No newline at end of file
diff --git a/test/Events/01-simple.json b/test/Events/01-simple.json
new file mode 100644
index 0000000..ab3076a
--- /dev/null
+++ b/test/Events/01-simple.json
@@ -0,0 +1,44 @@
+{
+ "name": "simple",
+ "options": {
+ "handler": {},
+ "parser": {}
+ },
+ "html": "adsf ",
+ "expected": [
+ {
+ "event": "opentagname",
+ "data": [
+ "h1"
+ ]
+ },
+ {
+ "event": "attribute",
+ "data": [
+ "class",
+ "test"
+ ]
+ },
+ {
+ "event": "opentag",
+ "data": [
+ "h1",
+ {
+ "class": "test"
+ }
+ ]
+ },
+ {
+ "event": "text",
+ "data": [
+ "adsf"
+ ]
+ },
+ {
+ "event": "closetag",
+ "data": [
+ "h1"
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/Events/02-template.json b/test/Events/02-template.json
new file mode 100644
index 0000000..df344b6
--- /dev/null
+++ b/test/Events/02-template.json
@@ -0,0 +1,63 @@
+{
+ "name": "Template script tags",
+ "options": {
+ "handler": {},
+ "parser": {}
+ },
+ "html": "
",
+ "expected": [
+ {
+ "event": "opentagname",
+ "data": [
+ "p"
+ ]
+ },
+ {
+ "event": "opentag",
+ "data": [
+ "p",
+ {}
+ ]
+ },
+ {
+ "event": "opentagname",
+ "data": [
+ "script"
+ ]
+ },
+ {
+ "event": "attribute",
+ "data": [
+ "type",
+ "text/template"
+ ]
+ },
+ {
+ "event": "opentag",
+ "data": [
+ "script",
+ {
+ "type": "text/template"
+ }
+ ]
+ },
+ {
+ "event": "text",
+ "data": [
+ "Heading1 "
+ ]
+ },
+ {
+ "event": "closetag",
+ "data": [
+ "script"
+ ]
+ },
+ {
+ "event": "closetag",
+ "data": [
+ "p"
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/Events/03-lowercase_tags.json b/test/Events/03-lowercase_tags.json
new file mode 100644
index 0000000..9b58c59
--- /dev/null
+++ b/test/Events/03-lowercase_tags.json
@@ -0,0 +1,46 @@
+{
+ "name": "Lowercase tags",
+ "options": {
+ "handler": {},
+ "parser": {
+ "lowerCaseTags": true
+ }
+ },
+ "html": "adsf ",
+ "expected": [
+ {
+ "event": "opentagname",
+ "data": [
+ "h1"
+ ]
+ },
+ {
+ "event": "attribute",
+ "data": [
+ "class",
+ "test"
+ ]
+ },
+ {
+ "event": "opentag",
+ "data": [
+ "h1",
+ {
+ "class": "test"
+ }
+ ]
+ },
+ {
+ "event": "text",
+ "data": [
+ "adsf"
+ ]
+ },
+ {
+ "event": "closetag",
+ "data": [
+ "h1"
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/Events/04-cdata.json b/test/Events/04-cdata.json
new file mode 100644
index 0000000..71d4860
--- /dev/null
+++ b/test/Events/04-cdata.json
@@ -0,0 +1,43 @@
+{
+ "name": "CDATA",
+ "options": {
+ "handler": {},
+ "parser": {"xmlMode": true}
+ },
+ "html": "<> fo]]> ",
+ "expected": [
+ {
+ "event": "opentagname",
+ "data": [
+ "tag"
+ ]
+ },
+ {
+ "event": "opentag",
+ "data": [
+ "tag",
+ {}
+ ]
+ },
+ {
+ "event": "cdatastart",
+ "data": []
+ },
+ {
+ "event": "text",
+ "data": [
+ " asdf ><> fo"
+ ]
+ },
+ {
+ "event": "cdataend",
+ "data": []
+ },
+ {
+ "event": "closetag",
+ "data": [
+ "tag"
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/Events/05-cdata-special.json b/test/Events/05-cdata-special.json
new file mode 100644
index 0000000..686cb1a
--- /dev/null
+++ b/test/Events/05-cdata-special.json
@@ -0,0 +1,35 @@
+{
+ "name": "CDATA (inside special)",
+ "options": {
+ "handler": {},
+ "parser": {}
+ },
+ "html": "",
+ "expected": [
+ {
+ "event": "opentagname",
+ "data": [
+ "script"
+ ]
+ },
+ {
+ "event": "opentag",
+ "data": [
+ "script",
+ {}
+ ]
+ },
+ {
+ "event": "text",
+ "data": [
+ "/*<> fo/*]]>*/"
+ ]
+ },
+ {
+ "event": "closetag",
+ "data": [
+ "script"
+ ]
+ }
+ ]
+}
diff --git a/test/Events/06-leading-lt.json b/test/Events/06-leading-lt.json
new file mode 100644
index 0000000..fcec852
--- /dev/null
+++ b/test/Events/06-leading-lt.json
@@ -0,0 +1,16 @@
+{
+ "name": "leading lt",
+ "options": {
+ "handler": {},
+ "parser": {}
+ },
+ "html": ">a>",
+ "expected": [
+ {
+ "event": "text",
+ "data": [
+ ">a>"
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/Events/07-self-closing.json b/test/Events/07-self-closing.json
new file mode 100644
index 0000000..f8903aa
--- /dev/null
+++ b/test/Events/07-self-closing.json
@@ -0,0 +1,67 @@
+{
+ "name": "Self-closing tags",
+ "options": {
+ "handler": {
+
+ },
+ "parser": {
+
+ }
+ },
+ "html": "Foo ",
+ "expected": [
+ {
+ "event": "opentagname",
+ "data": [
+ "a"
+ ]
+ },
+ {
+ "event": "attribute",
+ "data": [
+ "href",
+ "http://test.com/"
+ ]
+ },
+ {
+ "event": "opentag",
+ "data": [
+ "a",
+ {
+ "href": "http://test.com/"
+ }
+ ]
+ },
+ {
+ "event": "text",
+ "data": [
+ "Foo"
+ ]
+ },
+ {
+ "event": "closetag",
+ "data": [
+ "a"
+ ]
+ },
+ {
+ "event": "opentagname",
+ "data": [
+ "hr"
+ ]
+ },
+ {
+ "event": "opentag",
+ "data": [
+ "hr",
+ {}
+ ]
+ },
+ {
+ "event": "closetag",
+ "data": [
+ "hr"
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/Events/08-implicit-close-tags.json b/test/Events/08-implicit-close-tags.json
new file mode 100644
index 0000000..3441f20
--- /dev/null
+++ b/test/Events/08-implicit-close-tags.json
@@ -0,0 +1,59 @@
+{
+ "name": "Implicit close tags",
+ "options": {},
+ "html": "
Heading 2
",
+ "expected": [
+ { "event": "opentagname", "data": [ "ol" ] },
+ { "event": "opentag", "data": [ "ol", {} ] },
+ { "event": "opentagname", "data": [ "li" ] },
+ { "event": "attribute", "data": [ "class", "test" ] },
+ { "event": "opentag", "data": [ "li", { "class": "test" } ] },
+ { "event": "opentagname", "data": [ "div" ] },
+ { "event": "opentag", "data": [ "div", {} ] },
+ { "event": "opentagname", "data": [ "table" ] },
+ { "event": "attribute", "data": [ "style", "width:100%" ] },
+ { "event": "opentag", "data": [ "table", { "style": "width:100%" } ] },
+ { "event": "opentagname", "data": [ "tr" ] },
+ { "event": "opentag", "data": [ "tr", {} ] },
+ { "event": "opentagname", "data": [ "td" ] },
+ { "event": "attribute", "data": [ "colspan", "2" ] },
+ { "event": "opentag", "data": [ "td", { "colspan": "2" } ] },
+ { "event": "opentagname", "data": [ "h3" ] },
+ { "event": "opentag", "data": [ "h3", {} ] },
+ { "event": "text", "data": [ "Heading" ] },
+ { "event": "closetag", "data": [ "h3" ] },
+ { "event": "closetag", "data": [ "td" ] },
+ { "event": "closetag", "data": [ "tr" ] },
+ { "event": "opentagname", "data": [ "tr" ] },
+ { "event": "opentag", "data": [ "tr", {} ] },
+ { "event": "opentagname", "data": [ "td" ] },
+ { "event": "opentag", "data": [ "td", {} ] },
+ { "event": "opentagname", "data": [ "div" ] },
+ { "event": "opentag", "data": [ "div", {} ] },
+ { "event": "text", "data": [ "Div" ] },
+ { "event": "closetag", "data": [ "div" ] },
+ { "event": "closetag", "data": [ "td" ] },
+ { "event": "opentagname", "data": [ "td" ] },
+ { "event": "opentag", "data": [ "td", {} ] },
+ { "event": "opentagname", "data": [ "div" ] },
+ { "event": "opentag", "data": [ "div", {} ] },
+ { "event": "text", "data": [ "Div2" ] },
+ { "event": "closetag", "data": [ "div" ] },
+ { "event": "closetag", "data": [ "td" ] },
+ { "event": "closetag", "data": [ "tr" ] },
+ { "event": "closetag", "data": [ "table" ] },
+ { "event": "closetag", "data": [ "div" ] },
+ { "event": "closetag", "data": [ "li" ] },
+ { "event": "opentagname", "data": [ "li" ] },
+ { "event": "opentag", "data": [ "li", {} ] },
+ { "event": "opentagname", "data": [ "div" ] },
+ { "event": "opentag", "data": [ "div", {} ] },
+ { "event": "opentagname", "data": [ "h3" ] },
+ { "event": "opentag", "data": [ "h3", {} ] },
+ { "event": "text", "data": [ "Heading 2" ] },
+ { "event": "closetag", "data": [ "h3" ] },
+ { "event": "closetag", "data": [ "div" ] },
+ { "event": "closetag", "data": [ "li" ] },
+ { "event": "closetag", "data": [ "ol" ] }
+ ]
+}
\ No newline at end of file
diff --git a/test/Events/09-attributes.json b/test/Events/09-attributes.json
new file mode 100644
index 0000000..afa6e4a
--- /dev/null
+++ b/test/Events/09-attributes.json
@@ -0,0 +1,68 @@
+{
+ "name": "attributes (no white space, no value, no quotes)",
+ "options": {
+ "handler": {},
+ "parser": {}
+ },
+ "html": "adsf ",
+ "expected": [
+ {
+ "event": "opentagname",
+ "data": [
+ "button"
+ ]
+ },
+ {
+ "event": "attribute",
+ "data": [
+ "class",
+ "test0"
+ ]
+ },
+ {
+ "event": "attribute",
+ "data": [
+ "title",
+ "test1"
+ ]
+ },
+ {
+ "event": "attribute",
+ "data": [
+ "disabled",
+ ""
+ ]
+ },
+ {
+ "event": "attribute",
+ "data": [
+ "value",
+ "test2"
+ ]
+ },
+ {
+ "event": "opentag",
+ "data": [
+ "button",
+ {
+ "class": "test0",
+ "title": "test1",
+ "disabled": "",
+ "value": "test2"
+ }
+ ]
+ },
+ {
+ "event": "text",
+ "data": [
+ "adsf"
+ ]
+ },
+ {
+ "event": "closetag",
+ "data": [
+ "button"
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/Events/10-crazy-attrib.json b/test/Events/10-crazy-attrib.json
new file mode 100644
index 0000000..baf319f
--- /dev/null
+++ b/test/Events/10-crazy-attrib.json
@@ -0,0 +1,52 @@
+{
+ "name": "crazy attribute",
+ "options": {
+ "handler": {},
+ "parser": {}
+ },
+ "html": "stuff
",
+ "expected": [
+ {
+ "event": "opentagname",
+ "data": [
+ "p"
+ ]
+ },
+ {
+ "event": "attribute",
+ "data": [
+ "<",
+ ""
+ ]
+ },
+ {
+ "event": "attribute",
+ "data": [
+ "fail",
+ ""
+ ]
+ },
+ {
+ "event": "opentag",
+ "data": [
+ "p",
+ {
+ "<": "",
+ "fail": ""
+ }
+ ]
+ },
+ {
+ "event": "text",
+ "data": [
+ "stuff"
+ ]
+ },
+ {
+ "event": "closetag",
+ "data": [
+ "p"
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/test/Events/11-script_in_script.json b/test/Events/11-script_in_script.json
new file mode 100644
index 0000000..ddbb87c
--- /dev/null
+++ b/test/Events/11-script_in_script.json
@@ -0,0 +1,54 @@
+{
+ "name": "Scripts creating other scripts",
+ "options": {
+ "handler": {},
+ "parser": {}
+ },
+ "html": "
",
+ "expected": [
+ {
+ "event": "opentagname",
+ "data": [
+ "p"
+ ]
+ },
+ {
+ "event": "opentag",
+ "data": [
+ "p",
+ {}
+ ]
+ },
+ {
+ "event": "opentagname",
+ "data": [
+ "script"
+ ]
+ },
+ {
+ "event": "opentag",
+ "data": [
+ "script",
+ {}
+ ]
+ },
+ {
+ "event": "text",
+ "data": [
+ "var str = '";
-exports.expected =
-[ { raw: 'head'
- , data: 'head'
- , type: 'tag'
- , name: 'head'
- , children:
- [ { raw: 'script language="Javascript"'
- , data: 'script language="Javascript"'
- , type: 'script'
- , name: 'script'
- , attribs: { language: 'Javascript' }
- , children:
- [ { raw: 'var foo = ""; alert(2 > foo); var baz = 10 << 2; var zip = 10 >> 1; var yap = \"<<>>>><<\";'
- , data: 'var foo = ""; alert(2 > foo); var baz = 10 << 2; var zip = 10 >> 1; var yap = \"<<>>>><<\";'
- , type: 'text'
- }
- ]
- }
- ]
- }
-];
-
-})();
diff --git a/tests/05-tags_in_comment.js b/tests/05-tags_in_comment.js
deleted file mode 100644
index 68a0779..0000000
--- a/tests/05-tags_in_comment.js
+++ /dev/null
@@ -1,48 +0,0 @@
-(function () {
-
-function RunningInNode () {
- return(
- (typeof require) == "function"
- &&
- (typeof exports) == "object"
- &&
- (typeof module) == "object"
- &&
- (typeof __filename) == "string"
- &&
- (typeof __dirname) == "string"
- );
-}
-
-if (!RunningInNode()) {
- if (!this.Tautologistics)
- this.Tautologistics = {};
- if (!this.Tautologistics.NodeHtmlParser)
- this.Tautologistics.NodeHtmlParser = {};
- if (!this.Tautologistics.NodeHtmlParser.Tests)
- this.Tautologistics.NodeHtmlParser.Tests = [];
- exports = {};
- this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
-exports.name = "Special char in comment";
-exports.options = {
- handler: {}
- , parser: {}
-};
-exports.html = "";
-exports.expected =
-[ { raw: 'head'
- , data: 'head'
- , type: 'tag'
- , name: 'head'
- , children:
- [ { raw: ' commented out tags Test '
- , data: ' commented out tags Test '
- , type: 'comment'
- }
- ]
- }
-];
-
-})();
diff --git a/tests/06-comment_in_script.js b/tests/06-comment_in_script.js
deleted file mode 100644
index 2d04ec0..0000000
--- a/tests/06-comment_in_script.js
+++ /dev/null
@@ -1,48 +0,0 @@
-(function () {
-
-function RunningInNode () {
- return(
- (typeof require) == "function"
- &&
- (typeof exports) == "object"
- &&
- (typeof module) == "object"
- &&
- (typeof __filename) == "string"
- &&
- (typeof __dirname) == "string"
- );
-}
-
-if (!RunningInNode()) {
- if (!this.Tautologistics)
- this.Tautologistics = {};
- if (!this.Tautologistics.NodeHtmlParser)
- this.Tautologistics.NodeHtmlParser = {};
- if (!this.Tautologistics.NodeHtmlParser.Tests)
- this.Tautologistics.NodeHtmlParser.Tests = [];
- exports = {};
- this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
-exports.name = "Script source in comment";
-exports.options = {
- handler: {}
- , parser: {}
-};
-exports.html = "";
-exports.expected =
-[ { raw: 'script'
- , data: 'script'
- , type: 'script'
- , name: 'script'
- , children:
- [ { raw: 'var foo = 1;'
- , data: 'var foo = 1;'
- , type: 'comment'
- }
- ]
- }
-];
-
-})();
diff --git a/tests/07-unescaped_in_style.js b/tests/07-unescaped_in_style.js
deleted file mode 100644
index 563a64a..0000000
--- a/tests/07-unescaped_in_style.js
+++ /dev/null
@@ -1,49 +0,0 @@
-(function () {
-
-function RunningInNode () {
- return(
- (typeof require) == "function"
- &&
- (typeof exports) == "object"
- &&
- (typeof module) == "object"
- &&
- (typeof __filename) == "string"
- &&
- (typeof __dirname) == "string"
- );
-}
-
-if (!RunningInNode()) {
- if (!this.Tautologistics)
- this.Tautologistics = {};
- if (!this.Tautologistics.NodeHtmlParser)
- this.Tautologistics.NodeHtmlParser = {};
- if (!this.Tautologistics.NodeHtmlParser.Tests)
- this.Tautologistics.NodeHtmlParser.Tests = [];
- exports = {};
- this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
-exports.name = "Unescaped chars in style";
-exports.options = {
- handler: {}
- , parser: {}
-};
-exports.html = "";
-exports.expected =
-[ { raw: 'style type="text/css"'
- , data: 'style type="text/css"'
- , type: 'style'
- , name: 'style'
- , attribs: { type: 'text/css' }
- , children:
- [ { raw: '\n body > p\n { font-weight: bold; }'
- , data: '\n body > p\n { font-weight: bold; }'
- , type: 'text'
- }
- ]
- }
-];
-
-})();
diff --git a/tests/08-extra_spaces_in_tag.js b/tests/08-extra_spaces_in_tag.js
deleted file mode 100644
index 1767565..0000000
--- a/tests/08-extra_spaces_in_tag.js
+++ /dev/null
@@ -1,49 +0,0 @@
-(function () {
-
-function RunningInNode () {
- return(
- (typeof require) == "function"
- &&
- (typeof exports) == "object"
- &&
- (typeof module) == "object"
- &&
- (typeof __filename) == "string"
- &&
- (typeof __dirname) == "string"
- );
-}
-
-if (!RunningInNode()) {
- if (!this.Tautologistics)
- this.Tautologistics = {};
- if (!this.Tautologistics.NodeHtmlParser)
- this.Tautologistics.NodeHtmlParser = {};
- if (!this.Tautologistics.NodeHtmlParser.Tests)
- this.Tautologistics.NodeHtmlParser.Tests = [];
- exports = {};
- this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
-exports.name = "Extra spaces in tag";
-exports.options = {
- handler: {}
- , parser: {}
-};
-exports.html = "<\n font \n size='14' \n>the text<\n / \nfont \n>";
-exports.expected =
-[ { raw: '\n font \n size=\'14\' \n'
- , data: 'font \n size=\'14\''
- , type: 'tag'
- , name: 'font'
- , attribs: { size: '14' }
- , children:
- [ { raw: 'the text'
- , data: 'the text'
- , type: 'text'
- }
- ]
- }
-];
-
-})();
diff --git a/tests/09-unquoted_attrib.js b/tests/09-unquoted_attrib.js
deleted file mode 100644
index da6bac7..0000000
--- a/tests/09-unquoted_attrib.js
+++ /dev/null
@@ -1,49 +0,0 @@
-(function () {
-
-function RunningInNode () {
- return(
- (typeof require) == "function"
- &&
- (typeof exports) == "object"
- &&
- (typeof module) == "object"
- &&
- (typeof __filename) == "string"
- &&
- (typeof __dirname) == "string"
- );
-}
-
-if (!RunningInNode()) {
- if (!this.Tautologistics)
- this.Tautologistics = {};
- if (!this.Tautologistics.NodeHtmlParser)
- this.Tautologistics.NodeHtmlParser = {};
- if (!this.Tautologistics.NodeHtmlParser.Tests)
- this.Tautologistics.NodeHtmlParser.Tests = [];
- exports = {};
- this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
-exports.name = "Unquoted attributes";
-exports.options = {
- handler: {}
- , parser: {}
-};
-exports.html = "the text ";
-exports.expected =
-[ { raw: 'font size= 14'
- , data: 'font size= 14'
- , type: 'tag'
- , name: 'font'
- , attribs: { size: '14' }
- , children:
- [ { raw: 'the text'
- , data: 'the text'
- , type: 'text'
- }
- ]
- }
-];
-
-})();
diff --git a/tests/10-singular_attribute.js b/tests/10-singular_attribute.js
deleted file mode 100644
index 6c22e1a..0000000
--- a/tests/10-singular_attribute.js
+++ /dev/null
@@ -1,43 +0,0 @@
-(function () {
-
-function RunningInNode () {
- return(
- (typeof require) == "function"
- &&
- (typeof exports) == "object"
- &&
- (typeof module) == "object"
- &&
- (typeof __filename) == "string"
- &&
- (typeof __dirname) == "string"
- );
-}
-
-if (!RunningInNode()) {
- if (!this.Tautologistics)
- this.Tautologistics = {};
- if (!this.Tautologistics.NodeHtmlParser)
- this.Tautologistics.NodeHtmlParser = {};
- if (!this.Tautologistics.NodeHtmlParser.Tests)
- this.Tautologistics.NodeHtmlParser.Tests = [];
- exports = {};
- this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
-exports.name = "Singular attribute";
-exports.options = {
- handler: {}
- , parser: {}
-};
-exports.html = "";
-exports.expected =
-[ { raw: 'option value=\'foo\' selected'
- , data: 'option value=\'foo\' selected'
- , type: 'tag'
- , name: 'option'
- , attribs: { value: 'foo', selected: 'selected' }
- }
-];
-
-})();
diff --git a/tests/11-text_outside_tags.js b/tests/11-text_outside_tags.js
deleted file mode 100644
index ae63136..0000000
--- a/tests/11-text_outside_tags.js
+++ /dev/null
@@ -1,50 +0,0 @@
-(function () {
-
-function RunningInNode () {
- return(
- (typeof require) == "function"
- &&
- (typeof exports) == "object"
- &&
- (typeof module) == "object"
- &&
- (typeof __filename) == "string"
- &&
- (typeof __dirname) == "string"
- );
-}
-
-if (!RunningInNode()) {
- if (!this.Tautologistics)
- this.Tautologistics = {};
- if (!this.Tautologistics.NodeHtmlParser)
- this.Tautologistics.NodeHtmlParser = {};
- if (!this.Tautologistics.NodeHtmlParser.Tests)
- this.Tautologistics.NodeHtmlParser.Tests = [];
- exports = {};
- this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
-exports.name = "Text outside tags";
-exports.options = {
- handler: {}
- , parser: {}
-};
-exports.html = "Line one\n \nline two";
-exports.expected =
-[ { raw: 'Line one\n'
- , data: 'Line one\n'
- , type: 'text'
- }
- , { raw: 'br'
- , data: 'br'
- , type: 'tag'
- , name: 'br'
- }
- , { raw: '\nline two'
- , data: '\nline two'
- , type: 'text'
- }
-];
-
-})();
diff --git a/tests/12-text_only.js b/tests/12-text_only.js
deleted file mode 100644
index 64fab9e..0000000
--- a/tests/12-text_only.js
+++ /dev/null
@@ -1,41 +0,0 @@
-(function () {
-
-function RunningInNode () {
- return(
- (typeof require) == "function"
- &&
- (typeof exports) == "object"
- &&
- (typeof module) == "object"
- &&
- (typeof __filename) == "string"
- &&
- (typeof __dirname) == "string"
- );
-}
-
-if (!RunningInNode()) {
- if (!this.Tautologistics)
- this.Tautologistics = {};
- if (!this.Tautologistics.NodeHtmlParser)
- this.Tautologistics.NodeHtmlParser = {};
- if (!this.Tautologistics.NodeHtmlParser.Tests)
- this.Tautologistics.NodeHtmlParser.Tests = [];
- exports = {};
- this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
-exports.name = "Only text";
-exports.options = {
- handler: {}
- , parser: {}
-};
-exports.html = "this is the text";
-exports.expected =
-[ { raw: 'this is the text'
- , data: 'this is the text'
- , type: 'text'
- }
-];
-
-})();
diff --git a/tests/13-comment_in_text.js b/tests/13-comment_in_text.js
deleted file mode 100644
index e201ef6..0000000
--- a/tests/13-comment_in_text.js
+++ /dev/null
@@ -1,49 +0,0 @@
-(function () {
-
-function RunningInNode () {
- return(
- (typeof require) == "function"
- &&
- (typeof exports) == "object"
- &&
- (typeof module) == "object"
- &&
- (typeof __filename) == "string"
- &&
- (typeof __dirname) == "string"
- );
-}
-
-if (!RunningInNode()) {
- if (!this.Tautologistics)
- this.Tautologistics = {};
- if (!this.Tautologistics.NodeHtmlParser)
- this.Tautologistics.NodeHtmlParser = {};
- if (!this.Tautologistics.NodeHtmlParser.Tests)
- this.Tautologistics.NodeHtmlParser.Tests = [];
- exports = {};
- this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
-exports.name = "Comment within text";
-exports.options = {
- handler: {}
- , parser: {}
-};
-exports.html = "this is the text";
-exports.expected =
-[ { raw: 'this is '
- , data: 'this is '
- , type: 'text'
- }
-, { raw: ' the comment '
- , data: ' the comment '
- , type: 'comment'
- }
-, { raw: ' the text'
- , data: ' the text'
- , type: 'text'
- }
-];
-
-})();
diff --git a/tests/14-comment_in_text_in_script.js b/tests/14-comment_in_text_in_script.js
deleted file mode 100644
index 215a02e..0000000
--- a/tests/14-comment_in_text_in_script.js
+++ /dev/null
@@ -1,57 +0,0 @@
-(function () {
-
-function RunningInNode () {
- return(
- (typeof require) == "function"
- &&
- (typeof exports) == "object"
- &&
- (typeof module) == "object"
- &&
- (typeof __filename) == "string"
- &&
- (typeof __dirname) == "string"
- );
-}
-
-if (!RunningInNode()) {
- if (!this.Tautologistics)
- this.Tautologistics = {};
- if (!this.Tautologistics.NodeHtmlParser)
- this.Tautologistics.NodeHtmlParser = {};
- if (!this.Tautologistics.NodeHtmlParser.Tests)
- this.Tautologistics.NodeHtmlParser.Tests = [];
- exports = {};
- this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
-exports.name = "Comment within text within script";
-exports.options = {
- handler: {}
- , parser: {}
-};
-exports.html = "";
-exports.expected =
-[ { raw: 'script'
- , data: 'script'
- , type: 'script'
- , name: 'script'
- , children:
- [ { raw: 'this is '
- , data: 'this is '
- , type: 'text'
- }
- , { raw: ' the comment '
- , data: ' the comment '
- , type: 'comment'
- }
- , { raw: ' the text'
- , data: ' the text'
- , type: 'text'
- }
-
- ]
- }
-];
-
-})();
diff --git a/tests/15-non-verbose.js b/tests/15-non-verbose.js
deleted file mode 100644
index 829fce4..0000000
--- a/tests/15-non-verbose.js
+++ /dev/null
@@ -1,46 +0,0 @@
-(function () {
-
-function RunningInNode () {
- return(
- (typeof require) == "function"
- &&
- (typeof exports) == "object"
- &&
- (typeof module) == "object"
- &&
- (typeof __filename) == "string"
- &&
- (typeof __dirname) == "string"
- );
-}
-
-if (!RunningInNode()) {
- if (!this.Tautologistics)
- this.Tautologistics = {};
- if (!this.Tautologistics.NodeHtmlParser)
- this.Tautologistics.NodeHtmlParser = {};
- if (!this.Tautologistics.NodeHtmlParser.Tests)
- this.Tautologistics.NodeHtmlParser.Tests = [];
- exports = {};
- this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
-exports.name = "Option 'verbose' set to 'false'";
-exports.options = {
- handler: { verbose: false }
- , parser: {}
-};
-exports.html = "<\n font \n size='14' \n>the text<\n / \nfont \n>";
-exports.expected =
-[ { type: 'tag'
- , name: 'font'
- , attribs: { size: '14' }
- , children:
- [ { data: 'the text'
- , type: 'text'
- }
- ]
- }
-];
-
-})();
diff --git a/tests/16-ignore_whitespace.js b/tests/16-ignore_whitespace.js
deleted file mode 100644
index 68f4439..0000000
--- a/tests/16-ignore_whitespace.js
+++ /dev/null
@@ -1,71 +0,0 @@
-(function () {
-
-function RunningInNode () {
- return(
- (typeof require) == "function"
- &&
- (typeof exports) == "object"
- &&
- (typeof module) == "object"
- &&
- (typeof __filename) == "string"
- &&
- (typeof __dirname) == "string"
- );
-}
-
-if (!RunningInNode()) {
- if (!this.Tautologistics)
- this.Tautologistics = {};
- if (!this.Tautologistics.NodeHtmlParser)
- this.Tautologistics.NodeHtmlParser = {};
- if (!this.Tautologistics.NodeHtmlParser.Tests)
- this.Tautologistics.NodeHtmlParser.Tests = [];
- exports = {};
- this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
-exports.name = "Options 'ignoreWhitespace' set to 'true'";
-exports.options = {
- handler: { ignoreWhitespace: true }
- , parser: {}
-};
-exports.html = "Line one\n \t\n \nline two\n x ";
-exports.expected =
-[ { raw: 'Line one\n'
- , data: 'Line one\n'
- , type: 'text'
- }
- , { raw: 'br'
- , data: 'br'
- , type: 'tag'
- , name: 'br'
- }
- , { raw: 'br'
- , data: 'br'
- , type: 'tag'
- , name: 'br'
- }
- , { raw: '\nline two'
- , data: '\nline two'
- , type: 'text'
- }
- , { raw: 'font'
- , data: 'font'
- , type: 'tag'
- , name: 'font'
- , children:
- [ { raw: 'br'
- , data: 'br'
- , type: 'tag'
- , name: 'br'
- }
- , { raw: ' x '
- , data: ' x '
- , type: 'text'
- }
- ]
- }
-];
-
-})();
diff --git a/tests/17-xml_namespace.js b/tests/17-xml_namespace.js
deleted file mode 100644
index 562f26b..0000000
--- a/tests/17-xml_namespace.js
+++ /dev/null
@@ -1,38 +0,0 @@
-(function () {
-
-function RunningInNode () {
- return(
- (typeof require) == "function"
- &&
- (typeof exports) == "object"
- &&
- (typeof module) == "object"
- &&
- (typeof __filename) == "string"
- &&
- (typeof __dirname) == "string"
- );
-}
-
-if (!RunningInNode()) {
- if (!this.Tautologistics)
- this.Tautologistics = {};
- if (!this.Tautologistics.NodeHtmlParser)
- this.Tautologistics.NodeHtmlParser = {};
- if (!this.Tautologistics.NodeHtmlParser.Tests)
- this.Tautologistics.NodeHtmlParser.Tests = [];
- exports = {};
- this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
-exports.name = "XML Namespace";
-exports.options = {
- handler: {}
- , parser: {}
-};
-exports.html = "text ";
-exports.expected =
- [ { raw: 'ns:tag', data: 'ns:tag', type: 'tag', name: 'ns:tag', children: [ { raw: 'text', data: 'text', type: 'text' } ] }
- ];
-
-})();
diff --git a/tests/18-enforce_empty_tags.js b/tests/18-enforce_empty_tags.js
deleted file mode 100644
index 3ea3757..0000000
--- a/tests/18-enforce_empty_tags.js
+++ /dev/null
@@ -1,40 +0,0 @@
-(function () {
-
-function RunningInNode () {
- return(
- (typeof require) == "function"
- &&
- (typeof exports) == "object"
- &&
- (typeof module) == "object"
- &&
- (typeof __filename) == "string"
- &&
- (typeof __dirname) == "string"
- );
-}
-
-if (!RunningInNode()) {
- if (!this.Tautologistics)
- this.Tautologistics = {};
- if (!this.Tautologistics.NodeHtmlParser)
- this.Tautologistics.NodeHtmlParser = {};
- if (!this.Tautologistics.NodeHtmlParser.Tests)
- this.Tautologistics.NodeHtmlParser.Tests = [];
- exports = {};
- this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
-exports.name = "Enforce empty tags";
-exports.options = {
- handler: {}
- , parser: {}
-};
-exports.html = " text";
-exports.expected =
- [
- { raw: 'link', data: 'link', type: 'tag', name: 'link' }
- , { raw: 'text', data: 'text', type: 'text' }
- ];
-
-})();
diff --git a/tests/19-ignore_empty_tags.js b/tests/19-ignore_empty_tags.js
deleted file mode 100644
index 4f47a59..0000000
--- a/tests/19-ignore_empty_tags.js
+++ /dev/null
@@ -1,41 +0,0 @@
-(function () {
-
-function RunningInNode () {
- return(
- (typeof require) == "function"
- &&
- (typeof exports) == "object"
- &&
- (typeof module) == "object"
- &&
- (typeof __filename) == "string"
- &&
- (typeof __dirname) == "string"
- );
-}
-
-if (!RunningInNode()) {
- if (!this.Tautologistics)
- this.Tautologistics = {};
- if (!this.Tautologistics.NodeHtmlParser)
- this.Tautologistics.NodeHtmlParser = {};
- if (!this.Tautologistics.NodeHtmlParser.Tests)
- this.Tautologistics.NodeHtmlParser.Tests = [];
- exports = {};
- this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
-exports.name = "Ignore empty tags";
-exports.options = {
- handler: { enforceEmptyTags: false }
- , parser: {}
-};
-exports.html = " text";
-exports.expected =
- [
- { raw: 'link', data: 'link', type: 'tag', name: 'link', children: [
- { raw: 'text', data: 'text', type: 'text' }
- ] }
- ];
-
-})();
diff --git a/tests/20-rss.js b/tests/20-rss.js
deleted file mode 100644
index 52442d6..0000000
--- a/tests/20-rss.js
+++ /dev/null
@@ -1,120 +0,0 @@
-(function () {
-
-function RunningInNode () {
- return(
- (typeof require) == "function"
- &&
- (typeof exports) == "object"
- &&
- (typeof module) == "object"
- &&
- (typeof __filename) == "string"
- &&
- (typeof __dirname) == "string"
- );
-}
-
-if (!RunningInNode()) {
- if (!this.Tautologistics)
- this.Tautologistics = {};
- if (!this.Tautologistics.NodeHtmlParser)
- this.Tautologistics.NodeHtmlParser = {};
- if (!this.Tautologistics.NodeHtmlParser.Tests)
- this.Tautologistics.NodeHtmlParser.Tests = [];
- exports = {};
- this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
-exports.name = "RSS (2.0)";
-exports.options = {
- handler: {}
- , parser: {}
-};
-exports.type = "rss";
-//http://cyber.law.harvard.edu/rss/examples/rss2sample.xml
-exports.html = '\
-\
- \
- Liftoff News \
- http://liftoff.msfc.nasa.gov/\
- Liftoff to Space Exploration. \
- en-us \
- Tue, 10 Jun 2003 04:00:00 GMT \
-\
- Tue, 10 Jun 2003 09:41:01 GMT \
- http://blogs.law.harvard.edu/tech/rss \
- Weblog Editor 2.0 \
- editor@example.com \
- webmaster@example.com \
- - \
-\
-
Star City \
- http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp\
- How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia\'s <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm">Star City</a>. \
- Tue, 03 Jun 2003 09:39:21 GMT \
- http://liftoff.msfc.nasa.gov/2003/06/03.html#item573 \
-\
- \
- - \
-
Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a <a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm">partial eclipse of the Sun</a> on Saturday, May 31st. \
- Fri, 30 May 2003 11:06:42 GMT \
- http://liftoff.msfc.nasa.gov/2003/05/30.html#item572 \
-\
- \
- - \
-
The Engine That Does More \
- http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp\
- Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that. \
- Tue, 27 May 2003 08:37:32 GMT \
- http://liftoff.msfc.nasa.gov/2003/05/27.html#item571 \
-\
- \
- - \
-
Astronauts\' Dirty Laundry \
- http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp\
- Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options. \
- Tue, 20 May 2003 08:56:02 GMT \
- http://liftoff.msfc.nasa.gov/2003/05/20.html#item570 \
-\
- \
- \
- ';
-exports.expected = {
- type: "rss"
- , id: ""
- , title: "Liftoff News"
- , link: "http://liftoff.msfc.nasa.gov/"
- , description: "Liftoff to Space Exploration."
- , updated: new Date("Tue, 10 Jun 2003 09:41:01 GMT")
- , author: "editor@example.com"
- , items: [
- {
- id: "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"
- , title: "Star City"
- , link: "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp"
- , description: "How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href=\"http://howe.iki.rssi.ru/GCTC/gctc_e.htm\">Star City</a>."
- , pubDate: new Date("Tue, 03 Jun 2003 09:39:21 GMT")
- }
- , {
- id: "http://liftoff.msfc.nasa.gov/2003/05/30.html#item572"
- , description: "Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a <a href=\"http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm\">partial eclipse of the Sun</a> on Saturday, May 31st."
- , pubDate: new Date("Fri, 30 May 2003 11:06:42 GMT")
- }
- , {
- id: "http://liftoff.msfc.nasa.gov/2003/05/27.html#item571"
- , title: "The Engine That Does More"
- , link: "http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp"
- , description: "Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that."
- , pubDate: new Date("Tue, 27 May 2003 08:37:32 GMT")
- }
- , {
- id: "http://liftoff.msfc.nasa.gov/2003/05/20.html#item570"
- , title: "Astronauts' Dirty Laundry"
- , link: "http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp"
- , description: "Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options."
- , pubDate: new Date("Tue, 20 May 2003 08:56:02 GMT")
- }
- ]
- };
-
-})();
diff --git a/tests/21-atom.js b/tests/21-atom.js
deleted file mode 100644
index 4d8c279..0000000
--- a/tests/21-atom.js
+++ /dev/null
@@ -1,80 +0,0 @@
-(function () {
-
-function RunningInNode () {
- return(
- (typeof require) == "function"
- &&
- (typeof exports) == "object"
- &&
- (typeof module) == "object"
- &&
- (typeof __filename) == "string"
- &&
- (typeof __dirname) == "string"
- );
-}
-
-if (!RunningInNode()) {
- if (!this.Tautologistics)
- this.Tautologistics = {};
- if (!this.Tautologistics.NodeHtmlParser)
- this.Tautologistics.NodeHtmlParser = {};
- if (!this.Tautologistics.NodeHtmlParser.Tests)
- this.Tautologistics.NodeHtmlParser.Tests = [];
- exports = {};
- this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
-exports.name = "Atom (1.0)";
-exports.options = {
- handler: {}
- , parser: {}
-};
-exports.type = "rss";
-//http://en.wikipedia.org/wiki/Atom_%28standard%29
-exports.html = '\
-\
-\
-\
- Example Feed \
- A subtitle. \
- \
- \
- urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6 \
- 2003-12-13T18:30:02Z \
- \
- John Doe \
- johndoe@example.com \
- \
-\
- \
- Atom-Powered Robots Run Amok \
- \
- \
- \
- urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a \
- 2003-12-13T18:30:02Z \
- Some text. \
- \
-\
- ';
-exports.expected = {
- type: "atom"
- , id: "urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6"
- , title: "Example Feed"
- , link: "http://example.org/feed/"
- , description: "A subtitle."
- , updated: new Date("2003-12-13T18:30:02Z")
- , author: "johndoe@example.com"
- , items: [
- {
- id: "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a"
- , title: "Atom-Powered Robots Run Amok"
- , link: "http://example.org/2003/12/13/atom03"
- , description: "Some text."
- , pubDate: new Date("2003-12-13T18:30:02Z")
- }
- ]
- };
-
-})();
diff --git a/tests/22-position_data.js b/tests/22-position_data.js
deleted file mode 100644
index fcd7c90..0000000
--- a/tests/22-position_data.js
+++ /dev/null
@@ -1,100 +0,0 @@
-(function () {
-
-function RunningInNode () {
- return(
- (typeof require) == "function"
- &&
- (typeof exports) == "object"
- &&
- (typeof module) == "object"
- &&
- (typeof __filename) == "string"
- &&
- (typeof __dirname) == "string"
- );
-}
-
-if (!RunningInNode()) {
- if (!this.Tautologistics)
- this.Tautologistics = {};
- if (!this.Tautologistics.NodeHtmlParser)
- this.Tautologistics.NodeHtmlParser = {};
- if (!this.Tautologistics.NodeHtmlParser.Tests)
- this.Tautologistics.NodeHtmlParser.Tests = [];
- exports = {};
- this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
-exports.name = "Postion data";
-exports.options = {
- handler: {}
- , parser: { includeLocation: true }
-};
-exports.html = "\r\n\n\tThe Title \nHello world\r\n\n\n\n";
-exports.expected = [
- {
- raw: 'html',
- data: 'html',
- type: 'tag',
- name: 'html',
- location: {
- line: 1,
- col: 1
- },
- children: [{
- raw: '\r\n\n\t',
- data: '\r\n\n\t',
- type: 'text',
- location: {
- line: 1,
- col: 7
- }
- }, {
- raw: 'title',
- data: 'title',
- type: 'tag',
- name: 'title',
- location: {
- line: 3,
- col: 2
- },
- children: [{
- raw: 'The Title',
- data: 'The Title',
- type: 'text',
- location: {
- line: 3,
- col: 9
- }
- }]
- }, {
- raw: 'body',
- data: 'body',
- type: 'tag',
- name: 'body',
- location: {
- line: 3,
- col: 26
- },
- children: [{
- raw: '\nHello world\r\n\n',
- data: '\nHello world\r\n\n',
- type: 'text',
- location: {
- line: 3,
- col: 32
- }
- }]
- }, {
- raw: '\n\n',
- data: '\n\n',
- type: 'text',
- location: {
- line: 6,
- col: 8
- }
- }]
- }
- ];
-
-})();
diff --git a/utils_example.js b/utils_example.js
deleted file mode 100644
index d219de5..0000000
--- a/utils_example.js
+++ /dev/null
@@ -1,35 +0,0 @@
-//node --prof --prof_auto profile.js
-//deps/v8/tools/mac-tick-processor v8.log
-var sys = require("sys");
-var htmlparser = require("./lib/htmlparser");
-
-var html = "text a text b text c text e hhh hellow world ";
-
-var handler = new htmlparser.DefaultHandler(function(err, dom) {
- if (err) {
- sys.debug("Error: " + err);
- }
- else {
- sys.debug(sys.inspect(dom, false, null));
- var id = htmlparser.DomUtils.getElementById("x", dom);
- sys.debug("id: " + sys.inspect(id, false, null));
- var class = htmlparser.DomUtils.getElements({ class: "y" }, dom);
- sys.debug("class: " + sys.inspect(class, false, null));
- var multiclass = htmlparser.DomUtils.getElements({ class: function (value) { return(value && value.indexOf("h") > -1); } }, dom);
- sys.debug("multiclass: " + sys.inspect(multiclass, false, null));
- var name = htmlparser.DomUtils.getElementsByTagName("a", dom);
- sys.debug("name: " + sys.inspect(name, false, null));
- var text = htmlparser.DomUtils.getElementsByTagType("text", dom);
- sys.debug("text: " + sys.inspect(text, false, null));
- var nested = htmlparser.DomUtils.getElements({ tag_name: "d", id: "z", class: "w" }, dom);
- nested = htmlparser.DomUtils.getElementsByTagName("e", nested);
- nested = htmlparser.DomUtils.getElementsByTagType("text", nested);
- sys.debug("nested: " + sys.inspect(nested, false, null));
- var double = htmlparser.DomUtils.getElementsByTagName("yy", dom);
- sys.debug("double: " + sys.inspect(double, false, null));
- var single = htmlparser.DomUtils.getElements( { tag_name: "yy", id: "secondyy" }, dom);
- sys.debug("single: " + sys.inspect(single, false, null));
- }
-}, { verbose: false });
-var parser = new htmlparser.Parser(handler);
-parser.parseComplete(html);