From 5cf2d1f13e0fa64b32408a64a694604d285e9705 Mon Sep 17 00:00:00 2001
From: nicojs
Date: Wed, 3 Feb 2016 17:33:07 +0100
Subject: [PATCH] Made sanitization options of the HtmlRenderer pluggable (fix
for issue 84)
---
README.md | 21 ++++++++++++++++-----
lib/common.js | 27 ++++++++++++++++++++++++++-
lib/html.js | 37 +++++++++++++++++++------------------
test/sanitize.txt | 47 +++++++++++++++++++++++++++++++++++++++++++++++
test/test.js | 10 ++++++++++
5 files changed, 118 insertions(+), 24 deletions(-)
create mode 100644 test/sanitize.txt
diff --git a/README.md b/README.md
index 4acd7436..1c815eb9 100644
--- a/README.md
+++ b/README.md
@@ -89,11 +89,22 @@ The following options are currently supported:
- `smart`: if `true`, straight quotes will be made curly, `--` will
be changed to an en dash, `---` will be changed to an em dash, and
`...` will be changed to ellipses.
-- `safe`: if `true`, raw HTML will not be passed through to HTML
- output (it will be replaced by comments), and potentially unsafe
- URLs in links and images (those beginning with `javascript:`,
- `vbscript:`, `file:`, and with a few exceptions `data:`) will
- be replaced with empty strings.
+- `safe`: if `true`, raw HTML will be passed through the `sanitize`
+ function before being inserted into the output document. Image and link
+ URLs will be passed through the `isUrlSafe` function to determine whether they are considered safe.
+- `sanitize`: when the `safe` option is `true`, this function will be used to
+ sanitize the raw HTML fragments. The function receives the HTML
+ (HtmlBlock or HtmlInline) AST node as its parameter and should return
+ the replacement HTML string as output. The AST node has a
+ `literal` property containing the raw HTML to be sanitized.
+ Default implementation is: `function(node) { return ''; }`
+- `isUrlSafe`: when the `safe` option is `true`, this function will be used to
+ verify whether image and link URLs are considered safe. The function receives the
+ string containing the image source or link URL as its parameter and should return
+ a truthy value if the URL is safe. If unsafe, the `src` and `href` attributes
+ will be omitted from the output HTML.
+ Default: strings beginning with `javascript:`, `vbscript:`, `file:`, and
+ (with a few exceptions) `data:` are considered unsafe and will be omitted.
It is also possible to override the `escape` and `softbreak`
properties of a renderer. So, to make soft breaks render as hard
diff --git a/lib/common.js b/lib/common.js
index 605a3bf7..ba128a2c 100644
--- a/lib/common.js
+++ b/lib/common.js
@@ -92,6 +92,30 @@ var escapeXml = function(s, preserve_entities) {
}
};
+// Object.assign fallback, installed only when the runtime lacks a native one.
+if (typeof Object.assign !== 'function') {
+    // Copies own enumerable string-keyed properties of each source onto target.
+    Object.assign = function (target) {
+        'use strict';
+        if (target === undefined || target === null) {
+            throw new TypeError('Cannot convert undefined or null to object');
+        }
+        var hasOwn = Object.prototype.hasOwnProperty;
+        var output = Object(target);
+        for (var index = 1; index < arguments.length; index++) {
+            var source = arguments[index];
+            if (source === undefined || source === null) { continue; }
+            for (var nextKey in source) {
+                // Borrow hasOwnProperty so null-prototype sources work too.
+                if (hasOwn.call(source, nextKey)) {
+                    output[nextKey] = source[nextKey];
+                }
+            }
+        }
+        return output;
+    };
+}
+
module.exports = { unescapeString: unescapeString,
normalizeURI: normalizeURI,
escapeXml: escapeXml,
@@ -99,5 +123,6 @@ module.exports = { unescapeString: unescapeString,
OPENTAG: OPENTAG,
CLOSETAG: CLOSETAG,
ENTITY: ENTITY,
- ESCAPABLE: ESCAPABLE
+ ESCAPABLE: ESCAPABLE,
+ objectAssign: Object.assign
};
diff --git a/lib/html.js b/lib/html.js
index 7f274902..3df37ba9 100644
--- a/lib/html.js
+++ b/lib/html.js
@@ -1,6 +1,9 @@
"use strict";
-var escapeXml = require('./common').escapeXml;
+var common = require('./common');
+var escapeXml = common.escapeXml;
+var objectAssign = common.objectAssign;
+
// Helper function to produce an HTML tag.
var tag = function(name, attrs, selfclosing) {
@@ -25,9 +28,8 @@ var reHtmlTag = /\<[^>]*\>/;
var reUnsafeProtocol = /^javascript:|vbscript:|file:|data:/i;
var reSafeDataProtocol = /^data:image\/(?:png|gif|jpeg|webp)/i;
-var potentiallyUnsafe = function(url) {
- return reUnsafeProtocol.test(url) &&
- !reSafeDataProtocol.test(url);
+var isUrlSafe = function(url) { // default URL check: whitelisted data: image, or no unsafe scheme match
+    return reSafeDataProtocol.test(url) || !reUnsafeProtocol.test(url);
};
var renderNodes = function(block) {
@@ -61,6 +63,14 @@ var renderNodes = function(block) {
if (options.time) { console.time("rendering"); }
+    function handleHtml(htmlNode) {
+        // In safe mode the configured sanitizer decides what is emitted;
+        // otherwise the node's raw literal is written out unchanged.
+        var html = options.safe ? options.sanitize(htmlNode)
+                                : htmlNode.literal;
+        out(html);
+    }
+
while ((event = walker.next())) {
entering = event.entering;
node = event.node;
@@ -98,11 +108,7 @@ var renderNodes = function(block) {
break;
case 'HtmlInline':
- if (options.safe) {
- out('');
- } else {
- out(node.literal);
- }
+ handleHtml(node);
break;
case 'CustomInline':
@@ -115,7 +121,7 @@ var renderNodes = function(block) {
case 'Link':
if (entering) {
- if (!(options.safe && potentiallyUnsafe(node.destination))) {
+ if (!options.safe || options.isUrlSafe(node.destination)) {
attrs.push(['href', esc(node.destination, true)]);
}
if (node.title) {
@@ -130,8 +136,7 @@ var renderNodes = function(block) {
case 'Image':
if (entering) {
if (disableTags === 0) {
- if (options.safe &&
- potentiallyUnsafe(node.destination)) {
+ if (options.safe && !options.isUrlSafe(node.destination)) {
out('
');
- } else {
- out(node.literal);
- }
+ handleHtml(node);
cr();
break;
@@ -278,7 +279,7 @@ function HtmlRenderer(options){
// set to "
" to make them hard breaks
// set to " " if you want to ignore line wrapping in source
escape: escapeXml,
- options: options || {},
+ options: objectAssign({ sanitize: function() { return ''; }, isUrlSafe: isUrlSafe }, options),
render: renderNodes
};
}
diff --git a/test/sanitize.txt b/test/sanitize.txt
new file mode 100644
index 00000000..ab21f627
--- /dev/null
+++ b/test/sanitize.txt
@@ -0,0 +1,47 @@
+## Sanitization options
+
+A safe html block should be preserved
+
+```````````````````````````````` example
+Should be preserved:
+
+
+.
+Should be preserved:
+
+````````````````````````````````
+
+An unsafe html block should be omitted
+```````````````````````````````` example
+should be omitted:
+
+
+.
+should be omitted:
+
+````````````````````````````````
+
+A safe inline html should be preserved
+```````````````````````````````` example
+Should be preserved:
+.
+Should be preserved:
+````````````````````````````````
+
+An unsafe inline html should be omitted
+```````````````````````````````` example
+Should be omitted:
+.
+Should be omitted:
+````````````````````````````````
+
+A safe URL should be preserved:
+```````````````````````````````` example
+Should be preserved: ![image](https://saved-by-the-bell)
+.
+Should be preserved: ![image](https://saved-by-the-bell)
+````````````````````````````````
diff --git a/test/test.js b/test/test.js
index a672e3ce..3af4abaa 100755
--- a/test/test.js
+++ b/test/test.js
@@ -39,6 +39,12 @@ var cursor = {
};
var writer = new commonmark.HtmlRenderer();
+var writerSanitized = new commonmark.HtmlRenderer({safe: true, isUrlSafe: function(url){ return url === 'https://saved-by-the-bell'; }, sanitize: function(htmlFragment) {
+ if(htmlFragment.literal === '' || htmlFragment.literal === '
'){
+ return htmlFragment.literal;
+ }else{
+ return '';
+ } }});
var reader = new commonmark.Parser();
var readerSmart = new commonmark.Parser({smart: true});
@@ -154,6 +160,10 @@ specTests('test/smart_punct.txt', results, function(z) {
return writer.render(readerSmart.parse(z));
});
+specTests('test/sanitize.txt', results, function(z){
+ return writerSanitized.render(reader.parse(z));
+ });
+
// pathological cases
cursor.write('Pathological cases:\n');