Skip to content

Commit a817d75

Browse files
committed
Don't match JavaScript comments and strings when removing script tags.
First remove HTML comments and CDATA sections. Next remove the script tags. Fixes #13
1 parent d037640 commit a817d75

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

Embed/Url.php

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -153,10 +153,10 @@ public function getHtmlContent () {
153 153
$response = mb_convert_encoding($response, 'HTML-ENTITIES', 'UTF-8');
154 154
$response = preg_replace('/<head[^>]*>/','<head><META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">', $response);
155 155
}
156-
157-
//Remove all script elements (thanks https://github.com/jasny)
158-
$response = preg_replace('%<script\b(?:"(?:[^"\\\\\\\\]++|\\\\\\\\.)*+"|\'(?:[^\\\\\\\\]++|\\\\\\\\.)*+\'|[^>"]++)*>(?:"(?:[^"\\\\\\\\]++|\\\\\\\\.)*+"|\'(?:[^\\\\\\\\]++|\\\\\\\\.)*+\'|//.*?\n|/\*(?:[^\*]++|\*)*?\*/|[^<"/]++|/|(?R)|<)*?</\s*script>%si', '', $response);
159-
156+
157+
//Remove all script elements, CDATA sections and comments (thanks https://github.com/jasny)
158+
$response = preg_replace(array('%<!--(?:[^-]++|-)*?-->|<!\[CDATA\[(?:[^\]]++|\])*?\]\]>%si', '%<script\b(?:"(?:[^"\\\\]++|\\\\.)*+"|\'(?:[^\'\\\\]++|\\\\.)*+\'|[^>"\']++)*>(?:[^<]++|<)*?</\s*script\s*>%si'), '', $response);
159+
160 160
$this->htmlContent->loadHTML($response);
161 161
libxml_use_internal_errors($errors);
162 162

0 commit comments

Comments
 (0)