From ad6b8df845a7825b610cb1d0eea64c2df3035e28 Mon Sep 17 00:00:00 2001 From: William Lachance Date: Mon, 17 Feb 2025 16:35:20 -0500 Subject: [PATCH 1/8] Don't include HTML content in title search index Closes #13355. --- sphinx/builders/html/__init__.py | 3 ++- tests/js/fixtures/cpp/searchindex.js | 2 +- tests/roots/test-search/escapedtitle.rst | 4 ++++ tests/test_search.py | 19 ++++++++++++++----- 4 files changed, 21 insertions(+), 7 deletions(-) create mode 100644 tests/roots/test-search/escapedtitle.rst diff --git a/sphinx/builders/html/__init__.py b/sphinx/builders/html/__init__.py index 5e6acdeaf9d..6b173caf503 100644 --- a/sphinx/builders/html/__init__.py +++ b/sphinx/builders/html/__init__.py @@ -679,8 +679,9 @@ def write_doc(self, docname: str, doctree: nodes.document) -> None: def write_doc_serialized(self, docname: str, doctree: nodes.document) -> None: self.imgpath = relative_uri(self.get_target_uri(docname), self.imagedir) self.post_process_images(doctree) + # get title as plain text title_node = self.env.longtitles.get(docname) - title = self.render_partial(title_node)['title'] if title_node else '' + title = title_node.astext() if title_node else '' self.index_page(docname, doctree, title) def finish(self) -> None: diff --git a/tests/js/fixtures/cpp/searchindex.js b/tests/js/fixtures/cpp/searchindex.js index 42adb88db92..f48a3ec1e0e 100644 --- a/tests/js/fixtures/cpp/searchindex.js +++ b/tests/js/fixtures/cpp/searchindex.js @@ -1 +1 @@ -Search.setIndex({"alltitles":{},"docnames":["index"],"envversion":{"sphinx":65,"sphinx.domains.c":3,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":9,"sphinx.domains.index":1,"sphinx.domains.javascript":3,"sphinx.domains.math":2,"sphinx.domains.python":4,"sphinx.domains.rst":2,"sphinx.domains.std":2},"filenames":["index.rst"],"indexentries":{"sphinx (c++ class)":[[0,"_CPPv46Sphinx",false]]},"objects":{"":[[0,0,1,"_CPPv46Sphinx","Sphinx"]]},"objnames":{"0":["cpp","class","C++ class"]},"objtypes":{"0":"cpp:class"},"terms":{"The":0,"becaus":0,"c":0,"can":0,"cardin":0,"challeng":0,"charact":0,"class":0,"descript":0,"drop":0,"engin":0,"fixtur":0,"frequent":0,"gener":0,"i":0,"index":0,"inflat":0,"mathemat":0,"occur":0,"often":0,"project":0,"punctuat":0,"queri":0,"relat":0,"sampl":0,"search":0,"size":0,"sphinx":0,"term":0,"thei":0,"thi":0,"token":0,"us":0,"web":0,"would":0},"titles":["<no title>"],"titleterms":{}}) \ No newline at end of file +Search.setIndex({"alltitles":{},"docnames":["index"],"envversion":{"sphinx":65,"sphinx.domains.c":3,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":9,"sphinx.domains.index":1,"sphinx.domains.javascript":3,"sphinx.domains.math":2,"sphinx.domains.python":4,"sphinx.domains.rst":2,"sphinx.domains.std":2},"filenames":["index.rst"],"indexentries":{"sphinx (c++ class)":[[0,"_CPPv46Sphinx",false]]},"objects":{"":[[0,0,1,"_CPPv46Sphinx","Sphinx"]]},"objnames":{"0":["cpp","class","C++ class"]},"objtypes":{"0":"cpp:class"},"terms":{"The":0,"becaus":0,"c":0,"can":0,"cardin":0,"challeng":0,"charact":0,"class":0,"descript":0,"drop":0,"engin":0,"fixtur":0,"frequent":0,"gener":0,"i":0,"index":0,"inflat":0,"mathemat":0,"occur":0,"often":0,"project":0,"punctuat":0,"queri":0,"relat":0,"sampl":0,"search":0,"size":0,"sphinx":0,"term":0,"thei":0,"thi":0,"token":0,"us":0,"web":0,"would":0},"titles":[""],"titleterms":{}}) \ No newline at end of file diff --git a/tests/roots/test-search/escapedtitle.rst b/tests/roots/test-search/escapedtitle.rst new file mode 100644 index 00000000000..70f4c982d5b --- /dev/null +++ b/tests/roots/test-search/escapedtitle.rst @@ -0,0 +1,4 @@ +`escaped` title with < and > in it +================================== + +this document has escaped content in the title but also the characters < and > in it \ No newline at end of file diff --git a/tests/test_search.py b/tests/test_search.py index 22fa6ab7616..5fab7a9d167 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -155,8 +155,17 @@ def test_term_in_heading_and_section(app: SphinxTestApp) -> None: # if search term is in the title of one doc and in the text of another # both documents should be a hit in the search index as a title, # respectively text hit - assert '"textinhead":2' in searchindex - assert '"textinhead":0' in searchindex + assert '"textinhead":3' in searchindex + assert '"textinhead":1' in searchindex + + +@pytest.mark.sphinx('html', testroot='search') +def test_escaped_title(app: SphinxTestApp) -> None: + app.build(force_all=True) + searchindex = load_searchindex(app.outdir / 'searchindex.js') + print(searchindex) + assert 'escapedtitle' in searchindex['docnames'] + assert 'escaped title with < and > in it' in searchindex['titles'] @pytest.mark.sphinx('html', testroot='search') @@ -398,7 +407,7 @@ def test_search_index_gen_zh(app: SphinxTestApp) -> None: def test_nosearch(app: SphinxTestApp) -> None: app.build() index = load_searchindex(app.outdir / 'searchindex.js') - assert index['docnames'] == ['index', 'nosearch', 'tocitem'] + assert index['docnames'] == ['escapedtitle', 'index', 'nosearch', 'tocitem'] # latex is in 'nosearch.rst', and nowhere else assert 'latex' not in index['terms'] # cat is in 'index.rst' but is marked with the 'no-search' class @@ -406,7 +415,7 @@ def test_nosearch(app: SphinxTestApp) -> None: # bat is indexed from 'index.rst' and 'tocitem.rst' (document IDs 0, 2), and # not from 'nosearch.rst' (document ID 1) assert 'bat' in index['terms'] - assert index['terms']['bat'] == [0, 2] + assert index['terms']['bat'] == [1, 3] @pytest.mark.sphinx( @@ -418,7 +427,7 @@ def test_nosearch(app: SphinxTestApp) -> None: def test_parallel(app: SphinxTestApp) -> None: app.build() index = load_searchindex(app.outdir / 'searchindex.js') - assert index['docnames'] == ['index', 'nosearch', 'tocitem'] + assert index['docnames'] == ['escapedtitle', 'index', 'nosearch', 'tocitem'] @pytest.mark.sphinx('html', testroot='search') From 62779cad9c8c1871cd6948faada026d6de09fc29 Mon Sep 17 00:00:00 2001 From: William Lachance Date: Mon, 17 Feb 2025 16:47:42 -0500 Subject: [PATCH 2/8] CHANGES, fix js tests --- CHANGES.rst | 2 ++ tests/js/searchtools.spec.js | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index f9490d957fd..db1955d2370 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -159,6 +159,8 @@ Bugs fixed Patch by Adam Turner. * #13328: Fix parsing of PEP 695 functions with return annotations. Patch by Bénédikt Tran. Initial work by Arash Badie-Modiri. +* #13355: Don't include escaped title content in the search index. + Patch by Will Lachance. Testing ------- diff --git a/tests/js/searchtools.spec.js b/tests/js/searchtools.spec.js index 809fd19d0f4..bafbc4b42d2 100644 --- a/tests/js/searchtools.spec.js +++ b/tests/js/searchtools.spec.js @@ -34,7 +34,7 @@ describe('Basic html theme search', function() { hits = [[ "index", - "<no title>", + "", "", null, 5, From 681ee18356b1acd0cdd212c9f1cf5f3cd3d3f9ea Mon Sep 17 00:00:00 2001 From: William Lachance Date: Mon, 17 Feb 2025 20:09:21 -0500 Subject: [PATCH 3/8] escape html --- sphinx/themes/basic/static/searchtools.js | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/sphinx/themes/basic/static/searchtools.js b/sphinx/themes/basic/static/searchtools.js index 91f4be57fc8..7efa0537fe1 100644 --- a/sphinx/themes/basic/static/searchtools.js +++ b/sphinx/themes/basic/static/searchtools.js @@ -324,6 +324,14 @@ const Search = { const titles = Search._index.titles; const allTitles = Search._index.alltitles; const indexEntries = Search._index.indexentries; + const htmlEscape = (text) => { + return String(text) + .replaceAll("&", "&") + .replaceAll("<", "<") + .replaceAll(">", ">") + .replaceAll('"', """) + .replaceAll("'", "'"); + } // Collect multiple result groups to be sorted separately and then ordered. // Each is an array of [docname, title, anchor, descr, score, filename, kind]. @@ -340,7 +348,9 @@ const Search = { const boost = titles[file] === title ? 1 : 0; // add a boost for document titles normalResults.push([ docNames[file], - titles[file] !== title ? `${titles[file]} > ${title}` : title, + htmlEscape( + titles[file] !== title ? `${titles[file]} > ${title}` : title + ), id !== null ? "#" + id : "", null, score + boost, @@ -358,7 +368,7 @@ const Search = { const score = Math.round(100 * queryLower.length / entry.length); const result = [ docNames[file], - titles[file], + htmlEscape(titles[file]), id ? "#" + id : "", null, score, From ec63ed464a86cf4173190aeb0768aa309e6d3089 Mon Sep 17 00:00:00 2001 From: William Lachance Date: Tue, 18 Feb 2025 07:14:10 -0500 Subject: [PATCH 4/8] Better escape html --- sphinx/themes/basic/static/searchtools.js | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/sphinx/themes/basic/static/searchtools.js b/sphinx/themes/basic/static/searchtools.js index 7efa0537fe1..3ff8b47b874 100644 --- a/sphinx/themes/basic/static/searchtools.js +++ b/sphinx/themes/basic/static/searchtools.js @@ -58,6 +58,15 @@ const _removeChildren = (element) => { const _escapeRegExp = (string) => string.replace(/[.*+\-?^${}()|[\]\\]/g, "\\$&"); // $& means the whole matched string +const _escapeHTML = (text) => { + return text + .replaceAll("&", "&") + .replaceAll("<", "<") + .replaceAll(">", ">") + .replaceAll('"', """) + .replaceAll("'", "'"); +} + const _displayItem = (item, searchTerms, highlightTerms) => { const docBuilder = DOCUMENTATION_OPTIONS.BUILDER; const docFileSuffix = DOCUMENTATION_OPTIONS.FILE_SUFFIX; @@ -324,14 +333,6 @@ const Search = { const titles = Search._index.titles; const allTitles = Search._index.alltitles; const indexEntries = Search._index.indexentries; - const htmlEscape = (text) => { - return String(text) - .replaceAll("&", "&") - .replaceAll("<", "<") - .replaceAll(">", ">") - .replaceAll('"', """) - .replaceAll("'", "'"); - } // Collect multiple result groups to be sorted separately and then ordered. // Each is an array of [docname, title, anchor, descr, score, filename, kind]. @@ -348,7 +349,7 @@ const Search = { const boost = titles[file] === title ? 1 : 0; // add a boost for document titles normalResults.push([ docNames[file], - htmlEscape( + _escapeHTML( titles[file] !== title ? `${titles[file]} > ${title}` : title ), id !== null ? "#" + id : "", @@ -368,7 +369,7 @@ const Search = { const score = Math.round(100 * queryLower.length / entry.length); const result = [ docNames[file], - htmlEscape(titles[file]), + _escapeHTML(titles[file]), id ? "#" + id : "", null, score, From d9a7838dda286913f0a64bcc2c26a5fbaa084956 Mon Sep 17 00:00:00 2001 From: William Lachance Date: Tue, 18 Feb 2025 07:17:11 -0500 Subject: [PATCH 5/8] expect escaped html --- tests/js/searchtools.spec.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/js/searchtools.spec.js b/tests/js/searchtools.spec.js index bafbc4b42d2..b228f0de18f 100644 --- a/tests/js/searchtools.spec.js +++ b/tests/js/searchtools.spec.js @@ -184,7 +184,7 @@ describe('Basic html theme search', function() { expectedRanking = [ ['index', 'Main Page', '#index-0'], /* index entry */ - ['index', 'Main Page > Result Scoring', '#result-scoring'], /* title */ + ['index', 'Main Page > Result Scoring', '#result-scoring'], /* title */ ]; searchParameters = Search._parseQuery('scoring'); @@ -198,7 +198,7 @@ describe('Basic html theme search', function() { expectedRanking = [ ['relevance', 'Relevance', ''], /* main title */ - ['index', 'Main Page > Relevance', '#relevance'], /* subsection heading title */ + ['index', 'Main Page > Relevance', '#relevance'], /* subsection heading title */ ]; searchParameters = Search._parseQuery('relevance'); From aaa2f5931633d7b1f6da05b709e5c2cf8d0c7c76 Mon Sep 17 00:00:00 2001 From: Will Lachance Date: Tue, 18 Feb 2025 07:34:35 -0500 Subject: [PATCH 6/8] Update sphinx/themes/basic/static/searchtools.js Co-authored-by: James Addison <55152140+jayaddison@users.noreply.github.com> --- sphinx/themes/basic/static/searchtools.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinx/themes/basic/static/searchtools.js b/sphinx/themes/basic/static/searchtools.js index 3ff8b47b874..dc2d8689034 100644 --- a/sphinx/themes/basic/static/searchtools.js +++ b/sphinx/themes/basic/static/searchtools.js @@ -64,7 +64,7 @@ const _escapeHTML = (text) => { .replaceAll("<", "<") .replaceAll(">", ">") .replaceAll('"', """) - .replaceAll("'", "'"); + .replaceAll("'", "'"); } const _displayItem = (item, searchTerms, highlightTerms) => { From cdcbbbbb5fc732ced72699fd0addd2da9f8328dd Mon Sep 17 00:00:00 2001 From: William Lachance Date: Tue, 18 Feb 2025 07:51:05 -0500 Subject: [PATCH 7/8] Escape HTML on display --- sphinx/themes/basic/static/searchtools.js | 10 ++++------ tests/js/searchtools.spec.js | 4 ++-- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/sphinx/themes/basic/static/searchtools.js b/sphinx/themes/basic/static/searchtools.js index dc2d8689034..32b9089290e 100644 --- a/sphinx/themes/basic/static/searchtools.js +++ b/sphinx/themes/basic/static/searchtools.js @@ -99,10 +99,10 @@ const _displayItem = (item, searchTerms, highlightTerms) => { let linkEl = listItem.appendChild(document.createElement("a")); linkEl.href = linkUrl + anchor; linkEl.dataset.score = score; - linkEl.innerHTML = title; + linkEl.innerHTML = _escapeHTML(title); if (descr) { listItem.appendChild(document.createElement("span")).innerHTML = - " (" + descr + ")"; + " (" + _escapeHTML(descr) + ")"; // highlight search terms in the description if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted")); @@ -349,9 +349,7 @@ const Search = { const boost = titles[file] === title ? 1 : 0; // add a boost for document titles normalResults.push([ docNames[file], - _escapeHTML( - titles[file] !== title ? `${titles[file]} > ${title}` : title - ), + titles[file] !== title ? `${titles[file]} > ${title}` : title, id !== null ? "#" + id : "", null, score + boost, @@ -369,7 +367,7 @@ const Search = { const score = Math.round(100 * queryLower.length / entry.length); const result = [ docNames[file], - _escapeHTML(titles[file]), + titles[file], id ? "#" + id : "", null, score, diff --git a/tests/js/searchtools.spec.js b/tests/js/searchtools.spec.js index b228f0de18f..bafbc4b42d2 100644 --- a/tests/js/searchtools.spec.js +++ b/tests/js/searchtools.spec.js @@ -184,7 +184,7 @@ describe('Basic html theme search', function() { expectedRanking = [ ['index', 'Main Page', '#index-0'], /* index entry */ - ['index', 'Main Page > Result Scoring', '#result-scoring'], /* title */ + ['index', 'Main Page > Result Scoring', '#result-scoring'], /* title */ ]; searchParameters = Search._parseQuery('scoring'); @@ -198,7 +198,7 @@ describe('Basic html theme search', function() { expectedRanking = [ ['relevance', 'Relevance', ''], /* main title */ - ['index', 'Main Page > Relevance', '#relevance'], /* subsection heading title */ + ['index', 'Main Page > Relevance', '#relevance'], /* subsection heading title */ ]; searchParameters = Search._parseQuery('relevance'); From 0f1af0640d312074b42707d7753b15a8082006b4 Mon Sep 17 00:00:00 2001 From: Will Lachance Date: Wed, 26 Feb 2025 05:47:03 -0500 Subject: [PATCH 8/8] Update sphinx/themes/basic/static/searchtools.js Co-authored-by: James Addison <55152140+jayaddison@users.noreply.github.com> --- sphinx/themes/basic/static/searchtools.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinx/themes/basic/static/searchtools.js b/sphinx/themes/basic/static/searchtools.js index 32b9089290e..e63bcfa8ec6 100644 --- a/sphinx/themes/basic/static/searchtools.js +++ b/sphinx/themes/basic/static/searchtools.js @@ -102,7 +102,7 @@ const _displayItem = (item, searchTerms, highlightTerms) => { linkEl.innerHTML = _escapeHTML(title); if (descr) { listItem.appendChild(document.createElement("span")).innerHTML = - " (" + _escapeHTML(descr) + ")"; + _escapeHTML(" (" + descr + ")"); // highlight search terms in the description if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted"));