Skip to content

Commit fb387ce

Browse files
Merge pull request #190 from Chandra-Prakash-TS/Searcherrorfix
search fix
2 parents a6609d3 + 9806042 commit fb387ce

File tree

2 files changed

+106
-46
lines changed

src/components/Search.jsx

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,9 @@ function useAutocomplete({ close }) {
7070
return search(query, { limit: 5 })
7171
},
7272
getItemUrl({ item }) {
73-
return item.url
73+
return item?.url || ''
7474
},
75+
7576
onSelect: navigate,
7677
},
7778
]
@@ -125,7 +126,7 @@ function HighlightQuery({ text, query }) {
125126

126127
function SearchResult({ result, autocomplete, collection, query, index }) {
127128
let sectionTitle = navigation.find((section) =>
128-
section.links.find((link) => link.href === result.url.split('#')[0]),
129+
section.links.find((link) => link.href === result?.url.split('#')[0]),
129130
)?.title
130131

131132
let hierarchy = [sectionTitle, result.pageTitle].filter(
@@ -200,12 +201,16 @@ function SearchResults({ autocomplete, query, collection }) {
200201
return (
201202
<p className="px-4 py-8 text-center text-sm text-slate-700 dark:text-slate-400">
202203
Couldn't find what you are looking for?&nbsp;
203-
<Link href="https://github.com/gofr-dev/gofr/issues" target="_blank" className='underline'>
204+
<Link
205+
href="https://github.com/gofr-dev/gofr/issues"
206+
target="_blank"
207+
className="underline"
208+
>
204209
Create an Issue on GitHub for &ldquo;
205-
<span className="break-words text-slate-900 dark:text-white">
206-
{query}
207-
</span>
208-
&rdquo;
210+
<span className="break-words text-slate-900 dark:text-white">
211+
{query}
212+
</span>
213+
&rdquo;
209214
</Link>
210215
</p>
211216
)

src/markdoc/search.mjs

Lines changed: 94 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -9,34 +9,68 @@ import * as url from 'url'
99
const __filename = url.fileURLToPath(import.meta.url)
1010
const slugify = slugifyWithCounter()
1111

12-
function toString(node) {
13-
let str =
14-
node.type === 'text' && typeof node.attributes?.content === 'string'
15-
? node.attributes.content
16-
: ''
17-
if ('children' in node) {
12+
// Comprehensive text extraction that gets EVERYTHING
13+
function extractAllTextFromNode(node) {
14+
let texts = []
15+
16+
// Direct text content
17+
if (node.type === 'text' && node.attributes?.content) {
18+
texts.push(node.attributes.content)
19+
}
20+
21+
// Code content
22+
if (node.type === 'code' && node.attributes?.content) {
23+
texts.push(node.attributes.content)
24+
}
25+
26+
// Code block content
27+
if (node.type === 'code_block' && node.attributes?.content) {
28+
texts.push(node.attributes.content)
29+
}
30+
31+
// Process all children recursively
32+
if (node.children && Array.isArray(node.children)) {
1833
for (let child of node.children) {
19-
str += toString(child)
34+
texts.push(...extractAllTextFromNode(child))
2035
}
2136
}
22-
return str
37+
38+
return texts
2339
}
2440

25-
function extractSections(node, sections, isRoot = true) {
41+
function extractSectionsAndContent(node, sections, isRoot = true) {
2642
if (isRoot) {
2743
slugify.reset()
2844
}
29-
if (node.type === 'heading' || node.type === 'paragraph') {
30-
let content = toString(node).trim()
31-
if (node.type === 'heading' && node.attributes.level <= 2) {
45+
46+
if (node.type === 'heading') {
47+
let content = extractAllTextFromNode(node).join('').trim()
48+
if (node.attributes.level <= 2) {
3249
let hash = node.attributes?.id ?? slugify(content)
3350
sections.push([content, hash, []])
3451
} else {
52+
// Include smaller headings as content
53+
if (sections.length > 0) {
54+
sections.at(-1)[2].push(content)
55+
}
56+
}
57+
} else if (
58+
node.type === 'paragraph' ||
59+
node.type === 'list' ||
60+
node.type === 'item' ||
61+
node.type === 'blockquote' ||
62+
node.type === 'code_block'
63+
) {
64+
let content = extractAllTextFromNode(node).join(' ').trim()
65+
if (content && sections.length > 0) {
3566
sections.at(-1)[2].push(content)
3667
}
37-
} else if ('children' in node) {
68+
}
69+
70+
// Recursively process children
71+
if (node.children && Array.isArray(node.children)) {
3872
for (let child of node.children) {
39-
extractSections(child, sections, false)
73+
extractSectionsAndContent(child, sections, false)
4074
}
4175
}
4276
}
@@ -60,52 +94,73 @@ export default function withSearch(nextConfig = {}) {
6094
let md = fs.readFileSync(path.join(pagesDir, file), 'utf8')
6195

6296
let sections
97+
let fullRawText = ''
6398

6499
if (cache.get(file)?.[0] === md) {
65100
sections = cache.get(file)[1]
101+
fullRawText = cache.get(file)[2]
66102
} else {
67103
let ast = Markdoc.parse(md)
68104
let title =
69105
ast.attributes?.frontmatter?.match(
70106
/^title:\s*(.*?)\s*$/m,
71107
)?.[1]
72-
sections = [[title, null, []]]
73-
extractSections(ast, sections)
74-
cache.set(file, [md, sections])
108+
109+
// Extract structured sections
110+
sections = [[title || 'Untitled', null, []]]
111+
extractSectionsAndContent(ast, sections)
112+
113+
// Extract ALL raw text content (this is the key improvement)
114+
let allRawText = extractAllTextFromNode(ast)
115+
fullRawText = allRawText.join(' ').replace(/\s+/g, ' ').trim()
116+
117+
// Also include the original markdown for fallback
118+
fullRawText += ' ' + md.replace(/^---[\s\S]*?---/, '').trim()
119+
120+
cache.set(file, [md, sections, fullRawText])
75121
}
76122

77-
return { url, sections }
123+
return { url, sections, fullRawText }
78124
})
79125

80126
// When this file is imported within the application
81127
// the following module is loaded:
82128
return `
83129
import FlexSearch from 'flexsearch'
84130
85-
let sectionIndex = new FlexSearch.Document({
86-
tokenize: 'full',
87-
document: {
88-
id: 'url',
89-
index: 'content',
90-
store: ['title', 'pageTitle'],
91-
},
92-
context: {
93-
resolution: 9,
94-
depth: 2,
95-
bidirectional: true
96-
}
97-
})
98-
131+
const searchIndex = new Map()
132+
const urlToData = new Map()
133+
99134
let data = ${JSON.stringify(data)}
100135
101-
for (let { url, sections } of data) {
136+
// Build comprehensive search data
137+
for (let { url, sections, fullRawText } of data) {
138+
let pageTitle = sections[0][0] || 'Untitled'
139+
140+
// Index full page
141+
let pageData = {
142+
url: url,
143+
title: pageTitle,
144+
pageTitle: undefined,
145+
content: fullRawText,
146+
searchText: (pageTitle + ' ' + fullRawText).toLowerCase()
147+
}
148+
urlToData.set(url, pageData)
149+
150+
// Index individual sections
102151
for (let [title, hash, content] of sections) {
103-
sectionIndex.add({
104-
url: url + (hash ? ('#' + hash) : ''),
105-
title,
106-
content: [title, ...content].join('\\n'),
107-
pageTitle: hash ? sections[0][0] : undefined,
108-
})
152+
if (hash && title) {
153+
let sectionUrl = url + '#' + hash
154+
let sectionContent = [title, ...content].join(' ')
155+
let sectionData = {
156+
url: sectionUrl,
157+
title: title,
158+
pageTitle: pageTitle,
159+
content: sectionContent,
160+
searchText: (title + ' ' + sectionContent + ' ' + pageTitle + ' ' + fullRawText).toLowerCase()
161+
}
162+
urlToData.set(sectionUrl, sectionData)
163+
}
109164
}
110165
}
111166

0 commit comments

Comments (0)