@@ -9,34 +9,68 @@ import * as url from 'url'
 const __filename = url.fileURLToPath(import.meta.url)
 const slugify = slugifyWithCounter()
 
-function toString(node) {
-  let str =
-    node.type === 'text' && typeof node.attributes?.content === 'string'
-      ? node.attributes.content
-      : ''
-  if ('children' in node) {
+// Comprehensive text extraction that gets EVERYTHING
+function extractAllTextFromNode(node) {
+  let texts = []
+
+  // Direct text content
+  if (node.type === 'text' && node.attributes?.content) {
+    texts.push(node.attributes.content)
+  }
+
+  // Inline code content
+  if (node.type === 'code' && node.attributes?.content) {
+    texts.push(node.attributes.content)
+  }
+
+  // Code block content
+  if (node.type === 'code_block' && node.attributes?.content) {
+    texts.push(node.attributes.content)
+  }
+
+  // Process all children recursively
+  if (node.children && Array.isArray(node.children)) {
     for (let child of node.children) {
-      str += toString(child)
+      texts.push(...extractAllTextFromNode(child))
     }
   }
-  return str
+
+  return texts
 }
 
-function extractSections(node, sections, isRoot = true) {
+function extractSectionsAndContent(node, sections, isRoot = true) {
   if (isRoot) {
     slugify.reset()
   }
-  if (node.type === 'heading' || node.type === 'paragraph') {
-    let content = toString(node).trim()
-    if (node.type === 'heading' && node.attributes.level <= 2) {
+
+  if (node.type === 'heading') {
+    let content = extractAllTextFromNode(node).join('').trim()
+    if (node.attributes.level <= 2) {
       let hash = node.attributes?.id ?? slugify(content)
       sections.push([content, hash, []])
     } else {
+      // Include smaller headings as content
+      if (sections.length > 0) {
+        sections.at(-1)[2].push(content)
+      }
+    }
+  } else if (
+    node.type === 'paragraph' ||
+    node.type === 'list' ||
+    node.type === 'item' ||
+    node.type === 'blockquote' ||
+    node.type === 'code_block'
+  ) {
+    let content = extractAllTextFromNode(node).join(' ').trim()
+    if (content && sections.length > 0) {
       sections.at(-1)[2].push(content)
     }
-  } else if ('children' in node) {
+  }
+
+  // Recursively process children
+  if (node.children && Array.isArray(node.children)) {
     for (let child of node.children) {
-      extractSections(child, sections, false)
+      extractSectionsAndContent(child, sections, false)
     }
   }
 }
@@ -60,52 +94,73 @@ export default function withSearch(nextConfig = {}) {
               let md = fs.readFileSync(path.join(pagesDir, file), 'utf8')
 
               let sections
+              let fullRawText = ''
 
               if (cache.get(file)?.[0] === md) {
                 sections = cache.get(file)[1]
+                fullRawText = cache.get(file)[2]
               } else {
                 let ast = Markdoc.parse(md)
                 let title =
                   ast.attributes?.frontmatter?.match(
                     /^title:\s*(.*?)\s*$/m,
                   )?.[1]
-                sections = [[title, null, []]]
-                extractSections(ast, sections)
-                cache.set(file, [md, sections])
+
+                // Extract structured sections
+                sections = [[title || 'Untitled', null, []]]
+                extractSectionsAndContent(ast, sections)
+
+                // Extract ALL raw text content (this is the key improvement)
+                let allRawText = extractAllTextFromNode(ast)
+                fullRawText = allRawText.join(' ').replace(/\s+/g, ' ').trim()
+
+                // Also include the original markdown (frontmatter stripped) as a fallback
+                fullRawText += ' ' + md.replace(/^---[\s\S]*?---/, '').trim()
+
+                cache.set(file, [md, sections, fullRawText])
               }
 
-              return { url, sections }
+              return { url, sections, fullRawText }
             })
 
             // When this file is imported within the application
             // the following module is loaded:
             return `
               import FlexSearch from 'flexsearch'
 
-              let sectionIndex = new FlexSearch.Document({
-                tokenize: 'full',
-                document: {
-                  id: 'url',
-                  index: 'content',
-                  store: ['title', 'pageTitle'],
-                },
-                context: {
-                  resolution: 9,
-                  depth: 2,
-                  bidirectional: true
-                }
-              })
-
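+              // Plain Maps replace the FlexSearch.Document index removed above;
+              // the pre-lowercased searchText fields built below are presumably
+              // matched by a simple substring scan in code outside these hunks.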
+              const searchIndex = new Map()
+              const urlToData = new Map()
+
               let data = ${JSON.stringify(data)}
 
-              for (let { url, sections } of data) {
+              // Build comprehensive search data
+              for (let { url, sections, fullRawText } of data) {
+                let pageTitle = sections[0][0] || 'Untitled'
+
+                // Index full page
+                let pageData = {
+                  url: url,
+                  title: pageTitle,
+                  pageTitle: undefined,
+                  content: fullRawText,
+                  searchText: (pageTitle + ' ' + fullRawText).toLowerCase()
+                }
+                urlToData.set(url, pageData)
+
+                // Index individual sections
                 for (let [title, hash, content] of sections) {
-                  sectionIndex.add({
-                    url: url + (hash ? ('#' + hash) : ''),
-                    title,
-                    content: [title, ...content].join('\\n'),
-                    pageTitle: hash ? sections[0][0] : undefined,
-                  })
+                  if (hash && title) {
+                    let sectionUrl = url + '#' + hash
+                    let sectionContent = [title, ...content].join(' ')
+                    let sectionData = {
+                      url: sectionUrl,
+                      title: title,
+                      pageTitle: pageTitle,
+                      content: sectionContent,
+                      searchText: (title + ' ' + sectionContent + ' ' + pageTitle + ' ' + fullRawText).toLowerCase()
+                    }
+                    urlToData.set(sectionUrl, sectionData)
+                  }
                 }
               }
 
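Both hunks end before the generated module's export, so the code that actually consumes `urlToData` is not part of the visible diff. As a rough sketch only, assuming the pre-lowercased `searchText` fields are meant for case-insensitive substring matching (the `search` name and its `limit` parameter are hypothetical, not taken from the commit), the consumer could look like this:

```js
// Hypothetical consumer of the urlToData map built in the generated
// module above; not part of this commit's visible hunks.
export function search(query, limit = 5) {
  let needle = query.toLowerCase().trim()
  if (!needle) return []
  let results = []
  for (let item of urlToData.values()) {
    // searchText was lowercased at index time, so a plain substring
    // test matches case-insensitively
    if (item.searchText.includes(needle)) {
      results.push({ url: item.url, title: item.title, pageTitle: item.pageTitle })
      if (results.length >= limit) break
    }
  }
  return results
}
```

Compared with the removed FlexSearch index, a scan like this trades tokenized, ranked matching for a simple linear pass over every page and section, which is easy to reason about but returns unranked results.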