
Commit 8423a86

feat(markdown-parser): implement link and list item normalization plugins; update version to 0.0.9
1 parent 4dea770 commit 8423a86

File tree: 18 files changed, +616 −532 lines

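The headline change in 0.0.9 is that token normalization (links, strong, list items, tables) now happens inside markdown-it's own parsing pass instead of as ad-hoc post-processing. A minimal consumer sketch, assuming `factory` is re-exported from the package root as `src/factory.ts` defines it (the input string is illustrative):

```ts
import { factory } from 'stream-markdown-parser'

// factory() installs the normalization plugins by default (see factory.ts below).
const md = factory()

// A partially streamed chunk: the link has no closing paren yet.
const tokens = md.parse('See [docs](https://example.co', {})

// The inline token's children end in link_open/text with no link_close,
// which downstream rendering treats as a link in a loading state.
const inline = tokens.find(t => t.type === 'inline')
console.log(inline?.children?.map(t => t.type))
```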

package.json

Lines changed: 4 additions & 4 deletions
```diff
@@ -2,7 +2,7 @@
   "name": "vue-renderer-markdown",
   "type": "module",
   "version": "0.0.61-beta.0",
-  "packageManager": "pnpm@10.19.0",
+  "packageManager": "pnpm@10.20.0",
   "description": "Vue 3 Markdown renderer optimized for large docs: progressive Mermaid, streaming diff code blocks, and fast real-time preview.",
   "author": "Simon He",
   "license": "MIT",
@@ -103,13 +103,13 @@
   },
   "dependencies": {
     "@floating-ui/dom": "^1.7.4",
-    "stream-markdown-parser": "^0.0.8"
+    "stream-markdown-parser": "^0.0.9"
   },
   "devDependencies": {
     "@antfu/eslint-config": "^5.4.1",
     "@types/node": "^18.19.130",
     "@vitejs/plugin-vue": "^5.2.4",
-    "@vitest/ui": "^4.0.3",
+    "@vitest/ui": "^4.0.4",
     "@vue/test-utils": "^2.4.6",
     "autoprefixer": "^10.4.21",
     "bumpp": "^8.2.1",
@@ -129,7 +129,7 @@
     "vite-plugin-dts": "^4.5.4",
     "vite-plugin-pages": "^0.33.1",
     "vite-svg-loader": "^5.1.0",
-    "vitest": "^4.0.3",
+    "vitest": "^4.0.4",
     "vue": "^3.5.22",
     "vue-tsc": "^2.2.12"
   },
```

packages/markdown-parser/package.json

Lines changed: 4 additions & 4 deletions
```diff
@@ -1,8 +1,8 @@
 {
   "name": "stream-markdown-parser",
   "type": "module",
-  "version": "0.0.8",
-  "packageManager": "pnpm@10.19.0",
+  "version": "0.0.9",
+  "packageManager": "pnpm@10.20.0",
   "description": "Pure markdown parser and renderer utilities with streaming support - framework agnostic",
   "author": "Simon He",
   "license": "MIT",
@@ -63,8 +63,8 @@
   },
   "devDependencies": {
     "@types/markdown-it": "^14.1.2",
-    "@types/node": "^22.18.12",
-    "tsdown": "^0.15.10",
+    "@types/node": "^22.18.13",
+    "tsdown": "^0.15.11",
     "typescript": "^5.9.3",
     "vitest": "^3.2.4"
   }
```

packages/markdown-parser/src/factory.ts

Lines changed: 19 additions & 0 deletions
```diff
@@ -2,6 +2,11 @@ import type { MathOptions } from './config'
 import MarkdownIt from 'markdown-it'
 import { getDefaultMathOptions } from './config'
 import { applyContainers } from './plugins/containers'
+import { applyFixLinkInline } from './plugins/fixLinkInline'
+import { applyFixLinkTokens } from './plugins/fixLinkTokens'
+import { applyFixListItem } from './plugins/fixListItem'
+import { applyFixStrongTokens } from './plugins/fixStrongTokens'
+import { applyFixTableTokens } from './plugins/fixTableTokens'
 import { applyMath } from './plugins/math'
 import { applyRenderRules } from './renderers'
 
@@ -26,6 +31,20 @@ export function factory(opts: FactoryOptions = {}): MarkdownIt {
   }
   if (opts.enableContainers ?? true)
     applyContainers(md)
+  // Apply the link-fixing plugins early so tokens produced during parsing
+  // have corrected inline children. These run during markdown-it's
+  // core stage (after inline tokenization) instead of after parse.
+  // Install the inline-level link tokenizer before the built-in 'link' rule.
+  applyFixLinkInline(md)
+  // Retain the core-stage fix as a fallback for any cases the inline
+  // tokenizer does not handle.
+  applyFixLinkTokens(md)
+  // Also apply strong-token normalization at the same stage.
+  applyFixStrongTokens(md)
+  // Apply list-item inline normalization as well.
+  applyFixListItem(md)
+  // Apply table token normalization at the block stage.
+  applyFixTableTokens(md)
   applyRenderRules(md)
 
   return md
```
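Where each call above hooks into markdown-it matters more than the call order: `applyFixLinkInline` registers an inline-stage rule that runs during tokenization, while the other four register core-stage rules that run once per parse over the finished token stream. A generic sketch of those two hook points using markdown-it's standard ruler API (the rule names and bodies here are illustrative, not the plugins' actual logic):

```ts
import MarkdownIt from 'markdown-it'

const md = new MarkdownIt()

// Inline stage: runs while inline text is being tokenized. Registering it
// before the built-in 'link' rule lets it claim input such as "[x](http://a"
// before the default tokenizer rejects the unclosed form.
md.inline.ruler.before('link', 'demo_inline_rule', (_state, _silent) => {
  // Recognize a pattern, push tokens, advance state.pos, return true...
  return false // ...or decline and fall through to the default rules.
})

// Core stage: runs once per parse, after the 'inline' rule has filled in
// every inline token's `children`, so it can rewrite them in place.
md.core.ruler.after('inline', 'demo_core_rule', (state) => {
  for (const token of state.tokens) {
    if (token.type === 'inline' && token.children)
      token.children = token.children.filter(t => t.type !== 'text' || t.content !== '')
  }
})
```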

packages/markdown-parser/src/parser/index.ts

Lines changed: 6 additions & 2 deletions
```diff
@@ -1,6 +1,5 @@
 import type MarkdownIt from 'markdown-it'
 import type { MarkdownToken, ParsedNode, ParseOptions } from '../types'
-import { fixTableTokens } from './fixTableTokens'
 import { parseInlineTokens } from './inline-parsers'
 import { parseFenceToken } from './inline-parsers/fence-parser'
 import { parseAdmonition } from './node-parsers/admonition-parser'
@@ -49,6 +48,7 @@ export function parseMarkdownToStructure(
   if (pre && typeof pre === 'function') {
     transformedTokens = pre(tokens) || tokens
   }
+
   // Process the tokens into our structured format
   let result = processTokens(transformedTokens)
 
@@ -81,7 +81,11 @@ export function processTokens(tokens: MarkdownToken[]): ParsedNode[] {
 
   const result: ParsedNode[] = []
   let i = 0
-  tokens = fixTableTokens(tokens)
+  // Note: table token normalization is applied during markdown-it parsing
+  // via the `applyFixTableTokens` plugin (core.ruler.after('block')).
+  // Link/strong/list-item fixes are applied during the inline stage by
+  // their respective plugins. That keeps parsing-time fixes centralized
+  // and avoids ad-hoc post-processing here.
   while (i < tokens.length) {
     const token = tokens[i]
     switch (token.type) {
```
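With the built-in fixes moved into plugins, the caller-supplied `pre` hook seen in the second hunk is the remaining token-level escape hatch before `processTokens`. A sketch of its shape, assuming `ParseOptions.pre` takes and returns the markdown-it token array as that hunk suggests (the filter itself is a hypothetical example):

```ts
// Assumes MarkdownToken is exported from the package root.
import type { MarkdownToken } from 'stream-markdown-parser'

// Runs after md.parse but before processTokens, so it already sees
// children normalized by the core-stage plugins.
const pre = (tokens: MarkdownToken[]): MarkdownToken[] =>
  tokens.filter(t => t.type !== 'hr') // e.g. drop thematic breaks
```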

packages/markdown-parser/src/parser/inline-parsers/fixListItem.ts

Lines changed: 0 additions & 12 deletions
This file was deleted.

packages/markdown-parser/src/parser/inline-parsers/index.ts

Lines changed: 38 additions & 8 deletions
```diff
@@ -3,9 +3,6 @@ import { parseCheckboxInputToken, parseCheckboxToken } from './checkbox-parser'
 import { parseEmojiToken } from './emoji-parser'
 import { parseEmphasisToken } from './emphasis-parser'
 import { parseFenceToken } from './fence-parser'
-import { fixLinkToken } from './fixLinkToken'
-import { fixListItem } from './fixListItem'
-import { fixStrongTokens } from './fixStrongTokens'
 import { parseFootnoteRefToken } from './footnote-ref-parser'
 import { parseHardbreakToken } from './hardbreak-parser'
 import { parseHighlightToken } from './highlight-parser'
@@ -30,9 +27,10 @@ export function parseInlineTokens(tokens: MarkdownToken[], raw?: string, pPreTok
   let currentTextNode: TextNode | null = null
 
   let i = 0
-  tokens = fixStrongTokens(tokens)
-  tokens = fixListItem(tokens)
-  tokens = fixLinkToken(tokens)
+  // Note: strong-token normalization and list-item normalization are
+  // applied during markdown-it parsing via core rules (plugins that
+  // run after 'inline'). Inline parsers should receive normalized
+  // children and only focus on parsing.
 
   // Helpers to manage text node merging and pushing parsed nodes
   function resetCurrentTextNode() {
@@ -560,8 +558,40 @@ export function parseInlineTokens(tokens: MarkdownToken[], raw?: string, pPreTok
       }
       const { node, nextIndex } = parseLinkToken(tokens, i)
       i = nextIndex
-
-      node.loading = false
+      // Determine the loading state conservatively: if the link token parser
+      // marked it as loading already, keep it; otherwise compute from raw
+      // and href as a fallback so unclosed links remain marked as loading.
+      const hrefAttr = token.attrs?.find(([name]) => name === 'href')?.[1]
+      const hrefStr = String(hrefAttr ?? '')
+      // Only override the link parser's default loading state when we
+      // actually have an href to check against the raw source. If the
+      // tokenizer emitted a link_open without an href (partial tokenizers
+      // may do this), prefer parseLinkToken's initial loading value
+      // (which defaults to true for mid-state links).
+      if (raw && hrefStr) {
+        // More robust: locate the first "](" after the link text and see if
+        // there's a matching ')' that closes the href. This avoids false
+        // positives when other parentheses appear elsewhere in the source.
+        const openIdx = raw.indexOf('](')
+        if (openIdx === -1) {
+          // No explicit link start found in raw; be conservative and keep
+          // the parser's default loading value.
+        }
+        else {
+          const closeIdx = raw.indexOf(')', openIdx + 2)
+          if (closeIdx === -1) {
+            node.loading = true
+          }
+          else {
+            // Check that the href inside the parens corresponds to this token
+            const inside = raw.slice(openIdx + 2, closeIdx)
+            if (inside.includes(hrefStr))
+              node.loading = false
+            else
+              node.loading = true
+          }
+        }
+      }
       pushParsed(node)
     }
```
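Pulled out of context, the new fallback reduces to this small heuristic (a simplified sketch; the real branch above also defers to `parseLinkToken`'s default when `raw` or the href is missing):

```ts
// true      -> the link is closed in the source (loading = false)
// false     -> still streaming (loading = true)
// undefined -> no "](" found; keep the parser's default loading value
function linkLooksClosed(raw: string, href: string): boolean | undefined {
  const openIdx = raw.indexOf('](')
  if (openIdx === -1)
    return undefined
  const closeIdx = raw.indexOf(')', openIdx + 2)
  if (closeIdx === -1)
    return false
  return raw.slice(openIdx + 2, closeIdx).includes(href)
}

linkLooksClosed('[docs](https://a.dev)', 'https://a.dev') // true
linkLooksClosed('[docs](https://a.de', 'https://a.de') // false
linkLooksClosed('no link here (aside)', 'https://a.dev') // undefined
```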

packages/markdown-parser/src/plugins/fixLinkInline.ts

Lines changed: 60 additions & 0 deletions

```diff
@@ -0,0 +1,60 @@
+import type MarkdownIt from 'markdown-it'
+
+export function applyFixLinkInline(md: MarkdownIt) {
+  // Inline tokenizer that tries to recognize [text](href) and loading
+  // link forms like "[x](http://a" earlier, producing link_open/text/link_close
+  // tokens so downstream code sees them as links during the inline pass.
+  const rule = (state: unknown, silent: boolean) => {
+    const s = state as unknown as { src: string, pos: number, push: (type: string, tag?: string, nesting?: number) => any }
+    const start = s.pos
+    if (s.src[start] !== '[')
+      return false
+
+    // Don't handle image syntax here
+    if (start > 0 && s.src[start - 1] === '!')
+      return false
+
+    // Look for closing ']' and opening '(' after it
+    const rest = s.src.slice(start)
+    // eslint-disable-next-line regexp/no-useless-quantifier
+    const m = /^\[([^\]]*)\]\(([^)\s]*)?/.exec(rest)
+    if (!m)
+      return false
+
+    if (silent)
+      return true
+
+    const text = m[1] ?? ''
+    const href = m[2] ?? ''
+    // Be conservative: if the link text contains characters that indicate
+    // emphasis or emoji shortcodes (e.g. '*' or ':'), don't pre-tokenize
+    // here; let the core inline parser handle these ambiguous mid-states.
+    if (text.includes('*') || text.includes(':'))
+      return false
+    const idxClose = rest.indexOf(')')
+    const hasClosingParen = idxClose !== -1
+
+    // push link_open
+    const open = s.push('link_open', 'a', 1)
+    open.attrs = [['href', href]]
+    // push inner text
+    const txt = s.push('text', '', 0)
+    txt.content = text
+
+    // only emit link_close if the source actually contained a closing paren
+    if (hasClosingParen) {
+      s.push('link_close', 'a', -1)
+      // consume through the closing paren
+      s.pos += idxClose + 1
+    }
+    else {
+      // consume the matched prefix (e.g. "[x](http://a") but do not
+      // emit a link_close so downstream logic treats this as a loading link
+      s.pos += m[0].length
+    }
+    return true
+  }
+
+  // Insert before the default 'link' rule to take precedence
+  md.inline.ruler.before('link', 'fix_link_inline', rule)
+}
```
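The observable effect of the rule, sketched with markdown-it's `parseInline` (this assumes the plugin is installed via the package's `factory`, as factory.ts above shows):

```ts
import { factory } from 'stream-markdown-parser'

const md = factory()

const closed = md.parseInline('[x](http://a)', {})[0]?.children
// -> link_open, text, link_close

const streaming = md.parseInline('[x](http://a', {})[0]?.children
// -> link_open, text (no link_close, so downstream marks the link loading)

console.log(closed?.map(t => t.type), streaming?.map(t => t.type))
```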

packages/markdown-parser/src/parser/inline-parsers/fixLinkToken.ts renamed to packages/markdown-parser/src/plugins/fixLinkTokens.ts

Lines changed: 36 additions & 9 deletions
```diff
@@ -1,12 +1,39 @@
-import type { MarkdownToken } from '../../types'
+import type MarkdownIt from 'markdown-it'
+import type { MarkdownToken } from '../types'
+
+export function applyFixLinkTokens(md: MarkdownIt) {
+  // Run after the inline rule so markdown-it has produced inline tokens
+  // for block-level tokens; we then adjust each inline token's children
+  // so downstream code receives corrected token arrays during the same
+  // parsing pass.
+  md.core.ruler.after('inline', 'fix_link_tokens', (state: unknown) => {
+    const s = state as unknown as { tokens?: Array<{ type?: string, children?: any[] }> }
+    const toks = s.tokens ?? []
+    for (let i = 0; i < toks.length; i++) {
+      const t = toks[i]
+      if (t && t.type === 'inline' && Array.isArray(t.children)) {
+        try {
+          t.children = fixLinkToken(t.children)
+        }
+        catch (e) {
+          // Swallow errors to avoid breaking parsing; keep original children
+          // so parse still succeeds even if our fix fails for an unexpected shape.
+          // Errors should be rare and indicate malformed token arrays.
+
+          console.error('[applyFixLinkTokens] failed to fix inline children', e)
+        }
+      }
+    }
+  })
+}
 
 // narrow helper to reduce non-null assertions on text tokens
 function isTextToken(t?: MarkdownToken): t is MarkdownToken & { type: 'text', content: string } {
   return !!t && t.type === 'text' && typeof (t as any).content === 'string'
 }
 
-export function fixLinkToken(tokens: MarkdownToken[]): MarkdownToken[] {
-  const tokensAny = tokens as unknown as import('../../types').MarkdownToken[]
+function fixLinkToken(tokens: MarkdownToken[]): MarkdownToken[] {
+  const tokensAny = tokens as unknown as MarkdownToken[]
   tokens = fixLinkToken4(fixLinkToken3(tokens))
   if (tokens.length < 5)
     return tokens
@@ -50,8 +77,8 @@ export function fixLinkToken(tokens: MarkdownToken[]): MarkdownToken[] {
   return tokens
 }
 
-export function fixLinkTokens2(tokens: MarkdownToken[]): MarkdownToken[] {
-  const tokensAny = tokens as unknown as import('../../types').MarkdownToken[]
+function fixLinkTokens2(tokens: MarkdownToken[]): MarkdownToken[] {
+  const tokensAny = tokens as unknown as MarkdownToken[]
   if (tokens.length < 8)
     return tokens
   let length = tokens.length
@@ -98,8 +125,8 @@ export function fixLinkTokens2(tokens: MarkdownToken[]): MarkdownToken[] {
   return tokens
 }
 
-export function fixLinkToken3(tokens: MarkdownToken[]): MarkdownToken[] {
-  const tokensAny = tokens as unknown as import('../../types').MarkdownToken[]
+function fixLinkToken3(tokens: MarkdownToken[]): MarkdownToken[] {
+  const tokensAny = tokens as unknown as MarkdownToken[]
   const last = tokens[tokens.length - 1]
   const preLast = tokens[tokens.length - 2]
   const fixedTokens = [...tokens]
@@ -128,8 +155,8 @@ export function fixLinkToken3(tokens: MarkdownToken[]): MarkdownToken[] {
   return fixedTokens
 }
 
-export function fixLinkToken4(tokens: MarkdownToken[]): MarkdownToken[] {
-  const tokensAny = tokens as unknown as import('../../types').MarkdownToken[]
+function fixLinkToken4(tokens: MarkdownToken[]): MarkdownToken[] {
+  const tokensAny = tokens as unknown as MarkdownToken[]
   const fixedTokens = [...tokens]
   for (let i = tokens.length - 1; i >= 3; i--) {
     const token = tokens[i]
```
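A concrete case this core-stage fallback is kept for: the inline rule in fixLinkInline.ts declines whenever the link text contains '*' or ':' (ambiguous emphasis or emoji mid-states), so input like the following reaches markdown-it's default tokenizer and is left to `fix_link_tokens` to normalize after the inline stage (illustrative input):

```ts
import { factory } from 'stream-markdown-parser'

const md = factory()

// The inline tokenizer skips this unclosed link because its text contains
// '*'; the fix_link_tokens core rule repairs the inline children instead.
md.parse('[*bold* docs](https://example.dev', {})
```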
packages/markdown-parser/src/plugins/fixListItem.ts

Lines changed: 35 additions & 0 deletions

```diff
@@ -0,0 +1,35 @@
+import type MarkdownIt from 'markdown-it'
+import type { MarkdownToken } from '../types'
+
+export function applyFixListItem(md: MarkdownIt) {
+  // Normalize list-item related inline tokens after inline tokenization
+  // so downstream parsers see corrected children.
+  md.core.ruler.after('inline', 'fix_list_item_tokens', (state: unknown) => {
+    const s = state as unknown as { tokens?: Array<{ type?: string, children?: any[] }> }
+    const toks = s.tokens ?? []
+    for (let i = 0; i < toks.length; i++) {
+      const t = toks[i]
+      if (t && t.type === 'inline' && Array.isArray(t.children)) {
+        try {
+          t.children = fixListItem(t.children)
+        }
+        catch (e) {
+          // Keep original children on error to avoid breaking parsing
+
+          console.error('[applyFixListItem] failed to fix inline children', e)
+        }
+      }
+    }
+  })
+}
+
+function fixListItem(tokens: MarkdownToken[]): MarkdownToken[] {
+  const last = tokens[tokens.length - 1]
+  const lastContent = String(last?.content ?? '')
+
+  if (last?.type === 'text' && (/^\s*\d+\.\s*$/.test(lastContent) && tokens[tokens.length - 2]?.tag === 'br')) {
+    tokens.splice(tokens.length - 1, 1)
+  }
+
+  return tokens
+}
```
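The case `fixListItem` targets shows up when an ordered list is still streaming and the chunk ends on the bare marker of the next item. A sketch (illustrative input; `factory` wires the plugin in, as factory.ts above shows):

```ts
import { factory } from 'stream-markdown-parser'

const md = factory()

// Mid-stream, the paragraph's inline children end with a softbreak
// (tag 'br') followed by a dangling '2.' text token; the core rule drops
// that trailing fragment so no phantom list marker is rendered while the
// next item is still arriving.
md.parse('first item text\n2.', {})
```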
