Skip to content

Commit b06e11c

Browse files
committed
fix(math): improve handling of math delimiters and escape sequences
1 parent 886cb72 commit b06e11c

File tree

6 files changed

+79
-114
lines changed

6 files changed

+79
-114
lines changed

playground/src/const/markdown.ts

Lines changed: 0 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -249,60 +249,6 @@ def echo(msg: Message):
249249
return {"reply": f"Echo: {msg.text}"}
250250
\`\`\`
251251
252-
\`\`\`tsx:src/components/ChatWindow.tsx
253-
import React, { useEffect, useRef, useState } from 'react';
254-
255-
type Message = { sender: 'me' | 'other'; text: string; time: string };
256-
257-
export default function ChatWindow() {
258-
const [messages, setMessages] = useState<Message[]>([
259-
{ sender: 'other', text: 'Hi there!', time: '10:00' },
260-
]);
261-
const [input, setInput] = useState('');
262-
const containerRef = useRef<HTMLDivElement>(null);
263-
264-
useEffect(() => {
265-
const el = containerRef.current;
266-
if (!el) return;
267-
el.scrollTop = el.scrollHeight;
268-
}, [messages]);
269-
270-
const send = () => {
271-
const text = input.trim();
272-
if (!text) return;
273-
const now = new Date();
274-
const time = now.getHours() + ':' + String(now.getMinutes()).padStart(2,'0');
275-
setMessages((m) => [...m, { sender: 'me', text, time }]);
276-
setInput('');
277-
setTimeout(() => {
278-
setMessages((m) => [...m, { sender: 'other', text: 'Auto-reply', time }]);
279-
}, 800);
280-
};
281-
282-
return (
283-
<div className="chat-window">
284-
<div className="messages" ref={containerRef}>
285-
{messages.map((m, i) => (
286-
<div key={i} className={'msg ' + m.sender}>
287-
{m.text}
288-
<span className="time">{m.time}</span>
289-
</div>
290-
))}
291-
</div>
292-
<div className="input">
293-
<input
294-
value={input}
295-
onChange={e => setInput(e.target.value)}
296-
onKeyDown={e => e.key==='Enter' && send()}
297-
placeholder="Type..."
298-
/>
299-
<button onClick={send}>Send</button>
300-
</div>
301-
</div>
302-
);
303-
}
304-
\`\`\`
305-
306252
5. Create a native module example \\(C++\\):
307253
308254
\`\`\`cpp:src/native/compute.cpp
@@ -376,13 +322,6 @@ import router from './router';
376322
createApp(App).use(router).mount('#app');
377323
\`\`\`
378324
379-
To run the application:
380-
381-
\`\`\`bash
382-
npm run electron:dev
383-
\`\`\`
384-
385-
386325
9. Mermaid graphic:
387326
388327
\`\`\`mermaid

src/utils/markdown-parser/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ export function parseMarkdownToStructure(
2222
md: MarkdownIt,
2323
): ParsedNode[] {
2424
// Ensure markdown is a string — guard against null/undefined inputs from callers
25-
const safeMarkdown = (markdown ?? '').toString().replace(/\right/g, '\\right')
25+
const safeMarkdown = (markdown ?? '').toString().replace(/([^\\])\right/g, '$1\\right')
2626

2727
// Get tokens from markdown-it
2828
const tokens = md.parse(safeMarkdown, {}) as MarkdownToken[]

src/utils/markdown-parser/inline-parsers/math-inline-parser.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ export function parseMathInlineToken(token: MarkdownToken): MathInlineNode {
77
content: token.content || '',
88
loading: !!token.loading,
99
raw:
10-
token.markup === '\\(\\)'
11-
? `\\(${token.content}\\)`
10+
token.markup.replace(/\\/g, '') === '()'
11+
? `\(${token.content}\)`
1212
: `$$${token.content}$$`,
1313
}
1414
}

src/utils/markdown/plugins/math.ts

Lines changed: 67 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,14 @@ export const KATEX_COMMANDS = [
6464
'prod',
6565
'int',
6666
'sqrt',
67+
'fbox',
68+
'boxed',
69+
'color',
70+
'rule',
71+
'edef',
72+
'fcolorbox',
73+
'hline',
74+
'hdashline',
6775
'cdot',
6876
'times',
6977
'pm',
@@ -96,13 +104,31 @@ export const ESCAPED_KATEX_COMMANDS = KATEX_COMMANDS
96104
.join('|')
97105
const CONTROL_CHARS_CLASS = '[\t\r\b\f\v]'
98106

107+
// Hoisted map of control characters -> escaped letter (e.g. '\t' -> 't').
108+
// Kept at module scope to avoid recreating on every normalization call.
109+
const CONTROL_MAP: Record<string, string> = {
110+
'\t': 't',
111+
'\r': 'r',
112+
'\b': 'b',
113+
'\f': 'f',
114+
'\v': 'v',
115+
}
116+
99117
// Precompiled regexes for isMathLike to avoid reconstructing them per-call
118+
// and prebuilt default regexes for normalizeStandaloneBackslashT when the
119+
// default command set is used.
100120
const TEX_CMD_RE = /\\[a-z]+/i
101121
const PREFIX_CLASS = '(?:\\\\|\\u0008)'
102122
const TEX_CMD_WITH_BRACES_RE = new RegExp(`${PREFIX_CLASS}(?:${ESCAPED_TEX_BRACE_COMMANDS})\\s*\\{[^}]+\\}`, 'i')
103123
const TEX_SPECIFIC_RE = /\\(?:text|frac|left|right|times)/
104124
const SUPER_SUB_RE = /\^|_/
105-
const OPS_RE = /[=+\-*/^<>]|\\times|\\pm|\\cdot|\\le|\\ge|\\neq/
125+
// Match common math operator symbols or named commands.
126+
// Avoid treating the C/C++ increment operator ("++") as a math operator by
127+
// ensuring a lone '+' isn't matched when it's part of a '++' sequence.
128+
// Use a RegExp constructed from a string to avoid issues escaping '/' in a
129+
// regex literal on some platforms/linters.
130+
// eslint-disable-next-line prefer-regex-literals
131+
const OPS_RE = new RegExp('(?<!\\+)\\+(?!\\+)|[=\\-*/^<>]|\\\\times|\\\\pm|\\\\cdot|\\\\le|\\\\ge|\\\\neq')
106132
const FUNC_CALL_RE = /[A-Z]+\s*\([^)]+\)/i
107133
const WORDS_RE = /\b(?:sin|cos|tan|log|ln|exp|sqrt|frac|sum|lim|int|prod)\b/
108134

@@ -142,54 +168,26 @@ export function isMathLike(s: string) {
142168
}
143169

144170
export function normalizeStandaloneBackslashT(s: string, opts?: MathOptions) {
145-
// Map of characters or words that may have lost a leading backslash when
146-
// interpreted in JS string literals (for example "\b" -> backspace U+0008)
147-
// Keys may use backslash escapes in the source; the actual string keys
148-
// become the unescaped character/word (e.g. '\\t' -> '\t' -> tab char).
149-
// Keys are the actual control characters as they appear in JS strings when
150-
// an escape was interpreted (e.g. '\\t' -> actual tab char '\t').
151-
const controlMap: Record<string, string> = {
152-
'\t': 't',
153-
'\r': 'r',
154-
'\b': 'b',
155-
'\f': 'f',
156-
'\v': 'v',
157-
// Note: deliberately omitting \n since real newlines are structural and
158-
// shouldn't be collapsed into a two-character escape in most cases.
159-
}
160-
161-
// use top-level KATEX_COMMANDS constant
162-
163-
// Build a regex that matches either a lone control character (tab, etc.)
164-
// or one of the known command words that is NOT already prefixed by a
165-
// backslash. We ensure the matched word isn't part of a larger word by
166-
// using a word boundary where appropriate.
167171
const commands = opts?.commands ?? KATEX_COMMANDS
168172
const escapeExclamation = opts?.escapeExclamation ?? true
169173

170-
// Choose a prebuilt regex when using default command set for performance,
171-
// otherwise build one from the provided commands. Use a negative
172-
// lookbehind to ensure the matched command isn't already escaped (i.e.
173-
// not preceded by a backslash) and not part of a larger word. We also
174-
// match literal control characters (tab, backspace, etc.). This form
175-
// avoids capturing the prefix (p1) which previously caused overlapping
176-
// replacement issues.
177-
const commandPattern = (opts?.commands == null)
178-
? `(?:${ESCAPED_KATEX_COMMANDS})`
179-
: `(?:${commands.slice().sort((a, b) => b.length - a.length).map(c => c.replace(/[.*+?^${}()|[\\]\\"\]/g, '\\$&')).join('|')})`
180-
181-
// Match either a control character or an unescaped command word.
182-
const re = new RegExp(`${CONTROL_CHARS_CLASS}|(?<!\\\\|\\w)(${commandPattern})\\b`, 'g')
183-
184-
let out = s.replace(re, (m, cmd) => {
185-
// If m is a literal control character (e.g. '\t' as actual tab), map it.
186-
if (controlMap[m] !== undefined)
187-
return `\\${controlMap[m]}`
188-
189-
// Otherwise cmd will be populated with the matched command word.
174+
const useDefault = opts?.commands == null
175+
176+
// Build the regex for this call: match control chars or unescaped command words.
177+
let re: RegExp
178+
if (useDefault) {
179+
re = new RegExp(`${CONTROL_CHARS_CLASS}|(?<!\\\\|\\w)(${ESCAPED_KATEX_COMMANDS})\\b`, 'g')
180+
}
181+
else {
182+
const commandPattern = `(?:${commands.slice().sort((a, b) => b.length - a.length).map(c => c.replace(/[.*+?^${}()|[\\]\\"\]/g, '\\$&')).join('|')})`
183+
re = new RegExp(`${CONTROL_CHARS_CLASS}|(?<!\\\\|\\w)(${commandPattern})\\b`, 'g')
184+
}
185+
186+
let out = s.replace(re, (m: string, cmd?: string) => {
187+
if (CONTROL_MAP[m] !== undefined)
188+
return `\\${CONTROL_MAP[m]}`
190189
if (cmd && commands.includes(cmd))
191190
return `\\${cmd}`
192-
193191
return m
194192
})
195193

@@ -204,14 +202,17 @@ export function normalizeStandaloneBackslashT(s: string, opts?: MathOptions) {
204202
// Use default escaped list when possible. Include TEX_BRACE_COMMANDS so
205203
// known brace-taking TeX commands (e.g. `text`, `boldsymbol`) are also
206204
// restored when their leading backslash was lost.
207-
const braceEscaped = (opts?.commands == null)
205+
const braceEscaped = useDefault
208206
? [ESCAPED_TEX_BRACE_COMMANDS, ESCAPED_KATEX_COMMANDS].filter(Boolean).join('|')
209207
: [commands.map(c => c.replace(/[.*+?^${}()|[\\]\\\]/g, '\\$&')).join('|'), ESCAPED_TEX_BRACE_COMMANDS].filter(Boolean).join('|')
208+
let result = out
210209
if (braceEscaped) {
211210
const braceCmdRe = new RegExp(`(^|[^\\\\])(${braceEscaped})\\s*\\{`, 'g')
212-
out = out.replace(braceCmdRe, (_m, p1, p2) => `${p1}\\${p2}{`)
211+
result = result.replace(braceCmdRe, (_m: string, p1: string, p2: string) => `${p1}\\${p2}{`)
213212
}
214-
return out
213+
result = result.replace(/span\{([^}]+)\}/, 'span\\{$1\\}')
214+
.replace(/\\operatorname\{span\}\{((?:[^{}]|\{[^}]*\})+)\}/, '\\operatorname{span}\\{$1\\}')
215+
return result
215216
}
216217
export function applyMath(md: MarkdownIt, mathOpts?: MathOptions) {
217218
// Inline rule for \(...\) and $$...$$ and $...$
@@ -225,6 +226,7 @@ export function applyMath(md: MarkdownIt, mathOpts?: MathOptions) {
225226
['\(', '\)'],
226227
]
227228
let searchPos = 0
229+
let jump = true
228230
// use findMatchingClose from util
229231
for (const [open, close] of delimiters) {
230232
// We'll scan the entire inline source and tokenize all occurrences
@@ -282,10 +284,24 @@ export function applyMath(md: MarkdownIt, mathOpts?: MathOptions) {
282284
continue
283285
}
284286
const content = src.slice(index + open.length, endIdx)
285-
287+
if (!isMathLike(content)) {
288+
// if no further opening delimiter follows, the skipped span may be flushed as plain text below
289+
// not math-like; skip this match and continue scanning
290+
const temp = searchPos
291+
searchPos = endIdx + close.length
292+
if (!src.includes(open, endIdx + close.length)) {
293+
const text = src.slice(temp, searchPos)
294+
if (!state.pending && state.pos + open.length < searchPos)
295+
pushText(text)
296+
if (jump)
297+
return false
298+
}
299+
continue
300+
}
286301
foundAny = true
287302

288303
if (!silent) {
304+
jump = false
289305
// push text before this math
290306
const before = src.slice(0, index)
291307
// If we already consumed some content, avoid duplicating the prefix
@@ -314,7 +330,8 @@ export function applyMath(md: MarkdownIt, mathOpts?: MathOptions) {
314330
}
315331

316332
// strong prefix handling (preserve previous behavior)
317-
pushText(isStrongPrefix ? toPushBefore.replace(/^\*+/, '') : toPushBefore)
333+
if (state.pending !== toPushBefore)
334+
pushText(isStrongPrefix ? toPushBefore.replace(/^\*+/, '') : toPushBefore)
318335

319336
const token = state.push('math_inline', 'math', 0)
320337
token.content = normalizeStandaloneBackslashT(content, mathOpts)
@@ -435,7 +452,7 @@ export function applyMath(md: MarkdownIt, mathOpts?: MathOptions) {
435452
= openDelim === '$$' ? '$$' : openDelim === '[' ? '[]' : '\\[\\]'
436453
token.map = [startLine, startLine + 1]
437454
token.block = true
438-
455+
token.loading = false
439456
state.line = startLine + 1
440457
return true
441458
}

test/debug/isMathLike.test.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,8 @@ import { isMathLike } from '../../src/utils/markdown/plugins/math'
33

44
it('recognizes \"\\boldsymbol{...}\" as math-like', () => {
55
expect(isMathLike('\\boldsymbol{\\beta}')).toBe(true)
6+
expect(isMathLike('\\(C++\\)')).toBe(false)
7+
expect(isMathLike('\\(W\\)')).toBe(false)
8+
expect(isMathLike('\\(f^{(k)}(a)\\)')).toBe(true)
9+
expect(isMathLike('\\(W^\perp\\)')).toBe(true)
610
})

test/normalize-backslash-direct.test.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,4 +37,9 @@ describe('normalizeStandaloneBackslashT direct tests', () => {
3737
const out = normalizeStandaloneBackslashT('a!b')
3838
expect(out).toBe('a\\!b')
3939
})
40+
41+
it('span to \\{\\}', () => {
42+
const out = normalizeStandaloneBackslashT('operatorname{span}{\boldsymbol{alpha}}')
43+
expect(out).toBe('\\operatorname{span}\\{\\boldsymbol{\\alpha}\\}')
44+
})
4045
})

0 commit comments

Comments
 (0)