-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparser.py
More file actions
81 lines (61 loc) · 2.32 KB
/
parser.py
File metadata and controls
81 lines (61 loc) · 2.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import re
def slugify(text):
    """Return a lowercase slug of *text* suitable for use as an HTML id.

    Leading and trailing whitespace is dropped, each remaining run of
    whitespace becomes a single hyphen, and every character that is
    neither a word character nor a hyphen is removed.
    """
    hyphenated = re.sub(r'\s+', '-', text.strip())
    cleaned = re.sub(r'[^\w\-]+', '', hyphenated)
    # Lower-casing happens last, exactly as the slug is returned.
    return cleaned.lower()
def markdown_to_html(markdown_text):
    """Convert a small subset of Markdown to HTML.

    Supported syntax: inline code (`code`), bold (**text**), italics
    (*text*), links ([text](url)), hard line breaks (two or more spaces
    at the end of a line), headings of level 1-3 (each given an ``id``
    produced by ``slugify``), and paragraphs separated by blank lines.

    Raw HTML and special characters in the input are passed through
    without escaping.

    Args:
        markdown_text: The Markdown source string.

    Returns:
        The generated HTML as a string, with the heading and paragraph
        elements separated by newlines.
    """
    # Stash inline code spans behind placeholders so the emphasis and link
    # rules below cannot rewrite characters inside them (e.g. `a*b*c`).
    # NUL delimiters are used because they do not occur in ordinary text.
    code_spans = []

    def stash_code(match):
        code_spans.append(match.group(1))
        return f'\x00{len(code_spans) - 1}\x00'

    text = re.sub(r'`([^`]*)`', stash_code, markdown_text)

    # Bold must run before italics so that ** is not consumed as two *.
    text = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', text)
    text = re.sub(r'\*(.+?)\*', r'<em>\1</em>', text)

    # Links: [text](url)
    text = re.sub(r'\[(.+?)\]\((.+?)\)', r'<a href="\2">\1</a>', text)

    def restore_code(match):
        index = int(match.group(1))
        if index < len(code_spans):
            return f'<code>{code_spans[index]}</code>'
        # Not one of our placeholders (NULs were present in the input).
        return match.group(0)

    text = re.sub(r'\x00([0-9]+)\x00', restore_code, text)

    # Hard line breaks: two or more spaces before the newline. This has to
    # happen before the per-line strip() in the paragraph pass below.
    text = re.sub(r'([^\n]) {2,}\n', r'\1<br>\n', text)

    def heading_repl(match):
        level = len(match.group(1))
        heading_text = match.group(2).strip()
        # Build the id from the visible text only; inline markup has
        # already been rendered to tags at this point.
        plain_text = re.sub(
            r'</?(?:strong|em|code|a|br)\b[^>]*>', '', heading_text
        )
        heading_id = slugify(plain_text)
        return f'<h{level} id="{heading_id}">{heading_text}</h{level}>'

    # [ \t]+ rather than \s+: \s would match newlines and let a lone '#'
    # swallow the following line as its heading text.
    text = re.sub(
        r'^(#{1,3})[ \t]+(.+)$', heading_repl, text, flags=re.MULTILINE
    )

    # Paragraph pass: blank lines and headings end the current paragraph;
    # everything else (including lines that start with inline tags such as
    # <strong>, <em>, <code> or <a>) is paragraph content.
    html_lines = []
    paragraph = []

    def flush_paragraph():
        if paragraph:
            html_lines.append('<p>' + ' '.join(paragraph) + '</p>')
            paragraph.clear()

    for line in text.split('\n'):
        stripped = line.strip()
        if not stripped:
            flush_paragraph()
        elif re.match(r'<h[1-3][\s>]', stripped):
            flush_paragraph()
            html_lines.append(stripped)
        else:
            paragraph.append(stripped)
    flush_paragraph()

    return '\n'.join(html_lines)
# --- Test Harness ---
if __name__ == "__main__":
    # Demo input covering each construct that markdown_to_html handles.
    sample_markdown = """
# Sample Title
Here is a **bold** statement, and *emphasis* as well.
Now an [inline link](http://github.com) and some `inline code`.
## Subheading Example
Paragraph with two spaces and
a line break.
### Tiny Heading
Just text.
Another paragraph after blank line.
"""

    # Echo the source first, then the conversion result.
    print("Input Markdown:\n", sample_markdown)
    print("\nGenerated HTML:\n")
    generated_html = markdown_to_html(sample_markdown)
    print(generated_html)