"""
A container for the articles.
"""
from datetime import datetime, timezone
import re
from typing import Iterable, List, Set

from bs4 import BeautifulSoup
import markdown
from slugify import slugify

from tag import register_tags, Tag
from titleoffset import TitleOffset

SLUG_BLACKLIST = ("tags", "browse")
ALL_ARTICLES: Set['Article'] = set()


class Article:
    """A container for articles."""

    def __init__(self, path: str):
        self.date: datetime
        self._tags: Set[Tag]
        self.title: str
        self.slug: str
        self.draft: bool = False
        with open(path, 'r') as article:
            while (line := article.readline().strip()) != "---":
                self._parse_property(line)
            self.contents = article.read()
        if not self.draft:
            ALL_ARTICLES.add(self)
            register_tags(self._tags)

    @property
    def tags(self) -> List[Tag]:
        """Return a sorted list of this article's tags."""
        output = list(self._tags)
        output.sort(key=lambda t: t.name.lower())
        return output

    def similar_articles(self) -> Iterable['Article']:
        """Return 3 similar articles (based on the number of common tags)."""
        similar_articles = list(ALL_ARTICLES)
        similar_articles.remove(self)
        # Sort in alphabetical order first, so ties are broken by title
        similar_articles.sort(key=lambda art: art.title)
        # Then sort by number of common tags (Python's sort is stable)
        similar_articles.sort(key=lambda art: len(art._tags & self._tags), reverse=True)
        return similar_articles[:3]

    def render(self, offset: int = 0) -> str:
        """Render this article's markdown."""
        return markdown.markdown(self.contents, output_format="html5",
                                 extensions=['md_in_html', TitleOffset(offset=offset)])

    def excerpt(self) -> str:
        """Return the beginning of this article (without HTML)."""
        # Get the article's "raw" text
        full_render = self.render()
        soup = BeautifulSoup(full_render, features="html.parser")
        text = soup.get_text().strip()
        text = re.sub(r"\s", " ", text)
        # Extract around 120 characters, cutting at a word boundary
        match = re.match(r"^(?P<text>.{,120})(?:\s|$)", text)
        output = match['text']
        if not output.endswith(("?", "…", "!", ".")):
            output += "…"
        return output

    @property
    def url(self) -> str:
        """Return the URL for this article."""
        return f"/articles/{self.slug}/"

    def _parse_property(self, line: str):
        """Parse a single property line."""
        key, value = line.split(sep=":", maxsplit=1)
        key = key.strip()
        value = value.strip()
        if key == "date":
            self.date = datetime.fromisoformat(value)
            self.date = self.date.astimezone(timezone.utc)
        elif key == "title":
            self.title = value
            self.slug = slugify(value, lower=False)
            if self.slug in SLUG_BLACKLIST:
                raise ValueError(f"Change the title for {self.title}.")
        elif key == "tags":
            self._tags = set(Tag(name) for name in value.split(sep=","))
        elif key == "draft":
            self.draft = value == "yes"
        else:
            raise ValueError(f"Unknown property: {key}.")