Skip to content

Commit

Permalink
Merge branch 'sukiletxe-tosefta'
Browse files (browse the repository at this point in the history)
  • Loading branch information
wyattscarpenter committed Dec 16, 2020
2 parents b335030 + 7b032fc commit a1e1389
Showing 1 changed file with 19 additions and 6 deletions.
25 changes: 19 additions & 6 deletions get_unsong.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -11,6 +11,7 @@

# Module-level accumulators of rendered HTML strings. get_next() appends each
# fetched page's html to one of these lists, and create_book() joins the
# collected entries with blank lines when writing the book.
CHAPTERS = []
# Author-note pages held back for the "Appendix: Author Notes" section.
AUTHOR_NOTES = []
# Pages whose heading starts with "tosefta"; written in their own <section>.
TOSEFTA = []
header = """<!doctype html>
<html>
<head>
Expand All @@ -22,6 +23,7 @@
# Closing markup; create_book() writes it last, after every section.
footer = """</body></html>"""
# Accepts True, False, or "appendix". When truthy, get_next() can append
# author-note pages to CHAPTERS; "appendix" presumably routes them to
# AUTHOR_NOTES instead -- TODO confirm against the full get_next() body.
INCLUDE_AUTHOR_NOTES = True # True, False, or "appendix"
# NOTE(review): presumably toggles the generated cover built by make_cover();
# the code that reads this flag is not visible here -- confirm.
INCLUDE_AUTOGEN_COVER = True
# When true, create_book() writes the collected TOSEFTA pages in their own
# <section> just before the footer.
INCLUDE_TOSEFTA = True

def make_cover():
title_img_data = fetch_or_get("http://i.imgur.com/d9LvKMc.png", binary=True)
Expand Down Expand Up @@ -89,6 +91,10 @@ def create_book():
fp.write("<h1>Appendix: Author Notes</h1>")
fp.write("\n\n\n".join(AUTHOR_NOTES))
fp.write("</section>")
if INCLUDE_TOSEFTA:
fp.write("<section>")
fp.write("\n\n\n".join(TOSEFTA))
fp.write("</section>")
fp.write(footer)
fp.close()

Expand Down Expand Up @@ -162,7 +168,7 @@ def get_url(url):
return cached_parsed
details = {}
soup = BeautifulSoup(data, "html.parser")
post = soup.find_all("div", "post")
post = soup.find_all("div", ["post", "page"])
nav = soup.find_all("div", "pjgm-navigation")
heading = post[0].find_all("h1", "pjgm-posttitle")[0]
if heading.text.lower().startswith("book"):
Expand All @@ -171,19 +177,22 @@ def get_url(url):
details["type"] = "author note"
elif heading.text.lower().startswith(("prologue","epilogue")):
details["type"] = "logue"
elif heading.text.lower().startswith("tosefta"):
details["type"] = "tosefta"
else:
details["type"] = "chapter"
if details["type"] in ("book", "logue"):
if details["type"] in ("book", "logue", "tosefta"):
heading.name = "h1"
else:
heading.name = "h2"
content = post[0].find_all("div", "pjgm-postcontent")[0]
prev = None
next = None
prevs = nav[0].find_all("a", {"rel": "prev"})
if prevs: prev = prevs[0].attrs["href"]
nexts = nav[0].find_all("a", {"rel": "next"})
if nexts: next = nexts[0].attrs["href"]
if nav:
prevs = nav[0].find_all("a", {"rel": "prev"})
if prevs: prev = prevs[0].attrs["href"]
nexts = nav[0].find_all("a", {"rel": "next"})
if nexts: next = nexts[0].attrs["href"]
share = soup.find_all("div", "sharedaddy")
[s.extract() for s in share]

Expand Down Expand Up @@ -214,6 +223,8 @@ def get_next(next):
AUTHOR_NOTES.append(html)
elif INCLUDE_AUTHOR_NOTES:
CHAPTERS.append(html)
elif details["type"] == "tosefta":
TOSEFTA.append(html)
else:
CHAPTERS.append(html)
if next:
Expand All @@ -239,4 +250,6 @@ def get_next(next):
if "--force" in sys.argv:
FORCE = True
get_next("http://unsongbook.com/prologue-2/")
get_next("http://unsongbook.com/tosefta/")

create_book()

0 comments on commit a1e1389

Please sign in to comment.