-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_verses.py
51 lines (39 loc) · 1.48 KB
/
get_verses.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import time
from joblib import Parallel, delayed
from src.jwreader import Reader
from argparse import ArgumentParser
if __name__ == "__main__":
    # Script entry point: parse the command-line options, build a Reader for
    # the requested language, then collect the verses of every selected book,
    # one joblib task per book.
    parser = ArgumentParser()
    parser.add_argument('--save-dir', type=str, help='where to save the texts')
    parser.add_argument('--lang-url', type=str, help='language portion of language specific url')
    # Defaults to -1 when the option is omitted.
    parser.add_argument('--min-chapter-num', type=int, default=-1)
    parser.add_argument('--n-jobs', type=int)
    args = parser.parse_args()

    n_jobs = args.n_jobs
    # Each option is read from its own destination: --lang-url feeds the
    # Reader's lang_url and --save-dir feeds its save_dir.
    lang_url = args.lang_url
    save_dir = args.save_dir
    min_chapter_num = args.min_chapter_num

    rd = Reader(
        home_url="https://www.jw.org",
        lang_url=lang_url,
        save_dir=save_dir,
    )
    print(f"Will be saving to {rd.save_dir}")

    def books_loop(book):
        """Collect the verses of every chapter of one book and report timing.

        Args:
            book: Key into the module-level ``books`` mapping.

        Returns:
            The result of the last ``rd.collect_verses`` call, or ``None``
            when ``rd.collect_chapters`` yields no chapters for the book.
        """
        start_time = time.time()
        chapters = rd.collect_chapters(books[book])
        # Initialised up front so a book without chapters does not raise
        # UnboundLocalError at the return statement.
        verses = None
        for chapter in chapters:
            # NOTE(review): only the last chapter's result is kept; presumably
            # collect_verses persists its output itself - confirm in
            # src.jwreader.
            verses = rd.collect_verses(
                chapter=chapters[chapter],
            )
        end_time = time.time()
        duration = (end_time - start_time) / 60  # seconds -> minutes
        print(books[book]["title"], f": \n completed in {duration} min")
        return verses

    books = rd.set_books()
    # NOTE(review): despite the option's name, the threshold is compared with
    # the keys of the books mapping - confirm this is the intended filter.
    books = {k: v for k, v in books.items() if k >= min_chapter_num}
    Parallel(n_jobs=n_jobs)(
        delayed(books_loop)(book) for book in books
    )