-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathwordpress.mjs
101 lines (86 loc) · 2.89 KB
/
wordpress.mjs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
/*
Wordpress post downloader code
Copyright (C) 2022 Akshay S Dinesh
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
Find Akshay's contact details at https://asd.learnlearn.in/about/#contact
*/
import { urlToDom, createId } from "./utils.mjs";
// CSS selectors used when scraping a post page.
// Anchor whose href becomes the `older` link of a post.
const olderSelector = "a[rel=prev]";
// Anchor whose href becomes the `newer` link of a post.
const newerSelector = "a[rel=next]";
// Element whose text content is used as the post title.
const titleSelector = ".entry-title";
// Element that is returned as the post body.
const bodySelector = ".entry-content";
/**
 * Reduce a parsed post page to the fields the downloader needs.
 *
 * The title falls back from the `.entry-title` element to the document title
 * and finally to "Untitled". Sharing / related-posts widgets are removed from
 * the body element in place before it is returned.
 *
 * @param {object} dom - Parsed page exposing `window.document`.
 * @returns {{title: string, older: (string|undefined), newer: (string|undefined),
 *   id: *, bodyDom: (object|null), url: string}} Structured post data; `bodyDom`
 *   is `null` when the page has no body element.
 */
const domToStructured = (dom) => {
  const doc = dom.window.document;
  const title =
    doc.querySelector(titleSelector)?.textContent?.trim() ||
    doc.title?.trim() ||
    "Untitled";
  const bodyDom = doc.querySelector(bodySelector);
  // Without the `?? []` fallback a page lacking a body element would make the
  // spread below throw ("undefined is not iterable").
  const widgets =
    bodyDom?.querySelectorAll(".sharedaddy, .jp-relatedposts") ?? [];
  // Copy into an array first so removal never disturbs the iteration.
  for (const node of [...widgets]) {
    node.parentNode.removeChild(node);
  }
  return {
    title,
    older: doc.querySelector(olderSelector)?.href,
    newer: doc.querySelector(newerSelector)?.href,
    id: createId(doc.URL),
    bodyDom,
    url: doc.URL,
  };
};
/**
 * Load one post page and convert it to structured data.
 *
 * @param {string} postUrl - Address of the post to load.
 * @returns {Promise<object>} Resolves to the result of `domToStructured`.
 */
const getOnePost = (postUrl) => {
  const pendingDom = urlToDom(postUrl);
  return pendingDom.then((dom) => domToStructured(dom));
};
/**
 * Yield the post at `startUrl`, then each post reached by following `older`
 * links, until a post has no `older` link.
 *
 * @param {string} startUrl - Post to start from; nothing is yielded if falsy.
 * @yields {object} Structured post data as returned by `getOnePost`.
 */
async function* getOlderPostsFrom(startUrl) {
  for (let nextUrl = startUrl; nextUrl; ) {
    const post = await getOnePost(nextUrl);
    yield post;
    // Read the link only after the consumer has resumed us.
    nextUrl = post.older;
  }
}
/**
 * Walk `newer` links starting at `startUrl`, then yield every visited post
 * with the most recently reached (newest) post first and `startUrl` last.
 *
 * The whole chain is fetched before anything is yielded. Yielded objects omit
 * the `older` and `newer` fields.
 *
 * @param {string} startUrl - Post to start from; nothing is yielded if falsy.
 * @yields {object} Structured post data without `older` / `newer`.
 */
async function* getNewerPostsFrom(startUrl) {
  const visited = [];
  let currentUrl = startUrl;
  while (currentUrl) {
    // `older` is destructured only to drop it from the yielded object.
    const { older, newer, ...post } = await getOnePost(currentUrl);
    visited.push(post);
    currentUrl = newer;
  }
  // Collected oldest-to-newest; emit newest first with a single reversal
  // instead of an O(n) unshift per post.
  yield* visited.reverse();
}
/**
 * Load a listing page and collect the links found inside its entry titles.
 *
 * @param {string} url - Address of the listing page.
 * @returns {Promise<{posts: Array}>} `posts` holds the `href` of every
 *   `.entry-title a` element, in document order.
 */
const getListPage = async (url) => {
  const listingDom = await urlToDom(url);
  const titleLinks =
    listingDom.window.document.querySelectorAll(".entry-title a");
  return { posts: Array.from(titleLinks, (link) => link?.href) };
};
/**
 * Yield the posts of a WordPress blog, newest first.
 *
 * For a "listing-page" URL the first post linked from the listing is used as a
 * starting guess: posts newer than it are yielded first (newest first), then
 * the guessed post itself, then everything older. For any other `urlType` the
 * URL is treated as a post and that post plus everything older is yielded.
 *
 * @param {string} url - Listing page or individual post address.
 * @param {string} urlType - "listing-page" to enable list detection.
 * @yields {object} Structured post data.
 * @throws {Error} If a listing page contains no post links.
 */
export async function* getFromWordPress(url, urlType) {
  if (urlType !== "listing-page") {
    yield* getOlderPostsFrom(url);
    return;
  }

  console.log(
    `List detection on WordPress is experimental. Try passing individual blogpost link for better results`
  );
  const guessedStart = (await getListPage(url))?.posts?.[0];
  if (!guessedStart) {
    // Fail with a clear message instead of trying to fetch `undefined`.
    throw new Error(`No posts found on listing page: ${url}`);
  }

  const startPost = await getOnePost(guessedStart);
  if (startPost.newer) {
    yield* getNewerPostsFrom(startPost.newer);
  }
  // The start post is already loaded, so yield it directly and continue from
  // its `older` link rather than downloading the same page a second time.
  yield startPost;
  yield* getOlderPostsFrom(startPost.older);
}