-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathplaywright.js
More file actions
72 lines (69 loc) · 4.11 KB
/
playwright.js
File metadata and controls
72 lines (69 loc) · 4.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import { HARVESTER_PATH, isObj, isFunc, isStr } from './utils.js'
/**
 * Validates the arguments shared by harvestPage() and harvestPageAll() and, when opt.inject is
 * truthy, injects the harvester.js script into the page via page.addScriptTag().
 *
 * Side effect: when opt.inject is truthy and opt.path is not set, HARVESTER_PATH is written into
 * opt.path on the object passed by the caller.
 * @param {Page} page Browser's page instance to work on. Must be an object with evaluate() method
 * @param {String} tpl Pseudo tree-like template of data we are searching for
 * @param {String} query CSS Query of the first element we start searching from
 * @param {Object} opt Options for this function. These are different options than harvester() func opt
 * @param {Boolean} [opt.inject] Set to true to inject the harvester script into the page
 * @param {String} [opt.path] Path to the harvester script. HARVESTER_PATH is used if it is not set
 * @returns {Promise<*>} Resolves with the result of page.addScriptTag() if the script was
 * injected, otherwise with undefined
 * @throws {Error} If one of the arguments is invalid or the script injection fails
 */
async function initHarvester (page, tpl, query, opt = {}) {
  if (!page || !isObj(page) || !isFunc(page.evaluate)) throw new Error('"page" argument is not set or not a Page instance')
  if (!tpl || !isStr(tpl)) throw new Error('"tpl" argument is not set or not a string')
  if (!query || !isStr(query)) throw new Error('"query" argument is not set or not a string')
  if (!opt.inject) return
  if (!opt.path) opt.path = HARVESTER_PATH
  try {
    // "await" is required here: without it a rejected promise leaves the try block before it
    // settles and the catch below never wraps the error
    return await page.addScriptTag({ path: opt.path })
  } catch (e) {
    throw new Error(`Error while injecting harvester library into the HTML page: ${e}`, { cause: e })
  }
}
/**
 * Extracts one piece of data from the first DOM element matching "query" and returns a Promise.
 * Arguments are checked by initHarvester() first. The extraction itself runs inside the page
 * through page.evaluate() and calls the global harvest() function there, so harvester.js has to
 * be loaded in the page: either set the "inject" option (see also the "path" option, which
 * defaults to HARVESTER_PATH) or inject the file in your client's code. Pay attention that every
 * call with "inject" enabled injects harvester.js into the page again.
 * @param {Page} page Browser's page instance to work on
 * @param {String} tpl Pseudo tree-like template of data we are searching for
 * @param {String} query CSS Query of the first element we start searching from
 * @param {Object} opt Options for this function. These are different options than harvester() func opt
 * @returns {Promise<[Object, maxScore, score, metaTree]>} Returns promise with harvested data in an
 * array of four elements
 * @throws {Error} If the arguments are invalid or no element matches "query"
 */
export async function harvestPage (page, tpl, query, opt = {}) {
  await initHarvester(page, tpl, query, opt)

  // This callback is executed in the page context, so it must not use anything from this module
  const harvestFirst = (args) => {
    const [template, selector, options] = args
    const root = document.querySelector(selector)
    if (root) return harvest(template, root, options) // eslint-disable-line no-undef
    throw new Error(`Selector "${selector}" not found`)
  }

  return page.evaluate(harvestFirst, [tpl, query, opt])
}
/**
 * Extracts all pieces of data by template, query, options and returns a Promise. The difference
 * between this function and harvestPage() is that this one finds all DOM elements matching
 * "query" and harvests every one of them into an array. Arguments are checked by initHarvester()
 * first. The extraction itself runs inside the page through page.evaluate() and calls the global
 * harvest() function there, so harvester.js has to be loaded in the page: either set the "inject"
 * option (see also the "path" option, which defaults to HARVESTER_PATH) or inject the file in your
 * client's code. Pay attention that every call with "inject" enabled injects harvester.js into
 * the page again.
 * @param {Page} page Browser's page instance to work on
 * @param {String} tpl Pseudo tree-like template of data we are searching for
 * @param {String} query CSS Query of the elements we start searching from
 * @param {Object} opt Options for this function. These are different options than harvester() func opt
 * @param {Number} [opt.amount] Maximum amount of matched elements to harvest. Values which are
 * not greater than zero mean "no limit"
 * @returns {Promise<Array<[Object, maxScore, score, metaTree]>>} Returns promise with an array of
 * harvest() results, one per processed element
 * @throws {Error} If the arguments are invalid or no element matches "query"
 */
export async function harvestPageAll (page, tpl, query, opt = {}) {
  await initHarvester(page, tpl, query, opt)
  // The callback below is executed in the page context, so it must stay self-contained
  return page.evaluate(([tpl, query, opt]) => {
    const found = document.querySelectorAll(query)
    if (!found || !found.length) throw new Error(`Selector "${query}" not found`)
    let els = Array.from(found)
    // slice() only ever shortens the list. Assigning to "length" would pad the result with empty
    // slots when amount is greater than the number of matches and would throw a RangeError for
    // non-integer amounts
    if (opt.amount > 0) els = els.slice(0, opt.amount)
    return els.map(el => harvest(tpl, el, opt)) // eslint-disable-line no-undef
  }, [tpl, query, opt])
}