-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.js
75 lines (63 loc) · 2.08 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
var cheerio = require('cheerio');
var request = require('request');
var url = require('url');
var fs = require('fs');
var mustache = require('mustache');
var AWS = require('aws-sdk');
var moment = require('moment');
exports.handler = function (event, context) {
request(event.webpage, function (err, response, body) {
if (err) {
console.log(err, err.stack); // an error occurred
}
var $ = cheerio.load(body);
var links = [];
AWS.config.apiVersions = {
s3: '2006-03-01',
// other service API versions
};
var s3 = new AWS.S3();
$('a').each(function () {
var anchor = $(this);
var href = anchor.attr('href');
var text = anchor.text();
if (typeof href !== 'undefined') {
var abs = url.resolve(event.webpage, href);
if (text === '') {
text = abs;
}
var new_item = {
text: text,
url: abs
};
if (links.indexOf(new_item) === -1) {
links.push(new_item);
}
}
});
fs.readFile('template.html', 'utf8', function (err, data) {
if (err) {
console.log(err, err.stack); // an error occurred
}
var view = {
links: links,
page: event.webpage,
time: moment().format('MMMM Do YYYY, h:mm:ss a')
};
var output = mustache.render(data, view);
var s3_params = {
Bucket: JSON.parse(fs.readFileSync('stack-outputs.json', 'utf8'))['StackOutputs']['S3Bucket'],
Key: 'links.html',
ContentType: 'text/html',
Body: output
};
s3.putObject(s3_params, function (err, data) {
if (err) {
console.log(data);
console.log(err, err.stack); // an error occurred
}
context.done(null, 'link-scraper complete.');
});
});
});
};