-
-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' of github.com:typesense/showcase-xkcd-search
- Loading branch information
Showing
7 changed files
with
228 additions
and
95 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,27 +1,33 @@ | ||
module.exports = { | ||
"extends": "eslint:recommended", | ||
extends: 'eslint:recommended', | ||
plugins: ['prettier'], | ||
rules: { | ||
'prettier/prettier': 'error', | ||
//https://stackoverflow.com/a/53769213 | ||
'prettier/prettier': [ | ||
'error', | ||
{ | ||
endOfLine: 'auto', | ||
}, | ||
], | ||
}, | ||
parser: 'babel-eslint', | ||
"parserOptions": { | ||
"ecmaFeatures": { | ||
"jsx": true, | ||
"modules": true | ||
parserOptions: { | ||
ecmaFeatures: { | ||
jsx: true, | ||
modules: true, | ||
}, | ||
"ecmaVersion": 2020, | ||
"sourceType": "module", | ||
"useJSXTextNode": true | ||
ecmaVersion: 2020, | ||
sourceType: 'module', | ||
useJSXTextNode: true, | ||
}, | ||
"root": true, | ||
"env": { | ||
"browser": true, | ||
"es6": true, | ||
"node": true, | ||
"commonjs": true | ||
root: true, | ||
env: { | ||
browser: true, | ||
es6: true, | ||
node: true, | ||
commonjs: true, | ||
}, | ||
globals: { | ||
$: true, | ||
}, | ||
"globals": { | ||
"$": true | ||
} | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,32 +1,54 @@ | ||
import 'dotenv/config' | ||
import 'dotenv/config'; | ||
import fetch from 'node-fetch'; | ||
import fs from 'fs'; | ||
import { fileURLToPath } from 'url'; | ||
import path from 'path'; | ||
import { DATA_DIR } from './utils/path.mjs'; | ||
import { exponentialBackoffRetry, BatchAPICall } from './utils/network.mjs'; | ||
|
||
const __filename = fileURLToPath(import.meta.url); | ||
const __dirname = path.dirname(__filename); | ||
const DATA_DIR = path.resolve(__dirname, '../data/raw') | ||
|
||
let response | ||
let response; | ||
|
||
response = await fetch('https://xkcd.com/info.0.json'); | ||
const latestComicId = (await response.json())['num']; | ||
const comicIds = [...Array(latestComicId + 1).keys()].slice(1); | ||
const batchAPICall = new BatchAPICall(); | ||
|
||
for await (const comicId of comicIds) { | ||
const filePath = `${DATA_DIR}/${comicId}.html` | ||
if(fs.existsSync(filePath)) { | ||
console.log(`Comic ${comicId} already exists. Skipping.`) | ||
const filePath = `${DATA_DIR}/${comicId}.html`; | ||
if (fs.existsSync(filePath)) { | ||
console.log(`Explanation for comic ${comicId} already exists. Skipping.`); | ||
} else { | ||
console.log(`Fetching explanation for comic ${comicId}.`) | ||
response = await fetch(`https://www.explainxkcd.com/wiki/index.php/${comicId}`); | ||
// 🙏 https://stackoverflow.com/a/51302466/123545 | ||
const fileStream = fs.createWriteStream(filePath); | ||
await new Promise((resolve, reject) => { | ||
response.body.pipe(fileStream); | ||
response.body.on("error", reject); | ||
fileStream.on("finish", resolve); | ||
}); | ||
// Deferred download task for this comic's explanation page. It is queued on
// batchAPICall below and only runs when the batch executes. It always resolves
// to a status string (never rejects) so one failure does not abort the batch.
const request = async () => {
  console.log(`Fetching explanation for comic ${comicId}.`);

  // Throws on a non-2xx response so exponentialBackoffRetry retries it.
  const fetchExplanation = async () => {
    const res = await fetch(
      `https://www.explainxkcd.com/wiki/index.php/${comicId}`
    );
    if (!res.ok) throw new Error('Request failed!');
    return res;
  };

  try {
    const explanationResponse = await exponentialBackoffRetry(
      fetchExplanation,
      {
        callback: ({ attempt, delayMs }) =>
          console.log(
            `Retry fetching explanation for comic ${comicId}: attempt ${attempt} after ${delayMs}ms`
          ),
      }
    );
    // 🙏 https://stackoverflow.com/a/51302466/123545
    const fileStream = fs.createWriteStream(filePath);
    await new Promise((resolve, reject) => {
      explanationResponse.body.on('error', reject);
      // pipe() does not forward destination errors; without this listener a
      // failed write would leave the promise pending and stall the batch.
      fileStream.on('error', reject);
      fileStream.on('finish', resolve);
      explanationResponse.body.pipe(fileStream);
    });
    return `Explanation ${comicId} success`;
  } catch (error) {
    // Log the cause instead of discarding it so failures can be diagnosed.
    console.warn(`Error fetching explanation for comic ${comicId}`, error);
    return `Explanation ${comicId} failed`;
  }
};
batchAPICall.requestList.push(request); | ||
} | ||
} | ||
|
||
batchAPICall.makeRequests(); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,32 +1,60 @@ | ||
import 'dotenv/config' | ||
import 'dotenv/config'; | ||
import fetch from 'node-fetch'; | ||
import fs from 'fs'; | ||
import { fileURLToPath } from 'url'; | ||
import path from 'path'; | ||
import { DATA_DIR } from './utils/path.mjs'; | ||
import { exponentialBackoffRetry, BatchAPICall } from './utils/network.mjs'; | ||
|
||
const __filename = fileURLToPath(import.meta.url); | ||
const __dirname = path.dirname(__filename); | ||
const DATA_DIR = path.resolve(__dirname, '../data/raw') | ||
|
||
let response | ||
let response; | ||
|
||
response = await fetch('https://xkcd.com/info.0.json'); | ||
const latestComicId = (await response.json())['num']; | ||
const comicIds = [...Array(latestComicId + 1).keys()].slice(1); | ||
|
||
const batchAPICall = new BatchAPICall(); | ||
|
||
for await (const comicId of comicIds) { | ||
const filePath = `${DATA_DIR}/${comicId}.json` | ||
if(fs.existsSync(filePath)) { | ||
console.log(`Comic ${comicId} already exists. Skipping.`) | ||
} else { | ||
console.log(`Fetching info for comic ${comicId}.`) | ||
response = await fetch(`https://xkcd.com/${comicId}/info.0.json`); | ||
// 🙏 https://stackoverflow.com/a/51302466/123545 | ||
const fileStream = fs.createWriteStream(filePath); | ||
await new Promise((resolve, reject) => { | ||
response.body.pipe(fileStream); | ||
response.body.on("error", reject); | ||
fileStream.on("finish", resolve); | ||
}); | ||
const filePath = `${DATA_DIR}/${comicId}.json`; | ||
if (fs.existsSync(filePath)) { | ||
console.log(`Comic ${comicId} already exists. Skipping.`); | ||
continue; | ||
} | ||
if (comicId === 404) { | ||
// id 404 is an April fools joke | ||
const writer = fs.createWriteStream(filePath); | ||
writer.write('{}'); | ||
continue; | ||
} | ||
// Deferred download task for this comic's metadata JSON. It is queued on
// batchAPICall below and only runs when the batch executes. It always resolves
// to a status string (never rejects) so one failure does not abort the batch.
const request = async () => {
  // Throws on a non-2xx response so exponentialBackoffRetry retries it.
  const fetchInfo = async () => {
    console.log(`Fetching info for comic ${comicId}.`);
    const res = await fetch(`https://xkcd.com/${comicId}/info.0.json`);

    if (!res.ok) throw new Error('Request failed!');
    return res;
  };

  try {
    const infoResponse = await exponentialBackoffRetry(fetchInfo, {
      callback: ({ attempt, delayMs }) =>
        console.log(
          `Retry fetching info for comic ${comicId}: attempt ${attempt} after ${delayMs}ms`
        ),
    });
    // 🙏 https://stackoverflow.com/a/51302466/123545
    const fileStream = fs.createWriteStream(filePath);
    await new Promise((resolve, reject) => {
      infoResponse.body.on('error', reject);
      // pipe() does not forward destination errors; without this listener a
      // failed write would leave the promise pending and stall the batch.
      fileStream.on('error', reject);
      fileStream.on('finish', resolve);
      infoResponse.body.pipe(fileStream);
    });
    return `Comic info ${comicId} success`;
  } catch (error) {
    // This script fetches comic info, not explanations; log the cause too.
    console.warn(`Error fetching info for comic ${comicId}`, error);
    return `Comic info ${comicId} failed`;
  }
};
|
||
batchAPICall.requestList.push(request); | ||
} | ||
|
||
batchAPICall.makeRequests(); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
import 'dotenv/config'; | ||
|
||
const SCRAPE_REQUEST_BATCH_SIZE = parseInt( | ||
process.env.SCRAPE_REQUEST_BATCH_SIZE || '52' | ||
); | ||
|
||
// https://www.codewithyou.com/blog/how-to-implement-retry-with-exponential-backoff-in-nodejs
/**
 * Calls `fn` until it succeeds or `maxAttempts` calls have failed, sleeping
 * with an exponentially growing delay between attempts.
 *
 * The delay before retry number `attempt` (1-based count of failed calls so
 * far) is `baseDelayMs * 2 ** attempt`, i.e. 2x, 4x, 8x ... the base delay.
 *
 * @param {() => Promise<*>} fn - Operation to run; a rejection/throw triggers a retry.
 * @param {object} [options] - Optional settings; may be omitted entirely.
 * @param {number} [options.maxAttempts=5] - Total number of calls to `fn` before giving up.
 * @param {number} [options.baseDelayMs=1000] - Base delay in milliseconds.
 * @param {(info: {attempt: number, delayMs: number}) => void} [options.callback]
 *   Invoked before each wait with the failed attempt number and upcoming delay.
 * @returns {Promise<*>} Resolves with the first successful result of `fn`.
 * @throws Rethrows the error from the final failed attempt.
 */
export async function exponentialBackoffRetry(
  fn,
  { maxAttempts = 5, baseDelayMs = 1000, callback = () => {} } = {}
) {
  for (let attempt = 1; ; attempt++) {
    try {
      return await fn();
    } catch (error) {
      // Out of attempts: surface the last failure to the caller unchanged.
      if (attempt >= maxAttempts) {
        throw error;
      }

      const delayMs = baseDelayMs * 2 ** attempt;
      callback({ attempt, delayMs });
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
  }
}
|
||
/**
 * Collects async request thunks and runs them in fixed-size batches: all
 * thunks within a batch start together, and the next batch starts only after
 * every promise in the current one has resolved.
 *
 * Usage: push zero-argument async functions onto `requestList`, then call
 * `makeRequests()`.
 */
export class BatchAPICall {
  /**
   * @param {number} [batchSize=SCRAPE_REQUEST_BATCH_SIZE] - Maximum number of
   *   requests started concurrently per batch.
   */
  constructor(batchSize = SCRAPE_REQUEST_BATCH_SIZE) {
    this.batchSize = batchSize;
  }

  /** Queue of zero-argument functions, each returning a promise. */
  requestList = [];

  /**
   * Runs every queued request, batch by batch, logging each batch's results.
   *
   * Uses Promise.all, so a thunk that rejects aborts the run; queued thunks
   * are expected to catch their own errors and resolve to a status value.
   *
   * @returns {Promise<Array<*>>} Results of all requests in queue order
   *   (empty when nothing was queued).
   * @throws {RangeError} If `batchSize` is not a positive integer (a zero or
   *   NaN size would otherwise loop forever or silently run nothing).
   */
  async makeRequests() {
    if (this.requestList.length === 0) {
      console.log('No requests!');
      return [];
    }

    const size = Number(this.batchSize);
    if (!Number.isInteger(size) || size <= 0) {
      throw new RangeError(
        `batchSize must be a positive integer, got ${this.batchSize}`
      );
    }

    const allResults = [];
    // Step by start offset so every batch is non-empty and none is skipped.
    for (let start = 0; start < this.requestList.length; start += size) {
      const batch = this.requestList.slice(start, start + size);
      const result = await Promise.all(batch.map((fn) => fn()));
      console.log(result);
      allResults.push(...result);
    }
    return allResults;
  }
}
Oops, something went wrong.