diff --git a/lib/helpers.d.ts b/lib/helpers.d.ts index 4c12291..6111991 100644 --- a/lib/helpers.d.ts +++ b/lib/helpers.d.ts @@ -2,3 +2,4 @@ import got from 'got'; export declare const txt: (el: Cheerio) => string; export declare const dom: (url: got.GotUrl, opts?: got.GotOptions) => Promise; +export declare const parseDate: (str: string) => Date; diff --git a/lib/helpers.js b/lib/helpers.js index 97587fc..291dda6 100644 --- a/lib/helpers.js +++ b/lib/helpers.js @@ -13,8 +13,27 @@ var __importDefault = (this && this.__importDefault) || function (mod) { Object.defineProperty(exports, "__esModule", { value: true }); const got_1 = __importDefault(require("got")); const cheerio_1 = __importDefault(require("cheerio")); +const MONTHS = [ + 'enero', + 'febrero', + 'marzo', + 'abril', + 'mayo', + 'junio', + 'julio', + 'agosto', + 'septiembre', + 'octubre', + 'noviembre', + 'diciembre', +]; exports.txt = (el) => el.text().trim(); exports.dom = (url, opts) => __awaiter(this, void 0, void 0, function* () { const { body } = yield got_1.default(url, opts); return cheerio_1.default.load(body); }); +exports.parseDate = (str) => { + const [date, month, year] = str.toLowerCase().split(' de '); + const monthIndex = MONTHS.indexOf(month); + return new Date(+year, monthIndex, +date); +}; diff --git a/lib/index.d.ts b/lib/index.d.ts index b115b4b..c5549fc 100644 --- a/lib/index.d.ts +++ b/lib/index.d.ts @@ -1,5 +1,5 @@ import got from 'got'; -declare const _default: (session?: string, defaultOpts?: got.GotOptions) => Promise<{ +declare const GetOnBrd: (session?: string, defaultOpts?: got.GotOptions) => Promise<{ getCompanyProfile: (url: string, gotOpts?: got.GotOptions) => Promise<{ title: string; logo: string; @@ -17,6 +17,7 @@ declare const _default: (session?: string, defaultOpts?: got.GotOptions) => }>; getJob: (url: string, gotOpts?: got.GotOptions) => Promise<{ date: string; + parsedDate: Date; salary: number[]; company: { logo: string; @@ -36,6 +37,8 @@ declare const _default: (session?: string, defaultOpts?: got.GotOptions) => country: string; city: string; }>; + getCategories: (gotOpts?: got.GotOptions) => Promise; + getJobsFromCategory: (categoryUrl: string, gotOpts?: got.GotOptions) => Promise; _csrfToken: string; }>; -export default _default; +export default GetOnBrd; diff --git a/lib/index.js b/lib/index.js index 738c7fb..274a55e 100644 --- a/lib/index.js +++ b/lib/index.js @@ -27,6 +27,7 @@ const qs_1 = __importDefault(require("qs")); const sanitize_html_1 = __importDefault(require("sanitize-html")); const helpers_1 = require("./helpers"); const HOST = 'https://www.getonbrd.com'; +const HOST_CL = 'https://www.getonbrd.cl'; const SEARCH_URL = `${HOST}/webpros/search_jobs`; const DEFAULT_HEADERS = { 'accept-language': 'es-US,es;q=0.9,es-419;q=0.8,en;q=0.7', @@ -38,9 +39,9 @@ const getContent = (el, excludedTags = ['div']) => { return sanitize_html_1.default(descHtml, { allowedTags, allowedIframeHostnames: ['www.youtube.com'], - }); + }).trim(); }; -exports.default = (session, defaultOpts = {}) => __awaiter(this, void 0, void 0, function* () { +const GetOnBrd = (session, defaultOpts = {}) => __awaiter(this, void 0, void 0, function* () { const sessionCookie = `_getonboard_session=${session};`; let csrfToken = ''; if (session) { @@ -87,8 +88,10 @@ exports.default = (session, defaultOpts = {}) => __awaiter(this, void 0, void 0, .map(n => n.match(/\d+/g).join('')) .map(Number) : null; + const date = helpers_1.txt(_company.find('time')); return { - date: helpers_1.txt(_company.find('time')), + date, + parsedDate: helpers_1.parseDate(date), salary, company: { logo: _company.find('.gb-company-logo__img').attr('src'), @@ -130,6 +133,21 @@ exports.default = (session, defaultOpts = {}) => __awaiter(this, void 0, void 0, links, }; }); + const getCategories = (gotOpts = defaultOpts) => __awaiter(this, void 0, void 0, function* () { + const $ = yield helpers_1.dom(HOST_CL, gotOpts); + return $('.bg-white a[href^="/emp"]') + .map((i, el) => HOST + $(el).attr('href')) + .get(); + }); + const getJobsFromCategory = (categoryUrl, gotOpts = defaultOpts) => __awaiter(this, void 0, void 0, function* () { + const $ = yield helpers_1.dom(categoryUrl, gotOpts); + return $('.job') + .map((i, el) => $(el) + .find('a') + .first() + .attr('href')) + .get(); + }); return { getCompanyProfile, getJobsBySalary: (...args) => __awaiter(this, void 0, void 0, function* () { @@ -138,6 +156,9 @@ exports.default = (session, defaultOpts = {}) => __awaiter(this, void 0, void 0, return getJobsBySalary(...args); }), getJob, + getCategories, + getJobsFromCategory, _csrfToken: csrfToken, }; }); +exports.default = GetOnBrd; diff --git a/package.json b/package.json index d692968..8a1051a 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "name": "@chile-sh/getonbrd-scraper", "types": "lib/index.d.ts", "main": "lib/index.js", - "version": "3.0.1", + "version": "3.0.2", "repository": "git@github.com:chile-sh/getonbrd-scraper.git", "author": "mallendeo", "license": "MIT",