diff --git a/.gitignore b/.gitignore index fb7a88e1..f1b827b7 100644 --- a/.gitignore +++ b/.gitignore @@ -49,3 +49,6 @@ build-iPhoneSimulator/ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this: .rvmrc .DS_Store + +# node +node_modules/ diff --git a/.rspec b/.rspec new file mode 100644 index 00000000..34c5164d --- /dev/null +++ b/.rspec @@ -0,0 +1,3 @@ +--format documentation +--color +--require spec_helper diff --git a/Gemfile b/Gemfile new file mode 100644 index 00000000..1c600fe8 --- /dev/null +++ b/Gemfile @@ -0,0 +1,4 @@ +source 'https://rubygems.org' + +gem 'nokogiri' +gem 'rspec' \ No newline at end of file diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 00000000..c0c1fd62 --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,51 @@ +GEM + remote: https://rubygems.org/ + specs: + diff-lcs (1.6.2) + nokogiri (1.18.8-aarch64-linux-gnu) + racc (~> 1.4) + nokogiri (1.18.8-aarch64-linux-musl) + racc (~> 1.4) + nokogiri (1.18.8-arm-linux-gnu) + racc (~> 1.4) + nokogiri (1.18.8-arm-linux-musl) + racc (~> 1.4) + nokogiri (1.18.8-arm64-darwin) + racc (~> 1.4) + nokogiri (1.18.8-x86_64-darwin) + racc (~> 1.4) + nokogiri (1.18.8-x86_64-linux-gnu) + racc (~> 1.4) + nokogiri (1.18.8-x86_64-linux-musl) + racc (~> 1.4) + racc (1.8.1) + rspec (3.13.1) + rspec-core (~> 3.13.0) + rspec-expectations (~> 3.13.0) + rspec-mocks (~> 3.13.0) + rspec-core (3.13.5) + rspec-support (~> 3.13.0) + rspec-expectations (3.13.5) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.13.0) + rspec-mocks (3.13.5) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.13.0) + rspec-support (3.13.4) + +PLATFORMS + aarch64-linux-gnu + aarch64-linux-musl + arm-linux-gnu + arm-linux-musl + arm64-darwin + x86_64-darwin + x86_64-linux-gnu + x86_64-linux-musl + +DEPENDENCIES + nokogiri + rspec + +BUNDLED WITH + 2.6.7 diff --git a/README.md b/README.md index 4d5a093f..1d2beda6 100644 --- a/README.md +++ b/README.md @@ -26,3 +26,169 @@ Add also to your array the painting 
thumbnails present in the result page file ( Test against 2 other similar result pages to make sure it works against different layouts. (Pages that contain the same kind of carrousel. Don't necessarily have to be paintings.) The suggested time for this challenge is 4 hours. But, you can take your time and work more on it if you want. + +------------------------------------------------------------------------------- + +# Solution + +The source of the page contains images in the markup of the artwork knowledge panel in two forms. Images above the fold, +i.e. those not in the truncated part of the panel, are embedded as base64-encoded data in the page itself. Images that are not +visible instead carry links to the image files, which are fetched when the user expands the section. + +Each artwork in the panel has the following structure: +![Artwork Structure](./files/artwork-structure.png) + +```html + +
+ + {name} +
+
{name}
+ +
{year}
+
+
+
+ +``` + +## Implementation + +### Extracting Images + +Extracting the other values is straightforward, but images require special handling due to their different formats: + +- For the remote images, the solution extracts the image URL from the `data-src` attribute. +- For the embedded images, the solution extracts the data from script tags. The source contains script tags with the + base64-encoded image data for lazily rendered images. These script tags can be associated with their respective + artwork using the `id` attribute on the image and the `ii` variable in the script. The image data can then be extracted from the + `s` variable in the script tag. + +## Code + +Since I did not have prior Ruby experience, I first implemented a solution in TypeScript using the `cheerio` library for HTML +parsing. + +Following that, I caught up on some Ruby basics and then implemented a Ruby solution with the TypeScript code as the +reference implementation. + +### Files + +```txt +├── bin +│ ├── extractor.rb - Ruby script to run the extractor +│ └── extractor.ts - TypeScript script to run the extractor +├── files - Test HTML files +│ ├── ... +│ ├── hokusai-artwork.html +│ ├── mc-escher-artwork.html +│ ├── van-gogh-paintings.html +│ └── ... +├── ... +├── lib +│ ├── extractor.rb - Ruby extractor functionality +│ └── extractor.ts - TypeScript extractor functionality +├── ... +├── spec +│ ├── extractor_spec.rb - Ruby RSpec tests for the extractor +│ ├── extractor.spec.ts - TypeScript tests using the node builtin test utils +│ └── spec_helper.rb +└── ... +``` + +## Running the code + +### Setup + +#### With Mise + +If you have [Mise](https://github.com/jdx/mise) you can use that to set up the environment for both Node and Ruby. + +```bash +mise install +``` + +#### Without Mise - Node + +You'd need node >= `23.6` (or node >= `22.6` with `--experimental-strip-types`) to run the TypeScript code. 
# Resolve the library relative to this script so the extractor can be launched
# from any working directory, not only from the repository root.
require_relative '../lib/extractor'
require 'json'

# Command-line entry point.
#
# Reads the path of a saved Google search results page from the first
# command-line argument, extracts the artworks from it and prints them to
# stdout as pretty-printed JSON of the form `{ "artworks": [...] }`.
#
# Usage hints and error messages are written to stderr (via `warn`) so that
# stdout only ever carries JSON and stays safe to pipe into tools such as `jq`.
# The process exits with status 1 when no path is given or extraction fails.
def main
  if ARGV.empty?
    warn('Please provide the path to the Google search results page HTML file.')
    warn("\tUsage: ruby #{$PROGRAM_NAME} ")
    warn("\tExample: ruby #{$PROGRAM_NAME} ./files/van-gogh-paintings.html")
    exit(1)
  end

  serp_path = ARGV[0]

  begin
    artworks = Extractor.extract_artworks_from_serp_file(serp_path)
    puts(JSON.pretty_generate({ artworks: artworks }))
  rescue StandardError => e
    warn("An error occurred while extracting artworks: #{e.message}")
    exit(1)
  end
end

# Only run when executed directly, not when the file is required.
main if __FILE__ == $PROGRAM_NAME

Accessibility links

About 11,400,000 results (1.20 seconds) 
Hokusai
Google apps
diff --git a/files/mc-escher-artwork.html b/files/mc-escher-artwork.html new file mode 100644 index 00000000..7a7c631c --- /dev/null +++ b/files/mc-escher-artwork.html @@ -0,0 +1,30 @@ +mc escher artwork - Google Search

Accessibility links

About 4,670,000 results (0.95 seconds) 
Maurits Cornelis Escher
Google apps
# Extracts the artworks listed in the knowledge panel of a saved Google search
# results page.
module Extractor
  # Raised when the results page file cannot be read or processed.
  class Error < StandardError; end

  # Scripts that assign lazily rendered image data contain this call, i.e. the
  # statement that sets the src of the lazy loaded images.
  SCRIPT_MARKER = '_setImagesSrc(ii,s'
  IMAGE_DATA_REGEX = /var s='(.*?)';/
  IMAGE_ID_REGEX = /var ii=\['(.*?)'\];/
  # Separator between ids when the `ii` array literal lists more than one id.
  IMAGE_ID_SEPARATOR = /'\s*,\s*'/

  # Google serp has images that are lazily rendered; their data is stored in
  # variables in script tags in base64 encoding, together with the ids of the
  # images the data belongs to. This method visits every such script tag and
  # collects the image ids with their base64 encoded data.
  # Script tags look like this (prettified):
  # ```js
  # (function() {
  #   var s = 'image data encoded in base64';
  #   var ii = ['image-id'];          // may list several ids
  #   var r = '';                     // may be omitted
  #   _setImagesSrc(ii, s, r);        // 'r' may be omitted
  # })();
  # ```
  # Scripts without an id list or without data are reported with `warn` and
  # skipped.
  #
  # @param doc [Nokogiri::HTML::Document] The parsed HTML document containing the script tags.
  # @return [Hash] A hash where keys are image IDs and values are the base64 encoded image data.
  def self.collect_lazy_images(doc)
    images = {}

    doc.search('script').each do |script|
      source = script.text
      next unless source.include?(SCRIPT_MARKER)

      raw_ids = source[IMAGE_ID_REGEX, 1]
      if raw_ids.nil? || raw_ids.empty?
        warn("Script tag is missing image ID: #{source.inspect}")
        next
      end

      raw_data = source[IMAGE_DATA_REGEX, 1]
      if raw_data.nil? || raw_data.empty?
        warn("Script tag is missing base64 data: #{source.inspect}")
        next
      end

      # Un-escape the padding '=' characters in the base64 string from '\\x3d' to '='
      image_data = raw_data.gsub('\\x3d', '=')

      # One script may assign the same data to several images; the capture then
      # reads "id1','id2", so split it and register every id on its own.
      raw_ids.split(IMAGE_ID_SEPARATOR).each do |image_id|
        images[image_id] = image_data unless image_id.empty?
      end
    end

    images
  end

  # CSS selectors for extracting artworks from the Google knowledge panel.

  ARTWORK_CONTAINER_ANCHOR_SELECTOR = 'div.iELo6 > a'
  ARTWORK_NAME_SELECTOR = 'div.pgNMRc'
  ARTWORK_YEAR_SELECTOR = 'div.cxzHyb'
  ARTWORK_IMAGE_SELECTOR = 'img.taFZJe'

  GOOGLE_BASE_URL = 'https://www.google.com'

  # Returns the stripped text of the first descendant of +node+ matching
  # +selector+, or an empty string when nothing matches.
  def self.text_at(node, selector)
    node.at_css(selector)&.text.to_s.strip
  end
  private_class_method :text_at

  # Extracts artworks from the artwork knowledge panel in a Google search results page.
  #
  # Every anchor matched by ARTWORK_CONTAINER_ANCHOR_SELECTOR is treated as one
  # artwork: its href is the link, and the name, year and image are looked up
  # inside it with the selectors above. Anchors without a usable name, href or
  # image are reported with `warn` and skipped instead of aborting the run.
  #
  # @param html [String] The HTML content of the Google search results page.
  # @return [Array] An array of hashes with :name, :link, :image and, when a
  #   year is present, :extensions (a one-element array holding the year).
  def self.extract_artworks_from_knowledge_panel(html)
    doc = Nokogiri::HTML(html)
    lazy_images = collect_lazy_images(doc)
    artworks = []

    doc.search(ARTWORK_CONTAINER_ANCHOR_SELECTOR).each do |element|
      # A missing name/year node yields '' rather than raising on nil.
      name = text_at(element, ARTWORK_NAME_SELECTOR)
      year = text_at(element, ARTWORK_YEAR_SELECTOR)

      href = element['href']
      unless href.is_a?(String)
        warn("Artwork element is missing href: name=#{name.inspect}")
        next
      end

      # Prefer the base64 data collected from the script tags (keyed by the
      # image id); otherwise fall back to the remote URL in data-src.
      image_element = element.at_css(ARTWORK_IMAGE_SELECTOR)
      image = image_element && (lazy_images[image_element['id']] || image_element['data-src'])
      unless image.is_a?(String)
        warn("Artwork element is missing image: name=#{name.inspect}")
        next
      end

      if name.empty? || href.empty? || image.empty?
        warn("Artwork element is missing required fields: name=#{name.inspect}, href=#{href.inspect}, image=#{image.inspect}")
        next
      end

      # Resolve relative hrefs against the Google origin; a malformed href only
      # costs this one artwork.
      begin
        link = URI.join(GOOGLE_BASE_URL, href).to_s
      rescue URI::Error => e
        warn("Artwork element has an invalid href: name=#{name.inspect}, href=#{href.inspect} (#{e.message})")
        next
      end

      artwork = { name: name, link: link, image: image }
      artwork[:extensions] = [year] unless year.empty?

      artworks << artwork
    end

    artworks
  end

  # Reads a saved Google search results page and extracts its artworks.
  #
  # @param serp_path [String] The path to the Google search results page HTML file.
  # @return [Array] An array of hashes, each containing details of an artwork.
  # @raise [ArgumentError] if +serp_path+ is nil or empty.
  # @raise [Extractor::Error] if the file does not exist, or if reading or
  #   extracting fails for any other reason.
  def self.extract_artworks_from_serp_file(serp_path)
    if serp_path.nil? || serp_path.empty?
      raise ArgumentError, 'Please provide the path to the Google search results page HTML file.'
    end

    begin
      html_content = File.read(serp_path)
      extract_artworks_from_knowledge_panel(html_content)
    rescue Errno::ENOENT
      raise Error, "File not found: #{serp_path}"
    rescue StandardError => e
      raise Error, "An error occurred while reading the file: #{e.message}"
    end
  end
end
const SCRIPT_MARKER = "_setImagesSrc(ii,s";

const IMAGE_DATA_REGEX = /var s='(.*?)';/;
const IMAGE_ID_REGEX = /var ii=\['(.*?)'];/;
// Separator between ids when the `ii` array literal lists more than one id.
const IMAGE_ID_SEPARATOR = /'\s*,\s*'/;

/**
 * Google serp has images that are lazily rendered; their data is stored in
 * variables in script tags in base64 encoding, together with the ids of the
 * images the data belongs to. This function visits every such script tag and
 * collects the image ids with their base64 encoded data.
 * Script tags look like this (prettified):
 * ```js
 * (function() {
 *   var s = 'image data encoded in base64';
 *   var ii = ['image-id'];          // may list several ids
 *   var r = '';                     // may be omitted
 *   _setImagesSrc(ii, s, r);        // 'r' may be omitted
 * })();
 * ```
 * Scripts without an id list or without data are reported with a warning and
 * skipped.
 *
 * @param $ - Cheerio object containing the parsed HTML of the Google search results page.
 * @return A map where keys are image IDs and values are base64 encoded image data.
 */
function collectLazyImages($: CheerioAPI): Map<string, string> {
    const lazyImages = new Map<string, string>();

    $('script').each((_, script) => {
        const scriptContent = $(script).html();
        // Ignore empty scripts and scripts that do not assign image data.
        if (!scriptContent || !scriptContent.includes(SCRIPT_MARKER)) return;

        const rawIds = scriptContent.match(IMAGE_ID_REGEX)?.[1];
        if (!rawIds) {
            console.warn('Script tag is missing image ID');
            return;
        }

        const rawData = scriptContent.match(IMAGE_DATA_REGEX)?.[1];
        if (!rawData) {
            console.warn(`No image data found for image ID: ${rawIds}`);
            return;
        }

        // Un-escape the padding '=' characters in the base64 string from '\\x3d' to '='
        const imageData = rawData.replaceAll('\\x3d', '=');

        // One script may assign the same data to several images; the capture
        // then reads "id1','id2", so split it and register every id on its own.
        for (const imageId of rawIds.split(IMAGE_ID_SEPARATOR)) {
            if (imageId) lazyImages.set(imageId, imageData);
        }
    });

    return lazyImages;
}

// CSS selectors for
// extracting artworks from the Google knowledge panel.

const ARTWORK_CONTAINER_ANCHOR_SELECTOR = 'div.iELo6 > a';
const ARTWORK_NAME_SELECTOR = 'div.pgNMRc';
const ARTWORK_YEAR_SELECTOR = 'div.cxzHyb';
const ARTWORK_IMAGE_SELECTOR = 'img.taFZJe';

const GOOGLE_BASE_URL = 'https://www.google.com';


/**
 * Extracts artworks from the artwork knowledge panel in a Google search results page.
 *
 * Each anchor matched by ARTWORK_CONTAINER_ANCHOR_SELECTOR is treated as one
 * artwork: its href becomes the link, and the name, year and image are looked
 * up inside it with the selectors above. Anchors lacking a name, href or
 * image are reported with a warning and left out of the result.
 *
 * @param pageHtml - The HTML content of the Google search results page containing the knowledge panel.
 * @return A promise that resolves to an array of artworks extracted from the knowledge panel.
 */
export async function extractArtworksFromKnowledgePanel(pageHtml: string): Promise<Artwork[]> {
    const $ = cheerio.load(pageHtml);
    const base64ById = collectLazyImages($);
    const found: Artwork[] = [];

    for (const anchor of $(ARTWORK_CONTAINER_ANCHOR_SELECTOR).toArray()) {
        const $anchor = $(anchor);

        const name = $anchor.find(ARTWORK_NAME_SELECTOR).text().trim();
        const year = $anchor.find(ARTWORK_YEAR_SELECTOR).text().trim();
        const href = $anchor.attr('href');

        // Base64 data collected from the script tags (keyed by the image id)
        // wins; without it, fall back to the remote URL held in data-src.
        const $img = $anchor.find(ARTWORK_IMAGE_SELECTOR);
        const imageId = $img.attr('id') || '';
        const image = base64ById.get(imageId) || $img.attr('data-src');

        if (!name || !href || !image) {
            console.warn(`Artwork element is missing required fields: name='${name}', href='${href}', image='${image}'`);
            continue;
        }

        found.push({
            name,
            extensions: year ? [year] : undefined,
            // Normalize relative search URLs to absolute URLs
            link: new URL(href, GOOGLE_BASE_URL).href,
            image,
        });
    }

    return found;
}


/**
 * Reads a saved Google search results page (as UTF-8) and extracts its artworks.
 *
 * @param serpPath - The path to the Google search results page html file.
 * @return A promise that resolves to an array of artworks extracted from the knowledge panel.
 * @throws Error if the file cannot be read.
 */
export async function extractArtworksFromSERPFile(serpPath: string): Promise<Artwork[]> {
    const pageHtml = await readFile(serpPath, 'utf-8');
    return extractArtworksFromKnowledgePanel(pageHtml);
}
a/package-lock.json b/package-lock.json new file mode 100644 index 00000000..46a76d47 --- /dev/null +++ b/package-lock.json @@ -0,0 +1,347 @@ +{ + "name": "code-challenge", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "code-challenge", + "version": "1.0.0", + "license": "MIT", + "dependencies": { + "cheerio": "^1.1.0" + }, + "devDependencies": { + "@types/node": "^24.0.10", + "typescript": "^5.8.3" + } + }, + "node_modules/@types/node": { + "version": "24.0.10", + "resolved": "https://registry.npmjs.org/@types/node/-/node-24.0.10.tgz", + "integrity": "sha512-ENHwaH+JIRTDIEEbDK6QSQntAYGtbvdDXnMXnZaZ6k13Du1dPMmprkEHIL7ok2Wl2aZevetwTAb5S+7yIF+enA==", + "dev": true, + "license": "MIT", + "dependencies": { + "undici-types": "~7.8.0" + } + }, + "node_modules/boolbase": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", + "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==", + "license": "ISC" + }, + "node_modules/cheerio": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.1.0.tgz", + "integrity": "sha512-+0hMx9eYhJvWbgpKV9hN7jg0JcwydpopZE4hgi+KvQtByZXPp04NiCWU0LzcAbP63abZckIHkTQaXVF52mX3xQ==", + "license": "MIT", + "dependencies": { + "cheerio-select": "^2.1.0", + "dom-serializer": "^2.0.0", + "domhandler": "^5.0.3", + "domutils": "^3.2.2", + "encoding-sniffer": "^0.2.0", + "htmlparser2": "^10.0.0", + "parse5": "^7.3.0", + "parse5-htmlparser2-tree-adapter": "^7.1.0", + "parse5-parser-stream": "^7.1.2", + "undici": "^7.10.0", + "whatwg-mimetype": "^4.0.0" + }, + "engines": { + "node": ">=18.17" + }, + "funding": { + "url": "https://github.com/cheeriojs/cheerio?sponsor=1" + } + }, + "node_modules/cheerio-select": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-2.1.0.tgz", + "integrity": 
"sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0", + "css-select": "^5.1.0", + "css-what": "^6.1.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/css-select": { + "version": "5.2.2", + "resolved": "https://registry.npmjs.org/css-select/-/css-select-5.2.2.tgz", + "integrity": "sha512-TizTzUddG/xYLA3NXodFM0fSbNizXjOKhqiQQwvhlspadZokn1KDy0NZFS0wuEubIYAV5/c1/lAr0TaaFXEXzw==", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0", + "css-what": "^6.1.0", + "domhandler": "^5.0.2", + "domutils": "^3.0.1", + "nth-check": "^2.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/css-what": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.2.2.tgz", + "integrity": "sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA==", + "license": "BSD-2-Clause", + "engines": { + "node": ">= 6" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/dom-serializer": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz", + "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==", + "license": "MIT", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.2", + "entities": "^4.2.0" + }, + "funding": { + "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" + } + }, + "node_modules/domelementtype": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", + "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==", + "funding": [ + { + "type": 
"github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "BSD-2-Clause" + }, + "node_modules/domhandler": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", + "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", + "license": "BSD-2-Clause", + "dependencies": { + "domelementtype": "^2.3.0" + }, + "engines": { + "node": ">= 4" + }, + "funding": { + "url": "https://github.com/fb55/domhandler?sponsor=1" + } + }, + "node_modules/domutils": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/domutils/-/domutils-3.2.2.tgz", + "integrity": "sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==", + "license": "BSD-2-Clause", + "dependencies": { + "dom-serializer": "^2.0.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3" + }, + "funding": { + "url": "https://github.com/fb55/domutils?sponsor=1" + } + }, + "node_modules/encoding-sniffer": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/encoding-sniffer/-/encoding-sniffer-0.2.1.tgz", + "integrity": "sha512-5gvq20T6vfpekVtqrYQsSCFZ1wEg5+wW0/QaZMWkFr6BqD3NfKs0rLCx4rrVlSWJeZb5NBJgVLswK/w2MWU+Gw==", + "license": "MIT", + "dependencies": { + "iconv-lite": "^0.6.3", + "whatwg-encoding": "^3.1.1" + }, + "funding": { + "url": "https://github.com/fb55/encoding-sniffer?sponsor=1" + } + }, + "node_modules/entities": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/htmlparser2": { + "version": "10.0.0", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.0.0.tgz", + "integrity": 
"sha512-TwAZM+zE5Tq3lrEHvOlvwgj1XLWQCtaaibSN11Q+gGBAS7Y1uZSWwXXRe4iF6OXnaq1riyQAPFOBtYc77Mxq0g==", + "funding": [ + "https://github.com/fb55/htmlparser2?sponsor=1", + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "MIT", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.2.1", + "entities": "^6.0.0" + } + }, + "node_modules/htmlparser2/node_modules/entities": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/entities/-/entities-6.0.1.tgz", + "integrity": "sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/iconv-lite": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", + "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", + "license": "MIT", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/nth-check": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz", + "integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0" + }, + "funding": { + "url": "https://github.com/fb55/nth-check?sponsor=1" + } + }, + "node_modules/parse5": { + "version": "7.3.0", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.3.0.tgz", + "integrity": "sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw==", + "license": "MIT", + "dependencies": { + "entities": "^6.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + 
"node_modules/parse5-htmlparser2-tree-adapter": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.1.0.tgz", + "integrity": "sha512-ruw5xyKs6lrpo9x9rCZqZZnIUntICjQAd0Wsmp396Ul9lN/h+ifgVV1x1gZHi8euej6wTfpqX8j+BFQxF0NS/g==", + "license": "MIT", + "dependencies": { + "domhandler": "^5.0.3", + "parse5": "^7.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5-parser-stream": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/parse5-parser-stream/-/parse5-parser-stream-7.1.2.tgz", + "integrity": "sha512-JyeQc9iwFLn5TbvvqACIF/VXG6abODeB3Fwmv/TGdLk2LfbWkaySGY72at4+Ty7EkPZj854u4CrICqNk2qIbow==", + "license": "MIT", + "dependencies": { + "parse5": "^7.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5/node_modules/entities": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/entities/-/entities-6.0.1.tgz", + "integrity": "sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/safer-buffer": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", + "license": "MIT" + }, + "node_modules/typescript": { + "version": "5.8.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.8.3.tgz", + "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/undici": 
{ + "version": "7.11.0", + "resolved": "https://registry.npmjs.org/undici/-/undici-7.11.0.tgz", + "integrity": "sha512-heTSIac3iLhsmZhUCjyS3JQEkZELateufzZuBaVM5RHXdSBMb1LPMQf5x+FH7qjsZYDP0ttAc3nnVpUB+wYbOg==", + "license": "MIT", + "engines": { + "node": ">=20.18.1" + } + }, + "node_modules/undici-types": { + "version": "7.8.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.8.0.tgz", + "integrity": "sha512-9UJ2xGDvQ43tYyVMpuHlsgApydB8ZKfVYTsLDhXkFL/6gfkp+U8xTGdh8pMJv1SpZna0zxG1DwsKZsreLbXBxw==", + "dev": true, + "license": "MIT" + }, + "node_modules/whatwg-encoding": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz", + "integrity": "sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==", + "license": "MIT", + "dependencies": { + "iconv-lite": "0.6.3" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/whatwg-mimetype": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-4.0.0.tgz", + "integrity": "sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==", + "license": "MIT", + "engines": { + "node": ">=18" + } + } + } +} diff --git a/package.json b/package.json new file mode 100644 index 00000000..89aede11 --- /dev/null +++ b/package.json @@ -0,0 +1,20 @@ +{ + "name": "serpapi-code-challenge", + "version": "1.0.0", + "description": "Extract artworks from google artworks knowledge panel", + "license": "MIT", + "author": "Jasir Zaeem", + "type": "module", + "main": "bin/extractor.ts", + "scripts": { + "extract": "node bin/extractor.ts", + "test": "node --test spec/*.spec.ts" + }, + "dependencies": { + "cheerio": "^1.1.0" + }, + "devDependencies": { + "@types/node": "^24.0.10", + "typescript": "^5.8.3" + } +} diff --git a/spec/extractor.spec.ts b/spec/extractor.spec.ts new file mode 100644 index 00000000..782bec11 --- /dev/null +++ 
b/spec/extractor.spec.ts @@ -0,0 +1,93 @@ +import test from 'node:test'; +import assert from 'node:assert'; +import {readFile} from 'node:fs/promises'; +import {type Artwork, extractArtworksFromKnowledgePanel, extractArtworksFromSERPFile} from '../lib/extractor.ts'; + +const TEST_PAGES = [ + './files/van-gogh-paintings.html', + './files/mc-escher-artwork.html', + './files/hokusai-artwork.html', +] + +/** + * Placeholder used by google for images that are not loaded yet (because they + * are in the truncated part of the knowledge panel). They shouldn't show up in + * the extracted artworks because their alternative remote url should be + * extracted instead. + */ +const PLACEHOLDER_BASE64_IMAGE = '' + +/** + * Validates that the given artwork object conforms to the expected structure. + * @param artwork - The artwork object to validate. + * @param artworkId - Identifier for the artwork, used in error messages. + * @throws AssertionError Will throw an error if the artwork does not conform to the expected structure. 
+ */ +function validateArtwork(artwork: unknown, artworkId: string): asserts artwork is Artwork { + assert(typeof artwork === 'object' && artwork !== null, `Artwork #${artworkId} should be an object, got ${typeof artwork}`); + + assert('name' in artwork, `Artwork #${artworkId} should have a name`); + assert(typeof artwork.name === 'string', `Artwork #${artworkId} name should be a string, got ${typeof artwork.name}`); + assert(artwork.name.length > 0, `Artwork #${artworkId} name should not be empty`); + + if ('extensions' in artwork && artwork.extensions !== undefined) { + assert(Array.isArray(artwork.extensions), `Artwork #${artworkId} extensions should be an array`); + if (artwork.extensions.length > 0) { + artwork.extensions.forEach(extension => { + assert(typeof extension === 'string', `Artwork #${artworkId} each extension should be a string`); + assert(extension.length > 0, `Artwork #${artworkId} Extension should not be empty`); + }); + } + } + + assert('link' in artwork, `Artwork #${artworkId} Artwork should have a link`); + assert(typeof artwork.link === 'string', `Artwork #${artworkId} Artwork link should be a string, got ${typeof artwork.link}`); + assert(artwork.link.length > 0, `Artwork #${artworkId} link should not be empty`); + assert.match(artwork.link, /^https:\/\/www\.google\.com\//, `Artwork #${artworkId} link should be an absolute Google URL`); + + assert('image' in artwork, `Artwork #${artworkId} should have an image`); + assert(typeof artwork.image === 'string', `Artwork #${artworkId} image should be a string, got ${typeof artwork.image}`); + assert(artwork.image.length > 0, `Artwork #${artworkId} image should not be empty`); + + assert.match(artwork.image, /^(data:image\/(png|jpeg|gif);base64,|https)/, `Artwork #${artworkId} image should be a base64 encoded image or a URL`); + assert.notEqual(artwork.image, PLACEHOLDER_BASE64_IMAGE, `Artwork #${artworkId} image should not be the placeholder base64 image`); +} + +test('extractArtworksFromSERPFile 
should extract artworks from van Gogh HTML file', async () => { + + for (const page of TEST_PAGES) { + const artworks = await extractArtworksFromSERPFile(page); + assert(Array.isArray(artworks), `Should return an array for ${page}`); + assert(artworks.length > 0, `Should extract at least one artwork from ${page}`); + + artworks.forEach((artwork, index) => { + validateArtwork(artwork, `${page} #${index + 1}`); + }); + } +}); + +test('extractArtworksFromKnowledgePanel should extract artworks from HTML string', async () => { + const page = TEST_PAGES[0] + const html = await readFile(page, 'utf-8'); + const artworks = await extractArtworksFromKnowledgePanel(html); + assert(Array.isArray(artworks), 'Should return an array'); + assert(artworks.length > 0, 'Should extract at least one artwork'); + + artworks.forEach((artwork, index) => { + validateArtwork(artwork, `${page} #${index + 1}`); + }); +}); + +test('extractArtworksFromKnowledgePanel should handle empty HTML', async () => { + const artworks = await extractArtworksFromKnowledgePanel(''); + + assert(Array.isArray(artworks), 'Should return an array'); + assert.strictEqual(artworks.length, 0, 'Should return empty array for HTML without artwork containers'); +}); + +test('extractArtworksFromSERPFile should reject for non-existent file', async () => { + await assert.rejects( + extractArtworksFromSERPFile('./non-existent-file.html'), + 'Should reject when file does not exist' + ); +}); diff --git a/spec/extractor_spec.rb b/spec/extractor_spec.rb new file mode 100644 index 00000000..2c872396 --- /dev/null +++ b/spec/extractor_spec.rb @@ -0,0 +1,109 @@ +# frozen_string_literal: true + +RSpec.describe Extractor do + let(:test_pages) do + %w[./files/van-gogh-paintings.html ./files/mc-escher-artwork.html ./files/hokusai-artwork.html] + end + + # Placeholder used by google for images that are not loaded yet (because they + # are in the truncated part of the knowledge panel). 
They shouldn't show up in + # the extracted artworks because their alternative remote url should be + # extracted instead. + let(:placeholder_base64_image) do + '' + end + + # Validates that the given artwork hash conforms to the expected structure. + def validate_artwork(artwork, artwork_id) + expect(artwork).to be_a(Hash), "Artwork ##{artwork_id} should be a hash, got #{artwork.class}" + + expect(artwork).to have_key(:name), "Artwork ##{artwork_id} should have a name" + expect(artwork[:name]).to be_a(String), "Artwork ##{artwork_id} name should be a string, got #{artwork[:name].class}" + expect(artwork[:name]).not_to be_empty, "Artwork ##{artwork_id} name should not be empty" + + if artwork.key?(:extensions) && !artwork[:extensions].nil? + expect(artwork[:extensions]).to be_a(Array), "Artwork ##{artwork_id} extensions should be an array" + if artwork[:extensions].any? + artwork[:extensions].each do |extension| + expect(extension).to be_a(String), "Artwork ##{artwork_id} each extension should be a string" + expect(extension).not_to be_empty, "Artwork ##{artwork_id} extension should not be empty" + end + end + end + + expect(artwork).to have_key(:link), "Artwork ##{artwork_id} should have a link" + expect(artwork[:link]).to be_a(String), "Artwork ##{artwork_id} link should be a string, got #{artwork[:link].class}" + expect(artwork[:link]).not_to be_empty, "Artwork ##{artwork_id} link should not be empty" + expect(artwork[:link]).to match(/\Ahttps:\/\/www\.google\.com\//), "Artwork ##{artwork_id} link should be an absolute Google URL" + + expect(artwork).to have_key(:image), "Artwork ##{artwork_id} should have an image" + expect(artwork[:image]).to be_a(String), "Artwork ##{artwork_id} image should be a string, got #{artwork[:image].class}" + expect(artwork[:image]).not_to be_empty, "Artwork ##{artwork_id} image should not be empty" + + expect(artwork[:image]).to match(/\A(data:image\/(png|jpeg|gif);base64,|https)/), "Artwork ##{artwork_id} image should be a base64 
encoded image or a URL" + + expect(artwork[:image]).not_to eq(placeholder_base64_image), "Artwork ##{artwork_id} image should not be the placeholder base64 image" + end + + describe '.extract_artworks_from_serp_file' do + it 'extracts artworks from all test HTML files' do + test_pages.each do |page| + artworks = described_class.extract_artworks_from_serp_file(page) + + expect(artworks).to be_a(Array), "Should return an array for #{page}" + expect(artworks.length).to be > 0, "Should extract at least one artwork from #{page}" + + artworks.each_with_index do |artwork, index| + validate_artwork(artwork, "#{page} ##{index + 1}") + end + end + end + + it 'raises an error for non-existent file' do + expect { + described_class.extract_artworks_from_serp_file('./non-existent-file.html') + }.to raise_error(Extractor::Error, /File not found/) + end + + it 'raises an error for nil path' do + expect { + described_class.extract_artworks_from_serp_file(nil) + }.to raise_error(ArgumentError, /Please provide the path/) + end + + it 'raises an error for empty path' do + expect { + described_class.extract_artworks_from_serp_file('') + }.to raise_error(ArgumentError, /Please provide the path/) + end + end + + describe '.extract_artworks_from_knowledge_panel' do + it 'extracts artworks from HTML string' do + page = test_pages.first + html_content = File.read(page) + artworks = described_class.extract_artworks_from_knowledge_panel(html_content) + + expect(artworks).to be_a(Array), 'Should return an array' + expect(artworks.length).to be > 0, 'Should extract at least one artwork' + + artworks.each_with_index do |artwork, index| + validate_artwork(artwork, "#{page} ##{index + 1}") + end + end + + it 'handles empty HTML' do + artworks = described_class.extract_artworks_from_knowledge_panel('') + + expect(artworks).to be_a(Array), 'Should return an array' + expect(artworks.length).to eq(0), 'Should return empty array for HTML without artwork containers' + end + + it 'handles malformed HTML' 
do + artworks = described_class.extract_artworks_from_knowledge_panel('
invalid
') + + expect(artworks).to be_a(Array), 'Should return an array' + expect(artworks.length).to eq(0), 'Should return empty array for malformed HTML' + end + end +end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb new file mode 100644 index 00000000..7e3d64c8 --- /dev/null +++ b/spec/spec_helper.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +require "./lib/extractor" + +RSpec.configure do |config| + # Enable flags like --only-failures and --next-failure + config.example_status_persistence_file_path = ".rspec_status" + + # Disable RSpec exposing methods globally on `Module` and `main` + config.disable_monkey_patching! + + config.expect_with :rspec do |c| + c.syntax = :expect + end +end diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 00000000..b4eaaffe --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,113 @@ +{ + "compilerOptions": { + /* Visit https://aka.ms/tsconfig to read more about this file */ + + /* Projects */ + // "incremental": true, /* Save .tsbuildinfo files to allow for incremental compilation of projects. */ + // "composite": true, /* Enable constraints that allow a TypeScript project to be used with project references. */ + // "tsBuildInfoFile": "./.tsbuildinfo", /* Specify the path to .tsbuildinfo incremental compilation file. */ + // "disableSourceOfProjectReferenceRedirect": true, /* Disable preferring source files instead of declaration files when referencing composite projects. */ + // "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. */ + // "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */ + + /* Language and Environment */ + "target": "ESNext", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */ + // "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. 
*/ + // "jsx": "preserve", /* Specify what JSX code is generated. */ + // "libReplacement": true, /* Enable lib replacement. */ + // "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */ + // "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */ + // "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */ + // "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */ + // "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */ + // "reactNamespace": "", /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */ + // "noLib": true, /* Disable including any library files, including the default lib.d.ts. */ + // "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */ + // "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */ + + /* Modules */ + "module": "NodeNext", /* Specify what module code is generated. */ + // "rootDir": "./", /* Specify the root folder within your source files. */ + // "moduleResolution": "node10", /* Specify how TypeScript looks up a file from a given module specifier. */ + // "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */ + // "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */ + // "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */ + // "typeRoots": [], /* Specify multiple folders that act like './node_modules/@types'. */ + // "types": [], /* Specify type package names to be included without being referenced in a source file. 
*/ + // "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */ + // "moduleSuffixes": [], /* List of file name suffixes to search when resolving a module. */ + "allowImportingTsExtensions": true, /* Allow imports to include TypeScript file extensions. Requires '--moduleResolution bundler' and either '--noEmit' or '--emitDeclarationOnly' to be set. */ + // "rewriteRelativeImportExtensions": true, /* Rewrite '.ts', '.tsx', '.mts', and '.cts' file extensions in relative import paths to their JavaScript equivalent in output files. */ + // "resolvePackageJsonExports": true, /* Use the package.json 'exports' field when resolving package imports. */ + // "resolvePackageJsonImports": true, /* Use the package.json 'imports' field when resolving imports. */ + // "customConditions": [], /* Conditions to set in addition to the resolver-specific defaults when resolving imports. */ + // "noUncheckedSideEffectImports": true, /* Check side effect imports. */ + // "resolveJsonModule": true, /* Enable importing .json files. */ + // "allowArbitraryExtensions": true, /* Enable importing files with any extension, provided a declaration file is present. */ + // "noResolve": true, /* Disallow 'import's, 'require's or ''s from expanding the number of files TypeScript should add to a project. */ + + /* JavaScript Support */ + // "allowJs": true, /* Allow JavaScript files to be a part of your program. Use the 'checkJS' option to get errors from these files. */ + // "checkJs": true, /* Enable error reporting in type-checked JavaScript files. */ + // "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from 'node_modules'. Only applicable with 'allowJs'. */ + + /* Emit */ + // "declaration": true, /* Generate .d.ts files from TypeScript and JavaScript files in your project. */ + // "declarationMap": true, /* Create sourcemaps for d.ts files. 
*/ + // "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */ + // "sourceMap": true, /* Create source map files for emitted JavaScript files. */ + // "inlineSourceMap": true, /* Include sourcemap files inside the emitted JavaScript. */ + // "noEmit": true, /* Disable emitting files from a compilation. */ + // "outFile": "./", /* Specify a file that bundles all outputs into one JavaScript file. If 'declaration' is true, also designates a file that bundles all .d.ts output. */ + // "outDir": "./", /* Specify an output folder for all emitted files. */ + // "removeComments": true, /* Disable emitting comments. */ + // "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. */ + // "downlevelIteration": true, /* Emit more compliant, but verbose and less performant JavaScript for iteration. */ + // "sourceRoot": "", /* Specify the root path for debuggers to find the reference source code. */ + // "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */ + // "inlineSources": true, /* Include source code in the sourcemaps inside the emitted JavaScript. */ + // "emitBOM": true, /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */ + // "newLine": "crlf", /* Set the newline character for emitting files. */ + // "stripInternal": true, /* Disable emitting declarations that have '@internal' in their JSDoc comments. */ + // "noEmitHelpers": true, /* Disable generating custom helper functions like '__extends' in compiled output. */ + // "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */ + // "preserveConstEnums": true, /* Disable erasing 'const enum' declarations in generated code. */ + // "declarationDir": "./", /* Specify the output directory for generated declaration files. 
*/ + + /* Interop Constraints */ + // "isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */ + // "verbatimModuleSyntax": true, /* Do not transform or elide any imports or exports not marked as type-only, ensuring they are written in the output file's format based on the 'module' setting. */ + // "isolatedDeclarations": true, /* Require sufficient annotation on exports so other tools can trivially generate declaration files. */ + // "erasableSyntaxOnly": true, /* Do not allow runtime constructs that are not part of ECMAScript. */ + // "allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */ + "esModuleInterop": true, /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables 'allowSyntheticDefaultImports' for type compatibility. */ + // "preserveSymlinks": true, /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */ + "forceConsistentCasingInFileNames": true, /* Ensure that casing is correct in imports. */ + + /* Type Checking */ + "strict": true, /* Enable all strict type-checking options. */ + // "noImplicitAny": true, /* Enable error reporting for expressions and declarations with an implied 'any' type. */ + // "strictNullChecks": true, /* When type checking, take into account 'null' and 'undefined'. */ + // "strictFunctionTypes": true, /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */ + // "strictBindCallApply": true, /* Check that the arguments for 'bind', 'call', and 'apply' methods match the original function. */ + // "strictPropertyInitialization": true, /* Check for class properties that are declared but not set in the constructor. */ + // "strictBuiltinIteratorReturn": true, /* Built-in iterators are instantiated with a 'TReturn' type of 'undefined' instead of 'any'. 
*/ + // "noImplicitThis": true, /* Enable error reporting when 'this' is given the type 'any'. */ + // "useUnknownInCatchVariables": true, /* Default catch clause variables as 'unknown' instead of 'any'. */ + // "alwaysStrict": true, /* Ensure 'use strict' is always emitted. */ + // "noUnusedLocals": true, /* Enable error reporting when local variables aren't read. */ + // "noUnusedParameters": true, /* Raise an error when a function parameter isn't read. */ + // "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. */ + // "noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. */ + // "noFallthroughCasesInSwitch": true, /* Enable error reporting for fallthrough cases in switch statements. */ + // "noUncheckedIndexedAccess": true, /* Add 'undefined' to a type when accessed using an index. */ + // "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */ + // "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type. */ + // "allowUnusedLabels": true, /* Disable error reporting for unused labels. */ + // "allowUnreachableCode": true, /* Disable error reporting for unreachable code. */ + + /* Completeness */ + // "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */ + "skipLibCheck": true /* Skip type checking all .d.ts files. */ + } +}