Skip to content

Commit

Permalink
fix: algolia indexing script
Browse files Browse the repository at this point in the history
  • Loading branch information
ArthurFlag committed Jan 16, 2025
1 parent 455541f commit e1417da
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 21 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/create-index-algolia.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ name: Update global Aiven index
on:
schedule:
- cron: '0 6 * * 2' # Runs at 06:00 UTC every Tuesday
workflow_dispatch:
pull_request:
branches:
- main

jobs:
build:
Expand All @@ -25,7 +27,7 @@ jobs:
- name: Build Docusaurus site
run: yarn build
- name: Index docs output
run: node scripts/create_index_algolia.js
run: node scripts/create_index_algolia.cjs
env:
ALGOLIA_APP_ID: ${{ secrets.ALGOLIA_APP_ID }}
ALGOLIA_API_KEY: ${{ secrets.ALGOLIA_API_KEY }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,35 +5,28 @@ const path = require('path');
const glob = require('glob');
const crypto = require('crypto');

// Load environment variables
require('dotenv').config({path: path.resolve(__dirname, '../.env')});

// Connect and authenticate with your Algolia app
const ALGOLIA_APP_ID = process.env.ALGOLIA_APP_ID;
const ALGOLIA_API_KEY = process.env.ALGOLIA_API_KEY;
const ALGOLIA_INDEX_NAME = process.env.ALGOLIA_INDEX_NAME;

const client = algoliasearch(ALGOLIA_APP_ID, ALGOLIA_API_KEY);
const index = client.initIndex(ALGOLIA_INDEX_NAME);

// Function to extract data from HTML files
function extractDataFromHtml(filePath, buildDir, urlBasePath) {
const html = fs.readFileSync(filePath, 'utf8');
const $ = cheerio.load(html);
// Extract title and body
const title = $('h1').text();
const body = $(
'article .theme-doc-markdown p, article .theme-doc-markdown li',
).text();

// Construct the slug based on the file path
let relativeFilePath = path.relative(buildDir, filePath);
relativeFilePath = relativeFilePath.replace(path.sep, '/'); // Ensure forward slashes
relativeFilePath = relativeFilePath.replace('index.html', ''); // Remove 'index.html'
relativeFilePath = relativeFilePath.replace('.html', ''); // Remove '.html' from other pages
const slug = urlBasePath + relativeFilePath; // Prepend the base URL
const slug = urlBasePath + relativeFilePath; // Prepend the base URL;

// Use SHA-256 hash of the relative file path as the objectID
const hash = crypto.createHash('sha256');
hash.update(relativeFilePath);
const objectID = hash.digest('hex');
Expand All @@ -57,24 +50,30 @@ const buildDir = path.join(__dirname, '..', 'build');
const urlBasePath = 'https://aiven.io/docs/';

let pages = [];
// Define a list of pages to exclude
const excludedPages = ['404.html', 'search.html'];

// Traverse the build directory and extract data from HTML files
// Walk every generated HTML file under the build directory and collect one
// record per page, skipping the file names listed in `excludedPages`.
for (const htmlFile of glob.sync(`${buildDir}/**/*.html`)) {
  const fileName = path.basename(htmlFile);
  if (excludedPages.includes(fileName)) {
    continue;
  }
  pages.push(extractDataFromHtml(htmlFile, buildDir, urlBasePath));
}

// Push the collected `pages` records to Algolia through the `index` handle,
// passing `autoGenerateObjectIDIfNotExist: true` as an option to saveObjects.
// NOTE(review): pushToAlgolia() further down also uploads `pages`; this chain
// looks like the pre-change side of the diff and is presumably meant to be
// removed — confirm before keeping both.
index
.saveObjects(pages, {autoGenerateObjectIDIfNotExist: true})
.then(({objectIDs}) => {
// On success, print the objectIDs handed back by the call.
console.log(objectIDs);
})
.catch((err) => {
// On failure the error is only logged; nothing else is done with it here.
console.error(err);
});
/**
 * Uploads every record collected in `pages` to the index named by
 * ALGOLIA_INDEX_NAME with a single `client.batch` request.
 *
 * Each page is sent as one `addObject` operation. The outcome is logged.
 * Errors thrown by the batch call are caught here; in that case the process
 * exit code is set to 1 so that whatever runs this script (for example a CI
 * step) sees the failure instead of a successful exit.
 *
 * @returns {Promise<void>} Settles once the batch request has finished.
 */
async function pushToAlgolia() {
  const requests = pages.map((page) => ({action: 'addObject', body: page}));

  try {
    const response = await client.batch({
      indexName: ALGOLIA_INDEX_NAME,
      batchWriteParams: {requests},
    });
    console.log('Data pushed to index successfully:\n', response);
  } catch (error) {
    console.error('Error pushing data to index:\n', error);
    // Logging alone leaves the exit status at 0, which hides a failed upload
    // from the caller; mark the run as failed without aborting abruptly.
    process.exitCode = 1;
  }
}

pushToAlgolia();

0 comments on commit e1417da

Please sign in to comment.