fix: algolia indexing script

aiven · Jan 16, 2025 · e1417da · e1417da
1 parent 455541f
commit e1417da
Show file tree

Hide file tree

Showing 2 changed files with 22 additions and 21 deletions.
diff --git a/.github/workflows/create-index-algolia.yaml b/.github/workflows/create-index-algolia.yaml
@@ -5,7 +5,9 @@ name: Update global Aiven index
 on:
   schedule:
     - cron: '0 6 * * 2' # Runs at 06:00 UTC every Tuesday
-  workflow_dispatch:
+  pull_request:
+    branches:
+      - main
 
 jobs:
   build:
@@ -25,7 +27,7 @@ jobs:
       - name: Build Docusaurus site
         run: yarn build
       - name: Index docs output
-        run: node scripts/create_index_algolia.js
+        run: node scripts/create_index_algolia.cjs
         env:
           ALGOLIA_APP_ID: ${{ secrets.ALGOLIA_APP_ID }}
           ALGOLIA_API_KEY: ${{ secrets.ALGOLIA_API_KEY }}

diff --git a/scripts/create_index_algolia.js → scripts/create_index_algolia.cjs b/scripts/create_index_algolia.js → scripts/create_index_algolia.cjs
@@ -5,35 +5,28 @@ const path = require('path');
 const glob = require('glob');
 const crypto = require('crypto');
 
-// Load environment variables
 require('dotenv').config({path: path.resolve(__dirname, '../.env')});
-
-// Connect and authenticate with your Algolia app
 const ALGOLIA_APP_ID = process.env.ALGOLIA_APP_ID;
 const ALGOLIA_API_KEY = process.env.ALGOLIA_API_KEY;
 const ALGOLIA_INDEX_NAME = process.env.ALGOLIA_INDEX_NAME;
 
 const client = algoliasearch(ALGOLIA_APP_ID, ALGOLIA_API_KEY);
-const index = client.initIndex(ALGOLIA_INDEX_NAME);
 
 // Function to extract data from HTML files
 function extractDataFromHtml(filePath, buildDir, urlBasePath) {
   const html = fs.readFileSync(filePath, 'utf8');
   const $ = cheerio.load(html);
-  // Extract title and body
   const title = $('h1').text();
   const body = $(
     'article .theme-doc-markdown p, article .theme-doc-markdown li',
   ).text();
 
-  // Construct the slug based on the file path
   let relativeFilePath = path.relative(buildDir, filePath);
   relativeFilePath = relativeFilePath.replace(path.sep, '/'); // Ensure forward slashes
   relativeFilePath = relativeFilePath.replace('index.html', ''); // Remove 'index.html'
   relativeFilePath = relativeFilePath.replace('.html', ''); // Remove '.html' from other pages
-  const slug = urlBasePath + relativeFilePath; // Prepend the base URL
+  const slug = urlBasePath + relativeFilePath; // Prepend the base URL;
 
-  // Use SHA-256 hash of the relative file path as the objectID
   const hash = crypto.createHash('sha256');
   hash.update(relativeFilePath);
   const objectID = hash.digest('hex');
@@ -57,24 +50,30 @@ const buildDir = path.join(__dirname, '..', 'build');
 const urlBasePath = 'https://aiven.io/docs/';
 
 let pages = [];
-// Define a list of pages to exclude
 const excludedPages = ['404.html', 'search.html'];
 
 // Traverse the build directory and extract data from HTML files
 glob.sync(buildDir + '/**/*.html').forEach((filePath) => {
-  // Check if the current file is in the list of excluded pages
   if (!excludedPages.includes(path.basename(filePath))) {
     const pageData = extractDataFromHtml(filePath, buildDir, urlBasePath);
     pages.push(pageData);
   }
 });
 
-// Push the data to Algolia
-index
-  .saveObjects(pages, {autoGenerateObjectIDIfNotExist: true})
-  .then(({objectIDs}) => {
-    console.log(objectIDs);
-  })
-  .catch((err) => {
-    console.error(err);
-  });
+async function pushToAlgolia() {
+  const req = pages.map((page) => ({action: 'addObject', body: page}));
+
+  try {
+    const response = await client.batch({
+      indexName: ALGOLIA_INDEX_NAME,
+      batchWriteParams: {
+        requests: req,
+      },
+    });
+    console.log('Data pushed to index successfully:\n', response);
+  } catch (error) {
+    console.error('Error pushing data to index:\n', error);
+  }
+}
+
+pushToAlgolia();