diff --git a/packages/catalog-server/src/lib/catalog.ts b/packages/catalog-server/src/lib/catalog.ts index 386bf7ee..3332299d 100644 --- a/packages/catalog-server/src/lib/catalog.ts +++ b/packages/catalog-server/src/lib/catalog.ts @@ -39,6 +39,11 @@ const toTemporalInstant = (date: Date) => { */ const defaultPackageRefreshInterval = Temporal.Duration.from({minutes: 5}); +/** + * The default amount of time between automated bulk updates of packages. + */ +const defaultPackageUpdateInterval = Temporal.Duration.from({hours: 6}); + export interface CatalogInit { repository: Repository; files: PackageFiles; @@ -74,7 +79,7 @@ export class Catalog { packageVersion?: PackageVersion; problems?: ValidationProblem[]; }> { - console.log('Catalog.importPackage'); + console.log('Catalog.importPackage', packageName); const currentPackageInfo = await this.#repository.getPackageInfo( packageName @@ -323,4 +328,17 @@ export class Catalog { // to the repository return this.#repository.queryElements({query, limit}); } + + async getPackagesToUpdate(notUpdatedSince?: Temporal.Instant) { + if (notUpdatedSince === undefined) { + const now = Temporal.Now.instant(); + notUpdatedSince = now.subtract(defaultPackageUpdateInterval); + } + + const packages = await this.#repository.getPackagesToUpdate( + notUpdatedSince, + 100 + ); + return packages; + } } diff --git a/packages/catalog-server/src/lib/firestore/firestore-repository.ts b/packages/catalog-server/src/lib/firestore/firestore-repository.ts index 1bc2cd6c..1009929a 100644 --- a/packages/catalog-server/src/lib/firestore/firestore-repository.ts +++ b/packages/catalog-server/src/lib/firestore/firestore-repository.ts @@ -9,6 +9,7 @@ import { Query, CollectionReference, CollectionGroup, + Timestamp, } from '@google-cloud/firestore'; import {Firestore} from '@google-cloud/firestore'; import firebase from 'firebase-admin'; @@ -42,6 +43,7 @@ import { import {packageVersionConverter} from './package-version-converter.js'; import 
{customElementConverter} from './custom-element-converter.js'; import {validationProblemConverter} from './validation-problem-converter.js'; +import type {Temporal} from '@js-temporal/polyfill'; const projectId = 'wc-catalog'; firebase.initializeApp({projectId}); @@ -523,13 +525,38 @@ export class FirestoreRepository implements Repository { return result; } - getPackageRef(packageName: string) { + async getPackagesToUpdate( + notUpdatedSince: Temporal.Instant, + limit = 100 + ): Promise> { + + const date = new Date(notUpdatedSince.epochMilliseconds); + const notUpdatedSinceTimestamp = Timestamp.fromDate(date); + + // Only query 'READY', 'ERROR', and 'NOT_FOUND' packages. + // INITIALIZING and UPDATING packages are being updated, possibly by the + // batch update task calling this method. + // ERROR and NOT_FOUND are "recoverable" errors, so we should try to import + // them again. + const result = await this.getPackageCollectionRef() + .where('status', 'in', ['READY', 'ERROR', 'NOT_FOUND']) + .where('lastUpdate', '<', notUpdatedSinceTimestamp) + .limit(limit) + .get(); + const packages = result.docs.map((d) => d.data()); + return packages; + } + + getPackageCollectionRef() { return db .collection('packages' + (this.namespace ? 
`-${this.namespace}` : '')) - .doc(packageNameToId(packageName)) .withConverter(packageInfoConverter); } + getPackageRef(packageName: string) { + return this.getPackageCollectionRef().doc(packageNameToId(packageName)); + } + getPackageVersionCollectionRef(packageName: string) { return this.getPackageRef(packageName) .collection('versions') diff --git a/packages/catalog-server/src/lib/repository.ts b/packages/catalog-server/src/lib/repository.ts index eb2e0178..659e0ce6 100644 --- a/packages/catalog-server/src/lib/repository.ts +++ b/packages/catalog-server/src/lib/repository.ts @@ -4,6 +4,7 @@ * SPDX-License-Identifier: Apache-2.0 */ +import type {Temporal} from '@js-temporal/polyfill'; import type { CustomElement, PackageInfo, @@ -146,4 +147,12 @@ export interface Repository { packageName: string, version: string ): Promise; + + /** + * Returns packages that have not been updated since the date given. + */ + getPackagesToUpdate( + notUpdatedSince: Temporal.Instant, + limit: number + ): Promise>; } diff --git a/packages/catalog-server/src/lib/server/routes/bootstrap-packages.ts b/packages/catalog-server/src/lib/server/routes/bootstrap-packages.ts index 7e8ad7d0..a17c5539 100644 --- a/packages/catalog-server/src/lib/server/routes/bootstrap-packages.ts +++ b/packages/catalog-server/src/lib/server/routes/bootstrap-packages.ts @@ -22,6 +22,10 @@ export const makeBootstrapPackagesRoute = const bootstrapListFile = await readFile(bootstrapListFilePath, 'utf-8'); const bootstrapList = JSON.parse(bootstrapListFile); const packageNames = bootstrapList['packages'] as Array; + + // TODO (justinfagnani): rather than import the packages directly, add them + // to the DB in a non-imported state, then kick off the standard update + // workflow, which will import them all. 
const results = await Promise.all( packageNames.map( async ( diff --git a/packages/catalog-server/src/lib/server/routes/update-packages.ts b/packages/catalog-server/src/lib/server/routes/update-packages.ts new file mode 100644 index 00000000..443a31a3 --- /dev/null +++ b/packages/catalog-server/src/lib/server/routes/update-packages.ts @@ -0,0 +1,92 @@ +import {Temporal} from '@js-temporal/polyfill'; +import {PackageInfo} from '@webcomponents/catalog-api/lib/schema.js'; +import type Koa from 'koa'; +import type {Catalog} from '../../catalog.js'; + +// Google Cloud Run default request timeout is 5 minutes, so to do longer +// imports we need to configure the timeout. +const maxImportDuration = Temporal.Duration.from({minutes: 5}); + +export const makeUpdatePackagesRoute = + (catalog: Catalog) => async (context: Koa.Context) => { + const startInstant = Temporal.Now.instant(); + // If the `force` query parameter is present we force updating of all + // packages by setting the `notUpdatedSince` parameter to `startInstant` so + // that we get all packages last updated before now. We calculate the + // `notUpdatedSince` time once before updates so that we don't retrieve + // packages that we update in this operation. + // `force` is useful for development and testing as we may be trying to + // update packages that were just imported. + // TODO (justinfagnani): check a DEV mode also so this isn't available + // in production? + const force = 'force' in context.query; + const notUpdatedSince = force ? startInstant : undefined; + + // If `force` is true, override the default packageUpdateInterval + // TODO: how do we make an actually 0 duration? + const packageUpdateInterval = force + ? Temporal.Duration.from({microseconds: 1}) + : undefined; + + console.log('Starting package update at', startInstant, `force: ${force}`); + + let packagesToUpdate!: Array; + let packagesUpdated = 0; + let iteration = 0; + + // Loop through batches of packages to update. 
+ // We batch here so that we can pause and check that we're still within the + // maxImportDuration, and use small enough batches so that we can ensure at + // least one batch in that time. + do { + // getPackagesToUpdate() queries the first N (default 100) packages that + // have not been updated since the update interval (default 6 hours). + // When a package is imported its lastUpdate date will be updated and the + // next call to getPackagesToUpdate() will return the next 100 packages. + // This way we don't need a DB cursor to make progress through the + // package list. + packagesToUpdate = await catalog.getPackagesToUpdate(notUpdatedSince); + + if (packagesToUpdate.length === 0) { + // No more packages to update + if (iteration === 0) { + console.log('No packages to update'); + } + break; + } + + await Promise.allSettled( + packagesToUpdate.map(async (pkg) => { + try { + return await catalog.importPackage(pkg.name, packageUpdateInterval); + } catch (e) { + console.error(e); + throw e; + } + }) + ); + packagesUpdated += packagesToUpdate.length; + + const now = Temporal.Now.instant(); + const timeSinceStart = now.since(startInstant); + // If the time since the update started is not less than the max import + // duration, stop. + // TODO (justinfagnani): we need a way to test this + if (Temporal.Duration.compare(timeSinceStart, maxImportDuration) !== -1) { + break; + } + } while (true); + console.log(`Updated ${packagesUpdated} packages`); + + if (packagesToUpdate.length > 0) { + // TODO (justinfagnani): kick off new update request + console.log(`Not all packages were updated (${packagesToUpdate.length})`); + } + + context.status = 200; + context.type = 'html'; + context.body = ` +

Update Results

+

Updated ${packagesUpdated} package

+ `; + }; diff --git a/packages/catalog-server/src/lib/server/server.ts b/packages/catalog-server/src/lib/server/server.ts index 1b0fc92d..36400f99 100644 --- a/packages/catalog-server/src/lib/server/server.ts +++ b/packages/catalog-server/src/lib/server/server.ts @@ -1,6 +1,6 @@ /** * @license - * Copyright 2021 Google LLC + * Copyright 2022 Google LLC * SPDX-License-Identifier: BSD-3-Clause */ @@ -17,6 +17,7 @@ import {NpmAndUnpkgFiles} from '@webcomponents/custom-elements-manifest-tools/li import {makeGraphQLRoute} from './routes/graphql.js'; import {makeBootstrapPackagesRoute} from './routes/bootstrap-packages.js'; +import {makeUpdatePackagesRoute} from './routes/update-packages.js'; export const makeServer = async () => { const files = new NpmAndUnpkgFiles(); @@ -32,6 +33,8 @@ export const makeServer = async () => { router.get('/bootstrap-packages', makeBootstrapPackagesRoute(catalog)); + router.get('/update-packages', makeUpdatePackagesRoute(catalog)); + router.get('/', async (ctx) => { ctx.status = 200; ctx.type = 'html';