From c24c9c4062b84108f2b4e935f6c16d9fa9a18112 Mon Sep 17 00:00:00 2001 From: Jacky Jiang Date: Wed, 17 Apr 2024 21:26:34 +1000 Subject: [PATCH] fixed: checking internal storage res as admin users --- src/index.ts | 2 + src/onRecordFound.ts | 81 +++++++++++++++++++++++++-------- src/test/onRecordFound.spec.ts | 20 ++++++-- src/test/testStorageUrl.spec.ts | 17 +++++-- 4 files changed, 90 insertions(+), 30 deletions(-) diff --git a/src/index.ts b/src/index.ts index cbcd428..b87a1b3 100644 --- a/src/index.ts +++ b/src/index.ts @@ -63,6 +63,8 @@ function sleuthBrokenLinks() { registry, argv.storageApiBaseUrl, argv.datasetBucketName, + argv.jwtSecret, + argv.userId, argv.externalRetries, 1, argv.domainWaitTimeConfig as any, diff --git a/src/onRecordFound.ts b/src/onRecordFound.ts index 3475caf..048bbe9 100644 --- a/src/onRecordFound.ts +++ b/src/onRecordFound.ts @@ -16,12 +16,15 @@ import { } from "./HttpRequests.js"; import getUrlWaitTime from "./getUrlWaitTime.js"; import wait from "./wait.js"; +import { buildJwt } from "@magda/utils"; export default async function onRecordFound( record: Record, registry: Registry, storageApiBaseUrl: string, datasetBucketName: string, + jwtSecret: string, + actionUserId: string, retries: number = 1, baseRetryDelaySeconds: number = 1, domainWaitTimeConfig: { [domain: string]: number } = {}, @@ -49,7 +52,9 @@ export default async function onRecordFound( _.partialRight(getUrlWaitTime, domainWaitTimeConfig), requestOpts, storageApiBaseUrl, - datasetBucketName + datasetBucketName, + jwtSecret, + actionUserId ) ); @@ -172,7 +177,9 @@ function checkDistributionLink( getUrlWaitTime: (url: string) => number, requestOpts: CoreOptions, storageApiBaseUrl: string, - datasetBucketName: string + datasetBucketName: string, + jwtSecret: string, + actionUserId: string ): DistributionLinkCheck[] { type DistURL = { url?: URI; @@ -190,15 +197,7 @@ function checkDistributionLink( } ] .map((urlObj) => { - const url = - typeof urlObj.url === "string" - ? getStorageApiResourceAccessUrl( - urlObj.url, - storageApiBaseUrl, - datasetBucketName - ) - : urlObj.url; - return { ...urlObj, url: parseUriSafe(url) }; + return { ...urlObj, url: parseUriSafe(urlObj.url) }; }) .filter((x) => x.url && x.url.protocol().length > 0); @@ -232,7 +231,11 @@ function checkDistributionLink( retries, ftpHandler, getUrlWaitTime, - requestOpts + requestOpts, + storageApiBaseUrl, + datasetBucketName, + jwtSecret, + actionUserId ) .then((aspect) => { console.info("Finished retrieving " + parsedURL); @@ -262,15 +265,28 @@ function retrieve( retries: number, ftpHandler: FTPHandler, getUrlWaitTime: (url: string) => number, - requestOpts: CoreOptions + requestOpts: CoreOptions, + storageApiBaseUrl: string, + datasetBucketName: string, + jwtSecret: string, + actionUserId: string ): Promise { - if (parsedURL.protocol() === "http" || parsedURL.protocol() === "https") { + if ( + parsedURL.protocol() === "http" || + parsedURL.protocol() === "https" || + (parsedURL.protocol() === "magda" && + parsedURL.hostname() === "storage-api") + ) { return retrieveHttp( parsedURL.toString(), baseRetryDelay, retries, getUrlWaitTime, - requestOpts + requestOpts, + storageApiBaseUrl, + datasetBucketName, + jwtSecret, + actionUserId ); } else if (parsedURL.protocol() === "ftp") { return retrieveFtp(parsedURL, ftpHandler); @@ -319,16 +335,41 @@ async function retrieveHttp( baseRetryDelay: number, retries: number, getUrlWaitTime: (url: string) => number, - requestOpts: CoreOptions + requestOpts: CoreOptions, + storageApiBaseUrl: string, + datasetBucketName: string, + jwtSecret: string, + actionUserId: string ): Promise { + const isInternalStorageRes = url.indexOf("magda://storage-api/") === 0; + const resUrl = getStorageApiResourceAccessUrl( + url, + storageApiBaseUrl, + datasetBucketName + ); + const runtimeRequestOpts = { ...requestOpts }; + if (requestOpts?.headers) { + runtimeRequestOpts.headers = { + ...requestOpts.headers + }; + } + if (isInternalStorageRes) { + if (!runtimeRequestOpts?.headers) { + runtimeRequestOpts.headers = {}; + } + runtimeRequestOpts.headers = { + ...runtimeRequestOpts.headers, + "X-Magda-Session": buildJwt(jwtSecret, actionUserId) + }; + } async function operation() { try { - await wait(getUrlWaitTime(url)); - return await headRequest(url, requestOpts); + await wait(getUrlWaitTime(resUrl)); + return await headRequest(resUrl, runtimeRequestOpts); } catch (e) { // --- HEAD Method not allowed - await wait(getUrlWaitTime(url)); - return await getRequest(url, requestOpts); + await wait(getUrlWaitTime(resUrl)); + return await getRequest(resUrl, runtimeRequestOpts); } } diff --git a/src/test/onRecordFound.spec.ts b/src/test/onRecordFound.spec.ts index daef25d..0cb9087 100644 --- a/src/test/onRecordFound.spec.ts +++ b/src/test/onRecordFound.spec.ts @@ -37,7 +37,9 @@ import { } from "../getUrlWaitTime.js"; const defaultStorageApiBaseUrl = "http://storage-api/v0"; -const defaultDatsetBucketName = "magda-datasets"; +const defaultDatasetBucketName = "magda-datasets"; +const jwtSecret = "sdsfsfdsfsddsfsdfdsfds2323432423"; +const actionUserId = "user-id-1"; const schema = require("@magda/registry-aspects/source-link-status.schema.json"); describe("onRecordFound", function (this: Mocha.Suite) { @@ -379,7 +381,9 @@ describe("onRecordFound", function (this: Mocha.Suite) { record, registry, defaultStorageApiBaseUrl, - defaultDatsetBucketName, + defaultDatasetBucketName, + jwtSecret, + actionUserId, 0, 0, {}, @@ -592,7 +596,9 @@ describe("onRecordFound", function (this: Mocha.Suite) { record, registry, defaultStorageApiBaseUrl, - defaultDatsetBucketName, + defaultDatasetBucketName, + jwtSecret, + actionUserId, retryCount, 0 ) @@ -716,7 +722,9 @@ describe("onRecordFound", function (this: Mocha.Suite) { record, registry, defaultStorageApiBaseUrl, - defaultDatsetBucketName, + defaultDatasetBucketName, + jwtSecret, + actionUserId, failures.length, 0, delayConfig @@ -763,7 +771,9 @@ describe("onRecordFound", function (this: Mocha.Suite) { record, registry, defaultStorageApiBaseUrl, - defaultDatsetBucketName + defaultDatasetBucketName, + jwtSecret, + actionUserId ).then(() => { afterEachProperty(); diff --git a/src/test/testStorageUrl.spec.ts b/src/test/testStorageUrl.spec.ts index e73e088..e281a82 100644 --- a/src/test/testStorageUrl.spec.ts +++ b/src/test/testStorageUrl.spec.ts @@ -15,9 +15,12 @@ import { setDefaultDomainWaitTime, getDefaultDomainWaitTime } from "../getUrlWaitTime.js"; +import { buildJwt } from "@magda/utils"; const defaultStorageApiBaseUrl = "http://storage-api/v0"; const defaultDatasetBucketName = "magda-datasets"; +const jwtSecret = "sdsfsfdsfsddsfsdfdsfds2323432423"; +const actionUserId = "user-id-1"; const schema = require("@magda/registry-aspects/source-link-status.schema.json"); describe("Test Internal Storage URL", function (this: Mocha.Suite) { @@ -109,23 +112,25 @@ describe("Test Internal Storage URL", function (this: Mocha.Suite) { }; const defaultStorageApiBaseUri = urijs(defaultStorageApiBaseUrl); - + const jwt = buildJwt(jwtSecret, actionUserId); const storageApiScope = nock( - defaultStorageApiBaseUri.clone().path("").toString() + defaultStorageApiBaseUri.clone().path("").toString(), + { + reqheaders: { + "X-Magda-Session": jwt + } + } ); - storageApiScope .head( `${defaultStorageApiBaseUri.path()}/${defaultDatasetBucketName}/ds-1/dist-1/test-file1.pdf` ) - .query(true) .reply(200); storageApiScope .head( `${defaultStorageApiBaseUri.path()}/${defaultDatasetBucketName}/ds-1/dist-2/test-file2.pdf` ) - .query(true) .reply(200); ["dist-1", "dist-2"].forEach((distId) => { @@ -160,6 +165,8 @@ describe("Test Internal Storage URL", function (this: Mocha.Suite) { registry, defaultStorageApiBaseUrl, defaultDatasetBucketName, + jwtSecret, + actionUserId, 0, 0, {},