datacube: simplify caching support (#3846)
* testing: modify ag-grid tests

* cleanup post #3849

* datacube: cleanup exposed utilities

* datacube: simplify #3861

* datacube: cleanup post #3862
akphi authored Feb 5, 2025
1 parent 035582f commit 8793609
Showing 45 changed files with 715 additions and 618 deletions.
3 changes: 3 additions & 0 deletions .changeset/twenty-guests-grab.md
@@ -0,0 +1,3 @@
---
'@finos/legend-query-builder': patch
---
7 changes: 7 additions & 0 deletions .changeset/unlucky-shirts-flash.md
@@ -0,0 +1,7 @@
---
'@finos/legend-application-data-cube': patch
'@finos/legend-application-query': patch
'@finos/legend-query-builder': patch
'@finos/legend-application': patch
'@finos/legend-data-cube': patch
---
10 changes: 10 additions & 0 deletions .changeset/witty-rivers-sneeze.md
@@ -0,0 +1,10 @@
---
'@finos/legend-vscode-extension-dependencies': patch
'@finos/legend-application-data-cube': patch
'@finos/legend-application-query': patch
'@finos/legend-application-repl': patch
'@finos/legend-query-builder': patch
'@finos/legend-application': patch
'@finos/legend-data-cube': patch
'@finos/legend-graph': patch
---
@@ -25,7 +25,7 @@ export const LEGEND_DATA_CUBE_ROUTE_PATTERN = Object.freeze({
QUERY_BUILDER: `/:${LEGEND_DATA_CUBE_ROUTE_PATTERN_TOKEN.QUERY_ID}?`,
});

export type LegendDataCubeQueryBuilderQueryPathParams = {
export type LegendDataCubeQueryBuilderPathParams = {
[LEGEND_DATA_CUBE_ROUTE_PATTERN_TOKEN.QUERY_ID]: string;
};

@@ -14,11 +14,7 @@
* limitations under the License.
*/

import {
APPLICATION_EVENT,
BrowserEnvironmentProvider,
useApplicationStore,
} from '@finos/legend-application';
import { BrowserEnvironmentProvider } from '@finos/legend-application';
import { Route, Routes } from '@finos/legend-application/browser';
import {
LegendDataCubeFrameworkProvider,
@@ -27,11 +23,9 @@ import {
import { observer } from 'mobx-react-lite';
import { LegendDataCubeQueryBuilder } from './query-builder/LegendDataCubeQueryBuilder.js';
import { LEGEND_DATA_CUBE_ROUTE_PATTERN } from '../__lib__/LegendDataCubeNavigation.js';
import { LogEvent } from '@finos/legend-shared';
import { useEffect } from 'react';

const LegendDataCubeWebApplicationRouter = observer(() => {
const application = useApplicationStore();
const store = useLegendDataCubeBaseStore();

useEffect(() => {
@@ -40,24 +34,6 @@ const LegendDataCubeWebApplicationRouter = observer(() => {
.catch((error) => store.alertService.alertUnhandledError(error));
}, [store]);

useEffect(() => {
application.navigationService.navigator.blockNavigation(
// Only block navigation in production
// eslint-disable-next-line no-process-env
[() => process.env.NODE_ENV === 'production'],
undefined,
() => {
application.logService.warn(
LogEvent.create(APPLICATION_EVENT.NAVIGATION_BLOCKED),
`Navigation from the application is blocked`,
);
},
);
return (): void => {
application.navigationService.navigator.unblockNavigation();
};
}, [application]);

return (
<div className="h-full">
{store.initializeState.hasSucceeded && (
@@ -29,11 +29,10 @@ import {
import { useParams } from '@finos/legend-application/browser';
import {
LEGEND_DATA_CUBE_ROUTE_PATTERN_TOKEN,
type LegendDataCubeQueryBuilderQueryPathParams,
type LegendDataCubeQueryBuilderPathParams,
} from '../../__lib__/LegendDataCubeNavigation.js';
import { useEffect } from 'react';
import { LegendDataCubeSettingStorageKey } from '../../__lib__/LegendDataCubeSetting.js';
import { assertErrorThrown, type PlainObject } from '@finos/legend-shared';

const LegendDataCubeQueryBuilderHeader = observer(() => {
const store = useLegendDataCubeQueryBuilderStore();
@@ -67,37 +66,37 @@ export const LegendDataCubeQueryBuilder = withLegendDataCubeQueryBuilderStore(
const store = useLegendDataCubeQueryBuilderStore();
const builder = store.builder;
const application = store.application;
const params = useParams<LegendDataCubeQueryBuilderQueryPathParams>();
const params = useParams<LegendDataCubeQueryBuilderPathParams>();
const queryId = params[LEGEND_DATA_CUBE_ROUTE_PATTERN_TOKEN.QUERY_ID];
const sourceData =
application.navigationService.navigator.getCurrentLocationParameterValue(
LEGEND_DATA_CUBE_ROUTE_PATTERN_TOKEN.SOURCE_DATA,
);

useEffect(() => {
application.navigationService.navigator.blockNavigation(
// Only block navigation in production, in development, we should have
// the flexibility to reload the page quickly
// eslint-disable-next-line no-process-env
[() => process.env.NODE_ENV === 'production'],
);
return (): void => {
application.navigationService.navigator.unblockNavigation();
};
}, [application]);

useEffect(() => {
if (sourceData) {
try {
const sourceDataJson = JSON.parse(
decodeURIComponent(atob(sourceData)),
) as PlainObject;
store.newQueryState
.finalize(sourceDataJson)
.catch((error) => store.alertService.alertUnhandledError(error));
} catch (error) {
assertErrorThrown(error);
}
} else if (queryId !== store.builder?.persistentQuery?.id) {
store
.loadQuery(queryId)
.catch((error) => store.alertService.alertUnhandledError(error));
}
}, [store, queryId, sourceData]);
store
.loadQuery(queryId)
.catch((error) => store.alertService.alertUnhandledError(error));
}, [store, queryId]);

useEffect(() => {
if (!store.builder && !queryId && !sourceData) {
store.loader.display.open();
}
}, [store, queryId, sourceData]);
store.engine
.initializeCacheManager()
.catch((error) => store.alertService.alertUnhandledError(error));
return () => {
store.engine
.disposeCacheManager()
.catch((error) => store.alertService.alertUnhandledError(error));
};
}, [store]);

if (!builder) {
return (
@@ -41,6 +41,7 @@ import {
} from '@finos/legend-data-cube';
import { CODE_EDITOR_LANGUAGE } from '@finos/legend-code-editor';
import { useLegendDataCubeQueryBuilderStore } from '../LegendDataCubeQueryBuilderStoreProvider.js';
import { useApplicationStore } from '@finos/legend-application';

const LegendQuerySearcher = observer((props: { state: QueryLoaderState }) => {
const { state } = props;
@@ -252,18 +253,33 @@ const LegendQuerySearcher = observer((props: { state: QueryLoaderState }) => {
export const LegendQueryDataCubeSourceBuilder = observer(
(props: { sourceBuilder: LegendQueryDataCubeSourceBuilderState }) => {
const { sourceBuilder } = props;
const application = useApplicationStore();
const store = useLegendDataCubeQueryBuilderStore();
const query = sourceBuilder.query;

if (!query) {
return <LegendQuerySearcher state={sourceBuilder.queryLoader} />;
}
return (
<div className="h-full">
<div className="mb-0.5 flex h-[60px] w-full border border-neutral-200 bg-neutral-100">
<div className="relative mb-0.5 flex h-[60px] w-full border border-neutral-200 bg-neutral-100">
<div className="w-full">
<div className="h-6 w-4/5 overflow-hidden text-ellipsis whitespace-nowrap px-1.5 leading-6">
{query.name}
</div>
<button
className="absolute right-1 top-1 flex aspect-square w-5 items-center justify-center text-neutral-500"
title="Copy ID to clipboard"
onClick={() => {
application.clipboardService
.copyTextToClipboard(query.id)
.catch((error) =>
store.alertService.alertUnhandledError(error),
);
}}
>
<DataCubeIcon.Clipboard />
</button>
<div className="flex h-[18px] items-start justify-between px-1.5 text-sm text-neutral-500">
{`[ ${generateGAVCoordinates(
query.groupId,
@@ -18,19 +18,31 @@ import * as duckdb from '@duckdb/duckdb-wasm';
import duckdb_wasm from '@duckdb/duckdb-wasm/dist/duckdb-mvp.wasm';
import duckdb_wasm_next from '@duckdb/duckdb-wasm/dist/duckdb-eh.wasm';
import {
PRIMITIVE_TYPE,
TDSExecutionResult,
TDSRow,
TabularDataSet,
} from '@finos/legend-graph';
import type { AsyncDuckDBConnection } from '@duckdb/duckdb-wasm';
import { assertNonNullable } from '@finos/legend-shared';
import { assertNonNullable, guaranteeNonNullable } from '@finos/legend-shared';
import type { CachedDataCubeSource } from '@finos/legend-data-cube';

export class LegendDataCubeDataCubeCacheManager {
private static readonly DUCKDB_DEFAULT_SCHEMA_NAME = 'main'; // See https://duckdb.org/docs/sql/statements/use.html
private static readonly TABLE_NAME_PREFIX = 'cache';
private static tableCounter = 0;

export class LegendDataCubeDataCubeCacheEngine {
private _database?: duckdb.AsyncDuckDB | undefined;
private _connection?: AsyncDuckDBConnection | undefined;

// Documentation: https://duckdb.org/docs/api/wasm/instantiation.html
async initializeDuckDb(result: TDSExecutionResult) {
private get database(): duckdb.AsyncDuckDB {
return guaranteeNonNullable(
this._database,
`Cache manager database not initialized`,
);
}

async initialize() {
// Initialize DuckDB with WASM
// See: https://duckdb.org/docs/api/wasm/instantiation.html
const MANUAL_BUNDLES: duckdb.DuckDBBundles = {
mvp: {
// eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
@@ -52,20 +64,56 @@ export class LegendDataCubeDataCubeCacheEngine {
// Select a bundle based on browser checks
const bundle = await duckdb.selectBundle(MANUAL_BUNDLES);
// Instantiate the asynchronous version of DuckDB-wasm
assertNonNullable(bundle.mainWorker, `Can't initialize duck db`);
assertNonNullable(
bundle.mainWorker,
`Can't initialize cache manager: DuckDB main worker not initialized`,
);
const worker = new Worker(bundle.mainWorker);
const logger = new duckdb.ConsoleLogger();
this._database = new duckdb.AsyncDuckDB(logger, worker);
await this._database.instantiate(bundle.mainModule, bundle.pthreadWorker);
this._connection = await this._database.connect();
const database = new duckdb.AsyncDuckDB(logger, worker);
await database.instantiate(bundle.mainModule, bundle.pthreadWorker);
this._database = database;
}

async cache(result: TDSExecutionResult) {
const schema =
LegendDataCubeDataCubeCacheManager.DUCKDB_DEFAULT_SCHEMA_NAME;
LegendDataCubeDataCubeCacheManager.tableCounter += 1;
const table = `${LegendDataCubeDataCubeCacheManager.TABLE_NAME_PREFIX}${LegendDataCubeDataCubeCacheManager.tableCounter}`;

const connection = await this.database.connect();

// TODO: review if we can improve performance here using CSV/Arrow for ingestion
const columns: string[] = [];
result.builder.columns.forEach((col) =>
columns.push(`"${col.name}" ${this.getDuckDbType(col.type)}`),
);
result.builder.columns.forEach((col) => {
let colType: string;
switch (col.type as string) {
case PRIMITIVE_TYPE.BOOLEAN: {
colType = 'BOOLEAN';
break;
}
case PRIMITIVE_TYPE.NUMBER: {
colType = 'DOUBLE';
break;
}
case PRIMITIVE_TYPE.INTEGER: {
colType = 'INTEGER';
break;
}
case PRIMITIVE_TYPE.DATE: {
colType = 'TIMESTAMP';
break;
}
case PRIMITIVE_TYPE.STRING:
default: {
colType = 'VARCHAR';
}
}
columns.push(`"${col.name}" ${colType}`);
});

const CREATE_TABLE_SQL = `CREATE TABLE cached_tbl (${columns.join(',')})`;
await this._connection.query(CREATE_TABLE_SQL);
const CREATE_TABLE_SQL = `CREATE TABLE ${schema}.${table} (${columns.join(',')})`;
await connection.query(CREATE_TABLE_SQL);

const rowString: string[] = [];

@@ -81,15 +129,19 @@
rowString.push(`(${updatedRows.join(',')})`);
});

const INSERT_TABLE_SQL = `INSERT INTO cached_tbl VALUES ${rowString.join(',')}`;
const INSERT_TABLE_SQL = `INSERT INTO ${schema}.${table} VALUES ${rowString.join(',')}`;

await this._connection.query(INSERT_TABLE_SQL);
await connection.query(INSERT_TABLE_SQL);
await connection.close();

return { table, schema, rowCount: result.result.rows.length };
}

async runQuery(sql: string) {
const result = await this._connection?.query(sql);
const columnNames = Object.keys(result?.toArray().at(0));
const rows = result?.toArray().map((row) => {
async runSQLQuery(sql: string) {
const connection = await this.database.connect();
const result = (await connection.query(sql)).toArray();
const columnNames = Object.keys(result.at(0));
const rows = result.map((row) => {
const values = new TDSRow();
values.values = columnNames.map(
(column) => row[column] as string | number | boolean | null,
Expand All @@ -99,34 +151,20 @@ export class LegendDataCubeDataCubeCacheEngine {
const tdsExecutionResult = new TDSExecutionResult();
const tds = new TabularDataSet();
tds.columns = columnNames;
tds.rows = rows !== undefined ? rows : [new TDSRow()];
tds.rows = rows;
tdsExecutionResult.result = tds;
return tdsExecutionResult;
}

async clearDuckDb() {
await this._connection?.close();
await this._database?.flushFiles();
await this._database?.terminate();
async disposeCache(source: CachedDataCubeSource) {
const connection = await this.database.connect();
const DROP_TABLE_SQL = `DROP TABLE IF EXISTS "${source.schema}.${source.table}"`;
await connection.query(DROP_TABLE_SQL);
await connection.close();
}

private getDuckDbType(type: string | undefined): string {
switch (type?.toLowerCase()) {
//TODO: mapping from tds build to duckdb data types
case 'string':
return 'VARCHAR';
case 'boolean':
return 'BOOLEAN';
case 'bigint':
return 'BIGINT';
case 'number':
return 'DOUBLE';
case 'integer':
return 'INTEGER';
case 'date':
return 'TIMESTAMP';
default:
return 'VARCHAR';
}
async dispose() {
await this._database?.flushFiles();
await this._database?.terminate();
}
}
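
For orientation, here is a minimal usage sketch of the renamed LegendDataCubeDataCubeCacheManager shown above. It is not part of this commit's diff: the import path, the calling function, and the direct instantiation are assumptions; only the method names (initialize, cache, runSQLQuery, dispose) and the shape of the value returned by cache come from the code in the diff.

// Hypothetical sketch only -- import path and wiring are assumed, not taken from this commit.
import type { TDSExecutionResult } from '@finos/legend-graph';
import { LegendDataCubeDataCubeCacheManager } from './LegendDataCubeDataCubeCacheManager.js';

export async function previewCachedResult(result: TDSExecutionResult) {
  const cacheManager = new LegendDataCubeDataCubeCacheManager();
  // Spin up DuckDB-WASM once per session (the query builder drives this via engine.initializeCacheManager()).
  await cacheManager.initialize();
  // Ingest the TDS result into a generated table (e.g. main.cache1) and get its coordinates back.
  const { schema, table, rowCount } = await cacheManager.cache(result);
  // Subsequent queries run locally against the cached table.
  const preview = await cacheManager.runSQLQuery(
    `SELECT * FROM ${schema}.${table} LIMIT ${Math.min(rowCount, 10)}`,
  );
  // Tear everything down when done (engine.disposeCacheManager() in the diff above).
  await cacheManager.dispose();
  return preview;
}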