From 505f54f0f2b243ba350b4f77d950541fb0768c7c Mon Sep 17 00:00:00 2001
From: gs-gunjan
Date: Thu, 13 Feb 2025 18:00:45 +0530
Subject: [PATCH] datacube: add csv file as one of the data cube sources for hosted datacube

---
 .changeset/early-socks-enjoy.md               |   7 +
 .../builder/LegendDataCubeBuilder.tsx         |   6 +-
 .../builder/LegendDataCubeCreator.tsx         |   9 +
 .../CSVFileQueryDataCubeSourceBuilder.tsx     |  53 +++++
 .../stores/LegendDataCubeDataCubeEngine.ts    | 219 +++++++++++++++++-
 ...nager.ts => LegendDataCubeDuckDBEngine.ts} |  88 +++++--
 .../builder/LegendDataCubeCreatorState.tsx    |   6 +
 .../CSVFileQueryDataCubeSourceBuilderState.ts |  89 +++++++
 .../LegendDataCubeSourceBuilderState.ts       |   1 +
 .../model/CSVFileQueryDataCubeSource.ts       |  61 +++++
 .../src/stores/core/DataCubeEngine.tsx        |   5 +
 .../stores/core/model/CachedDataCubeSource.ts |   2 -
 .../src/format/FormatterUtils.ts              |  11 +-
 13 files changed, 532 insertions(+), 25 deletions(-)
 create mode 100644 .changeset/early-socks-enjoy.md
 create mode 100644 packages/legend-application-data-cube/src/components/builder/source/CSVFileQueryDataCubeSourceBuilder.tsx
 rename packages/legend-application-data-cube/src/stores/{LegendDataCubeCacheManager.ts => LegendDataCubeDuckDBEngine.ts} (68%)
 create mode 100644 packages/legend-application-data-cube/src/stores/builder/source/CSVFileQueryDataCubeSourceBuilderState.ts
 create mode 100644 packages/legend-application-data-cube/src/stores/model/CSVFileQueryDataCubeSource.ts

diff --git a/.changeset/early-socks-enjoy.md b/.changeset/early-socks-enjoy.md
new file mode 100644
index 0000000000..16e171ebe5
--- /dev/null
+++ b/.changeset/early-socks-enjoy.md
@@ -0,0 +1,7 @@
+---
+'@finos/legend-application-data-cube': minor
+'@finos/legend-data-cube': patch
+'@finos/legend-shared': patch
+---
+
+DataCube: Add CSV file as one of the data cube sources
diff --git a/packages/legend-application-data-cube/src/components/builder/LegendDataCubeBuilder.tsx b/packages/legend-application-data-cube/src/components/builder/LegendDataCubeBuilder.tsx
index 0e35369527..5970de93e7 100644
--- a/packages/legend-application-data-cube/src/components/builder/LegendDataCubeBuilder.tsx
+++ b/packages/legend-application-data-cube/src/components/builder/LegendDataCubeBuilder.tsx
@@ -38,6 +38,7 @@ import {
 import { useEffect } from 'react';
 import { LegendDataCubeSettingStorageKey } from '../../__lib__/LegendDataCubeSetting.js';
 import type { LegendDataCubeBuilderStore } from '../../stores/builder/LegendDataCubeBuilderStore.js';
+import { CSVFileQueryDataCubeSource } from '../../stores/model/CSVFileQueryDataCubeSource.js';

 const LegendDataCubeBuilderHeader = observer(() => {
   const store = useLegendDataCubeBuilderStore();
@@ -57,7 +58,10 @@ const LegendDataCubeBuilderHeader = observer(() => {
           store.saverDisplay.open()}
       >
         Save DataCube
diff --git a/packages/legend-application-data-cube/src/components/builder/LegendDataCubeCreator.tsx b/packages/legend-application-data-cube/src/components/builder/LegendDataCubeCreator.tsx
index 82f089e8ef..d5538899ab 100644
--- a/packages/legend-application-data-cube/src/components/builder/LegendDataCubeCreator.tsx
+++ b/packages/legend-application-data-cube/src/components/builder/LegendDataCubeCreator.tsx
@@ -28,6 +28,8 @@ import { LegendQueryDataCubeSourceBuilder } from './source/LegendQueryDataCubeSourceBuilder.js';
 import { AdhocQueryDataCubeSourceBuilder } from './source/AdhocQueryDataCubeSourceBuilder.js';
 import { AdhocQueryDataCubeSourceBuilderState } from '../../stores/builder/source/AdhocQueryDataCubeSourceBuilderState.js';
 import { useLegendDataCubeBuilderStore } from './LegendDataCubeBuilderStoreProvider.js';
+import { CSVFileQueryDataCubeSourceBuilderState } from '../../stores/builder/source/CSVFileQueryDataCubeSourceBuilderState.js';
+import { CSVFileQueryDataCubeSourceBuilder } from './source/CSVFileQueryDataCubeSourceBuilder.js';

 export const LegendDataCubeCreator = observer(() => {
   const store = useLegendDataCubeBuilderStore();
@@ -61,6 +63,7 @@ export const LegendDataCubeCreator = observer(() => {
                 {[
                   LegendDataCubeSourceBuilderType.LEGEND_QUERY,
                   LegendDataCubeSourceBuilderType.ADHOC_QUERY,
+                  LegendDataCubeSourceBuilderType.CSV_FILE_QUERY,
                 ].map((type) => (

           {sourceBuilder instanceof AdhocQueryDataCubeSourceBuilderState && (
             <AdhocQueryDataCubeSourceBuilder
               sourceBuilder={sourceBuilder}
             />
           )}
+          {sourceBuilder instanceof
+            CSVFileQueryDataCubeSourceBuilderState && (
+            <CSVFileQueryDataCubeSourceBuilder
+              sourceBuilder={sourceBuilder}
+            />
+          )}
diff --git a/packages/legend-application-data-cube/src/components/builder/source/CSVFileQueryDataCubeSourceBuilder.tsx b/packages/legend-application-data-cube/src/components/builder/source/CSVFileQueryDataCubeSourceBuilder.tsx
new file mode 100644
index 0000000000..d22239cf6a
--- /dev/null
+++ b/packages/legend-application-data-cube/src/components/builder/source/CSVFileQueryDataCubeSourceBuilder.tsx
@@ -0,0 +1,53 @@
+/**
+ * Copyright (c) 2020-present, Goldman Sachs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { observer } from 'mobx-react-lite';
+import type { CSVFileQueryDataCubeSourceBuilderState } from '../../../stores/builder/source/CSVFileQueryDataCubeSourceBuilderState.js';
+import { csvStringify, parseCSVFile } from '@finos/legend-shared';
+
+export const CSVFileQueryDataCubeSourceBuilder = observer(
+  (props: { sourceBuilder: CSVFileQueryDataCubeSourceBuilderState }) => {
+    const { sourceBuilder } = props;
+
+    const handleFileChange = (e: React.ChangeEvent<HTMLInputElement>) => {
+      const file = e.target.files ? e.target.files[0] : null;
+
+      if (file) {
+        parseCSVFile(file, {
+          complete: (result) => {
+            // Set the parsed data to state
+            sourceBuilder.setFileData(
+              csvStringify(result.data, { escapeChar: `'`, quoteChar: `'` }),
+            );
+            sourceBuilder.setFileName(file.name);
+            sourceBuilder.setRowCount(result.data.length);
+          },
+          header: true,
+          dynamicTyping: false,
+          skipEmptyLines: true,
+        });
+      }
+    };
+
+    return (
+      <input
+        type="file"
+        accept=".csv"
+        onChange={handleFileChange}
+      />
+    );
+  },
+);
diff --git a/packages/legend-application-data-cube/src/stores/LegendDataCubeDataCubeEngine.ts b/packages/legend-application-data-cube/src/stores/LegendDataCubeDataCubeEngine.ts
index b21b544367..8d06a8e43c 100644
--- a/packages/legend-application-data-cube/src/stores/LegendDataCubeDataCubeEngine.ts
+++ b/packages/legend-application-data-cube/src/stores/LegendDataCubeDataCubeEngine.ts
@@ -72,6 +72,12 @@ import {
   PackageableElementPointerType,
   DatabaseType,
   PRIMITIVE_TYPE,
+  V1_BigInt,
+  V1_Decimal,
+  V1_Double,
+  V1_Timestamp,
+  V1_TinyInt,
+  V1_SmallInt,
 } from '@finos/legend-graph';
 import {
   _elementPtr,
@@ -104,7 +110,10 @@ import {
   filterByType,
 } from '@finos/legend-shared';
 import type { LegendDataCubeApplicationStore } from './LegendDataCubeBaseStore.js';
-import { LegendDataCubeDataCubeCacheManager } from './LegendDataCubeCacheManager.js';
+import {
+  DUCKDB_DATA_TYPES,
+  LegendDataCubeDuckDBEngine,
+} from './LegendDataCubeDuckDBEngine.js';
 import { APPLICATION_EVENT } from '@finos/legend-application';
 import {
   LEGEND_QUERY_DATA_CUBE_SOURCE_TYPE,
@@ -116,13 +125,18 @@ import {
   resolveVersion,
   type DepotServerClient,
 } from '@finos/legend-server-depot';
+import {
+  CSV_FILE_QUERY_DATA_CUBE_SOURCE_TYPE,
+  CSVFileQueryDataCubeSource,
+  RawCSVFileQueryDataCubeSource,
+} from './model/CSVFileQueryDataCubeSource.js';

 export class LegendDataCubeDataCubeEngine extends DataCubeEngine {
   private readonly _application: LegendDataCubeApplicationStore;
   private readonly _depotServerClient: DepotServerClient;
   private readonly _engineServerClient: V1_EngineServerClient;
   private readonly _graphManager: V1_PureGraphManager;
-  private readonly _cacheManager: LegendDataCubeDataCubeCacheManager;
+  private readonly _cacheManager: LegendDataCubeDuckDBEngine;

   constructor(
     application: LegendDataCubeApplicationStore,
@@ -136,7 +150,7 @@ export class LegendDataCubeDataCubeEngine extends DataCubeEngine {
     this._depotServerClient = depotServerClient;
     this._engineServerClient = engineServerClient;
     this._graphManager = graphManager;
-    this._cacheManager = new LegendDataCubeDataCubeCacheManager();
+    this._cacheManager = new LegendDataCubeDuckDBEngine();
   }

   async initializeCacheManager() {
@@ -176,6 +190,40 @@ export class LegendDataCubeDataCubeEngine extends DataCubeEngine {
         }
         return source;
       }
+      case CSV_FILE_QUERY_DATA_CUBE_SOURCE_TYPE: {
+        const rawSource =
+          RawCSVFileQueryDataCubeSource.serialization.fromJson(value);
+        const source = new CSVFileQueryDataCubeSource();
+        source.fileName = rawSource.fileName;
+        source.count = rawSource.count;
+        source.db = rawSource.db;
+        source.model = rawSource.model;
+        source.runtime = rawSource.runtime;
+        source.schema = rawSource.schema;
+        source.table = rawSource.table;
+
+        const query = new V1_ClassInstance();
+        query.type = V1_ClassInstanceType.RELATION_STORE_ACCESSOR;
+        const storeAccessor = new V1_RelationStoreAccessor();
+        storeAccessor.path = [source.db, source.schema, source.table];
+        query.value = storeAccessor;
+        source.query = query;
+
+        try {
+          source.columns = (
+            await this._getLambdaRelationType(
+              this.serializeValueSpecification(_lambda([], [source.query])),
+              serialize(source.model),
+            )
+          ).columns;
+        } catch (error) {
+          assertErrorThrown(error);
+          throw new Error(
+            `Can't get query result columns. Make sure the source query returns a relation (i.e. typed TDS). Error: ${error.message}`,
+          );
+        }
+        return source;
+      }
       case LEGEND_QUERY_DATA_CUBE_SOURCE_TYPE: {
         const rawSource =
           RawLegendQueryDataCubeSource.serialization.fromJson(value);
@@ -455,6 +503,34 @@ export class LegendDataCubeDataCubeEngine extends DataCubeEngine {
         result: await this._cacheManager.runSQLQuery(sql),
         executionTime: endTime - startTime,
       };
+    } else if (source instanceof CSVFileQueryDataCubeSource) {
+      // get the execution plan to extract the generated SQL to run against cached DB
+      const executionPlan = await this._generateExecutionPlan(
+        query,
+        source.model,
+        [],
+        // NOTE: for caching, we're using DuckDB, but its protocol models
+        // are not available in the latest production protocol version V1_33_0, so
+        // we have to force using VX_X_X
+        // once we either cut another protocol version or backport the DuckDB models
+        // to V1_33_0, we can remove this
+        { ...options, clientVersion: PureClientVersion.VX_X_X },
+      );
+      const sql = guaranteeNonNullable(
+        executionPlan instanceof V1_SimpleExecutionPlan
+          ? executionPlan.rootExecutionNode.executionNodes
+              .filter(filterByType(V1_SQLExecutionNode))
+              .at(-1)?.sqlQuery
+          : undefined,
+        `Can't process execution plan: failed to extract generated SQL`,
+      );
+      const endTime = performance.now();
+      return {
+        executedQuery: await queryCodePromise,
+        executedSQL: sql,
+        result: await this._cacheManager.runSQLQuery(sql),
+        executionTime: endTime - startTime,
+      };
     } else {
       throw new UnsupportedOperationError(
         `Can't execute query with unsupported source`,
@@ -500,10 +576,139 @@ export class LegendDataCubeDataCubeEngine extends DataCubeEngine {
         DataCubeFunction.FROM,
         [_elementPtr(source.runtime)].filter(isNonNullable),
       );
+    } else if (source instanceof CSVFileQueryDataCubeSource) {
+      return _function(
+        DataCubeFunction.FROM,
+        [_elementPtr(source.runtime)].filter(isNonNullable),
+      );
     }
     return undefined;
   }

+  // --------------------------------- FILE INGEST -----------------------------------
+
+  override async ingestFileData(
+    csvString: string,
+  ): Promise<CSVFileQueryDataCubeSource> {
+    const {
+      schema: schemaName,
+      table: tableName,
+      dbSchema,
+    } = await this._cacheManager.ingestFileData(csvString);
+
+    const packagePath = 'ingest::local';
+
+    const table = new V1_Table();
+    table.name = tableName;
+    table.columns = dbSchema.map((col) => {
+      const column = new V1_Column();
+      column.name = col[0] as string;
+      // TODO: check if we have a DuckDB enum mapping
+      switch (col[1] as string) {
+        case DUCKDB_DATA_TYPES.BIGINT: {
+          column.type = new V1_BigInt();
+          break;
+        }
+        case DUCKDB_DATA_TYPES.BIT: {
+          column.type = new V1_Bit();
+          break;
+        }
+        case DUCKDB_DATA_TYPES.BOOLEAN: {
+          // TODO: understand why boolean is not present in relationalDataType
+          column.type = new V1_VarChar();
+          break;
+        }
+        case DUCKDB_DATA_TYPES.DATE: {
+          column.type = new V1_Date();
+          break;
+        }
+        case DUCKDB_DATA_TYPES.DECIMAL: {
+          column.type = new V1_Decimal();
+          break;
+        }
+        case DUCKDB_DATA_TYPES.DOUBLE: {
+          column.type = new V1_Double();
+          break;
+        }
+        case DUCKDB_DATA_TYPES.FLOAT: {
+          column.type = new V1_Float();
+          break;
+        }
+        case DUCKDB_DATA_TYPES.INTEGER: {
+          column.type = new V1_Integer();
+          break;
+        }
+        case DUCKDB_DATA_TYPES.SMALLINT: {
+          column.type = new V1_SmallInt();
+          break;
+        }
+        case DUCKDB_DATA_TYPES.TIMESTAMP: {
+          column.type = new V1_Timestamp();
+          break;
+        }
+        case DUCKDB_DATA_TYPES.TINYINT: {
+          column.type = new V1_TinyInt();
+          break;
+        }
+        case DUCKDB_DATA_TYPES.VARCHAR: {
+          column.type = new V1_VarChar();
+          break;
+        }
+        default: {
+          throw new UnsupportedOperationError(
+            `Can't ingest file data: failed to find matching relational data type for DuckDB type '${col[1]}' when synthesizing table definition`,
+          );
+        }
+      }
+      return column;
+    });
+
+    const schema = new V1_Schema();
+    schema.name = schemaName;
+    schema.tables = [table];
+    const database = new V1_Database();
+    database.name = 'db';
+    database.package = packagePath;
+    database.schemas = [schema];
+
+    const connection = new V1_RelationalDatabaseConnection();
+    connection.databaseType = DatabaseType.DuckDB;
+    connection.type = DatabaseType.DuckDB;
+    const dataSourceSpec = new V1_DuckDBDatasourceSpecification();
+    dataSourceSpec.path = '/tmpIngestFile';
+    connection.store = database.path;
+    connection.datasourceSpecification = dataSourceSpec;
+    connection.authenticationStrategy = new V1_TestAuthenticationStrategy();
+
+    const runtime = new V1_EngineRuntime();
+    const storeConnections = new V1_StoreConnections();
+    storeConnections.store = new V1_PackageableElementPointer(
+      PackageableElementPointerType.STORE,
+      database.path,
+    );
+    const identifiedConnection = new V1_IdentifiedConnection();
+    identifiedConnection.connection = connection;
+    identifiedConnection.id = 'c0';
+    storeConnections.storeConnections = [identifiedConnection];
+    runtime.connections = [storeConnections];
+
+    const packageableRuntime = new V1_PackageableRuntime();
+    packageableRuntime.runtimeValue = runtime;
+    packageableRuntime.package = packagePath;
+    packageableRuntime.name = 'rt';
+
+    const model = new V1_PureModelContextData();
+    model.elements = [database, packageableRuntime];
+
+    const csvFileSource = new CSVFileQueryDataCubeSource();
+    csvFileSource.model = model;
+    csvFileSource.runtime = packageableRuntime.path;
+    csvFileSource.db = database.path;
+    csvFileSource.schema = schema.name;
+    csvFileSource.table = table.name;
+    return csvFileSource;
+  }
+
   // ---------------------------------- CACHING --------------------------------------

   override async initializeCache(
@@ -527,7 +732,7 @@ export class LegendDataCubeDataCubeEngine extends DataCubeEngine {
     } = await this._cacheManager.cache(result.result);

     // model
-    const pacakgePath = 'local';
+    const packagePath = 'local';

     const table = new V1_Table();
     table.name = tableName;
@@ -573,7 +778,7 @@ export class LegendDataCubeDataCubeEngine extends DataCubeEngine {
     schema.tables = [table];
     const database = new V1_Database();
     database.name = 'db';
-    database.package = pacakgePath;
+    database.package = packagePath;
     database.schemas = [schema];

     const connection = new V1_RelationalDatabaseConnection();
@@ -599,7 +804,7 @@ export class LegendDataCubeDataCubeEngine extends DataCubeEngine {
     const packageableRuntime = new V1_PackageableRuntime();
     packageableRuntime.runtimeValue = runtime;
-    packageableRuntime.package = pacakgePath;
+    packageableRuntime.package = packagePath;
     packageableRuntime.name = 'rt';

     const model = new V1_PureModelContextData();
@@ -640,6 +845,8 @@ export class LegendDataCubeDataCubeEngine extends DataCubeEngine {
       return this._getLambdaRelationType(query, source.model);
     } else if (source instanceof CachedDataCubeSource) {
       return this._getLambdaRelationType(query, serialize(source.model));
+    } else if (source instanceof CSVFileQueryDataCubeSource) {
+      return this._getLambdaRelationType(query, serialize(source.model));
     }
     throw new UnsupportedOperationError(
       `Can't get relation type for lambda with unsupported source`,
diff --git a/packages/legend-application-data-cube/src/stores/LegendDataCubeCacheManager.ts b/packages/legend-application-data-cube/src/stores/LegendDataCubeDuckDBEngine.ts
similarity index 68%
rename from packages/legend-application-data-cube/src/stores/LegendDataCubeCacheManager.ts
rename to packages/legend-application-data-cube/src/stores/LegendDataCubeDuckDBEngine.ts
index c1c9ce01be..ca0ad4cb38 100644
--- a/packages/legend-application-data-cube/src/stores/LegendDataCubeCacheManager.ts
+++ b/packages/legend-application-data-cube/src/stores/LegendDataCubeDuckDBEngine.ts
@@ -34,11 +34,20 @@ import {
 import type { CachedDataCubeSource } from '@finos/legend-data-cube';
 import { Type } from 'apache-arrow';

-export class LegendDataCubeDataCubeCacheManager {
+export class LegendDataCubeDuckDBEngine {
   private static readonly DUCKDB_DEFAULT_SCHEMA_NAME = 'main'; // See https://duckdb.org/docs/sql/statements/use.html
-  private static readonly TABLE_NAME_PREFIX = 'cache';
-  private static readonly CSV_FILE_NAME = 'data';
-  private static tableCounter = 0;
+  private static readonly CACHE_TABLE_NAME_PREFIX = 'cache';
+  private static readonly INGEST_TABLE_NAME_PREFIX = 'ingest';
+  private static readonly CACHE_FILE_NAME = 'cacheData';
+  private static readonly INGEST_FILE_DATA_FILE_NAME = 'ingestData';
+  private static cacheTableCounter = 0;
+  private static ingestFileTableCounter = 0;
+  // https://duckdb.org/docs/guides/meta/describe.html
+  private static readonly COLUMN_NAME = 'column_name';
+  private static readonly COLUMN_TYPE = 'column_type';
+  // Options for creating csv using papa parser: https://www.papaparse.com/docs#config
+  private static readonly ESCAPE_CHAR = `'`;
+  private static readonly QUOTE_CHAR = `'`;

   private _database?: duckdb.AsyncDuckDB | undefined;
@@ -85,11 +94,10 @@ export class LegendDataCubeDataCubeCacheManager {
   }

   async cache(result: TDSExecutionResult) {
-    const schema =
-      LegendDataCubeDataCubeCacheManager.DUCKDB_DEFAULT_SCHEMA_NAME;
-    LegendDataCubeDataCubeCacheManager.tableCounter += 1;
-    const table = `${LegendDataCubeDataCubeCacheManager.TABLE_NAME_PREFIX}${LegendDataCubeDataCubeCacheManager.tableCounter}`;
-    const csvFileName = `${LegendDataCubeDataCubeCacheManager.CSV_FILE_NAME}${LegendDataCubeDataCubeCacheManager.tableCounter}.csv`;
+    const schema = LegendDataCubeDuckDBEngine.DUCKDB_DEFAULT_SCHEMA_NAME;
+    LegendDataCubeDuckDBEngine.cacheTableCounter += 1;
+    const table = `${LegendDataCubeDuckDBEngine.CACHE_TABLE_NAME_PREFIX}${LegendDataCubeDuckDBEngine.cacheTableCounter}`;
+    const csvFileName = `${LegendDataCubeDuckDBEngine.CACHE_FILE_NAME}${LegendDataCubeDuckDBEngine.cacheTableCounter}.csv`;

     const connection = await this.database.connect();
@@ -99,8 +107,8 @@
     const data = result.result.rows.map((row) => row.values);

     const csv = csvStringify([columnNames, ...data], {
-      escapeChar: `'`,
-      quoteChar: `'`,
+      escapeChar: LegendDataCubeDuckDBEngine.ESCAPE_CHAR,
+      quoteChar: LegendDataCubeDuckDBEngine.QUOTE_CHAR,
     });

     await this._database?.registerFileText(csvFileName, csv);
@@ -111,14 +119,47 @@
       create: true,
       header: true,
       detect: true,
-      escape: `'`,
-      quote: `'`,
-      delimiter: ',',
+      escape: LegendDataCubeDuckDBEngine.ESCAPE_CHAR,
+      quote: LegendDataCubeDuckDBEngine.QUOTE_CHAR,
     });

     await connection.close();

-    return { table, schema, rowCount: result.result.rows.length };
+    return { schema, table, rowCount: result.result.rows.length };
+  }
+
+  async ingestFileData(csvString: string) {
+    const schema = LegendDataCubeDuckDBEngine.DUCKDB_DEFAULT_SCHEMA_NAME;
+    LegendDataCubeDuckDBEngine.ingestFileTableCounter += 1;
+    const table = `${LegendDataCubeDuckDBEngine.INGEST_TABLE_NAME_PREFIX}${LegendDataCubeDuckDBEngine.ingestFileTableCounter}`;
+    const csvFileName = `${LegendDataCubeDuckDBEngine.INGEST_FILE_DATA_FILE_NAME}${LegendDataCubeDuckDBEngine.ingestFileTableCounter}.csv`;
+
+    const connection = await this.database.connect();
+
+    await this._database?.registerFileText(csvFileName, csvString);
+
+    await connection.insertCSVFromPath(csvFileName, {
+      schema: schema,
+      name: table,
+      header: true,
+      detect: true,
+      escape: LegendDataCubeDuckDBEngine.ESCAPE_CHAR,
+      quote: LegendDataCubeDuckDBEngine.QUOTE_CHAR,
+    });
+
+    const dbSchemaResult = await connection.query(
+      `DESCRIBE ${schema}.${table}`,
+    );
+    const dbSchema = dbSchemaResult
+      .toArray()
+      .map((data) => [
+        data[LegendDataCubeDuckDBEngine.COLUMN_NAME],
+        data[LegendDataCubeDuckDBEngine.COLUMN_TYPE],
+      ]);
+
+    await connection.close();
+
+    return { schema, table, dbSchema };
   }

   async runSQLQuery(sql: string) {
@@ -216,3 +257,20 @@ export class LegendDataCubeDataCubeCacheManager {
     await this._database?.terminate();
   }
 }
+
+// https://duckdb.org/docs/sql/data_types/overview.html
+export const enum DUCKDB_DATA_TYPES {
+  // TODO: confirm this is in accordance with the engine
+  BIGINT = 'BIGINT',
+  BIT = 'BIT',
+  BOOLEAN = 'BOOLEAN',
+  DATE = 'DATE',
+  DECIMAL = 'DECIMAL',
+  DOUBLE = 'DOUBLE',
+  FLOAT = 'FLOAT',
+  INTEGER = 'INTEGER',
+  SMALLINT = 'SMALLINT',
+  TIMESTAMP = 'TIMESTAMP',
+  TINYINT = 'TINYINT',
+  VARCHAR = 'VARCHAR',
+}
diff --git a/packages/legend-application-data-cube/src/stores/builder/LegendDataCubeCreatorState.tsx b/packages/legend-application-data-cube/src/stores/builder/LegendDataCubeCreatorState.tsx
index 3ff0f18fc8..491ef2f5b3 100644
--- a/packages/legend-application-data-cube/src/stores/builder/LegendDataCubeCreatorState.tsx
+++ b/packages/legend-application-data-cube/src/stores/builder/LegendDataCubeCreatorState.tsx
@@ -41,6 +41,7 @@ import {
   type LegendDataCubeBuilderStore,
 } from './LegendDataCubeBuilderStore.js';
 import { generateBuilderRoute } from '../../__lib__/LegendDataCubeNavigation.js';
+import { CSVFileQueryDataCubeSourceBuilderState } from './source/CSVFileQueryDataCubeSourceBuilderState.js';

 const DEFAULT_SOURCE_TYPE = LegendDataCubeSourceBuilderType.LEGEND_QUERY;

@@ -105,6 +106,11 @@ export class LegendDataCubeCreatorState {
           this._application,
           this._engine,
         );
+      case LegendDataCubeSourceBuilderType.CSV_FILE_QUERY:
+        return new CSVFileQueryDataCubeSourceBuilderState(
+          this._application,
+          this._engine,
+        );
       default:
         throw new UnsupportedOperationError(
           `Can't create source builder for unsupported type '${type}'`,
diff --git a/packages/legend-application-data-cube/src/stores/builder/source/CSVFileQueryDataCubeSourceBuilderState.ts b/packages/legend-application-data-cube/src/stores/builder/source/CSVFileQueryDataCubeSourceBuilderState.ts
new file mode 100644
index 0000000000..af02ab4f17
--- /dev/null
+++ b/packages/legend-application-data-cube/src/stores/builder/source/CSVFileQueryDataCubeSourceBuilderState.ts
@@ -0,0 +1,89 @@
+/**
+ * Copyright (c) 2020-present, Goldman Sachs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { guaranteeType, type PlainObject } from '@finos/legend-shared';
+import {
+  LegendDataCubeSourceBuilderState,
+  LegendDataCubeSourceBuilderType,
+} from './LegendDataCubeSourceBuilderState.js';
+import type { LegendDataCubeApplicationStore } from '../../LegendDataCubeBaseStore.js';
+import { action, makeObservable, observable } from 'mobx';
+import type { LegendDataCubeDataCubeEngine } from '../../LegendDataCubeDataCubeEngine.js';
+import {
+  CSVFileQueryDataCubeSource,
+  RawCSVFileQueryDataCubeSource,
+} from '../../model/CSVFileQueryDataCubeSource.js';
+
+export class CSVFileQueryDataCubeSourceBuilderState extends LegendDataCubeSourceBuilderState {
+  fileData!: string;
+  fileName!: string;
+  rowCount!: number;
+
+  constructor(
+    application: LegendDataCubeApplicationStore,
+    engine: LegendDataCubeDataCubeEngine,
+  ) {
+    super(application, engine);
+    makeObservable(this, {
+      fileData: observable,
+      fileName: observable,
+      rowCount: observable,
+
+      setFileData: action,
+      setFileName: action,
+      setRowCount: action,
+    });
+  }
+
+  setFileData(data: string) {
+    this.fileData = data;
+  }
+
+  setFileName(fileName: string) {
+    this.fileName = fileName;
+  }
+
+  setRowCount(count: number) {
+    this.rowCount = count;
+  }
+
+  override get label(): LegendDataCubeSourceBuilderType {
+    return LegendDataCubeSourceBuilderType.CSV_FILE_QUERY;
+  }
+
+  override get isValid(): boolean {
+    return Boolean(this.fileData);
+  }
+
+  override async generateSourceData(): Promise<PlainObject> {
+    const csvDataSource = guaranteeType(
+      await this._engine.ingestFileData(this.fileData),
+      CSVFileQueryDataCubeSource,
+      `Can't generate data source`,
+    );
+
+    const rawCsvDataSource = new RawCSVFileQueryDataCubeSource();
+    rawCsvDataSource.count = this.rowCount;
+    rawCsvDataSource.fileName = this.fileName;
+    rawCsvDataSource.db = csvDataSource.db;
+    rawCsvDataSource.model = csvDataSource.model;
+    rawCsvDataSource.schema = csvDataSource.schema;
+    rawCsvDataSource.table = csvDataSource.table;
+    rawCsvDataSource.runtime = csvDataSource.runtime;
+
+    return RawCSVFileQueryDataCubeSource.serialization.toJson(rawCsvDataSource);
+  }
+}
diff --git a/packages/legend-application-data-cube/src/stores/builder/source/LegendDataCubeSourceBuilderState.ts b/packages/legend-application-data-cube/src/stores/builder/source/LegendDataCubeSourceBuilderState.ts
index 0d0800312b..44a27f0e59 100644
--- a/packages/legend-application-data-cube/src/stores/builder/source/LegendDataCubeSourceBuilderState.ts
+++ b/packages/legend-application-data-cube/src/stores/builder/source/LegendDataCubeSourceBuilderState.ts
@@ -22,6 +22,7 @@ import type { DataCubeConfiguration } from '@finos/legend-data-cube';

 export enum LegendDataCubeSourceBuilderType {
   LEGEND_QUERY = 'Legend Query',
   ADHOC_QUERY = 'Ad hoc Query',
+  CSV_FILE_QUERY = 'CSV File Query',
 }

 export abstract class LegendDataCubeSourceBuilderState {
diff --git a/packages/legend-application-data-cube/src/stores/model/CSVFileQueryDataCubeSource.ts b/packages/legend-application-data-cube/src/stores/model/CSVFileQueryDataCubeSource.ts
new file mode 100644
index 0000000000..e5cc414625
--- /dev/null
+++ b/packages/legend-application-data-cube/src/stores/model/CSVFileQueryDataCubeSource.ts
@@ -0,0 +1,61 @@
+/**
+ * Copyright (c) 2020-present, Goldman Sachs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { DataCubeSource } from '@finos/legend-data-cube';
+import {
+  V1_pureModelContextDataPropSchema,
+  type V1_PureModelContextData,
+} from '@finos/legend-graph';
+import {
+  SerializationFactory,
+  usingConstantValueSchema,
+} from '@finos/legend-shared';
+import { createModelSchema, primitive } from 'serializr';
+
+export const CSV_FILE_QUERY_DATA_CUBE_SOURCE_TYPE = 'csvFileQuery';
+
+export class CSVFileQueryDataCubeSource extends DataCubeSource {
+  model!: V1_PureModelContextData;
+  runtime!: string;
+  db!: string;
+  schema!: string;
+  table!: string;
+  count!: number;
+  fileName!: string;
+}
+
+export class RawCSVFileQueryDataCubeSource {
+  model!: V1_PureModelContextData;
+  runtime!: string;
+  db!: string;
+  schema!: string;
+  table!: string;
+  count!: number;
+  fileName!: string;
+
+  static readonly serialization = new SerializationFactory(
+    createModelSchema(RawCSVFileQueryDataCubeSource, {
+      _type: usingConstantValueSchema(CSV_FILE_QUERY_DATA_CUBE_SOURCE_TYPE),
+      model: V1_pureModelContextDataPropSchema,
+      runtime: primitive(),
+      db: primitive(),
+      schema: primitive(),
+      table: primitive(),
+      count: primitive(),
+      fileName: primitive(),
+    }),
+  );
+}
diff --git a/packages/legend-data-cube/src/stores/core/DataCubeEngine.tsx b/packages/legend-data-cube/src/stores/core/DataCubeEngine.tsx
index 14c1e8b2f5..fc60c71133 100644
--- a/packages/legend-data-cube/src/stores/core/DataCubeEngine.tsx
+++ b/packages/legend-data-cube/src/stores/core/DataCubeEngine.tsx
@@ -270,6 +270,11 @@ export abstract class DataCubeEngine {
     source: DataCubeSource,
   ): V1_AppliedFunction | undefined;

+  // ---------------------------------- FILE UPLOAD ------------------------------
+  async ingestFileData(csvString: string): Promise<DataCubeSource | undefined> {
+    return undefined;
+  }
+
   // ---------------------------------- CACHING ----------------------------------

   async initializeCache(
diff --git a/packages/legend-data-cube/src/stores/core/model/CachedDataCubeSource.ts b/packages/legend-data-cube/src/stores/core/model/CachedDataCubeSource.ts
index c55c901df9..6da837c9e2 100644
--- a/packages/legend-data-cube/src/stores/core/model/CachedDataCubeSource.ts
+++ b/packages/legend-data-cube/src/stores/core/model/CachedDataCubeSource.ts
@@ -17,8 +17,6 @@
 import type { V1_PureModelContextData } from '@finos/legend-graph';
 import { DataCubeSource } from './DataCubeSource.js';

-export const CACHED_DATA_CUBE_SOURCE_TYPE = 'cached';
-
 export class CachedDataCubeSource extends DataCubeSource {
   model!: V1_PureModelContextData;
   runtime!: string;
diff --git a/packages/legend-shared/src/format/FormatterUtils.ts b/packages/legend-shared/src/format/FormatterUtils.ts
index 8cd3b1d51c..0918f039ae 100644
--- a/packages/legend-shared/src/format/FormatterUtils.ts
+++ b/packages/legend-shared/src/format/FormatterUtils.ts
@@ -21,7 +21,11 @@ import {
   parse as losslessParse,
   isSafeNumber as lossIsSafeNumber,
 } from 'lossless-json';
-import CSVParser, { type UnparseConfig } from 'papaparse';
+import CSVParser, {
+  type LocalFile,
+  type ParseLocalConfig,
+  type UnparseConfig,
+} from 'papaparse';
 import { assertNonNullable } from '../error/AssertionUtils.js';

 export const capitalize = (value: string): string =>
@@ -152,6 +156,11 @@ export const parseCSVString = (value: string): string[] | undefined => {
   }
 };

+export const parseCSVFile = (
+  file: LocalFile,
+  config: ParseLocalConfig,
+): void => CSVParser.parse(file, config);
+
 export const csvStringify = (
   value: unknown[],
   config?: UnparseConfig,
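
For reference, a minimal standalone sketch of the parse-then-restringify round trip the new source builder performs with the `parseCSVFile` helper added above. The `loadCsv` wrapper and `onCsvReady` callback are illustrative names, not part of this patch; it assumes a browser `File` obtained from a file input:

```typescript
import { csvStringify, parseCSVFile } from '@finos/legend-shared';

// Parse a browser File with Papa Parse, then re-serialize it with the same
// quote/escape characters the DuckDB ingest path uses (`'` for both), so the
// resulting CSV string can be handed to the engine's ingestFileData().
function loadCsv(
  file: File,
  onCsvReady: (csv: string, rowCount: number) => void,
): void {
  parseCSVFile(file, {
    header: true, // first row supplies column names
    dynamicTyping: false, // keep values as strings; DuckDB re-detects types
    skipEmptyLines: true,
    complete: (result) => {
      const csv = csvStringify(result.data as unknown[], {
        escapeChar: `'`,
        quoteChar: `'`,
      });
      onCsvReady(csv, result.data.length);
    },
  });
}
```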