From 8ef21681b54438f4fb74f2be79af91cba4000c5e Mon Sep 17 00:00:00 2001 From: gs-gunjan Date: Wed, 12 Feb 2025 15:22:28 +0530 Subject: [PATCH] datacube: fixing caching for csv --- .changeset/tidy-ads-judge.md | 5 ++ .../src/stores/LegendDataCubeCacheManager.ts | 67 +++++++++++++------ 2 files changed, 53 insertions(+), 19 deletions(-) create mode 100644 .changeset/tidy-ads-judge.md diff --git a/.changeset/tidy-ads-judge.md b/.changeset/tidy-ads-judge.md new file mode 100644 index 0000000000..cbb48a8777 --- /dev/null +++ b/.changeset/tidy-ads-judge.md @@ -0,0 +1,5 @@ +--- +'@finos/legend-application-data-cube': patch +--- + +Fixing Caching for CSV diff --git a/packages/legend-application-data-cube/src/stores/LegendDataCubeCacheManager.ts b/packages/legend-application-data-cube/src/stores/LegendDataCubeCacheManager.ts index 03a6dc08a2..95b172780f 100644 --- a/packages/legend-application-data-cube/src/stores/LegendDataCubeCacheManager.ts +++ b/packages/legend-application-data-cube/src/stores/LegendDataCubeCacheManager.ts @@ -32,6 +32,7 @@ import { } from '@finos/legend-shared'; import type { CachedDataCubeSource } from '@finos/legend-data-cube'; import { Type } from 'apache-arrow'; +import CSVParser from 'papaparse'; export class LegendDataCubeDataCubeCacheManager { private static readonly DUCKDB_DEFAULT_SCHEMA_NAME = 'main'; // See https://duckdb.org/docs/sql/statements/use.html @@ -92,34 +93,62 @@ export class LegendDataCubeDataCubeCacheManager { const connection = await this.database.connect(); - const columnString = result.builder.columns - .map((col) => col.name) - .join(','); - - const dataString: string[] = [columnString]; - - result.result.rows.forEach((row) => { - const updatedRows = row.values.map((val) => { - if (val !== null && typeof val === 'string') { - return `'${val.replaceAll(`'`, `''`)}'`; - } else if (val === null) { - return `NULL`; + const columns: string[] = []; + const columnNames: string[] = []; + result.builder.columns.forEach((col) => { + let colType: string; + switch (col.type as string) { + case PRIMITIVE_TYPE.BOOLEAN: { + colType = 'BOOLEAN'; + break; + } + case PRIMITIVE_TYPE.INTEGER: { + colType = 'INTEGER'; + break; } - return val; - }); - dataString.push(`${updatedRows.join(',')}`); + case PRIMITIVE_TYPE.NUMBER: + case PRIMITIVE_TYPE.DECIMAL: + case PRIMITIVE_TYPE.FLOAT: { + colType = 'FLOAT'; + break; + } + // We don't use type DATE because DuckDB will automatically convert it to a TIMESTAMP + case PRIMITIVE_TYPE.STRICTDATE: + case PRIMITIVE_TYPE.DATETIME: + case PRIMITIVE_TYPE.DATE: { + colType = 'VARCHAR'; + break; + } + case PRIMITIVE_TYPE.STRING: { + colType = 'VARCHAR'; + break; + } + default: { + throw new UnsupportedOperationError( + `Can't initialize cache: failed to find matching DuckDB type for Pure type '${col.type}'`, + ); + } + } + columns.push(`"${col.name}" ${colType}`); + columnNames.push(col.name); }); - const csvString = dataString.join('\n'); + const CREATE_TABLE_SQL = `CREATE TABLE ${schema}.${table} (${columns.join(',')})`; + await connection.query(CREATE_TABLE_SQL); + + const data = result.result.rows.map((row) => row.values); + + const csv = CSVParser.unparse([columnNames, ...data], { + escapeChar: `'`, + quoteChar: `'`, + }); - await this._database?.registerFileText(csvFileName, csvString); + await this._database?.registerFileText(csvFileName, csv); await connection.insertCSVFromPath(csvFileName, { schema: schema, name: table, create: false, - header: true, - detect: true, escape: `'`, quote: `'`, delimiter: ',',