Skip to content

Commit

Permalink
datacube: fixing caching for csv
Browse files Browse the repository at this point in the history
  • Loading branch information
gs-gunjan committed Feb 12, 2025
1 parent c0a5458 commit 8ef2168
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 19 deletions.
5 changes: 5 additions & 0 deletions .changeset/tidy-ads-judge.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'@finos/legend-application-data-cube': patch
---

Fix CSV caching: explicitly create the DuckDB cache table with columns typed from the Pure column types, serialize result rows to CSV via PapaParse with single-quote quoting/escaping, and load the registered CSV file into the pre-created table instead of relying on hand-built CSV strings and auto-created tables.
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import {
} from '@finos/legend-shared';
import type { CachedDataCubeSource } from '@finos/legend-data-cube';
import { Type } from 'apache-arrow';
import CSVParser from 'papaparse';

export class LegendDataCubeDataCubeCacheManager {
private static readonly DUCKDB_DEFAULT_SCHEMA_NAME = 'main'; // See https://duckdb.org/docs/sql/statements/use.html
Expand Down Expand Up @@ -92,34 +93,62 @@ export class LegendDataCubeDataCubeCacheManager {

const connection = await this.database.connect();

const columnString = result.builder.columns
.map((col) => col.name)
.join(',');

const dataString: string[] = [columnString];

result.result.rows.forEach((row) => {
const updatedRows = row.values.map((val) => {
if (val !== null && typeof val === 'string') {
return `'${val.replaceAll(`'`, `''`)}'`;
} else if (val === null) {
return `NULL`;
const columns: string[] = [];
const columnNames: string[] = [];
result.builder.columns.forEach((col) => {
let colType: string;
switch (col.type as string) {
case PRIMITIVE_TYPE.BOOLEAN: {
colType = 'BOOLEAN';
break;
}
case PRIMITIVE_TYPE.INTEGER: {
colType = 'INTEGER';
break;
}
return val;
});
dataString.push(`${updatedRows.join(',')}`);
case PRIMITIVE_TYPE.NUMBER:
case PRIMITIVE_TYPE.DECIMAL:
case PRIMITIVE_TYPE.FLOAT: {
colType = 'FLOAT';
break;
}
// We don't use type DATE because DuckDB will automatically convert it to a TIMESTAMP
case PRIMITIVE_TYPE.STRICTDATE:
case PRIMITIVE_TYPE.DATETIME:
case PRIMITIVE_TYPE.DATE: {
colType = 'VARCHAR';
break;
}
case PRIMITIVE_TYPE.STRING: {
colType = 'VARCHAR';
break;
}
default: {
throw new UnsupportedOperationError(
`Can't initialize cache: failed to find matching DuckDB type for Pure type '${col.type}'`,
);
}
}
columns.push(`"${col.name}" ${colType}`);
columnNames.push(col.name);
});

const csvString = dataString.join('\n');
const CREATE_TABLE_SQL = `CREATE TABLE ${schema}.${table} (${columns.join(',')})`;
await connection.query(CREATE_TABLE_SQL);

const data = result.result.rows.map((row) => row.values);

const csv = CSVParser.unparse([columnNames, ...data], {
escapeChar: `'`,
quoteChar: `'`,
});

await this._database?.registerFileText(csvFileName, csvString);
await this._database?.registerFileText(csvFileName, csv);

await connection.insertCSVFromPath(csvFileName, {
schema: schema,
name: table,
create: false,
header: true,
detect: true,
escape: `'`,
quote: `'`,
delimiter: ',',
Expand Down

0 comments on commit 8ef2168

Please sign in to comment.