Skip to content

Commit

Permalink
datacube: fixing caching for csv
Browse files Browse the repository at this point in the history
  • Loading branch information
gs-gunjan committed Feb 12, 2025
1 parent c0a5458 commit 5622f03
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 22 deletions.
6 changes: 6 additions & 0 deletions .changeset/tidy-ads-judge.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
'@finos/legend-application-data-cube': patch
'@finos/legend-shared': patch
---

Fix CSV caching: create the DuckDB cache table with explicitly mapped column types derived from the result's Pure column types, and serialize rows via `csvStringify` with matching quote/escape configuration.
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import {
} from '@finos/legend-graph';
import {
assertNonNullable,
csvStringify,
guaranteeNonNullable,
UnsupportedOperationError,
} from '@finos/legend-shared';
Expand Down Expand Up @@ -92,34 +93,62 @@ export class LegendDataCubeDataCubeCacheManager {

const connection = await this.database.connect();

const columnString = result.builder.columns
.map((col) => col.name)
.join(',');

const dataString: string[] = [columnString];

result.result.rows.forEach((row) => {
const updatedRows = row.values.map((val) => {
if (val !== null && typeof val === 'string') {
return `'${val.replaceAll(`'`, `''`)}'`;
} else if (val === null) {
return `NULL`;
const columns: string[] = [];
const columnNames: string[] = [];
result.builder.columns.forEach((col) => {
let colType: string;
switch (col.type as string) {
case PRIMITIVE_TYPE.BOOLEAN: {
colType = 'BOOLEAN';
break;
}
case PRIMITIVE_TYPE.INTEGER: {
colType = 'INTEGER';
break;
}
return val;
});
dataString.push(`${updatedRows.join(',')}`);
case PRIMITIVE_TYPE.NUMBER:
case PRIMITIVE_TYPE.DECIMAL:
case PRIMITIVE_TYPE.FLOAT: {
colType = 'FLOAT';
break;
}
// We don't use type DATE because DuckDB will automatically convert it to a TIMESTAMP
case PRIMITIVE_TYPE.STRICTDATE:
case PRIMITIVE_TYPE.DATETIME:
case PRIMITIVE_TYPE.DATE: {
colType = 'VARCHAR';
break;
}
case PRIMITIVE_TYPE.STRING: {
colType = 'VARCHAR';
break;
}
default: {
throw new UnsupportedOperationError(
`Can't initialize cache: failed to find matching DuckDB type for Pure type '${col.type}'`,
);
}
}
columns.push(`"${col.name}" ${colType}`);
columnNames.push(col.name);
});

const csvString = dataString.join('\n');
const CREATE_TABLE_SQL = `CREATE TABLE ${schema}.${table} (${columns.join(',')})`;
await connection.query(CREATE_TABLE_SQL);

const data = result.result.rows.map((row) => row.values);

const csv = csvStringify([columnNames, ...data], {
escapeChar: `'`,
quoteChar: `'`,
});

await this._database?.registerFileText(csvFileName, csvString);
await this._database?.registerFileText(csvFileName, csv);

await connection.insertCSVFromPath(csvFileName, {
schema: schema,
name: table,
create: false,
header: true,
detect: true,
escape: `'`,
quote: `'`,
delimiter: ',',
Expand Down
8 changes: 5 additions & 3 deletions packages/legend-shared/src/format/FormatterUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import {
parse as losslessParse,
isSafeNumber as lossIsSafeNumber,
} from 'lossless-json';
import CSVParser from 'papaparse';
import CSVParser, { type UnparseConfig } from 'papaparse';
import { assertNonNullable } from '../error/AssertionUtils.js';

export const capitalize = (value: string): string =>
Expand Down Expand Up @@ -152,8 +152,10 @@ export const parseCSVString = (value: string): string[] | undefined => {
}
};

/**
 * Serializes rows into a CSV string using Papa Parse.
 *
 * @param value - The rows to serialize; each element is a row (array of field
 *   values or an object), as accepted by Papa Parse's `unparse`.
 * @param config - Optional Papa Parse unparse configuration (e.g. `quoteChar`,
 *   `escapeChar`, `delimiter`) — forwarded verbatim to `CSVParser.unparse`.
 * @returns The CSV-encoded string.
 */
export const csvStringify = (
  value: unknown[],
  config?: UnparseConfig,
): string => CSVParser.unparse(value, config);

/**
* One very common use case is that we get the JSON as response from the server than we will convert this to a string and persist
Expand Down

0 comments on commit 5622f03

Please sign in to comment.