Skip to content

Commit

Permalink
datacube: upload data via file into DataCube grid
Browse files Browse the repository at this point in the history
  • Loading branch information
gs-gunjan committed Feb 13, 2025
1 parent f99237f commit 3f4bb19
Show file tree
Hide file tree
Showing 11 changed files with 385 additions and 75 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ import { LegendQueryDataCubeSourceBuilder } from './source/LegendQueryDataCubeSo
import { AdhocQueryDataCubeSourceBuilder } from './source/AdhocQueryDataCubeSourceBuilder.js';
import { AdhocQueryDataCubeSourceBuilderState } from '../../stores/builder/source/AdhocQueryDataCubeSourceBuilderState.js';
import { useLegendDataCubeBuilderStore } from './LegendDataCubeBuilderStoreProvider.js';
import { RawFileQueryDataCubeSourceBuilderState } from '../../stores/builder/source/RawFileQueryDataCubeSourceBuilderState.js';
import { RawFileQueryDataCubeSourceBuilder } from './source/RawFileQueryDataCubeSourceBuilder.js';
import { CSVFileQueryDataCubeSourceBuilderState } from '../../stores/builder/source/CSVFileQueryDataCubeSourceBuilderState.js';
import { CSVFileQueryDataCubeSourceBuilder } from './source/CSVFileQueryDataCubeSourceBuilder.js';

export const LegendDataCubeCreator = observer(() => {
const store = useLegendDataCubeBuilderStore();
Expand Down Expand Up @@ -63,7 +63,7 @@ export const LegendDataCubeCreator = observer(() => {
{[
LegendDataCubeSourceBuilderType.LEGEND_QUERY,
LegendDataCubeSourceBuilderType.ADHOC_QUERY,
LegendDataCubeSourceBuilderType.RAW_FILE_QUERY,
LegendDataCubeSourceBuilderType.CSV_FILE_QUERY,
].map((type) => (
<FormDropdownMenuItem
key={type}
Expand Down Expand Up @@ -93,8 +93,8 @@ export const LegendDataCubeCreator = observer(() => {
/>
)}
{sourceBuilder instanceof
RawFileQueryDataCubeSourceBuilderState && (
<RawFileQueryDataCubeSourceBuilder
CSVFileQueryDataCubeSourceBuilderState && (
<CSVFileQueryDataCubeSourceBuilder
sourceBuilder={sourceBuilder}
/>
)}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@
*/

import { observer } from 'mobx-react-lite';
import type { RawFileQueryDataCubeSourceBuilderState } from '../../../stores/builder/source/RawFileQueryDataCubeSourceBuilderState.js';
import type { CSVFileQueryDataCubeSourceBuilderState } from '../../../stores/builder/source/CSVFileQueryDataCubeSourceBuilderState.js';
import Papa from 'papaparse';
import { useState } from 'react';
import { csvStringify } from '@finos/legend-shared';

export const RawFileQueryDataCubeSourceBuilder = observer(
(props: { sourceBuilder: RawFileQueryDataCubeSourceBuilderState }) => {
export const CSVFileQueryDataCubeSourceBuilder = observer(
(props: { sourceBuilder: CSVFileQueryDataCubeSourceBuilderState }) => {
const { sourceBuilder } = props;

const handleFileChange = (e: React.ChangeEvent<HTMLInputElement>) => {
Expand All @@ -30,7 +30,11 @@ export const RawFileQueryDataCubeSourceBuilder = observer(
Papa.parse(file, {
complete: (result) => {
// Set the parsed data to state
sourceBuilder.fileData = result.data;
sourceBuilder.setFileData(
csvStringify(result.data, { escapeChar: `'`, quoteChar: `'` }),
);
sourceBuilder.setFileName(file.name);
sourceBuilder.setRowCount(result.data.length);
},
header: true,
dynamicTyping: true,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,19 @@ import {
import type { CachedDataCubeSource } from '@finos/legend-data-cube';
import { Type } from 'apache-arrow';

// TODO: rename this class to something like `LegendEngineDuckDBEngine` — it now handles both result caching and uploaded-file ingestion, not just caching
export class LegendDataCubeDataCubeCacheManager {
private static readonly DUCKDB_DEFAULT_SCHEMA_NAME = 'main'; // See https://duckdb.org/docs/sql/statements/use.html
private static readonly TABLE_NAME_PREFIX = 'cache';
private static readonly CSV_FILE_NAME = 'data';
private static tableCounter = 0;
private static readonly CACHE_TABLE_NAME_PREFIX = 'cache';
private static readonly FILE_DATA_TABLE_NAME_PREFIX = 'fileData';
private static readonly CACHE_FILE_NAME = 'cacheData';
private static readonly FILE_DATA_FILE_NAME = 'ingestData';
private static readonly COLUMN_NAME = 'column_name';
private static readonly COLUMN_TYPE = 'column_type';
private static readonly ESCAPE_CHAR = `'`;
private static readonly QUOTE_CHAR = `'`;
private static cacheTableCounter = 0;
private static fileTableCounter = 0;

private _database?: duckdb.AsyncDuckDB | undefined;

Expand Down Expand Up @@ -87,9 +95,9 @@ export class LegendDataCubeDataCubeCacheManager {
async cache(result: TDSExecutionResult) {
const schema =
LegendDataCubeDataCubeCacheManager.DUCKDB_DEFAULT_SCHEMA_NAME;
LegendDataCubeDataCubeCacheManager.tableCounter += 1;
const table = `${LegendDataCubeDataCubeCacheManager.TABLE_NAME_PREFIX}${LegendDataCubeDataCubeCacheManager.tableCounter}`;
const csvFileName = `${LegendDataCubeDataCubeCacheManager.CSV_FILE_NAME}${LegendDataCubeDataCubeCacheManager.tableCounter}.csv`;
LegendDataCubeDataCubeCacheManager.cacheTableCounter += 1;
const table = `${LegendDataCubeDataCubeCacheManager.CACHE_TABLE_NAME_PREFIX}${LegendDataCubeDataCubeCacheManager.cacheTableCounter}`;
const csvFileName = `${LegendDataCubeDataCubeCacheManager.CACHE_FILE_NAME}${LegendDataCubeDataCubeCacheManager.cacheTableCounter}.csv`;

const connection = await this.database.connect();

Expand All @@ -99,8 +107,8 @@ export class LegendDataCubeDataCubeCacheManager {
const data = result.result.rows.map((row) => row.values);

const csv = csvStringify([columnNames, ...data], {
escapeChar: `'`,
quoteChar: `'`,
escapeChar: LegendDataCubeDataCubeCacheManager.ESCAPE_CHAR,
quoteChar: LegendDataCubeDataCubeCacheManager.QUOTE_CHAR,
});

await this._database?.registerFileText(csvFileName, csv);
Expand All @@ -111,19 +119,54 @@ export class LegendDataCubeDataCubeCacheManager {
create: true,
header: true,
detect: true,
escape: `'`,
quote: `'`,
delimiter: ',',
escape: LegendDataCubeDataCubeCacheManager.ESCAPE_CHAR,
quote: LegendDataCubeDataCubeCacheManager.QUOTE_CHAR,
});

await connection.close();

return { table, schema, rowCount: result.result.rows.length };
return { schema, table, rowCount: result.result.rows.length };
}

/**
 * Registers a CSV string as a virtual file in DuckDB-WASM and ingests it
 * into a freshly named table, letting DuckDB auto-detect column types from
 * the header row.
 *
 * @param csvString CSV content with a header row; quoting/escaping is
 *   expected to use ESCAPE_CHAR/QUOTE_CHAR (as produced by the file-upload
 *   flow that stringifies via csvStringify with the same characters).
 * @returns the schema name, the generated table name, and `dbSchema` — the
 *   table's [column_name, column_type] pairs as reported by DESCRIBE.
 */
async ingestFileData(csvString: string) {
  const schema =
    LegendDataCubeDataCubeCacheManager.DUCKDB_DEFAULT_SCHEMA_NAME;
  // Monotonic counter keeps each upload in its own uniquely named table/file.
  LegendDataCubeDataCubeCacheManager.fileTableCounter += 1;
  const table = `${LegendDataCubeDataCubeCacheManager.FILE_DATA_TABLE_NAME_PREFIX}${LegendDataCubeDataCubeCacheManager.fileTableCounter}`;
  const csvFileName = `${LegendDataCubeDataCubeCacheManager.FILE_DATA_FILE_NAME}${LegendDataCubeDataCubeCacheManager.fileTableCounter}.csv`;

  const connection = await this.database.connect();

  try {
    await this._database?.registerFileText(csvFileName, csvString);

    await connection.insertCSVFromPath(csvFileName, {
      schema: schema,
      name: table,
      header: true,
      detect: true,
      escape: LegendDataCubeDataCubeCacheManager.ESCAPE_CHAR,
      quote: LegendDataCubeDataCubeCacheManager.QUOTE_CHAR,
    });

    // DESCRIBE yields one row per column; project each row down to a
    // [name, type] pair so callers can build column metadata.
    const dbSchemaResult = await connection.query(
      `DESCRIBE ${schema}.${table}`,
    );
    const dbSchema = dbSchemaResult
      .toArray()
      .map((data) => [
        data[LegendDataCubeDataCubeCacheManager.COLUMN_NAME],
        data[LegendDataCubeDataCubeCacheManager.COLUMN_TYPE],
      ]);

    return { schema, table, dbSchema };
  } finally {
    // Close the connection even when registration/ingestion/DESCRIBE throws,
    // so a failed upload does not leak a DuckDB connection. (Original code
    // only closed on the happy path.)
    await connection.close();
  }
}

async runSQLQuery(sql: string) {
const connection = await this.database.connect();
const result = await connection.query(sql);
console.log(sql);
await connection.close();

const data = result.toArray();
Expand Down
Loading

0 comments on commit 3f4bb19

Please sign in to comment.