Skip to content

Commit 9fde535

Browse files
committed
Data normalization upon load to DuckDB + cleanup templates
Clean up templates
1 parent 35bdbf3 commit 9fde535

File tree

63 files changed

+161
-2973
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

63 files changed

+161
-2973
lines changed

preswald/engine/managers/data.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ def load_json_source(config: dict[str, Any]) -> pd.DataFrame:
3838
f"Invalid record_path '{record_path}' for JSON file '{path}': {e}"
3939
) from e
4040

41-
4241
# Normalize or convert data if "flatten"
4342
try:
4443
if flatten:
@@ -51,7 +50,6 @@ def load_json_source(config: dict[str, Any]) -> pd.DataFrame:
5150
) from e
5251

5352

54-
5553
# Database Configs ############################################################
5654
@dataclass
5755
class ClickhouseConfig:
@@ -118,6 +116,7 @@ class S3CSVConfig:
118116
s3_use_ssl: bool = False
119117
s3_url_style: str = "path"
120118

119+
121120
class DataSource:
122121
"""Base class for all data sources"""
123122

@@ -182,7 +181,14 @@ def __init__(
182181
self._table_name = f"csv_{name}_{uuid.uuid4().hex[:8]}"
183182
self._duckdb.execute(f"""
184183
CREATE TABLE {self._table_name} AS
185-
SELECT * FROM read_csv_auto('{self.path}')
184+
SELECT * FROM read_csv_auto('{self.path}',
185+
header=true,
186+
auto_detect=true,
187+
ignore_errors=true,
188+
normalize_names=true,
189+
sample_size=-1,
190+
all_varchar=true
191+
)
186192
""")
187193

188194
def query(self, sql: str) -> pd.DataFrame:

preswald/templates/api-docs/hello.py.template

Lines changed: 0 additions & 13 deletions
This file was deleted.

preswald/templates/api-docs/preswald.toml.template

Lines changed: 0 additions & 20 deletions
This file was deleted.

preswald/templates/api-docs/sample.csv.template

Lines changed: 0 additions & 13 deletions
This file was deleted.

preswald/templates/bacteria-evolution/hello.py.template

Lines changed: 0 additions & 13 deletions
This file was deleted.

preswald/templates/bacteria-evolution/preswald.toml.template

Lines changed: 0 additions & 20 deletions
This file was deleted.

preswald/templates/bacteria-evolution/sample.csv.template

Lines changed: 0 additions & 151 deletions
This file was deleted.

0 commit comments

Comments
 (0)