Commit a079ccb: Added resource end_date

alexiglaser committed Dec 2, 2024
1 parent 2f9000e

Showing 5 changed files with 28 additions and 3 deletions.
3 changes: 3 additions & 0 deletions digital_land/cli.py
@@ -142,6 +142,7 @@ def convert_cmd(input_path, output_path):
 @dataset_resource_dir
 @issue_dir
 @click.option("--cache-dir", type=click.Path(), default="var/cache/parquet")
+@click.option("--resource-path", type=click.Path(), default="collection/resource.csv")
 @click.argument("input-paths", nargs=-1, type=click.Path(exists=True))
 @click.pass_context
 def dataset_create_cmd(
@@ -153,6 +154,7 @@ def dataset_create_cmd(
     dataset_resource_dir,
     issue_dir,
     cache_dir,
+    resource_path,
 ):
     return dataset_create(
         input_paths=input_paths,
@@ -165,6 +167,7 @@ def dataset_create_cmd(
         dataset_resource_dir=dataset_resource_dir,
         issue_dir=issue_dir,
         cache_dir=cache_dir,
+        resource_path=resource_path,
     )


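With the option wired through, the command can now be pointed at a non-default resource file. A minimal sketch of invoking it via click's test runner, mirroring the acceptance test further down; the input path is a hypothetical placeholder, and options added by the other decorators (issue dir, dataset resource dir, etc.) are omitted and may need to be supplied:

from click.testing import CliRunner

from digital_land.cli import dataset_create_cmd

runner = CliRunner()
result = runner.invoke(
    dataset_create_cmd,
    [
        "--cache-dir", "var/cache/parquet",
        "--resource-path", "collection/resource.csv",  # new option; value shown is the default
        "transformed/conservation-area/abc123.csv",    # hypothetical input path; must exist on disk
    ],
    catch_exceptions=False,
)
assert result.exit_code == 0
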
2 changes: 2 additions & 0 deletions digital_land/commands.py
@@ -361,6 +361,7 @@ def dataset_create(
     column_field_dir="var/column-field",
     dataset_resource_dir="var/dataset-resource",
     cache_dir="var/cache/parquet",
+    resource_path="collection/resource.csv",
 ):
     cache_dir = os.path.join(cache_dir, dataset)
 
@@ -409,6 +410,7 @@ def dataset_create(
         organisation=organisation,
         path=output_path,
         cache_dir=cache_dir,
+        resource_path=resource_path,
         specification_dir=None,  # TBD: package should use this specification object
     )
     pqpackage.create_temp_table(input_paths)
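Because the new keyword defaults to the previously hardcoded "collection/resource.csv", existing callers of dataset_create are unaffected. A minimal sketch of a direct call; only the keywords visible in this diff are certain, the rest are assumptions for illustration:

from digital_land.commands import dataset_create

# Other required arguments (organisation path, issue dir, etc.) are
# omitted here; their exact names are not shown in this diff.
dataset_create(
    input_paths=["transformed/conservation-area/abc123.csv"],  # hypothetical input
    output_path="dataset/conservation-area.sqlite3",           # hypothetical output
    dataset="conservation-area",
    cache_dir="var/cache/parquet",
    resource_path="collection/resource.csv",  # new parameter; default shown
)
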
5 changes: 3 additions & 2 deletions digital_land/package/datasetparquet.py
@@ -26,13 +26,14 @@
 
 
 class DatasetParquetPackage(Package):
-    def __init__(self, dataset, organisation, cache_dir, **kwargs):
+    def __init__(self, dataset, organisation, cache_dir, resource_path, **kwargs):
         self.suffix = ".parquet"
         super().__init__(dataset, tables=tables, indexes=indexes, **kwargs)
         self.dataset = dataset
         self.organisation = organisation
         self.cache_dir = cache_dir
         self._spatialite = None
+        self.resource_path = resource_path
         # Persistent connection for the class. Given name to ensure that table is stored on disk (not purely in memory)
         os.makedirs(cache_dir, exist_ok=True)
         self.duckdb_file = os.path.join(cache_dir, f"{dataset}.duckdb")
@@ -192,7 +193,7 @@ def load_entities(self):
         SELECT {fields_str} FROM (
             SELECT {fields_str}, CASE WHEN resource_csv."end-date" IS NULL THEN '2999-12-31' ELSE resource_csv."end-date" END AS resource_end_date
             FROM temp_table
-            LEFT JOIN read_csv_auto('collection/resource.csv') resource_csv
+            LEFT JOIN read_csv_auto('{self.resource_path}') resource_csv
             ON temp_table.resource = resource_csv.resource
             QUALIFY ROW_NUMBER() OVER (
                 PARTITION BY entity, field
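This query is where the new path is consumed: each fact in temp_table is joined to its resource, a NULL resource "end-date" is treated as far-future ('2999-12-31'), and QUALIFY ROW_NUMBER keeps one row per (entity, field). The ORDER BY of the window is cut off in this view, so the self-contained sketch below assumes the latest resource end-date wins, with inline tables standing in for temp_table and resource.csv:

import duckdb

con = duckdb.connect()

# Two resources assert the same (entity, field) fact; only res-b is
# still live (NULL end-date), so its value should survive.
con.execute("""
    CREATE TABLE temp_table AS
    SELECT * FROM (VALUES
        (1, 'name', 'Old name', 'res-a'),
        (1, 'name', 'New name', 'res-b')
    ) t(entity, field, value, resource)
""")
con.execute("""
    CREATE TABLE resource_csv AS
    SELECT * FROM (VALUES
        ('res-a', '2020-01-01'),
        ('res-b', NULL)
    ) t(resource, "end-date")
""")

rows = con.execute("""
    SELECT entity, field, value FROM (
        SELECT temp_table.*,
               CASE WHEN resource_csv."end-date" IS NULL
                    THEN '2999-12-31'
                    ELSE resource_csv."end-date" END AS resource_end_date
        FROM temp_table
        LEFT JOIN resource_csv ON temp_table.resource = resource_csv.resource
        QUALIFY ROW_NUMBER() OVER (
            PARTITION BY entity, field
            ORDER BY resource_end_date DESC  -- assumed tie-break; truncated above
        ) = 1
    )
""").fetchall()
print(rows)  # [(1, 'name', 'New name')] -- the still-live resource wins
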
12 changes: 12 additions & 0 deletions tests/acceptance/test_dataset_create.py
@@ -68,13 +68,23 @@ def issue_dir(session_tmp_path):
     return issue_dir
 
 
+@pytest.fixture
+def resource_path(session_tmp_path):
+    resource_path = session_tmp_path / "resource.csv"
+    columns = ["resource", "end-date"]
+    with open(resource_path, "w") as f:
+        f.write(",".join(columns) + "\n")
+    return resource_path
+
+
 def test_acceptance_dataset_create(
     session_tmp_path,
     organisation_path,
     input_paths,
     issue_dir,
     cache_path,
     dataset_dir,
+    resource_path,
 ):
     output_path = dataset_dir / f"{test_dataset}.sqlite3"
 
@@ -99,6 +109,8 @@ def test_acceptance_dataset_create(
             str(issue_dir),
             "--cache-dir",
             str(cache_path),
+            "--resource-path",
+            str(resource_path),
         ]
         + input_paths,
         catch_exceptions=False,

9 changes: 8 additions & 1 deletion tests/integration/test_package_datasetparquet.py
@@ -518,14 +518,21 @@ def test_dataset_parquet_package(temp_dir):
 
     # Leave hash3.csv empty except for the headers (to test that an empty csv doesn't screw things up).
     with open(input_paths[2], "w") as f:
-        f.write(",".join(map(lambda x: str(x) if x is not np.nan else "", row)) + "\n")
+        f.write(",".join(columns) + "\n")
+        # f.write(",".join(map(lambda x: str(x) if x is not np.nan else "", row)) + "\n")
+
+    resource_path = str(temp_dir / "resource.csv")
+    resource_columns = ["resource", "end-date"]
+    with open(resource_path, "w") as f:
+        f.write(",".join(resource_columns) + "\n")
 
     # Instantiate the DatasetParquetPackage with temp_dir input paths and a mock schema
     package = DatasetParquetPackage(
         dataset="conservation-area",
         organisation=MockOrganisation(os.path.join(temp_dir, "organisation.csv")),
         path=os.path.join(temp_dir, "integration_test.sqlite3"),
         cache_dir=temp_dir,
+        resource_path=resource_path,
         specification_dir=None,
     )
     package.create_temp_table(input_paths)
