Skip to content

Commit

Permalink
Write out datapackage schema from user-config (closes OSeMOSYS#122)
Browse files Browse the repository at this point in the history
  • Loading branch information
willu47 committed Jul 14, 2022
1 parent b1b8a8b commit 4fc746d
Show file tree
Hide file tree
Showing 4 changed files with 243 additions and 72 deletions.
2 changes: 1 addition & 1 deletion src/otoole/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ def write(self, inputs: Dict, filepath: str, default_values: Dict):

self._footer(handle)

if handle:
if isinstance(handle, TextIO):
handle.close()


Expand Down
102 changes: 60 additions & 42 deletions src/otoole/preprocess/create_datapackage.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,80 +6,98 @@

import logging
import os
from frictionless import Package
from typing import Dict

from datapackage import Package
logger = logging.getLogger()

from otoole.utils import read_packaged_file

logger = logging.getLogger()
def generate_package(package: Package, config: Dict[str, Dict]) -> Package:
"""Adds schema information to a basic Resource
Arguments
---------
package: Package
A frictionless Package
config: Dict[str, Dict]
A user-configuration dictionary
def generate_package(path_to_package):
"""Creates a datapackage in folder ``path_to_package``
Returns
-------
dict
Notes
-----
[{'fields': 'REGION', 'reference': {'resource': 'REGION', 'fields': 'VALUE'}}]
"""

datapath = os.path.join(path_to_package)
package = Package(base_path=datapath)

package.infer("data/*.csv")
logger.debug(f"Auto-identified resources {package.resources}")

package.descriptor["licenses"] = [
{
"name": "CC-BY-4.0",
"path": "https://creativecommons.org/licenses/by/4.0/",
"title": "Creative Commons Attribution 4.0",
}
]
# package.licenses = [
# {
# "name": "CC-BY-4.0",
# "path": "https://creativecommons.org/licenses/by/4.0/",
# "title": "Creative Commons Attribution 4.0",
# }
# ]

package.descriptor["title"] = "The OSeMOSYS Simplicity Example Model"
# package.title = "The OSeMOSYS Simplicity Example Model"

package.descriptor["name"] = "osemosys_model_simplicity"
# package.name = "osemosys_model_simplicity"

package.descriptor["contributors"] = [
{
"title": "Will Usher",
"email": "[email protected]",
"path": "http://www.kth.se/wusher",
"role": "author",
}
]
# package.contributors = [
# {
# "title": "Will Usher",
# "email": "[email protected]",
# "path": "https://www.kth.se/profile/wusher/",
# "role": "author",
# }
# ]

package.commit()
for resource in package.resources: # typing: Resource

config = read_packaged_file("config.yaml", "otoole.preprocess")

new_resources = []
for resource in package.resources:
name = resource.title # Use the title which preserves case

descriptor = resource.descriptor
logger.debug(f"Updating resource '{name}'")

name = resource.name
if config[name]["type"] == "param":

indices = config[name]["indices"]
logger.debug("Indices of %s are %s", name, indices)

fields = []
foreign_keys = []
for index in indices:
key = {
"fields": index,
"reference": {"resource": index, "fields": "VALUE"},
"reference": {"resource": index.lower(), "fields": "VALUE"},
}
foreign_keys.append(key)
field = {"name": index, "type": config[index]["dtype"]}

fields.append(field)

value_field = {"name": "VALUE", "type": config[name]["dtype"]}

fields.append(value_field)

resource.schema.fields = fields
resource.schema.foreign_keys = foreign_keys
resource.schema.primary_key = indices
resource.schema.missing_values = [""]

elif config[name]["type"] == "set":

descriptor["schema"]["foreignKeys"] = foreign_keys
descriptor["schema"]["primaryKey"] = indices
descriptor["schema"]["missingValues"] = [""]
fields = []
value_field = {"name": "VALUE", "type": config[name]["dtype"]}

new_resources.append(descriptor)
fields.append(value_field)
resource.schema.fields = fields
resource.schema.missing_values = [""]

package.descriptor["resources"] = new_resources
package.commit()
logger.debug(f"Schema for resource {name}: {resource.schema}")

filepath = os.path.join(path_to_package, "datapackage.json")
package.save(filepath)
return package


def validate_contents(path_to_package):
Expand Down
84 changes: 57 additions & 27 deletions src/otoole/write_strategies.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import logging
import os
import pandas as pd
from json import dump
from typing import Any, TextIO
from frictionless import Package, Resource
from typing import TextIO

from otoole.input import WriteStrategy
from otoole.preprocess.create_datapackage import generate_package
from otoole.read_strategies import CSV_TO_EXCEL
from otoole.utils import read_packaged_file

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -155,17 +155,8 @@ class WriteCsv(WriteStrategy):
user_config: dict, default=None
"""

def _header(self) -> Any:
os.makedirs(os.path.join(self.filepath), exist_ok=True)
return None

def _write_parameter(
self, df: pd.DataFrame, parameter_name: str, handle: TextIO, default: float
) -> pd.DataFrame:
"""Write parameter data"""
self._write_out_dataframe(self.filepath, parameter_name, df, index=True)

def _write_out_dataframe(self, folder, parameter, df, index=False):
@staticmethod
def _write_out_dataframe(folder, parameter, df, index=False):
"""Writes out a dataframe as a csv into the data subfolder of a datapackage
Arguments
Expand All @@ -184,6 +175,16 @@ def _write_out_dataframe(self, folder, parameter, df, index=False):
)
df.to_csv(csvfile, index=index)

def _header(self) -> Package:
os.makedirs(os.path.join(self.filepath), exist_ok=True)
return None

def _write_parameter(
self, df: pd.DataFrame, parameter_name: str, handle: Package, default: float
) -> pd.DataFrame:
"""Write parameter data"""
self._write_out_dataframe(self.filepath, parameter_name, df, index=True)

def _write_set(self, df: pd.DataFrame, set_name, handle: TextIO) -> pd.DataFrame:
"""Write set data"""
self._write_out_dataframe(self.filepath, set_name, df, index=False)
Expand All @@ -192,11 +193,7 @@ def _footer(self, handle: TextIO):
pass


class WriteDatapackage(WriteCsv):
def _header(self) -> Any:
os.makedirs(os.path.join(self.filepath, "data"), exist_ok=True)
return None

class WriteDatapackage(WriteStrategy):
def _write_out_dataframe(self, folder, parameter, df, index=False):
"""Writes out a dataframe as a csv into the data subfolder of a datapackage
Expand All @@ -214,19 +211,52 @@ def _write_out_dataframe(self, folder, parameter, df, index=False):
)
df.to_csv(csvfile, index=index)

def _footer(self, handle: TextIO):
datapackage = read_packaged_file("datapackage.json", "otoole.preprocess")
filepath = os.path.join(self.filepath, "datapackage.json")
with open(filepath, "w", newline="") as destination:
dump(datapackage, destination)
self._write_default_values()

def _write_default_values(self):
def _write_default_values(self, handle):

default_values_path = os.path.join(self.filepath, "data", "default_values.csv")
with open(default_values_path, "w", newline="") as csv_file:

csv_file.write("name,default_value\n")

rows = []
for name, contents in self.user_config.items():
if contents["type"] == "param":
csv_file.write("{},{}\n".format(name, contents["default"]))
rows.append([name, contents["default"]])

df = pd.DataFrame(rows, columns=["name", "default_value"])
self._add_resource("default_values", df)

def _header(self) -> Package:
    """Create the ``data`` subfolder and start a new, empty package

    Returns
    -------
    Package
        A freshly constructed ``Package`` with no arguments passed
    """
    data_folder = os.path.join(self.filepath, "data")
    # exist_ok=True makes repeated calls on the same folder harmless
    os.makedirs(data_folder, exist_ok=True)
    return Package()

def _add_resource(self, parameter_name: str, df: pd.DataFrame) -> Resource:
    """Wrap a dataframe in a named ``Resource`` and return it

    Arguments
    ---------
    parameter_name: str
        Used unchanged as the resource title and lower-cased as its name
    df: pd.DataFrame
        The data handed to the ``Resource`` constructor

    Returns
    -------
    Resource

    Notes
    -----
    This method only builds the resource; it does not attach it to a
    package. Callers must pass the result to ``add_resource`` themselves.
    """
    wrapped = Resource(df)
    lowered_name = parameter_name.lower()
    wrapped.name = lowered_name
    wrapped.title = parameter_name  # title keeps the original case
    return wrapped

def _write_parameter(
self, df: pd.DataFrame, parameter_name: str, handle: Package, default: float
) -> pd.DataFrame:
"""Write parameter data"""
self._write_out_dataframe(self.filepath, parameter_name, df, index=True)

resource = self._add_resource(parameter_name, df)
handle.add_resource(resource)

def _write_set(self, df: pd.DataFrame, set_name, handle: Package) -> pd.DataFrame:
"""Write set data"""
self._write_out_dataframe(self.filepath, set_name, df, index=False)

resource = self._add_resource(set_name, df)
handle.add_resource(resource)

def _footer(self, handle: Package):
    """Finish the datapackage and save its descriptor as yaml

    Arguments
    ---------
    handle: Package
        The package collected while writing; passed to
        ``_write_default_values`` and then to ``generate_package``
        together with ``self.user_config``

    Notes
    -----
    The package returned by ``generate_package`` is written to
    ``datapackage.yaml`` inside ``self.filepath``
    """
    self._write_default_values(handle)
    described = generate_package(handle, self.user_config)
    described.to_yaml(os.path.join(self.filepath, "datapackage.yaml"))
Loading

0 comments on commit 4fc746d

Please sign in to comment.