Skip to content

Commit

Permalink
Merge branch 'develop' into docs-update
Browse files Browse the repository at this point in the history
  • Loading branch information
trevorb1 committed Feb 23, 2023
2 parents 55897d9 + 5fe7435 commit 02c618c
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 11 deletions.
28 changes: 25 additions & 3 deletions src/otoole/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ def validate_model(args):
"Reading from datapackage is deprecated, trying to read from CSVs"
)
data_file = read_deprecated_datapackage(data_file)
logger.info("Successfully read folder of CSVs")
read_strategy = ReadCsv(user_config=config)
elif data_format == "csv":
read_strategy = ReadCsv(user_config=config)
Expand Down Expand Up @@ -152,6 +153,7 @@ def result_matrix(args):
"Reading from datapackage is deprecated, trying to read from CSVs"
)
input_csvs = read_deprecated_datapackage(args.input_datapackage)
logger.info("Successfully read folder of CSVs")
input_data, _ = ReadCsv(user_config=config).read(input_csvs)
elif args.input_datafile:
input_data, _ = ReadDatafile(user_config=config).read(args.input_datafile)
Expand Down Expand Up @@ -199,18 +201,21 @@ def conversion_matrix(args):

# set read strategy

keep_whitespace = True if args.keep_whitespace else False

if args.from_format == "datafile":
read_strategy = ReadDatafile(user_config=config)
elif args.from_format == "datapackage":
logger.warning(
"Reading from datapackage is deprecated, trying to read from CSVs"
)
from_path = read_deprecated_datapackage(from_path)
read_strategy = ReadCsv(user_config=config)
logger.info("Successfully read folder of CSVs")
read_strategy = ReadCsv(user_config=config, keep_whitespace=keep_whitespace)
elif args.from_format == "csv":
read_strategy = ReadCsv(user_config=config)
read_strategy = ReadCsv(user_config=config, keep_whitespace=keep_whitespace)
elif args.from_format == "excel":
read_strategy = ReadExcel(user_config=config)
read_strategy = ReadExcel(user_config=config, keep_whitespace=keep_whitespace)

input_data, _ = read_strategy.read(args.from_path)

Expand Down Expand Up @@ -332,6 +337,11 @@ def get_parser():
help="Input GNUMathProg datafile required for OSeMOSYS short or fast results",
default=None,
)
result_parser.add_argument(
"--input_datapackage",
help="Deprecated",
default=None,
)
result_parser.add_argument("config", help="Path to config YAML file")
result_parser.add_argument(
"--write_defaults",
Expand Down Expand Up @@ -366,6 +376,12 @@ def get_parser():
default=False,
action="store_true",
)
convert_parser.add_argument(
"--keep_whitespace",
help="Keeps leading/trailing whitespace in CSV files",
default=False,
action="store_true",
)
convert_parser.set_defaults(func=conversion_matrix)

# Parser for validation
Expand Down Expand Up @@ -424,6 +440,12 @@ def get_parser():
return parser


class DeprecateAction(argparse.Action):
    """Argparse action that warns about a deprecated option and discards it.

    Each time the option is encountered a warning is logged and the option's
    destination attribute is removed from the namespace, so the supplied value
    is ignored.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Log a deprecation warning and drop ``self.dest`` from ``namespace``.

        Arguments
        ---------
        parser
            Parser invoking this action (unused)
        namespace
            Namespace being populated; ``self.dest`` is removed from it
        values
            Value(s) supplied for the option (ignored)
        option_string
            Option string that triggered the action (unused)
        """
        logger.warning(
            "Argument %s is deprecated and is ignored.", self.option_strings
        )
        # If the option is repeated, the attribute was already removed by the
        # previous call; an unguarded delattr would raise AttributeError.
        if hasattr(namespace, self.dest):
            delattr(namespace, self.dest)


def main():

parser = get_parser()
Expand Down
40 changes: 32 additions & 8 deletions src/otoole/read_strategies.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
import os
from typing import Any, Dict, List, TextIO, Tuple, Union
from typing import Any, Dict, List, Optional, TextIO, Tuple, Union

import pandas as pd
from amply import Amply
Expand Down Expand Up @@ -34,6 +34,10 @@ def read(


class _ReadTabular(ReadStrategy):
def __init__(self, user_config: Dict[str, Dict], keep_whitespace: bool = False):
    """Initialise the reader and record the whitespace preference.

    Arguments
    ---------
    user_config: Dict[str, Dict]
        User configuration, forwarded unchanged to the parent initialiser
    keep_whitespace: bool = False
        Stored on the instance as ``keep_whitespace``
    """
    super().__init__(user_config)
    self.keep_whitespace = keep_whitespace

def _check_set(self, df: pd.DataFrame, config_details: Dict, name: str):

logger.info("Checking set %s", name)
Expand Down Expand Up @@ -86,6 +90,24 @@ def _check_parameter(self, df: pd.DataFrame, expected_headers: List, name: str):

return narrow[all_headers].set_index(expected_headers)

def _whitespace_converter(self, indices: List[str]) -> Dict[str, Any]:
"""Creates converter for striping whitespace in dataframe
Arguments
---------
indicies: List[str]
Column headers of dataframe
Returns
-------
Dict[str,Any]
Converter dictionary
"""
if self.keep_whitespace:
return {}
else:
return {x: str.strip for x in indices}


class ReadExcel(_ReadTabular):
"""Read in an Excel spreadsheet in wide format to a dict of Pandas DataFrames"""
Expand Down Expand Up @@ -177,9 +199,13 @@ def read(
logger.info("Looking for %s", parameter)

entity_type = details["type"]
try:
converter = self._whitespace_converter(details["indices"])
except KeyError: # sets don't have indices def
converter = self._whitespace_converter(["VALUE"])

if entity_type == "param":
df = self._get_input_data(filepath, parameter, details)
df = self._get_input_data(filepath, parameter, details, converter)
narrow = self._check_parameter(df, details["indices"], parameter)
if not narrow.empty:
narrow_checked = check_datatypes(
Expand All @@ -189,7 +215,7 @@ def read(
narrow_checked = narrow

elif entity_type == "set":
df = self._get_input_data(filepath, parameter, details)
df = self._get_input_data(filepath, parameter, details, converter)
narrow = self._check_set(df, details, parameter)
if not narrow.empty:
narrow_checked = check_set_datatype(
Expand All @@ -214,9 +240,7 @@ def read(

@staticmethod
def _get_input_data(
filepath: str,
parameter: str,
details: Dict,
filepath: str, parameter: str, details: Dict, converter: Optional[Dict] = None
) -> pd.DataFrame:
"""Reads in and checks CSV data format.
Expand All @@ -234,10 +258,10 @@ def _get_input_data(
pd.DataFrame
CSV data as a dataframe
"""
converter = {} if not converter else converter
csv_path = os.path.join(filepath, parameter + ".csv")

try:
df = pd.read_csv(csv_path)
df = pd.read_csv(csv_path, converters=converter)
except pd.errors.EmptyDataError:
logger.error("No data found in file for %s", parameter)
expected_columns = details["indices"]
Expand Down
25 changes: 25 additions & 0 deletions tests/test_read_strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -1066,3 +1066,28 @@ def test_read_default_values_csv(self, user_config):
actual = reader._check_for_default_values_csv(filepath)
expected = None
assert actual == expected


class TestReadTabular:
    """Tests for methods shared by the csv and excel readers"""

    # Each case: (keep_whitespace flag, column headers, expected converter)
    test_data = [
        (True, ["REGION", "TECHNOLOGY"], {}),
        (
            False,
            ["REGION", "TECHNOLOGY"],
            {"REGION": str.strip, "TECHNOLOGY": str.strip},
        ),
    ]

    @mark.parametrize(
        "keep_whitespace, indices, expected",
        test_data,
        ids=["create_empty", "create_full"],
    )
    def test_whitespace_converter(
        self, user_config, keep_whitespace, indices, expected
    ):
        """Converter is empty when whitespace is kept, else strips each column"""
        csv_reader = ReadCsv(user_config=user_config, keep_whitespace=keep_whitespace)
        assert csv_reader._whitespace_converter(indices) == expected

0 comments on commit 02c618c

Please sign in to comment.