From 73486e0f5cc3550a5c9a516c284c6bd5b9abab6d Mon Sep 17 00:00:00 2001 From: trevorb1 Date: Mon, 20 Feb 2023 12:11:13 -0800 Subject: [PATCH 1/5] whitespace fix --- src/otoole/read_strategies.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/otoole/read_strategies.py b/src/otoole/read_strategies.py index 2c148ccb..e6a56dfc 100644 --- a/src/otoole/read_strategies.py +++ b/src/otoole/read_strategies.py @@ -235,9 +235,13 @@ def _get_input_data( CSV data as a dataframe """ csv_path = os.path.join(filepath, parameter + ".csv") + if details["type"] == "param": + converter = {x: str.strip for x in details["indices"]} + else: + converter = {} try: - df = pd.read_csv(csv_path) + df = pd.read_csv(csv_path, converters=converter) except pd.errors.EmptyDataError: logger.error("No data found in file for %s", parameter) expected_columns = details["indices"] From a93a651865de55ee8e8019aa9826a0d26bdb6bed Mon Sep 17 00:00:00 2001 From: trevorb1 Date: Mon, 20 Feb 2023 13:56:06 -0800 Subject: [PATCH 2/5] refactor update --- src/otoole/read_strategies.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/otoole/read_strategies.py b/src/otoole/read_strategies.py index e6a56dfc..8c2f159e 100644 --- a/src/otoole/read_strategies.py +++ b/src/otoole/read_strategies.py @@ -235,13 +235,8 @@ def _get_input_data( CSV data as a dataframe """ csv_path = os.path.join(filepath, parameter + ".csv") - if details["type"] == "param": - converter = {x: str.strip for x in details["indices"]} - else: - converter = {} - try: - df = pd.read_csv(csv_path, converters=converter) + df = pd.read_csv(csv_path, skipinitialspace=True) except pd.errors.EmptyDataError: logger.error("No data found in file for %s", parameter) expected_columns = details["indices"] From 8369bbb83ebe3f6cad40f709203386b309197cdb Mon Sep 17 00:00:00 2001 From: trevorb1 Date: Wed, 22 Feb 2023 15:34:41 -0800 Subject: [PATCH 3/5] whitespace removal logic --- src/otoole/cli.py | 14 
++++++++++--- src/otoole/read_strategies.py | 39 ++++++++++++++++++++++++++++------- 2 files changed, 43 insertions(+), 10 deletions(-) diff --git a/src/otoole/cli.py b/src/otoole/cli.py index c7b85ed5..1e5f150e 100644 --- a/src/otoole/cli.py +++ b/src/otoole/cli.py @@ -199,6 +199,8 @@ def conversion_matrix(args): # set read strategy + keep_whitespace = True if args.keep_whitespace else False + if args.from_format == "datafile": read_strategy = ReadDatafile(user_config=config) elif args.from_format == "datapackage": @@ -206,11 +208,11 @@ def conversion_matrix(args): "Reading from datapackage is deprecated, trying to read from CSVs" ) from_path = read_deprecated_datapackage(from_path) - read_strategy = ReadCsv(user_config=config) + read_strategy = ReadCsv(user_config=config, keep_whitespace=keep_whitespace) elif args.from_format == "csv": - read_strategy = ReadCsv(user_config=config) + read_strategy = ReadCsv(user_config=config, keep_whitespace=keep_whitespace) elif args.from_format == "excel": - read_strategy = ReadExcel(user_config=config) + read_strategy = ReadExcel(user_config=config, keep_whitespace=keep_whitespace) input_data, _ = read_strategy.read(args.from_path) @@ -366,6 +368,12 @@ def get_parser(): default=False, action="store_true", ) + convert_parser.add_argument( + "--keep_whitespace", + help="Keeps leading/trailing whitespace in CSV files", + default=False, + action="store_true", + ) convert_parser.set_defaults(func=conversion_matrix) # Parser for validation diff --git a/src/otoole/read_strategies.py b/src/otoole/read_strategies.py index 8c2f159e..33a7eea6 100644 --- a/src/otoole/read_strategies.py +++ b/src/otoole/read_strategies.py @@ -1,6 +1,6 @@ import logging import os -from typing import Any, Dict, List, TextIO, Tuple, Union +from typing import Any, Dict, List, Optional, TextIO, Tuple, Union import pandas as pd from amply import Amply @@ -34,6 +34,10 @@ def read( class _ReadTabular(ReadStrategy): + def __init__(self, user_config: Dict[str, 
Dict], keep_whitespace: bool = True): + super().__init__(user_config) + self.keep_whitespace = keep_whitespace + def _check_set(self, df: pd.DataFrame, config_details: Dict, name: str): logger.info("Checking set %s", name) @@ -86,6 +90,24 @@ def _check_parameter(self, df: pd.DataFrame, expected_headers: List, name: str): return narrow[all_headers].set_index(expected_headers) + def _whitespace_converter(self, indices: List[str]) -> Dict[str, Any]: + """Creates converter for stripping whitespace in dataframe + + Arguments + --------- + indices: List[str] + Column headers of dataframe + + Returns + ------- + Dict[str,Any] + Converter dictionary + """ + if self.keep_whitespace: + return {} + else: + return {x: str.strip for x in indices} + class ReadExcel(_ReadTabular): """Read in an Excel spreadsheet in wide format to a dict of Pandas DataFrames""" @@ -177,9 +199,13 @@ def read( logger.info("Looking for %s", parameter) entity_type = details["type"] + try: + converter = self._whitespace_converter(details["indices"]) + except KeyError: # sets don't have indices def + converter = self._whitespace_converter(["VALUE"]) if entity_type == "param": - df = self._get_input_data(filepath, parameter, details) + df = self._get_input_data(filepath, parameter, details, converter) narrow = self._check_parameter(df, details["indices"], parameter) if not narrow.empty: narrow_checked = check_datatypes( @@ -189,7 +215,7 @@ def read( narrow_checked = narrow elif entity_type == "set": - df = self._get_input_data(filepath, parameter, details) + df = self._get_input_data(filepath, parameter, details, converter) narrow = self._check_set(df, details, parameter) if not narrow.empty: narrow_checked = check_set_datatype( @@ -214,9 +240,7 @@ def read( @staticmethod def _get_input_data( - filepath: str, - parameter: str, - details: Dict, + filepath: str, parameter: str, details: Dict, converter: Optional[Dict] = None ) -> pd.DataFrame: """Reads in and checks CSV data format. 
@@ -234,9 +258,10 @@ def _get_input_data( pd.DataFrame CSV data as a dataframe """ + converter = {} if not converter else converter csv_path = os.path.join(filepath, parameter + ".csv") try: - df = pd.read_csv(csv_path, skipinitialspace=True) + df = pd.read_csv(csv_path, converters=converter) except pd.errors.EmptyDataError: logger.error("No data found in file for %s", parameter) expected_columns = details["indices"] From c3450e3f5013a2da63c697a089725d1e12efd863 Mon Sep 17 00:00:00 2001 From: trevorb1 Date: Wed, 22 Feb 2023 16:21:46 -0800 Subject: [PATCH 4/5] keep_whitespace flag tests --- src/otoole/read_strategies.py | 2 +- tests/test_read_strategies.py | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/otoole/read_strategies.py b/src/otoole/read_strategies.py index 33a7eea6..a74b4166 100644 --- a/src/otoole/read_strategies.py +++ b/src/otoole/read_strategies.py @@ -34,7 +34,7 @@ def read( class _ReadTabular(ReadStrategy): - def __init__(self, user_config: Dict[str, Dict], keep_whitespace: bool = True): + def __init__(self, user_config: Dict[str, Dict], keep_whitespace: bool = False): super().__init__(user_config) self.keep_whitespace = keep_whitespace diff --git a/tests/test_read_strategies.py b/tests/test_read_strategies.py index c3456b1c..dd106e9c 100644 --- a/tests/test_read_strategies.py +++ b/tests/test_read_strategies.py @@ -1066,3 +1066,28 @@ def test_read_default_values_csv(self, user_config): actual = reader._check_for_default_values_csv(filepath) expected = None assert actual == expected + + +class TestReadTabular: + """Methods shared for csv and excel""" + + test_data = [ + (True, ["REGION", "TECHNOLOGY"], {}), + ( + False, + ["REGION", "TECHNOLOGY"], + {"REGION": str.strip, "TECHNOLOGY": str.strip}, + ), + ] + + @mark.parametrize( + "keep_whitespace, indices, expected", + test_data, + ids=["create_empty", "create_full"], + ) + def test_whitespace_converter( + self, user_config, keep_whitespace, indices, 
expected + ): + reader = ReadCsv(user_config=user_config, keep_whitespace=keep_whitespace) + actual = reader._whitespace_converter(indices) + assert actual == expected From f0bb9a3efc5f2c12ae9c83f1de95281f98b72223 Mon Sep 17 00:00:00 2001 From: trevorb1 Date: Wed, 22 Feb 2023 20:09:11 -0800 Subject: [PATCH 5/5] fixed deprecation message --- src/otoole/cli.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/otoole/cli.py b/src/otoole/cli.py index 1e5f150e..9aeb1ada 100644 --- a/src/otoole/cli.py +++ b/src/otoole/cli.py @@ -88,6 +88,7 @@ def validate_model(args): "Reading from datapackage is deprecated, trying to read from CSVs" ) data_file = read_deprecated_datapackage(data_file) + logger.info("Successfully read folder of CSVs") read_strategy = ReadCsv(user_config=config) elif data_format == "csv": read_strategy = ReadCsv(user_config=config) @@ -152,6 +153,7 @@ def result_matrix(args): "Reading from datapackage is deprecated, trying to read from CSVs" ) input_csvs = read_deprecated_datapackage(args.input_datapackage) + logger.info("Successfully read folder of CSVs") input_data, _ = ReadCsv(user_config=config).read(input_csvs) elif args.input_datafile: input_data, _ = ReadDatafile(user_config=config).read(args.input_datafile) @@ -208,6 +210,7 @@ def conversion_matrix(args): "Reading from datapackage is deprecated, trying to read from CSVs" ) from_path = read_deprecated_datapackage(from_path) + logger.info("Successfully read folder of CSVs") read_strategy = ReadCsv(user_config=config, keep_whitespace=keep_whitespace) elif args.from_format == "csv": read_strategy = ReadCsv(user_config=config, keep_whitespace=keep_whitespace) @@ -334,6 +337,11 @@ def get_parser(): help="Input GNUMathProg datafile required for OSeMOSYS short or fast results", default=None, ) + result_parser.add_argument( + "--input_datapackage", + help="Deprecated", + default=None, + ) result_parser.add_argument("config", help="Path to config YAML file") result_parser.add_argument( 
"--write_defaults", @@ -432,6 +440,12 @@ def get_parser(): return parser +class DeprecateAction(argparse.Action): + def __call__(self, parser, namespace, values, option_string=None): + logger.warning(f"Argument {self.option_strings} is deprecated and is ignored.") + delattr(namespace, self.dest) + + def main(): parser = get_parser()