From 0affd20d04d3ec38ca396f3017f40e4a4526faf7 Mon Sep 17 00:00:00 2001 From: Will Usher Date: Mon, 12 Aug 2019 12:06:21 +0200 Subject: [PATCH] Pickle file gen reads from input data file --- scripts/generate_pickle.py | 110 ++++++++++++++++++++++++++----------- 1 file changed, 79 insertions(+), 31 deletions(-) diff --git a/scripts/generate_pickle.py b/scripts/generate_pickle.py index 52a87cd..cfe7972 100644 --- a/scripts/generate_pickle.py +++ b/scripts/generate_pickle.py @@ -11,23 +11,57 @@ """ import sys import pandas as pd +import logging +import pickle +from typing import List +LOGGER = logging.getLogger(__name__) +logging.basicConfig(format='%(asctime)s %(message)s', + filename='output_data/generate_pickle.log', + level=logging.DEBUG) -def main(data_file : str, pickle_file : str): + +def read_gurobi_results(data_file): + LOGGER.info("Reading models results in Gurobi format from %s", data_file) + df = pd.read_csv(data_file, sep='$', header=None, skiprows=2) # type: pandas.DataFrame + df.columns = ['temp'] + df[['parameter', 'id', 'value']] = df['temp'].str.split(r'\)|\(', expand=True) + df = df.drop('temp', axis=1) + return df + + +def read_cbc_results(data_file): + LOGGER.info("Reading models results in CBC format from %s", data_file) + df = pd.read_csv(data_file, sep='$', header=None) # type: pandas.DataFrame + df.columns = ['temp'] + df[['temp','value']] = df['temp'].str.split(')', expand=True) + df = df.applymap(lambda x: x.strip() if isinstance(x,str) else x) + df[['temp','parameter']] = df['temp'].str.split(' ', expand=True) + df[['parameter','id']] = df['parameter'].str.split('(', expand=True) + df['value'] = df['value'].str.replace(' 0','') + df = df.drop('temp', axis=1) + df = df[~df['value'].str.contains('e-')] + LOGGER.debug("Read in %s rows of data", df.shape) + return df + + +def main(input_file : str, data_file : str, pickle_file : str, result_format='cbc'): """ Arguments --------- + input_file : str + The input data file data_file : str Path to the solution file pickle_file : str Path to which to write the pickle file """ - df = pd.read_csv(data_file, sep='$', header=None, skiprows=2) - df.columns = ['temp'] - df[['parameter', 'id', 'value']] = df['temp'].str.split(r'\)|\(', expand=True) - df = df.drop('temp', axis=1) + if result_format == 'gurobi': + df = read_gurobi_results(data_file) + elif result_format == 'cbc': + df = read_cbc_results(data_file) params = df.parameter.unique() all_params = {} @@ -61,7 +95,7 @@ def main(data_file : str, pickle_file : str): df_p = df_p.rename(columns={'value': each}) df_p.to_csv(str(each) + '.csv', index=None) # Print data for each paramter to a CSV file - lines = [] + lines = [] # type: List parsing = False @@ -73,7 +107,7 @@ def main(data_file : str, pickle_file : str): output_table = [] input_table= [] - with open(data_file, 'r') as f: + with open(input_file, 'r') as f: for line in f: if line.startswith('set YEAR'): start_year = line.split(' ')[3] @@ -90,9 +124,14 @@ def main(data_file : str, pickle_file : str): elif line.startswith('set MODE_OF_OPERATION'): mode_list = line.split(' ')[3:-1] + LOGGER.debug("Read in set of %s fuels", len(fuel_list)) + LOGGER.debug("Read in set of %s techs", len(tech_list)) + LOGGER.debug("Read in set of %s storage", len(storage_list)) + LOGGER.debug("Read in set of %s emissions", len(emission_list)) + emission_table = [] # Emission activity ratios-reading them from the data file - with open(data_file, 'r') as f: + with open(input_file, 'r') as f: for line in f: if line.startswith(";"): parsing = False @@ -101,21 +140,26 @@ def main(data_file : str, pickle_file : str): emission = line.split(', ')[2] tech = line.split(', ')[1] elif line.startswith(start_year): - years = line.rstrip().split(' ')[0:] + years = line.rstrip(':= ;\n').split(' ')[0:] years = [i.strip(' :=') for i in years] + LOGGER.debug("Read in set of %s years", len(years)) elif not line.startswith(start_year): - values = line.rstrip().split(' ')[1:] + values = line.rstrip(':= ;\n').split(' ')[1:] mode = line.split(' ')[0] - data_emission.append(tuple([emission,tech,mode])) - data_all.append(tuple([tech,mode])) + data_emission.append(tuple([emission, tech, mode])) + data_all.append(tuple([tech, mode])) for i in range(0,len(years)): - emission_table.append(tuple([tech,emission,mode,years[i],values[i]])) + try: + emission_table.append(tuple([tech, emission, mode, years[i], values[i]])) + except IndexError as ex: + LOGGER.error("Could not append year %s", years[i]) + raise ex if line.startswith('param EmissionActivityRatio'): parsing = True # input activity ratios-reading them from the data file - with open(data_file, 'r') as f: + with open(input_file, 'r') as f: for line in f: if line.startswith(";"): parsing = False @@ -124,10 +168,10 @@ def main(data_file : str, pickle_file : str): fuel = line.split(', ')[2] tech = line.split(', ')[1] elif line.startswith(start_year): - years = line.rstrip().split(' ')[0:] + years = line.rstrip(':= ;\n').split(' ')[0:] years = [i.strip(' :=') for i in years] elif not line.startswith(start_year): - values = line.rstrip().split(' ')[1:] + values = line.rstrip(':= ;\n').split(' ')[1:] mode = line.split(' ')[0] data_inp.append(tuple([fuel,tech,mode])) data_all.append(tuple([tech,mode])) @@ -138,7 +182,7 @@ def main(data_file : str, pickle_file : str): # Output activity ratios-reading them from the data file - with open(data_file, 'r') as f: + with open(input_file, 'r') as f: for line in f: if line.startswith(";"): parsing = False @@ -147,10 +191,10 @@ def main(data_file : str, pickle_file : str): fuel = line.split(', ')[2] tech = line.split(', ')[1] elif line.startswith(start_year): - years = line.rstrip().split(' ')[0:] + years = line.rstrip(':= ;\n').split(' ')[0:] years = [i.strip(' :=') for i in years] elif not line.startswith(start_year): - values = line.rstrip().split(' ')[1:] + values = line.rstrip(':= ;\n').split(' ')[1:] mode = line.split(' ')[0] data_out.append(tuple([fuel,tech,mode])) data_all.append(tuple([tech,mode])) @@ -163,16 +207,16 @@ def main(data_file : str, pickle_file : str): year_split = [] parsing = False - with open(data_file, 'r') as f: + with open(input_file, 'r') as f: for line in f: if line.startswith(";"): parsing = False if parsing: if line.startswith(start_year): - years = line.rstrip().split(' ')[0:] + years = line.rstrip(':= ;\n').split(' ')[0:] years = [i.strip(':=') for i in years] elif not line.startswith(start_year): - time_slice = line.rstrip().split(' ')[0] + time_slice = line.rstrip(':= ;\n').split(' ')[0] values = line.rstrip().split(' ')[1:] for i in range(0,len(years)): year_split.append(tuple([time_slice,years[i],values[i]])) @@ -272,23 +316,27 @@ def main(data_file : str, pickle_file : str): # Removing the rate of activity fromt he results to make the pickle file lighter. del all_params['RateOfActivity'] + LOGGER.debug("Results data has %s rows", len(all_params)) + + LOGGER.info("Dumping results to %s", pickle_file) with open(pickle_file, 'wb') as handle: pickle.dump(all_params, handle, protocol=pickle.HIGHEST_PROTOCOL) if __name__ == '__main__': - if len(sys.argv) != 3: - msg = "Usage: python {} " + if len(sys.argv) != 4: + msg = "Usage: python {} " print(msg.format(sys.argv[0])) sys.exit(1) else: - data_file = sys.argv[1] - pickle_file = sys.argv[2] - try: - main(data_file, pickle_file) - sys.exit(0) - except: - sys.exit(1) + input_file = sys.argv[1] + data_file = sys.argv[2] + pickle_file = sys.argv[3] + # try: + main(input_file, data_file, pickle_file) + # except: + # sys.exit(1) + # sys.exit(0) \ No newline at end of file