class CSVCell(str):
    """
    String wrapper for the contents of a single .csv cell, supports interpreting the content as different formats.

    A CSVCell is a plain `str` in every other respect, so it can be compared to, formatted as, and used like a string.
    """

    DEC = 'dec'  # for numbers formatted 16777215
    HEX = 'hex'  # for numbers formatted #ab0eff (always '#' followed by at least 6 lowercase hex digits)
    FLOAT = 'float'  # for numbers formatted [0.5, 0.2, 0.9]

    # Maps the (case-folded) two character prefix of a number literal to its base.
    _PREFIX_BASES = {'0x': 16, '0o': 8, '0b': 2}

    def as_integer(self) -> int:
        """
        Interprets the string contained in this CSVCell as an integer.
        Supported formats are:
            - base 10 integers (no prefix)
            - prefixed hex color codes (# prefix and 6 digits)
            - prefixed hex, octal, or binary (0x, 0o, or 0b and some amount of digits, prefix is case insensitive)
            - bool (True or False, case insensitive)
        Returns an integer representation of the value.
        Raises a ValueError if the content matches none of the supported formats.
        """
        if self.startswith('#') and len(self) == 7:  # alternative way of marking base 16 (hex colors)
            # Drop exactly the one leading '#'; `lstrip` would also swallow any further '#' characters.
            return int(self[1:], 16)

        folded = self.casefold()
        if folded == 'true':
            return 1
        if folded == 'false':
            return 0

        base = self._PREFIX_BASES.get(folded[:2])
        if base is not None:  # 0x -> base 16, 0o -> base 8, 0b -> base 2
            return int(self, base)
        return int(self)  # default case, interpret as base 10

    def to_color_code(self, encoding: str) -> 'CSVCell':
        """
        Interprets the string contained in this CSVCell as a color code using the given encoding and returns a new CSVCell with that interpretation as its content.
        E.g. if the CSVCell this function was called on contains '#4AA0C7' and 'CSVCell.DEC' is given as an encoding, a new CSVCell with content '4890823' is returned.

        `encoding` must be one of `CSVCell.DEC`, `CSVCell.HEX`, or `CSVCell.FLOAT`; any other value raises a ValueError.
        Hex output is zero-padded to six digits, so '65280' becomes '#00ff00' rather than '#ff00'.
        """
        # Validate the encoding first so an invalid encoding is always reported as such,
        # regardless of whether the cell content itself can be parsed.
        if encoding not in (CSVCell.DEC, CSVCell.HEX, CSVCell.FLOAT):
            raise ValueError(
                f"Invalid encoding '{encoding}'. Must be '{CSVCell.DEC}', '{CSVCell.HEX}', or '{CSVCell.FLOAT}'.")

        color = self.as_integer()
        if encoding == CSVCell.HEX:
            # Format spec `06x` keeps leading zeros; `hex(color).lstrip('0x')` would strip them
            # (and would turn the color 0 into a bare '#').
            return CSVCell(f'#{color:06x}')
        if encoding == CSVCell.DEC:
            return CSVCell(color)
        # FLOAT: split into red, green, blue bytes and scale each channel to the range 0..1
        return CSVCell([((color >> shift) & 0xFF) / 255 for shift in (16, 8, 0)])
+ """ + if not column_names: + column_names = [] + if not data: + data = [] + + if len(column_names) != len(data): + raise ValueError( + f"Could not build CSVRow from supplied column names and data; Number of supplied column names ({len(column_names)}) does not match number of supplied data entries ({len(data)}).") + + self._data = {column_names[column_index] + : value for column_index, value in enumerate(data)} + + def __bool__(self): + """ + Allow for the use of CSVRow instances in if statements; If the CSVRow has no keys it is equivalent to `False`. + """ + return len(self._data.keys()) != 0 + + def __getitem__(self, key: str): + try: + return self._data[key] + except KeyError as ke: + raise ValueError( + f"Failed to select column named '{ke.args[0]}' from CSVRow with columns {[key for key in self._data]}.") + + def __repr__(self) -> str: + return str(self._data) + + def get(self, key: str, default: str | Any) -> CSVCell: + """ + Returns the value corrosponding to the key if it exists and is not the empty string. + Else returns the provided default. The provided default is cast to a string internally. + """ + value = self._data.get(key, CSVCell(default)) + if value: + return value + else: + return CSVCell(default) + + +class CSV(): + """ + List-of-Rows representation of a .csv file which can be iteraded over using for ... in. + Optimized for row-first access, i.e. select a row, then a column. + Also provides a `find_row` function for column-first, i.e. select a column, then a row, access. + However, the latter is is more expensive. + + All access methods return CSVRow objects which are dynamically created upon calling an access method. + """ + + @staticmethod + def from_file(path: Path) -> 'CSV': + """ + Reads in a csv file and returns a list of rows. Each row consists of a dictionary which contains labeled values. 
+ """ + with open(path, mode='r') as file: + csv_file = csv.reader(file) + header = next(csv_file) + + return CSV(column_names=header, rows=[[CSVCell(cell) for cell in row] for row in csv_file]) + + def __init__(self, column_names: List[str], rows: List[List[CSVCell]]) -> None: + """ + Initialize a new CSV from a list of column names (headers) and a list of rows. + The latter contain actual data, whilst the former only holds names of columns. + """ + self._column_names = column_names + self._rows = rows + + def __iter__(self): + self.__current = 0 + self.__last = len(self._rows) + return self + + def __next__(self) -> CSVRow: + current = self.__current + self.__current += 1 + if current < self.__last: + return CSVRow(self._column_names, self._rows[current]) + raise StopIteration() + + def __getitem__(self, row_index: int): + return CSVRow(self._column_names, self._rows[row_index]) + + def __repr__(self): + return str([CSVRow(self._column_names, data) for data in self._rows]) + + def find_row(self, value: str, by_column: str | int = 0) -> CSVRow: + """ + Finds and returns the first row in this CSV which has `value` in column `by_column`. `by_column` can either be a str, in which case it is treated + as a column name and the header line is searched for a matching string, or an int n, in which case the nth column is selected. + `by_column` defaults to `0`. + Returns an empty `CSVRow` if no match was found. 
+ """ + if isinstance(by_column, str): + by_column = self._column_names.index(by_column) + + for row in self._rows: + if row[by_column] == value: + return CSVRow(self._column_names, row) + return CSVRow() diff --git a/gm4_zauber_cauldrons/generate.py b/gm4_zauber_cauldrons/generate.py index 0a607abf28..bf3741b351 100644 --- a/gm4_zauber_cauldrons/generate.py +++ b/gm4_zauber_cauldrons/generate.py @@ -1,165 +1,11 @@ -from typing import List, Dict, Any +from typing import Dict, Any from pathlib import Path from itertools import product -import csv +from gm4.utils import CSV, CSVCell import json from beet import Context, subproject - -class CSVCell(str): - """ - String wrapper which supports color encoding translation. - """ - - DEC = 'dec' # for numbers formatted 16777215 - HEX = 'hex' # for numbers formatted #AB0EFF - FLOAT = 'float' # for numbers formatted [0.5, 0.2, 0.9] - - def as_integer(self) -> int: - """ - Interprets the string contained in this CSVCell as an integer. - Tries to detect the base automatically. - """ - if self.startswith('#') and len(self) == 7: # alternative way of marking base 16 (hex colors) - return CSVCell('0x' + self.lstrip('#')).as_integer() - if self.startswith('0x'): # check if the string is in base 2 - return int(self, 16) - if self.startswith('0o'): # check if the string is in base 8 - return int(self, 8) - if self.startswith('0b'): # check if the string is in base 16 - return int(self, 2) - return int(self) # string must be base 10 - - def to_color_code(self, encoding: str) -> 'CSVCell': - """ - Outputs the string contained in this CSVCell formatted as a color code, e.g. #4AA0C7 if 'HEX' is given. 
- """ - if encoding == CSVCell.HEX: - return CSVCell('#' + hex(self.as_integer()).lstrip('0x')) - if encoding == CSVCell.DEC: - return CSVCell(self.as_integer()) - if encoding == CSVCell.FLOAT: - dec = self.as_integer() - return CSVCell([(dec >> 16) / 255, ((dec >> 8) & 0xFF) / 255, (dec & 0xFF) / 255]) - raise ValueError( - f"Invalid encoding '{encoding}'. Must be '{CSVCell.DEC}', '{CSVCell.HEX}', or '{CSVCell.FLOAT}'.") - - -class CSVRow(): - """ - Read-only dict wrapper which represents a row of data from a .csv file. - """ - - def __init__(self, column_names: List[str] | None = None, data: List[CSVCell] | None = None) -> None: - """ - Initialize a new CSVRow object using the supplied column names and data. CSVRow objects are read-only by design. - If no data and no column names are supplied the resulting CSVRow object will evaluate to false in boolean expressions. - - Access data within this CSVRow via the `get(key, default)` method or using `[]`. - """ - if not column_names: - column_names = [] - if not data: - data = [] - - if len(column_names) != len(data): - raise ValueError( - f"Could not build CSVRow from supplied column names and data; Number of supplied column names ({len(column_names)}) does not match number of supplied data entries ({len(data)}).") - - self._data = {column_names[column_index]: value for column_index, value in enumerate(data)} - - def __bool__(self): - """ - Allow for the use of CSVRow instances in if statements; If the CSVRow has no keys it is equivalent to `False`. - """ - return len(self._data.keys()) != 0 - - def __getitem__(self, key: str): - try: - return self._data[key] - except KeyError as ke: - raise ValueError( - f"Failed to select column named '{ke.args[0]}' from CSVRow with columns {[key for key in self._data]}.") - - def __repr__(self) -> str: - return str(self._data) - - def get(self, key: str, default: str | Any) -> CSVCell: - """ - Returns the value corrosponding to the key if it exists and is not the empty string. 
- Else returns the provided default. The provided default is cast to a string internally. - """ - value = self._data.get(key, CSVCell(default)) - if value: - return value - else: - return CSVCell(default) - - -class CSV(): - """ - List-of-Rows representation of a .csv file which can be iteraded over using for ... in. - Optimized for row-first access, i.e. select a row, then a column. - Also provides a `find_row` function for column-first, i.e. select a column, then a row, access. - However, the latter is is more expensive. - - All access methods return CSVRow objects which are dynamically created upon calling an access method. - """ - - def __init__(self, column_names: List[str], rows: List[List[CSVCell]]) -> None: - """ - Initialize a new CSV from a list of column names (headers) and a list of rows. - The latter contain actual data, whilst the former only holds names of columns. - """ - self._column_names = column_names - self._rows = rows - - def __iter__(self): - self.__current = 0 - self.__last = len(self._rows) - return self - - def __next__(self) -> CSVRow: - current = self.__current - self.__current += 1 - if current < self.__last: - return CSVRow(self._column_names, self._rows[current]) - raise StopIteration() - - def __getitem__(self, row_index: int): - return CSVRow(self._column_names, self._rows[row_index]) - - def __repr__(self): - return str([CSVRow(self._column_names, data) for data in self._rows]) - - def find_row(self, value: str, by_column: str | int = 0) -> CSVRow: - """ - Finds and returns the first row in this CSV which has `value` in column `by_column`. `by_column` can either be a str, in which case it is treated - as a column name and the header line is searched for a matching string, or an int n, in which case the nth column is selected. - `by_column` defaults to `0`. - Returns an empty `CSVRow` if no match was found. 
- """ - if isinstance(by_column, str): - by_column = self._column_names.index(by_column) - - for row in self._rows: - if row[by_column] == value: - return CSVRow(self._column_names, row) - return CSVRow() - - -def read_csv(path: Path) -> CSV: - """ - Reads in a csv file and returns a list of rows. Each row consists of a dictionary which contains labeled values. - """ - with open(path, mode='r') as file: - csv_file = csv.reader(file) - header = next(csv_file) - - return CSV(column_names=header, rows=[[CSVCell(cell) for cell in row] for row in csv_file]) - - def read_json(path: Path) -> Any: """ Reads in a json file and returns a python object representing the json. @@ -168,31 +14,30 @@ def read_json(path: Path) -> Any: json_file = json.load(file) return json_file - def beet_default(ctx: Context): # read raw data - armor_flavors: CSV = read_csv( + armor_flavors: CSV = CSV.from_file( Path('gm4_zauber_cauldrons', 'raw', 'armor_flavors.csv')) - armor_pieces: CSV = read_csv( + armor_pieces: CSV = CSV.from_file( Path('gm4_zauber_cauldrons', 'raw', 'armor_pieces.csv')) - crystal_effects: CSV = read_csv( + crystal_effects: CSV = CSV.from_file( Path('gm4_zauber_cauldrons', 'raw', 'crystal_effects.csv')) crystal_lores: Any = read_json( Path('gm4_zauber_cauldrons', 'raw', 'crystal_lores.json')) - flower_types: CSV = read_csv( + flower_types: CSV = CSV.from_file( Path('gm4_zauber_cauldrons', 'raw', 'flower_types.csv')) - magicol_colors: CSV = read_csv( + magicol_colors: CSV = CSV.from_file( Path('gm4_zauber_cauldrons', 'raw', 'magicol_colors.csv')) - potion_bottles: CSV = read_csv( + potion_bottles: CSV = CSV.from_file( Path('gm4_zauber_cauldrons', 'raw', 'potion_bottles.csv')) - potion_effects: CSV = read_csv( + potion_effects: CSV = CSV.from_file( Path('gm4_zauber_cauldrons', 'raw', 'potion_effects.csv')) - potion_bottles: CSV = read_csv( + potion_bottles: CSV = CSV.from_file( Path('gm4_zauber_cauldrons', 'raw', 'potion_bottles.csv')) potion_lores: Any = read_json( 
Path('gm4_zauber_cauldrons', 'raw', 'potion_lores.json')) - weather_modifiers: CSV = read_csv( + weather_modifiers: CSV = CSV.from_file( Path('gm4_zauber_cauldrons', 'raw', 'weather_modifiers.csv')) # generate files