Skip to content

Move CSV utils to gm4.utils #944

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 168 additions & 1 deletion gm4/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import subprocess
import warnings
from dataclasses import dataclass, asdict
from typing import Any
from typing import Any, List
from functools import total_ordering
import csv
from pathlib import Path

def run(cmd: list[str]|str) -> str:
"""Run a shell command and return the stdout."""
Expand Down Expand Up @@ -81,3 +83,168 @@ class NoneAttribute():
"""Object which returns None for any arbitrary attribute access. Used for default members"""
def __getattribute__(self, __name: str) -> None:
return None


# CSV READING UTILS
class CSVCell(str):
"""
String wrapper for contents of a CSVCell, supports interpreting the content as different formats.
"""

DEC = 'dec' # for numbers formatted 16777215
HEX = 'hex' # for numbers formatted #AB0EFF
FLOAT = 'float' # for numbers formatted [0.5, 0.2, 0.9]

def as_integer(self) -> int:
"""
Interprets the string contained in this CSVCell as an integer.
Supported formats are:
- base 10 integers (no prefix)
- prefixed hex color codes (# prefix and 6 digits)
- prefixed hex, octal, or binary (0x, 0o, or 0b and some amount of digits)
- bool (True or False, case insensitive)
Returns a integer representation of the value.
"""
if self.startswith('#') and len(self) == 7: # alternative way of marking base 16 (hex colors)
return CSVCell('0x' + self.lstrip('#')).as_integer()
if self.startswith('0x'): # check if the string is in base 2
return int(self, 16)
if self.startswith('0o'): # check if the string is in base 8
return int(self, 8)
if self.startswith('0b'): # check if the string is in base 16
return int(self, 2)
if self.casefold() == 'true':
return 1
if self.casefold() == 'false':
return 0
return int(self) # default case, interpret as base 10

def to_color_code(self, encoding: str) -> 'CSVCell':
"""
Interprets the string contained in this CSVCell as a color code using the given encoding and returns a new CSVCell with that interpretation as its content.
E.g. if the CSVCell this function was called on contains '#4AA0C7' and 'CSVCell.DEC' is given as an encoding, a new CSVCell with content '4890823' is returned.
"""
if encoding == CSVCell.HEX:
return CSVCell('#' + hex(self.as_integer()).lstrip('0x'))
if encoding == CSVCell.DEC:
return CSVCell(self.as_integer())
if encoding == CSVCell.FLOAT:
dec = self.as_integer()
return CSVCell([(dec >> 16) / 255, ((dec >> 8) & 0xFF) / 255, (dec & 0xFF) / 255])
raise ValueError(
f"Invalid encoding '{encoding}'. Must be '{CSVCell.DEC}', '{CSVCell.HEX}', or '{CSVCell.FLOAT}'.")


class CSVRow():
"""
Read-only dict wrapper which represents a row of data from a .csv file.
"""

def __init__(self, column_names: List[str] | None = None, data: List[CSVCell] | None = None) -> None:
"""
Initialize a new CSVRow object using the supplied column names and data. CSVRow objects are read-only by design.
If no data and no column names are supplied the resulting CSVRow object will evaluate to false in boolean expressions.

Access data within this CSVRow via the `get(key, default)` method or using `[<key: str>]`.
"""
if not column_names:
column_names = []
if not data:
data = []

if len(column_names) != len(data):
raise ValueError(
f"Could not build CSVRow from supplied column names and data; Number of supplied column names ({len(column_names)}) does not match number of supplied data entries ({len(data)}).")

self._data = {column_names[column_index]
: value for column_index, value in enumerate(data)}

def __bool__(self):
"""
Allow for the use of CSVRow instances in if statements; If the CSVRow has no keys it is equivalent to `False`.
"""
return len(self._data.keys()) != 0

def __getitem__(self, key: str):
try:
return self._data[key]
except KeyError as ke:
raise ValueError(
f"Failed to select column named '{ke.args[0]}' from CSVRow with columns {[key for key in self._data]}.")

def __repr__(self) -> str:
return str(self._data)

def get(self, key: str, default: str | Any) -> CSVCell:
"""
Returns the value corrosponding to the key if it exists and is not the empty string.
Else returns the provided default. The provided default is cast to a string internally.
"""
value = self._data.get(key, CSVCell(default))
if value:
return value
else:
return CSVCell(default)


class CSV():
"""
List-of-Rows representation of a .csv file which can be iteraded over using for ... in.
Optimized for row-first access, i.e. select a row, then a column.
Also provides a `find_row` function for column-first, i.e. select a column, then a row, access.
However, the latter is is more expensive.

All access methods return CSVRow objects which are dynamically created upon calling an access method.
"""

@staticmethod
def from_file(path: Path) -> 'CSV':
"""
Reads in a csv file and returns a list of rows. Each row consists of a dictionary which contains labeled values.
"""
with open(path, mode='r') as file:
csv_file = csv.reader(file)
header = next(csv_file)

return CSV(column_names=header, rows=[[CSVCell(cell) for cell in row] for row in csv_file])

def __init__(self, column_names: List[str], rows: List[List[CSVCell]]) -> None:
"""
Initialize a new CSV from a list of column names (headers) and a list of rows.
The latter contain actual data, whilst the former only holds names of columns.
"""
self._column_names = column_names
self._rows = rows

def __iter__(self):
self.__current = 0
self.__last = len(self._rows)
return self

def __next__(self) -> CSVRow:
current = self.__current
self.__current += 1
if current < self.__last:
return CSVRow(self._column_names, self._rows[current])
raise StopIteration()

def __getitem__(self, row_index: int):
return CSVRow(self._column_names, self._rows[row_index])

def __repr__(self):
return str([CSVRow(self._column_names, data) for data in self._rows])

def find_row(self, value: str, by_column: str | int = 0) -> CSVRow:
"""
Finds and returns the first row in this CSV which has `value` in column `by_column`. `by_column` can either be a str, in which case it is treated
as a column name and the header line is searched for a matching string, or an int n, in which case the nth column is selected.
`by_column` defaults to `0`.
Returns an empty `CSVRow` if no match was found.
"""
if isinstance(by_column, str):
by_column = self._column_names.index(by_column)

for row in self._rows:
if row[by_column] == value:
return CSVRow(self._column_names, row)
return CSVRow()
177 changes: 11 additions & 166 deletions gm4_zauber_cauldrons/generate.py
Original file line number Diff line number Diff line change
@@ -1,165 +1,11 @@
from typing import List, Dict, Any
from typing import Dict, Any
from pathlib import Path
from itertools import product
import csv
from gm4.utils import CSV, CSVCell
import json

from beet import Context, subproject


class CSVCell(str):
"""
String wrapper which supports color encoding translation.
"""

DEC = 'dec' # for numbers formatted 16777215
HEX = 'hex' # for numbers formatted #AB0EFF
FLOAT = 'float' # for numbers formatted [0.5, 0.2, 0.9]

def as_integer(self) -> int:
"""
Interprets the string contained in this CSVCell as an integer.
Tries to detect the base automatically.
"""
if self.startswith('#') and len(self) == 7: # alternative way of marking base 16 (hex colors)
return CSVCell('0x' + self.lstrip('#')).as_integer()
if self.startswith('0x'): # check if the string is in base 2
return int(self, 16)
if self.startswith('0o'): # check if the string is in base 8
return int(self, 8)
if self.startswith('0b'): # check if the string is in base 16
return int(self, 2)
return int(self) # string must be base 10

def to_color_code(self, encoding: str) -> 'CSVCell':
"""
Outputs the string contained in this CSVCell formatted as a color code, e.g. #4AA0C7 if 'HEX' is given.
"""
if encoding == CSVCell.HEX:
return CSVCell('#' + hex(self.as_integer()).lstrip('0x'))
if encoding == CSVCell.DEC:
return CSVCell(self.as_integer())
if encoding == CSVCell.FLOAT:
dec = self.as_integer()
return CSVCell([(dec >> 16) / 255, ((dec >> 8) & 0xFF) / 255, (dec & 0xFF) / 255])
raise ValueError(
f"Invalid encoding '{encoding}'. Must be '{CSVCell.DEC}', '{CSVCell.HEX}', or '{CSVCell.FLOAT}'.")


class CSVRow():
"""
Read-only dict wrapper which represents a row of data from a .csv file.
"""

def __init__(self, column_names: List[str] | None = None, data: List[CSVCell] | None = None) -> None:
"""
Initialize a new CSVRow object using the supplied column names and data. CSVRow objects are read-only by design.
If no data and no column names are supplied the resulting CSVRow object will evaluate to false in boolean expressions.

Access data within this CSVRow via the `get(key, default)` method or using `[<key: str>]`.
"""
if not column_names:
column_names = []
if not data:
data = []

if len(column_names) != len(data):
raise ValueError(
f"Could not build CSVRow from supplied column names and data; Number of supplied column names ({len(column_names)}) does not match number of supplied data entries ({len(data)}).")

self._data = {column_names[column_index]: value for column_index, value in enumerate(data)}

def __bool__(self):
"""
Allow for the use of CSVRow instances in if statements; If the CSVRow has no keys it is equivalent to `False`.
"""
return len(self._data.keys()) != 0

def __getitem__(self, key: str):
try:
return self._data[key]
except KeyError as ke:
raise ValueError(
f"Failed to select column named '{ke.args[0]}' from CSVRow with columns {[key for key in self._data]}.")

def __repr__(self) -> str:
return str(self._data)

def get(self, key: str, default: str | Any) -> CSVCell:
"""
Returns the value corrosponding to the key if it exists and is not the empty string.
Else returns the provided default. The provided default is cast to a string internally.
"""
value = self._data.get(key, CSVCell(default))
if value:
return value
else:
return CSVCell(default)


class CSV():
"""
List-of-Rows representation of a .csv file which can be iteraded over using for ... in.
Optimized for row-first access, i.e. select a row, then a column.
Also provides a `find_row` function for column-first, i.e. select a column, then a row, access.
However, the latter is is more expensive.

All access methods return CSVRow objects which are dynamically created upon calling an access method.
"""

def __init__(self, column_names: List[str], rows: List[List[CSVCell]]) -> None:
"""
Initialize a new CSV from a list of column names (headers) and a list of rows.
The latter contain actual data, whilst the former only holds names of columns.
"""
self._column_names = column_names
self._rows = rows

def __iter__(self):
self.__current = 0
self.__last = len(self._rows)
return self

def __next__(self) -> CSVRow:
current = self.__current
self.__current += 1
if current < self.__last:
return CSVRow(self._column_names, self._rows[current])
raise StopIteration()

def __getitem__(self, row_index: int):
return CSVRow(self._column_names, self._rows[row_index])

def __repr__(self):
return str([CSVRow(self._column_names, data) for data in self._rows])

def find_row(self, value: str, by_column: str | int = 0) -> CSVRow:
"""
Finds and returns the first row in this CSV which has `value` in column `by_column`. `by_column` can either be a str, in which case it is treated
as a column name and the header line is searched for a matching string, or an int n, in which case the nth column is selected.
`by_column` defaults to `0`.
Returns an empty `CSVRow` if no match was found.
"""
if isinstance(by_column, str):
by_column = self._column_names.index(by_column)

for row in self._rows:
if row[by_column] == value:
return CSVRow(self._column_names, row)
return CSVRow()


def read_csv(path: Path) -> CSV:
"""
Reads in a csv file and returns a list of rows. Each row consists of a dictionary which contains labeled values.
"""
with open(path, mode='r') as file:
csv_file = csv.reader(file)
header = next(csv_file)

return CSV(column_names=header, rows=[[CSVCell(cell) for cell in row] for row in csv_file])


def read_json(path: Path) -> Any:
"""
Reads in a json file and returns a python object representing the json.
Expand All @@ -168,31 +14,30 @@ def read_json(path: Path) -> Any:
json_file = json.load(file)
return json_file


def beet_default(ctx: Context):

# read raw data
armor_flavors: CSV = read_csv(
armor_flavors: CSV = CSV.from_file(
Path('gm4_zauber_cauldrons', 'raw', 'armor_flavors.csv'))
armor_pieces: CSV = read_csv(
armor_pieces: CSV = CSV.from_file(
Path('gm4_zauber_cauldrons', 'raw', 'armor_pieces.csv'))
crystal_effects: CSV = read_csv(
crystal_effects: CSV = CSV.from_file(
Path('gm4_zauber_cauldrons', 'raw', 'crystal_effects.csv'))
crystal_lores: Any = read_json(
Path('gm4_zauber_cauldrons', 'raw', 'crystal_lores.json'))
flower_types: CSV = read_csv(
flower_types: CSV = CSV.from_file(
Path('gm4_zauber_cauldrons', 'raw', 'flower_types.csv'))
magicol_colors: CSV = read_csv(
magicol_colors: CSV = CSV.from_file(
Path('gm4_zauber_cauldrons', 'raw', 'magicol_colors.csv'))
potion_bottles: CSV = read_csv(
potion_bottles: CSV = CSV.from_file(
Path('gm4_zauber_cauldrons', 'raw', 'potion_bottles.csv'))
potion_effects: CSV = read_csv(
potion_effects: CSV = CSV.from_file(
Path('gm4_zauber_cauldrons', 'raw', 'potion_effects.csv'))
potion_bottles: CSV = read_csv(
potion_bottles: CSV = CSV.from_file(
Path('gm4_zauber_cauldrons', 'raw', 'potion_bottles.csv'))
potion_lores: Any = read_json(
Path('gm4_zauber_cauldrons', 'raw', 'potion_lores.json'))
weather_modifiers: CSV = read_csv(
weather_modifiers: CSV = CSV.from_file(
Path('gm4_zauber_cauldrons', 'raw', 'weather_modifiers.csv'))

# generate files
Expand Down