From e5cf3fc36d17642e6eb664224ed2a64aa86d2189 Mon Sep 17 00:00:00 2001 From: Simon Dierickx Date: Wed, 9 Apr 2025 11:40:43 +0200 Subject: [PATCH 1/2] first proposal of project structure --- src/endureio/datacomponents/__init__.py | 0 .../datacomponents/base_data_components.py | 78 +++++++++++++++++++ .../datacomponents/fitfile.py/__init__.py | 0 src/endureio/filehandlers/__init__.py | 0 .../filehandlers/base_file_handlers.py | 52 +++++++++++++ src/endureio/filehandlers/fit/__init__.py | 0 src/endureio/filehandlers/fit/fitfile.py | 7 ++ src/endureio/fit.py | 8 -- uv.lock | 58 +++++++++++++- 9 files changed, 194 insertions(+), 9 deletions(-) create mode 100644 src/endureio/datacomponents/__init__.py create mode 100644 src/endureio/datacomponents/base_data_components.py create mode 100644 src/endureio/datacomponents/fitfile.py/__init__.py create mode 100644 src/endureio/filehandlers/__init__.py create mode 100644 src/endureio/filehandlers/base_file_handlers.py create mode 100644 src/endureio/filehandlers/fit/__init__.py create mode 100644 src/endureio/filehandlers/fit/fitfile.py delete mode 100644 src/endureio/fit.py diff --git a/src/endureio/datacomponents/__init__.py b/src/endureio/datacomponents/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/endureio/datacomponents/base_data_components.py b/src/endureio/datacomponents/base_data_components.py new file mode 100644 index 0000000..40a78b5 --- /dev/null +++ b/src/endureio/datacomponents/base_data_components.py @@ -0,0 +1,78 @@ +from abc import ABC, abstractmethod + + +# --- Component --- +class DataComponent(ABC): + """ + Base class for all metadata components. 
+ """ + + @abstractmethod + def get_data(self) -> dict: + pass + + def add(self, component: "DataComponent"): + raise NotImplementedError("This component doesn't support children.") + + def remove(self, component: "DataComponent"): + raise NotImplementedError("This component doesn't support children.") + + +# --- Composite --- +class CompositeData(DataComponent): + """ + Represents a composite metadata component (can hold other components). + """ + + def __init__(self, name: str = ""): + self.name = name + self.children: list[DataComponent] = [] + + def add(self, component: DataComponent): + self.children.append(component) + + def remove(self, component: DataComponent): + self.children.remove(component) + + def get_data(self) -> dict: + result = {} + for child in self.children: + result.update(child.get_data()) + return {self.name: result} if self.name else result + + +# --- Example Usage for a FIT file --- +if __name__ == "__main__": + # Example Data Component + class ExampleData(DataComponent): + """ + Represents a leaf metadata component (key-value pair or similar). 
+ """ + + def __init__(self, key: str, value: str): + self.key = key + self.value = value + + def get_metadata(self) -> dict: + return {self.key: self.value} + + # Example usage + # Build metadata structure + fit_metadata = CompositeData("fit_file") + + # Header Section + header = CompositeData("header") + header.add(ExampleData("file_type", "FIT")) + header.add(ExampleData("version", "2.0")) + + # Body Section + body = CompositeData("body") + # add the training data + body.add(ExampleData("duration", "45 min")) + body.add(ExampleData("distance", "10 km")) + + # Add sections to root + fit_metadata.add(header) + fit_metadata.add(body) + + print(fit_metadata.get_metadata()) diff --git a/src/endureio/datacomponents/fitfile.py/__init__.py b/src/endureio/datacomponents/fitfile.py/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/endureio/filehandlers/__init__.py b/src/endureio/filehandlers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/endureio/filehandlers/base_file_handlers.py b/src/endureio/filehandlers/base_file_handlers.py new file mode 100644 index 0000000..9e1896c --- /dev/null +++ b/src/endureio/filehandlers/base_file_handlers.py @@ -0,0 +1,52 @@ +from os import PathLike +from typing import IO +from abc import ABC, abstractmethod + +import pandas as pd + +from endureio.datacomponents.base_data_components import CompositeData + + +class BaseFileHandler(ABC): + """ """ + + def __init__(self, file_path_or_buffer: str | bytes | PathLike | IO[bytes]): + self.file_path_or_buffer = file_path_or_buffer + self.data: CompositeData + pass + + @abstractmethod + def read(self): + """ + reads file or buffer + + creates a Data component for each section of the file (e.g. header, , etc.) + And adds them to the data attribute. + The data attribute is a CompositeData object that contains all + the metadata components. + + returns base file handler object with data attribute populated. 
+ """ + pass + + @abstractmethod + def write(Self): + """write file""" + pass + + @abstractmethod + def validate(self): + """validate the file or buffer""" + pass + + def to_df(self) -> pd.DataFrame: + """ + convert the data attribute to a pandas DataFrame + """ + return pd.DataFrame(self.data.get_data()) + + def to_dict(self) -> dict: + """ + convert the data attribute to a dictionary + """ + return self.data.get_data() diff --git a/src/endureio/filehandlers/fit/__init__.py b/src/endureio/filehandlers/fit/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/endureio/filehandlers/fit/fitfile.py b/src/endureio/filehandlers/fit/fitfile.py new file mode 100644 index 0000000..ca9a798 --- /dev/null +++ b/src/endureio/filehandlers/fit/fitfile.py @@ -0,0 +1,7 @@ +from endureio.filehandlers.base_file_handlers import BaseFileHandler + + +class FitFile(BaseFileHandler): + def __init__(self): + + pass diff --git a/src/endureio/fit.py b/src/endureio/fit.py deleted file mode 100644 index 37625ae..0000000 --- a/src/endureio/fit.py +++ /dev/null @@ -1,8 +0,0 @@ -from os import PathLike -from typing import IO - -import pandas as pd - - -def read_fit(file_path_or_buffer: str | bytes | PathLike | IO[bytes]) -> pd.DataFrame: - pass \ No newline at end of file diff --git a/uv.lock b/uv.lock index 88f0c28..f54b8c9 100644 --- a/uv.lock +++ b/uv.lock @@ -1,4 +1,5 @@ version = 1 +revision = 1 requires-python = ">=3.12" [[package]] @@ -10,6 +11,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c5/55/51844dd50c4fc7a33b653bfaba4c2456f06955289ca770a5dbd5fd267374/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9", size = 7249 }, ] +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = 
"sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, +] + [[package]] name = "distlib" version = "0.3.9" @@ -30,13 +40,17 @@ dependencies = [ [package.dev-dependencies] dev = [ { name = "pre-commit" }, + { name = "pytest" }, ] [package.metadata] requires-dist = [{ name = "pandas", specifier = ">=2.2.3" }] [package.metadata.requires-dev] -dev = [{ name = "pre-commit", specifier = ">=4.0.1" }] +dev = [ + { name = "pre-commit", specifier = ">=4.0.1" }, + { name = "pytest", specifier = ">=8.3.4" }, +] [[package]] name = "filelock" @@ -56,6 +70,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/fa/dce098f4cdf7621aa8f7b4f919ce545891f489482f0bfa5102f3eca8608b/identify-2.6.5-py2.py3-none-any.whl", hash = "sha256:14181a47091eb75b337af4c23078c9d09225cd4c48929f521f3bf16b09d02566", size = 99078 }, ] +[[package]] +name = "iniconfig" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/4b/cbd8e699e64a6f16ca3a8220661b5f83792b3017d0f79807cb8708d33913/iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", size = 4646 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374", size = 5892 }, +] + [[package]] name = "nodeenv" version = "1.9.1" @@ -103,6 +126,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7b/9c/4fce9cf39dde2562584e4cfd351a0140240f82c0e3569ce25a250f47037d/numpy-2.2.1-cp313-cp313t-win_amd64.whl", hash = 
"sha256:bff7d8ec20f5f42607599f9994770fa65d76edca264a87b5e4ea5629bce12268", size = 12693107 }, ] +[[package]] +name = "packaging" +version = "24.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d0/63/68dbb6eb2de9cb10ee4c9c14a0148804425e13c4fb20d61cce69f53106da/packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f", size = 163950 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", size = 65451 }, +] + [[package]] name = "pandas" version = "2.2.3" @@ -146,6 +178,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3c/a6/bc1012356d8ece4d66dd75c4b9fc6c1f6650ddd5991e421177d9f8f671be/platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb", size = 18439 }, ] +[[package]] +name = "pluggy" +version = "1.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/96/2d/02d4312c973c6050a18b314a5ad0b3210edb65a906f868e31c111dede4a6/pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1", size = 67955 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556 }, +] + [[package]] name = "pre-commit" version = "4.0.1" @@ -162,6 +203,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/16/8f/496e10d51edd6671ebe0432e33ff800aa86775d2d147ce7d43389324a525/pre_commit-4.0.1-py2.py3-none-any.whl", hash = "sha256:efde913840816312445dc98787724647c65473daefe420785f885e8ed9a06878", size = 218713 }, ] 
+[[package]] +name = "pytest" +version = "8.3.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ae/3c/c9d525a414d506893f0cd8a8d0de7706446213181570cdbd766691164e40/pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845", size = 1450891 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/30/3d/64ad57c803f1fa1e963a7946b6e0fea4a70df53c1a7fed304586539c2bac/pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820", size = 343634 }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" From 8eddbe920e129a8e2b8dae6ec30a6ef28fe3acd4 Mon Sep 17 00:00:00 2001 From: Simon Dierickx Date: Thu, 10 Apr 2025 10:25:58 +0200 Subject: [PATCH 2/2] added docs --- .../datacomponents/base_data_components.py | 71 +++++++++++++- .../filehandlers/base_file_handlers.py | 98 +++++++++++++++---- 2 files changed, 150 insertions(+), 19 deletions(-) diff --git a/src/endureio/datacomponents/base_data_components.py b/src/endureio/datacomponents/base_data_components.py index 40a78b5..deec901 100644 --- a/src/endureio/datacomponents/base_data_components.py +++ b/src/endureio/datacomponents/base_data_components.py @@ -4,7 +4,26 @@ # --- Component --- class DataComponent(ABC): """ - Base class for all metadata components. + DataComponent is an abstract base class that serves as the + foundation for all metadata components. + It enforces the implementation of a `get_data` method in + derived classes and provides default + implementations for `add` and `remove` methods, + which raise `NotImplementedError` to indicate + that the component does not support child components. 
+ + Methods: + get_data() -> dict: + Abstract method that must be implemented by subclasses to + return the data associated with the component as a dictionary. + + add(component: "DataComponent"): + Raises NotImplementedError. Intended to be overridden + by subclasses that support adding child components. + + remove(component: "DataComponent"): + Raises NotImplementedError. Intended to be overridden by subclasses that + support removing child components. """ @abstractmethod @@ -21,7 +40,25 @@ def remove(self, component: "DataComponent"): # --- Composite --- class CompositeData(DataComponent): """ - Represents a composite metadata component (can hold other components). + CompositeData is a class that represents a composite metadata component, + which can hold and manage other DataComponent instances as its children. + + Attributes: + name (str): The name of the composite data component. + Defaults to an empty string. children (list[DataComponent]): + A list of child DataComponent instances contained within this composite. + + Methods: + add(component: DataComponent): + Adds a child DataComponent to the composite. + + remove(component: DataComponent): + Removes a child DataComponent from the composite. + + get_data() -> dict: + Aggregates and returns the data from all child components as a dictionary. + If the composite has a name, + the result is nested under the composite's name. """ def __init__(self, name: str = ""): @@ -29,12 +66,40 @@ def __init__(self, name: str = ""): self.children: list[DataComponent] = [] def add(self, component: DataComponent): + """ + Adds a DataComponent to the list of children. + + Args: + component (DataComponent): + The data component to be added to the children list. + """ self.children.append(component) def remove(self, component: DataComponent): + """ + Removes a specified DataComponent from the list of children. + + Args: + component (DataComponent): + The data component to be removed from the children list. 
+ + Raises: + ValueError: If the specified component is not found in the children list. + """ self.children.remove(component) def get_data(self) -> dict: + """ + Retrieves the data from the current object and + its children in a hierarchical structure. + + Returns: + dict: A dictionary containing the data from + the current object and its children. + If the current object has a name, + the data is nested under the name as the key. + Otherwise, the data is returned as a flat dictionary. + """ result = {} for child in self.children: result.update(child.get_data()) @@ -47,6 +112,8 @@ def get_data(self) -> dict: class ExampleData(DataComponent): """ Represents a leaf metadata component (key-value pair or similar). + The leaf methods should be the low level fields of the different + files. """ def __init__(self, key: str, value: str): diff --git a/src/endureio/filehandlers/base_file_handlers.py b/src/endureio/filehandlers/base_file_handlers.py index 9e1896c..9687dd6 100644 --- a/src/endureio/filehandlers/base_file_handlers.py +++ b/src/endureio/filehandlers/base_file_handlers.py @@ -1,5 +1,5 @@ from os import PathLike -from typing import IO +from typing import IO, Self from abc import ABC, abstractmethod import pandas as pd @@ -8,7 +8,44 @@ class BaseFileHandler(ABC): - """ """ + """ + BaseFileHandler is an abstract base class that + provides a blueprint for file handling operations. + It defines the structure and methods that subclasses + must implement to handle specific file formats + or data sources. The class includes methods for reading, + writing, validating, and converting data. + + Attributes: + file_path_or_buffer (str | bytes | PathLike | IO[bytes]): + The file path or buffer to be handled. + data (CompositeData): A composite data structure + that stores metadata components extracted + from the file or buffer. + + Methods: + read() -> Self: + Reads the file or buffer and processes its contents. 
+ This method must be implemented + by subclasses to populate the `data` attribute + with metadata components. + + write() -> Self: + Writes data to a file. Subclasses must implement + this method to handle the specific + logic for writing data to a file. + + validate() -> Self: + Validates the file or buffer. Subclasses must + implement this method to ensure the + file or buffer meets the required criteria. + + to_df() -> pd.DataFrame: + Converts the `data` attribute to a pandas DataFrame. + + to_dict() -> dict: + Converts the `data` attribute to a dictionary. + """ def __init__(self, file_path_or_buffer: str | bytes | PathLike | IO[bytes]): self.file_path_or_buffer = file_path_or_buffer @@ -16,37 +53,64 @@ def __init__(self, file_path_or_buffer: str | bytes | PathLike | IO[bytes]): pass @abstractmethod - def read(self): - """ - reads file or buffer + def read(self) -> Self: + """ + Reads the file or buffer and processes its contents. - creates a Data component for each section of the file (e.g. header, , etc.) - And adds them to the data attribute. - The data attribute is a CompositeData object that contains all - the metadata components. + This method creates a `Data` component for each section of the file + (e.g., header, etc.) and adds them to the `data` attribute. The `data` + attribute is a `CompositeData` object that contains all the metadata + components. - returns base file handler object with data attribute populated. + Returns: + Self: The base file handler object with the `data` attribute populated. """ pass @abstractmethod - def write(Self): - """write file""" + def write(self) -> Self: + """ + Writes data to a file. + + This method should be implemented by subclasses to handle the + specific logic for writing data to a file. The implementation + should ensure that the file is written correctly and any necessary + resources are properly managed. + + Returns: + Self: The instance of the class, to allow for method chaining.
+ """ pass @abstractmethod - def validate(self): - """validate the file or buffer""" + def validate(self) -> Self: + """ + Validates the file or buffer. + + This method is intended to ensure that the file or buffer meets + the required criteria or format. Override this method in a subclass + to implement specific validation logic. + + Returns: + Self: The instance of the class, allowing for method chaining. + """ pass def to_df(self) -> pd.DataFrame: + """ + Converts the `data` attribute of the object into a pandas DataFrame. + + Returns: + pd.DataFrame: A DataFrame representation of the `data` attribute. """ - convert the data attribute to a pandas DataFrame - """ + return pd.DataFrame(self.data.get_data()) def to_dict(self) -> dict: """ - convert the data attribute to a dictionary + Convert the `data` attribute of the object into a Python dict + + Returns: + dict: A Dictionary representation of the `data` attribute. """ return self.data.get_data()