diff --git a/pyproject.toml b/pyproject.toml index f7b1501..ab7a8f2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,6 +8,7 @@ authors = [ ] requires-python = ">=3.12" dependencies = [ + "fitdecode>=0.10.0", "pandas>=2.2.3", ] @@ -29,4 +30,4 @@ testpaths = [ ] pythonpath = [ "src", -] \ No newline at end of file +] diff --git a/src/endureio/fit.py b/src/endureio/fit.py index 37625ae..a0ab7fc 100644 --- a/src/endureio/fit.py +++ b/src/endureio/fit.py @@ -1,8 +1,217 @@ +from datetime import timezone +from itertools import pairwise from os import PathLike from typing import IO +import fitdecode import pandas as pd -def read_fit(file_path_or_buffer: str | bytes | PathLike | IO[bytes]) -> pd.DataFrame: - pass \ No newline at end of file +COLUMN_TRANSLATIONS = { + 'enhanced_speed': "speed", + "position_lat": "latitude", + "position_long": "longitude", + "enhanced_altitude": "altitude", + "saturated_hemoglobin_percent": "smo2", + 'Power': "power", # From e.g. Stryd + "Cadence": "cadence", # From e.g. Stryd + # "Leg Spring Stiffness": "leg_spring_stiffness", + # "Vertical Oscillation": "vertical_oscillation", +} + + +OPINIATED_COLUMNS = { + "timestamp", + "sport", + "sub_sport", + "power", + "speed", + "distance", + "longitude", + "latitude", + "altitude", + "heart_rate", + "cadence", + "temperature", + "core_temperature", + "smo2", +} + +FLOAT_COLUMNS = { + "power", + "speed", + "distance", + "heart_rate", + "cadence", + "temperature", + "core_temperature", + "smo2", +} + + +def _translate_columns(values, present_columns, allow_column_overwrites): + """ + Translates the keys of a dictionary using the COLUMN_TRANSLATIONS dictionary. + Only translates keys that are part of COLUMN_TRANSLATIONS and if the translated + key is not already in the present_columns set. + + Parameters: + - values (dict): The dictionary whose keys need to be translated. + - present_columns (set): A set of column names that are already present and should not be overwritten. + - allow_column_overwrites (bool): If True, allow opinionated translation of columns to overwrite existing columns. + + Returns: + - dict: A dictionary with keys translated as per COLUMN_TRANSLATIONS, considering present_columns. + """ + translated_values = {} + for key, value in values.items(): + if key in COLUMN_TRANSLATIONS and (COLUMN_TRANSLATIONS[key] not in present_columns or allow_column_overwrites): + translated_key = COLUMN_TRANSLATIONS[key] + else: + translated_key = key + translated_values[translated_key] = value + return translated_values + + +def _parse_device_info(device_info): + """ + Parses the device information from the FIT file. + """ + if device_info is None: + return None + + if device_info["manufacturer"] == "garmin" and "garmin_product" in device_info: + return f"garmin {device_info['garmin_product']}" + elif "product_name" in device_info: + return device_info["product_name"] + elif "descriptor" in device_info: + return device_info["descriptor"] + elif device_info.get("device_name", None) is not None: + return device_info["device_name"] + else: + return None + + +def read_fit(file_like: str | bytes | PathLike | IO[bytes], opinionated: bool = True, include_unopinionated: bool = True, allow_column_overwrites: bool = False) -> pd.DataFrame: + """ + Reads a .fit file and returns a pandas DataFrame. + + Parameters: + - file_like: A file-like object to read the .fit file from. + - opinionated (bool, optional): If True, perform some opinionated data cleaning and transformation. For example "Power" is renamed to "power". Defaults to True. + - include_unopinionated (bool, optional): If True, include columns that are not part of the opinionated columns. Defaults to True. + - allow_column_overwrites (bool, optional): If True, allow opinionated translation of columns to overwrite existing columns. If false, then if for example a "power" column already exists, the "Power" column will not be renamed to "power". Defaults to False. + + Returns: + - pandas.DataFrame: A DataFrame containing the data from the .fit file. + """ + frames = [] + present_columns = set() + sport = None + sub_sport = None + device_info = None + with fitdecode.FitReader(file_like) as fit: + laps = [] + sessions = [] + for frame in fit: + if not isinstance(frame, fitdecode.records.FitDataMessage): + continue + + if frame.mesg_type is None: + continue + + match frame.mesg_type.name: + case "record": + values = { + "sport": sport, + "sub_sport": sub_sport, + } + for f in frame.fields: + # @TODO this assumes 1 value per field name, but that's not necessarily the case. + values[f.name] = f.value + + if opinionated: + present_columns.update(values.keys()) + values = _translate_columns(values, present_columns, allow_column_overwrites) + + if not include_unopinionated: + values = {key: value for key, value in values.items() if key in OPINIATED_COLUMNS} + + frames.append(values) + case "sport": + sport = frame.get_field("sport").value + sub_sport = frame.get_field("sub_sport").value + case "lap": + laps.append(frame) + case "session": + sessions.append(frame) + case "device_info": + try: + device_index = frame.get_field("device_index").value + except Exception: + continue + + if device_index == "creator": + device_info = {} + for field_name in ["device_index", "manufacturer", "device_name", "garmin_product", "product_name", "serial_number", "descriptor"]: + try: + device_info[field_name] = frame.get_field(field_name).value + except KeyError: + device_info[field_name] = None + case "activity": + try: + local_timestamp = frame.get_field("local_timestamp").value + timestamp = frame.get_field("timestamp").value + except KeyError: + # Some FIT files don't have the local_timestamp field + tz = None + else: + tz = timezone(local_timestamp - timestamp.replace(tzinfo=None)) + case _: + pass + + data = pd.DataFrame(frames) + + data["timestamp"] = pd.to_datetime(data["timestamp"]) + if tz is not None: + data["timestamp"] = data["timestamp"].dt.tz_convert(tz) + data["duration"] = data["timestamp"].diff() + data.set_index("timestamp", inplace=True) + + data.attrs["device"] = _parse_device_info(device_info) + + # @TODO Revisit if only manual lap triggers should be used (if present) + # @TODO Support multi session FIT files + lap_start_times = [] + for lap in laps: + start_time = lap.get_value("start_time") + + try: + lap_trigger = lap.get_value("lap_trigger") + except KeyError: + lap_trigger = None + + lap_start_times.append((start_time, lap_trigger)) + + lap_idx = None + for lap_idx, ((start, lap_trigger), (end, _)) in enumerate(pairwise(lap_start_times)): + data.loc[start:end, "lap"] = lap_idx + data.loc[start:end, "lap_trigger"] = lap_trigger + + if lap_idx is not None: + data.loc[end:, "lap"] = lap_idx + 1 + data.loc[end:, "lap_trigger"] = "manual" # The end of an activity is always triggered manually + else: + data["lap"] = None + data["lap_trigger"] = None + + if opinionated: + for original, translated in COLUMN_TRANSLATIONS.items(): + if original in data.columns and translated not in data.columns: + data.rename(columns={original: translated}, inplace=True) + + for column in data.columns: + if column in FLOAT_COLUMNS: + data[column] = data[column].astype(float) + + return data \ No newline at end of file diff --git a/tests/test_fit.py b/tests/test_fit.py index 8f12f7d..4b331f1 100644 --- a/tests/test_fit.py +++ b/tests/test_fit.py @@ -2,6 +2,9 @@ def test_read_fit_running_file(): - file_path = "tests/data/running.fit" + file_path = "tests/data/short-run.fit" - data = read_fit(file_path) \ No newline at end of file + data = read_fit(file_path) + + assert "power" in data.columns + assert len(data) == 1344 \ No newline at end of file diff --git a/uv.lock b/uv.lock index 7837bc3..8091993 100644 --- a/uv.lock +++ b/uv.lock @@ -15,6 +15,7 @@ name = "endureio" version = "0.1.0" source = { editable = "." } dependencies = [ + { name = "fitdecode" }, { name = "pandas" }, ] @@ -24,11 +25,23 @@ dev = [ ] [package.metadata] -requires-dist = [{ name = "pandas", specifier = ">=2.2.3" }] +requires-dist = [ + { name = "fitdecode", specifier = ">=0.10.0" }, + { name = "pandas", specifier = ">=2.2.3" }, +] [package.metadata.requires-dev] dev = [{ name = "pytest", specifier = ">=8.3.4" }] +[[package]] +name = "fitdecode" +version = "0.10.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9e/8c/7cf1d2865fc0d593dfd389704b751ca1ed2a98860c41d4587c5b49637da3/fitdecode-0.10.0.tar.gz", hash = "sha256:1b85d1303c87650c11377a7322757eaa1caf229776bbd5b545591dd6815123e4", size = 89048 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4b/de/cf0f223255bf12c885acdc3bcce7fdc5607db496d3e3b682c001a51406e0/fitdecode-0.10.0-py3-none-any.whl", hash = "sha256:527dc2e4b0fe45dbe868a2131af0a544cb686f631f608d94e392bcce57b047cf", size = 90890 }, +] + [[package]] name = "iniconfig" version = "2.0.0"