From 23955f911fb72dcae8eb7eae03c3b81796e76f7a Mon Sep 17 00:00:00 2001
From: Chimezie Iwuanyanwu <mr.chrisopher@yahoo.com>
Date: Sun, 29 Dec 2024 15:34:03 -0600
Subject: [PATCH] Added @staticmethod and type hints across code base

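Introduce type hints and `@staticmethod` decorators across the code base to improve readability and maintainability. Type hints are not enforced at runtime, so no performance change is expected; the benefit is to readers and to static analysis tools. The changes are broken down by file below: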

**`output_processor.py`**:

*   The `note_data` parameter of `pregenerate_sm_output` and `pregenerate_txt_output` is now annotated as `dict[str, Any]`.  The previous annotation, `defaultdict(list)`, was a call expression that evaluates to an instance rather than a type, so it was not a valid type hint.  `dict[str, Any]` is a valid, flexible annotation that still provides some type information.
*   The `@staticmethod` decorator is added to both functions. This indicates that the methods don't require access to the class's internal state (i.e., `self`).  This change clarifies the intent and usage of these functions.

**`input_processor.py`**:

*   Similar to `output_processor.py`, type hints are added throughout, enhancing clarity.  For example, `convert_note` now specifies its input (`line: str`) and return type (`-> str`). `parse_sm_input` also clearly defines its input and output types.
*   `@staticmethod` is applied to all methods in the class, emphasizing that these are utility functions operating independently of any specific `InputProcessor` instance. The internal logic is unchanged. The signatures were previously unannotated, so the new type hints make the data flow within these methods easier to follow.
*   The return type of `parse_sm_input` is declared as `tuple[dict[str, Any], bool]`: a tuple of the note-data dictionary and a boolean validity flag.

**`file_utils.py`**:

*   Type hints are added to all functions: `read_file`, `write_file`, `strip_filename`, `collect_filenames`, `getFilePaths`, and `checkFilePaths`.  This makes the code much easier to understand and debug, especially when dealing with file paths and extensions.  Notably, the `extensions` parameter of `collect_filenames` and `getFilePaths` is now typed as `Collection[str]`, allowing various container types to be passed in (e.g., lists, tuples, sets), and the `input_dir` parameter of `getFilePaths` is corrected from `list` to `str`.
*   The function signatures now explicitly communicate the expected input and output types, improving the developer experience and reducing the risk of type-related errors.

**`cli_options.py`**:

*   The existing type hints on `read_SMtoData` and `read_TXTtoData` (`filepath: str`, returning `DataHandler`) and on `write_DatatoSM` and `write_DatatoTXT` (`data: DataHandler, filepath: str`) are unchanged; only decorators are added in this file.
*   `@staticmethod` is applied to all functions, as these utility functions are self-contained and don't need instance-specific context.

**`data_processing/data_handler.py`**:

*   The `__init__` method now includes a type hint for the `filepath` parameter (`filepath: str`).
*   Type hints are added to `note_data` and `processed_data`, using `dict[str, Any]` to represent the flexible structure of the data they hold.

**`components/measure.py`**:

*   All methods in this class are marked as `@staticmethod`, indicating that they do not operate on instance data. Type hints are added to all function parameters and return values, clarifying expectations. For instance, `calculate_timing` now declares that it takes a list whose entries are strings or `None`, an integer measure index, and float `bpm` and `offset` values, and returns a list of strings. This explicitness is helpful when dealing with numerical computations and string formatting. The type hints for `find_gcd`, `generate_measure`, `fit_notes_to_measure` and `place_notes` likewise provide specific information about the input and output data types.

In summary, these changes represent a significant improvement to the code's clarity and maintainability. By adding type hints and using static methods, the developers have made the code easier to understand, reason about, and refactor. This also facilitates better static analysis and error detection.
---
 smdatatools/common/cli_options.py             |  5 ++++-
 smdatatools/common/file_utils.py              | 13 ++++++-----
 smdatatools/components/measure.py             | 16 +++++++++-----
 smdatatools/data_processing/data_handler.py   |  7 +++---
 .../data_processing/input_processor.py        | 22 +++++++++++--------
 .../data_processing/output_processor.py       |  8 ++++---
 6 files changed, 43 insertions(+), 28 deletions(-)

diff --git a/smdatatools/common/cli_options.py b/smdatatools/common/cli_options.py
index 8329681..4f9db95 100644
--- a/smdatatools/common/cli_options.py
+++ b/smdatatools/common/cli_options.py
@@ -4,24 +4,27 @@
 from smdatatools.data_processing.output_processor import OutputProcessor
 
 class Options:
-
+    @staticmethod
     def read_SMtoData(filepath: str) -> DataHandler:
         data = DataHandler(filepath)
         istr = read_file(filepath)
         data.note_data, data.valid = InputProcessor.parse_sm_input(istr)
         return data
 
+    @staticmethod
     def read_TXTtoData(filepath: str) -> DataHandler:
         data = DataHandler(filepath)
         istr = read_file(filepath)
         data.note_data = InputProcessor.parse_txt_input(istr)
         return data
 
+    @staticmethod
     def write_DatatoSM(data: DataHandler, filepath: str):
         filename = strip_filename(filepath)
         ostr = OutputProcessor.pregenerate_sm_output(filename, data.processed_data)
         write_file(ostr, filepath)
 
+    @staticmethod
     def write_DatatoTXT(data: DataHandler, filepath: str):
         ostr = OutputProcessor.pregenerate_txt_output(data.note_data)
         write_file(ostr, filepath)
\ No newline at end of file
diff --git a/smdatatools/common/file_utils.py b/smdatatools/common/file_utils.py
index 788c43d..0c2b4b6 100644
--- a/smdatatools/common/file_utils.py
+++ b/smdatatools/common/file_utils.py
@@ -1,18 +1,19 @@
 from os import walk
 from os.path import join, split, splitext
 from re import sub
+from typing import Collection
 
-def read_file(filename):
+def read_file(filename: str) -> list[str]:
     file_data = []
     with open(filename, encoding='ascii', errors='ignore') as f:
         file_data = f.read().splitlines()
     return file_data
 
-def write_file(output_data, filename):
+def write_file(output_data: str, filename: str):
     with open(filename, 'w') as f:
         f.write(output_data)
 
-def strip_filename(filename):
+def strip_filename(filename: str) -> str:
     '''
     Strips file path
     Strips file extension
@@ -23,7 +24,7 @@ def strip_filename(filename):
     tail = splitext(tail)[0]
     return sub(' ', '_', sub('[^a-z0-9-_ ]', '', tail.lower()))
 
-def collect_filenames(input_dir: str, extensions: list):
+def collect_filenames(input_dir: str, extensions: Collection[str]) -> list[str]:
     filenames = []
     for root, dirs, files in walk(input_dir):
         for filename in files:
@@ -32,7 +33,7 @@ def collect_filenames(input_dir: str, extensions: list):
                 filenames.append(join(root, filename).replace("\\","/"))
     return filenames
 
-def getFilePaths(input_dir: list, extensions: list):
+def getFilePaths(input_dir: str, extensions: Collection[str]) -> list[str]:
     filepaths = collect_filenames(input_dir, extensions)
 
     if '.sm' in extensions:
@@ -40,7 +41,7 @@ def getFilePaths(input_dir: list, extensions: list):
 
     return filepaths
 
-def checkFilePaths(sm_filepaths):
+def checkFilePaths(sm_filepaths: list[str]):
     # checks for static bpm in the .sm file
     # and removes filepath from list if not
     for sm_file in sm_filepaths:
diff --git a/smdatatools/components/measure.py b/smdatatools/components/measure.py
index 2e318d6..c82c919 100644
--- a/smdatatools/components/measure.py
+++ b/smdatatools/components/measure.py
@@ -2,8 +2,8 @@
 from math import gcd, ceil
 
 class Measure:
-
-    def calculate_timing(measure, measure_index, bpm, offset):
+    @staticmethod
+    def calculate_timing(measure: list[str | None], measure_index: int, bpm: float, offset: float) -> list[str]:
         # calculate time in seconds for each line in the measure:
         #   BPM       = beats/minute -> BPS = beats/second = BPM/60
         #   measure   = 4 beats = 4 * 1/4th notes     = 1 note
@@ -16,7 +16,8 @@ def calculate_timing(measure, measure_index, bpm, offset):
         # returns the note/timing pair, if the note exists
         return [measure[i] + ' ' + str(i * note_256 * fraction_256 + measure_timing - offset) for i, is_set in enumerate(measure) if is_set != None]
 
-    def find_gcd(note_positions) -> int:
+    @staticmethod
+    def find_gcd(note_positions: list[int]) -> int:
         # attempts to fit the note positions into either a 
         # 256, 128, 64, 32, 16, 8 or 4 note measure based on spacing 
         # found by getting the greatest common denominator
@@ -30,7 +31,8 @@ def find_gcd(note_positions) -> int:
     
         return int(gcd_gap)
 
-    def generate_measure(notes, note_positions) -> list[str]:
+    @staticmethod
+    def generate_measure(notes: list[str], note_positions: list[int]) -> list[str]:
     
         # we'll want to trim as much output as possible
         # by reducing the measure size
@@ -46,7 +48,8 @@ def generate_measure(notes, note_positions) -> list[str]:
 
         return generated_measure
 
-    def fit_notes_to_measure(notes, timings, seconds_1_256) -> list[str]:
+    @staticmethod
+    def fit_notes_to_measure(notes: list[str], timings: list[float], seconds_1_256: float) -> list[str]:
         # if no data is passed, generate current measure
         # as "empty" with the smallest size
         if not notes or not timings:
@@ -78,7 +81,8 @@ def fit_notes_to_measure(notes, timings, seconds_1_256) -> list[str]:
 
         return measure
 
-    def place_notes(notes_and_timings, bpm) -> list:
+    @staticmethod
+    def place_notes(notes_and_timings: list[str], bpm: float) -> list[str]:
         placed_notes = []
         if not notes_and_timings:
             return placed_notes
diff --git a/smdatatools/data_processing/data_handler.py b/smdatatools/data_processing/data_handler.py
index 9dd207c..b75d5fc 100644
--- a/smdatatools/data_processing/data_handler.py
+++ b/smdatatools/data_processing/data_handler.py
@@ -1,4 +1,5 @@
 from collections import defaultdict
+from typing import Any
 
 from smdatatools.common.file_utils import strip_filename
 from smdatatools.components.measure import Measure
@@ -6,12 +7,12 @@
 class DataHandler:
     # each Data Handler will represent the data of one .sm/.txt file
 
-    def __init__(self, filepath):
+    def __init__(self, filepath: str):
         self.sm_path = filepath
         self.filename = strip_filename(filepath)
 
-        self.note_data = defaultdict(list)
-        self.processed_data = defaultdict(list)
+        self.note_data: dict[str, Any] = defaultdict(list)
+        self.processed_data: dict[str, Any] = defaultdict(list)
         self.valid = True
 
     def process_data_to_sm_format(self):
diff --git a/smdatatools/data_processing/input_processor.py b/smdatatools/data_processing/input_processor.py
index dbf9625..3e0237d 100644
--- a/smdatatools/data_processing/input_processor.py
+++ b/smdatatools/data_processing/input_processor.py
@@ -1,16 +1,19 @@
 from collections import defaultdict
 from re import sub, split
+from typing import Any
 
 from smdatatools.components.measure import Measure
 
-class InputProcessor:
 
-    def convert_note(line):                                                      
+class InputProcessor:
+    @staticmethod
+    def convert_note(line: str) -> str:
         return sub('4', '1', sub('[MKLF]', '0', line))    #replaces extra notes: M, K, L, F; replaces 4 note
 
-    def parse_sm_input(sm_file):
-        note_data = defaultdict(list)
-        note_data['notes'] = defaultdict(list) # notes are paired with each difficulty
+    @staticmethod
+    def parse_sm_input(sm_file: list[str]) -> tuple[dict[str, Any], bool]:
+        note_data: dict[str, Any] = defaultdict(list)
+        note_data['notes']: dict[str, Any] = defaultdict(list)  # notes are paired with each difficulty
         current_difficulty = ''
         measure         = []
         measure_index   = 0
@@ -72,11 +75,12 @@ def parse_sm_input(sm_file):
                             measure.append(note) # adds note if found
                         else:
                             measure.append(None)
-                
+
         return note_data, valid
 
-    def parse_txt_input(txt_file):
-        note_data = defaultdict(list)
+    @staticmethod
+    def parse_txt_input(txt_file: list[str]) -> dict[str, Any]:
+        note_data: dict[str, Any] = defaultdict(list)
         note_data['notes'] = defaultdict(list)
         current_difficulty = ''
         notes_and_timings = []
@@ -105,5 +109,5 @@ def parse_txt_input(txt_file):
                 else:
                     notes_and_timings.append(line)
         note_data['notes'][current_difficulty].extend(notes_and_timings)
-    
+
         return note_data
diff --git a/smdatatools/data_processing/output_processor.py b/smdatatools/data_processing/output_processor.py
index 51b698e..f8ca142 100644
--- a/smdatatools/data_processing/output_processor.py
+++ b/smdatatools/data_processing/output_processor.py
@@ -1,8 +1,9 @@
 from collections import defaultdict
+from typing import Any
 
 class OutputProcessor:
-
-    def pregenerate_sm_output(file_name: str, note_data: defaultdict(list)) -> str:
+    @staticmethod
+    def pregenerate_sm_output(file_name: str, note_data: dict[str, Any]) -> str:
         # pre-generate .sm output data
         title  = '#TITLE:%s;\n' % note_data['title']
         artist = '#ARTIST:jhaco vs cpuguy96;\n'
@@ -26,7 +27,8 @@ def pregenerate_sm_output(file_name: str, note_data: defaultdict(list)) -> str:
 
         return ''.join((title, artist, music, select, bpm, notes))
 
-    def pregenerate_txt_output(note_data: defaultdict(list)) -> str:
+    @staticmethod
+    def pregenerate_txt_output(note_data: dict[str, Any]) -> str:
         # pre-generate output data
         title = 'TITLE %s\n' % note_data['title']
         bpm   = 'BPM   %s\n' % str(note_data['bpm'])