hudson-and-thames · gsmadi · Oct 23, 2021 · PanPip · Oct 25, 2021 · PanPip
diff --git a/gsmadi/.gitignore b/gsmadi/.gitignore
@@ -0,0 +1 @@
+venv
diff --git a/gsmadi/KCA.ipynb b/gsmadi/KCA.ipynb
diff --git a/gsmadi/README.md b/gsmadi/README.md
@@ -0,0 +1,30 @@
+# H&T Kinetic Component Analysis (KCA) Assignment
+
+## Getting started
+
+```
+virtualenv --python python3.7 ./venv
+source venv/bin/activate
+pip install -r requirements.txt
+```
+
+## Execute unit tests
+
+`pytest`
+
+## Execute linter
+
+`flake8 <FILENAME>`
+
+## Methodology
+
+- We first explore how KCA fits to gold futures contract price to see how good the fit is for in-sample data
+- We then take a look at the relationship between position and velocity produced by the KCA fit
+- We then apply a trading class based on KCA to make price forecasts and trade decisions against gold futures price to see how well it performs
+- Finally, we draw some conclusions and discuss how to obtain better results next time
+
+## KCA Trading Algorithm Design
+
+We fit our KCA trading algorithm with 360 days' worth of data. We select a year's worth of data because that is the resolution we have, and it captures four quarters' worth of price movements.
+
+We continuously feed in new observations from our test sample to update the KCA fit so that it takes recent price behavior into consideration. We essentially roll the 360-day window of our fit forward as new observations arrive from our test sample set.
diff --git a/gsmadi/data/futures.csv b/gsmadi/data/futures.csv
diff --git a/gsmadi/requirements.txt b/gsmadi/requirements.txt
@@ -0,0 +1,99 @@
+anyio==3.3.3
+argcomplete==1.12.3
+argon2-cffi==21.1.0
+attrs==21.2.0
+Babel==2.9.1
+backcall==0.2.0
+bleach==4.1.0
+certifi==2021.10.8
+cffi==1.15.0
+charset-normalizer==2.0.7
+cycler==0.10.0
+debugpy==1.5.0
+decorator==5.1.0
+defusedxml==0.7.1
+entrypoints==0.3
+ffn==0.3.6
+flake8==4.0.1
+future==0.18.2
+idna==3.3
+importlib-metadata==4.2.0
+iniconfig==1.1.1
+ipykernel==6.4.1
+ipython==7.28.0
+ipython-genutils==0.2.0
+jedi==0.18.0
+Jinja2==3.0.2
+joblib==1.1.0
+json5==0.9.6
+jsonschema==4.1.0
+jupyter-client==7.0.6
+jupyter-core==4.8.1
+jupyter-server==1.11.1
+jupyterlab==3.2.0
+jupyterlab-pygments==0.1.2
+jupyterlab-server==2.8.2
+kiwisolver==1.3.2
+lxml==4.6.3
+MarkupSafe==2.0.1
+matplotlib==3.4.3
+matplotlib-inline==0.1.3
+mccabe==0.6.1
+mistune==0.8.4
+multitasking==0.0.9
+nbclassic==0.3.2
+nbclient==0.5.4
+nbconvert==6.2.0
+nbformat==5.1.3
+nest-asyncio==1.5.1
+notebook==6.4.4
+numpy==1.21.2
+packaging==21.0
+pandas==1.3.3
+pandas-datareader==0.10.0
+pandocfilters==1.5.0
+parso==0.8.2
+patsy==0.5.2
+pexpect==4.8.0
+pickleshare==0.7.5
+Pillow==8.4.0
+# pkg_resources==0.0.0  # pip-freeze artifact (Debian/Ubuntu venvs); not installable from PyPI
+pluggy==1.0.0
+prometheus-client==0.11.0
+prompt-toolkit==3.0.20
+ptyprocess==0.7.0
+py==1.10.0
+pycodestyle==2.8.0
+pycparser==2.20
+pyflakes==2.4.0
+Pygments==2.10.0
+pykalman==0.9.5
+pyparsing==2.4.7
+PyPrind==2.11.3
+pyrsistent==0.18.0
+pytest==6.2.5
+python-dateutil==2.8.2
+pytz==2021.3
+pyzmq==22.3.0
+requests==2.26.0
+requests-unixsocket==0.2.0
+scikit-learn==1.0
+scipy==1.7.1
+Send2Trash==1.8.0
+six==1.16.0
+sniffio==1.2.0
+statsmodels==0.13.0
+tabulate==0.8.9
+terminado==0.12.1
+testpath==0.5.0
+threadpoolctl==3.0.0
+toml==0.10.2
+tornado==6.1
+traitlets==5.1.0
+typing-extensions==3.10.0.2
+urllib3==1.26.7
+wcwidth==0.2.5
+webencodings==0.5.1
+websocket-client==1.2.1
+yfinance==0.1.63
+zipp==3.6.0
diff --git a/gsmadi/src/__init__.py b/gsmadi/src/__init__.py
diff --git a/gsmadi/src/kca.py b/gsmadi/src/kca.py
@@ -0,0 +1,58 @@
+"""kca.py
+
+Module implements Kinetic Component Analysis (KCA).
+
+Original implementation can be found in
+https://papers.ssrn.com/sol3/papers.cfm?abstract_id=2422183
+
+Original author: MLdP on 02/22/2014 <[email protected]>
+"""
+
+import numpy as np
+from pykalman import KalmanFilter
+
+
+def fitKCA(time, measurable, scalar, steps=0):
+    """Fit a Kinetic Component Analysis (KCA) model to a signal.
+
+    A three-state (position, velocity, acceleration) Kalman filter is set
+    up from the average time step, EM estimation is run on the signal and
+    the smoothed states are returned, extended by one row per forecast
+    step.
+
+    Parameters:
+        time (numpy.ndarray): Time indices; the first and last entries and
+            ``shape[0]`` are used to derive the average step size
+        measurable: Signal to perform fitting (e.g. price path)
+        scalar: Scalar that multiplies the seed states covariance
+        steps (int): number of steps to forecast (optional, default=0)
+    Return:
+        tuple: ``(x_mean, x_std, x_covar)`` where ``x_mean`` holds the
+            state means, ``x_covar`` the state covariances and ``x_std``
+            the square roots of the diagonal of each state covariance
+    """
+    # 1) Transition matrix from the average step size, and the seed for Q
+    n_obs = time.shape[0]
+    dt = (time[-1] - time[0]) / n_obs
+    transition = np.array([[1, dt, 0.5*dt**2],
+                           [0, 1, dt],
+                           [0, 0, 1]])
+    covar_seed = scalar * np.eye(transition.shape[0])
+
+    # 2) Build the filter and 3) run the EM estimates on the signal
+    kalman = KalmanFilter(transition_matrices=transition,
+                          transition_covariance=covar_seed)
+    kalman = kalman.em(measurable)
+
+    # 4) Smooth
+    x_mean, x_covar = kalman.smooth(measurable)
+
+    # 5) Forecast: every update starts from the latest row and no new
+    #    observation is passed to filter_update
+    for _ in range(steps):
+        next_mean, next_covar = kalman.filter_update(
+            filtered_state_mean=x_mean[-1],
+            filtered_state_covariance=x_covar[-1])
+        x_mean = np.concatenate((x_mean, next_mean.reshape(1, -1)), axis=0)
+        x_covar = np.concatenate((x_covar, next_covar[np.newaxis]), axis=0)
+
+    # 6) Std series: square root of every covariance diagonal, one column
+    #    per state
+    x_std = np.diagonal(x_covar, axis1=1, axis2=2)**.5
+
+    return x_mean, x_std, x_covar
diff --git a/gsmadi/src/plotting.py b/gsmadi/src/plotting.py
@@ -0,0 +1,65 @@
+import matplotlib.pyplot as plt
+import matplotlib.ticker as ticker
+import numpy as np
+import pandas as pd
+
+from sklearn import preprocessing
+
+def plot_gold_future_price(price_df):
+    """Plot the gold futures contract price series on a new figure.
+
+    Parameters:
+        price_df (pandas.DataFrame): Frame with a "GC=F" column, which is
+            drawn as a blue line.
+    """
+    _, axes = plt.subplots()
+    axes.set_title('Gold Futures contract Price (GC=F)')
+    axes.set_xlabel('Date')
+    axes.set_ylabel('Price (Dollars)')
+    # Whole-dollar tick labels on the price axis
+    dollar_ticks = ticker.FormatStrFormatter('$%0.0f')
+    axes.yaxis.set_major_formatter(dollar_ticks)
+    gold_price = price_df["GC=F"]
+    axes.plot(gold_price, label="GC=F", color="blue")
+    axes.grid()
+
+def plot_gold_future_to_kca_position(price_df):
+    """Overlay the KCA position fit on the gold futures price.
+
+    Parameters:
+        price_df (pandas.DataFrame): Frame with "GC=F" and "KCA_position"
+            columns. The price is drawn as a black line and the KCA
+            position as red point markers without a connecting line.
+    """
+    _, axes = plt.subplots()
+    axes.set_title('Gold Futures to KCA Position Fit Comparison')
+    axes.set_xlabel('Date')
+    axes.set_ylabel('Price (Dollars)')
+    axes.yaxis.set_major_formatter(ticker.FormatStrFormatter('$%0.0f'))
+
+    price_style = dict(label='Gold Futures Price (GC=F)', color='black')
+    fit_style = dict(label='KCA Position', color='red', marker='o',
+                     markersize=2, linestyle='None')
+    axes.plot(price_df['GC=F'], **price_style)
+    axes.plot(price_df['KCA_position'], **fit_style)
+
+    axes.legend(loc="lower left")
+    axes.grid()
+
+    # Rotate the x tick labels by 45 degrees
+    plt.xticks(rotation=45)
+
+def plot_gold_kca_position_velocity(price_df):
+    """Plot min-max normalized KCA position and velocity on one figure.
+
+    Every column of ``price_df`` is scaled with a MinMaxScaler so the two
+    series can be compared visually on a common, unitless axis.
+
+    Parameters:
+        price_df (pandas.DataFrame): Frame whose columns at positions 1
+            and 2 are plotted as position and velocity (assumes column
+            order price, position, velocity -- TODO confirm).
+    """
+    # Normalize kca data to visually compare patterns
+    scaled_values = preprocessing.MinMaxScaler().fit_transform(price_df)
+    normalized = pd.DataFrame(scaled_values, index=price_df.index)
+    position, velocity = normalized[1], normalized[2]
+
+    marker_only = dict(markersize=2, linestyle='None')
+    _, axes = plt.subplots()
+    axes.set_title('KCA Normalized Position & Velocity')
+    axes.set_xlabel('Date')
+    axes.set_ylabel('Normalized (Unitless)')
+    axes.plot(position, color='red', label="Position", marker='x',
+              **marker_only)
+    axes.plot(velocity, color='blue', label="Velocity", marker='o',
+              **marker_only)
+    axes.legend(loc="lower left")
+    axes.grid()
+
+def plot_gold_kca_strategy(kca_observations, predictions):
+    """Plot KCAStrategy predictions against gold futures prices (GC=F).
+
+    The view is limited to the last 20 rows of the strategy observations
+    (so at least 20 rows are required) and to prices between 1400 and
+    2000 dollars.
+
+    Parameters:
+        kca_observations: Object exposing an ``observations`` frame with
+            "GC=F" and "prediction" columns (presumably a KCAStrategy --
+            confirm against callers).
+        predictions: Frame with a "GC=F" column, plotted under the label
+            "observed".
+    """
+    observed = kca_observations.observations
+    _, axes = plt.subplots()
+
+    axes.set_title('KCAStrategy Predictions & Observations (GC=F)')
+    axes.set_xlabel('Date')
+    axes.set_ylabel('Price (Dollars)')
+    axes.yaxis.set_major_formatter(ticker.FormatStrFormatter('$%0.0f'))
+
+    axes.plot(observed["GC=F"], label="GC=F", color="blue")
+    axes.plot(observed["prediction"], label="prediction", color="red",
+              marker="x", linestyle='None')
+    # NOTE(review): linestyle is the None object here, not the string
+    # 'None' used for the prediction series -- confirm this is intended.
+    axes.plot(predictions["GC=F"], label="observed", color="green",
+              marker="o", linestyle=None)
+
+    axes.legend(loc="lower left")
+    axes.grid()
+
+    # Zoom in on the 20 most recent rows and a fixed price band
+    first_shown, last_shown = observed.index[-20], observed.index[-1]
+    plt.xlim([first_shown, last_shown])
+    plt.ylim([1400, 2000])
+    plt.xticks(rotation=45)
diff --git a/gsmadi/src/trading.py b/gsmadi/src/trading.py
@@ -0,0 +1,92 @@
+"""trading.py
+
+Module collects KCA-based trade signal generation and a simple trading strategy.
+
+"""
+
+import random
+import numpy as np
+import pandas as pd
+
+from .kca import fitKCA
+from .utils import split_price_time_series
+
+
+def generate_trade_signal(price_df, window=360):
+    """Generate next day buy or sell signal with future price prediction
+        on a set of past price observations using KCA.
+    Parameters:
+        price_df (pandas.DataFrame): past price observations; the first
+            column of the last row is taken as the current market price.
+        window (int): number of most recent rows used for the KCA fit
+            (optional, default=360, i.e. about four quarters of daily
+            data). Must be positive.
+    Returns
+        tuple (numpy.float64, int): contains a price prediction on the first
+            slot of the tuple and a 1 or -1 on the second slot to indicate
+            a buy and sell signal, respectively.
+    """
+    # The KCA covariance seed q is drawn at random, so repeated calls on
+    # the same data may produce different predictions
+    randq = random.randrange(1000, 5000)
+
+    # Positional access: last row, first column is the market price
+    market_price = price_df.iloc[-1, 0]
+
+    # Fit only on the most recent `window` rows. The previous slice
+    # `iloc[360:]` dropped the first 360 rows and kept everything after
+    # them, which is not a rolling four-quarter window.
+    recent_df = price_df.iloc[-window:, :]
+
+    price_series, time_series = split_price_time_series(recent_df)
+
+    # One forecast step appends the next-step state as the last row
+    x_mu, _, _ = fitKCA(time_series, price_series, randq, steps=1)
+
+    predicted = x_mu[-1][0]  # Extract KCA position next step prediction
+
+    # Buy (1) when the forecast is above the market price, else sell (-1)
+    signal = 1 if predicted > market_price else -1
+    return (predicted, signal)
+
+
+class KCAStrategy(object):
+    """KCAStrategy is a simple trading algorithm based on KCA. It fits 360
+        days worth of price observations to produce a prediction signal for
+        next day of trading. We use 360 days to capture at least 4 quarters
+        worth of price actions.
+    """
+    def __init__(self, initial_observations):
+        """Store the observations and add empty signal columns.
+
+        Parameters:
+            initial_observations (pandas.DataFrame): Price observations.
+                The frame is kept by reference and modified in place:
+                'prediction' and 'decision' columns filled with NaN are
+                added. predict() writes three values per new row, so it
+                expects exactly one price column before those two.
+        """
+        self.observations = initial_observations
+        # np.nan rather than the np.NaN alias, which NumPy 2.0 removed
+        self.observations['prediction'] = np.nan
+        self.observations['decision'] = np.nan
+
+    def _next_day(self):
+        """Return the timestamp one day after the last stored row.
+
+        Plain Timestamp arithmetic replaces
+        pd.date_range(..., closed='right'), whose `closed` argument was
+        removed in pandas 2.0.
+        """
+        last_day = pd.to_datetime(self.observations.index[-1])
+        return last_day + pd.Timedelta(days=1)
+
+    def predict(self):
+        """Predict next day market price and append to initial fitted observations.
+
+        A new row dated one day after the last observation is added with
+        a NaN price, the predicted price and the decision.
+
+        Returns:
+            pandas.Series: Next day prediction row
+        """
+        prediction, decision = generate_trade_signal(self.observations)
+
+        next_day = self._next_day()
+
+        # Price stays NaN until update_latest_observation() fills it in
+        self.observations.loc[next_day] = [np.nan, prediction, decision]
+
+        return self.observations.iloc[-1]
+
+    def update_latest_observation(self, new_observation):
+        """Update last appended date to initial observations with actual
+            observed value.
+        Parameters:
+            new_observation (float): Newly observed price observation to
+                take into considerations next prediction fit
+        Returns:
+            pandas.Series: Latest date row with prediction and actual observed
+                value.
+        """
+        # Last row, first column holds the observed price
+        self.observations.iloc[-1, 0] = new_observation
+
+        return self.observations.iloc[-1]
+
+    def plot(self):
+        """Placeholder; plotting is not implemented yet."""
+        pass