From f99b96ea2932184310825bea4abe6f1546810edb Mon Sep 17 00:00:00 2001 From: adamjking3 Date: Sun, 7 Jul 2019 19:10:41 -0700 Subject: [PATCH] Add Jupyter notebook to keep track of experimentation. This is for personal documentation. --- .../Experiments-checkpoint.ipynb | 185 +++++++++++++++++ Experiments.ipynb | 192 ++++++++++++++++++ 2 files changed, 377 insertions(+) create mode 100644 .ipynb_checkpoints/Experiments-checkpoint.ipynb create mode 100644 Experiments.ipynb diff --git a/.ipynb_checkpoints/Experiments-checkpoint.ipynb b/.ipynb_checkpoints/Experiments-checkpoint.ipynb new file mode 100644 index 0000000..65c4199 --- /dev/null +++ b/.ipynb_checkpoints/Experiments-checkpoint.ipynb @@ -0,0 +1,185 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 0.00\n", + "1 0.40\n", + "2 1.00\n", + "3 0.20\n", + "4 0.44\n", + "5 0.58\n", + "Name: normalized_a, dtype: float64\n", + " a b c normalized_a\n", + "0 0.00 0.0000 0.000000 0.00\n", + "1 0.40 0.1875 0.416667 0.40\n", + "2 1.00 0.1500 0.166667 1.00\n", + "3 0.20 0.1375 1.000000 0.20\n", + "4 0.44 1.0000 0.033333 0.44\n", + "5 0.58 0.0500 0.000000 0.58\n", + "[[0. 0. 0. 0. ]\n", + " [0.4 0.1875 0.41666667 0.4 ]\n", + " [1. 0.15 0.16666667 1. ]\n", + " [0.2 0.1375 1. 0.2 ]\n", + " [0.44 1. 0.03333333 0.44 ]\n", + " [0.58 0.05 0. 0.58 ]]\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "df = pd.DataFrame([\n", + " { 'a': 0, 'b': 0, 'c': 0 },\n", + " { 'a': 2, 'b': 1.5, 'c': 2.5 },\n", + " { 'a': 5, 'b': 1.2, 'c': 1 },\n", + " { 'a': 1, 'b': 1.1, 'c': 6 },\n", + " { 'a': 2.2, 'b': 8, 'c': 0.2 },\n", + " { 'a': 2.9, 'b': 0.4, 'c': 0 },\n", + "])\n", + "\n", + "df['normalized_a'] = (df['a'] - df['a'].min()) / (df['a'].max() - df['a'].min())\n", + "\n", + "print(df['normalized_a'])\n", + "\n", + "from lib.data.features.transform import max_min_normalize\n", + "\n", + "df_max_min = max_min_normalize(df)\n", + "df_max_min_list = max_min_normalize(df.values)\n", + "\n", + "print(df_max_min)\n", + "print(df_max_min_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Date Open High Low Close Volume\n", + "1664 1417392000 300.0 370.0 300.00 370.0 0.05656\n", + "1663 1417478400 370.0 378.0 370.00 378.0 15.01000\n", + "1662 1417564800 378.0 378.0 377.01 378.0 0.54660\n", + "1661 1417651200 378.0 378.0 377.10 377.1 0.01000\n", + "1660 1417737600 377.1 377.1 377.10 377.1 0.00000\n", + " Date Open High Low Close Volume\n", + "1664 0.000061 0.209721 0.021391 0.209721 0.021391 5.581170\n", + "1663 0.000061 0.209721 0.021391 0.209721 0.021391 5.581170\n", + "1662 0.000061 0.021391 0.000000 0.018769 0.000000 -3.312755\n", + "1661 0.000061 0.000000 0.000000 0.000239 -0.002384 -4.001132\n", + "1660 0.000061 -0.002384 -0.002384 0.000000 0.000000 0.000000\n", + " Date Open High Low Close Volume\n", + "1664 0.008457 0.732499 0.455558 0.510581 0.592149 0.996942\n", + "1663 0.008457 0.732499 0.455558 0.510581 0.592149 0.996942\n", + "1662 0.008452 0.592149 0.415140 0.500916 0.576208 0.244405\n", + "1661 0.008446 0.576208 0.415140 0.499978 0.574431 0.186160\n", + "1660 0.008441 0.574431 0.410636 0.499966 0.576208 0.524705\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/Adam/Desktop/YouTube/BitcoinTrader/lib/data/features/transform.py:54: RuntimeWarning: divide by zero encountered in log\n", + " return transform(iterable, inplace, columns, lambda t_iterable: np.log(t_iterable) - np.log(t_iterable).shift(1))\n" + ] + } + ], + "source": [ + "from lib.data.providers import ProviderDateFormat, StaticDataProvider\n", + "from lib.data.features.transform import max_min_normalize, log_and_difference\n", + "\n", + "data_columns = {'Date': 'Date', 'Open': 'Open', 'High': 'High',\n", + " 'Low': 'Low', 'Close': 'Close', 'Volume': 'VolumeFrom'}\n", + "\n", + "provider = StaticDataProvider(date_format=ProviderDateFormat.DATETIME_HOUR_24,\n", + " csv_data_path=\"/Users/Adam/Desktop/YouTube/BitcoinTrader/data/input/coinbase-1d-btc-usd.csv\",\n", + " data_columns=data_columns)\n", + "\n", + "print(provider.data_frame.head())\n", + "\n", + "logged_and_diffed = log_and_difference(provider.data_frame)\n", + "\n", + "print(logged_and_diffed.head())\n", + "\n", + "normalized = max_min_normalize(provider.data_frame)\n", + "\n", + "print(normalized.head())" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[-3.]\n", + " [-3.]\n", + " [ 5.]\n", + " [ 1.]\n", + " [ 1.]]\n", + "[[0. ]\n", + " [0. ]\n", + " [1. ]\n", + " [0.5]\n", + " [0.5]]\n", + "[0. 0. 1. 0.5 0.5]\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "\n", + "from lib.data.features.transform import log_and_difference, max_min_normalize, difference\n", + "\n", + "items = [1, -2, 3, 4, 5]\n", + "\n", + "diffed = difference(items, inplace=False)\n", + "\n", + "print(diffed)\n", + "\n", + "normed = max_min_normalize(logged)\n", + "\n", + "print(normed)\n", + "\n", + "raveled = np.ravel(normed, order='F')\n", + "\n", + "print(raveled)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Experiments.ipynb b/Experiments.ipynb new file mode 100644 index 0000000..b96c0d6 --- /dev/null +++ b/Experiments.ipynb @@ -0,0 +1,192 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 0.00\n", + "1 0.40\n", + "2 1.00\n", + "3 0.20\n", + "4 0.44\n", + "5 0.58\n", + "Name: normalized_a, dtype: float64\n", + " a b c normalized_a\n", + "0 0.00 0.0000 0.000000 0.00\n", + "1 0.40 0.1875 0.416667 0.40\n", + "2 1.00 0.1500 0.166667 1.00\n", + "3 0.20 0.1375 1.000000 0.20\n", + "4 0.44 1.0000 0.033333 0.44\n", + "5 0.58 0.0500 0.000000 0.58\n", + "[[0. 0. 0. 0. ]\n", + " [0.4 0.1875 0.41666667 0.4 ]\n", + " [1. 0.15 0.16666667 1. ]\n", + " [0.2 0.1375 1. 0.2 ]\n", + " [0.44 1. 0.03333333 0.44 ]\n", + " [0.58 0.05 0. 0.58 ]]\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "df = pd.DataFrame([\n", + " { 'a': 0, 'b': 0, 'c': 0 },\n", + " { 'a': 2, 'b': 1.5, 'c': 2.5 },\n", + " { 'a': 5, 'b': 1.2, 'c': 1 },\n", + " { 'a': 1, 'b': 1.1, 'c': 6 },\n", + " { 'a': 2.2, 'b': 8, 'c': 0.2 },\n", + " { 'a': 2.9, 'b': 0.4, 'c': 0 },\n", + "])\n", + "\n", + "df['normalized_a'] = (df['a'] - df['a'].min()) / (df['a'].max() - df['a'].min())\n", + "\n", + "print(df['normalized_a'])\n", + "\n", + "from lib.data.features.transform import max_min_normalize\n", + "\n", + "df_max_min = max_min_normalize(df)\n", + "df_max_min_list = max_min_normalize(df.values)\n", + "\n", + "print(df_max_min)\n", + "print(df_max_min_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Date Open High Low Close Volume\n", + "1664 1417392000 300.0 370.0 300.00 370.0 0.05656\n", + "1663 1417478400 370.0 378.0 370.00 378.0 15.01000\n", + "1662 1417564800 378.0 378.0 377.01 378.0 0.54660\n", + "1661 1417651200 378.0 378.0 377.10 377.1 0.01000\n", + "1660 1417737600 377.1 377.1 377.10 377.1 0.00000\n", + " Date Open High Low Close Volume\n", + "1664 0.000061 0.209721 0.021391 0.209721 0.021391 5.581170\n", + "1663 0.000061 0.209721 0.021391 0.209721 0.021391 5.581170\n", + "1662 0.000061 0.021391 0.000000 0.018769 0.000000 -3.312755\n", + "1661 0.000061 0.000000 0.000000 0.000239 -0.002384 -4.001132\n", + "1660 0.000061 -0.002384 -0.002384 0.000000 0.000000 0.000000\n", + " Date Open High Low Close Volume\n", + "1664 0.008457 0.732499 0.455558 0.510581 0.592149 0.996942\n", + "1663 0.008457 0.732499 0.455558 0.510581 0.592149 0.996942\n", + "1662 0.008452 0.592149 0.415140 0.500916 0.576208 0.244405\n", + "1661 0.008446 0.576208 0.415140 0.499978 0.574431 0.186160\n", + "1660 0.008441 0.574431 0.410636 0.499966 0.576208 0.524705\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/Adam/Desktop/YouTube/BitcoinTrader/lib/data/features/transform.py:54: RuntimeWarning: divide by zero encountered in log\n", + " return transform(iterable, inplace, columns, lambda t_iterable: np.log(t_iterable) - np.log(t_iterable).shift(1))\n" + ] + } + ], + "source": [ + "from lib.data.providers import ProviderDateFormat, StaticDataProvider\n", + "from lib.data.features.transform import max_min_normalize, log_and_difference\n", + "\n", + "data_columns = {'Date': 'Date', 'Open': 'Open', 'High': 'High',\n", + " 'Low': 'Low', 'Close': 'Close', 'Volume': 'VolumeFrom'}\n", + "\n", + "provider = StaticDataProvider(date_format=ProviderDateFormat.DATETIME_HOUR_24,\n", + " csv_data_path=\"/Users/Adam/Desktop/YouTube/BitcoinTrader/data/input/coinbase-1d-btc-usd.csv\",\n", + " data_columns=data_columns)\n", + "\n", + "print(provider.data_frame.head())\n", + "\n", + "logged_and_diffed = log_and_difference(provider.data_frame)\n", + "\n", + "print(logged_and_diffed.head())\n", + "\n", + "normalized = max_min_normalize(provider.data_frame)\n", + "\n", + "print(normalized.head())" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[-3.]\n", + " [-3.]\n", + " [ 5.]\n", + " [ 1.]\n", + " [ 1.]]\n", + "[[0. ]\n", + " [0. ]\n", + " [1. ]\n", + " [0.5]\n", + " [0.5]]\n", + "[0. 0. 1. 0.5 0.5]\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "\n", + "from lib.data.features.transform import log_and_difference, max_min_normalize, difference\n", + "\n", + "items = [1, -2, 3, 4, 5]\n", + "\n", + "diffed = difference(items, inplace=False)\n", + "\n", + "print(diffed)\n", + "\n", + "normed = max_min_normalize(logged)\n", + "\n", + "print(normed)\n", + "\n", + "raveled = np.ravel(normed, order='F')\n", + "\n", + "print(raveled)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}