Skip to content

Commit f99b96e

Browse files
committed
Add Jupyter notebook to keep track of experimentation. This is for personal documentation.
1 parent 5a34c6a commit f99b96e

2 files changed

Lines changed: 377 additions & 0 deletions

File tree

Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {},
7+
"outputs": [
8+
{
9+
"name": "stdout",
10+
"output_type": "stream",
11+
"text": [
12+
"0 0.00\n",
13+
"1 0.40\n",
14+
"2 1.00\n",
15+
"3 0.20\n",
16+
"4 0.44\n",
17+
"5 0.58\n",
18+
"Name: normalized_a, dtype: float64\n",
19+
" a b c normalized_a\n",
20+
"0 0.00 0.0000 0.000000 0.00\n",
21+
"1 0.40 0.1875 0.416667 0.40\n",
22+
"2 1.00 0.1500 0.166667 1.00\n",
23+
"3 0.20 0.1375 1.000000 0.20\n",
24+
"4 0.44 1.0000 0.033333 0.44\n",
25+
"5 0.58 0.0500 0.000000 0.58\n",
26+
"[[0. 0. 0. 0. ]\n",
27+
" [0.4 0.1875 0.41666667 0.4 ]\n",
28+
" [1. 0.15 0.16666667 1. ]\n",
29+
" [0.2 0.1375 1. 0.2 ]\n",
30+
" [0.44 1. 0.03333333 0.44 ]\n",
31+
" [0.58 0.05 0. 0.58 ]]\n"
32+
]
33+
}
34+
],
35+
"source": [
36+
"import pandas as pd\n",
37+
"\n",
38+
"df = pd.DataFrame([\n",
39+
" { 'a': 0, 'b': 0, 'c': 0 },\n",
40+
" { 'a': 2, 'b': 1.5, 'c': 2.5 },\n",
41+
" { 'a': 5, 'b': 1.2, 'c': 1 },\n",
42+
" { 'a': 1, 'b': 1.1, 'c': 6 },\n",
43+
" { 'a': 2.2, 'b': 8, 'c': 0.2 },\n",
44+
" { 'a': 2.9, 'b': 0.4, 'c': 0 },\n",
45+
"])\n",
46+
"\n",
47+
"df['normalized_a'] = (df['a'] - df['a'].min()) / (df['a'].max() - df['a'].min())\n",
48+
"\n",
49+
"print(df['normalized_a'])\n",
50+
"\n",
51+
"from lib.data.features.transform import max_min_normalize\n",
52+
"\n",
53+
"df_max_min = max_min_normalize(df)\n",
54+
"df_max_min_list = max_min_normalize(df.values)\n",
55+
"\n",
56+
"print(df_max_min)\n",
57+
"print(df_max_min_list)"
58+
]
59+
},
60+
{
61+
"cell_type": "code",
62+
"execution_count": 2,
63+
"metadata": {},
64+
"outputs": [
65+
{
66+
"name": "stdout",
67+
"output_type": "stream",
68+
"text": [
69+
" Date Open High Low Close Volume\n",
70+
"1664 1417392000 300.0 370.0 300.00 370.0 0.05656\n",
71+
"1663 1417478400 370.0 378.0 370.00 378.0 15.01000\n",
72+
"1662 1417564800 378.0 378.0 377.01 378.0 0.54660\n",
73+
"1661 1417651200 378.0 378.0 377.10 377.1 0.01000\n",
74+
"1660 1417737600 377.1 377.1 377.10 377.1 0.00000\n",
75+
" Date Open High Low Close Volume\n",
76+
"1664 0.000061 0.209721 0.021391 0.209721 0.021391 5.581170\n",
77+
"1663 0.000061 0.209721 0.021391 0.209721 0.021391 5.581170\n",
78+
"1662 0.000061 0.021391 0.000000 0.018769 0.000000 -3.312755\n",
79+
"1661 0.000061 0.000000 0.000000 0.000239 -0.002384 -4.001132\n",
80+
"1660 0.000061 -0.002384 -0.002384 0.000000 0.000000 0.000000\n",
81+
" Date Open High Low Close Volume\n",
82+
"1664 0.008457 0.732499 0.455558 0.510581 0.592149 0.996942\n",
83+
"1663 0.008457 0.732499 0.455558 0.510581 0.592149 0.996942\n",
84+
"1662 0.008452 0.592149 0.415140 0.500916 0.576208 0.244405\n",
85+
"1661 0.008446 0.576208 0.415140 0.499978 0.574431 0.186160\n",
86+
"1660 0.008441 0.574431 0.410636 0.499966 0.576208 0.524705\n"
87+
]
88+
},
89+
{
90+
"name": "stderr",
91+
"output_type": "stream",
92+
"text": [
93+
"/Users/Adam/Desktop/YouTube/BitcoinTrader/lib/data/features/transform.py:54: RuntimeWarning: divide by zero encountered in log\n",
94+
" return transform(iterable, inplace, columns, lambda t_iterable: np.log(t_iterable) - np.log(t_iterable).shift(1))\n"
95+
]
96+
}
97+
],
98+
"source": [
99+
"from lib.data.providers import ProviderDateFormat, StaticDataProvider\n",
100+
"from lib.data.features.transform import max_min_normalize, log_and_difference\n",
101+
"\n",
102+
"data_columns = {'Date': 'Date', 'Open': 'Open', 'High': 'High',\n",
103+
" 'Low': 'Low', 'Close': 'Close', 'Volume': 'VolumeFrom'}\n",
104+
"\n",
105+
"provider = StaticDataProvider(date_format=ProviderDateFormat.DATETIME_HOUR_24,\n",
106+
" csv_data_path=\"/Users/Adam/Desktop/YouTube/BitcoinTrader/data/input/coinbase-1d-btc-usd.csv\",\n",
107+
" data_columns=data_columns)\n",
108+
"\n",
109+
"print(provider.data_frame.head())\n",
110+
"\n",
111+
"logged_and_diffed = log_and_difference(provider.data_frame)\n",
112+
"\n",
113+
"print(logged_and_diffed.head())\n",
114+
"\n",
115+
"normalized = max_min_normalize(provider.data_frame)\n",
116+
"\n",
117+
"print(normalized.head())"
118+
]
119+
},
120+
{
121+
"cell_type": "code",
122+
"execution_count": 6,
123+
"metadata": {},
124+
"outputs": [
125+
{
126+
"name": "stdout",
127+
"output_type": "stream",
128+
"text": [
129+
"[[-3.]\n",
130+
" [-3.]\n",
131+
" [ 5.]\n",
132+
" [ 1.]\n",
133+
" [ 1.]]\n",
134+
"[[0. ]\n",
135+
" [0. ]\n",
136+
" [1. ]\n",
137+
" [0.5]\n",
138+
" [0.5]]\n",
139+
"[0. 0. 1. 0.5 0.5]\n"
140+
]
141+
}
142+
],
143+
"source": [
144+
"import numpy as np\n",
145+
"\n",
146+
"from lib.data.features.transform import log_and_difference, max_min_normalize, difference\n",
147+
"\n",
148+
"items = [1, -2, 3, 4, 5]\n",
149+
"\n",
150+
"diffed = difference(items, inplace=False)\n",
151+
"\n",
152+
"print(diffed)\n",
153+
"\n",
154+
"normed = max_min_normalize(diffed)\n",
155+
"\n",
156+
"print(normed)\n",
157+
"\n",
158+
"raveled = np.ravel(normed, order='F')\n",
159+
"\n",
160+
"print(raveled)"
161+
]
162+
}
163+
],
164+
"metadata": {
165+
"kernelspec": {
166+
"display_name": "Python 3",
167+
"language": "python",
168+
"name": "python3"
169+
},
170+
"language_info": {
171+
"codemirror_mode": {
172+
"name": "ipython",
173+
"version": 3
174+
},
175+
"file_extension": ".py",
176+
"mimetype": "text/x-python",
177+
"name": "python",
178+
"nbconvert_exporter": "python",
179+
"pygments_lexer": "ipython3",
180+
"version": "3.7.2"
181+
}
182+
},
183+
"nbformat": 4,
184+
"nbformat_minor": 2
185+
}

Experiments.ipynb

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {},
7+
"outputs": [
8+
{
9+
"name": "stdout",
10+
"output_type": "stream",
11+
"text": [
12+
"0 0.00\n",
13+
"1 0.40\n",
14+
"2 1.00\n",
15+
"3 0.20\n",
16+
"4 0.44\n",
17+
"5 0.58\n",
18+
"Name: normalized_a, dtype: float64\n",
19+
" a b c normalized_a\n",
20+
"0 0.00 0.0000 0.000000 0.00\n",
21+
"1 0.40 0.1875 0.416667 0.40\n",
22+
"2 1.00 0.1500 0.166667 1.00\n",
23+
"3 0.20 0.1375 1.000000 0.20\n",
24+
"4 0.44 1.0000 0.033333 0.44\n",
25+
"5 0.58 0.0500 0.000000 0.58\n",
26+
"[[0. 0. 0. 0. ]\n",
27+
" [0.4 0.1875 0.41666667 0.4 ]\n",
28+
" [1. 0.15 0.16666667 1. ]\n",
29+
" [0.2 0.1375 1. 0.2 ]\n",
30+
" [0.44 1. 0.03333333 0.44 ]\n",
31+
" [0.58 0.05 0. 0.58 ]]\n"
32+
]
33+
}
34+
],
35+
"source": [
36+
"import pandas as pd\n",
37+
"\n",
38+
"df = pd.DataFrame([\n",
39+
" { 'a': 0, 'b': 0, 'c': 0 },\n",
40+
" { 'a': 2, 'b': 1.5, 'c': 2.5 },\n",
41+
" { 'a': 5, 'b': 1.2, 'c': 1 },\n",
42+
" { 'a': 1, 'b': 1.1, 'c': 6 },\n",
43+
" { 'a': 2.2, 'b': 8, 'c': 0.2 },\n",
44+
" { 'a': 2.9, 'b': 0.4, 'c': 0 },\n",
45+
"])\n",
46+
"\n",
47+
"df['normalized_a'] = (df['a'] - df['a'].min()) / (df['a'].max() - df['a'].min())\n",
48+
"\n",
49+
"print(df['normalized_a'])\n",
50+
"\n",
51+
"from lib.data.features.transform import max_min_normalize\n",
52+
"\n",
53+
"df_max_min = max_min_normalize(df)\n",
54+
"df_max_min_list = max_min_normalize(df.values)\n",
55+
"\n",
56+
"print(df_max_min)\n",
57+
"print(df_max_min_list)"
58+
]
59+
},
60+
{
61+
"cell_type": "code",
62+
"execution_count": 2,
63+
"metadata": {},
64+
"outputs": [
65+
{
66+
"name": "stdout",
67+
"output_type": "stream",
68+
"text": [
69+
" Date Open High Low Close Volume\n",
70+
"1664 1417392000 300.0 370.0 300.00 370.0 0.05656\n",
71+
"1663 1417478400 370.0 378.0 370.00 378.0 15.01000\n",
72+
"1662 1417564800 378.0 378.0 377.01 378.0 0.54660\n",
73+
"1661 1417651200 378.0 378.0 377.10 377.1 0.01000\n",
74+
"1660 1417737600 377.1 377.1 377.10 377.1 0.00000\n",
75+
" Date Open High Low Close Volume\n",
76+
"1664 0.000061 0.209721 0.021391 0.209721 0.021391 5.581170\n",
77+
"1663 0.000061 0.209721 0.021391 0.209721 0.021391 5.581170\n",
78+
"1662 0.000061 0.021391 0.000000 0.018769 0.000000 -3.312755\n",
79+
"1661 0.000061 0.000000 0.000000 0.000239 -0.002384 -4.001132\n",
80+
"1660 0.000061 -0.002384 -0.002384 0.000000 0.000000 0.000000\n",
81+
" Date Open High Low Close Volume\n",
82+
"1664 0.008457 0.732499 0.455558 0.510581 0.592149 0.996942\n",
83+
"1663 0.008457 0.732499 0.455558 0.510581 0.592149 0.996942\n",
84+
"1662 0.008452 0.592149 0.415140 0.500916 0.576208 0.244405\n",
85+
"1661 0.008446 0.576208 0.415140 0.499978 0.574431 0.186160\n",
86+
"1660 0.008441 0.574431 0.410636 0.499966 0.576208 0.524705\n"
87+
]
88+
},
89+
{
90+
"name": "stderr",
91+
"output_type": "stream",
92+
"text": [
93+
"/Users/Adam/Desktop/YouTube/BitcoinTrader/lib/data/features/transform.py:54: RuntimeWarning: divide by zero encountered in log\n",
94+
" return transform(iterable, inplace, columns, lambda t_iterable: np.log(t_iterable) - np.log(t_iterable).shift(1))\n"
95+
]
96+
}
97+
],
98+
"source": [
99+
"from lib.data.providers import ProviderDateFormat, StaticDataProvider\n",
100+
"from lib.data.features.transform import max_min_normalize, log_and_difference\n",
101+
"\n",
102+
"data_columns = {'Date': 'Date', 'Open': 'Open', 'High': 'High',\n",
103+
" 'Low': 'Low', 'Close': 'Close', 'Volume': 'VolumeFrom'}\n",
104+
"\n",
105+
"provider = StaticDataProvider(date_format=ProviderDateFormat.DATETIME_HOUR_24,\n",
106+
" csv_data_path=\"/Users/Adam/Desktop/YouTube/BitcoinTrader/data/input/coinbase-1d-btc-usd.csv\",\n",
107+
" data_columns=data_columns)\n",
108+
"\n",
109+
"print(provider.data_frame.head())\n",
110+
"\n",
111+
"logged_and_diffed = log_and_difference(provider.data_frame)\n",
112+
"\n",
113+
"print(logged_and_diffed.head())\n",
114+
"\n",
115+
"normalized = max_min_normalize(provider.data_frame)\n",
116+
"\n",
117+
"print(normalized.head())"
118+
]
119+
},
120+
{
121+
"cell_type": "code",
122+
"execution_count": 7,
123+
"metadata": {},
124+
"outputs": [
125+
{
126+
"name": "stdout",
127+
"output_type": "stream",
128+
"text": [
129+
"[[-3.]\n",
130+
" [-3.]\n",
131+
" [ 5.]\n",
132+
" [ 1.]\n",
133+
" [ 1.]]\n",
134+
"[[0. ]\n",
135+
" [0. ]\n",
136+
" [1. ]\n",
137+
" [0.5]\n",
138+
" [0.5]]\n",
139+
"[0. 0. 1. 0.5 0.5]\n"
140+
]
141+
}
142+
],
143+
"source": [
144+
"import numpy as np\n",
145+
"\n",
146+
"from lib.data.features.transform import log_and_difference, max_min_normalize, difference\n",
147+
"\n",
148+
"items = [1, -2, 3, 4, 5]\n",
149+
"\n",
150+
"diffed = difference(items, inplace=False)\n",
151+
"\n",
152+
"print(diffed)\n",
153+
"\n",
154+
"normed = max_min_normalize(diffed)\n",
155+
"\n",
156+
"print(normed)\n",
157+
"\n",
158+
"raveled = np.ravel(normed, order='F')\n",
159+
"\n",
160+
"print(raveled)"
161+
]
162+
},
163+
{
164+
"cell_type": "code",
165+
"execution_count": null,
166+
"metadata": {},
167+
"outputs": [],
168+
"source": []
169+
}
170+
],
171+
"metadata": {
172+
"kernelspec": {
173+
"display_name": "Python 3",
174+
"language": "python",
175+
"name": "python3"
176+
},
177+
"language_info": {
178+
"codemirror_mode": {
179+
"name": "ipython",
180+
"version": 3
181+
},
182+
"file_extension": ".py",
183+
"mimetype": "text/x-python",
184+
"name": "python",
185+
"nbconvert_exporter": "python",
186+
"pygments_lexer": "ipython3",
187+
"version": "3.7.2"
188+
}
189+
},
190+
"nbformat": 4,
191+
"nbformat_minor": 2
192+
}

0 commit comments

Comments
 (0)