Commit 40c7559

Merge branch 'rshtirmer-trade-stats'

notadamking committed Jul 10, 2019
2 parents a351a7d + 80fa433 commit 40c7559
Showing 9 changed files with 106 additions and 47 deletions.
3 changes: 2 additions & 1 deletion .dockerignore
@@ -6,4 +6,5 @@ tensorboard
 agents
 data/tensorboard
 data/agents
-data/postgres
+data/postgres
+data/reports
1 change: 1 addition & 0 deletions .gitignore
@@ -6,4 +6,5 @@ data/tensorboard/*
 data/agents/*
 data/postgres/*
 data/log/*
+data/reports/*
 *.pkl
14 changes: 11 additions & 3 deletions cli.py
@@ -15,7 +15,7 @@ def run_optimize(args, logger):
     from lib.RLTrader import RLTrader
 
     trader = RLTrader(**vars(args), logger=logger)
-    trader.optimize(args.trials)
+    trader.optimize(n_trials=args.trials, n_prune_evals_per_trial=args.prune_evals, n_tests_per_eval=args.eval_tests)
 
 
 if __name__ == '__main__':
@@ -39,8 +39,16 @@ def run_optimize(args, logger):
     trader = RLTrader(**vars(args), logger=logger)
 
     if args.command == 'train':
-        trader.train(n_epochs=args.epochs)
+        trader.train(n_epochs=args.epochs,
+                     save_every=args.save_every,
+                     test_trained_model=args.test_trained,
+                     render_test_env=args.render_test,
+                     render_report=args.render_report,
+                     save_report=args.save_report)
    elif args.command == 'test':
-        trader.test(model_epoch=args.model_epoch, should_render=args.no_render)
+        trader.test(model_epoch=args.model_epoch,
+                    render_env=args.render_env,
+                    render_report=args.render_report,
+                    save_report=args.save_report)
    elif args.command == 'update-static-data':
        download_data_async()
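Note: the new subcommand flags all flow straight through to RLTrader as keyword arguments. A minimal sketch of the equivalent programmatic calls, assuming a trader constructed the way cli.py constructs it; the argument values below are just the CLI defaults, not a recommendation:

    # Sketch only: mirrors the updated dispatch above, using the CLI default values.
    from lib.RLTrader import RLTrader

    trader = RLTrader()  # cli.py actually passes **vars(args) and a logger here

    # optimize() now takes its tuning knobs by name:
    trader.optimize(n_trials=1, n_prune_evals_per_trial=2, n_tests_per_eval=1)

    # train() gains save/report controls, test() gains render/report controls:
    trader.train(n_epochs=10, save_every=1, test_trained_model=True,
                 render_test_env=False, render_report=True, save_report=False)
    trader.test(model_epoch=0, render_env=True, render_report=True, save_report=False)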
Binary file added data/.DS_Store
59 changes: 47 additions & 12 deletions lib/RLTrader.py
@@ -1,6 +1,8 @@
 import os
 import optuna
 import numpy as np
+import pandas as pd
+import quantstats as qs
 
 from os import path
 from typing import Dict
@@ -136,8 +138,12 @@ def optimize_params(self, trial, n_prune_evals_per_trial: int = 2, n_tests_per_e
         validation_env = SubprocVecEnv([make_env(validation_provider, i) for i in range(1)])
 
         model_params = self.optimize_agent_params(trial)
-        model = self.Model(self.Policy, train_env, verbose=self.model_verbose, nminibatches=1,
-                           tensorboard_log=self.tensorboard_path, **model_params)
+        model = self.Model(self.Policy,
+                           train_env,
+                           verbose=self.model_verbose,
+                           nminibatches=1,
+                           tensorboard_log=self.tensorboard_path,
+                           **model_params)
 
         last_reward = -np.finfo(np.float16).max
         n_steps_per_eval = int(len(train_provider.data_frame) / n_prune_evals_per_trial)
@@ -154,7 +160,7 @@ def optimize_params(self, trial, n_prune_evals_per_trial: int = 2, n_tests_per_e
             trades = train_env.get_attr('trades')
 
             if len(trades[0]) < 1:
-                self.logger.info('Pruning trial for not making any trades: ', eval_idx)
+                self.logger.info(f'Pruning trial for not making any trades: {eval_idx}')
                 raise optuna.structs.TrialPruned()
 
             state = None
@@ -179,9 +185,9 @@ def optimize_params(self, trial, n_prune_evals_per_trial: int = 2, n_tests_per_e
 
         return -1 * last_reward
 
-    def optimize(self, n_trials: int = 20, *optimize_params):
+    def optimize(self, n_trials: int = 20, **optimize_params):
         try:
-            self.optuna_study.optimize(self.optimize_params, n_trials=n_trials, n_jobs=1, *optimize_params)
+            self.optuna_study.optimize(self.optimize_params, n_trials=n_trials, n_jobs=1, **optimize_params)
         except KeyboardInterrupt:
             pass
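Worth noting: the switch from *optimize_params to **optimize_params is what makes the new CLI flags usable at all, since keyword arguments cannot be captured by a *args parameter. A generic illustration (not RLTrader code):

    # Keyword arguments pass through **kwargs but are rejected by *args.
    def old_optimize(n_trials=20, *params):
        return params

    def new_optimize(n_trials=20, **params):
        return params

    # old_optimize(n_trials=5, n_prune_evals_per_trial=4)  # TypeError: unexpected keyword
    new_optimize(n_trials=5, n_prune_evals_per_trial=4)    # {'n_prune_evals_per_trial': 4}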

@@ -195,7 +201,13 @@ def optimize(self, n_trials: int = 20, *optimize_params):
 
         return self.optuna_study.trials_dataframe()
 
-    def train(self, n_epochs: int = 10, save_every: int = 1, test_trained_model: bool = False, render_trained_model: bool = False):
+    def train(self,
+              n_epochs: int = 10,
+              save_every: int = 1,
+              test_trained_model: bool = True,
+              render_test_env: bool = False,
+              render_report: bool = True,
+              save_report: bool = False):
         train_provider, test_provider = self.data_provider.split_data_train_test(self.train_split_percentage)
 
         del test_provider
@@ -204,8 +216,12 @@ def train(self, n_epochs: int = 10, save_every: int = 1, test_trained_model: boo
 
         model_params = self.get_model_params()
 
-        model = self.Model(self.Policy, train_env, verbose=self.model_verbose, nminibatches=self.n_minibatches,
-                           tensorboard_log=self.tensorboard_path, **model_params)
+        model = self.Model(self.Policy,
+                           train_env,
+                           verbose=self.model_verbose,
+                           nminibatches=self.n_minibatches,
+                           tensorboard_log=self.tensorboard_path,
+                           **model_params)
 
         self.logger.info(f'Training for {n_epochs} epochs')
 
@@ -221,11 +237,14 @@ def train(self, n_epochs: int = 10, save_every: int = 1, test_trained_model: boo
                 model.save(model_path)
 
             if test_trained_model:
-                self.test(model_epoch, should_render=render_trained_model)
+                self.test(model_epoch,
+                          render_env=render_test_env,
+                          render_report=render_report,
+                          save_report=save_report)
 
         self.logger.info(f'Trained {n_epochs} models')
 
-    def test(self, model_epoch: int = 0, should_render: bool = True):
+    def test(self, model_epoch: int = 0, render_env: bool = True, render_report: bool = True, save_report: bool = False):
         train_provider, test_provider = self.data_provider.split_data_train_test(self.train_split_percentage)
 
         del train_provider
@@ -247,14 +266,30 @@ def test(self, model_epoch: int = 0, should_render: bool = True):
 
         for _ in range(len(test_provider.data_frame)):
             action, state = model.predict(zero_completed_obs, state=state)
-            obs, reward, _, __ = test_env.step([action[0]])
+            obs, reward, done, info = test_env.step([action[0]])
 
             zero_completed_obs[0, :] = obs
 
             rewards.append(reward)
 
-            if should_render:
+            if render_env:
                 test_env.render(mode='human')
 
+            if done:
+                net_worths = pd.DataFrame({
+                    'Date': info[0]['timestamps'],
+                    'Balance': info[0]['networths'],
+                })
+
+                net_worths.set_index('Date', drop=True, inplace=True)
+                returns = net_worths.pct_change()[1:]
+
+                if render_report:
+                    qs.plots.snapshot(returns.Balance, title='RL Trader Performance')
+
+                if save_report:
+                    reports_path = path.join('data', 'reports', f'{self.study_name}__{model_epoch}.html')
+                    qs.reports.html(returns.Balance, file=reports_path)
+
         self.logger.info(
             f'Finished testing model ({self.study_name}__{model_epoch}): ${"{:.2f}".format(np.sum(rewards))}')
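The report step works on a returns series derived from the equity curve the environment now exposes. A standalone sketch of the same transform, with made-up sample values standing in for the env's info payload:

    # Standalone sketch of the report step; timestamps and net worths are invented.
    import pandas as pd
    import quantstats as qs

    net_worths = pd.DataFrame({
        'Date': pd.to_datetime([1560000000, 1560003600, 1560007200], unit='s'),
        'Balance': [10000.0, 10150.0, 10090.0],
    })
    net_worths.set_index('Date', drop=True, inplace=True)

    # pct_change() leaves NaN in the first row, hence the [1:] slice.
    returns = net_worths.pct_change()[1:]

    qs.plots.snapshot(returns.Balance, title='RL Trader Performance')
    qs.reports.html(returns.Balance, file='data/reports/example__0.html')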
61 changes: 37 additions & 24 deletions lib/cli/RLTraderCLI.py
@@ -8,6 +8,7 @@ class RLTraderCLI:
     def __init__(self):
         config_parser = argparse.ArgumentParser(add_help=False)
         config_parser.add_argument("-f", "--from-config", help="Specify config file", metavar="FILE")
+
         args, _ = config_parser.parse_known_args()
         defaults = {}
 
@@ -17,44 +18,56 @@ def __init__(self):
             defaults = dict(config.items("Defaults"))
 
         formatter = argparse.ArgumentDefaultsHelpFormatter
-        self.parser = argparse.ArgumentParser(
-            formatter_class=formatter,
-            parents=[config_parser],
-            description=__doc__
-        )
+        self.parser = argparse.ArgumentParser(formatter_class=formatter,
+                                              parents=[config_parser],
+                                              description=__doc__)
 
-        self.parser.add_argument("--data-provider", "-o", type=str, default="static")
-        self.parser.add_argument("--input-data-path", "-t", type=str, default="data/input/coinbase-1h-btc-usd.csv")
+        self.parser.add_argument("--data-provider", "-d", type=str, default="static")
+        self.parser.add_argument("--input-data-path", "-n", type=str, default="data/input/coinbase-1h-btc-usd.csv")
         self.parser.add_argument("--pair", "-p", type=str, default="BTC/USD")
-        self.parser.add_argument("--debug", "-n", action='store_false')
+        self.parser.add_argument("--debug", "-D", action='store_false')
         self.parser.add_argument('--mini-batches', type=int, default=1, help='Mini batches', dest='n_minibatches')
         self.parser.add_argument('--train-split-percentage', type=float, default=0.8, help='Train set percentage')
-        self.parser.add_argument('--verbose-model', type=int, default=1, help='Verbose model')
-        self.parser.add_argument('--params-db-path', type=str, default='sqlite:///data/params.db',
-                                 help='Params path')
-        self.parser.add_argument(
-            '--tensor-board-path',
-            type=str,
-            default=os.path.join('data', 'tensorboard'),
-            help='Tensorboard path',
-            dest='tensorboard_path'
-        )
-        self.parser.add_argument('--parallel-jobs', type=int, default=multiprocessing.cpu_count(),
+        self.parser.add_argument('--verbose-model', type=int, default=1, help='Verbose model', dest='model_verbose')
+        self.parser.add_argument('--params-db-path', type=str, default='sqlite:///data/params.db', help='Params path')
+        self.parser.add_argument('--tensorboard-path',
+                                 type=str,
+                                 default=os.path.join('data', 'tensorboard'),
+                                 help='Tensorboard path')
+        self.parser.add_argument('--parallel-jobs',
+                                 type=int,
+                                 default=multiprocessing.cpu_count(),
                                  help='How many processes in parallel')
 
         subparsers = self.parser.add_subparsers(help='Command', dest="command")
 
         optimize_parser = subparsers.add_parser('optimize', description='Optimize model parameters')
         optimize_parser.add_argument('--trials', type=int, default=1, help='Number of trials')
 
+        optimize_parser.add_argument('--verbose-model', type=int, default=1, help='Verbose model', dest='model_verbose')
+        optimize_parser.add_argument('--prune-evals',
+                                     type=int,
+                                     default=2,
+                                     help='Number of pruning evaluations per trial')
+        optimize_parser.add_argument('--eval-tests', type=int, default=1, help='Number of tests per pruning evaluation')
+
         train_parser = subparsers.add_parser('train', description='Train model')
-        train_parser.add_argument('--epochs', type=int, default=1, help='Number of epochs to train')
+        train_parser.add_argument('--epochs', type=int, default=10, help='Number of epochs to train')
+        train_parser.add_argument('--save-every', type=int, default=1, help='Save the trained model every n epochs')
+        train_parser.add_argument('--no-test', dest="test_trained", action="store_false", help='Test each saved model')
+        train_parser.add_argument('--render-test', dest="render_test",
+                                  action="store_true", help='Render the test environment')
+        train_parser.add_argument('--no-report', dest="render_report", action="store_false",
+                                  help='Render the performance report')
+        train_parser.add_argument('--save-report', dest="save_report", action="store_true",
+                                  help='Save the performance report as .html')
 
         test_parser = subparsers.add_parser('test', description='Test model')
-        test_parser.add_argument('--model-epoch', type=int, default=1, help='Model epoch index')
-        test_parser.add_argument('--no-render', action='store_false', help='Do not render test')
+        test_parser.add_argument('--model-epoch', type=int, default=0, help='Model epoch index')
+        test_parser.add_argument('--no-render', dest="render_env", action="store_false",
+                                 help='Render the test environment')
+        test_parser.add_argument('--no-report', dest="render_report", action="store_false",
+                                 help='Render the performance report')
+        test_parser.add_argument('--save-report', dest="save_report", action="store_true",
+                                 help='Save the performance report as .html')
 
         subparsers.add_parser('update-static-data', description='Update static data')
 
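The constructor keeps the existing two-stage argparse pattern: a minimal bootstrap parser pulls --from-config, defaults are read from the file's [Defaults] section, and the main parser (with the bootstrap as a parent) presumably applies them via set_defaults, a step collapsed out of this hunk. A self-contained sketch of that pattern, using a hypothetical --learning-rate option:

    # Sketch of the config-file defaults pattern; --learning-rate is hypothetical.
    import argparse
    import configparser

    config_parser = argparse.ArgumentParser(add_help=False)
    config_parser.add_argument("-f", "--from-config", metavar="FILE")
    known, _ = config_parser.parse_known_args()

    defaults = {}
    if known.from_config:
        config = configparser.ConfigParser()
        config.read([known.from_config])
        defaults = dict(config.items("Defaults"))  # note: values arrive as strings

    parser = argparse.ArgumentParser(parents=[config_parser])
    parser.add_argument("--learning-rate", type=float, default=3e-4)
    parser.set_defaults(**defaults)  # config file values override argparse defaults
    print(parser.parse_args())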
6 changes: 3 additions & 3 deletions lib/data/features/indicators.py
@@ -35,10 +35,10 @@
     ('KCLI', ta.keltner_channel_lband_indicator, ['High', 'Low', 'Close']),
     ('DCHI', ta.donchian_channel_hband_indicator, ['Close']),
     ('DCLI', ta.donchian_channel_lband_indicator, ['Close']),
-    ('ADI', ta.acc_dist_index, ['High', 'Low', 'Close', 'volume']),
-    ('OBV', ta.on_balance_volume, ['close', 'volume']),
+    ('ADI', ta.acc_dist_index, ['High', 'Low', 'Close', 'Volume BTC']),
+    ('OBV', ta.on_balance_volume, ['Close', 'Volume BTC']),
     ('CMF', ta.chaikin_money_flow, ['High', 'Low', 'Close', 'Volume BTC']),
-    ('FI', ta.force_index, ['Close', 'Volume']),
+    ('FI', ta.force_index, ['Close', 'Volume BTC']),
     ('EM', ta.ease_of_movement, ['High', 'Low', 'Close', 'Volume BTC']),
     ('VPT', ta.volume_price_trend, ['Close', 'Volume BTC']),
     ('NVI', ta.negative_volume_index, ['Close', 'Volume BTC']),
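These entries are (name, function, input columns) tuples, so the fix is simply pointing them at columns that actually exist in the bundled Coinbase CSV ('Close', 'Volume BTC', and so on); the previous lowercase 'close'/'volume' keys would fail at lookup time. A small sketch of how one corrected entry resolves, assuming the ta library's (close, volume) signature for on_balance_volume:

    # Sketch: applying the corrected OBV entry to a toy frame with CSV-style headers.
    import pandas as pd
    import ta

    df = pd.DataFrame({
        'Close': [100.0, 101.5, 100.8, 102.2],
        'Volume BTC': [12.0, 8.5, 9.1, 11.3],
    })

    # ('OBV', ta.on_balance_volume, ['Close', 'Volume BTC']) expands to:
    obv = ta.on_balance_volume(df['Close'], df['Volume BTC'])
    print(obv)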
6 changes: 3 additions & 3 deletions lib/env/TradingEnv.py
@@ -115,7 +115,6 @@ def _take_action(self, action: int):
 
         current_net_worth = round(self.balance + self.asset_held * self._current_price(), self.base_precision)
         self.net_worths.append(current_net_worth)
-
         self.account_history = self.account_history.append({
             'balance': self.balance,
             'asset_bought': asset_bought,
@@ -155,6 +154,7 @@ def _reward(self):
 
     def _next_observation(self):
         self.current_ohlcv = self.data_provider.next_ohlcv()
+        self.timestamps.append(pd.to_datetime(self.current_ohlcv.Date.item(), unit='s'))
         self.observations = self.observations.append(self.current_ohlcv, ignore_index=True)
 
         if self.stationarize_obs:
@@ -187,6 +187,7 @@ def reset(self):
 
         self.balance = self.initial_balance
         self.net_worths = [self.initial_balance]
+        self.timestamps = []
         self.asset_held = 0
         self.current_step = 0
 
@@ -210,8 +211,7 @@ def step(self, action):
         obs = self._next_observation()
         reward = self._reward()
         done = self._done()
-
-        return obs, reward, done, {}
+        return obs, reward, done, {'networths': self.net_worths, 'timestamps': self.timestamps}
 
     def render(self, mode='human'):
         if mode == 'system':
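With step() now returning the running equity curve in the info dict, downstream code can build the performance report without reaching into environment attributes, which is what RLTrader.test does with info[0] from its vectorized wrapper. A minimal consumer-side sketch, assuming env is a constructed TradingEnv and action is a valid action for it:

    # Sketch: consuming the new info payload from a single (non-vectorized) env.
    import pandas as pd

    obs, reward, done, info = env.step(action)

    if done:
        equity = pd.Series(info['networths'], index=info['timestamps'])
        returns = equity.pct_change()[1:]  # same transform the report step uses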
3 changes: 2 additions & 1 deletion requirements.base.txt
@@ -10,4 +10,5 @@ statsmodels==0.10.0rc2
 empyrical
 ccxt
 psycopg2
-configparser
+configparser
+quantstats
