diff --git a/.dockerignore b/.dockerignore
index 3cbfe6f..3c04311 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -6,4 +6,5 @@ tensorboard
 agents
 data/tensorboard
 data/agents
-data/postgres
\ No newline at end of file
+data/postgres
+data/reports
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index ec42947..989921f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,4 +6,5 @@ data/tensorboard/*
 data/agents/*
 data/postgres/*
 data/log/*
+data/reports/*
 *.pkl
diff --git a/cli.py b/cli.py
index 5ca3a41..6d6b5d0 100644
--- a/cli.py
+++ b/cli.py
@@ -15,7 +15,7 @@ def run_optimize(args, logger):
     from lib.RLTrader import RLTrader
 
     trader = RLTrader(**vars(args), logger=logger)
-    trader.optimize(args.trials)
+    trader.optimize(n_trials=args.trials, n_prune_evals_per_trial=args.prune_evals, n_tests_per_eval=args.eval_tests)
 
 
 if __name__ == '__main__':
@@ -39,8 +39,16 @@ def run_optimize(args, logger):
     trader = RLTrader(**vars(args), logger=logger)
 
     if args.command == 'train':
-        trader.train(n_epochs=args.epochs)
+        trader.train(n_epochs=args.epochs,
+                     save_every=args.save_every,
+                     test_trained_model=args.test_trained,
+                     render_test_env=args.render_test,
+                     render_report=args.render_report,
+                     save_report=args.save_report)
     elif args.command == 'test':
-        trader.test(model_epoch=args.model_epoch, should_render=args.no_render)
+        trader.test(model_epoch=args.model_epoch,
+                    render_env=args.render_env,
+                    render_report=args.render_report,
+                    save_report=args.save_report)
     elif args.command == 'update-static-data':
         download_data_async()
diff --git a/data/.DS_Store b/data/.DS_Store
new file mode 100644
index 0000000..7871d54
Binary files /dev/null and b/data/.DS_Store differ
diff --git a/lib/RLTrader.py b/lib/RLTrader.py
index 850be21..d9cd6fc 100644
--- a/lib/RLTrader.py
+++ b/lib/RLTrader.py
@@ -1,6 +1,8 @@
 import os
 import optuna
 import numpy as np
+import pandas as pd
+import quantstats as qs
 
 from os import path
 from typing import Dict
@@ -136,8 +138,12 @@ def optimize_params(self, trial, n_prune_evals_per_trial: int = 2, n_tests_per_e
         validation_env = SubprocVecEnv([make_env(validation_provider, i) for i in range(1)])
 
         model_params = self.optimize_agent_params(trial)
-        model = self.Model(self.Policy, train_env, verbose=self.model_verbose, nminibatches=1,
-                           tensorboard_log=self.tensorboard_path, **model_params)
+        model = self.Model(self.Policy,
+                           train_env,
+                           verbose=self.model_verbose,
+                           nminibatches=1,
+                           tensorboard_log=self.tensorboard_path,
+                           **model_params)
 
         last_reward = -np.finfo(np.float16).max
         n_steps_per_eval = int(len(train_provider.data_frame) / n_prune_evals_per_trial)
@@ -154,7 +160,7 @@ def optimize_params(self, trial, n_prune_evals_per_trial: int = 2, n_tests_per_e
             trades = train_env.get_attr('trades')
 
             if len(trades[0]) < 1:
-                self.logger.info('Pruning trial for not making any trades: ', eval_idx)
+                self.logger.info(f'Pruning trial for not making any trades: {eval_idx}')
                 raise optuna.structs.TrialPruned()
 
             state = None
@@ -179,9 +185,9 @@ def optimize_params(self, trial, n_prune_evals_per_trial: int = 2, n_tests_per_e
 
         return -1 * last_reward
 
-    def optimize(self, n_trials: int = 20, *optimize_params):
+    def optimize(self, n_trials: int = 20, **optimize_params):
         try:
-            self.optuna_study.optimize(self.optimize_params, n_trials=n_trials, n_jobs=1, *optimize_params)
+            self.optuna_study.optimize(self.optimize_params, n_trials=n_trials, n_jobs=1, **optimize_params)
         except KeyboardInterrupt:
             pass
 
@@ -195,7 +201,13 @@ def optimize(self, n_trials: int = 20, *optimize_params):
 
         return self.optuna_study.trials_dataframe()
 
-    def train(self, n_epochs: int = 10, save_every: int = 1, test_trained_model: bool = False, render_trained_model: bool = False):
+    def train(self,
+              n_epochs: int = 10,
+              save_every: int = 1,
+              test_trained_model: bool = True,
+              render_test_env: bool = False,
+              render_report: bool = True,
+              save_report: bool = False):
         train_provider, test_provider = self.data_provider.split_data_train_test(self.train_split_percentage)
 
         del test_provider
@@ -204,8 +216,12 @@ def train(self, n_epochs: int = 10, save_every: int = 1, test_trained_model: boo
 
         model_params = self.get_model_params()
 
-        model = self.Model(self.Policy, train_env, verbose=self.model_verbose, nminibatches=self.n_minibatches,
-                           tensorboard_log=self.tensorboard_path, **model_params)
+        model = self.Model(self.Policy,
+                           train_env,
+                           verbose=self.model_verbose,
+                           nminibatches=self.n_minibatches,
+                           tensorboard_log=self.tensorboard_path,
+                           **model_params)
 
         self.logger.info(f'Training for {n_epochs} epochs')
 
@@ -221,11 +237,14 @@ def train(self, n_epochs: int = 10, save_every: int = 1, test_trained_model: boo
                 model.save(model_path)
 
                 if test_trained_model:
-                    self.test(model_epoch, should_render=render_trained_model)
+                    self.test(model_epoch,
+                              render_env=render_test_env,
+                              render_report=render_report,
+                              save_report=save_report)
 
         self.logger.info(f'Trained {n_epochs} models')
 
-    def test(self, model_epoch: int = 0, should_render: bool = True):
+    def test(self, model_epoch: int = 0, render_env: bool = True, render_report: bool = True, save_report: bool = False):
         train_provider, test_provider = self.data_provider.split_data_train_test(self.train_split_percentage)
 
         del train_provider
@@ -247,14 +266,30 @@ def test(self, model_epoch: int = 0, should_render: bool = True):
 
         for _ in range(len(test_provider.data_frame)):
             action, state = model.predict(zero_completed_obs, state=state)
-            obs, reward, _, __ = test_env.step([action[0]])
+            obs, reward, done, info = test_env.step([action[0]])
 
             zero_completed_obs[0, :] = obs
 
             rewards.append(reward)
 
-            if should_render:
+            if render_env:
                 test_env.render(mode='human')
 
+            if done:
+                net_worths = pd.DataFrame({
+                    'Date': info[0]['timestamps'],
+                    'Balance': info[0]['networths'],
+                })
+
+                net_worths.set_index('Date', drop=True, inplace=True)
+                returns = net_worths.pct_change()[1:]
+
+                if render_report:
+                    qs.plots.snapshot(returns.Balance, title='RL Trader Performance')
+
+                if save_report:
+                    reports_path = path.join('data', 'reports', f'{self.study_name}__{model_epoch}.html')
+                    qs.reports.html(returns.Balance, file=reports_path)
+
         self.logger.info(
             f'Finished testing model ({self.study_name}__{model_epoch}): ${"{:.2f}".format(np.sum(rewards))}')
diff --git a/lib/cli/RLTraderCLI.py b/lib/cli/RLTraderCLI.py
index 1b21ebc..8dee37f 100644
--- a/lib/cli/RLTraderCLI.py
+++ b/lib/cli/RLTraderCLI.py
@@ -8,6 +8,7 @@ class RLTraderCLI:
     def __init__(self):
         config_parser = argparse.ArgumentParser(add_help=False)
         config_parser.add_argument("-f", "--from-config", help="Specify config file", metavar="FILE")
+
         args, _ = config_parser.parse_known_args()
 
         defaults = {}
@@ -17,44 +18,56 @@ def __init__(self):
             defaults = dict(config.items("Defaults"))
 
         formatter = argparse.ArgumentDefaultsHelpFormatter
-        self.parser = argparse.ArgumentParser(
-            formatter_class=formatter,
-            parents=[config_parser],
-            description=__doc__
-        )
+        self.parser = argparse.ArgumentParser(formatter_class=formatter,
+                                              parents=[config_parser],
+                                              description=__doc__)
 
-        self.parser.add_argument("--data-provider", "-o", type=str, default="static")
-        self.parser.add_argument("--input-data-path", "-t", type=str, default="data/input/coinbase-1h-btc-usd.csv")
+        self.parser.add_argument("--data-provider", "-d", type=str, default="static")
+        self.parser.add_argument("--input-data-path", "-n", type=str, default="data/input/coinbase-1h-btc-usd.csv")
         self.parser.add_argument("--pair", "-p", type=str, default="BTC/USD")
-        self.parser.add_argument("--debug", "-n", action='store_false')
+        self.parser.add_argument("--debug", "-D", action='store_false')
         self.parser.add_argument('--mini-batches', type=int, default=1, help='Mini batches', dest='n_minibatches')
         self.parser.add_argument('--train-split-percentage', type=float, default=0.8, help='Train set percentage')
-        self.parser.add_argument('--verbose-model', type=int, default=1, help='Verbose model')
-        self.parser.add_argument('--params-db-path', type=str, default='sqlite:///data/params.db',
-                                 help='Params path')
-        self.parser.add_argument(
-            '--tensor-board-path',
-            type=str,
-            default=os.path.join('data', 'tensorboard'),
-            help='Tensorboard path',
-            dest='tensorboard_path'
-        )
-        self.parser.add_argument('--parallel-jobs', type=int, default=multiprocessing.cpu_count(),
+        self.parser.add_argument('--verbose-model', type=int, default=1, help='Verbose model', dest='model_verbose')
+        self.parser.add_argument('--params-db-path', type=str, default='sqlite:///data/params.db', help='Params path')
+        self.parser.add_argument('--tensorboard-path',
+                                 type=str,
+                                 default=os.path.join('data', 'tensorboard'),
+                                 help='Tensorboard path')
+        self.parser.add_argument('--parallel-jobs',
+                                 type=int,
+                                 default=multiprocessing.cpu_count(),
                                  help='How many processes in parallel')
 
         subparsers = self.parser.add_subparsers(help='Command', dest="command")
 
         optimize_parser = subparsers.add_parser('optimize', description='Optimize model parameters')
         optimize_parser.add_argument('--trials', type=int, default=1, help='Number of trials')
-
-        optimize_parser.add_argument('--verbose-model', type=int, default=1, help='Verbose model', dest='model_verbose')
+        optimize_parser.add_argument('--prune-evals',
+                                     type=int,
+                                     default=2,
+                                     help='Number of pruning evaluations per trial')
+        optimize_parser.add_argument('--eval-tests', type=int, default=1, help='Number of tests per pruning evaluation')
 
         train_parser = subparsers.add_parser('train', description='Train model')
-        train_parser.add_argument('--epochs', type=int, default=1, help='Number of epochs to train')
+        train_parser.add_argument('--epochs', type=int, default=10, help='Number of epochs to train')
+        train_parser.add_argument('--save-every', type=int, default=1, help='Save the trained model every n epochs')
+        train_parser.add_argument('--no-test', dest="test_trained", action="store_false", help='Test each saved model')
+        train_parser.add_argument('--render-test', dest="render_test",
+                                  action="store_true", help='Render the test environment')
+        train_parser.add_argument('--no-report', dest="render_report", action="store_false",
+                                  help='Render the performance report')
+        train_parser.add_argument('--save-report', dest="save_report", action="store_true",
+                                  help='Save the performance report as .html')
 
         test_parser = subparsers.add_parser('test', description='Test model')
-        test_parser.add_argument('--model-epoch', type=int, default=1, help='Model epoch index')
-        test_parser.add_argument('--no-render', action='store_false', help='Do not render test')
+        test_parser.add_argument('--model-epoch', type=int, default=0, help='Model epoch index')
+        test_parser.add_argument('--no-render', dest="render_env", action="store_false",
+                                 help='Render the test environment')
+        test_parser.add_argument('--no-report', dest="render_report", action="store_false",
+                                 help='Render the performance report')
+        test_parser.add_argument('--save-report', dest="save_report", action="store_true",
+                                 help='Save the performance report as .html')
 
         subparsers.add_parser('update-static-data', description='Update static data')
 
diff --git a/lib/data/features/indicators.py b/lib/data/features/indicators.py
index 027af49..b5e8880 100644
--- a/lib/data/features/indicators.py
+++ b/lib/data/features/indicators.py
@@ -35,10 +35,10 @@
     ('KCLI', ta.keltner_channel_lband_indicator, ['High', 'Low', 'Close']),
     ('DCHI', ta.donchian_channel_hband_indicator, ['Close']),
     ('DCLI', ta.donchian_channel_lband_indicator, ['Close']),
-    ('ADI', ta.acc_dist_index, ['High', 'Low', 'Close', 'volume']),
-    ('OBV', ta.on_balance_volume, ['close', 'volume']),
+    ('ADI', ta.acc_dist_index, ['High', 'Low', 'Close', 'Volume BTC']),
+    ('OBV', ta.on_balance_volume, ['Close', 'Volume BTC']),
     ('CMF', ta.chaikin_money_flow, ['High', 'Low', 'Close', 'Volume BTC']),
-    ('FI', ta.force_index, ['Close', 'Volume']),
+    ('FI', ta.force_index, ['Close', 'Volume BTC']),
     ('EM', ta.ease_of_movement, ['High', 'Low', 'Close', 'Volume BTC']),
     ('VPT', ta.volume_price_trend, ['Close', 'Volume BTC']),
     ('NVI', ta.negative_volume_index, ['Close', 'Volume BTC']),
diff --git a/lib/env/TradingEnv.py b/lib/env/TradingEnv.py
index 9f7e8d9..d827ddc 100644
--- a/lib/env/TradingEnv.py
+++ b/lib/env/TradingEnv.py
@@ -115,7 +115,6 @@ def _take_action(self, action: int):
 
         current_net_worth = round(self.balance + self.asset_held * self._current_price(), self.base_precision)
         self.net_worths.append(current_net_worth)
-
         self.account_history = self.account_history.append({
             'balance': self.balance,
             'asset_bought': asset_bought,
@@ -155,6 +154,7 @@ def _reward(self):
 
     def _next_observation(self):
         self.current_ohlcv = self.data_provider.next_ohlcv()
+        self.timestamps.append(pd.to_datetime(self.current_ohlcv.Date.item(), unit='s'))
         self.observations = self.observations.append(self.current_ohlcv, ignore_index=True)
 
         if self.stationarize_obs:
@@ -187,6 +187,7 @@ def reset(self):
 
         self.balance = self.initial_balance
         self.net_worths = [self.initial_balance]
+        self.timestamps = []
         self.asset_held = 0
         self.current_step = 0
 
@@ -210,8 +211,7 @@ def step(self, action):
         obs = self._next_observation()
         reward = self._reward()
         done = self._done()
-
-        return obs, reward, done, {}
+        return obs, reward, done, {'networths': self.net_worths, 'timestamps': self.timestamps}
 
     def render(self, mode='human'):
         if mode == 'system':
diff --git a/requirements.base.txt b/requirements.base.txt
index f352090..3b178e5 100644
--- a/requirements.base.txt
+++ b/requirements.base.txt
@@ -10,4 +10,5 @@ statsmodels==0.10.0rc2
 empyrical
 ccxt
 psycopg2
-configparser
\ No newline at end of file
+configparser
+quantstats
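
For reference, assuming the entry point is still invoked as python cli.py, the flags introduced by this patch could be exercised roughly as follows (the numeric values are illustrative only, not defaults from the patch):

    python cli.py optimize --trials 10 --prune-evals 3 --eval-tests 2
    python cli.py train --epochs 10 --save-every 2 --render-test --save-report
    python cli.py test --model-epoch 9 --no-render --save-report

Because --no-test, --no-render and --no-report are store_false switches, each saved model is tested and the quantstats report is rendered by default; --save-report additionally writes the HTML report to data/reports/{study_name}__{model_epoch}.html.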