Added the code for the Base and Dummy exchange classes to the exchang… #121

Open
wants to merge 7 commits into master
1 change: 1 addition & 0 deletions .gitignore
@@ -1,3 +1,4 @@
.idea/
.vscode
.ipynb_checkpoints
.pytest_cache
16 changes: 13 additions & 3 deletions cli.py
@@ -6,6 +6,7 @@
from lib.util.logger import init_logger
from lib.cli.functions import download_data_async
from lib.env.reward import BaseRewardStrategy, IncrementalProfit, WeightedUnrealizedProfit
from lib.env import TradingMode

np.warnings.filterwarnings('ignore')

@@ -19,7 +20,10 @@
def run_optimize(args, logger):
from lib.RLTrader import RLTrader

trader = RLTrader(**vars(args), logger=logger, reward_strategy=reward_strategy)
trader = RLTrader(**vars(args),
logger=logger,
reward_strategy=reward_strategy,
trading_mode=TradingMode.TRAIN)
trader.optimize(n_trials=args.trials)


@@ -41,16 +45,22 @@ def run_optimize(args, logger):

from lib.RLTrader import RLTrader

trader = RLTrader(**vars(args), logger=logger, reward_strategy=reward_strategy)

if args.command == 'train':
trader = RLTrader(**vars(args),
logger=logger,
reward_strategy=reward_strategy,
trading_mode=TradingMode.TRAIN)
trader.train(n_epochs=args.epochs,
save_every=args.save_every,
test_trained_model=args.test_trained,
render_test_env=args.render_test,
render_report=args.render_report,
save_report=args.save_report)
elif args.command == 'test':
trader = RLTrader(**vars(args),
logger=logger,
reward_strategy=reward_strategy,
trading_mode=TradingMode.TEST)
trader.test(model_epoch=args.model_epoch,
render_env=args.render_env,
render_report=args.render_report,
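Both the train and test commands now pin an explicit TradingMode when constructing RLTrader; that flag is what ultimately decides how TradingEnv builds its exchange (see the TradingEnv diff below). A minimal sketch of the same pattern used directly against the environment, assuming `provider` is an already-configured BaseDataProvider (the helper name is mine, not part of this PR):

```python
from lib.env import TradingEnv, TradingMode


def make_env(provider, mode: TradingMode = TradingMode.TRAIN) -> TradingEnv:
    # TRAIN and TEST hand an initial balance to the simulated exchange;
    # PAPER and LIVE construct the exchange without one (see TradingEnv.__init__ below).
    return TradingEnv(provider, trading_mode=mode, initial_balance=10000)
```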
Binary file modified data/params.db
11 changes: 7 additions & 4 deletions lib/RLTrader.py
@@ -13,7 +13,7 @@
from stable_baselines.common import set_global_seeds
from stable_baselines import PPO2

from lib.env.TradingEnv import TradingEnv
from lib.env.TradingEnv import TradingEnv, TradingMode
from lib.env.reward import BaseRewardStrategy, IncrementalProfit, WeightedUnrealizedProfit
from lib.data.providers.dates import ProviderDateFormat
from lib.data.providers import BaseDataProvider, StaticDataProvider, ExchangeDataProvider
@@ -134,8 +134,8 @@ def optimize_params(self, trial, n_prune_evals_per_trial: int = 2, n_tests_per_e

del test_provider

train_env = DummyVecEnv([lambda: TradingEnv(train_provider)])
validation_env = DummyVecEnv([lambda: TradingEnv(validation_provider)])
train_env = DummyVecEnv([lambda: TradingEnv(train_provider, trading_mode=TradingMode.TRAIN)])
validation_env = DummyVecEnv([lambda: TradingEnv(validation_provider, trading_mode=TradingMode.TRAIN)])

model_params = self.optimize_agent_params(trial)
model = self.Model(self.Policy,
@@ -157,7 +157,7 @@ def optimize_params(self, trial, n_prune_evals_per_trial: int = 2, n_tests_per_e
rewards = []
n_episodes, reward_sum = 0, 0.0

trades = train_env.get_attr('trades')
trades = [exchange.trades for exchange in train_env.get_attr('exchange')]

if len(trades[0]) < 1:
self.logger.info(f'Pruning trial for not making any trades: {eval_idx}')
@@ -293,3 +293,6 @@ def test(self, model_epoch: int = 0, render_env: bool = True, render_report: boo

self.logger.info(
f'Finished testing model ({self.study_name}__{model_epoch}): ${"{:.2f}".format(np.sum(rewards))}')

def live(self, paper_mode: bool = True):
pass
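A behavioural change worth noting: trade bookkeeping has moved from the environment onto its exchange, so the pruning check in optimize_params now reads trades through DummyVecEnv.get_attr('exchange') instead of get_attr('trades'). A minimal sketch of that pattern (the helper name is mine, not part of this PR):

```python
def made_any_trades(vec_env) -> bool:
    # get_attr('exchange') returns one exchange object per wrapped env;
    # each exchange now keeps its own trade log on behalf of TradingEnv.
    trades = [exchange.trades for exchange in vec_env.get_attr('exchange')]
    return len(trades[0]) >= 1
```

optimize_params prunes the Optuna trial when no trades were made, exactly as the old trades-on-the-env check did.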
4 changes: 4 additions & 0 deletions lib/cli/RLTraderCLI.py
@@ -70,6 +70,10 @@ def __init__(self):
test_parser.add_argument('--save-report', dest="save_report", action="store_true",
help='Save the performance report as .html')

live_parser = subparsers.add_parser('live', description='Live model')
live_parser.add_argument('--paper-mode', dest="paper_mode", action="store_true",
help='Trade in paper mode')

subparsers.add_parser('update-static-data', description='Update static data')

self.parser.set_defaults(**defaults)
2 changes: 1 addition & 1 deletion lib/data/features/indicators.py
@@ -48,8 +48,8 @@


def add_indicators(df) -> pd.DataFrame:
wrapper = lambda func, args: func(*args)
for name, f, arg_names in indicators:
wrapper = lambda func, args: func(*args)
args = [df[arg_name] for arg_name in arg_names]
df[name] = wrapper(f, args)
df.fillna(method='bfill', inplace=True)
143 changes: 66 additions & 77 deletions lib/env/TradingEnv.py
@@ -6,6 +6,7 @@
from enum import Enum
from typing import List, Dict

from lib.env.exchange import BaseExchange, SimulatedExchange
from lib.env.render import TradingChart
from lib.env.reward import BaseRewardStrategy, IncrementalProfit, WeightedUnrealizedProfit
from lib.env.trade import BaseTradeStrategy, SimulatedTradeStrategy
@@ -20,18 +20,28 @@ class TradingEnvAction(Enum):
HOLD = 2


class TradingMode(Enum):
TRAIN = 0
TEST = 1
PAPER = 2
LIVE = 3


class TradingEnv(gym.Env):
'''A reinforcement trading environment made for use with gym-enabled algorithms'''
"""A reinforcement trading environment made for use with gym-enabled algorithms"""
metadata = {'render.modes': ['human', 'system', 'none']}
viewer = None

def __init__(self,
data_provider: BaseDataProvider,
exchange: BaseExchange = SimulatedExchange,
reward_strategy: BaseRewardStrategy = IncrementalProfit,
trade_strategy: BaseTradeStrategy = SimulatedTradeStrategy,
initial_balance: int = 10000,
commissionPercent: float = 0.25,
maxSlippagePercent: float = 2.0,
commission_pct: float = 0.25,
max_slippage_pct: float = 2.0,
trading_mode: TradingMode = TradingMode.PAPER,
exchange_args: Dict = {},
**kwargs):
super(TradingEnv, self).__init__()

@@ -42,19 +42,29 @@ def __init__(self,
self.min_cost_limit: float = kwargs.get('min_cost_limit', 1E-3)
self.min_amount_limit: float = kwargs.get('min_amount_limit', 1E-3)

self.initial_balance = round(initial_balance, self.base_precision)
self.commissionPercent = commissionPercent
self.maxSlippagePercent = maxSlippagePercent
self.commission_pct = commission_pct
self.max_slippage_pct = max_slippage_pct
self.trading_mode = trading_mode

self.data_provider = data_provider
self.reward_strategy = reward_strategy()
self.trade_strategy = trade_strategy(commissionPercent=self.commissionPercent,
maxSlippagePercent=self.maxSlippagePercent,
self.trade_strategy = trade_strategy(commissionPercent=self.commission_pct,
maxSlippagePercent=self.max_slippage_pct,
base_precision=self.base_precision,
asset_precision=self.asset_precision,
min_cost_limit=self.min_cost_limit,
min_amount_limit=self.min_amount_limit)

if self.trading_mode == TradingMode.TRAIN or self.trading_mode == TradingMode.TEST:
self.exchange = exchange(self, initial_balance, **exchange_args)

elif self.trading_mode == TradingMode.PAPER:
self.exchange = exchange(self, **exchange_args)

elif self.trading_mode == TradingMode.LIVE:
self.exchange = exchange(self, **exchange_args)


self.render_benchmarks: List[Dict] = kwargs.get('render_benchmarks', [])
self.normalize_obs: bool = kwargs.get('normalize_obs', True)
self.stationarize_obs: bool = kwargs.get('stationarize_obs', True)
@@ -80,59 +80,27 @@ def _get_trade(self, action: int):
action_type: TradingEnvAction = TradingEnvAction(action % n_action_types)
action_amount = float(1 / (action % n_amount_bins + 1))

commission = self.commission_pct / 100
max_slippage = self.max_slippage_pct / 100

amount_asset_to_buy = 0
amount_asset_to_sell = 0

if action_type == TradingEnvAction.BUY and self.balance >= self.min_cost_limit:
price_adjustment = (1 + (self.commissionPercent / 100)) * (1 + (self.maxSlippagePercent / 100))
buy_price = round(self._current_price() * price_adjustment, self.base_precision)
amount_asset_to_buy = round(self.balance * action_amount / buy_price, self.asset_precision)
elif action_type == TradingEnvAction.SELL and self.asset_held >= self.min_amount_limit:
amount_asset_to_sell = round(self.asset_held * action_amount, self.asset_precision)

return amount_asset_to_buy, amount_asset_to_sell

def _take_action(self, action: int):
amount_asset_to_buy, amount_asset_to_sell = self._get_trade(action)

asset_bought, asset_sold, purchase_cost, sale_revenue = self.trade_strategy.trade(buy_amount=amount_asset_to_buy,
sell_amount=amount_asset_to_sell,
balance=self.balance,
asset_held=self.asset_held,
current_price=self._current_price)

if asset_bought:
self.asset_held += asset_bought
self.balance -= purchase_cost

self.trades.append({'step': self.current_step,
'amount': asset_bought,
'total': purchase_cost,
'type': 'buy'})
elif asset_sold:
self.asset_held -= asset_sold
self.balance += sale_revenue
if action_type == TradingEnvAction.BUY and self.exchange.balance >= self.min_cost_limit:
price_adjustment = (1 + commission) * (1 + max_slippage)
buy_price = self._current_price() * price_adjustment
buy_price = round(buy_price, self.base_precision)
amount_asset_to_buy = self.exchange.balance * action_amount / buy_price
amount_asset_to_buy = round(amount_asset_to_buy, self.asset_precision)

self.reward_strategy.reset_reward()
elif action_type == TradingEnvAction.SELL and self.exchange.asset_held >= self.min_amount_limit:
amount_asset_to_sell = self.exchange.asset_held * action_amount
amount_asset_to_sell = round(amount_asset_to_sell, self.asset_precision)

self.trades.append({'step': self.current_step,
'amount': asset_sold,
'total': sale_revenue,
'type': 'sell'})

current_net_worth = round(self.balance + self.asset_held * self._current_price(), self.base_precision)
self.net_worths.append(current_net_worth)
self.account_history = self.account_history.append({
'balance': self.balance,
'asset_held': self.asset_held,
'asset_bought': asset_bought,
'purchase_cost': purchase_cost,
'asset_sold': asset_sold,
'sale_revenue': sale_revenue,
}, ignore_index=True)
return amount_asset_to_buy, amount_asset_to_sell

def _done(self):
lost_90_percent_net_worth = float(self.net_worths[-1]) < (self.initial_balance / 10)
lost_90_percent_net_worth = float(self.exchange.net_worths[-1]) < (self.exchange.initial_balance / 10)
has_next_frame = self.data_provider.has_next_ohlcv()

return lost_90_percent_net_worth or not has_next_frame
@@ -141,8 +130,8 @@ def _reward(self):
reward = self.reward_strategy.get_reward(current_step=self.current_step,
current_price=self._current_price,
observations=self.observations,
account_history=self.account_history,
net_worths=self.net_worths)
account_history=self.exchange.account_history,
net_worths=self.exchange.net_worths)

reward = float(reward) if np.isfinite(float(reward)) else 0

@@ -176,9 +165,9 @@ def _next_observation(self):
obs = observations.values[-1]

if self.stationarize_obs:
scaled_history = log_and_difference(self.account_history, inplace=False)
scaled_history = log_and_difference(self.exchange.get_account_history(), inplace=False)
else:
scaled_history = self.account_history
scaled_history = self.exchange.get_account_history()

if self.normalize_obs:
scaled_history = max_min_normalize(scaled_history, inplace=False)
@@ -193,53 +182,53 @@ def reset(self):
def reset(self):
self.data_provider.reset_ohlcv_index()

self.balance = self.initial_balance
self.net_worths = [self.initial_balance]
if self.trading_mode == TradingMode.TRAIN or self.trading_mode == TradingMode.TEST:
self.exchange.reset()

self.timestamps = []
self.asset_held = 0
self.current_step = 0

self.reward_strategy.reset_reward()

self.account_history = pd.DataFrame([{
'balance': self.balance,
'asset_held': self.asset_held,
'asset_bought': 0,
'purchase_cost': 0,
'asset_sold': 0,
'sale_revenue': 0,
}])
self.trades = []
self.rewards = [0]

return self._next_observation()

def step(self, action):
self._take_action(action)
amount_asset_to_buy, amount_asset_to_sell = self._get_trade(action)

if amount_asset_to_buy:
self.exchange.buy(amount_asset_to_buy)
elif amount_asset_to_sell:
self.exchange.sell(amount_asset_to_sell)
self.reward_strategy.reset_reward()
else:
self.exchange.hold()

self.current_step += 1

obs = self._next_observation()
reward = self._reward()
done = self._done()

return obs, reward, done, {'net_worths': self.net_worths, 'timestamps': self.timestamps}
return obs, reward, done, {'net_worths': self.exchange.net_worths, 'timestamps': self.timestamps}

def render(self, mode='human'):

if mode == 'system':
self.logger.info('Price: ' + str(self._current_price()))
self.logger.info('Bought: ' + str(self.account_history['asset_bought'][self.current_step]))
self.logger.info('Sold: ' + str(self.account_history['asset_sold'][self.current_step]))
self.logger.info('Net worth: ' + str(self.net_worths[-1]))
self.logger.info('Bought: ' + str(self.exchange.account_history['asset_bought'][self.current_step]))
self.logger.info('Sold: ' + str(self.exchange.account_history['asset_sold'][self.current_step]))
self.logger.info('Net worth: ' + str(self.exchange.net_worths[-1]))

elif mode == 'human':
if self.viewer is None:
self.viewer = TradingChart(self.data_provider.data_frame)

self.viewer.render(self.current_step,
self.net_worths,
self.exchange.net_worths,
self.render_benchmarks,
self.trades)
self.exchange.trades)

def close(self):
if self.viewer is not None:
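For concreteness, the buy-side sizing in the reworked _get_trade with the default commission_pct=0.25 and max_slippage_pct=2.0 works out as in the sketch below (the example price, balance, and rounding precisions are illustrative, not values from the PR):

```python
commission = 0.25 / 100                                    # 0.0025
max_slippage = 2.0 / 100                                   # 0.02
price_adjustment = (1 + commission) * (1 + max_slippage)   # 1.02255

current_price = 10000.0
buy_price = round(current_price * price_adjustment, 2)     # 10225.50

balance, action_amount = 5000.0, 0.5                       # spend half the balance
amount_to_buy = round(balance * action_amount / buy_price, 8)  # ~0.24448682
```

The sell side is simpler: amount_asset_to_sell = round(asset_held * action_amount, asset_precision), with no price adjustment applied at sizing time.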
2 changes: 1 addition & 1 deletion lib/env/__init__.py
@@ -1,2 +1,2 @@
from lib.env.TradingEnv import TradingEnv
from lib.env.TradingEnv import TradingEnv, TradingMode
from lib.env.render.TradingChart import TradingChart
29 changes: 29 additions & 0 deletions lib/env/exchange/BaseExchange.py
@@ -0,0 +1,29 @@

import abc
import pandas as pd

from enum import Enum
from lib.env import TradingEnv


class BaseExchange(object, metaclass=abc.ABCMeta):

@abc.abstractmethod
def __init__(self, env: TradingEnv, **kwargs):
pass

@abc.abstractmethod
def get_account_history(self):
raise NotImplementedError

@abc.abstractmethod
def buy(self, amount: float):
raise NotImplementedError

@abc.abstractmethod
def sell(self, amount: float):
raise NotImplementedError

@abc.abstractmethod
def hold(self):
raise NotImplementedError
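The concrete simulated exchange referenced above (SimulatedExchange in lib/env/exchange) is not shown in this section, but the attributes TradingEnv relies on are visible in its diff: balance, asset_held, net_worths, trades, account_history, initial_balance, plus a reset() call in TRAIN/TEST mode. A minimal illustrative subclass satisfying that contract could look like the sketch below; the class name and the naive fill-at-current-price logic are assumptions, not the PR's actual implementation:

```python
import pandas as pd

from lib.env.exchange import BaseExchange


class MinimalSimulatedExchange(BaseExchange):
    """Illustrative only: fills every order instantly at the env's current price."""

    def __init__(self, env, initial_balance: float = 10000, **kwargs):
        self.env = env
        self.initial_balance = initial_balance
        self.reset()

    def reset(self):
        self.balance = self.initial_balance
        self.asset_held = 0.0
        self.net_worths = [self.initial_balance]
        self.trades = []
        self.account_history = pd.DataFrame([self._row()])

    def get_account_history(self):
        return self.account_history

    def buy(self, amount: float):
        cost = amount * self.env._current_price()
        self.balance -= cost
        self.asset_held += amount
        self.trades.append({'step': self.env.current_step, 'amount': amount,
                            'total': cost, 'type': 'buy'})
        self._record(asset_bought=amount, purchase_cost=cost)

    def sell(self, amount: float):
        revenue = amount * self.env._current_price()
        self.balance += revenue
        self.asset_held -= amount
        self.trades.append({'step': self.env.current_step, 'amount': amount,
                            'total': revenue, 'type': 'sell'})
        self._record(asset_sold=amount, sale_revenue=revenue)

    def hold(self):
        self._record()

    def _row(self, **columns):
        row = {'balance': self.balance, 'asset_held': self.asset_held,
               'asset_bought': 0, 'purchase_cost': 0,
               'asset_sold': 0, 'sale_revenue': 0}
        row.update(columns)
        return row

    def _record(self, **columns):
        # DataFrame.append mirrors the pandas usage already present in TradingEnv.
        self.account_history = self.account_history.append(self._row(**columns),
                                                            ignore_index=True)
        net_worth = self.balance + self.asset_held * self.env._current_price()
        self.net_worths.append(net_worth)
```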