Added the code for the Base and Dummy exchange classes to the exchang… #121

Open
wants to merge 7 commits into master
1 change: 1 addition & 0 deletions .gitignore
@@ -1,3 +1,4 @@
.idea/
.vscode
.ipynb_checkpoints
.pytest_cache
16 changes: 13 additions & 3 deletions cli.py
@@ -6,6 +6,7 @@
from lib.util.logger import init_logger
from lib.cli.functions import download_data_async
from lib.env.reward import BaseRewardStrategy, IncrementalProfit, WeightedUnrealizedProfit
from lib.env import TradingMode

np.warnings.filterwarnings('ignore')

@@ -19,7 +20,10 @@
def run_optimize(args, logger):
from lib.RLTrader import RLTrader

trader = RLTrader(**vars(args), logger=logger, reward_strategy=reward_strategy)
trader = RLTrader(**vars(args),
logger=logger,
reward_strategy=reward_strategy,
trading_mode=TradingMode.TRAIN)
trader.optimize(n_trials=args.trials)


@@ -41,16 +45,22 @@ def run_optimize(args, logger):

from lib.RLTrader import RLTrader

trader = RLTrader(**vars(args), logger=logger, reward_strategy=reward_strategy)

if args.command == 'train':
trader = RLTrader(**vars(args),
logger=logger,
reward_strategy=reward_strategy,
trading_mode=TradingMode.TRAIN)
trader.train(n_epochs=args.epochs,
save_every=args.save_every,
test_trained_model=args.test_trained,
render_test_env=args.render_test,
render_report=args.render_report,
save_report=args.save_report)
elif args.command == 'test':
trader = RLTrader(**vars(args),
logger=logger,
reward_strategy=reward_strategy,
trading_mode=TradingMode.TEST)
trader.test(model_epoch=args.model_epoch,
render_env=args.render_env,
render_report=args.render_report,
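Both the train and test commands now pin an explicit TradingMode when constructing RLTrader; that flag is what ultimately decides how TradingEnv builds its exchange (see the TradingEnv diff below). A minimal sketch of the same pattern used directly against the environment, assuming `provider` is an already-configured BaseDataProvider (the helper name is mine, not part of this PR):

```python
from lib.env import TradingEnv, TradingMode


def make_env(provider, mode: TradingMode = TradingMode.TRAIN) -> TradingEnv:
    # TRAIN and TEST hand an initial balance to the simulated exchange;
    # PAPER and LIVE construct the exchange without one (see TradingEnv.__init__ below).
    return TradingEnv(provider, trading_mode=mode, initial_balance=10000)
```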
Binary file modified data/params.db
11 changes: 7 additions & 4 deletions lib/RLTrader.py
@@ -13,7 +13,7 @@
from stable_baselines.common import set_global_seeds
from stable_baselines import PPO2

from lib.env.TradingEnv import TradingEnv
from lib.env.TradingEnv import TradingEnv, TradingMode
from lib.env.reward import BaseRewardStrategy, IncrementalProfit, WeightedUnrealizedProfit
from lib.data.providers.dates import ProviderDateFormat
from lib.data.providers import BaseDataProvider, StaticDataProvider, ExchangeDataProvider
@@ -134,8 +134,8 @@ def optimize_params(self, trial, n_prune_evals_per_trial: int = 2, n_tests_per_e

del test_provider

train_env = DummyVecEnv([lambda: TradingEnv(train_provider)])
validation_env = DummyVecEnv([lambda: TradingEnv(validation_provider)])
train_env = DummyVecEnv([lambda: TradingEnv(train_provider, trading_mode=TradingMode.TRAIN)])
validation_env = DummyVecEnv([lambda: TradingEnv(validation_provider, trading_mode=TradingMode.TRAIN)])

model_params = self.optimize_agent_params(trial)
model = self.Model(self.Policy,
@@ -157,7 +157,7 @@ def optimize_params(self, trial, n_prune_evals_per_trial: int = 2, n_tests_per_e
rewards = []
n_episodes, reward_sum = 0, 0.0

trades = train_env.get_attr('trades')
trades = [exchange.trades for exchange in train_env.get_attr('exchange')]

if len(trades[0]) < 1:
self.logger.info(f'Pruning trial for not making any trades: {eval_idx}')
@@ -293,3 +293,6 @@ def test(self, model_epoch: int = 0, render_env: bool = True, render_report: boo

self.logger.info(
f'Finished testing model ({self.study_name}__{model_epoch}): ${"{:.2f}".format(np.sum(rewards))}')

def live(self, paper_mode: bool = True):
pass
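A behavioural change worth noting: trade bookkeeping has moved from the environment onto its exchange, so the pruning check in optimize_params now reads trades through DummyVecEnv.get_attr('exchange') instead of get_attr('trades'). A minimal sketch of that pattern (the helper name is mine, not part of this PR):

```python
def made_any_trades(vec_env) -> bool:
    # get_attr('exchange') returns one exchange object per wrapped env;
    # each exchange now keeps its own trade log on behalf of TradingEnv.
    trades = [exchange.trades for exchange in vec_env.get_attr('exchange')]
    return len(trades[0]) >= 1
```

optimize_params prunes the Optuna trial when no trades were made, exactly as the old trades-on-the-env check did.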
4 changes: 4 additions & 0 deletions lib/cli/RLTraderCLI.py
@@ -70,6 +70,10 @@ def __init__(self):
test_parser.add_argument('--save-report', dest="save_report", action="store_true",
help='Save the performance report as .html')

live_parser = subparsers.add_parser('live', description='Live model')
live_parser.add_argument('--paper-mode', dest="paper_mode", action="store_true",
help='Trade in paper mode')

subparsers.add_parser('update-static-data', description='Update static data')

self.parser.set_defaults(**defaults)
2 changes: 1 addition & 1 deletion lib/data/features/indicators.py
@@ -48,8 +48,8 @@


def add_indicators(df) -> pd.DataFrame:
wrapper = lambda func, args: func(*args)
for name, f, arg_names in indicators:
wrapper = lambda func, args: func(*args)
args = [df[arg_name] for arg_name in arg_names]
df[name] = wrapper(f, args)
df.fillna(method='bfill', inplace=True)
143 changes: 66 additions & 77 deletions lib/env/TradingEnv.py
@@ -6,6 +6,7 @@
from enum import Enum
from typing import List, Dict

from lib.env.exchange import BaseExchange, SimulatedExchange
from lib.env.render import TradingChart
from lib.env.reward import BaseRewardStrategy, IncrementalProfit, WeightedUnrealizedProfit
from lib.env.trade import BaseTradeStrategy, SimulatedTradeStrategy
@@ -20,18 +20,28 @@ class TradingEnvAction(Enum):
HOLD = 2


class TradingMode(Enum):
TRAIN = 0
TEST = 1
PAPER = 2
LIVE = 3


class TradingEnv(gym.Env):
'''A reinforcement trading environment made for use with gym-enabled algorithms'''
"""A reinforcement trading environment made for use with gym-enabled algorithms"""
metadata = {'render.modes': ['human', 'system', 'none']}
viewer = None

def __init__(self,
data_provider: BaseDataProvider,
exchange: BaseExchange = SimulatedExchange,
reward_strategy: BaseRewardStrategy = IncrementalProfit,
trade_strategy: BaseTradeStrategy = SimulatedTradeStrategy,
initial_balance: int = 10000,
commissionPercent: float = 0.25,
maxSlippagePercent: float = 2.0,
commission_pct: float = 0.25,
max_slippage_pct: float = 2.0,
trading_mode: TradingMode = TradingMode.PAPER,
exchange_args: Dict = {},
**kwargs):
super(TradingEnv, self).__init__()

@@ -42,19 +42,29 @@ def __init__(self,
self.min_cost_limit: float = kwargs.get('min_cost_limit', 1E-3)
self.min_amount_limit: float = kwargs.get('min_amount_limit', 1E-3)

self.initial_balance = round(initial_balance, self.base_precision)
self.commissionPercent = commissionPercent
self.maxSlippagePercent = maxSlippagePercent
self.commission_pct = commission_pct
self.max_slippage_pct = max_slippage_pct
self.trading_mode = trading_mode

self.data_provider = data_provider
self.reward_strategy = reward_strategy()
self.trade_strategy = trade_strategy(commissionPercent=self.commissionPercent,
maxSlippagePercent=self.maxSlippagePercent,
self.trade_strategy = trade_strategy(commissionPercent=self.commission_pct,
maxSlippagePercent=self.max_slippage_pct,
base_precision=self.base_precision,
asset_precision=self.asset_precision,
min_cost_limit=self.min_cost_limit,
min_amount_limit=self.min_amount_limit)

if self.trading_mode == TradingMode.TRAIN or self.trading_mode == TradingMode.TEST:
self.exchange = exchange(self, initial_balance, **exchange_args)

elif self.trading_mode == TradingMode.PAPER:
self.exchange = exchange(self, **exchange_args)

elif self.trading_mode == TradingMode.LIVE:
self.exchange = exchange(self, **exchange_args)


self.render_benchmarks: List[Dict] = kwargs.get('render_benchmarks', [])
self.normalize_obs: bool = kwargs.get('normalize_obs', True)
self.stationarize_obs: bool = kwargs.get('stationarize_obs', True)
@@ -80,59 +80,27 @@ def _get_trade(self, action: int):
action_type: TradingEnvAction = TradingEnvAction(action % n_action_types)
action_amount = float(1 / (action % n_amount_bins + 1))

commission = self.commission_pct / 100
max_slippage = self.max_slippage_pct / 100

amount_asset_to_buy = 0
amount_asset_to_sell = 0

if action_type == TradingEnvAction.BUY and self.balance >= self.min_cost_limit:
price_adjustment = (1 + (self.commissionPercent / 100)) * (1 + (self.maxSlippagePercent / 100))
buy_price = round(self._current_price() * price_adjustment, self.base_precision)
amount_asset_to_buy = round(self.balance * action_amount / buy_price, self.asset_precision)
elif action_type == TradingEnvAction.SELL and self.asset_held >= self.min_amount_limit:
amount_asset_to_sell = round(self.asset_held * action_amount, self.asset_precision)

return amount_asset_to_buy, amount_asset_to_sell

def _take_action(self, action: int):
amount_asset_to_buy, amount_asset_to_sell = self._get_trade(action)

asset_bought, asset_sold, purchase_cost, sale_revenue = self.trade_strategy.trade(buy_amount=amount_asset_to_buy,
sell_amount=amount_asset_to_sell,
balance=self.balance,
asset_held=self.asset_held,
current_price=self._current_price)

if asset_bought:
self.asset_held += asset_bought
self.balance -= purchase_cost

self.trades.append({'step': self.current_step,
'amount': asset_bought,
'total': purchase_cost,
'type': 'buy'})
elif asset_sold:
self.asset_held -= asset_sold
self.balance += sale_revenue
if action_type == TradingEnvAction.BUY and self.exchange.balance >= self.min_cost_limit:
price_adjustment = (1 + commission) * (1 + max_slippage)
buy_price = self._current_price() * price_adjustment
buy_price = round(buy_price, self.base_precision)
amount_asset_to_buy = self.exchange.balance * action_amount / buy_price
amount_asset_to_buy = round(amount_asset_to_buy, self.asset_precision)

self.reward_strategy.reset_reward()
elif action_type == TradingEnvAction.SELL and self.exchange.asset_held >= self.min_amount_limit:
amount_asset_to_sell = self.exchange.asset_held * action_amount
amount_asset_to_sell = round(amount_asset_to_sell, self.asset_precision)

self.trades.append({'step': self.current_step,
'amount': asset_sold,
'total': sale_revenue,
'type': 'sell'})

current_net_worth = round(self.balance + self.asset_held * self._current_price(), self.base_precision)
self.net_worths.append(current_net_worth)
self.account_history = self.account_history.append({
'balance': self.balance,
'asset_held': self.asset_held,
'asset_bought': asset_bought,
'purchase_cost': purchase_cost,
'asset_sold': asset_sold,
'sale_revenue': sale_revenue,
}, ignore_index=True)
return amount_asset_to_buy, amount_asset_to_sell

def _done(self):
lost_90_percent_net_worth = float(self.net_worths[-1]) < (self.initial_balance / 10)
lost_90_percent_net_worth = float(self.exchange.net_worths[-1]) < (self.exchange.initial_balance / 10)
has_next_frame = self.data_provider.has_next_ohlcv()

return lost_90_percent_net_worth or not has_next_frame
@@ -141,8 +130,8 @@ def _reward(self):
reward = self.reward_strategy.get_reward(current_step=self.current_step,
current_price=self._current_price,
observations=self.observations,
account_history=self.account_history,
net_worths=self.net_worths)
account_history=self.exchange.account_history,
net_worths=self.exchange.net_worths)

reward = float(reward) if np.isfinite(float(reward)) else 0

@@ -176,9 +165,9 @@ def _next_observation(self):
obs = observations.values[-1]

if self.stationarize_obs:
scaled_history = log_and_difference(self.account_history, inplace=False)
scaled_history = log_and_difference(self.exchange.get_account_history(), inplace=False)
else:
scaled_history = self.account_history
scaled_history = self.exchange.get_account_history()

if self.normalize_obs:
scaled_history = max_min_normalize(scaled_history, inplace=False)
@@ -193,53 +182,53 @@ def reset(self):
def reset(self):
self.data_provider.reset_ohlcv_index()

self.balance = self.initial_balance
self.net_worths = [self.initial_balance]
if self.trading_mode == TradingMode.TRAIN or self.trading_mode == TradingMode.TEST:
self.exchange.reset()

self.timestamps = []
self.asset_held = 0
self.current_step = 0

self.reward_strategy.reset_reward()

self.account_history = pd.DataFrame([{
'balance': self.balance,
'asset_held': self.asset_held,
'asset_bought': 0,
'purchase_cost': 0,
'asset_sold': 0,
'sale_revenue': 0,
}])
self.trades = []
self.rewards = [0]

return self._next_observation()

def step(self, action):
self._take_action(action)
amount_asset_to_buy, amount_asset_to_sell = self._get_trade(action)

if amount_asset_to_buy:
self.exchange.buy(amount_asset_to_buy)
elif amount_asset_to_sell:
self.exchange.sell(amount_asset_to_sell)
self.reward_strategy.reset_reward()
else:
self.exchange.hold()

self.current_step += 1

obs = self._next_observation()
reward = self._reward()
done = self._done()

return obs, reward, done, {'net_worths': self.net_worths, 'timestamps': self.timestamps}
return obs, reward, done, {'net_worths': self.exchange.net_worths, 'timestamps': self.timestamps}

def render(self, mode='human'):

if mode == 'system':
self.logger.info('Price: ' + str(self._current_price()))
self.logger.info('Bought: ' + str(self.account_history['asset_bought'][self.current_step]))
self.logger.info('Sold: ' + str(self.account_history['asset_sold'][self.current_step]))
self.logger.info('Net worth: ' + str(self.net_worths[-1]))
self.logger.info('Bought: ' + str(self.exchange.account_history['asset_bought'][self.current_step]))
self.logger.info('Sold: ' + str(self.exchange.account_history['asset_sold'][self.current_step]))
self.logger.info('Net worth: ' + str(self.exchange.net_worths[-1]))

elif mode == 'human':
if self.viewer is None:
self.viewer = TradingChart(self.data_provider.data_frame)

self.viewer.render(self.current_step,
self.net_worths,
self.exchange.net_worths,
self.render_benchmarks,
self.trades)
self.exchange.trades)

def close(self):
if self.viewer is not None:
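For concreteness, the buy-side sizing in the reworked _get_trade with the default commission_pct=0.25 and max_slippage_pct=2.0 works out as in the sketch below (the example price, balance, and rounding precisions are illustrative, not values from the PR):

```python
commission = 0.25 / 100                                    # 0.0025
max_slippage = 2.0 / 100                                   # 0.02
price_adjustment = (1 + commission) * (1 + max_slippage)   # 1.02255

current_price = 10000.0
buy_price = round(current_price * price_adjustment, 2)     # 10225.50

balance, action_amount = 5000.0, 0.5                       # spend half the balance
amount_to_buy = round(balance * action_amount / buy_price, 8)  # ~0.24448682
```

The sell side is simpler: amount_asset_to_sell = round(asset_held * action_amount, asset_precision), with no price adjustment applied at sizing time.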
2 changes: 1 addition & 1 deletion lib/env/__init__.py
@@ -1,2 +1,2 @@
from lib.env.TradingEnv import TradingEnv
from lib.env.TradingEnv import TradingEnv, TradingMode
from lib.env.render.TradingChart import TradingChart
29 changes: 29 additions & 0 deletions lib/env/exchange/BaseExchange.py
@@ -0,0 +1,29 @@

import abc
import pandas as pd

from enum import Enum
from lib.env import TradingEnv


class BaseExchange(object, metaclass=abc.ABCMeta):

@abc.abstractmethod
def __init__(self, env: TradingEnv, **kwargs):
pass

@abc.abstractmethod
def get_account_history(self):
raise NotImplementedError

@abc.abstractmethod
def buy(self, amount: float):
raise NotImplementedError

@abc.abstractmethod
def sell(self, amount: float):
raise NotImplementedError

@abc.abstractmethod
def hold(self):
raise NotImplementedError
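The concrete simulated exchange referenced above (SimulatedExchange in lib/env/exchange) is not shown in this section, but the attributes TradingEnv relies on are visible in its diff: balance, asset_held, net_worths, trades, account_history, initial_balance, plus a reset() call in TRAIN/TEST mode. A minimal illustrative subclass satisfying that contract could look like the sketch below; the class name and the naive fill-at-current-price logic are assumptions, not the PR's actual implementation:

```python
import pandas as pd

from lib.env.exchange import BaseExchange


class MinimalSimulatedExchange(BaseExchange):
    """Illustrative only: fills every order instantly at the env's current price."""

    def __init__(self, env, initial_balance: float = 10000, **kwargs):
        self.env = env
        self.initial_balance = initial_balance
        self.reset()

    def reset(self):
        self.balance = self.initial_balance
        self.asset_held = 0.0
        self.net_worths = [self.initial_balance]
        self.trades = []
        self.account_history = pd.DataFrame([self._row()])

    def get_account_history(self):
        return self.account_history

    def buy(self, amount: float):
        cost = amount * self.env._current_price()
        self.balance -= cost
        self.asset_held += amount
        self.trades.append({'step': self.env.current_step, 'amount': amount,
                            'total': cost, 'type': 'buy'})
        self._record(asset_bought=amount, purchase_cost=cost)

    def sell(self, amount: float):
        revenue = amount * self.env._current_price()
        self.balance += revenue
        self.asset_held -= amount
        self.trades.append({'step': self.env.current_step, 'amount': amount,
                            'total': revenue, 'type': 'sell'})
        self._record(asset_sold=amount, sale_revenue=revenue)

    def hold(self):
        self._record()

    def _row(self, **columns):
        row = {'balance': self.balance, 'asset_held': self.asset_held,
               'asset_bought': 0, 'purchase_cost': 0,
               'asset_sold': 0, 'sale_revenue': 0}
        row.update(columns)
        return row

    def _record(self, **columns):
        # DataFrame.append mirrors the pandas usage already present in TradingEnv.
        self.account_history = self.account_history.append(self._row(**columns),
                                                            ignore_index=True)
        net_worth = self.balance + self.asset_held * self.env._current_price()
        self.net_worths.append(net_worth)
```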