From b2ed19a08667e99ec02abc135a3dbea29795101f Mon Sep 17 00:00:00 2001 From: Arunavo Ray Date: Tue, 9 Jul 2019 16:22:42 +0530 Subject: [PATCH 1/2] Added Weighted Unrealised Profit --- .gitignore | 1 + lib/env/TradingEnv.py | 7 +++- lib/env/reward/BaseRewardStrategy.py | 5 +++ lib/env/reward/IncrementalProfit.py | 4 ++ lib/env/reward/WeightedUnrealisedProfit.py | 45 ++++++++++++++++++++++ 5 files changed, 60 insertions(+), 2 deletions(-) create mode 100644 lib/env/reward/WeightedUnrealisedProfit.py diff --git a/.gitignore b/.gitignore index 5c370c0..8bea167 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ data/agents/* data/postgres/* data/log/* *.pkl +venv/* diff --git a/lib/env/TradingEnv.py b/lib/env/TradingEnv.py index 4f1a643..8c05f2b 100644 --- a/lib/env/TradingEnv.py +++ b/lib/env/TradingEnv.py @@ -7,7 +7,7 @@ from typing import List, Dict from lib.env.render import TradingChart -from lib.env.reward import BaseRewardStrategy, IncrementalProfit +from lib.env.reward import BaseRewardStrategy, IncrementalProfit, WeightedUnrealisedProfit from lib.data.providers import BaseDataProvider from lib.data.features.transform import max_min_normalize, mean_normalize, log_and_difference, difference from lib.util.logger import init_logger @@ -26,7 +26,7 @@ class TradingEnv(gym.Env): def __init__(self, data_provider: BaseDataProvider, - reward_strategy: BaseRewardStrategy = IncrementalProfit, + reward_strategy: BaseRewardStrategy = WeightedUnrealisedProfit, initial_balance: int = 10000, commission: float = 0.0025, **kwargs): @@ -91,6 +91,7 @@ def _make_trade(self, action: int, current_price: float): self.last_sold = self.current_step self.asset_held -= asset_sold self.balance += revenue_from_sold + self.reward_strategy.reset_reward() self.trades.append({'step': self.current_step, 'amount': asset_sold, 'total': revenue_from_sold, 'type': 'sell'}) @@ -124,6 +125,7 @@ def _reward(self): net_worths=self.net_worths, account_history=self.account_history, last_bought=self.last_bought, + last_held=self.asset_held, last_sold=self.last_sold, current_price=self._current_price()) @@ -181,6 +183,7 @@ def reset(self): self.current_step = 0 self.last_bought = 0 self.last_sold = 0 + self.reward_strategy.reset_reward() self.account_history = pd.DataFrame([{ 'balance': self.balance, diff --git a/lib/env/reward/BaseRewardStrategy.py b/lib/env/reward/BaseRewardStrategy.py index 4a2e18b..f630e82 100644 --- a/lib/env/reward/BaseRewardStrategy.py +++ b/lib/env/reward/BaseRewardStrategy.py @@ -9,12 +9,17 @@ class BaseRewardStrategy(object, metaclass=ABCMeta): def __init__(self): pass + @abstractmethod + def reset_reward(self): + pass + @abstractmethod def get_reward(self, observations: pd.DataFrame, account_history: pd.DataFrame, net_worths: List[float], last_bought: int, + last_held: int, last_sold: int, current_price: float) -> float: raise NotImplementedError() diff --git a/lib/env/reward/IncrementalProfit.py b/lib/env/reward/IncrementalProfit.py index 304a1db..a159b5a 100644 --- a/lib/env/reward/IncrementalProfit.py +++ b/lib/env/reward/IncrementalProfit.py @@ -9,11 +9,15 @@ class IncrementalProfit(BaseRewardStrategy): def __init__(self): pass + def reset_reward(self): + pass + def get_reward(self, observations: pd.DataFrame, account_history: pd.DataFrame, net_worths: List[float], last_bought: int, + last_held: int, last_sold: int, current_price: float): curr_balance = account_history['balance'].values[-1] diff --git a/lib/env/reward/WeightedUnrealisedProfit.py b/lib/env/reward/WeightedUnrealisedProfit.py new file mode 100644 index 0000000..937ff40 --- /dev/null +++ b/lib/env/reward/WeightedUnrealisedProfit.py @@ -0,0 +1,45 @@ +from collections import deque + +import pandas as pd +import numpy as np +from typing import List + +from lib.env.reward.BaseRewardStrategy import BaseRewardStrategy + + +class WeightedUnrealisedProfit(BaseRewardStrategy): + def __init__(self, **kwargs): + super(WeightedUnrealisedProfit, self).__init__() + + self.decay_rate = kwargs.get('decay_rate', 1e-2) + self.rewards = deque(np.zeros(1, dtype=float)) + self.sum = 0.0 + self.denominator = np.exp(-1 * self.decay_rate) + + def reset_reward(self): + self.rewards = deque(np.zeros(1, dtype=float)) + self.sum = 0.0 + + def cal_reward(self, reward): + stale_reward = self.rewards.popleft() + self.sum = self.sum - np.exp(-1 * self.decay_rate) * stale_reward + self.sum = self.sum * np.exp(-1 * self.decay_rate) + self.sum = self.sum + reward + self.rewards.append(reward) + return self.sum / self.denominator + + def get_reward(self, + observations: pd.DataFrame, + account_history: pd.DataFrame, + net_worths: List[float], + last_bought: int, + last_held: int, + last_sold: int, + current_price: float): + + if account_history['btc_sold'].values[-1] > 0: + reward = self.cal_reward(account_history['revenue_from_sold'].values[-1]) + else: + reward = self.cal_reward(last_held * current_price) + + return reward From be6d144f1887f88e53906694d2454173f6c69675 Mon Sep 17 00:00:00 2001 From: Arunavo Ray Date: Tue, 9 Jul 2019 21:26:31 +0530 Subject: [PATCH 2/2] Fixed Bad Import Issue --- lib/RLTrader.py | 2 +- lib/env/TradingEnv.py | 2 +- lib/env/reward/IncrementalProfit.py | 2 +- lib/env/reward/__init__.py | 1 + 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/RLTrader.py b/lib/RLTrader.py index 850be21..88e8d82 100644 --- a/lib/RLTrader.py +++ b/lib/RLTrader.py @@ -12,7 +12,7 @@ from stable_baselines import PPO2 from lib.env.TradingEnv import TradingEnv -from lib.env.reward import BaseRewardStrategy, IncrementalProfit +from lib.env.reward import BaseRewardStrategy, IncrementalProfit, WeightedUnrealisedProfit from lib.data.providers.dates import ProviderDateFormat from lib.data.providers import BaseDataProvider, StaticDataProvider, ExchangeDataProvider from lib.util.logger import init_logger diff --git a/lib/env/TradingEnv.py b/lib/env/TradingEnv.py index 8c05f2b..bae0d50 100644 --- a/lib/env/TradingEnv.py +++ b/lib/env/TradingEnv.py @@ -26,7 +26,7 @@ class TradingEnv(gym.Env): def __init__(self, data_provider: BaseDataProvider, - reward_strategy: BaseRewardStrategy = WeightedUnrealisedProfit, + reward_strategy: BaseRewardStrategy = IncrementalProfit, initial_balance: int = 10000, commission: float = 0.0025, **kwargs): diff --git a/lib/env/reward/IncrementalProfit.py b/lib/env/reward/IncrementalProfit.py index a159b5a..cfec28b 100644 --- a/lib/env/reward/IncrementalProfit.py +++ b/lib/env/reward/IncrementalProfit.py @@ -7,7 +7,7 @@ class IncrementalProfit(BaseRewardStrategy): def __init__(self): - pass + super(IncrementalProfit, self).__init__() def reset_reward(self): pass diff --git a/lib/env/reward/__init__.py b/lib/env/reward/__init__.py index 6a95835..a548e78 100644 --- a/lib/env/reward/__init__.py +++ b/lib/env/reward/__init__.py @@ -1,2 +1,3 @@ from lib.env.reward.IncrementalProfit import IncrementalProfit +from lib.env.reward.WeightedUnrealisedProfit import WeightedUnrealisedProfit from lib.env.reward.BaseRewardStrategy import BaseRewardStrategy