# Patch summary (review notes added ahead of the first "diff --git" header).
#
# * .gitignore: adds a `venv/*` entry.
# * lib/RLTrader.py and lib/env/TradingEnv.py: the `lib.env.reward` import now
#   also pulls in WeightedUnrealisedProfit.
# * lib/env/TradingEnv.py: adds `self.reward_strategy.reset_reward()` right
#   after `self.balance += sale_revenue` in the `elif asset_sold:` path of
#   `_take_action`, and after `self.current_step = 0` in `reset`.
# * lib/env/reward/BaseRewardStrategy.py: adds an abstract `reset_reward()`
#   that raises NotImplementedError.
# * lib/env/reward/IncrementalProfit.py: adds a no-op `reset_reward()` and a
#   `-> float` return annotation on `get_reward`.
# * lib/env/reward/WeightedUnrealisedProfit.py (new file): a reward strategy
#   holding a running `sum` and a deque seeded with a single 0.0.
#   `decay_denominator` is exp(-decay_rate), with `decay_rate` read from
#   kwargs (default 1e-2). `calc_reward` subtracts
#   `decay_denominator * <popped previous reward>` from `sum`, multiplies
#   `sum` by `decay_denominator`, adds the new reward, appends it to the
#   deque and returns `sum / decay_denominator`. `get_reward` feeds it the
#   last `sale_revenue` when the last `asset_sold` is > 0, otherwise the last
#   `asset_held` multiplied by `current_price`.
# * lib/env/reward/__init__.py: re-exports WeightedUnrealisedProfit.
#
# NOTE(review): in WeightedUnrealisedProfit.get_reward, `current_price` is
# annotated `Callable[[str], float]` but is multiplied directly
# (`asset_held * current_price`) instead of being called. If the caller passes
# a function, as the annotation says, this raises TypeError -- confirm against
# the TradingEnv call site whether it should be `current_price(...)`.
# NOTE(review): the deque starts with one element and every `calc_reward`
# pops one and appends one, so it never holds more than a single prior
# reward -- confirm that a window of length one is intended.
diff --git a/.gitignore b/.gitignore index 989921f..77158db 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ data/postgres/* data/log/* data/reports/* *.pkl +venv/* diff --git a/lib/RLTrader.py b/lib/RLTrader.py index d9cd6fc..e09278a 100644 --- a/lib/RLTrader.py +++ b/lib/RLTrader.py @@ -14,7 +14,7 @@ from stable_baselines import PPO2 from lib.env.TradingEnv import TradingEnv -from lib.env.reward import BaseRewardStrategy, IncrementalProfit +from lib.env.reward import BaseRewardStrategy, IncrementalProfit, WeightedUnrealisedProfit from lib.data.providers.dates import ProviderDateFormat from lib.data.providers import BaseDataProvider, StaticDataProvider, ExchangeDataProvider from lib.util.logger import init_logger diff --git a/lib/env/TradingEnv.py b/lib/env/TradingEnv.py index d827ddc..a9a7699 100644 --- a/lib/env/TradingEnv.py +++ b/lib/env/TradingEnv.py @@ -7,7 +7,7 @@ from typing import List, Dict from lib.env.render import TradingChart -from lib.env.reward import BaseRewardStrategy, IncrementalProfit +from lib.env.reward import BaseRewardStrategy, IncrementalProfit, WeightedUnrealisedProfit from lib.env.trade import BaseTradeStrategy, SimulatedTradeStrategy from lib.data.providers import BaseDataProvider from lib.data.features.transform import max_min_normalize, mean_normalize, log_and_difference, difference @@ -109,6 +109,7 @@ def _take_action(self, action: int): elif asset_sold: self.asset_held -= asset_sold self.balance += sale_revenue + self.reward_strategy.reset_reward() self.trades.append({'step': self.current_step, 'amount': asset_sold, 'total': sale_revenue, 'type': 'sell'}) @@ -191,6 +192,8 @@ def reset(self): self.asset_held = 0 self.current_step = 0 + self.reward_strategy.reset_reward() + self.account_history = pd.DataFrame([{ 'balance': self.balance, 'asset_bought': 0, diff --git a/lib/env/reward/BaseRewardStrategy.py b/lib/env/reward/BaseRewardStrategy.py index 9839216..c33bb60 100644 --- a/lib/env/reward/BaseRewardStrategy.py +++ 
b/lib/env/reward/BaseRewardStrategy.py @@ -9,6 +9,10 @@ class BaseRewardStrategy(object, metaclass=ABCMeta): def __init__(self): pass + @abstractmethod + def reset_reward(self): + raise NotImplementedError() + @abstractmethod def get_reward(self, current_step: int, diff --git a/lib/env/reward/IncrementalProfit.py b/lib/env/reward/IncrementalProfit.py index ad0ecfe..f208e78 100644 --- a/lib/env/reward/IncrementalProfit.py +++ b/lib/env/reward/IncrementalProfit.py @@ -12,12 +12,15 @@ class IncrementalProfit(BaseRewardStrategy): def __init__(self): pass + def reset_reward(self): + pass + def get_reward(self, current_step: int, current_price: Callable[[str], float], observations: pd.DataFrame, account_history: pd.DataFrame, - net_worths: List[float]): + net_worths: List[float]) -> float: reward = 0 curr_balance = account_history['balance'].values[-1] diff --git a/lib/env/reward/WeightedUnrealisedProfit.py b/lib/env/reward/WeightedUnrealisedProfit.py new file mode 100644 index 0000000..14d6b41 --- /dev/null +++ b/lib/env/reward/WeightedUnrealisedProfit.py @@ -0,0 +1,41 @@ +from collections import deque + +import pandas as pd +import numpy as np +from typing import List, Callable + +from lib.env.reward.BaseRewardStrategy import BaseRewardStrategy + + +class WeightedUnrealisedProfit(BaseRewardStrategy): + def __init__(self, **kwargs): + self.decay_rate = kwargs.get('decay_rate', 1e-2) + self.decay_denominator = np.exp(-1 * self.decay_rate) + + self.reset_reward() + + def reset_reward(self): + self.rewards = deque(np.zeros(1, dtype=float)) + self.sum = 0.0 + + def calc_reward(self, reward): + self.sum = self.sum - self.decay_denominator * self.rewards.popleft() + self.sum = self.sum * self.decay_denominator + self.sum = self.sum + reward + + self.rewards.append(reward) + + return self.sum / self.decay_denominator + + def get_reward(self, + current_step: int, + current_price: Callable[[str], float], + observations: pd.DataFrame, + account_history: pd.DataFrame, + 
net_worths: List[float]) -> float: + if account_history['asset_sold'].values[-1] > 0: + reward = self.calc_reward(account_history['sale_revenue'].values[-1]) + else: + reward = self.calc_reward(account_history['asset_held'].values[-1] * current_price) + + return reward diff --git a/lib/env/reward/__init__.py b/lib/env/reward/__init__.py index 6a95835..a548e78 100644 --- a/lib/env/reward/__init__.py +++ b/lib/env/reward/__init__.py @@ -1,2 +1,3 @@ from lib.env.reward.IncrementalProfit import IncrementalProfit +from lib.env.reward.WeightedUnrealisedProfit import WeightedUnrealisedProfit from lib.env.reward.BaseRewardStrategy import BaseRewardStrategy