Skip to content

Commit

Permalink
Merge WeightedUnrealisedProfit
Browse files Browse the repository at this point in the history
  • Loading branch information
notadamking committed Jul 10, 2019
2 parents 40c7559 + be6d144 commit e94bea6
Show file tree
Hide file tree
Showing 7 changed files with 56 additions and 3 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ data/postgres/*
data/log/*
data/reports/*
*.pkl
venv/*
2 changes: 1 addition & 1 deletion lib/RLTrader.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from stable_baselines import PPO2

from lib.env.TradingEnv import TradingEnv
from lib.env.reward import BaseRewardStrategy, IncrementalProfit
from lib.env.reward import BaseRewardStrategy, IncrementalProfit, WeightedUnrealisedProfit
from lib.data.providers.dates import ProviderDateFormat
from lib.data.providers import BaseDataProvider, StaticDataProvider, ExchangeDataProvider
from lib.util.logger import init_logger
Expand Down
5 changes: 4 additions & 1 deletion lib/env/TradingEnv.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from typing import List, Dict

from lib.env.render import TradingChart
from lib.env.reward import BaseRewardStrategy, IncrementalProfit
from lib.env.reward import BaseRewardStrategy, IncrementalProfit, WeightedUnrealisedProfit
from lib.env.trade import BaseTradeStrategy, SimulatedTradeStrategy
from lib.data.providers import BaseDataProvider
from lib.data.features.transform import max_min_normalize, mean_normalize, log_and_difference, difference
Expand Down Expand Up @@ -109,6 +109,7 @@ def _take_action(self, action: int):
elif asset_sold:
self.asset_held -= asset_sold
self.balance += sale_revenue
self.reward_strategy.reset_reward()

self.trades.append({'step': self.current_step, 'amount': asset_sold,
'total': sale_revenue, 'type': 'sell'})
Expand Down Expand Up @@ -191,6 +192,8 @@ def reset(self):
self.asset_held = 0
self.current_step = 0

self.reward_strategy.reset_reward()

self.account_history = pd.DataFrame([{
'balance': self.balance,
'asset_bought': 0,
Expand Down
4 changes: 4 additions & 0 deletions lib/env/reward/BaseRewardStrategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ class BaseRewardStrategy(object, metaclass=ABCMeta):
def __init__(self):
pass

@abstractmethod
def reset_reward(self):
raise NotImplementedError()

@abstractmethod
def get_reward(self,
current_step: int,
Expand Down
5 changes: 4 additions & 1 deletion lib/env/reward/IncrementalProfit.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,15 @@ class IncrementalProfit(BaseRewardStrategy):
def __init__(self):
pass

def reset_reward(self):
pass

def get_reward(self,
current_step: int,
current_price: Callable[[str], float],
observations: pd.DataFrame,
account_history: pd.DataFrame,
net_worths: List[float]):
net_worths: List[float]) -> float:
reward = 0

curr_balance = account_history['balance'].values[-1]
Expand Down
41 changes: 41 additions & 0 deletions lib/env/reward/WeightedUnrealisedProfit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from collections import deque

import pandas as pd
import numpy as np
from typing import List, Callable

from lib.env.reward.BaseRewardStrategy import BaseRewardStrategy


class WeightedUnrealisedProfit(BaseRewardStrategy):
    """Reward strategy built on an exponentially weighted running sum.

    Each step feeds either the revenue of the sale just made or the current
    value of the assets still held into ``calc_reward``, which maintains a
    running sum scaled by ``exp(-decay_rate)`` on every update.
    """

    def __init__(self, **kwargs):
        """Configure the decay and initialise the running state.

        Keyword Args:
            decay_rate: Exponent used to derive the per-step decay factor.
                Defaults to ``1e-2``.
        """
        self.decay_rate = kwargs.get('decay_rate', 1e-2)
        # Despite the attribute name, this value is used both as a multiplier
        # and as a divisor in calc_reward.
        self.decay_denominator = np.exp(-1 * self.decay_rate)

        self.reset_reward()

    def reset_reward(self):
        """Reset the stored rewards to a single zero and the running sum to 0."""
        self.rewards = deque(np.zeros(1, dtype=float))
        self.sum = 0.0

    def calc_reward(self, reward):
        """Fold ``reward`` into the running sum and return the weighted value.

        The oldest stored reward (scaled by the decay factor) is removed from
        the running sum, the sum is scaled by the decay factor, and the new
        reward is added and stored for the next call.

        Args:
            reward: The raw reward for the current step.

        Returns:
            The updated running sum divided by the decay factor.
        """
        self.sum = self.sum - self.decay_denominator * self.rewards.popleft()
        self.sum = self.sum * self.decay_denominator
        self.sum = self.sum + reward

        self.rewards.append(reward)

        return self.sum / self.decay_denominator

    def get_reward(self,
                   current_step: int,
                   current_price: Callable[[str], float],
                   observations: pd.DataFrame,
                   account_history: pd.DataFrame,
                   net_worths: List[float]) -> float:
        """Return the weighted reward for the latest row of ``account_history``.

        Args:
            current_step: Index of the current step (unused here).
            current_price: Price getter, or an already-resolved numeric price.
            observations: Observation frame (unused here).
            account_history: Frame whose last row supplies ``asset_sold``,
                ``sale_revenue`` and ``asset_held``.
            net_worths: Net worth history (unused here).

        Returns:
            The value produced by ``calc_reward`` for this step.
        """
        if account_history['asset_sold'].values[-1] > 0:
            # A sale happened on this step: reward the realised revenue.
            return self.calc_reward(account_history['sale_revenue'].values[-1])

        # The annotation says current_price is a callable, so it has to be
        # invoked before it can be multiplied; multiplying the function object
        # itself raises TypeError. Plain numbers are still accepted.
        # NOTE(review): the getter is called without arguments, which assumes
        # it has a default for its key parameter - confirm against the caller.
        price = current_price() if callable(current_price) else current_price

        # No sale: reward the unrealised value of the assets currently held.
        return self.calc_reward(account_history['asset_held'].values[-1] * price)
1 change: 1 addition & 0 deletions lib/env/reward/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from lib.env.reward.IncrementalProfit import IncrementalProfit
from lib.env.reward.WeightedUnrealisedProfit import WeightedUnrealisedProfit
from lib.env.reward.BaseRewardStrategy import BaseRewardStrategy

0 comments on commit e94bea6

Please sign in to comment.