From b2ed19a08667e99ec02abc135a3dbea29795101f Mon Sep 17 00:00:00 2001
From: Arunavo Ray <rayanup3@gmail.com>
Date: Tue, 9 Jul 2019 16:22:42 +0530
Subject: [PATCH 1/2] Added Weighted Unrealised Profit

---
 .gitignore                                 |  1 +
 lib/env/TradingEnv.py                      |  7 +++-
 lib/env/reward/BaseRewardStrategy.py       |  5 +++
 lib/env/reward/IncrementalProfit.py        |  4 ++
 lib/env/reward/WeightedUnrealisedProfit.py | 45 ++++++++++++++++++++++
 5 files changed, 60 insertions(+), 2 deletions(-)
 create mode 100644 lib/env/reward/WeightedUnrealisedProfit.py

diff --git a/.gitignore b/.gitignore
index 5c370c0..8bea167 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,4 @@ data/agents/*
 data/postgres/*
 data/log/*
 *.pkl
+venv/*
diff --git a/lib/env/TradingEnv.py b/lib/env/TradingEnv.py
index 4f1a643..8c05f2b 100644
--- a/lib/env/TradingEnv.py
+++ b/lib/env/TradingEnv.py
@@ -7,7 +7,7 @@
 from typing import List, Dict
 
 from lib.env.render import TradingChart
-from lib.env.reward import BaseRewardStrategy, IncrementalProfit
+from lib.env.reward import BaseRewardStrategy, IncrementalProfit, WeightedUnrealisedProfit
 from lib.data.providers import BaseDataProvider
 from lib.data.features.transform import max_min_normalize, mean_normalize, log_and_difference, difference
 from lib.util.logger import init_logger
@@ -26,7 +26,7 @@ class TradingEnv(gym.Env):
 
     def __init__(self,
                  data_provider: BaseDataProvider,
-                 reward_strategy: BaseRewardStrategy = IncrementalProfit,
+                 reward_strategy: BaseRewardStrategy = WeightedUnrealisedProfit,
                  initial_balance: int = 10000,
                  commission: float = 0.0025,
                  **kwargs):
@@ -91,6 +91,7 @@ def _make_trade(self, action: int, current_price: float):
             self.last_sold = self.current_step
             self.asset_held -= asset_sold
             self.balance += revenue_from_sold
+            self.reward_strategy.reset_reward()
 
             self.trades.append({'step': self.current_step, 'amount': asset_sold,
                                 'total': revenue_from_sold, 'type': 'sell'})
@@ -124,6 +125,7 @@ def _reward(self):
                                                  net_worths=self.net_worths,
                                                  account_history=self.account_history,
                                                  last_bought=self.last_bought,
+                                                 last_held=self.asset_held,
                                                  last_sold=self.last_sold,
                                                  current_price=self._current_price())
 
@@ -181,6 +183,7 @@ def reset(self):
         self.current_step = 0
         self.last_bought = 0
         self.last_sold = 0
+        self.reward_strategy.reset_reward()
 
         self.account_history = pd.DataFrame([{
             'balance': self.balance,
diff --git a/lib/env/reward/BaseRewardStrategy.py b/lib/env/reward/BaseRewardStrategy.py
index 4a2e18b..f630e82 100644
--- a/lib/env/reward/BaseRewardStrategy.py
+++ b/lib/env/reward/BaseRewardStrategy.py
@@ -9,12 +9,17 @@ class BaseRewardStrategy(object, metaclass=ABCMeta):
     def __init__(self):
         pass
 
+    @abstractmethod
+    def reset_reward(self):
+        pass
+
     @abstractmethod
     def get_reward(self,
                    observations: pd.DataFrame,
                    account_history: pd.DataFrame,
                    net_worths: List[float],
                    last_bought: int,
+                   last_held: int,
                    last_sold: int,
                    current_price: float) -> float:
         raise NotImplementedError()
diff --git a/lib/env/reward/IncrementalProfit.py b/lib/env/reward/IncrementalProfit.py
index 304a1db..a159b5a 100644
--- a/lib/env/reward/IncrementalProfit.py
+++ b/lib/env/reward/IncrementalProfit.py
@@ -9,11 +9,15 @@ class IncrementalProfit(BaseRewardStrategy):
     def __init__(self):
         pass
 
+    def reset_reward(self):
+        pass
+
     def get_reward(self,
                    observations: pd.DataFrame,
                    account_history: pd.DataFrame,
                    net_worths: List[float],
                    last_bought: int,
+                   last_held: int,
                    last_sold: int,
                    current_price: float):
         curr_balance = account_history['balance'].values[-1]
diff --git a/lib/env/reward/WeightedUnrealisedProfit.py b/lib/env/reward/WeightedUnrealisedProfit.py
new file mode 100644
index 0000000..937ff40
--- /dev/null
+++ b/lib/env/reward/WeightedUnrealisedProfit.py
@@ -0,0 +1,45 @@
+from collections import deque
+
+import pandas as pd
+import numpy as np
+from typing import List
+
+from lib.env.reward.BaseRewardStrategy import BaseRewardStrategy
+
+
+class WeightedUnrealisedProfit(BaseRewardStrategy):  # Reward strategy that keeps a decayed running sum of the values passed to cal_reward.
+    def __init__(self, **kwargs):
+        super(WeightedUnrealisedProfit, self).__init__()
+        # Optional kwarg 'decay_rate' (default 1e-2): the running sum is scaled by exp(-decay_rate) on every cal_reward call.
+        self.decay_rate = kwargs.get('decay_rate', 1e-2)
+        self.rewards = deque(np.zeros(1, dtype=float))  # seeded with a single 0.0; cal_reward pops one and appends one per call
+        self.sum = 0.0  # decayed running total
+        self.denominator = np.exp(-1 * self.decay_rate)  # exp(-decay_rate); cal_reward divides its result by this
+
+    def reset_reward(self):  # Re-seed the stored reward and zero the running sum; decay_rate and denominator are left as-is.
+        self.rewards = deque(np.zeros(1, dtype=float))
+        self.sum = 0.0
+
+    def cal_reward(self, reward):  # Fold `reward` into the decayed running sum and return the scaled total.
+        stale_reward = self.rewards.popleft()  # value stored by the previous call (0.0 right after init/reset)
+        self.sum = self.sum - np.exp(-1 * self.decay_rate) * stale_reward  # take out the previous value, discounted once
+        self.sum = self.sum * np.exp(-1 * self.decay_rate)  # decay what remains by one step
+        self.sum = self.sum + reward
+        self.rewards.append(reward)  # remembered so the next call can subtract it
+        return self.sum / self.denominator  # same as sum * exp(decay_rate)
+
+    def get_reward(self,  # Compute the reward from the latest account_history row and the current holding.
+                   observations: pd.DataFrame,
+                   account_history: pd.DataFrame,
+                   net_worths: List[float],
+                   last_bought: int,
+                   last_held: int,  # NOTE(review): TradingEnv passes self.asset_held here; may be fractional, confirm the type
+                   last_sold: int,
+                   current_price: float):
+        # A sale on the latest row feeds that row's revenue into cal_reward; otherwise the held amount valued at current_price.
+        if account_history['btc_sold'].values[-1] > 0:  # NOTE(review): TradingEnv uses asset_* names; confirm a 'btc_sold' column exists
+            reward = self.cal_reward(account_history['revenue_from_sold'].values[-1])
+        else:
+            reward = self.cal_reward(last_held * current_price)
+        # observations, net_worths, last_bought and last_sold belong to the BaseRewardStrategy signature but are not read here.
+        return reward

From be6d144f1887f88e53906694d2454173f6c69675 Mon Sep 17 00:00:00 2001
From: Arunavo Ray <rayanup3@gmail.com>
Date: Tue, 9 Jul 2019 21:26:31 +0530
Subject: [PATCH 2/2] Fixed Bad Import Issue

---
 lib/RLTrader.py                     | 2 +-
 lib/env/TradingEnv.py               | 2 +-
 lib/env/reward/IncrementalProfit.py | 2 +-
 lib/env/reward/__init__.py          | 1 +
 4 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/lib/RLTrader.py b/lib/RLTrader.py
index 850be21..88e8d82 100644
--- a/lib/RLTrader.py
+++ b/lib/RLTrader.py
@@ -12,7 +12,7 @@
 from stable_baselines import PPO2
 
 from lib.env.TradingEnv import TradingEnv
-from lib.env.reward import BaseRewardStrategy, IncrementalProfit
+from lib.env.reward import BaseRewardStrategy, IncrementalProfit, WeightedUnrealisedProfit
 from lib.data.providers.dates import ProviderDateFormat
 from lib.data.providers import BaseDataProvider,  StaticDataProvider, ExchangeDataProvider
 from lib.util.logger import init_logger
diff --git a/lib/env/TradingEnv.py b/lib/env/TradingEnv.py
index 8c05f2b..bae0d50 100644
--- a/lib/env/TradingEnv.py
+++ b/lib/env/TradingEnv.py
@@ -26,7 +26,7 @@ class TradingEnv(gym.Env):
 
     def __init__(self,
                  data_provider: BaseDataProvider,
-                 reward_strategy: BaseRewardStrategy = WeightedUnrealisedProfit,
+                 reward_strategy: BaseRewardStrategy = IncrementalProfit,
                  initial_balance: int = 10000,
                  commission: float = 0.0025,
                  **kwargs):
diff --git a/lib/env/reward/IncrementalProfit.py b/lib/env/reward/IncrementalProfit.py
index a159b5a..cfec28b 100644
--- a/lib/env/reward/IncrementalProfit.py
+++ b/lib/env/reward/IncrementalProfit.py
@@ -7,7 +7,7 @@
 
 class IncrementalProfit(BaseRewardStrategy):
     def __init__(self):
-        pass
+        super(IncrementalProfit, self).__init__()
 
     def reset_reward(self):
         pass
diff --git a/lib/env/reward/__init__.py b/lib/env/reward/__init__.py
index 6a95835..a548e78 100644
--- a/lib/env/reward/__init__.py
+++ b/lib/env/reward/__init__.py
@@ -1,2 +1,3 @@
 from lib.env.reward.IncrementalProfit import IncrementalProfit
+from lib.env.reward.WeightedUnrealisedProfit import WeightedUnrealisedProfit
 from lib.env.reward.BaseRewardStrategy import BaseRewardStrategy