diff --git a/_gym/__init__.py b/_gym/__init__.py
index 58f3ace6..5ba63ea1 100644
--- a/_gym/__init__.py
+++ b/_gym/__init__.py
@@ -1 +1 @@
-from .version import __version__
+from .version import __version__  # noqa
diff --git a/_gym/env/bidder.py b/_gym/env/bidder.py
index 80cd3dcd..f0e1aef0 100644
--- a/_gym/env/bidder.py
+++ b/_gym/env/bidder.py
@@ -21,7 +21,9 @@ class Bidder:
     Intended to be called and initialized from RTBEnv class in env.py.

     Determine bid price by the following formula.
-    :math:`bid_price_{t, i} = adjust_rate_{t} \\times predicted_reward_{t,i}/ground_truth_reward_{t, i} ( \\times const.)`
+
+    .. math::
+        {bid price}_{t, i} = {adjust rate}_{t} \\times {predicted reward}_{t,i} ( \\times {const.})

     Parameters
     -------
@@ -101,7 +103,9 @@ def determine_bid_price(
         Note
         -------
         Determine bid price as follows.
-        :math:`bid_price_{t, i} = adjust_rate_{t} \\times predicted_reward_{t,i}/ground_truth_reward_{t, i} ( \\times const.)`
+
+        .. math::
+            {bid price}_{t, i} = {adjust rate}_{t} \\times {predicted reward}_{t,i} ( \\times {const.})

         Parameters
         -------
diff --git a/_gym/env/rtb.py b/_gym/env/rtb.py
index f7105f2b..edfced04 100644
--- a/_gym/env/rtb.py
+++ b/_gym/env/rtb.py
@@ -48,7 +48,8 @@ class RTBEnv(gym.Env):
         Adjust rate parameter used for determining the bid price as follows.
         (Bid price is individually determined for each auction.)

-        :math:`bid_price_{t, i} = adjust_rate_{t} \\times ground_truth_reward_{t, i} ( \\times const.)`
+        .. math::
+            {bid price}_{t, i} = {adjust rate}_{t} \\times {predicted reward}_{t,i} ( \\times {const.})

         Note that, you can also use predicted reward instead of ground-truth reward in the above equation.
         Please also refer to CustomizedRTBEnv Wrapper.
diff --git a/_gym/env/simulator/function.py b/_gym/env/simulator/function.py
index 5db8c34c..89406520 100644
--- a/_gym/env/simulator/function.py
+++ b/_gym/env/simulator/function.py
@@ -80,7 +80,7 @@ def __post_init__(self):
         check_scalar(self.n_ads, name="n_ads", target_type=int, min_val=1)
         if not isinstance(self.standard_bid_price_distribution, NormalDistribution):
             raise ValueError(
-                f"standard_bid_price_distribution must be a child class of NormalDistribution"
+                "standard_bid_price_distribution must be a child class of NormalDistribution"
             )
         if self.minimum_standard_bid_price is None:
             self.minimum_standard_bid_price = (
diff --git a/_gym/env/simulator/rtb_synthetic.py b/_gym/env/simulator/rtb_synthetic.py
index 5517f913..6de6d028 100644
--- a/_gym/env/simulator/rtb_synthetic.py
+++ b/_gym/env/simulator/rtb_synthetic.py
@@ -10,7 +10,7 @@
     BaseWinningPriceDistribution,
     BaseClickAndConversionRate,
 )
-from _gym.env.simulator.function import (
+from _gym.env.simulator.function import (  # noqa: F401
     WinningPriceDistribution,
     ClickThroughRate,
     ConversionRate,
@@ -99,9 +99,11 @@ class RTBSyntheticSimulator(BaseSimulator):
     user_feature_vector: Optional[np.ndarray] = None
     ad_sampling_rate: Optional[np.ndarray] = None
     user_sampling_rate: Optional[np.ndarray] = None
-    WinningPriceDistribution: BaseWinningPriceDistribution = WinningPriceDistribution
-    ClickThroughRate: BaseClickAndConversionRate = ClickThroughRate
-    ConversionRate: BaseClickAndConversionRate = ConversionRate
+    WinningPriceDistribution: BaseWinningPriceDistribution = (  # noqa: F811
+        WinningPriceDistribution
+    )
+    ClickThroughRate: BaseClickAndConversionRate = ClickThroughRate  # noqa: F811
+    ConversionRate: BaseClickAndConversionRate = ConversionRate  # noqa: F811
     standard_bid_price_distribution: NormalDistribution = NormalDistribution(
         mean=50,
         std=5,
diff --git a/_gym/env/wrapper_rtb.py b/_gym/env/wrapper_rtb.py
index 9f1c9fc9..90e69662 100644
--- a/_gym/env/wrapper_rtb.py
+++ b/_gym/env/wrapper_rtb.py
@@ -55,7 +55,9 @@ class CustomizedRTBEnv(gym.Env):
     action: Union[int, float, NDArray] (> 0)
         Adjust rate parameter used for the bid price calculation as follows.
         Note that the following bid price is individually determined for each auction.
-        :math:`bid_price_{t, i} = adjust_rate_{t} \\times predicted_reward_{t,i}/ground_truth_reward_{t, i} ( \\times const.)`
+
+        .. math::
+            {bid price}_{t, i} = {adjust rate}_{t} \\times {predicted reward}_{t,i} ( \\times {const.})

         Both discrete and continuous actions are acceptable.

@@ -100,7 +102,7 @@ class CustomizedRTBEnv(gym.Env):
         Dictionary which maps discrete action index into specific actions.
         Used when only when using action_type="discrete" option.

-        If None, the action meaning values automatically set to [action_min, action_max] log sampled values.
+        If None, the values are automatically set to [action_min, action_max] as follows.
             np.logspace(-1, 1, n_actions)

     Examples
diff --git a/_gym/ope/estimators_continuous.py b/_gym/ope/estimators_continuous.py
index 99525c19..5a409289 100644
--- a/_gym/ope/estimators_continuous.py
+++ b/_gym/ope/estimators_continuous.py
@@ -993,7 +993,7 @@ def _estimate_trajectory_value(

         state_action_value_prediction: NDArray, shape (n_episodes * step_per_episode, )
             :math:`\\hat{Q}` for the action chosen by evaluation policy,
-            i.e., :math:`\\hat{Q}(s_t, \\pi_e(a \mid s_t))`.
+            i.e., :math:`\\hat{Q}(s_t, \\pi_e(a \\mid s_t))`.

         gamma: float, default=1.0 (0, 1]
             Discount factor.
@@ -1081,7 +1081,7 @@ def estimate_policy_value(

         state_action_value_prediction: NDArray, shape (n_episodes * step_per_episode, )
             :math:`\\hat{Q}` for the action chosen by evaluation policy,
-            i.e., :math:`\\hat{Q}(s_t, \\pi_e(a \mid s_t))`.
+            i.e., :math:`\\hat{Q}(s_t, \\pi_e(a \\mid s_t))`.

         gamma: float, default=1.0 (0, 1]
             Discount factor.
@@ -1184,7 +1184,7 @@ def estimate_interval(

         state_action_value_prediction: NDArray, shape (n_episodes * step_per_episode, )
             :math:`\\hat{Q}` for the action chosen by evaluation policy,
-            i.e., :math:`\\hat{Q}(s_t, \\pi_e(a \mid s_t))`.
+            i.e., :math:`\\hat{Q}(s_t, \\pi_e(a \\mid s_t))`.

         gamma: float, default=1.0 (0, 1]
             Discount factor.
@@ -1728,7 +1728,7 @@ def _estimate_trajectory_value(

         state_action_value_prediction: NDArray, shape (n_episodes * step_per_episode, )
             :math:`\\hat{Q}` for the action chosen by evaluation policy,
-            i.e., :math:`\\hat{Q}(s_t, \\pi_e(a \mid s_t))`.
+            i.e., :math:`\\hat{Q}(s_t, \\pi_e(a \\mid s_t))`.

         gamma: float, default=1.0 (0, 1]
             Discount factor.
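The docstring hunks above standardize the bid-price rule, bid price = adjust rate x predicted reward (x const.), and point to np.logspace(-1, 1, n_actions) as the default mapping from discrete actions to adjust rates in CustomizedRTBEnv. A minimal sketch of that calculation, using illustrative names and values that are not the library's API, looks like this:

import numpy as np

# Illustrative sketch only; `action_meaning`, `predicted_reward`, and `const`
# are made-up names for this example, not identifiers from the library.
n_actions = 10
action_meaning = np.logspace(-1, 1, n_actions)  # adjust rates spanning [0.1, 10.0]

action_idx = 7                                   # discrete action chosen by some policy
adjust_rate = action_meaning[action_idx]

predicted_reward = np.array([0.02, 0.05, 0.01])  # per-auction reward predictions
const = 1000.0                                   # optional scaling constant

# bid_price_{t, i} = adjust_rate_t * predicted_reward_{t, i} (* const.)
bid_prices = adjust_rate * predicted_reward * const
print(bid_prices.round(2))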
diff --git a/_gym/ope/estimators_discrete.py b/_gym/ope/estimators_discrete.py
index 8d77d7e6..f32a2f43 100644
--- a/_gym/ope/estimators_discrete.py
+++ b/_gym/ope/estimators_discrete.py
@@ -759,7 +759,7 @@ def _estimate_trajectory_value(

         evaluation_policy_action_dist: NDArray, shape (n_episodes * step_per_episode, n_action)
             Action choice probability of evaluation policy for all action,
-            i.e., :math:`\\pi_e(a \mid s_t) \\forall a \\in \\mathcal{A}`
+            i.e., :math:`\\pi_e(a \\mid s_t) \\forall a \\in \\mathcal{A}`

         state_action_value_prediction: NDArray, shape (n_episodes * step_per_episode, n_action)
             :math:`\\hat{Q}` for all action,
@@ -833,7 +833,7 @@ def estimate_policy_value(

         evaluation_policy_action_dist: NDArray, shape (n_episodes * step_per_episode, n_action)
             Action choice probability of evaluation policy for all action,
-            i.e., :math:`\\pi_e(a \mid s_t) \\forall a \\in \\mathcal{A}`
+            i.e., :math:`\\pi_e(a \\mid s_t) \\forall a \\in \\mathcal{A}`

         state_action_value_prediction: NDArray, shape (n_episodes * step_per_episode, n_action)
             :math:`\\hat{Q}` for all action,
@@ -972,7 +972,7 @@ def estimate_interval(

         evaluation_policy_action_dist: NDArray, shape (n_episodes * step_per_episode, n_action)
             Action choice probability of evaluation policy for all action,
-            i.e., :math:`\\pi_e(a \mid s_t) \\forall a \\in \\mathcal{A}`
+            i.e., :math:`\\pi_e(a \\mid s_t) \\forall a \\in \\mathcal{A}`

         state_action_value_prediction: NDArray, shape (n_episodes * step_per_episode, n_action)
             :math:`\\hat{Q}` for all action,
@@ -1368,7 +1368,7 @@ def _estimate_trajectory_value(

         evaluation_policy_action_dist: NDArray, shape (n_episodes * step_per_episode, n_action)
             Action choice probability of evaluation policy for all action,
-            i.e., :math:`\\pi_e(a \mid s_t) \\forall a \\in \\mathcal{A}`
+            i.e., :math:`\\pi_e(a \\mid s_t) \\forall a \\in \\mathcal{A}`

         state_action_value_prediction: NDArray, shape (n_episodes * step_per_episode, n_action)
             :math:`\\hat{Q}` for all action,
diff --git a/_gym/ope/ope.py b/_gym/ope/ope.py
index fe7a17a9..4ad4ae37 100644
--- a/_gym/ope/ope.py
+++ b/_gym/ope/ope.py
@@ -1077,7 +1077,7 @@ def obtain_whole_inputs(

         evaluation_policy_action_dist: Optional[NDArray], shape (n_episodes * step_per_episode, n_actions)
             Action choice probability of evaluation policy for all actions,
-            i.e., :math:`\\pi_e(a \mid s_t) \\forall a \\in \\mathcal{A}`
+            i.e., :math:`\\pi_e(a \\mid s_t) \\forall a \\in \\mathcal{A}`
             If action_type == "continuous", `None` is recorded.

         state_action_value_prediction: Optional[NDArray]
@@ -1086,7 +1086,7 @@ def obtain_whole_inputs(
             shape (n_episodes * step_per_episode, n_actions)

             If action_type == "continuous", :math:`\\hat{Q}` for the action chosen by evaluation policy,
-            i.e., :math:`\\hat{Q}(s_t, \\pi_e(a \mid s_t))`.
+            i.e., :math:`\\hat{Q}(s_t, \\pi_e(a \\mid s_t))`.
             shape (n_episodes * step_per_episode, )

             If use_base_model == False, `None` is recorded.
diff --git a/_gym/policy/head.py b/_gym/policy/head.py
index 59a8293a..bd431687 100644
--- a/_gym/policy/head.py
+++ b/_gym/policy/head.py
@@ -212,7 +212,7 @@ class DiscreteEpsilonGreedyHead(BaseHead):
     Epsilon-greedy policy stochastically chooses actions (i.e., :math:`a \\in \\mathcal{A}`) given state :math:`s` as follows.

     .. math::
-        \\pi(a \mid s) := (1 - \\epsilon) * \\mathbb{I}(a = a*)) + \\epsilon / |\\mathcal{A}|
+        \\pi(a \\mid s) := (1 - \\epsilon) * \\mathbb{I}(a = a*)) + \\epsilon / |\\mathcal{A}|

     where :math:`\\epsilon` is the probability of taking random actions and :math:`a*` is the greedy action.
     :math:`\\mathbb{I}(\\cdot)` denotes indicator function.
@@ -357,7 +357,7 @@ class DiscreteSoftmaxHead(BaseHead):
     Softmax policy stochastically chooses actions (i.e., :math:`a \\in \\mathcal{A}`) given state :math:`s` as follows.

     .. math::
-        \\pi(a \mid s) := \\frac{\\exp(Q(s, a) / \\tau)}{\\sum_{a' \\in A} \\exp(Q(s, a') / \\tau)}
+        \\pi(a \\mid s) := \\frac{\\exp(Q(s, a) / \\tau)}{\\sum_{a' \\in A} \\exp(Q(s, a') / \\tau)}

     where :math:`\\tau` is the temperature parameter of the softmax function.
     :math:`Q(s, a)` is the predicted value for the given :math:`(s, a)` pair.
@@ -373,8 +373,9 @@ class DiscreteSoftmaxHead(BaseHead):
     n_actions: int (> 0)
         Numbers of actions.

-    tau: float, default=1.0 (-\infty, \infty)
+    tau: float, default=1.0 (:math:`\\in (- \\infty, \\infty)`)
         Temperature parameter.
+        A negative value leads to a sub-optimal policy.

     random_state: Optional[int], default=None (>= 0)
         Random state.
@@ -544,7 +545,7 @@ class ContinuousGaussianHead(BaseHead):
     This class should be used when action_space is not clipped.
     Otherwise, please use ContinuousTruncatedGaussianHead instead.

-    Given a deterministic policy, gaussian policy samples action :math:`a \\in \mathcal{A}` given state :math:`s` as follows.
+    Given a deterministic policy, gaussian policy samples action :math:`a \\in \\mathcal{A}` given state :math:`s` as follows.

     .. math::
         a \\sim Normal(\\pi(s), \\sigma)
@@ -680,7 +681,7 @@ class ContinuousTruncatedGaussianHead(BaseHead):

     Note
     -------
-    Given a deterministic policy, truncated gaussian policy samples action :math:`a \\in \mathcal{A}` given state :math:`s` as follows.
+    Given a deterministic policy, truncated gaussian policy samples action :math:`a \\in \\mathcal{A}` given state :math:`s` as follows.

     .. math::
         a \\sim TruncNorm(\\pi(s), \\sigma)
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 00000000..14a938c0
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,7 @@
+[flake8]
+ignore =
+    E501,W503,E203
+    # We ignore the following errors as they are incompatible with black formatter.
+    # We ignore E501: line too long
+    # We ignore W503: line break before binary operator
+    # We ignore E203: whitespace before ':'
diff --git a/tests/env/simulator/test_rtb_synthetic.py b/tests/env/simulator/test_rtb_synthetic.py
index cce0c1dc..a600a8bd 100644
--- a/tests/env/simulator/test_rtb_synthetic.py
+++ b/tests/env/simulator/test_rtb_synthetic.py
@@ -360,7 +360,7 @@
             -1,  #
             24,
             ValueError,
-            "minimum_standard_bid_price must be a float value within [0, standard_bid_price_distribution.mean]",
+            "minimum_standard_bid_price must be a float value within",
         ),
         (
             5,
@@ -373,7 +373,7 @@
             101,  #
             24,
             ValueError,
-            "minimum_standard_bid_price must be a float value within [0, standard_bid_price_distribution.mean]",
+            "minimum_standard_bid_price must be a float value within",
         ),
         (
             5,
@@ -386,7 +386,7 @@
             "1",  #
             24,
             ValueError,
-            "minimum_standard_bid_price must be a float value within [0, standard_bid_price_distribution.mean]",
+            "minimum_standard_bid_price must be a float value within",
         ),
         (
             5,
@@ -614,7 +614,7 @@ def test_generate_auction_using_valid_input():
             -np.ones(3, dtype=int),  #
             np.ones(3, dtype=int),
             ValueError,
-            "ad_ids must be 1-dimensional NDArray with integers within \[0, n_ads\)",
+            "ad_ids must be 1-dimensional NDArray with integers within",
         ),
         (
             2,
@@ -630,7 +630,7 @@ def test_generate_auction_using_valid_input():
             np.arange(3, dtype=int),  #
             np.ones(3, dtype=int),
             ValueError,
-            "ad_ids must be 1-dimensional NDArray with integers within \[0, n_ads\)",
+            "ad_ids must be 1-dimensional NDArray with integers within",
         ),
         (
             2,
@@ -638,7 +638,7 @@ def test_generate_auction_using_valid_input():
             np.ones((2, 3), dtype=int),  #
             np.ones(3, dtype=int),
             ValueError,
-            "ad_ids must be 1-dimensional NDArray with integers within \[0, n_ads\)",
+            "ad_ids must be 1-dimensional NDArray with integers within",
         ),
         (
             2,
@@ -646,7 +646,7 @@ def test_generate_auction_using_valid_input():
             np.ones(3, dtype=int),
             -np.ones(3, dtype=int),  #
             ValueError,
-            "user_ids must be 1-dimensional NDArray with integers within \[0, n_users\)",
+            "user_ids must be 1-dimensional NDArray with integers within",
         ),
         (
             2,
@@ -662,7 +662,7 @@ def test_generate_auction_using_valid_input():
             np.ones(3, dtype=int),
             np.arange(3, dtype=int),  #
             ValueError,
-            "user_ids must be 1-dimensional NDArray with integers within \[0, n_users\)",
+            "user_ids must be 1-dimensional NDArray with integers within",
         ),
         (
             2,
@@ -670,7 +670,7 @@ def test_generate_auction_using_valid_input():
             np.ones(3, dtype=int),
             np.ones((2, 3), dtype=int),  #
             ValueError,
-            "user_ids must be 1-dimensional NDArray with integers within \[0, n_users\)",
+            "user_ids must be 1-dimensional NDArray with integers within",
         ),
         (
             2,
@@ -776,7 +776,7 @@ def test_map_idx_to_contexts(n_ads, n_users, ad_ids, user_ids):
             np.ones(3, dtype=int),
             np.ones(3, dtype=int),
             ValueError,
-            "ad_ids must be 1-dimensional NDArray with integers within \[0, n_ads\)",
+            "ad_ids must be 1-dimensional NDArray with integers within",
         ),
         (
             2,
@@ -796,7 +796,7 @@ def test_map_idx_to_contexts(n_ads, n_users, ad_ids, user_ids):
             np.ones(3, dtype=int),
             np.ones(3, dtype=int),
             ValueError,
-            "ad_ids must be 1-dimensional NDArray with integers within \[0, n_ads\)",
+            "ad_ids must be 1-dimensional NDArray with integers within",
         ),
         (
             2,
@@ -806,7 +806,7 @@ def test_map_idx_to_contexts(n_ads, n_users, ad_ids, user_ids):
             -np.ones(3, dtype=int),  #
             np.ones(3, dtype=int),
             ValueError,
-            "user_ids must be 1-dimensional NDArray with integers within \[0, n_users\)",
+            "user_ids must be 1-dimensional NDArray with integers within",
         ),
         (
             2,
@@ -826,7 +826,7 @@ def test_map_idx_to_contexts(n_ads, n_users, ad_ids, user_ids):
             np.arange(3, dtype=int),  #
             np.ones(3, dtype=int),
             ValueError,
-            "user_ids must be 1-dimensional NDArray with integers within \[0, n_users\)",
+            "user_ids must be 1-dimensional NDArray with integers within",
         ),
         (
             2,
diff --git a/tests/env/test_bidder.py b/tests/env/test_bidder.py
index 2f0fe173..4af28c74 100644
--- a/tests/env/test_bidder.py
+++ b/tests/env/test_bidder.py
@@ -224,7 +224,7 @@ def test_init_using_valid_input(simulator, objective, reward_predictor, scaler):
             2 * np.ones(2, dtype=int),  #
             np.arange(2, dtype=int),
             ValueError,
-            "ad_ids must be 1-dimensional NDArray with integers within \[0, n_ads\)",
+            "ad_ids must be 1-dimensional NDArray with integers",
         ),
         (
             2,
@@ -254,7 +254,7 @@ def test_init_using_valid_input(simulator, objective, reward_predictor, scaler):
             np.arange(2, dtype=int),
             -np.ones(2, dtype=int),  #
             ValueError,
-            "user_ids must be 1-dimensional NDArray with integers within \[0, n_users\)",
+            "user_ids must be 1-dimensional NDArray with integers",
         ),
         (
             2,
@@ -264,7 +264,7 @@ def test_init_using_valid_input(simulator, objective, reward_predictor, scaler):
             np.arange(2),
             2 * np.ones(2, dtype=int),  #
             ValueError,
-            "user_ids must be 1-dimensional NDArray with integers within \[0, n_users\)",
+            "user_ids must be 1-dimensional NDArray with integers",
         ),
         (
             2,
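The test hunks above shorten the expected error messages to plain-text prefixes. One likely reason, stated here as an assumption rather than something the patch itself says, is that pytest's `match` argument is interpreted as a regular expression, so bracketed text such as "[0, n_ads)" has to be escaped (hence the old "\[0, n_ads\)" patterns), while a prefix without metacharacters does not. A minimal sketch with made-up validation logic:

import numpy as np
import pytest


def check_ad_ids(ad_ids, n_ads):
    # Toy stand-in for the validation under test; not the library's actual code.
    ad_ids = np.asarray(ad_ids)
    if ad_ids.ndim != 1 or (ad_ids < 0).any() or (ad_ids >= n_ads).any():
        raise ValueError(
            "ad_ids must be 1-dimensional NDArray with integers within [0, n_ads)"
        )


def test_invalid_ad_ids():
    # `match` is a regex, so matching only a plain-text prefix (as the updated
    # tests do) avoids having to escape "[0, n_ads)".
    with pytest.raises(
        ValueError, match="ad_ids must be 1-dimensional NDArray with integers within"
    ):
        check_ad_ids(-np.ones(3, dtype=int), n_ads=5)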