
Commit

flake8
aiueola committed Jan 13, 2022
1 parent 99a2f17 commit 3bff985
Showing 13 changed files with 59 additions and 42 deletions.
2 changes: 1 addition & 1 deletion _gym/__init__.py
@@ -1 +1 @@
-from .version import __version__
+from .version import __version__ # noqa
8 changes: 6 additions & 2 deletions _gym/env/bidder.py
@@ -21,7 +21,9 @@ class Bidder:
Intended to be called and initialized from RTBEnv class in env.py.
Determine bid price by the following formula.
-:math:`bid_price_{t, i} = adjust_rate_{t} \\times predicted_reward_{t,i}/ground_truth_reward_{t, i} ( \\times const.)`
+.. math::
+    {bid price}_{t, i} = {adjust rate}_{t} \\times {predicted reward}_{t,i} ( \\times {const.})
Parameters
-------
@@ -101,7 +103,9 @@ def determine_bid_price(
Note
-------
Determine bid price as follows.
-:math:`bid_price_{t, i} = adjust_rate_{t} \\times predicted_reward_{t,i}/ground_truth_reward_{t, i} ( \\times const.)`
+.. math::
+    {bid price}_{t, i} = {adjust rate}_{t} \\times {predicted reward}_{t,i} ( \\times {const.})
Parameters
-------
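For reference, the new ".. math::" blocks in both hunks above correspond to the LaTeX below; this rendering assumes standard Sphinx math support and is an interpretation, not text from the commit.

    \mathrm{bid\ price}_{t, i} = \mathrm{adjust\ rate}_{t} \times \mathrm{predicted\ reward}_{t, i} \; (\times\ \mathrm{const.})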
3 changes: 2 additions & 1 deletion _gym/env/rtb.py
@@ -48,7 +48,8 @@ class RTBEnv(gym.Env):
Adjust rate parameter used for determining the bid price as follows.
(Bid price is individually determined for each auction.)
-:math:`bid_price_{t, i} = adjust_rate_{t} \\times ground_truth_reward_{t, i} ( \\times const.)`
+.. math::
+    {bid price}_{t, i} = {adjust rate}_{t} \\times {predicted reward}_{t,i} ( \\times {const.})
Note that, you can also use predicted reward instead of ground-truth reward in the above equation.
Please also refer to CustomizedRTBEnv Wrapper.
2 changes: 1 addition & 1 deletion _gym/env/simulator/function.py
@@ -80,7 +80,7 @@ def __post_init__(self):
check_scalar(self.n_ads, name="n_ads", target_type=int, min_val=1)
if not isinstance(self.standard_bid_price_distribution, NormalDistribution):
raise ValueError(
f"standard_bid_price_distribution must be a child class of NormalDistribution"
"standard_bid_price_distribution must be a child class of NormalDistribution"
)
if self.minimum_standard_bid_price is None:
self.minimum_standard_bid_price = (
10 changes: 6 additions & 4 deletions _gym/env/simulator/rtb_synthetic.py
@@ -10,7 +10,7 @@
BaseWinningPriceDistribution,
BaseClickAndConversionRate,
)
-from _gym.env.simulator.function import (
+from _gym.env.simulator.function import ( # noqa: F401
WinningPriceDistribution,
ClickThroughRate,
ConversionRate,
@@ -99,9 +99,11 @@ class RTBSyntheticSimulator(BaseSimulator):
user_feature_vector: Optional[np.ndarray] = None
ad_sampling_rate: Optional[np.ndarray] = None
user_sampling_rate: Optional[np.ndarray] = None
-WinningPriceDistribution: BaseWinningPriceDistribution = WinningPriceDistribution
-ClickThroughRate: BaseClickAndConversionRate = ClickThroughRate
-ConversionRate: BaseClickAndConversionRate = ConversionRate
+WinningPriceDistribution: BaseWinningPriceDistribution = ( # noqa: F811
+    WinningPriceDistribution
+)
+ClickThroughRate: BaseClickAndConversionRate = ClickThroughRate # noqa: F811
+ConversionRate: BaseClickAndConversionRate = ConversionRate # noqa: F811
standard_bid_price_distribution: NormalDistribution = NormalDistribution(
mean=50,
std=5,
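The "# noqa: F401" on the import and the "# noqa: F811" comments above are needed, as I read the diff, because the dataclass fields reuse the very names they import as defaults: pyflakes then reports the import as unused (F401) and each field as a redefinition (F811). A minimal self-contained sketch of that pattern, with hypothetical names rather than the repository's actual classes:

    from dataclasses import dataclass


    class WinningPriceDistribution:
        """Stand-in for the default distribution class imported in the real module."""


    @dataclass
    class SimulatorSketch:
        # The field name shadows the class used as its default, so flake8 would
        # report F811 ("redefinition of unused name") here without the noqa comment.
        WinningPriceDistribution: type = WinningPriceDistribution  # noqa: F811


    sim = SimulatorSketch()
    print(sim.WinningPriceDistribution)  # <class '__main__.WinningPriceDistribution'>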
6 changes: 4 additions & 2 deletions _gym/env/wrapper_rtb.py
@@ -55,7 +55,9 @@ class CustomizedRTBEnv(gym.Env):
action: Union[int, float, NDArray] (> 0)
Adjust rate parameter used for the bid price calculation as follows.
Note that the following bid price is individually determined for each auction.
-:math:`bid_price_{t, i} = adjust_rate_{t} \\times predicted_reward_{t,i}/ground_truth_reward_{t, i} ( \\times const.)`
+.. math::
+    {bid price}_{t, i} = {adjust rate}_{t} \\times {predicted reward}_{t,i} ( \\times {const.})
Both discrete and continuous actions are acceptable.
@@ -100,7 +102,7 @@ class CustomizedRTBEnv(gym.Env):
Dictionary which maps discrete action index into specific actions.
Used when only when using action_type="discrete" option.
-If None, the action meaning values automatically set to [action_min, action_max] log sampled values.
+If None, the values are automatically set to [action_min, action_max] as follows.
np.logspace(-1, 1, n_actions)
Examples
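For context on the reworded default above, np.logspace(-1, 1, n_actions) places the discrete action meanings on a log scale between 0.1 and 10. A quick check (the value of n_actions here is illustrative, not the environment's default):

    import numpy as np

    n_actions = 5
    action_meanings = np.logspace(-1, 1, n_actions)
    print(action_meanings)  # [0.1, 0.31622777, 1.0, 3.16227766, 10.0] (approximately)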
8 changes: 4 additions & 4 deletions _gym/ope/estimators_continuous.py
@@ -993,7 +993,7 @@ def _estimate_trajectory_value(
state_action_value_prediction: NDArray, shape (n_episodes * step_per_episode, )
:math:`\\hat{Q}` for the action chosen by evaluation policy,
-i.e., :math:`\\hat{Q}(s_t, \\pi_e(a \mid s_t))`.
+i.e., :math:`\\hat{Q}(s_t, \\pi_e(a \\mid s_t))`.
gamma: float, default=1.0 (0, 1]
Discount factor.
@@ -1081,7 +1081,7 @@ def estimate_policy_value(
state_action_value_prediction: NDArray, shape (n_episodes * step_per_episode, )
:math:`\\hat{Q}` for the action chosen by evaluation policy,
-i.e., :math:`\\hat{Q}(s_t, \\pi_e(a \mid s_t))`.
+i.e., :math:`\\hat{Q}(s_t, \\pi_e(a \\mid s_t))`.
gamma: float, default=1.0 (0, 1]
Discount factor.
@@ -1184,7 +1184,7 @@ def estimate_interval(
state_action_value_prediction: NDArray, shape (n_episodes * step_per_episode, )
:math:`\\hat{Q}` for the action chosen by evaluation policy,
-i.e., :math:`\\hat{Q}(s_t, \\pi_e(a \mid s_t))`.
+i.e., :math:`\\hat{Q}(s_t, \\pi_e(a \\mid s_t))`.
gamma: float, default=1.0 (0, 1]
Discount factor.
@@ -1728,7 +1728,7 @@ def _estimate_trajectory_value(
state_action_value_prediction: NDArray, shape (n_episodes * step_per_episode, )
:math:`\\hat{Q}` for the action chosen by evaluation policy,
-i.e., :math:`\\hat{Q}(s_t, \\pi_e(a \mid s_t))`.
+i.e., :math:`\\hat{Q}(s_t, \\pi_e(a \\mid s_t))`.
gamma: float, default=1.0 (0, 1]
Discount factor.
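The repeated \mid to \\mid edits above look cosmetic, but inside a normal (non-raw) docstring \m is an invalid escape sequence, which flake8 flags (W605 is my guess at the exact code; the commit itself only says flake8). Doubling the backslash leaves the rendered text unchanged, as this small sketch (not the repository's code) shows:

    escaped = ":math:`\\hat{Q}(s_t, \\pi_e(a \\mid s_t))`"  # form used in this commit
    raw_str = r":math:`\hat{Q}(s_t, \pi_e(a \mid s_t))`"    # equivalent raw-string form
    assert escaped == raw_str
    print(escaped)  # :math:`\hat{Q}(s_t, \pi_e(a \mid s_t))`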
8 changes: 4 additions & 4 deletions _gym/ope/estimators_discrete.py
@@ -759,7 +759,7 @@ def _estimate_trajectory_value(
evaluation_policy_action_dist: NDArray, shape (n_episodes * step_per_episode, n_action)
Action choice probability of evaluation policy for all action,
-i.e., :math:`\\pi_e(a \mid s_t) \\forall a \\in \\mathcal{A}`
+i.e., :math:`\\pi_e(a \\mid s_t) \\forall a \\in \\mathcal{A}`
state_action_value_prediction: NDArray, shape (n_episodes * step_per_episode, n_action)
:math:`\\hat{Q}` for all action,
@@ -833,7 +833,7 @@ def estimate_policy_value(
evaluation_policy_action_dist: NDArray, shape (n_episodes * step_per_episode, n_action)
Action choice probability of evaluation policy for all action,
-i.e., :math:`\\pi_e(a \mid s_t) \\forall a \\in \\mathcal{A}`
+i.e., :math:`\\pi_e(a \\mid s_t) \\forall a \\in \\mathcal{A}`
state_action_value_prediction: NDArray, shape (n_episodes * step_per_episode, n_action)
:math:`\\hat{Q}` for all action,
@@ -972,7 +972,7 @@ def estimate_interval(
evaluation_policy_action_dist: NDArray, shape (n_episodes * step_per_episode, n_action)
Action choice probability of evaluation policy for all action,
-i.e., :math:`\\pi_e(a \mid s_t) \\forall a \\in \\mathcal{A}`
+i.e., :math:`\\pi_e(a \\mid s_t) \\forall a \\in \\mathcal{A}`
state_action_value_prediction: NDArray, shape (n_episodes * step_per_episode, n_action)
:math:`\\hat{Q}` for all action,
@@ -1368,7 +1368,7 @@ def _estimate_trajectory_value(
evaluation_policy_action_dist: NDArray, shape (n_episodes * step_per_episode, n_action)
Action choice probability of evaluation policy for all action,
-i.e., :math:`\\pi_e(a \mid s_t) \\forall a \\in \\mathcal{A}`
+i.e., :math:`\\pi_e(a \\mid s_t) \\forall a \\in \\mathcal{A}`
state_action_value_prediction: NDArray, shape (n_episodes * step_per_episode, n_action)
:math:`\\hat{Q}` for all action,
4 changes: 2 additions & 2 deletions _gym/ope/ope.py
@@ -1077,7 +1077,7 @@ def obtain_whole_inputs(
evaluation_policy_action_dist: Optional[NDArray], shape (n_episodes * step_per_episode, n_actions)
Action choice probability of evaluation policy for all actions,
-i.e., :math:`\\pi_e(a \mid s_t) \\forall a \\in \\mathcal{A}`
+i.e., :math:`\\pi_e(a \\mid s_t) \\forall a \\in \\mathcal{A}`
If action_type == "continuous", `None` is recorded.
state_action_value_prediction: Optional[NDArray]
@@ -1086,7 +1086,7 @@
shape (n_episodes * step_per_episode, n_actions)
If action_type == "continuous", :math:`\\hat{Q}` for the action chosen by evaluation policy,
-i.e., :math:`\\hat{Q}(s_t, \\pi_e(a \mid s_t))`.
+i.e., :math:`\\hat{Q}(s_t, \\pi_e(a \\mid s_t))`.
shape (n_episodes * step_per_episode, )
If use_base_model == False, `None` is recorded.
11 changes: 6 additions & 5 deletions _gym/policy/head.py
@@ -212,7 +212,7 @@ class DiscreteEpsilonGreedyHead(BaseHead):
Epsilon-greedy policy stochastically chooses actions (i.e., :math:`a \\in \\mathcal{A}`) given state :math:`s` as follows.
.. math::
-\\pi(a \mid s) := (1 - \\epsilon) * \\mathbb{I}(a = a*)) + \\epsilon / |\\mathcal{A}|
+\\pi(a \\mid s) := (1 - \\epsilon) * \\mathbb{I}(a = a*)) + \\epsilon / |\\mathcal{A}|
where :math:`\\epsilon` is the probability of taking random actions and :math:`a*` is the greedy action.
:math:`\\mathbb{I}(\\cdot)` denotes indicator function.
@@ -357,7 +357,7 @@ class DiscreteSoftmaxHead(BaseHead):
Softmax policy stochastically chooses actions (i.e., :math:`a \\in \\mathcal{A}`) given state :math:`s` as follows.
.. math::
-\\pi(a \mid s) := \\frac{\\exp(Q(s, a) / \\tau)}{\\sum_{a' \\in A} \\exp(Q(s, a') / \\tau)}
+\\pi(a \\mid s) := \\frac{\\exp(Q(s, a) / \\tau)}{\\sum_{a' \\in A} \\exp(Q(s, a') / \\tau)}
where :math:`\\tau` is the temperature parameter of the softmax function.
:math:`Q(s, a)` is the predicted value for the given :math:`(s, a)` pair.
@@ -373,8 +373,9 @@ class DiscreteSoftmaxHead(BaseHead):
n_actions: int (> 0)
Numbers of actions.
-tau: float, default=1.0 (-\infty, \infty)
+tau: float, default=1.0 (:math:`\\in (- \\infty, \\infty)`)
Temperature parameter.
+A negative value leads to a sub-optimal policy.
random_state: Optional[int], default=None (>= 0)
Random state.
@@ -544,7 +545,7 @@ class ContinuousGaussianHead(BaseHead):
This class should be used when action_space is not clipped.
Otherwise, please use ContinuousTruncatedGaussianHead instead.
-Given a deterministic policy, gaussian policy samples action :math:`a \\in \mathcal{A}` given state :math:`s` as follows.
+Given a deterministic policy, gaussian policy samples action :math:`a \\in \\mathcal{A}` given state :math:`s` as follows.
.. math::
a \\sim Normal(\\pi(s), \\sigma)
@@ -680,7 +681,7 @@ class ContinuousTruncatedGaussianHead(BaseHead):
Note
-------
-Given a deterministic policy, truncated gaussian policy samples action :math:`a \\in \mathcal{A}` given state :math:`s` as follows.
+Given a deterministic policy, truncated gaussian policy samples action :math:`a \\in \\mathcal{A}` given state :math:`s` as follows.
.. math::
a \\sim TruncNorm(\\pi(s), \\sigma)
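The added warning that a negative tau yields a sub-optimal policy follows from the softmax formula quoted above: dividing Q-values by a negative temperature reverses their ranking. A small illustrative check with a hypothetical helper, not the repository's DiscreteSoftmaxHead API:

    import numpy as np


    def softmax_policy(q_values: np.ndarray, tau: float) -> np.ndarray:
        """pi(a|s) = exp(Q(s,a)/tau) / sum_a' exp(Q(s,a')/tau), as in the docstring above."""
        logits = q_values / tau
        logits -= logits.max()  # subtract the max for numerical stability
        probs = np.exp(logits)
        return probs / probs.sum()


    q = np.array([1.0, 2.0, 3.0])
    print(softmax_policy(q, tau=1.0))   # mass concentrates on the best action (Q = 3)
    print(softmax_policy(q, tau=-1.0))  # mass concentrates on the worst action (Q = 1)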
7 changes: 7 additions & 0 deletions setup.cfg
@@ -0,0 +1,7 @@
+[flake8]
+ignore =
+    E501,W503,E203
+# We ignore the following errors as they are incompatible with black formatter.
+# We ignore E501: line too long
+# We ignore W503: line break before binary operator
+# We ignore E203: whitespace before ':'
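The three ignored codes are the usual flake8 adjustments for black-formatted code, as the comments above state. An illustrative snippet (not from this repository) that black produces but default flake8 settings would reject:

    # E203: black puts spaces around ':' in slices whose bounds are expressions.
    # W503: black breaks long expressions *before* binary operators.
    # E501: black wraps lines at 88 characters, beyond flake8's default 79-column limit.
    ham = list(range(10))
    lower, upper, offset = 1, 8, 2

    sliced = ham[lower + offset : upper + offset]  # would be E203 without the ignore

    total = (
        sum(sliced)
        + offset  # would be W503 without the ignore
    )
    print(total)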
26 changes: 13 additions & 13 deletions tests/env/simulator/test_rtb_synthetic.py
@@ -360,7 +360,7 @@
-1, #
24,
ValueError,
"minimum_standard_bid_price must be a float value within [0, standard_bid_price_distribution.mean]",
"minimum_standard_bid_price must be a float value within",
),
(
5,
@@ -373,7 +373,7 @@
101, #
24,
ValueError,
"minimum_standard_bid_price must be a float value within [0, standard_bid_price_distribution.mean]",
"minimum_standard_bid_price must be a float value within",
),
(
5,
@@ -386,7 +386,7 @@
"1", #
24,
ValueError,
"minimum_standard_bid_price must be a float value within [0, standard_bid_price_distribution.mean]",
"minimum_standard_bid_price must be a float value within",
),
(
5,
@@ -614,7 +614,7 @@ def test_generate_auction_using_valid_input():
-np.ones(3, dtype=int), #
np.ones(3, dtype=int),
ValueError,
"ad_ids must be 1-dimensional NDArray with integers within \[0, n_ads\)",
"ad_ids must be 1-dimensional NDArray with integers within",
),
(
2,
@@ -630,23 +630,23 @@ def test_generate_auction_using_valid_input():
np.arange(3, dtype=int), #
np.ones(3, dtype=int),
ValueError,
"ad_ids must be 1-dimensional NDArray with integers within \[0, n_ads\)",
"ad_ids must be 1-dimensional NDArray with integers within",
),
(
2,
2,
np.ones((2, 3), dtype=int), #
np.ones(3, dtype=int),
ValueError,
"ad_ids must be 1-dimensional NDArray with integers within \[0, n_ads\)",
"ad_ids must be 1-dimensional NDArray with integers within",
),
(
2,
2,
np.ones(3, dtype=int),
-np.ones(3, dtype=int), #
ValueError,
"user_ids must be 1-dimensional NDArray with integers within \[0, n_users\)",
"user_ids must be 1-dimensional NDArray with integers within",
),
(
2,
@@ -662,15 +662,15 @@ def test_generate_auction_using_valid_input():
np.ones(3, dtype=int),
np.arange(3, dtype=int), #
ValueError,
"user_ids must be 1-dimensional NDArray with integers within \[0, n_users\)",
"user_ids must be 1-dimensional NDArray with integers within",
),
(
2,
2,
np.ones(3, dtype=int),
np.ones((2, 3), dtype=int), #
ValueError,
"user_ids must be 1-dimensional NDArray with integers within \[0, n_users\)",
"user_ids must be 1-dimensional NDArray with integers within",
),
(
2,
@@ -776,7 +776,7 @@ def test_map_idx_to_contexts(n_ads, n_users, ad_ids, user_ids):
np.ones(3, dtype=int),
np.ones(3, dtype=int),
ValueError,
"ad_ids must be 1-dimensional NDArray with integers within \[0, n_ads\)",
"ad_ids must be 1-dimensional NDArray with integers within",
),
(
2,
@@ -796,7 +796,7 @@ def test_map_idx_to_contexts(n_ads, n_users, ad_ids, user_ids):
np.ones(3, dtype=int),
np.ones(3, dtype=int),
ValueError,
"ad_ids must be 1-dimensional NDArray with integers within \[0, n_ads\)",
"ad_ids must be 1-dimensional NDArray with integers within",
),
(
2,
@@ -806,7 +806,7 @@ def test_map_idx_to_contexts(n_ads, n_users, ad_ids, user_ids):
-np.ones(3, dtype=int), #
np.ones(3, dtype=int),
ValueError,
"user_ids must be 1-dimensional NDArray with integers within \[0, n_users\)",
"user_ids must be 1-dimensional NDArray with integers within",
),
(
2,
@@ -826,7 +826,7 @@ def test_map_idx_to_contexts(n_ads, n_users, ad_ids, user_ids):
np.arange(3, dtype=int), #
np.ones(3, dtype=int),
ValueError,
"user_ids must be 1-dimensional NDArray with integers within \[0, n_users\)",
"user_ids must be 1-dimensional NDArray with integers within",
),
(
2,
6 changes: 3 additions & 3 deletions tests/env/test_bidder.py
@@ -224,7 +224,7 @@ def test_init_using_valid_input(simulator, objective, reward_predictor, scaler):
2 * np.ones(2, dtype=int), #
np.arange(2, dtype=int),
ValueError,
"ad_ids must be 1-dimensional NDArray with integers within \[0, n_ads\)",
"ad_ids must be 1-dimensional NDArray with integers",
),
(
2,
@@ -254,7 +254,7 @@ def test_init_using_valid_input(simulator, objective, reward_predictor, scaler):
np.arange(2, dtype=int),
-np.ones(2, dtype=int), #
ValueError,
"user_ids must be 1-dimensional NDArray with integers within \[0, n_users\)",
"user_ids must be 1-dimensional NDArray with integers",
),
(
2,
@@ -264,7 +264,7 @@ def test_init_using_valid_input(simulator, objective, reward_predictor, scaler):
np.arange(2),
2 * np.ones(2, dtype=int), #
ValueError,
"user_ids must be 1-dimensional NDArray with integers within \[0, n_users\)",
"user_ids must be 1-dimensional NDArray with integers",
),
(
2,
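The shortened error messages in both test files above drop the "[0, n_ads)" / "[0, n_users)" suffix. Presumably this is because pytest.raises(..., match=...) treats the string as a regular expression, so the literal brackets would need backslash escaping, and those backslashes in a non-raw string are exactly the invalid escape sequences flake8 complains about. Matching a bracket-free prefix sidesteps both problems. A minimal sketch with a hypothetical validator, not the repository's code:

    import numpy as np
    import pytest


    def check_ad_ids(ad_ids: np.ndarray, n_ads: int) -> None:
        """Hypothetical stand-in for the validation these tests exercise."""
        if ad_ids.ndim != 1 or ad_ids.min() < 0 or ad_ids.max() >= n_ads:
            raise ValueError(
                "ad_ids must be 1-dimensional NDArray with integers within [0, n_ads)"
            )


    def test_check_ad_ids_rejects_negative_ids():
        # Matching only the bracket-free prefix keeps the pattern a plain string.
        with pytest.raises(
            ValueError, match="ad_ids must be 1-dimensional NDArray with integers within"
        ):
            check_ad_ids(-np.ones(3, dtype=int), n_ads=2)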

0 comments on commit 3bff985
