diff --git a/_gym/__init__.py b/_gym/__init__.py
index 58f3ace6..5ba63ea1 100644
--- a/_gym/__init__.py
+++ b/_gym/__init__.py
@@ -1 +1 @@
-from .version import __version__
+from .version import __version__  # noqa
diff --git a/_gym/env/bidder.py b/_gym/env/bidder.py
index 80cd3dcd..f0e1aef0 100644
--- a/_gym/env/bidder.py
+++ b/_gym/env/bidder.py
@@ -21,7 +21,9 @@ class Bidder:
     Intended to be called and initialized from RTBEnv class in env.py.

     Determine bid price by the following formula.
-    :math:`bid_price_{t, i} = adjust_rate_{t} \\times predicted_reward_{t,i}/ground_truth_reward_{t, i} ( \\times const.)`
+
+    .. math::
+        {bid price}_{t, i} = {adjust rate}_{t} \\times {predicted reward}_{t,i} ( \\times {const.})

     Parameters
     -------
@@ -101,7 +103,9 @@ def determine_bid_price(
         Note
         -------
         Determine bid price as follows.
-        :math:`bid_price_{t, i} = adjust_rate_{t} \\times predicted_reward_{t,i}/ground_truth_reward_{t, i} ( \\times const.)`
+
+        .. math::
+            {bid price}_{t, i} = {adjust rate}_{t} \\times {predicted reward}_{t,i} ( \\times {const.})

         Parameters
         -------
diff --git a/_gym/env/rtb.py b/_gym/env/rtb.py
index f7105f2b..edfced04 100644
--- a/_gym/env/rtb.py
+++ b/_gym/env/rtb.py
@@ -48,7 +48,8 @@ class RTBEnv(gym.Env):
         Adjust rate parameter used for determining the bid price as follows.
         (Bid price is individually determined for each auction.)

-        :math:`bid_price_{t, i} = adjust_rate_{t} \\times ground_truth_reward_{t, i} ( \\times const.)`
+        .. math::
+            {bid price}_{t, i} = {adjust rate}_{t} \\times {predicted reward}_{t,i} ( \\times {const.})

         Note that, you can also use predicted reward instead of ground-truth reward in the above equation.
         Please also refer to CustomizedRTBEnv Wrapper.
diff --git a/_gym/env/simulator/function.py b/_gym/env/simulator/function.py
index 5db8c34c..89406520 100644
--- a/_gym/env/simulator/function.py
+++ b/_gym/env/simulator/function.py
@@ -80,7 +80,7 @@ def __post_init__(self):
         check_scalar(self.n_ads, name="n_ads", target_type=int, min_val=1)
         if not isinstance(self.standard_bid_price_distribution, NormalDistribution):
             raise ValueError(
-                f"standard_bid_price_distribution must be a child class of NormalDistribution"
+                "standard_bid_price_distribution must be a child class of NormalDistribution"
             )
         if self.minimum_standard_bid_price is None:
             self.minimum_standard_bid_price = (
diff --git a/_gym/env/simulator/rtb_synthetic.py b/_gym/env/simulator/rtb_synthetic.py
index 5517f913..6de6d028 100644
--- a/_gym/env/simulator/rtb_synthetic.py
+++ b/_gym/env/simulator/rtb_synthetic.py
@@ -10,7 +10,7 @@
     BaseWinningPriceDistribution,
     BaseClickAndConversionRate,
 )
-from _gym.env.simulator.function import (
+from _gym.env.simulator.function import (  # noqa: F401
     WinningPriceDistribution,
     ClickThroughRate,
     ConversionRate,
@@ -99,9 +99,11 @@ class RTBSyntheticSimulator(BaseSimulator):
     user_feature_vector: Optional[np.ndarray] = None
     ad_sampling_rate: Optional[np.ndarray] = None
     user_sampling_rate: Optional[np.ndarray] = None
-    WinningPriceDistribution: BaseWinningPriceDistribution = WinningPriceDistribution
-    ClickThroughRate: BaseClickAndConversionRate = ClickThroughRate
-    ConversionRate: BaseClickAndConversionRate = ConversionRate
+    WinningPriceDistribution: BaseWinningPriceDistribution = (  # noqa: F811
+        WinningPriceDistribution
+    )
+    ClickThroughRate: BaseClickAndConversionRate = ClickThroughRate  # noqa: F811
+    ConversionRate: BaseClickAndConversionRate = ConversionRate  # noqa: F811
     standard_bid_price_distribution: NormalDistribution = NormalDistribution(
         mean=50,
         std=5,
diff --git a/_gym/env/wrapper_rtb.py b/_gym/env/wrapper_rtb.py
index 9f1c9fc9..90e69662 100644
--- a/_gym/env/wrapper_rtb.py
+++ b/_gym/env/wrapper_rtb.py
@@ -55,7 +55,9 @@ class CustomizedRTBEnv(gym.Env):
     action: Union[int, float, NDArray] (> 0)
         Adjust rate parameter used for the bid price calculation as follows.
         Note that the following bid price is individually determined for each auction.
-        :math:`bid_price_{t, i} = adjust_rate_{t} \\times predicted_reward_{t,i}/ground_truth_reward_{t, i} ( \\times const.)`
+
+        .. math::
+            {bid price}_{t, i} = {adjust rate}_{t} \\times {predicted reward}_{t,i} ( \\times {const.})

         Both discrete and continuous actions are acceptable.

@@ -100,7 +102,7 @@ class CustomizedRTBEnv(gym.Env):
         Dictionary which maps discrete action index into specific actions.
         Used when only when using action_type="discrete" option.

-        If None, the action meaning values automatically set to [action_min, action_max] log sampled values.
+        If None, the values are automatically set to [action_min, action_max] as follows.
             np.logspace(-1, 1, n_actions)

     Examples
diff --git a/_gym/ope/estimators_continuous.py b/_gym/ope/estimators_continuous.py
index 99525c19..5a409289 100644
--- a/_gym/ope/estimators_continuous.py
+++ b/_gym/ope/estimators_continuous.py
@@ -993,7 +993,7 @@ def _estimate_trajectory_value(

         state_action_value_prediction: NDArray, shape (n_episodes * step_per_episode, )
             :math:`\\hat{Q}` for the action chosen by evaluation policy,
-            i.e., :math:`\\hat{Q}(s_t, \\pi_e(a \mid s_t))`.
+            i.e., :math:`\\hat{Q}(s_t, \\pi_e(a \\mid s_t))`.

         gamma: float, default=1.0 (0, 1]
             Discount factor.
@@ -1081,7 +1081,7 @@ def estimate_policy_value(

         state_action_value_prediction: NDArray, shape (n_episodes * step_per_episode, )
             :math:`\\hat{Q}` for the action chosen by evaluation policy,
-            i.e., :math:`\\hat{Q}(s_t, \\pi_e(a \mid s_t))`.
+            i.e., :math:`\\hat{Q}(s_t, \\pi_e(a \\mid s_t))`.

         gamma: float, default=1.0 (0, 1]
             Discount factor.
@@ -1184,7 +1184,7 @@ def estimate_interval(

         state_action_value_prediction: NDArray, shape (n_episodes * step_per_episode, )
             :math:`\\hat{Q}` for the action chosen by evaluation policy,
-            i.e., :math:`\\hat{Q}(s_t, \\pi_e(a \mid s_t))`.
+            i.e., :math:`\\hat{Q}(s_t, \\pi_e(a \\mid s_t))`.

         gamma: float, default=1.0 (0, 1]
             Discount factor.
@@ -1728,7 +1728,7 @@ def _estimate_trajectory_value(

         state_action_value_prediction: NDArray, shape (n_episodes * step_per_episode, )
             :math:`\\hat{Q}` for the action chosen by evaluation policy,
-            i.e., :math:`\\hat{Q}(s_t, \\pi_e(a \mid s_t))`.
+            i.e., :math:`\\hat{Q}(s_t, \\pi_e(a \\mid s_t))`.

         gamma: float, default=1.0 (0, 1]
             Discount factor.
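The docstring hunks above standardize the bid-price rule, bid price = adjust rate x predicted reward (x const.), and point to np.logspace(-1, 1, n_actions) as the default mapping from discrete actions to adjust rates in CustomizedRTBEnv. A minimal sketch of that calculation, using illustrative names and values that are not the library's API, looks like this:

import numpy as np

# Illustrative sketch only; `action_meaning`, `predicted_reward`, and `const`
# are made-up names for this example, not identifiers from the library.
n_actions = 10
action_meaning = np.logspace(-1, 1, n_actions)  # adjust rates spanning [0.1, 10.0]

action_idx = 7                                   # discrete action chosen by some policy
adjust_rate = action_meaning[action_idx]

predicted_reward = np.array([0.02, 0.05, 0.01])  # per-auction reward predictions
const = 1000.0                                   # optional scaling constant

# bid_price_{t, i} = adjust_rate_t * predicted_reward_{t, i} (* const.)
bid_prices = adjust_rate * predicted_reward * const
print(bid_prices.round(2))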
diff --git a/_gym/ope/estimators_discrete.py b/_gym/ope/estimators_discrete.py
index 8d77d7e6..f32a2f43 100644
--- a/_gym/ope/estimators_discrete.py
+++ b/_gym/ope/estimators_discrete.py
@@ -759,7 +759,7 @@ def _estimate_trajectory_value(

         evaluation_policy_action_dist: NDArray, shape (n_episodes * step_per_episode, n_action)
             Action choice probability of evaluation policy for all action,
-            i.e., :math:`\\pi_e(a \mid s_t) \\forall a \\in \\mathcal{A}`
+            i.e., :math:`\\pi_e(a \\mid s_t) \\forall a \\in \\mathcal{A}`

         state_action_value_prediction: NDArray, shape (n_episodes * step_per_episode, n_action)
             :math:`\\hat{Q}` for all action,
@@ -833,7 +833,7 @@ def estimate_policy_value(

         evaluation_policy_action_dist: NDArray, shape (n_episodes * step_per_episode, n_action)
             Action choice probability of evaluation policy for all action,
-            i.e., :math:`\\pi_e(a \mid s_t) \\forall a \\in \\mathcal{A}`
+            i.e., :math:`\\pi_e(a \\mid s_t) \\forall a \\in \\mathcal{A}`

         state_action_value_prediction: NDArray, shape (n_episodes * step_per_episode, n_action)
             :math:`\\hat{Q}` for all action,
@@ -972,7 +972,7 @@ def estimate_interval(

         evaluation_policy_action_dist: NDArray, shape (n_episodes * step_per_episode, n_action)
             Action choice probability of evaluation policy for all action,
-            i.e., :math:`\\pi_e(a \mid s_t) \\forall a \\in \\mathcal{A}`
+            i.e., :math:`\\pi_e(a \\mid s_t) \\forall a \\in \\mathcal{A}`

         state_action_value_prediction: NDArray, shape (n_episodes * step_per_episode, n_action)
             :math:`\\hat{Q}` for all action,
@@ -1368,7 +1368,7 @@ def _estimate_trajectory_value(

         evaluation_policy_action_dist: NDArray, shape (n_episodes * step_per_episode, n_action)
             Action choice probability of evaluation policy for all action,
-            i.e., :math:`\\pi_e(a \mid s_t) \\forall a \\in \\mathcal{A}`
+            i.e., :math:`\\pi_e(a \\mid s_t) \\forall a \\in \\mathcal{A}`

         state_action_value_prediction: NDArray, shape (n_episodes * step_per_episode, n_action)
             :math:`\\hat{Q}` for all action,
diff --git a/_gym/ope/ope.py b/_gym/ope/ope.py
index fe7a17a9..4ad4ae37 100644
--- a/_gym/ope/ope.py
+++ b/_gym/ope/ope.py
@@ -1077,7 +1077,7 @@ def obtain_whole_inputs(

         evaluation_policy_action_dist: Optional[NDArray], shape (n_episodes * step_per_episode, n_actions)
             Action choice probability of evaluation policy for all actions,
-            i.e., :math:`\\pi_e(a \mid s_t) \\forall a \\in \\mathcal{A}`
+            i.e., :math:`\\pi_e(a \\mid s_t) \\forall a \\in \\mathcal{A}`
             If action_type == "continuous", `None` is recorded.

         state_action_value_prediction: Optional[NDArray]
@@ -1086,7 +1086,7 @@ def obtain_whole_inputs(
             shape (n_episodes * step_per_episode, n_actions)

             If action_type == "continuous", :math:`\\hat{Q}` for the action chosen by evaluation policy,
-            i.e., :math:`\\hat{Q}(s_t, \\pi_e(a \mid s_t))`.
+            i.e., :math:`\\hat{Q}(s_t, \\pi_e(a \\mid s_t))`.
             shape (n_episodes * step_per_episode, )

             If use_base_model == False, `None` is recorded.
diff --git a/_gym/policy/head.py b/_gym/policy/head.py
index 59a8293a..bd431687 100644
--- a/_gym/policy/head.py
+++ b/_gym/policy/head.py
@@ -212,7 +212,7 @@ class DiscreteEpsilonGreedyHead(BaseHead):
     Epsilon-greedy policy stochastically chooses actions (i.e., :math:`a \\in \\mathcal{A}`) given state :math:`s` as follows.

     .. math::
-        \\pi(a \mid s) := (1 - \\epsilon) * \\mathbb{I}(a = a*)) + \\epsilon / |\\mathcal{A}|
+        \\pi(a \\mid s) := (1 - \\epsilon) * \\mathbb{I}(a = a*)) + \\epsilon / |\\mathcal{A}|

     where :math:`\\epsilon` is the probability of taking random actions and :math:`a*` is the greedy action.
     :math:`\\mathbb{I}(\\cdot)` denotes indicator function.
@@ -357,7 +357,7 @@ class DiscreteSoftmaxHead(BaseHead):
     Softmax policy stochastically chooses actions (i.e., :math:`a \\in \\mathcal{A}`) given state :math:`s` as follows.

     .. math::
-        \\pi(a \mid s) := \\frac{\\exp(Q(s, a) / \\tau)}{\\sum_{a' \\in A} \\exp(Q(s, a') / \\tau)}
+        \\pi(a \\mid s) := \\frac{\\exp(Q(s, a) / \\tau)}{\\sum_{a' \\in A} \\exp(Q(s, a') / \\tau)}

     where :math:`\\tau` is the temperature parameter of the softmax function.
     :math:`Q(s, a)` is the predicted value for the given :math:`(s, a)` pair.
@@ -373,8 +373,9 @@ class DiscreteSoftmaxHead(BaseHead):
     n_actions: int (> 0)
         Numbers of actions.

-    tau: float, default=1.0 (-\infty, \infty)
+    tau: float, default=1.0 (:math:`\\in (- \\infty, \\infty)`)
         Temperature parameter.
+        A negative value leads to a sub-optimal policy.

     random_state: Optional[int], default=None (>= 0)
         Random state.
@@ -544,7 +545,7 @@ class ContinuousGaussianHead(BaseHead):
     This class should be used when action_space is not clipped.
     Otherwise, please use ContinuousTruncatedGaussianHead instead.

-    Given a deterministic policy, gaussian policy samples action :math:`a \\in \mathcal{A}` given state :math:`s` as follows.
+    Given a deterministic policy, gaussian policy samples action :math:`a \\in \\mathcal{A}` given state :math:`s` as follows.

     .. math::
         a \\sim Normal(\\pi(s), \\sigma)
@@ -680,7 +681,7 @@ class ContinuousTruncatedGaussianHead(BaseHead):

     Note
     -------
-    Given a deterministic policy, truncated gaussian policy samples action :math:`a \\in \mathcal{A}` given state :math:`s` as follows.
+    Given a deterministic policy, truncated gaussian policy samples action :math:`a \\in \\mathcal{A}` given state :math:`s` as follows.

     .. math::
         a \\sim TruncNorm(\\pi(s), \\sigma)
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 00000000..14a938c0
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,7 @@
+[flake8]
+ignore =
+    E501,W503,E203
+    # We ignore the following errors as they are incompatible with black formatter.
+    # We ignore E501: line too long
+    # We ignore W503: line break before binary operator
+    # We ignore E203: whitespace before ':'
diff --git a/tests/env/simulator/test_rtb_synthetic.py b/tests/env/simulator/test_rtb_synthetic.py
index cce0c1dc..a600a8bd 100644
--- a/tests/env/simulator/test_rtb_synthetic.py
+++ b/tests/env/simulator/test_rtb_synthetic.py
@@ -360,7 +360,7 @@
             -1,  #
             24,
             ValueError,
-            "minimum_standard_bid_price must be a float value within [0, standard_bid_price_distribution.mean]",
+            "minimum_standard_bid_price must be a float value within",
         ),
         (
             5,
@@ -373,7 +373,7 @@
             101,  #
             24,
             ValueError,
-            "minimum_standard_bid_price must be a float value within [0, standard_bid_price_distribution.mean]",
+            "minimum_standard_bid_price must be a float value within",
         ),
         (
             5,
@@ -386,7 +386,7 @@
             "1",  #
             24,
             ValueError,
-            "minimum_standard_bid_price must be a float value within [0, standard_bid_price_distribution.mean]",
+            "minimum_standard_bid_price must be a float value within",
         ),
         (
             5,
@@ -614,7 +614,7 @@ def test_generate_auction_using_valid_input():
             -np.ones(3, dtype=int),  #
             np.ones(3, dtype=int),
             ValueError,
-            "ad_ids must be 1-dimensional NDArray with integers within \[0, n_ads\)",
+            "ad_ids must be 1-dimensional NDArray with integers within",
         ),
         (
             2,
@@ -630,7 +630,7 @@ def test_generate_auction_using_valid_input():
             np.arange(3, dtype=int),  #
             np.ones(3, dtype=int),
             ValueError,
-            "ad_ids must be 1-dimensional NDArray with integers within \[0, n_ads\)",
+            "ad_ids must be 1-dimensional NDArray with integers within",
         ),
         (
             2,
@@ -638,7 +638,7 @@ def test_generate_auction_using_valid_input():
             np.ones((2, 3), dtype=int),  #
             np.ones(3, dtype=int),
             ValueError,
-            "ad_ids must be 1-dimensional NDArray with integers within \[0, n_ads\)",
+            "ad_ids must be 1-dimensional NDArray with integers within",
         ),
         (
             2,
@@ -646,7 +646,7 @@ def test_generate_auction_using_valid_input():
             np.ones(3, dtype=int),
             -np.ones(3, dtype=int),  #
             ValueError,
-            "user_ids must be 1-dimensional NDArray with integers within \[0, n_users\)",
+            "user_ids must be 1-dimensional NDArray with integers within",
         ),
         (
             2,
@@ -662,7 +662,7 @@ def test_generate_auction_using_valid_input():
             np.ones(3, dtype=int),
             np.arange(3, dtype=int),  #
             ValueError,
-            "user_ids must be 1-dimensional NDArray with integers within \[0, n_users\)",
+            "user_ids must be 1-dimensional NDArray with integers within",
         ),
         (
             2,
@@ -670,7 +670,7 @@ def test_generate_auction_using_valid_input():
             np.ones(3, dtype=int),
             np.ones((2, 3), dtype=int),  #
             ValueError,
-            "user_ids must be 1-dimensional NDArray with integers within \[0, n_users\)",
+            "user_ids must be 1-dimensional NDArray with integers within",
         ),
         (
             2,
@@ -776,7 +776,7 @@ def test_map_idx_to_contexts(n_ads, n_users, ad_ids, user_ids):
             np.ones(3, dtype=int),
             np.ones(3, dtype=int),
             ValueError,
-            "ad_ids must be 1-dimensional NDArray with integers within \[0, n_ads\)",
+            "ad_ids must be 1-dimensional NDArray with integers within",
         ),
         (
             2,
@@ -796,7 +796,7 @@ def test_map_idx_to_contexts(n_ads, n_users, ad_ids, user_ids):
             np.ones(3, dtype=int),
             np.ones(3, dtype=int),
             ValueError,
-            "ad_ids must be 1-dimensional NDArray with integers within \[0, n_ads\)",
+            "ad_ids must be 1-dimensional NDArray with integers within",
         ),
         (
             2,
@@ -806,7 +806,7 @@ def test_map_idx_to_contexts(n_ads, n_users, ad_ids, user_ids):
             -np.ones(3, dtype=int),  #
             np.ones(3, dtype=int),
             ValueError,
-            "user_ids must be 1-dimensional NDArray with integers within \[0, n_users\)",
+            "user_ids must be 1-dimensional NDArray with integers within",
         ),
         (
             2,
@@ -826,7 +826,7 @@ def test_map_idx_to_contexts(n_ads, n_users, ad_ids, user_ids):
             np.arange(3, dtype=int),  #
             np.ones(3, dtype=int),
             ValueError,
-            "user_ids must be 1-dimensional NDArray with integers within \[0, n_users\)",
+            "user_ids must be 1-dimensional NDArray with integers within",
         ),
         (
             2,
diff --git a/tests/env/test_bidder.py b/tests/env/test_bidder.py
index 2f0fe173..4af28c74 100644
--- a/tests/env/test_bidder.py
+++ b/tests/env/test_bidder.py
@@ -224,7 +224,7 @@ def test_init_using_valid_input(simulator, objective, reward_predictor, scaler):
             2 * np.ones(2, dtype=int),  #
             np.arange(2, dtype=int),
             ValueError,
-            "ad_ids must be 1-dimensional NDArray with integers within \[0, n_ads\)",
+            "ad_ids must be 1-dimensional NDArray with integers",
         ),
         (
             2,
@@ -254,7 +254,7 @@ def test_init_using_valid_input(simulator, objective, reward_predictor, scaler):
             np.arange(2, dtype=int),
             -np.ones(2, dtype=int),  #
             ValueError,
-            "user_ids must be 1-dimensional NDArray with integers within \[0, n_users\)",
+            "user_ids must be 1-dimensional NDArray with integers",
         ),
         (
             2,
@@ -264,7 +264,7 @@ def test_init_using_valid_input(simulator, objective, reward_predictor, scaler):
             np.arange(2),
             2 * np.ones(2, dtype=int),  #
             ValueError,
-            "user_ids must be 1-dimensional NDArray with integers within \[0, n_users\)",
+            "user_ids must be 1-dimensional NDArray with integers",
         ),
         (
             2,
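The test hunks above shorten the expected error messages to plain-text prefixes. One likely reason, stated here as an assumption rather than something the patch itself says, is that pytest's `match` argument is interpreted as a regular expression, so bracketed text such as "[0, n_ads)" has to be escaped (hence the old "\[0, n_ads\)" patterns), while a prefix without metacharacters does not. A minimal sketch with made-up validation logic:

import numpy as np
import pytest


def check_ad_ids(ad_ids, n_ads):
    # Toy stand-in for the validation under test; not the library's actual code.
    ad_ids = np.asarray(ad_ids)
    if ad_ids.ndim != 1 or (ad_ids < 0).any() or (ad_ids >= n_ads).any():
        raise ValueError(
            "ad_ids must be 1-dimensional NDArray with integers within [0, n_ads)"
        )


def test_invalid_ad_ids():
    # `match` is a regex, so matching only a plain-text prefix (as the updated
    # tests do) avoids having to escape "[0, n_ads)".
    with pytest.raises(
        ValueError, match="ad_ids must be 1-dimensional NDArray with integers within"
    ):
        check_ad_ids(-np.ones(3, dtype=int), n_ads=5)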