base.py
# Copyright (c) 2023, Haruka Kiyohara, Ren Kishimoto, HAKUHODO Technologies Inc., and Hanjuku-kaso Co., Ltd. All rights reserved.
# Licensed under the Apache 2.0 License.
"""Abstract Base Class for Simulation."""
from abc import ABCMeta, abstractmethod
from dataclasses import dataclass

import numpy as np

from ...types import Action


@dataclass
class BaseUserModel(metaclass=ABCMeta):
    """Base class to define user_preference_dynamics and reward_function.

    Imported as: :class:`recgym.BaseUserModel`

    """

    @abstractmethod
    def user_preference_dynamics(
        self,
        state: np.ndarray,
        action: Action,
        item_feature_vector: np.ndarray,
    ) -> np.ndarray:
        """Determine the user state transition (i.e., change in user preference) based on the recommended item.

        Parameters
        -------
        state: array-like of shape (user_feature_dim, )
            A vector representing the user's preference. The preference changes over time in an episode, depending on the actions presented by the RL agent.
            When the true state is unobservable, an observation is given instead of the state.

        action: int or array-like of shape (1, )
            Index of the item to present to the user.

        item_feature_vector: ndarray of shape (n_items, item_feature_dim)
            Feature vectors that characterize each item.

        Returns
        -------
        state: array-like of shape (user_feature_dim, )
            The user's preference vector after the transition.

        """
        raise NotImplementedError

    @abstractmethod
    def reward_function(
        self,
        state: np.ndarray,
        action: Action,
        item_feature_vector: np.ndarray,
    ) -> float:
        """Compute the reward for presenting an item to the user.

        Parameters
        -------
        state: array-like of shape (user_feature_dim, )
            A vector representing the user's preference. The preference changes over time in an episode, depending on the actions presented by the RL agent.
            When the true state is unobservable, an observation is given instead of the state.

        action: int or array-like of shape (1, )
            Index of the item to present to the user.

        item_feature_vector: ndarray of shape (n_items, item_feature_dim)
            Feature vectors that characterize each item.

        Returns
        -------
        reward: bool or float
            User engagement signal as a reward. Either binary or continuous.

        """
        raise NotImplementedError
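

# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the recgym source): a minimal
# concrete subclass showing how the two abstract methods fit together. The
# class name `ToyUserModel`, the `drift` parameter, and the inner-product
# reward are assumptions made for illustration; the sketch also assumes
# user_feature_dim == item_feature_dim.
# ---------------------------------------------------------------------------


@dataclass
class ToyUserModel(BaseUserModel):
    """Toy user model whose preference drifts toward the recommended item."""

    drift: float = 0.1  # step size of the preference update (assumed)

    def user_preference_dynamics(
        self,
        state: np.ndarray,
        action: Action,
        item_feature_vector: np.ndarray,
    ) -> np.ndarray:
        # Nudge the preference vector toward the recommended item's features,
        # then re-normalize so the state stays on the unit sphere.
        next_state = state + self.drift * item_feature_vector[action]
        return next_state / np.linalg.norm(next_state)

    def reward_function(
        self,
        state: np.ndarray,
        action: Action,
        item_feature_vector: np.ndarray,
    ) -> float:
        # Reward is the alignment (inner product) between the user's
        # preference and the recommended item's feature vector, so it is
        # continuous rather than binary in this sketch.
        return float(state @ item_feature_vector[action])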