diff --git a/.gitignore b/.gitignore
index 8725aaf..c65708d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -615,4 +615,5 @@ MigrationBackup/
 
 /tmp
 /d3rlpy_data
-/d3rlpy_logs
\ No newline at end of file
+/d3rlpy_logs
+/propensity_output
\ No newline at end of file
diff --git a/README.md b/README.md
index 728001b..cf1d0d8 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,9 @@
-# offline_rl_ope (BETA RELEASE)
+# offline_rl_ope
 
 **WARNING**
 - All IS methods implemented incorrectly in versions < 6.x
-- Per-decision weighted importance sampling was incorrectly implemented in versions < 5.X
-- Weighted importance sampling was incorrectly implemented in versions 1.X.X and 2.1.X, 2.2.X
 - Unit testing currently only running in Python 3.11. 3.10 will be supported in the future
-- Only 1 dimensional discrete action spaces are currently supported!
-
-**IMPORTANT: THIS IS A BETA RELEASE. FUNCTIONALITY IS STILL BEING TESTED** Feedback/contributions are welcome :)
+- Not all functionality has been tested, i.e., the d3rlpy api and LowerBounds are still in beta
 
 ### Testing progress
 - [x] components/
@@ -21,11 +17,12 @@
 - [x] Metrics
     - [x] EffectiveSampleSize.py
     - [x] ValidWeightsProp.py
-- [ ] PropensityModels
+- [x] PropensityModels
 - [ ] LowerBounds
 - [ ] api/d3rlpy
 
-* Insufficient functionality to test i.e., currently only wrapper classes are implemented for the OPEEstimation/DirectMethod.py
+DirectMethod.py has insufficient functionality to test, i.e., currently only wrapper classes are implemented in OPEEstimation/DirectMethod.py
+
 #### Overview
 Basic unit testing has been implemented for all the core functionality of the package. The d3rlpy/api for importance sampling adds minimal additional functionality; therefore, it is likely to function as expected. However, no specific unit testing has been implemented!
 
@@ -34,7 +31,7 @@
 * More documentation needs to be added; however, please refer to examples/ for an illustration of the functionality
 * examples/static.py provides an illustration of the package being used for evaluation post training. Whilst the d3rlpy package is used for model training, the script is agnostic to the evaluation model used
 * examples/d3rlpy_training_api.py provides an illustration of how the package can be used to obtain incremental performance statistics during the training of d3rlpy models. It provides greater functionality than the native scorer metrics included in d3rlpy
-* The current focus has been on discrete action spaces. Continuous action spaces are intended to be addressed at a later date
+* For continuous action spaces, only deterministic policies are fully supported. Support for stochastic policies is in development
 
 ### Description
 * offline_rl_ope aims to provide flexible and efficient implementations of OPE algorithms for use when training offline RL models. The main audience is researchers developing smaller, non-distributed models i.e., those who do not want to use packages such as ray (https://github.com/ray-project/ray).
diff --git a/propensity_output/epoch_1_train_preds.pkl b/propensity_output/epoch_1_train_preds.pkl
deleted file mode 100644
index c7e4f6d..0000000
Binary files a/propensity_output/epoch_1_train_preds.pkl and /dev/null differ
diff --git a/propensity_output/epoch_1_val_preds.pkl b/propensity_output/epoch_1_val_preds.pkl
deleted file mode 100644
index e8cd87c..0000000
Binary files a/propensity_output/epoch_1_val_preds.pkl and /dev/null differ
diff --git a/propensity_output/epoch_2_train_preds.pkl b/propensity_output/epoch_2_train_preds.pkl
deleted file mode 100644
index 7e08a05..0000000
Binary files a/propensity_output/epoch_2_train_preds.pkl and /dev/null differ
diff --git a/propensity_output/epoch_2_val_preds.pkl b/propensity_output/epoch_2_val_preds.pkl
deleted file mode 100644
index 8ce3784..0000000
Binary files a/propensity_output/epoch_2_val_preds.pkl and /dev/null differ
diff --git a/propensity_output/epoch_3_train_preds.pkl b/propensity_output/epoch_3_train_preds.pkl
deleted file mode 100644
index 5b1046b..0000000
Binary files a/propensity_output/epoch_3_train_preds.pkl and /dev/null differ
diff --git a/propensity_output/epoch_3_val_preds.pkl b/propensity_output/epoch_3_val_preds.pkl
deleted file mode 100644
index 7f0bafa..0000000
Binary files a/propensity_output/epoch_3_val_preds.pkl and /dev/null differ
diff --git a/propensity_output/epoch_4_train_preds.pkl b/propensity_output/epoch_4_train_preds.pkl
deleted file mode 100644
index d089577..0000000
Binary files a/propensity_output/epoch_4_train_preds.pkl and /dev/null differ
diff --git a/propensity_output/epoch_4_val_preds.pkl b/propensity_output/epoch_4_val_preds.pkl
deleted file mode 100644
index f05af17..0000000
Binary files a/propensity_output/epoch_4_val_preds.pkl and /dev/null differ
diff --git a/propensity_output/mdl_chkpnt_epoch_1.pt b/propensity_output/mdl_chkpnt_epoch_1.pt
deleted file mode 100644
index 91cb6ca..0000000
Binary files a/propensity_output/mdl_chkpnt_epoch_1.pt and /dev/null differ
diff --git a/propensity_output/mdl_chkpnt_epoch_2.pt b/propensity_output/mdl_chkpnt_epoch_2.pt
deleted file mode 100644
index 0a73a78..0000000
Binary files a/propensity_output/mdl_chkpnt_epoch_2.pt and /dev/null differ
diff --git a/propensity_output/mdl_chkpnt_epoch_3.pt b/propensity_output/mdl_chkpnt_epoch_3.pt
deleted file mode 100644
index deabebc..0000000
Binary files a/propensity_output/mdl_chkpnt_epoch_3.pt and /dev/null differ
diff --git a/propensity_output/mdl_chkpnt_epoch_4.pt b/propensity_output/mdl_chkpnt_epoch_4.pt
deleted file mode 100644
index 2300ba1..0000000
Binary files a/propensity_output/mdl_chkpnt_epoch_4.pt and /dev/null differ
diff --git a/propensity_output/training_metric_df.csv b/propensity_output/training_metric_df.csv
deleted file mode 100644
index 072c7dc..0000000
--- a/propensity_output/training_metric_df.csv
+++ /dev/null
@@ -1,9 +0,0 @@
-,raw_vals,metric_name
-epoch_1,0.5026572188648346,epoch_train_loss
-epoch_2,0.45641687404069453,epoch_train_loss
-epoch_3,0.4506564313404394,epoch_train_loss
-epoch_4,0.4499283042737856,epoch_train_loss
-epoch_1,0.4542446283801114,epoch_val_loss
-epoch_2,0.43553749836608163,epoch_val_loss
-epoch_3,0.4922322245233471,epoch_val_loss
-epoch_4,0.4404787527628977,epoch_val_loss
diff --git a/src/offline_rl_ope/LowerBounds/__init__.py b/src/offline_rl_ope/LowerBounds/__init__.py
index e69de29..2cdbbc0 100644
--- a/src/offline_rl_ope/LowerBounds/__init__.py
+++ b/src/offline_rl_ope/LowerBounds/__init__.py
@@ -0,0 +1,3 @@
+from .. import logger
+
+logger.warning("LowerBound functionality still in beta")
functionality still in beta") \ No newline at end of file diff --git a/src/offline_rl_ope/api/d3rlpy/__init__.py b/src/offline_rl_ope/api/d3rlpy/__init__.py index 73bf079..9c27335 100644 --- a/src/offline_rl_ope/api/d3rlpy/__init__.py +++ b/src/offline_rl_ope/api/d3rlpy/__init__.py @@ -1 +1,4 @@ -from . import Scorers, Callbacks, Misc \ No newline at end of file +from . import Scorers, Callbacks, Misc +from ...import logger + +logger.warn("api/d3rlpy functionality still in beta") \ No newline at end of file diff --git a/tests/base.py b/tests/base.py index 894e393..a0f8213 100644 --- a/tests/base.py +++ b/tests/base.py @@ -213,6 +213,60 @@ def __post_init__(self): } ) + +test_action_vals = [ + [[0.9], [4], [0.001], [0]], + [[1], [0], [0.9]] +] + +test_eval_action_vals = [ + [[0.9], [0.9], [0.001], [0]], + [[1], [1], [0.9]] +] + + +test_configs.update( + { + "continuous_action": TestConfig( + test_state_vals=test_state_vals, + test_action_vals=test_action_vals, + test_action_probs=test_action_probs, + test_eval_action_vals=test_eval_action_vals, + test_eval_action_probs=test_eval_action_probs, + test_reward_values=test_reward_values, + test_dm_s_values=test_dm_s_values, + test_dm_sa_values=test_dm_sa_values + ) + } +) + + +test_action_vals = [ + [[0.9,1], [4,0.9], [0.001, 1], [0,-1.2]], + [[1,-0.8], [0,-1], [0.9,1]] +] + +test_eval_action_vals = [ + [[0.9,1], [1,0.9], [0.001, 1], [0,-1.2]], + [[1,-0.8], [0,-1], [1,1]] +] + +test_configs.update( + { + "multi_continuous_action": TestConfig( + test_state_vals=test_state_vals, + test_action_vals=test_action_vals, + test_action_probs=test_action_probs, + test_eval_action_vals=test_eval_action_vals, + test_eval_action_probs=test_eval_action_probs, + test_reward_values=test_reward_values, + test_dm_s_values=test_dm_s_values, + test_dm_sa_values=test_dm_sa_values + ) + } +) + + test_configs_fmt = [[key,test_configs[key]] for key in test_configs.keys()] test_configs_fmt_class = [ {"test_conf":test_configs[key]} for key in test_configs.keys()