From 8b3c6e4d7bbabd8bce1bc07ca3ab082ad7ffdc7a Mon Sep 17 00:00:00 2001
From: Qingyun Wu <qingyun.wu@psu.edu>
Date: Mon, 15 Aug 2022 23:16:11 -0400
Subject: [PATCH] VW version requirement and documentation on
 config_constraints vs metric_constraints (#686)

* add vw version requirement

* vw version

* version range

* add documentation

* vw version range

* skip test on py3.10

* vw version

* rephrase

* don't install vw on py 3.10

* move import location

* remove inherit

* 3.10 in version

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
---
 .github/workflows/python-package.yml          |  4 ++++
 setup.py                                      |  3 +--
 test/test_autovw.py                           | 11 +++++++---
 .../Use-Cases/Tune-User-Defined-Function.md   | 21 +++++++++++--------
 4 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 19f3a3f407..6d3b78b82e 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -51,6 +51,10 @@ jobs:
         if: (matrix.os == 'macOS-latest' || matrix.os == 'ubuntu-latest') && matrix.python-version != '3.9' && matrix.python-version != '3.10'
         run: |
           pip install -e .[forecast]
+      - name: Install vw on python < 3.10
+        if: matrix.python-version != '3.10'
+        run: |
+          pip install -e .[vw]
       - name: Lint with flake8
         run: |
           # stop the build if there are Python syntax errors or undefined names
diff --git a/setup.py b/setup.py
index e8bfb3b074..39fa7dc6c8 100644
--- a/setup.py
+++ b/setup.py
@@ -54,7 +54,6 @@
             "catboost>=0.26",
             "rgf-python",
             "optuna==2.8.0",
-            "vowpalwabbit",
             "openml",
             "statsmodels>=0.12.2",
             "psutil==5.8.0",
@@ -79,7 +78,7 @@
             "nni",
         ],
         "vw": [
-            "vowpalwabbit",
+            "vowpalwabbit>=8.10.0, <9.0.0",
         ],
         "nlp": [
             "transformers[torch]==4.18",
diff --git a/test/test_autovw.py b/test/test_autovw.py
index cacbe59d06..f75e527af8 100644
--- a/test/test_autovw.py
+++ b/test/test_autovw.py
@@ -1,18 +1,17 @@
 import unittest
-
 import numpy as np
 import scipy.sparse
-
 import pandas as pd
 from sklearn.metrics import mean_squared_error, mean_absolute_error
 import logging
 from flaml.tune import loguniform, polynomial_expansion_set
-from vowpalwabbit import pyvw
 from flaml import AutoVW
 import string
 import os
 import openml
 from requests.exceptions import SSLError
+import sys
+import pytest
 
 VW_DS_DIR = "test/data/"
 NS_LIST = list(string.ascii_lowercase) + list(string.ascii_uppercase)
@@ -369,8 +368,14 @@ def get_vw_tuning_problem(tuning_hp="NamesapceInteraction"):
     return vw_oml_problem_args, vw_online_aml_problem
 
 
+@pytest.mark.skipif(
+    "3.10" in sys.version,
+    reason="do not run on py 3.10",
+)
 class TestAutoVW(unittest.TestCase):
     def test_vw_oml_problem_and_vanilla_vw(self):
+        from vowpalwabbit import pyvw
+
         vw_oml_problem_args, vw_online_aml_problem = get_vw_tuning_problem()
         vanilla_vw = pyvw.vw(**vw_oml_problem_args["fixed_hp_config"])
         cumulative_loss_list = online_learning_loop(
diff --git a/website/docs/Use-Cases/Tune-User-Defined-Function.md b/website/docs/Use-Cases/Tune-User-Defined-Function.md
index 94d2ae87e6..6f78c739e1 100644
--- a/website/docs/Use-Cases/Tune-User-Defined-Function.md
+++ b/website/docs/Use-Cases/Tune-User-Defined-Function.md
@@ -265,24 +265,27 @@ A user can specify constraints on the configurations to be satisfied via the arg
 In the following code example, we constrain the output of `area`, which takes a configuration as input and outputs a numerical value, to be no larger than 1000.
 
 ```python
-def area(config):
-    return config["width"] * config["height"]
+def my_model_size(config):
+    return config["n_estimators"] * config["max_leaves"]
 
-flaml.tune.run(evaluation_function=evaluate_config, mode="min",
-               config=config_search_space,
-               config_constraints=[(area, "<=", 1000)], ...)
+analysis = tune.run(...,
+    config_constraints = [(my_model_size, "<=", 40)],
+)
 ```
 
  You can also specify a list of metric constraints to be satisfied via the argument `metric_constraints`. Each element in the `metric_constraints` list is a tuple that consists of (1) a string specifying the name of the metric (the metric name must be defined and returned in the user-defined `evaluation_function`); (2) an operation chosen from "<=" or ">="; (3) a numerical threshold.
 
- In the following code example, we constrain the metric `score` to be no larger than 0.4.
+ In the following code example, we constrain the metric `training_cost` to be no larger than 1 second.
 
 ```python
-flaml.tune.run(evaluation_function=evaluate_config, mode="min",
-               config=config_search_space,
-               metric_constraints=[("score", "<=", 0.4)],...)
+analysis = tune.run(...,
+    metric_constraints = [("training_cost", "<=", 1)])
 ```
 
+#### **`config_constraints` vs `metric_constraints`:**
+The key difference between these two types of constraints is that the calculation of constraints in `config_constraints` does not rely on the computation procedure in the evaluation function, i.e., in `evaluation_function`. This is the case, for example, when a constraint depends only on the config itself, as shown in the code example above. Because of this independence, constraints in `config_constraints` are checked before evaluation, so configurations that do not satisfy `config_constraints` will not be evaluated.
+
+
 ### Parallel tuning
 
 Related arguments: