Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -1371,6 +1371,7 @@ def test7_missing_enum_values_lambda_search(self):
pyunit_utils.show_test_results("test7_missing_enum_values_lambda_search", num_test_failed, self.test_failed)
self.test_num += 1


def sklearn_binomial_result(self, training_data_file, test_data_file, has_categorical, true_one_hot,
validation_data_file=""):
"""
Expand Down
23 changes: 23 additions & 0 deletions h2o-py/h2o/model/metrics/binomial.py
Original file line number Diff line number Diff line change
Expand Up @@ -976,3 +976,26 @@ def thresholds_and_metric_scores(self):
if 'thresholds_and_metric_scores' in self._metric_json:
return self._metric_json['thresholds_and_metric_scores']
return None

def kolmogorov_smirnov(self, thresholds= None):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def kolmogorov_smirnov(self, thresholds= None):
def kolmogorov_smirnov(self):

"""
:param thresholds: a list of threshold values (e.g. ``[0.01, 0.5, 0.99]``) for which to return the KS statistic.
If None, then the threshold maximizing the KS statistic will be used.
:returns: The Kolmogorov-Smirnov statistic for this set of metrics and thresholds.

:examples:

>>> from h2o.estimators.gbm import H2OGradientBoostingEstimator
>>> cars = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/junit/cars_20mpg.csv")
>>> cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
>>> predictors = ["displacement","power","weight","acceleration","year"]
>>> response = "economy_20mpg"
>>> train, valid = cars.split_frame(ratios = [.8], seed = 1234)
>>> cars_gbm = H2OGradientBoostingEstimator(seed = 1234)
>>> cars_gbm.train(x = predictors,
... y = response,
... training_frame = train,
... validation_frame = valid)
>>> cars_gbm.kolmogorov_smirnov()
"""
return self.metric("ks", thresholds=thresholds)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The goal is something like this:

Suggested change
return self.metric("ks", thresholds=thresholds)
return max(self.gains_lift()["kolmogorov_smirnov"])