@@ -725,7 +725,7 @@ def predict_quantiles(self, X, quantiles=0.5, method="nearest"):
         ----------
         X : {array-like, sparse matrix} of shape (n_samples, n_features)
             Input data.
-        quantiles : float, optional
+        quantiles : float or array-like of float, optional
             The quantiles at which to evaluate, by default 0.5 (median).
         method : str, optional
             The method to interpolate, by default 'linear'. Can be any keyword
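A hedged usage sketch of the widened `quantiles` parameter. `predict_quantiles` and `store_leaf_values` are this fork's API as shown in the surrounding diff (not upstream scikit-learn), and the sketch assumes `store_leaf_values=True` populates the `leaf_nodes_samples_` that the implementation below reads; the data and names are illustrative.

```python
import numpy as np
from sklearn.datasets import make_regression
# this fork's estimator; predict_quantiles/store_leaf_values are not upstream
from sklearn.ensemble import RandomForestRegressor

X, y = make_regression(n_samples=200, n_features=4, random_state=0)
est = RandomForestRegressor(
    n_estimators=50, store_leaf_values=True, random_state=0
).fit(X, y)

# quantiles now accepts a scalar or an array-like of floats
y_median = est.predict_quantiles(X[:5], quantiles=0.5)
y_band = est.predict_quantiles(X[:5], quantiles=[0.025, 0.975], method="nearest")
```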
@@ -746,7 +746,7 @@ def predict_quantiles(self, X, quantiles=0.5, method="nearest"):
         X = self._validate_X_predict(X)

         if not isinstance(quantiles, (np.ndarray, list)):
-            quantiles = np.array([quantiles])
+            quantiles = np.atleast_1d(np.array(quantiles))

         # if we trained a binning tree, then we should re-bin the data
         # XXX: this is inefficient and should be improved to be in line with what
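A standalone sketch of what the `np.atleast_1d` change buys for inputs that are neither an `ndarray` nor a `list` (the branch guarded by the `isinstance` check above), e.g. a scalar or a tuple:

```python
import numpy as np

for q in (0.5, (0.1, 0.9)):
    old = np.array([q])               # previous behavior: always wraps
    new = np.atleast_1d(np.array(q))  # new behavior: wraps scalars only
    print(type(q).__name__, old.shape, new.shape)
# float (1,) (1,)    -- scalars: identical result
# tuple (1, 2) (2,)  -- tuples: the old form nested one level too deep
```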
@@ -777,15 +777,15 @@ def predict_quantiles(self, X, quantiles=0.5, method="nearest"):

             # (n_total_leaf_samples, n_outputs)
             leaf_node_samples = np.vstack(
-                (
+                [
                     est.leaf_nodes_samples_[leaf_nodes[jdx]]
                     for jdx, est in enumerate(self.estimators_)
-                )
+                ]
             )

             # get quantiles across all leaf node samples
             y_hat[idx, ...] = np.quantile(
-                leaf_node_samples, quantiles, axis=0, interpolation=method
+                leaf_node_samples, quantiles, axis=0, method=method
             )

         if is_classifier(self):
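Both edits in this hunk track NumPy API changes: `np.vstack` needs a real sequence (passing a generator expression was deprecated in NumPy 1.16 and rejected later), and `np.quantile`'s `interpolation=` keyword was renamed to `method=` in NumPy 1.22. A toy reproduction with fabricated leaf samples:

```python
import numpy as np

rng = np.random.default_rng(0)
# three trees' worth of leaf samples, each (n_leaf_samples, n_outputs)
per_tree = [rng.normal(size=(5, 2)) for _ in range(3)]

leaf_node_samples = np.vstack(per_tree)  # (15, 2); a list, not a generator
q = np.atleast_1d(np.array([0.1, 0.5, 0.9]))
y_hat = np.quantile(leaf_node_samples, q, axis=0, method="nearest")
print(y_hat.shape)  # (3, 2): one row per quantile, one column per output
```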
@@ -1550,6 +1550,17 @@ class RandomForestClassifier(ForestClassifier):

         .. versionadded:: 1.4

+    categorical : array-like or str, default=None
+        Array of feature indices, boolean array of length ``n_features``,
+        ``'all'``, or ``None``. Indicates which features should be treated
+        as categorical rather than ordinal. For decision trees, the maximum
+        number of categories is 64. In practice, the effective limit is
+        often lower, because the search for the best possible split grows
+        exponentially with the number of categories. However, a shortcut
+        due to Breiman (1984) is used when fitting data with binary labels
+        under the ``"gini"`` or ``"entropy"`` criteria; in that case the
+        runtime is linear in the number of categories.
+
     Attributes
     ----------
     estimator_ : :class:`~sklearn.tree.DecisionTreeClassifier`
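A hypothetical usage sketch of the new parameter (the `categorical` keyword is what this diff adds; that the fit path expects integer category codes in column 0 is an assumption of the sketch):

```python
import numpy as np
from sklearn.ensemble import RandomForestClassifier  # this fork's class

X = np.array([[0, 1.5], [1, 0.3], [2, 2.2], [0, 0.7]])  # col 0: category codes
y = np.array([0, 1, 1, 0])

# per the docstring, either feature indices or a boolean mask work
clf = RandomForestClassifier(n_estimators=10, categorical=[0]).fit(X, y)
clf = RandomForestClassifier(
    n_estimators=10, categorical=np.array([True, False])
).fit(X, y)
```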
@@ -1693,6 +1704,7 @@ def __init__(
         max_bins=None,
         store_leaf_values=False,
         monotonic_cst=None,
+        categorical=None,
     ):
         super().__init__(
             estimator=DecisionTreeClassifier(),
@@ -1710,6 +1722,7 @@ def __init__(
                 "ccp_alpha",
                 "store_leaf_values",
                 "monotonic_cst",
+                "categorical",
             ),
             bootstrap=bootstrap,
             oob_score=oob_score,
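Why `"categorical"` must be added to `estimator_params`: the ensemble base class forwards every name in that tuple from the forest to each tree it instantiates. A simplified sketch of that mechanism (paraphrased, not the verbatim `BaseEnsemble._make_estimator`):

```python
from sklearn.base import clone

def make_tree(forest, template, estimator_params):
    # each tree receives a copy of the forest-level setting, so a name
    # missing from estimator_params would silently never reach the trees
    tree = clone(template)
    tree.set_params(**{p: getattr(forest, p) for p in estimator_params})
    return tree
```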
@@ -1733,6 +1746,7 @@ def __init__(
         self.min_impurity_decrease = min_impurity_decrease
         self.monotonic_cst = monotonic_cst
         self.ccp_alpha = ccp_alpha
+        self.categorical = categorical


 class RandomForestRegressor(ForestRegressor):
@@ -1935,6 +1949,17 @@ class RandomForestRegressor(ForestRegressor):

         .. versionadded:: 1.4

+    categorical : array-like or str, default=None
+        Array of feature indices, boolean array of length ``n_features``,
+        ``'all'``, or ``None``. Indicates which features should be treated
+        as categorical rather than ordinal. For decision trees, the maximum
+        number of categories is 64. In practice, the effective limit is
+        often lower, because the search for the best possible split grows
+        exponentially with the number of categories. However, a shortcut
+        due to Breiman (1984) is used when fitting data with binary labels
+        under the ``"gini"`` or ``"entropy"`` criteria; in that case the
+        runtime is linear in the number of categories.
+
     Attributes
     ----------
     estimator_ : :class:`~sklearn.tree.DecisionTreeRegressor`
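The Breiman (1984) shortcut the docstring cites: with binary labels, sorting the k categories by their positive-class rate guarantees that the optimal category split is one of the k - 1 "prefix" partitions of that ordering, rather than one of all 2**(k-1) - 1 subsets. A toy illustration (all names here are illustrative, not this fork's internals):

```python
import numpy as np

rng = np.random.default_rng(0)
k = 6
cats = rng.integers(0, k, size=200)            # integer category codes
y = (rng.random(200) < cats / k).astype(int)   # binary labels

rates = np.array([y[cats == c].mean() for c in range(k)])
order = np.argsort(rates)                      # categories sorted by P(y=1)

# only k - 1 = 5 candidate subsets need scoring instead of 31
candidates = [set(order[: i + 1].tolist()) for i in range(k - 1)]
print(candidates)
```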
@@ -2065,6 +2090,7 @@ def __init__(
         max_bins=None,
         store_leaf_values=False,
         monotonic_cst=None,
+        categorical=None,
     ):
         super().__init__(
             estimator=DecisionTreeRegressor(),
@@ -2082,6 +2108,7 @@ def __init__(
                 "ccp_alpha",
                 "store_leaf_values",
                 "monotonic_cst",
+                "categorical",
             ),
             bootstrap=bootstrap,
             oob_score=oob_score,
@@ -2104,6 +2131,7 @@ def __init__(
         self.min_impurity_decrease = min_impurity_decrease
         self.ccp_alpha = ccp_alpha
         self.monotonic_cst = monotonic_cst
+        self.categorical = categorical


 class ExtraTreesClassifier(ForestClassifier):
@@ -2316,24 +2344,16 @@ class ExtraTreesClassifier(ForestClassifier):

         .. versionadded:: 1.4

-    monotonic_cst : array-like of int of shape (n_features), default=None
-        Indicates the monotonicity constraint to enforce on each feature.
-          - 1: monotonically increasing
-          - 0: no constraint
-          - -1: monotonically decreasing
-
-        If monotonic_cst is None, no constraints are applied.
-
-        Monotonicity constraints are not supported for:
-          - multiclass classifications (i.e. when `n_classes > 2`),
-          - multioutput classifications (i.e. when `n_outputs_ > 1`),
-          - classifications trained on data with missing values.
-
-        The constraints hold over the probability of the positive class.
-
-        Read more in the :ref:`User Guide <monotonic_cst_gbdt>`.
-
-        .. versionadded:: 1.4
+    categorical : array-like or str, default=None
+        Array of feature indices, boolean array of length ``n_features``,
+        ``'all'``, or ``None``. Indicates which features should be treated
+        as categorical rather than ordinal. For decision trees, the maximum
+        number of categories is 64. In practice, the effective limit is
+        often lower, because the search for the best possible split grows
+        exponentially with the number of categories. However, a shortcut
+        due to Breiman (1984) is used when fitting data with binary labels
+        under the ``"gini"`` or ``"entropy"`` criteria; in that case the
+        runtime is linear in the number of categories.

     Attributes
     ----------
@@ -2467,6 +2487,7 @@ def __init__(
         max_bins=None,
         store_leaf_values=False,
         monotonic_cst=None,
+        categorical=None,
     ):
         super().__init__(
             estimator=ExtraTreeClassifier(),
@@ -2484,6 +2505,7 @@ def __init__(
                 "ccp_alpha",
                 "store_leaf_values",
                 "monotonic_cst",
+                "categorical",
             ),
             bootstrap=bootstrap,
             oob_score=oob_score,
@@ -2507,6 +2529,7 @@ def __init__(
         self.min_impurity_decrease = min_impurity_decrease
         self.ccp_alpha = ccp_alpha
         self.monotonic_cst = monotonic_cst
+        self.categorical = categorical


 class ExtraTreesRegressor(ForestRegressor):
@@ -2704,6 +2727,17 @@ class ExtraTreesRegressor(ForestRegressor):

         .. versionadded:: 1.4

+    categorical : array-like or str, default=None
+        Array of feature indices, boolean array of length ``n_features``,
+        ``'all'``, or ``None``. Indicates which features should be treated
+        as categorical rather than ordinal. For decision trees, the maximum
+        number of categories is 64. In practice, the effective limit is
+        often lower, because the search for the best possible split grows
+        exponentially with the number of categories. However, a shortcut
+        due to Breiman (1984) is used when fitting data with binary labels
+        under the ``"gini"`` or ``"entropy"`` criteria; in that case the
+        runtime is linear in the number of categories.
+
     Attributes
     ----------
     estimator_ : :class:`~sklearn.tree.ExtraTreeRegressor`
@@ -2819,6 +2853,7 @@ def __init__(
         max_bins=None,
         store_leaf_values=False,
         monotonic_cst=None,
+        categorical=None,
     ):
         super().__init__(
             estimator=ExtraTreeRegressor(),
@@ -2836,6 +2871,7 @@ def __init__(
                 "ccp_alpha",
                 "store_leaf_values",
                 "monotonic_cst",
+                "categorical",
             ),
             bootstrap=bootstrap,
             oob_score=oob_score,
@@ -2858,6 +2894,7 @@ def __init__(
         self.min_impurity_decrease = min_impurity_decrease
         self.ccp_alpha = ccp_alpha
         self.monotonic_cst = monotonic_cst
+        self.categorical = categorical


 class RandomTreesEmbedding(TransformerMixin, BaseForest):
@@ -2969,6 +3006,17 @@ class RandomTreesEmbedding(TransformerMixin, BaseForest):
         new forest. See :term:`Glossary <warm_start>` and
         :ref:`gradient_boosting_warm_start` for details.

+    categorical : array-like or str, default=None
+        Array of feature indices, boolean array of length ``n_features``,
+        ``'all'``, or ``None``. Indicates which features should be treated
+        as categorical rather than ordinal. For decision trees, the maximum
+        number of categories is 64. In practice, the effective limit is
+        often lower, because the search for the best possible split grows
+        exponentially with the number of categories. However, a shortcut
+        due to Breiman (1984) is used when fitting data with binary labels
+        under the ``"gini"`` or ``"entropy"`` criteria; in that case the
+        runtime is linear in the number of categories.
+
     Attributes
     ----------
     estimator_ : :class:`~sklearn.tree.ExtraTreeRegressor` instance
@@ -3073,6 +3121,7 @@ def __init__(
         verbose=0,
         warm_start=False,
         store_leaf_values=False,
+        categorical=None,
     ):
         super().__init__(
             estimator=ExtraTreeRegressor(),
@@ -3088,6 +3137,7 @@ def __init__(
                 "min_impurity_decrease",
                 "random_state",
                 "store_leaf_values",
+                "categorical",
             ),
             bootstrap=False,
             oob_score=False,
@@ -3106,6 +3156,7 @@ def __init__(
         self.max_leaf_nodes = max_leaf_nodes
         self.min_impurity_decrease = min_impurity_decrease
         self.sparse_output = sparse_output
+        self.categorical = categorical

     def _set_oob_score_and_attributes(self, X, y, scoring_function=None):
         raise NotImplementedError("OOB score not supported by tree embedding")