Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions q2_sample_classifier/classify.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,13 @@ def fit_regressor(table: biom.Table,
n_jobs, optimize_feature_selection, parameter_tuning,
missing_samples=missing_samples, classification=False)

# this is sorted by the first column rather than 'importance' because
# the column name isn't consistent across methods - so this is the least
# invasive way to preserve order with the first column (which does
# contain the importance values)
importance = importance.sort_values(by=importance.columns[0],
ascending=False, kind='mergesort')

return estimator, importance


Expand Down Expand Up @@ -361,6 +368,14 @@ def regress_samples_ncv(
table, metadata, cv, random_state, n_jobs, n_estimators, estimator,
stratify, parameter_tuning, classification=False,
scoring=mean_squared_error, missing_samples=missing_samples)

# this is sorted by the first column rather than 'importance' because
# the column name isn't consistent across methods - so this is the least
# invasive way to preserve order with the first column (which does
# contain the importance values)
importances = importances.sort_values(by=importances.columns[0],
ascending=False, kind='mergesort')

return y_pred, importances


Expand Down
92 changes: 62 additions & 30 deletions q2_sample_classifier/tests/test_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,47 +37,79 @@ def setUp(self):
self.X = np.random.rand(50, 20)
self.y = np.random.randint(0, 2, 50)

self.exp1 = pd.Series([
0.4999999999999999, 0.52, 0.52, 0.5399999999999999,
0.44000000000000006, 0.52, 0.4600000000000001,
0.5599999999999998, 0.52, 0.52, 0.5, 0.5399999999999999, 0.54,
0.5599999999999999, 0.47999999999999987, 0.6199999999999999,
0.5399999999999999, 0.5, 0.4999999999999999, 0.45999999999999996],
index=pd.Index(range(1, 21)), name='Accuracy')
self.exp2 = pd.Series([
0.5000000000000001, 0.52, 0.48, 0.5599999999999998, 0.5,
0.5799999999999998, 0.54, 0.4600000000000001, 0.6,
0.45999999999999996, 0.45999999999999996],
index=pd.Index([1] + [i for i in range(2, 21, 2)]),
name='Accuracy')
self.exp3 = pd.Series({1: 0.4600000000000001, 20: 0.45999999999999996},
name='Accuracy')

def _fit_selector(self, step):
    """Fit an RFECV on the random fixture data at the given RFE step.

    Uses a tiny, seeded forest (2 trees, random_state=123) so repeated
    fits with the same step are deterministic and fast.
    """
    selector = RFECV(RandomForestClassifier(
        random_state=123, n_estimators=2), step=step, cv=10
    )

    # ravel() flattens y in case it arrives as a column vector
    return selector.fit(self.X, self.y.ravel())

def _assert_basic_contract(self, selector):
    """Assert the generic invariants of _extract_rfe_scores output.

    Checks that the extracted series is a pd.Series named 'Accuracy',
    contains exactly the CV mean test scores, and is indexed by a
    strictly increasing integer feature count running from 1 up to the
    total number of features. Returns the extracted series for further
    assertions by the caller.
    """
    obs = _extract_rfe_scores(selector)

    self.assertIsInstance(obs, pd.Series)
    self.assertEqual(obs.name, 'Accuracy')

    # every CV score must appear exactly once, regardless of order
    scores = selector.cv_results_['mean_test_score']
    self.assertEqual(len(obs), len(scores))

    np.testing.assert_array_equal(
        np.sort(obs.to_numpy()),
        np.sort(np.asarray(scores)))

    # index must be strictly increasing feature counts
    index = obs.index.to_numpy()
    self.assertTrue(np.all(np.diff(index) > 0))

    n_features = len(selector.ranking_)

    # the smallest point is 1 feature; the largest is the full set
    self.assertEqual(index[0], 1)
    self.assertEqual(index[-1], n_features)
    self.assertTrue(np.issubdtype(index.dtype, np.integer))

    return obs

def test_extract_rfe_scores_step_int_one(self):
    # step=1 eliminates one feature per iteration; only the generic
    # contract is checked (scores come from a stochastic forest)
    self._assert_basic_contract(self._fit_selector(1))

def test_extract_rfe_scores_step_float_one(self):
    self._assert_basic_contract(self._fit_selector(0.05))
    # for 20 features, 0.05 * 20 = 1, so this should match step=1 index
    step_int = self._fit_selector(1)
    step_float = self._fit_selector(0.05)

    obs_int = _extract_rfe_scores(step_int)
    obs_float = _extract_rfe_scores(step_float)

    np.testing.assert_array_equal(
        obs_int.index.to_numpy(), obs_float.index.to_numpy()
    )

def test_extract_rfe_scores_step_int_two(self):
    # step=2 eliminates two features per iteration; only the generic
    # contract is checked (scores come from a stochastic forest)
    self._assert_basic_contract(self._fit_selector(2))

def test_extract_rfe_scores_step_float_two(self):
    self._assert_basic_contract(self._fit_selector(0.1))
    # for 20 features, 0.1 * 20 = 2, so this should match step=2 index
    step_int = self._fit_selector(2)
    step_float = self._fit_selector(0.1)

    obs_int = _extract_rfe_scores(step_int)
    obs_float = _extract_rfe_scores(step_float)

    np.testing.assert_array_equal(
        obs_int.index.to_numpy(), obs_float.index.to_numpy()
    )

def test_extract_rfe_scores_step_full_range_out_of_range(self):
    # step >= n_features collapses RFE to a single elimination; a step
    # larger than the feature count must satisfy the same contract as
    # step == n_features
    self._assert_basic_contract(self._fit_selector(20))
    self._assert_basic_contract(self._fit_selector(21))

def test_extract_rfe_scores_step_full_range(self):
    obs_full = _extract_rfe_scores(self._fit_selector(20))
    obs_oor = _extract_rfe_scores(self._fit_selector(21))

    # an out-of-range step should be equal to the full-range result
    np.testing.assert_array_equal(
        obs_full.index.to_numpy(), obs_oor.index.to_numpy())
    pdt.assert_series_equal(obs_full, obs_oor)


# test classifier pipelines succeed on binary data
Expand Down
4 changes: 2 additions & 2 deletions q2_sample_classifier/tests/test_estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ def test_regress_samples_ncv_accuracy(self):
self.table_ecam_fp, self.mdc_ecam_fp, random_state=123,
n_estimators=2, n_jobs=1, missing_samples='ignore')
pdt.assert_series_equal(y_pred, self.exp_pred)
pdt.assert_frame_equal(importances, self.exp_imp)
pdt.assert_frame_equal(importances, self.exp_imp, atol=1e-12)

# test that fit_* methods output consistent importance scores
def test_fit_regressor(self):
Expand All @@ -344,7 +344,7 @@ def test_fit_regressor(self):
exp_imp = pd.read_csv(
self.get_data_path('importance_cv.tsv'), sep='\t', header=0,
index_col=0)
pdt.assert_frame_equal(importances, exp_imp)
pdt.assert_frame_equal(importances, exp_imp, atol=1e-12)

# just make sure this method runs. Uses the same internal function as
# fit_regressor, so importance score consistency is covered by the above
Expand Down
29 changes: 20 additions & 9 deletions q2_sample_classifier/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,15 +250,26 @@ def _rfecv_feature_selection(feature_data, targets, estimator,

def _extract_rfe_scores(rfecv):
grid_scores_ = rfecv.cv_results_['mean_test_score']
n_features = len(rfecv.ranking_)
# If using fractional step, step = integer of fraction * n_features
if rfecv.step < 1:
rfecv.step = int(rfecv.step * n_features)
# Need to manually calculate x-axis, grid_scores_ is a 1-d array
x = [n_features - (n * rfecv.step)
for n in range(len(grid_scores_)-1, -1, -1)]
if x[0] < 1:
x[0] = 1

# sklearn >= 1.5 provides the x-axis directly
# https://scikit-learn.org/stable/whats_new/v1.5.html#sklearn-feature-selection
if 'n_features' in rfecv.cv_results_:
x = rfecv.cv_results_['n_features']

else:
n_features = len(rfecv.ranking_)
# If using fractional step, step = integer of fraction * n_features
step = rfecv.step
if step < 1:
# prevent case where step = 0
step = max(1, int(step * n_features))

# Need to manually calculate x-axis, grid_scores_ is a 1-d array
x = [n_features - (n * step)
for n in range(len(grid_scores_)-1, -1, -1)]
if x[0] < 1:
x[0] = 1

return pd.Series(grid_scores_, index=x, name='Accuracy')


Expand Down
Loading