imputation: suppress cluster_name/n_clusters under ANY survey design (incl. replicate)

igerber · claude · igerber · commit 1249ded64346 · 2026-05-25T16:08:34.000-04:00
The Results-metadata suppression gate previously fired only when
`resolved_survey is not None and resolved_survey.psu is not None`, which
left replicate-weight survey fits (psu=None by SurveyDesign
mutual-exclusion rules) leaking cluster_name="unit" and
n_clusters=n_units onto Results. summary() then printed the "Number of
clusters" line plus the unit-cluster CR1 label, contradicting the new
public contract that both fields are None under every survey design.
For replicate designs the leak was also substantively wrong:
replicate-weight variance ignores PSU/cluster entirely (the replicates
encode the design implicitly via BRR / Fay / JK1 / JKn / SDR
reweighting).

Fix: gate on `resolved_survey is not None` so the suppression also
covers the replicate-weight branch. Regression test added:
`test_cluster_name_suppressed_under_replicate_survey` asserts both
fields are None and summary omits the Number-of-clusters line + the
CR1 label under a JK1 replicate design.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/imputation.py b/diff_diff/imputation.py
@@ -887,16 +887,18 @@ def _refit_imp(w_r):
                         )[0]
 
         # Resolve cluster_name / n_clusters for Results metadata.
-        # Suppress under survey designs (the survey block in summary()
-        # already renders the design's PSU/strata metadata).
+        # Suppress under ANY survey design (the survey block in summary()
+        # already renders the design's PSU/strata/replicate metadata, and
+        # replicate-weight variance ignores PSU/cluster entirely — keeping
+        # cluster_name/n_clusters populated on a replicate fit would
+        # misreport the inference source).
         # Otherwise:
         #   bare cluster= -> populate with the user-named cluster column
         #   cluster=None  -> the Theorem 3 variance still clusters at the
         #                    `unit` column by default (cluster_var = unit
         #                    at L418), so the summary label must report
         #                    unit-cluster CR1, not generic HC1.
-        _survey_active = resolved_survey is not None and resolved_survey.psu is not None
-        if _survey_active:
+        if resolved_survey is not None:
             _cluster_name_for_results: Optional[str] = None
             _n_clusters_for_results: Optional[int] = None
         elif self.cluster is not None:
diff --git a/tests/test_imputation.py b/tests/test_imputation.py
@@ -2831,6 +2831,35 @@ def test_cluster_name_suppressed_under_survey(self):
         assert r.cluster_name is None
         assert r.n_clusters is None
 
+    def test_cluster_name_suppressed_under_replicate_survey(self):
+        # Replicate-weight survey designs have psu=None but still must
+        # suppress cluster_name/n_clusters: replicate variance is computed
+        # by replicate reweighting (BRR / Fay / JK1 / JKn / SDR) and
+        # ignores PSU/cluster entirely, so populating cluster_name="unit"
+        # and n_clusters=n_units would misreport the inference source.
+        # Summary must also omit the "Number of clusters:" line and the
+        # CR1 cluster-robust label.
+        data, rep_cols = _imputation_replicate_panel()
+        design = SurveyDesign(
+            weights="weight",
+            replicate_weights=rep_cols,
+            replicate_method="JK1",
+            weight_type="pweight",
+        )
+        r = ImputationDiD().fit(
+            data,
+            outcome="outcome",
+            unit="unit",
+            time="time",
+            first_treat="first_treat",
+            survey_design=design,
+        )
+        assert r.cluster_name is None
+        assert r.n_clusters is None
+        text = r.summary()
+        assert "Number of clusters:" not in text
+        assert "CR1 cluster-robust" not in text
+
     def test_fit_clone_idempotent_on_vcov_type(self):
         data = generate_test_data(seed=11)
         imp1 = ImputationDiD(vcov_type="hc1")