fix: DPLSTM layers for FlatModel (#189)

shuangwu5 · web-flow · commit f66e53d7acdc · 2025-09-23T16:14:12.000+02:00
diff --git a/mostlyai/engine/_tabular/argn.py b/mostlyai/engine/_tabular/argn.py
@@ -871,6 +871,7 @@ def __init__(
         model_size: ModelSizeOrUnits,
         column_order: list[str] | None,
         device: torch.device,
+        with_dp: bool = False,
     ):
         super().__init__()
 
@@ -892,6 +893,7 @@ def __init__(
             ctx_cardinalities=self.ctx_cardinalities,
             ctxseq_len_median=self.ctxseq_len_median,
             device=device,
+            with_dp=with_dp,
         )
 
         # sub column embeddings
diff --git a/mostlyai/engine/_tabular/common.py b/mostlyai/engine/_tabular/common.py
@@ -20,11 +20,18 @@
 
 _LOG = logging.getLogger(__name__)
 
+DPLSTM_SUFFIXES: tuple = ("ih.weight", "ih.bias", "hh.weight", "hh.bias")
 
-def load_model_weights(model: torch.nn.Module, path: Path, device: torch.device):
-    try:
-        t00 = time.time()
-        model.load_state_dict(torch.load(f=path, map_location=device, weights_only=True))
-        _LOG.info(f"loaded model weights in {time.time() - t00:.2f}s")
-    except Exception as e:
-        _LOG.warning(f"failed to load model weights: {e}")
+
+def load_model_weights(model: torch.nn.Module, path: Path, device: torch.device) -> None:
+    t0 = time.time()
+    incompatible_keys = model.load_state_dict(torch.load(f=path, map_location=device, weights_only=True), strict=False)
+    missing_keys = incompatible_keys.missing_keys
+    unexpected_keys = incompatible_keys.unexpected_keys
+    # DP-trained models save extra keys from their DPLSTM layers; these are safe to ignore because generation uses standard LSTM layers
+    # any missing key, or any unexpected key that does not belong to a DPLSTM layer, is a real incompatibility and must raise an error
+    if len(missing_keys) > 0 or any(not k.endswith(DPLSTM_SUFFIXES) for k in unexpected_keys):
+        raise RuntimeError(
+            f"failed to load model weights due to incompatibility: {missing_keys = }, {unexpected_keys = }"
+        )
+    _LOG.info(f"loaded model weights in {time.time() - t0:.2f}s")
diff --git a/mostlyai/engine/_tabular/generation.py b/mostlyai/engine/_tabular/generation.py
@@ -879,11 +879,14 @@ def generate(
         no_of_model_params = get_no_of_model_parameters(model)
         _LOG.info(f"{no_of_model_params=}")
 
-        load_model_weights(
-            model=model,
-            path=workspace.model_tabular_weights_path,
-            device=device,
-        )
+        if workspace.model_tabular_weights_path.exists():
+            load_model_weights(
+                model=model,
+                path=workspace.model_tabular_weights_path,
+                device=device,
+            )
+        else:
+            _LOG.warning("model weights not found; generating data with an untrained model")
 
         model.to(device)
         model.eval()
diff --git a/mostlyai/engine/_tabular/training.py b/mostlyai/engine/_tabular/training.py
@@ -462,6 +462,7 @@ def train(
                 model_size=model_size,
                 column_order=trn_column_order,
                 device=device,
+                with_dp=with_dp,
             )
         _LOG.info(f"model class: {argn.__class__.__name__}")
 
diff --git a/tests/end_to_end/test_tabular_sequential.py b/tests/end_to_end/test_tabular_sequential.py
@@ -590,6 +590,7 @@ def test_training_strategy(self, workspace_before_training, differential_privacy
         # it's actually a fresh training, so the progress will look different
         with pytest.raises(AssertionError):
             pd.testing.assert_frame_equal(progress_resume.iloc[:2], progress_resume_without_checkpoint.iloc[:2])
+        generate(workspace_dir=workspace_before_training)
 
 
 def test_seed_generation(tmp_path):

Original file line number	Diff line number	Diff line change
`@@ -462,6 +462,7 @@ def train(`
`462`	`462`	`model_size=model_size,`
`463`	`463`	`column_order=trn_column_order,`
`464`	`464`	`device=device,`
	`465`	`+ with_dp=with_dp,`
`465`	`466`	`)`
`466`	`467`	`_LOG.info(f"model class: {argn.__class__.__name__}")`
`467`	`468`