Skip to content

Commit 7fdb313

Browse files
authored
Implement tensornet warp kernels (#384)
* implement tensornet warp ops. Implements tensornet warp kernels copied from materialyzeai/matgl#709, as originally implemented by @zubatyuk. To work with the warp kernels, the tensornet code has been refactored to use shapes [N,3,3,F] instead of the original [N,F,3,3]. This change required reshaping of weights from models trained by previous code. Older checkpoints are currently auto-detected using the presence of the `check_errors` flag, which was removed in a recent commit. The loading method can also be set with a new compatibility_load=True|False flag. If the warp kernels fail to load, the pure torch functions will be used. These have been refactored to match the call signatures and shapes of the warp kernels. The speedup of the warp kernels is approximately 3x for inference and training. * pass test_model
1 parent a0aa111 commit 7fdb313

31 files changed

+5303
-224
lines changed

README.md

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,43 @@ Run `torchmd-train --help` to see all available options and their descriptions.
6565

6666
### AceFF
6767
Trained [AceFF models](https://huggingface.co/collections/Acellera/aceff-machine-learning-potentials) can be loaded and used for inference.
68-
please see [here](https://github.com/torchmd/torchmd-net/tree/main/examples/aceff_examples)
68+
Please see [here](https://github.com/torchmd/torchmd-net/tree/main/examples/aceff_examples) for example scripts.
69+
70+
#### Loading AceFF models with `load_model`
71+
72+
```python
73+
from huggingface_hub import hf_hub_download
74+
from torchmdnet.models.model import load_model
75+
76+
model_file_path = hf_hub_download(repo_id="Acellera/AceFF-1.1", filename="aceff_v1.1.ckpt")
77+
model = load_model(model_file_path, derivative=True)
78+
```
79+
80+
#### Loading AceFF models with the ASE calculator
81+
82+
```python
83+
from huggingface_hub import hf_hub_download
84+
from torchmdnet.calculators import TMDNETCalculator
85+
86+
model_file_path = hf_hub_download(repo_id="Acellera/AceFF-1.1", filename="aceff_v1.1.ckpt")
87+
calc = TMDNETCalculator(model_file_path, device="cuda")
88+
```
89+
90+
#### `compatibility_load` flag
91+
92+
TensorNet and TensorNet2 checkpoints trained with older versions of the code used a different
93+
internal tensor layout (`[N, F, 3, 3]` instead of the current `[N, 3, 3, F]`). When loading
94+
such a checkpoint, the affected weight matrices must be remapped before the state dict can be
95+
applied.
96+
97+
**This is handled automatically.** Old-format checkpoints always contain a `check_errors`
98+
key in their saved hyper-parameters (a parameter that was removed in newer code); `load_model`
99+
detects this and applies the remapping transparently, emitting a `UserWarning` to let you know.
100+
All currently released AceFF checkpoints (1.0, 1.1, 2.0) are old-format and are handled this way.
101+
102+
If you need to override the automatic detection you can pass `compatibility_load=True` (force
103+
remap) or `compatibility_load=False` (suppress remap) explicitly to either `load_model` or
104+
`TMDNETCalculator`.
69105

70106

71107
To load your own trained models see [here](https://github.com/torchmd/torchmd-net/tree/main/examples#loading-checkpoints) for instructions on how to load pretrained models.

benchmarks/inference.py

Lines changed: 24 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ def load_example_args(model_name, remove_prior=False, config_file=None, **kwargs
2525
config_file = join(
2626
dirname(dirname(__file__)), "examples", "TensorNet-QM9.yaml"
2727
)
28+
elif model_name == "tensornet2":
29+
config_file = join(
30+
dirname(dirname(__file__)), "examples", "TensorNet2-QM9.yaml"
31+
)
2832
else:
2933
config_file = join(dirname(dirname(__file__)), "examples", "ET-QM9.yaml")
3034
with open(config_file, "r") as f:
@@ -64,22 +68,24 @@ def benchmark_pdb(pdb_file, **kwargs):
6468
molecule = None
6569
torch.cuda.nvtx.range_push("Initialization")
6670
args = load_example_args(
67-
"tensornet",
68-
config_file="../examples/TensorNet-rMD17.yaml",
71+
kwargs["model"],
6972
remove_prior=True,
7073
output_model="Scalar",
7174
derivative=False,
7275
max_z=int(atomic_numbers.max() + 1),
73-
max_num_neighbors=32,
76+
max_num_neighbors=64,
7477
**kwargs,
7578
)
7679
model = create_model(args)
7780
z = atomic_numbers
7881
pos = positions
7982
batch = torch.zeros_like(z).to("cuda")
83+
model.representation_model.setup_for_inference(
84+
z.cpu(), batch.cpu()
85+
) # setup for inference
8086
model = model.to("cuda")
81-
torch.cuda.nvtx.range_pop()
82-
torch.cuda.nvtx.range_push("Warmup")
87+
# torch.cuda.nvtx.range_pop()
88+
# torch.cuda.nvtx.range_push("Warmup")
8389
for i in range(3):
8490
pred, _ = model(z, pos, batch)
8591
pred.sum().backward()
@@ -88,38 +94,27 @@ def benchmark_pdb(pdb_file, **kwargs):
8894
for i in range(10):
8995
pred, _ = model(z, pos, batch)
9096
pred.sum().backward()
91-
torch.cuda.nvtx.range_pop()
92-
torch.cuda.nvtx.range_push("Benchmark")
97+
# torch.cuda.nvtx.range_pop()
98+
# torch.cuda.nvtx.range_push("Benchmark")
9399
nbench = 100
94-
times = np.zeros(nbench)
95-
stream = torch.cuda.Stream()
96100
torch.cuda.synchronize()
97-
with GpuTimer() as timer:
98-
with torch.cuda.stream(stream):
99-
for i in range(nbench):
100-
# torch.cuda.synchronize()
101-
# with GpuTimer() as timer2:
102-
# torch.cuda.nvtx.range_push("Step")
103-
pred, _ = model(z, pos, batch)
104-
# torch.cuda.nvtx.range_push("derivative")
105-
pred.sum().backward()
106-
# torch.cuda.nvtx.range_pop()
107-
# torch.cuda.nvtx.range_pop()
108-
# torch.cuda.synchronize()
109-
# times[i] = timer2.interval
110-
torch.cuda.synchronize()
111-
# torch.cuda.nvtx.range_pop()
112-
return len(atomic_numbers), timer.interval / nbench
101+
t1 = time.perf_counter()
102+
for i in range(nbench):
103+
pred, _ = model(z, pos, batch)
104+
pred.sum().backward()
105+
torch.cuda.synchronize()
106+
t2 = time.perf_counter()
107+
return len(atomic_numbers), (t2 - t1) * 1000 / nbench
113108

114109

115110
from tabulate import tabulate
116111

117112
# List of cases to benchmark, arbitrary parameters can be overriden here
118113
cases = {
119-
"0L": {"num_layers": 0, "embedding_dimension": 128},
120-
"1L": {"num_layers": 1, "embedding_dimension": 128},
121-
"2L": {"num_layers": 2, "embedding_dimension": 128},
122-
"2L emb 64": {"num_layers": 2, "embedding_dimension": 64},
114+
"0L": {"model": "tensornet", "num_layers": 0, "embedding_dimension": 128},
115+
"1L": {"model": "tensornet", "num_layers": 1, "embedding_dimension": 128},
116+
"2L": {"model": "tensornet", "num_layers": 2, "embedding_dimension": 128},
117+
"2L emb 64": {"model": "tensornet", "num_layers": 2, "embedding_dimension": 64},
123118
}
124119

125120

@@ -134,8 +129,6 @@ def benchmark_all():
134129
for pdb_file in os.listdir("systems"):
135130
if not pdb_file.endswith(".pdb"):
136131
continue
137-
if pdb_file == "stmv.pdb": # Does not fit in a 4090
138-
continue
139132
times = {}
140133
num_atoms = 0
141134
for name, kwargs in cases.items():

examples/TensorNet-QM9.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ ngpus: -1
3636
num_epochs: 3000
3737
num_layers: 3
3838
num_nodes: 1
39-
num_rbf: 64
39+
num_rbf: 32
4040
num_workers: 6
4141
output_model: Scalar
4242
precision: 32
@@ -57,3 +57,5 @@ weight_decay: 0.0
5757
box_vecs: null
5858
charge: false
5959
spin: false
60+
static_shapes: false
61+
check_errors: true

examples/aceff_examples/ase_aceff.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616

1717
# We create the ASE calculator by supplying the path to the model and specifying the device and dtype
18-
calc = TMDNETCalculator(model_file_path, device="cuda")
18+
calc = TMDNETCalculator(model_file_path, device="cuda", max_num_neighbors=24)
1919
atoms = read("caffeine.pdb")
2020
print(atoms)
2121

@@ -77,6 +77,7 @@
7777
atoms.calc = calc
7878

7979
# Run more dynamics
80+
dyn.run(steps=10) # warmup before timing
8081
t1 = time.perf_counter()
8182
dyn.run(steps=nsteps)
8283
t2 = time.perf_counter()

examples/aceff_examples/ase_aceff_PBC.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,14 @@
1515

1616
# We create the ASE calculator by supplying the path to the model and specifying the device and dtype
1717
# we provided a cutoff for the coulomb term so we can use PBCs
18-
calc = TMDNETCalculator(model_file_path, device="cuda", coulomb_cutoff=10.0)
18+
calc = TMDNETCalculator(
19+
model_file_path,
20+
device="cuda",
21+
coulomb_cutoff=10.0,
22+
)
1923
atoms = read("alanine-dipeptide-explicit.pdb")
2024

25+
2126
print(atoms)
2227

2328
atoms.calc = calc
@@ -39,7 +44,7 @@
3944

4045
# setup MD
4146
temperature_K: float = 300
42-
timestep: float = 1.0 * units.fs
47+
timestep: float = 0.5 * units.fs
4348
friction: float = 0.01 / units.fs
4449
traj_interval: int = 10
4550
log_interval: int = 10
@@ -54,5 +59,3 @@
5459
t1 = time.perf_counter()
5560
dyn.run(steps=nsteps)
5661
t2 = time.perf_counter()
57-
58-
print(f"Completed MD in {t2 - t1:.1f} s ({(t2 - t1)*1000 / nsteps:.3f} ms/step)")

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ dependencies = [
2121
"triton; sys_platform == 'linux' and platform_machine != 'aarch64'",
2222
"triton-windows; sys_platform == 'win32'",
2323
"ase",
24+
"warp-lang>=1.10.1",
2425
"setuptools>=82.0.0",
2526
]
2627

tests/expected.pkl

2.2 KB
Binary file not shown.

tests/test_model.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ def test_forward(model_name, use_batch, explicit_q_s, precision):
3838
@mark.parametrize("precision", [32, 64])
3939
def test_forward_output_modules(model_name, output_model, precision):
4040
z, pos, batch = create_example_batch()
41+
pos = pos.to(dtype=dtype_mapping[precision])
4142
args = load_example_args(
4243
model_name, remove_prior=True, output_model=output_model, precision=precision
4344
)

tests/test_warp_ops.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
"""Tests that warp-ops and pure-Python TensorNet paths produce identical results."""
2+
3+
import os
4+
import pytest
5+
import torch
6+
from torch.testing import assert_close
7+
from os.path import dirname, join
8+
9+
import torchmdnet.models.tensornet as _tn
10+
11+
CURR_DIR = dirname(__file__)
12+
CKPT = join(CURR_DIR, "example_tensornet.ckpt")
13+
CAFFEINE_PDB = join(CURR_DIR, "caffeine.pdb")
14+
15+
16+
# ---------------------------------------------------------------------------
17+
# Helpers
18+
# ---------------------------------------------------------------------------
19+
20+
21+
def _load_model(device):
22+
from torchmdnet.models.model import load_model
23+
24+
return load_model(CKPT, derivative=True).to(device)
25+
26+
27+
def _caffeine_tensors(device):
28+
from ase.io import read
29+
from ase.data import atomic_numbers
30+
31+
atoms = read(CAFFEINE_PDB)
32+
z = torch.tensor(
33+
[atomic_numbers[s] for s in atoms.get_chemical_symbols()], dtype=torch.long
34+
).to(device)
35+
pos = torch.tensor(atoms.get_positions(), dtype=torch.float32).to(device)
36+
return z, pos
37+
38+
39+
def _run(model, z, pos):
40+
energy, forces = model(z, pos)
41+
return energy.detach(), forces.detach()
42+
43+
44+
def _set_opt(model, value: bool):
45+
"""Set .opt on the TensorNet representation model and all its submodules."""
46+
rep = model.representation_model
47+
rep.opt = value
48+
rep.tensor_embedding.opt = value
49+
for layer in rep.layers:
50+
layer.opt = value
51+
52+
53+
def _patch_nonopt(monkeypatch, model):
54+
"""Switch model to pure-Python ops by patching module-level ops and .opt flags."""
55+
# Module-level ops are used as globals inside forward() bodies, so they
56+
# still need to be swapped even though branching is now done via self.opt.
57+
monkeypatch.setattr(_tn, "compose_tensor", _tn._compose_tensor)
58+
monkeypatch.setattr(_tn, "decompose_tensor", _tn._decompose_tensor)
59+
monkeypatch.setattr(_tn, "tensor_matmul_o3", _tn._tensor_matmul_o3)
60+
monkeypatch.setattr(_tn, "tensor_matmul_so3", _tn._tensor_matmul_so3)
61+
_set_opt(model, False)
62+
63+
64+
# ---------------------------------------------------------------------------
65+
# Tests
66+
# ---------------------------------------------------------------------------
67+
68+
69+
@pytest.mark.parametrize("device", ["cpu", "cuda"])
70+
def test_warp_vs_python(device, monkeypatch):
71+
"""Warp-ops and pure-Python paths must produce identical energy and forces."""
72+
if device == "cuda" and not torch.cuda.is_available():
73+
pytest.skip("CUDA not available")
74+
if not _tn.OPT:
75+
pytest.skip("warp-ops not available")
76+
77+
model = _load_model(device)
78+
z, pos = _caffeine_tensors(device)
79+
80+
energy_opt, forces_opt = _run(model, z, pos)
81+
82+
_patch_nonopt(monkeypatch, model)
83+
energy_py, forces_py = _run(model, z, pos)
84+
85+
assert_close(energy_opt, energy_py, rtol=1e-4, atol=1e-4)
86+
assert_close(forces_opt, forces_py, rtol=1e-4, atol=1e-4)
87+
88+
89+
@pytest.mark.parametrize("device", ["cpu", "cuda"])
90+
def test_nonopt_runs(device, monkeypatch):
91+
"""Pure-Python (opt=False) path must produce finite energy and forces."""
92+
if device == "cuda" and not torch.cuda.is_available():
93+
pytest.skip("CUDA not available")
94+
95+
model = _load_model(device)
96+
z, pos = _caffeine_tensors(device)
97+
_patch_nonopt(monkeypatch, model)
98+
99+
energy, forces = _run(model, z, pos)
100+
101+
assert torch.isfinite(energy).all(), "Energy contains non-finite values"
102+
assert torch.isfinite(forces).all(), "Forces contain non-finite values"
103+
assert forces.shape == pos.shape
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: BSD-3-Clause
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions are met:
6+
#
7+
# 1. Redistributions of source code must retain the above copyright notice, this
8+
# list of conditions and the following disclaimer.
9+
#
10+
# 2. Redistributions in binary form must reproduce the above copyright notice,
11+
# this list of conditions and the following disclaimer in the documentation
12+
# and/or other materials provided with the distribution.
13+
#
14+
# 3. Neither the name of the copyright holder nor the names of its
15+
# contributors may be used to endorse or promote products derived from
16+
# this software without specific prior written permission.
17+
#
18+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21+
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22+
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23+
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24+
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25+
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26+
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28+
"""Warp GPU kernels for TensorNet operations."""
29+
30+
from __future__ import annotations
31+
32+
import warp as wp
33+
34+
from .compose_tensor import generate_compose_tensor
35+
from .decompose_tensor import generate_decompose_tensor
36+
from .equivariant_o3_matmul import generate_tensor_matmul_o3_3x3
37+
from .equivariant_so3_matmul import generate_tensor_matmul_so3_3x3
38+
from .graph_transform import convert_to_sparse, count_row_col
39+
from .tensor_norm3 import generate_tensor_norm3
40+
from .tensornet_mp import generate_message_passing
41+
from .tensornet_radial_mp import generate_radial_message_passing
42+
from .utils import add_module, get_module, get_stream
43+
44+
wp.init()
45+
46+
47+
__all__ = [
48+
"add_module",
49+
"add_module",
50+
"convert_to_sparse",
51+
"convert_to_sparse",
52+
"count_row_col",
53+
"count_row_col",
54+
"generate_compose_tensor",
55+
"generate_decompose_tensor",
56+
"generate_message_passing",
57+
"generate_message_passing",
58+
"generate_radial_message_passing",
59+
"generate_radial_message_passing",
60+
"generate_tensor_matmul_o3_3x3",
61+
"generate_tensor_matmul_so3_3x3",
62+
"generate_tensor_norm3",
63+
"get_module",
64+
"get_stream",
65+
]

0 commit comments

Comments (0)