Skip to content

Commit 966672b

Browse files
authored
Example notebook for QET training (#742)
* Adding README.md for PyG TensorNet * added missing TrajectoryObserver * fix ruff * improve unit tests * convert float into Tensor * Corrected expected values * Corrected expected values again * update torch<=2.9.1 * Convert PyG TensorNet Embedding and Interaction blocks into pure Torch * Pure Pytorch TensorNet for MLIPs is added * Fix united tests * Refactor the PyG TensorNet components to ensure compatibility with pure PyTorch. * Cleanup MGLDataset * Improve the handling of stress unit in PESCalculator * cleanup test_ase_pyg.py * update PESCalculator unit tests * Improve logging in PESCalculator * fix linting tests * fixed linting and united tests * include_ref_charge keyword is added in MGLDataset * Update Relaxations and Simulations using the QET Universal Potential.ipynb Signed-off-by: Tsz Wai Ko <47970742+kenko911@users.noreply.github.com> * fix ruff * avoid crashing in MatCalc united tests by adding calc_charge and compute_charge in PyG Potential and PESCalculator * QET training module added * fix united tests for QET training * make sure predicted and target charges into the same dimension during the training * added backed the self.charge_weight * change torch.vstack into torch.hstack for reference charges * fixed the unit test * fix the predict_structure function in QET model * example notebook for QET potential training --------- Signed-off-by: Tsz Wai Ko <47970742+kenko911@users.noreply.github.com>
1 parent 7dfb3a9 commit 966672b

1 file changed

Lines changed: 399 additions & 0 deletions

File tree

Lines changed: 399 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,399 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "0",
6+
"metadata": {
7+
"id": "35c97a76"
8+
},
9+
"source": [
10+
"# Introduction\n",
11+
"\n",
12+
"This notebook demonstrates how to fit a QET potential using PyTorch Lightning with MatGL."
13+
]
14+
},
15+
{
16+
"cell_type": "code",
17+
"execution_count": null,
18+
"id": "1",
19+
"metadata": {
20+
"id": "6355190a",
21+
"tags": []
22+
},
23+
"outputs": [],
24+
"source": [
25+
"from __future__ import annotations\n",
26+
"\n",
27+
"import os\n",
28+
"import shutil\n",
29+
"import warnings\n",
30+
"from functools import partial\n",
31+
"\n",
32+
"import lightning as L\n",
33+
"import numpy as np\n",
34+
"from dgl.data.utils import split_dataset\n",
35+
"from lightning.pytorch.loggers import CSVLogger\n",
36+
"from mp_api.client import MPRester\n",
37+
"\n",
38+
"import matgl\n",
39+
"matgl.config.BACKEND = \"DGL\"\n",
40+
"from matgl.config import DEFAULT_ELEMENTS\n",
41+
"from matgl.ext._pymatgen_dgl import Structure2Graph\n",
42+
"from matgl.graph._data_dgl import MGLDataLoader, MGLDataset, collate_fn_pes\n",
43+
"from matgl.models._qet_dgl import QET\n",
44+
"from matgl.utils.training import PotentialLightningModule\n",
45+
"\n",
46+
"# To suppress warnings for clearer output\n",
47+
"warnings.simplefilter(\"ignore\")"
48+
]
49+
},
50+
{
51+
"cell_type": "markdown",
52+
"id": "2",
53+
"metadata": {
54+
"id": "eaafc0bd"
55+
},
56+
"source": [
57+
"For the purposes of demonstration, we will download all Si-O compounds in the Materials Project via the MPRester. The forces and stresses are set to zero, though in a real context, these would be non-zero and obtained from DFT calculations."
58+
]
59+
},
60+
{
61+
"cell_type": "code",
62+
"execution_count": null,
63+
"id": "3",
64+
"metadata": {
65+
"colab": {
66+
"base_uri": "https://localhost:8080/",
67+
"height": 67,
68+
"referenced_widgets": [
69+
"aac9f06228444b8dbd7dd798e6c1a93f",
70+
"591a338fe6304870bebd845eb8d8e2a9",
71+
"19fe8f71e71048bf9a923291ad9b1bb4",
72+
"0ab337e54e0943cb8bc940922fb425f5",
73+
"ed3ba68de443454b8be182c1901f8cfa",
74+
"a93bf23e7f224a3092094a1b0961251a",
75+
"a5503afc1d2b427d9fd0e83bf733387d",
76+
"fbad168bdfc34eb1a439bc3334748369",
77+
"4deece8c90b249f384722bce145a6a08",
78+
"d2777074f8d148c591652654e68e6d9f",
79+
"2eda31d46a5d440281571f3ea1240228"
80+
]
81+
},
82+
"id": "bd0ce8a2-ec68-4160-9457-823fb9e6a35d",
83+
"outputId": "2252a59c-9a70-4673-926f-9ed8fc69ed0d",
84+
"tags": []
85+
},
86+
"outputs": [],
87+
"source": [
88+
"# Obtain your API key here: https://next-gen.materialsproject.org/api\n",
89+
"mpr = MPRester(api_key=\"YOUR_API_KEY\")\n",
90+
"entries = mpr.get_entries_in_chemsys([\"Si\", \"O\"])\n",
91+
"structures = [e.structure for e in entries]\n",
92+
"energies = [e.energy for e in entries]\n",
93+
"forces = [np.zeros((len(s), 3)).tolist() for s in structures]\n",
94+
"stresses = [np.zeros((3, 3)).tolist() for s in structures]\n",
95+
"charges = [np.zeros(len(s)).tolist() for s in structures]\n",
96+
"labels = {\n",
97+
" \"energies\": energies,\n",
98+
" \"forces\": forces,\n",
99+
" \"stresses\": stresses,\n",
100+
" \"charges\": charges,\n",
101+
"}\n",
102+
"\n",
103+
"print(f\"{len(structures)} downloaded from MP.\")"
104+
]
105+
},
106+
{
107+
"cell_type": "markdown",
108+
"id": "4",
109+
"metadata": {
110+
"id": "f666cb23"
111+
},
112+
"source": [
113+
"We will first set up the dataset, the QET model and the LightningModule."
114+
]
115+
},
116+
{
117+
"cell_type": "code",
118+
"execution_count": null,
119+
"id": "5",
120+
"metadata": {
121+
"colab": {
122+
"base_uri": "https://localhost:8080/"
123+
},
124+
"id": "e9dc84cb",
125+
"outputId": "b9f93f24-0fd6-4737-a8e4-e87804cd3ad2",
126+
"tags": []
127+
},
128+
"outputs": [],
129+
"source": [
130+
"element_types = DEFAULT_ELEMENTS\n",
131+
"converter = Structure2Graph(element_types=element_types, cutoff=5.0)\n",
132+
"dataset = MGLDataset(\n",
133+
" structures=structures,\n",
134+
" converter=converter,\n",
135+
" labels=labels,\n",
136+
" include_ref_charge=True,\n",
137+
")\n",
138+
"train_data, val_data, test_data = split_dataset(\n",
139+
" dataset,\n",
140+
" frac_list=[0.8, 0.1, 0.1],\n",
141+
" shuffle=True,\n",
142+
" random_state=42,\n",
143+
")\n",
144+
"# If you do not intend to use stress for training, set include_stress=False!\n",
145+
"my_collate_fn = partial(collate_fn_pes, include_charge=True, include_stress=True)\n",
146+
"train_loader, val_loader, test_loader = MGLDataLoader(\n",
147+
" train_data=train_data,\n",
148+
" val_data=val_data,\n",
149+
" test_data=test_data,\n",
150+
" collate_fn=my_collate_fn,\n",
151+
" batch_size=2,\n",
152+
" num_workers=0,\n",
153+
")\n",
154+
"model = QET(element_types=element_types, is_intensive=False, use_smooth=True, rbf_type=\"SphericalBessel\")\n",
155+
"# If you do not intend to use stress and charge for training, set stress_weight=0.0 and charge_weight=0.0!\n",
156+
"lit_module = PotentialLightningModule(model=model, stress_weight=0.01, charge_weight=0.001)"
157+
]
158+
},
159+
{
160+
"cell_type": "markdown",
161+
"id": "6",
162+
"metadata": {
163+
"id": "01be4689"
164+
},
165+
"source": [
166+
"Finally, we will initialize the PyTorch Lightning trainer and run the fitting. Here, the max_epochs is set to 1 just for demonstration purposes. In a real fitting, this would be a much larger number. Also, the `accelerator=\"cpu\"` was set just to ensure compatibility with M1 Macs. In a real world use case, please remove the kwarg or set it to `\"cuda\"` for GPU based training."
167+
]
168+
},
169+
{
170+
"cell_type": "code",
171+
"execution_count": null,
172+
"id": "7",
173+
"metadata": {
174+
"colab": {
175+
"base_uri": "https://localhost:8080/",
176+
"height": 423,
177+
"referenced_widgets": [
178+
"ca0304013c864637b614ca03d131f920",
179+
"e6d2aa7fa6d644f39fa9eefaabbbef48",
180+
"cbaf8681aaa1410d9150f70f755628af",
181+
"f4b8c28792544919b47bc832cd93d4a6",
182+
"9d73d246aa1a47a9af3d1e2caa961b5d",
183+
"c6dc5ee98e1346a08067491930872eff",
184+
"2a89da23f17c4b0b85ce81bcfd69cc37",
185+
"5161471448ae4ec1b8ba2d2152f42153",
186+
"641cc028432f453a8ef3eeab201a9218",
187+
"f562f5b7289d4fcf9f310426aa620521",
188+
"3b95d47d64994a9eb812488ddbac61ec",
189+
"4ce50de72a8a4b5f995e72487f34c560",
190+
"f4ec778fed4749e890cdac57ef0a16f5",
191+
"daaabf9a40b64db6a2240f7e2ae27d08",
192+
"bb84fc3ffb3a4add91ce6af226f9c4af",
193+
"b32866d1699a46fe814e42ef2e5b8477",
194+
"bd3cb443099643b194a13b5cd3ad037f",
195+
"f7b6a5d231bc45c98c7eec9e40c5943c",
196+
"79e719731a74439da8bff2cc57c9898e",
197+
"1b2cc76759ce4164b28d407911590d73",
198+
"ac65110f92bc494d843ef347c980031f",
199+
"0c466ca4cfb34eb58db798b750fecfaa",
200+
"67f78887930f4ba8a9ad25035ab48801",
201+
"ea74e30a47d043c991770a3d82aadbb5",
202+
"56949107dcd94055bc6115404a0de63f",
203+
"9f6d6018de5a42afbfca719fa7b2a740",
204+
"2b8242683dd54bf9aa82e3e186fde7c5",
205+
"b3359b9ef6384c0cae5849f0b80d6cb1",
206+
"169f8ac55aa14dc393b5381e56783dc5",
207+
"53be090dca604f3c98e944bcfb35e34d",
208+
"9de7ce8be6ea4f5298870cc326895412",
209+
"1ee31acfe8dd4002a3917561f83b91bd",
210+
"bf983f21b4b34030b9c14e912a8d5450"
211+
]
212+
},
213+
"id": "7472d071",
214+
"outputId": "9d10c152-752f-4afc-8759-c5174ea446b9",
215+
"tags": []
216+
},
217+
"outputs": [],
218+
"source": [
219+
"# If you wish to disable GPU or MPS (M1 mac) training, use the accelerator=\"cpu\" kwarg.\n",
220+
"logger = CSVLogger(\"logs\", name=\"QET_training\")\n",
221+
"# Inference mode = False is required for calculating forces, stress in test mode and prediction mode\n",
222+
"trainer = L.Trainer(max_epochs=1, accelerator=\"cpu\", logger=logger, inference_mode=False)\n",
223+
"trainer.fit(model=lit_module, train_dataloaders=train_loader, val_dataloaders=val_loader)"
224+
]
225+
},
226+
{
227+
"cell_type": "code",
228+
"execution_count": null,
229+
"id": "8",
230+
"metadata": {
231+
"tags": []
232+
},
233+
"outputs": [],
234+
"source": [
235+
"# test the model, remember to set inference_mode=False in trainer (see above)\n",
236+
"trainer.test(dataloaders=test_loader)"
237+
]
238+
},
239+
{
240+
"cell_type": "code",
241+
"execution_count": null,
242+
"id": "9",
243+
"metadata": {},
244+
"outputs": [],
245+
"source": [
246+
"# save trained model\n",
247+
"model_export_path = \"./trained_model/\"\n",
248+
"lit_module.model.save(model_export_path)\n",
249+
"\n",
250+
"# load trained model\n",
251+
"model = matgl.load_model(path=model_export_path)"
252+
]
253+
},
254+
{
255+
"cell_type": "markdown",
256+
"id": "10",
257+
"metadata": {},
258+
"source": [
259+
"## Finetuning a pre-trained QET\n",
260+
"In the previous cells, we demonstrated the process of training a QET from scratch. Next, let's see how to perform additional training on a QET that has already been trained using Materials Project data."
261+
]
262+
},
263+
{
264+
"cell_type": "code",
265+
"execution_count": null,
266+
"id": "11",
267+
"metadata": {
268+
"id": "85ef3a34-e6fb-452b-82cc-65012e80bce6"
269+
},
270+
"outputs": [],
271+
"source": [
272+
"# download a pre-trained QET\n",
273+
"qet_nnp = matgl.load_model(\"QET-MatQ-PES\")\n",
274+
"model_pretrained = qet_nnp.model\n",
275+
"# obtain element energy offset\n",
276+
"property_offset = qet_nnp.element_refs.property_offset\n",
277+
"# you should test whether including the original property_offset helps improve training and validation accuracy\n",
278+
"lit_module_finetune = PotentialLightningModule(model=model_pretrained, stress_weight=0.01, charge_weight=0.001)"
279+
]
280+
},
281+
{
282+
"cell_type": "code",
283+
"execution_count": null,
284+
"id": "12",
285+
"metadata": {
286+
"colab": {
287+
"base_uri": "https://localhost:8080/",
288+
"height": 423,
289+
"referenced_widgets": [
290+
"9d537bceb5994f0ca8908e6a8fa8dda0",
291+
"783e4a0f711e4e63b68bdfead3dd29dd",
292+
"b835f08350de4bfab6ce0e06b15c0e01",
293+
"ee3af67a2be244988ca27a46472f18c5",
294+
"30631c73610d4438a894a676ed31e49a",
295+
"ac1633b9d785471098cb40db339edb3f",
296+
"dfcaa58a05ab49a0b82108f04984e48b",
297+
"a8318cebd921434caee81d717a5f01fa",
298+
"479321dba45e48648bba9bad86ad4c4c",
299+
"8ac7f6294ff949b1a10da96532387ebb",
300+
"edafb607f21045d98a4b0c2059d27958",
301+
"d23643bb47234c969df66ad2ad223b4f",
302+
"fac4d3e8a6b64524bb0aa08036aa0588",
303+
"20ccc55a5b264bc3b193d324caeae685",
304+
"44f14d0ce3af41f5bb8f048a69da6a5d",
305+
"e8f06e2730cf4aef8427932a033252ab",
306+
"b662754e5b484ea58433fef79d401ce6",
307+
"4977216b2e9f4e94a41520127fde4a03",
308+
"c62f28b519c04081831d1a2507003e38",
309+
"e23184852e70488698cc75272c7057ae",
310+
"b7e24728fbb444c39a7115988c289883",
311+
"6921ddc707334f5d99b13e519d10d437",
312+
"db8cc5bcec1d4d0b999b48eb7a36f0de",
313+
"12511f8f4b134a7f80feada2b65240d0",
314+
"34a896f642fa47a78fe858ba8ab95c9e",
315+
"3a43080c70054e7793e73b5d49e48448",
316+
"88dddbd1837c4bfbbb1870beb73aa08a",
317+
"c59c41ae3d224b5aaa888a7526e1a130",
318+
"9cb7a81f00dc4d97b3f0639ec6dde1a4",
319+
"dba16d8574f8475f9d8f973fbf968b59",
320+
"d4df21b4ef1a4d3b938c5657337b7ec1",
321+
"b6a3965a166940d289ab4a50f2e78a60",
322+
"b5813fa692a74bf2aec3fbdf03088f3d"
323+
]
324+
},
325+
"id": "4133225a-5990-4b97-9d73-88195df87a1a",
326+
"outputId": "f149a68a-eef7-4726-b3a1-723525dc908f"
327+
},
328+
"outputs": [],
329+
"source": [
330+
"# If you wish to disable GPU or MPS (M1 mac) training, use the accelerator=\"cpu\" kwarg.\n",
331+
"logger = CSVLogger(\"logs\", name=\"QET_finetuning\")\n",
332+
"trainer = L.Trainer(max_epochs=1, accelerator=\"cpu\", logger=logger, inference_mode=False)\n",
333+
"trainer.fit(model=lit_module_finetune, train_dataloaders=train_loader, val_dataloaders=val_loader)"
334+
]
335+
},
336+
{
337+
"cell_type": "code",
338+
"execution_count": null,
339+
"id": "13",
340+
"metadata": {
341+
"id": "252f6456-3ecf-47f0-84ca-c8e9dcc66ccc"
342+
},
343+
"outputs": [],
344+
"source": [
345+
"# save trained model\n",
346+
"model_save_path = \"./finetuned_model/\"\n",
347+
"lit_module_finetune.model.save(model_save_path)\n",
348+
"# load trained model\n",
349+
"trained_model = matgl.load_model(path=model_save_path)"
350+
]
351+
},
352+
{
353+
"cell_type": "code",
354+
"execution_count": null,
355+
"id": "14",
356+
"metadata": {
357+
"id": "cd11b92f"
358+
},
359+
"outputs": [],
360+
"source": [
361+
"# This code just performs cleanup for this notebook.\n",
362+
"\n",
363+
"for fn in (\"dgl_graph.bin\", \"lattice.pt\", \"dgl_line_graph.bin\", \"state_attr.pt\", \"labels.json\"):\n",
364+
" try:\n",
365+
" os.remove(fn)\n",
366+
" except FileNotFoundError:\n",
367+
" pass\n",
368+
"\n",
369+
"shutil.rmtree(\"logs\")\n",
370+
"shutil.rmtree(\"trained_model\")\n",
371+
"shutil.rmtree(\"finetuned_model\")"
372+
]
373+
}
374+
],
375+
"metadata": {
376+
"colab": {
377+
"provenance": []
378+
},
379+
"kernelspec": {
380+
"display_name": "Python 3 (ipykernel)",
381+
"language": "python",
382+
"name": "python3"
383+
},
384+
"language_info": {
385+
"codemirror_mode": {
386+
"name": "ipython",
387+
"version": 3
388+
},
389+
"file_extension": ".py",
390+
"mimetype": "text/x-python",
391+
"name": "python",
392+
"nbconvert_exporter": "python",
393+
"pygments_lexer": "ipython3",
394+
"version": "3.11.0"
395+
}
396+
},
397+
"nbformat": 4,
398+
"nbformat_minor": 5
399+
}

0 commit comments

Comments
 (0)