Merged. Changes from all 31 commits:
0541df7
CI(benchmark): run benchmark on codspeed-macro for wall time (#858)
njzjz Aug 5, 2025
c129cc3
[pre-commit.ci] pre-commit autoupdate (#860)
pre-commit-ci[bot] Aug 5, 2025
8263565
fix: raise runtime error when the vasp long ions per type bug is trig…
wanghan-iapcm Aug 6, 2025
b424090
merge master to devel (v0.2.25) (#863)
njzjz Aug 7, 2025
b1ba78b
feat: scf convergence check in vasp .xml format. (#862)
wanghan-iapcm Aug 8, 2025
9dbb30e
chore(deps): bump actions/checkout from 4 to 5 (#865)
dependabot[bot] Aug 13, 2025
268d825
[pre-commit.ci] pre-commit autoupdate (#864)
pre-commit-ci[bot] Aug 13, 2025
beee99a
feat: support virial in qe/traj (#859)
wanghan-iapcm Aug 21, 2025
f727ada
[pre-commit.ci] pre-commit autoupdate (#866)
pre-commit-ci[bot] Aug 25, 2025
5f1dec8
feat: add support for multiple LAMMPS atom styles with automatic dete…
Copilot Aug 25, 2025
32725db
[pre-commit.ci] pre-commit autoupdate (#871)
pre-commit-ci[bot] Aug 29, 2025
ebb20ad
docs: add comprehensive GitHub Copilot instructions with uv and seman…
Copilot Aug 29, 2025
ab61da1
Latest support for abacus scf output (#875)
kluophysics Aug 29, 2025
ba31bcd
feat(lammps): implement to_system method for LAMMPSDumpFormat (#882)
Copilot Aug 31, 2025
a82693b
feat(quip/gap/xyz): implement to_labeled_system and to_multi_systems …
Copilot Sep 3, 2025
b61cf36
feat(xyz): add extended XYZ format alias (#881)
Copilot Sep 3, 2025
80d14f1
[pre-commit.ci] pre-commit autoupdate (#889)
pre-commit-ci[bot] Sep 3, 2025
a8627f7
[pre-commit.ci] pre-commit autoupdate (#891)
pre-commit-ci[bot] Sep 10, 2025
b25e0e7
chore(deps): bump actions/setup-python from 5 to 6 (#892)
dependabot[bot] Sep 10, 2025
33dd36e
[pre-commit.ci] pre-commit autoupdate (#895)
pre-commit-ci[bot] Sep 17, 2025
a89a7a4
Add gaussian/fchk format support (#896)
Romarin87 Sep 17, 2025
7239f16
feat: move .github/copilot-instructions.md to AGENTS.md (#898)
Copilot Sep 18, 2025
7212c33
chore(deps): bump CodSpeedHQ/action from 3 to 4 (#893)
dependabot[bot] Sep 18, 2025
497a474
fix(qe): use ndmin=2 in np.loadtxt to handle single-line .evp files (…
Copilot Sep 23, 2025
26829d5
[pre-commit.ci] pre-commit autoupdate (#902)
pre-commit-ci[bot] Sep 23, 2025
39e9350
[pre-commit.ci] pre-commit autoupdate (#903)
pre-commit-ci[bot] Sep 30, 2025
42feb41
[pre-commit.ci] pre-commit autoupdate (#904)
pre-commit-ci[bot] Oct 7, 2025
adb294a
[pre-commit.ci] pre-commit autoupdate (#905)
pre-commit-ci[bot] Oct 20, 2025
027453a
chore(deps): bump astral-sh/setup-uv from 6 to 7 (#906)
dependabot[bot] Oct 20, 2025
682e74d
docs: add citations (#908)
njzjz Oct 21, 2025
c66e97a
[pre-commit.ci] pre-commit autoupdate (#907)
pre-commit-ci[bot] Oct 21, 2025
10 changes: 6 additions & 4 deletions .github/workflows/benchmark.yml
@@ -6,14 +6,15 @@ on:

jobs:
benchmark:
if: ${{ github.repository_owner == 'deepmodeling' }}
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: 3.12
- uses: astral-sh/setup-uv@v6
- uses: astral-sh/setup-uv@v7
with:
enable-cache: true
cache-dependency-glob: |
@@ -22,7 +23,8 @@ jobs:
- name: Install dependencies
run: uv pip install --system .[test,amber,ase,pymatgen,benchmark] rdkit openbabel-wheel
- name: Run benchmarks
uses: CodSpeedHQ/action@v3
uses: CodSpeedHQ/action@v4
with:
token: ${{ secrets.CODSPEED_TOKEN }}
mode: walltime
run: pytest benchmark/ --codspeed
2 changes: 1 addition & 1 deletion .github/workflows/pyright.yml
@@ -9,7 +9,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@master
- uses: actions/setup-python@v5
- uses: actions/setup-python@v6
with:
python-version: '3.12'
- run: pip install uv
6 changes: 3 additions & 3 deletions .github/workflows/test.yml
@@ -12,13 +12,13 @@ jobs:
python-version: ["3.8", "3.12"]

steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5
# set up conda
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python-version }}
- uses: astral-sh/setup-uv@v6
- uses: astral-sh/setup-uv@v7
with:
enable-cache: true
cache-dependency-glob: |
4 changes: 2 additions & 2 deletions .github/workflows/test_import.yml
@@ -8,8 +8,8 @@ jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
- uses: actions/checkout@v5
- uses: actions/setup-python@v6
with:
python-version: '3.9'
architecture: 'x64'
5 changes: 5 additions & 0 deletions .gitignore
@@ -29,3 +29,8 @@ docs/minimizers.csv
docs/api/
docs/formats/
.DS_Store
# Test artifacts
tests/data_*.h5
tests/data_*/
tests/tmp.*
tests/.coverage
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
@@ -2,7 +2,7 @@
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
rev: v6.0.0
hooks:
# there are many log files in tests
# TODO: seperate py files and log files
@@ -21,7 +21,7 @@ repos:
# Python
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.12.5
rev: v0.14.1
hooks:
- id: ruff
args: ["--fix"]
@@ -36,7 +36,7 @@
args: ["--write"]
# Python inside docs
- repo: https://github.com/asottile/blacken-docs
rev: 1.19.1
rev: 1.20.0
hooks:
- id: blacken-docs
ci:
148 changes: 148 additions & 0 deletions AGENTS.md
@@ -0,0 +1,148 @@
# dpdata - Atomistic Data Format Manipulation

dpdata is a Python package for manipulating atomistic data from computational science software. It supports format conversion between various atomistic simulation packages including VASP, DeePMD-kit, LAMMPS, GROMACS, Gaussian, ABACUS, and many others.

Always reference these instructions first and fall back to search or bash commands only when you encounter unexpected information that does not match the information here.

## Working Effectively

- **Bootstrap and install the repository:**
- `cd /home/runner/work/dpdata/dpdata` (or wherever the repo is cloned)
- `uv pip install -e .` -- installs dpdata in development mode with core dependencies (numpy, scipy, h5py, monty, wcmatch)
- Test installation: `dpdata --version` -- should show version like "dpdata v0.1.dev2+..."

- **Run tests:**
- `cd tests && python -m unittest discover` -- runs all 1826 tests in ~10 seconds. NEVER CANCEL.
- `cd tests && python -m unittest test_<module>.py` -- run specific test modules (individual modules take ~0.5 seconds)
- `cd tests && coverage run --source=../dpdata -m unittest discover && coverage report` -- run tests with coverage

- **Linting and formatting:**
- Install ruff: `uv pip install ruff`
- `ruff check dpdata/` -- lint the main package (takes ~1 second)
- `ruff format dpdata/` -- format code according to project style
- `ruff check --fix dpdata/` -- auto-fix linting issues where possible

- **Pre-commit hooks:**
- Install: `uv pip install pre-commit`
- `pre-commit run --all-files` -- run all hooks on all files
- Hooks include: ruff linting/formatting, trailing whitespace, end-of-file-fixer, yaml/json/toml checks

## Validation

- **Always test CLI functionality after making changes:**
- `dpdata --help` -- ensure CLI still works
- `dpdata --version` -- verify version is correct
- Test a basic conversion if sample data is available (see the sketch after this list)

- **Always run linting before committing:**
- `ruff check dpdata/` -- ensure no new linting errors
- `ruff format dpdata/` -- ensure code is properly formatted

- **Run relevant tests for your changes:**
- For format-specific changes: `cd tests && python -m unittest test_<format>*.py`
- For core system changes: `cd tests && python -m unittest test_system*.py test_multisystems.py`
- For CLI changes: `cd tests && python -m unittest test_cli.py` (if it exists)
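
A minimal sketch of the basic conversion check mentioned above, assuming a sample VASP `OUTCAR` is on hand; the file and output paths here are hypothetical, not fixtures shipped with the repository:

```python
# Smoke test: read a labeled VASP run and write it back out as DeePMD npy data.
# "OUTCAR" and "deepmd_data" are hypothetical paths used only for illustration.
import dpdata

system = dpdata.LabeledSystem("OUTCAR", fmt="vasp/outcar")
print(system)  # prints a data summary (atom types, atom counts, frame count)
system.to("deepmd/npy", "deepmd_data", set_size=system.get_nframes())
```

If the round trip completes and the printed summary looks sane, the core conversion path is working.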

## Build and Documentation

- **Documentation:**
- `cd docs && make help` -- see all available build targets
- `cd docs && make html` -- build HTML documentation (requires additional dependencies)
- Documentation source is in `docs/` directory using Sphinx
- **NOTE:** Full docs build requires additional dependencies like `deepmodeling-sphinx` that may not be readily available

- **Package building:**
- Uses setuptools with pyproject.toml configuration
- `uv pip install build && python -m build` -- create source and wheel distributions
- Version is managed by setuptools_scm from git tags

## Common Tasks

The following are outputs from frequently run commands. Reference them instead of re-running to save time.

### Repository structure
```
/home/runner/work/dpdata/dpdata/
├── dpdata/ # Main package code
│ ├── __init__.py
│ ├── cli.py # Command-line interface
│ ├── system.py # Core System classes
│ ├── format.py # Format registry
│ ├── abacus/ # ABACUS format support
│ ├── amber/ # AMBER format support
│ ├── deepmd/ # DeePMD format support
│ ├── vasp/ # VASP format support
│ ├── xyz/ # XYZ format support
│ └── ... # Other format modules
├── tests/ # Test suite (91 test files)
├── docs/ # Sphinx documentation
├── plugin_example/ # Example plugin
├── pyproject.toml # Project configuration
└── README.md
```

### Key dependencies
- Core: numpy>=1.14.3, scipy, h5py, monty, wcmatch
- Optional: ase (ASE integration), parmed (AMBER), pymatgen (Materials Project), rdkit (molecular analysis)
- Testing: unittest (built-in), coverage
- Linting: ruff
- Docs: sphinx with various extensions

### Test timing expectations
- Full test suite: ~10 seconds (1826 tests). NEVER CANCEL.
- Individual test modules: ~0.5 seconds
- Linting with ruff: ~1 second
- Documentation build: ~30 seconds

### Common workflows
1. **Adding a new format:**
- Create module in `dpdata/<format>/`
- Implement format classes inheriting from appropriate base classes
- Add tests in `tests/test_<format>*.py`
- Register format in the plugin system (see the sketch after this list)

2. **Fixing bugs:**
- Write test that reproduces the bug first
- Make minimal fix to pass the test
- Run full test suite to ensure no regressions
- Run linting to ensure code style compliance

3. **CLI changes:**
- Modify `dpdata/cli.py`
- Test with `dpdata --help` and specific commands
- Add/update tests if needed
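
Referring back to step 1 (adding a new format), here is a rough sketch of a plugin, assuming the `dpdata.format.Format` base class and its `register` decorator as used by the existing format modules; the format name `myformat` and the hard-coded data are purely illustrative:

```python
# Rough sketch of a format plugin. The format name "myformat" and the
# hard-coded single-frame data below are illustrative only.
import numpy as np

from dpdata.format import Format


@Format.register("myformat")
class MyFormat(Format):
    def from_system(self, file_name, **kwargs):
        # A real implementation would parse file_name; here we return a
        # minimal one-atom, one-frame data dict in dpdata's layout.
        return {
            "atom_names": ["H"],
            "atom_numbs": [1],
            "atom_types": np.array([0]),
            "orig": np.zeros(3),
            "cells": 10.0 * np.eye(3).reshape(1, 3, 3),
            "coords": np.zeros((1, 1, 3)),
        }
```

Once registered, the format is reachable as `dpdata.System("some_file", fmt="myformat")`.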

## Troubleshooting

- **Installation timeouts:** Network timeouts during `uv pip install` are common. If this occurs, try:
- Individual package installation: `uv pip install numpy scipy h5py monty wcmatch`
- Use `--timeout` option: `uv pip install --timeout 300 -e .`
- Verify existing installation works: `dpdata --version` should work even if reinstall fails

- **Optional dependency errors:** Many tests will skip or fail if optional dependencies (ase, parmed, pymatgen, rdkit) are not installed. This is expected. Core functionality will work with just the basic dependencies.

- **Documentation build failures:** The docs build requires specific dependencies like `deepmodeling-sphinx` that may not be readily available. Use `make help` to see available targets, but expect build failures without full doc dependencies.

- **Test artifacts:** The test suite generates temporary files (`tests/data_*`, `tests/tmp.*`, `tests/.coverage`). These are excluded by `.gitignore` and should not be committed.

- **Import errors:** If you see import errors for specific modules, check if the corresponding optional dependency is installed. For example, ASE functionality requires `uv pip install ase`.
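
A hedged illustration of working around a missing optional dependency, here ASE; the `POSCAR` path is hypothetical and the `ase/structure` target name is assumed from dpdata's ASE integration:

```python
# Guarded use of an ASE-backed conversion: skip it cleanly when the optional
# dependency is missing instead of hitting an ImportError at call time.
import importlib.util

import dpdata

if importlib.util.find_spec("ase") is None:
    print("ase is not installed; ASE-backed formats are unavailable")
else:
    system = dpdata.System("POSCAR", fmt="vasp/poscar")
    atoms_list = system.to("ase/structure")  # ASE Atoms objects, one per frame
```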

## Critical Notes

- **NEVER CANCEL** test runs or builds - they complete quickly (10 seconds for tests, 30 seconds for docs)
- Always run `ruff check` and `ruff format` before committing
- Test artifacts in `tests/` directory are excluded by `.gitignore` - don't commit them
- Optional dependencies are required for some formats but core functionality works without them
- The CLI tool `dpdata` is the main user interface for format conversion

## Commit and PR Guidelines

- **Use semantic commit messages** for all commits and PR titles following the format: `type(scope): description`
- **Types:** `feat` (new feature), `fix` (bug fix), `docs` (documentation), `style` (formatting), `refactor` (code restructuring), `test` (testing), `chore` (maintenance)
- **Examples:**
- `feat(vasp): add support for POSCAR format`
- `fix(cli): resolve parsing error for multi-frame files`
- `docs: update installation instructions`
- `test(amber): add tests for trajectory parsing`
- **PR titles** must follow semantic commit format
- **Commit messages** should be concise but descriptive of the actual changes made
7 changes: 7 additions & 0 deletions README.md
@@ -1,11 +1,18 @@
# dpdata

[![DOI:10.1021/acs.jcim.5c01767](https://img.shields.io/badge/DOI-10.1021%2Facs.jcim.5c01767-blue)](https://doi.org/10.1021/acs.jcim.5c01767)
[![conda-forge](https://img.shields.io/conda/dn/conda-forge/dpdata?color=red&label=conda-forge&logo=conda-forge)](https://anaconda.org/conda-forge/dpdata)
[![pip install](https://img.shields.io/pypi/dm/dpdata?label=pip%20install&logo=pypi)](https://pypi.org/project/dpdata)
[![Documentation Status](https://readthedocs.org/projects/dpdata/badge/)](https://dpdata.readthedocs.io/)

**dpdata** is a Python package for manipulating atomistic data of software in computational science.

## Credits

If you use this software, please cite the following paper:

- Jinzhe Zeng, Xingliang Peng, Yong-Bin Zhuang, Haidi Wang, Fengbo Yuan, Duo Zhang, Renxi Liu, Yingze Wang, Ping Tuo, Yuzhi Zhang, Yixiao Chen, Yifan Li, Cao Thang Nguyen, Jiameng Huang, Anyang Peng, Marián Rynik, Wei-Hong Xu, Zezhong Zhang, Xu-Yuan Zhou, Tao Chen, Jiahao Fan, Wanrun Jiang, Bowen Li, Denan Li, Haoxi Li, Wenshuo Liang, Ruihao Liao, Liping Liu, Chenxing Luo, Logan Ward, Kaiwei Wan, Junjie Wang, Pan Xiang, Chengqian Zhang, Jinchao Zhang, Rui Zhou, Jia-Xin Zhu, Linfeng Zhang, Han Wang, dpdata: A Scalable Python Toolkit for Atomistic Machine Learning Data Sets, *J. Chem. Inf. Model.*, 2025, DOI: [10.1021/acs.jcim.5c01767](https://doi.org/10.1021/acs.jcim.5c01767). [![Citations](https://citations.njzjz.win/10.1021/acs.jcim.5c01767)](https://badge.dimensions.ai/details/doi/10.1021/acs.jcim.5c01767)

## Installation

dpdata only supports Python 3.8 and above. You can [setup a conda/pip environment](https://docs.deepmodeling.com/faq/conda.html), and then use one of the following methods to install dpdata:
13 changes: 13 additions & 0 deletions docs/index.rst
@@ -8,6 +8,19 @@ Welcome to dpdata's documentation!

dpdata is a Python package for manipulating atomistic data of software in computational science.

If you use this software, please cite the following paper:

- Jinzhe Zeng, Xingliang Peng, Yong-Bin Zhuang, Haidi Wang, Fengbo
Yuan, Duo Zhang, Renxi Liu, Yingze Wang, Ping Tuo, Yuzhi Zhang,
Yixiao Chen, Yifan Li, Cao Thang Nguyen, Jiameng Huang, Anyang Peng,
Marián Rynik, Wei-Hong Xu, Zezhong Zhang, Xu-Yuan Zhou, Tao Chen,
Jiahao Fan, Wanrun Jiang, Bowen Li, Denan Li, Haoxi Li, Wenshuo
Liang, Ruihao Liao, Liping Liu, Chenxing Luo, Logan Ward, Kaiwei Wan,
Junjie Wang, Pan Xiang, Chengqian Zhang, Jinchao Zhang, Rui Zhou,
Jia-Xin Zhu, Linfeng Zhang, Han Wang, dpdata: A Scalable Python
Toolkit for Atomistic Machine Learning Data Sets, *J. Chem. Inf.
Model.*, 2025.

.. toctree::
:maxdepth: 2
:caption: Contents:
11 changes: 8 additions & 3 deletions dpdata/abacus/scf.py
@@ -45,7 +45,10 @@ def get_path_out(fname, inlines):
def get_energy(outlines):
Etot = None
for line in reversed(outlines):
if "final etot is" in line:
if "final etot is" in line: # for LTS
Etot = float(line.split()[-2]) # in eV
return Etot, True
elif "TOTAL ENERGY" in line: # for develop
Etot = float(line.split()[-2]) # in eV
return Etot, True
elif "convergence has NOT been achieved!" in line:
@@ -59,7 +62,8 @@ def get_energy(outlines):
def collect_force(outlines):
force = []
for i, line in enumerate(outlines):
if "TOTAL-FORCE (eV/Angstrom)" in line:
# if "TOTAL-FORCE (eV/Angstrom)" in line:
if "TOTAL-FORCE" in line:
value_pattern = re.compile(
r"^\s*[A-Z][a-z]?[1-9][0-9]*\s+[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?\s+[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?\s+[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?\s*$"
)
@@ -95,7 +99,8 @@ def get_force(outlines, natoms):
def collect_stress(outlines):
stress = []
for i, line in enumerate(outlines):
if "TOTAL-STRESS (KBAR)" in line:
# if "TOTAL-STRESS (KBAR)" in line:
if "TOTAL-STRESS" in line:
value_pattern = re.compile(
r"^\s*[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?\s+[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?\s+[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?\s*$"
)
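
To make the relaxed header matching in `dpdata/abacus/scf.py` above concrete, a small standalone sketch; the sample output lines are made up, shaped only so that `line.split()[-2]` yields the energy as in `get_energy()`:

```python
# Standalone illustration of the matching changed above: the energy line
# differs between ABACUS LTS ("final etot is") and develop ("TOTAL ENERGY"),
# and the force/stress block headers are matched without their unit suffixes.
# The sample lines below are made up for illustration.
sample_lines = [
    " final etot is -6811.9 eV",    # LTS-style energy line
    " TOTAL ENERGY = -6811.9 eV",   # develop-style energy line
    " TOTAL-FORCE (eV/Angstrom)",   # force header with unit suffix
    " TOTAL-FORCE",                 # force header without suffix
    " TOTAL-STRESS (KBAR)",         # stress header with unit suffix
]

for line in sample_lines:
    if "final etot is" in line or "TOTAL ENERGY" in line:
        print("energy (eV):", float(line.split()[-2]))
    elif "TOTAL-FORCE" in line:
        print("force block header matched:", line.strip())
    elif "TOTAL-STRESS" in line:
        print("stress block header matched:", line.strip())
```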