From 06309915eac5eddd4aeba55e5b46cb33ec4dc6ff Mon Sep 17 00:00:00 2001
From: mVenetos97 <53448901+mVenetos97@users.noreply.github.com>
Date: Thu, 25 Aug 2022 15:45:07 -0700
Subject: [PATCH] Initial qchem parser file

---
 src/mrsimulator/utils/qchem.py | 199 +++++++++++++++++++++++++++++++++
 1 file changed, 199 insertions(+)
 create mode 100644 src/mrsimulator/utils/qchem.py

diff --git a/src/mrsimulator/utils/qchem.py b/src/mrsimulator/utils/qchem.py
new file mode 100644
index 000000000..601188e88
--- /dev/null
+++ b/src/mrsimulator/utils/qchem.py
@@ -0,0 +1,199 @@
+"""Qchem Output File Parser."""
+from __future__ import annotations
+
+from operator import itemgetter
+from re import match
+
+import numpy as np
+from mrsimulator import Coupling
+from mrsimulator import Site
+from mrsimulator import SpinSystem
+from mrsimulator.spin_system.isotope import ISOTOPE_DATA
+from mrsimulator.spin_system.tensors import SymmetricTensor
+from pymatgen.analysis.nmr import ChemicalShielding
+from pymatgen.io.qchem import check_for_structure_changes
+from pymatgen.io.qchem.outputs import QCOutput
+from pymatgen.io.qchem.utils import read_pattern
+from pymatgen.io.qchem.utils import read_table_pattern
+
+
+__author__ = "Maxwell C. Venetos"
+__email__ = "mvenetos@berkeley.edu"
+
+
+class QChem_NMR(QCOutput):
+    """
+    Class to parse QChem output files.
+    """
+
+    def __init__(self, filename: str):
+        """
+        Args:
+            filename (str): Filename to parse
+        """
+        super().__init__(filename)
+
+        self.data["nmr"] = read_pattern(
+            self.text, {"key": r"(?i)\s*job(?:_)*type\s*(?:=)*\s*[(?i)nmr]"}
+        ).get("key")
+        if self.data.get("nmr", []):
+            self._read_nmr_data()
+
+        self.data["issc"] = read_pattern(
+            self.text, {"key": r"(?i)\s*job(?:_)*type\s*(?:=)*\s*[(?i)issc]"}
+        ).get("key")
+        if self.data.get("issc", []):
+            self._read_issc_data()
+
+    def _read_nmr_data(self):
+        header_pattern = r"\s*total shielding tensor\s*\n \
+        \s*Trace\s*=\s*[0-9]*\.[0-9]+\n\s*Full Tensor:"
+        table_pattern = (
+            r"\s*(-?[0-9]*\.[0-9]*)\s*(-?[0-9]*\.[0-9]*)\s*(-?[0-9]*\.[0-9]*)\s*"
+        )
+        footer_pattern = r"\s*\-+"
+        temp_tensors = read_table_pattern(
+            self.text, header_pattern, table_pattern, footer_pattern, postprocess=float
+        )
+        temp_species = read_pattern(
+            self.text, {"key": r"\s*\-*\s*ATOM\s*([a-zA-Z]+)\s*([0-9]+)\s*\-*\s*"}
+        ).get("key")
+
+        if temp_tensors is None or len(temp_tensors) == 0:
+            self.data["nmr"] = None
+        else:
+            d = []
+            for species, tensor in zip(temp_species, temp_tensors):
+                CS_tensor = ChemicalShielding(tensor[0])
+                Haeberlen_values = CS_tensor.haeberlen_values
+                temp_dict = {
+                    "species": species[0],
+                    "isotropic_chemical_shift": Haeberlen_values[0],
+                    "zeta": Haeberlen_values[2],
+                    "eta": Haeberlen_values[3],
+                }
+                d.append(temp_dict)
+            self.data["nmr"] = d
+
+    def _read_issc_data(self):
+        header_pattern = (
+            r"\s*Total Spin-Spin Coupling Tensor J \(Hz\):\s*\n\s*1\s*2\s*3\s*"
+        )
+        table_pattern = r"\s*([1-3])\s*(-?[0-9]*\.[0-9]*) \
+        \s*(-?[0-9]*\.[0-9]*)\s*(-?[0-9]*\.[0-9]*)\s*"
+        footer_pattern = r"\s*Isotropic:\s*(-?[0-9]*\.[0-9]*)\s*"
+        temp_tensors = read_table_pattern(
+            self.text, header_pattern, table_pattern, footer_pattern, postprocess=float
+        )
+        temp_coupling = read_pattern(
+            self.text,
+            {
+                "key": r"\s*Atoms\s*([a-zA-Z]+)\s*\(\#([0-9]+)\)\s*and \
+                \s*([a-zA-Z]+)\s*\(\#([0-9]+)\)\s*with g-factors\s \
+                *(-?[0-9]*\.[0-9]*)\s*and\s*(-?[0-9]*\.[0-9]*)\s*"
+            },
+        ).get("key")
+        if temp_tensors is None or len(temp_tensors) == 0:
+            self.data["issc"] = None
+        else:
+            d = []
+            for coupling_pair, tensor in zip(temp_coupling, temp_tensors):
+                tensor = np.asarray(tensor)
+                temp_dict = {
+                    "coupling": [coupling_pair[1], coupling_pair[3]],
+                    "j_tensor": tensor[:, 1:],
+                }
+                d.append(temp_dict)
+            self.data["issc"] = d
+
+    def _get_sites(self, ignore_species=None, isotope_map={}):
+        """
+        ingore_species: list of species to ignore
+        isotope map: dictionary with preferred isotope mappings ex: {'Na' : '29Na'}.
+        Default behaviour is assign most abundant isotope
+        """
+        site_collection = []
+        index_map = []
+        for idx, base_atom in enumerate(self.data["species"]):
+
+            if base_atom not in ignore_species:
+                if base_atom in isotope_map.keys():
+                    isotope_symbol = isotope_map[base_atom]
+                else:
+                    atom_types = []
+                    for isotope in ISOTOPE_DATA.keys():
+                        species = match(r"(\d+)\s*(\w+)", isotope)
+                        if species.group(2).casefold() == base_atom.casefold():
+                            atom_types.append(
+                                [isotope, ISOTOPE_DATA[isotope]["natural_abundance"]]
+                            )
+
+                    isotope_symbol = sorted(
+                        atom_types, key=itemgetter(1), reverse=True
+                    )[0][0]
+
+                temp_site = Site(
+                    isotope=isotope_symbol,
+                    isotropic_chemical_shift=self.data["nmr"][idx][
+                        "isotropic_chemical_shift"
+                    ],
+                    shielding_symmetric=SymmetricTensor(
+                        zeta=self.data["nmr"][idx]["zeta"],
+                        eta=self.data["nmr"][idx]["eta"],
+                    ),
+                )
+                index_map.append([idx, len(site_collection)])
+                site_collection.append(temp_site)
+
+        return site_collection, np.asarray(index_map)
+
+    def _get_couplings(self, index_map):
+        coupling_list = []
+
+        index_list = index_map[:, 0]
+        for coupled_pair in self.data["issc"]:
+            if (
+                coupled_pair["coupling"][0] in index_list
+                and coupled_pair["coupling"][1] in index_list
+            ):
+                index_0 = index_list.where(coupled_pair["coupling"][0])
+                index_1 = index_list.where(coupled_pair["coupling"][1])
+                isotropic_j = (
+                    coupled_pair["j_tensor"][0][0]
+                    + coupled_pair["j_tensor"][1][1]
+                    + coupled_pair["j_tensor"][2][2]
+                ) / 3
+                coupling_list.append(
+                    Coupling(
+                        site_index=[index_map[index_0, 1], index_map[index_1, 1]],
+                        isotropic_j=isotropic_j,
+                    )
+                )
+        return coupling_list
+
+    def as_SpinSystem(self, ignore_species=None, isotope_map={}):
+        """
+        ingore_species: list of species to ignore
+        isotope map: dictionary with preferred isotope mappings ex: {'Na' : '29Na'}.
+         Default behaviour is assign most abundant isotope
+        """
+        site_collection, index_map = self._get_sites(ingore_species, isotope_map)
+
+        if self.data["issc"] is not None:
+            coupling = self._get_couplings(index_map)
+        else:
+            coupling = None
+
+        return SpinSystem(sites=site_collection, couplings=coupling)
+
+    def marge_data(self, QChem_to_SpinSystem_):
+        check = check_for_structure_changes(
+            self.data["initial_molecule"],
+            QChem_to_SpinSystem_["data"]["initial_molecule"],
+        )
+        if check != "no change":
+            raise ValueError("ERROR: Molecules are not consistent between files")
+
+        for data_check in ["nmr", "issc"]:
+            if self[data_check] is None:
+                QChem_to_SpinSystem_["data"][data_check]