amino3to1 index improvement (#25)

rasbt · web-flow · commit 2bc018a6c909 · 2017-05-06T16:40:36.000-04:00
diff --git a/biopandas/pdb/pandas_pdb.py b/biopandas/pdb/pandas_pdb.py
@@ -373,7 +373,7 @@ def amino3to1(self, record='ATOM',
         cmp = 'placeholder'
         indices = []
 
-        for num, ind in zip(tmp['residue_number'], tmp.index):
+        for num, ind in zip(tmp['residue_number'], np.arange(tmp.shape[0])):
             if num != cmp:
                 indices.append(ind)
             cmp = num
diff --git a/biopandas/pdb/tests/test_amino3to1.py b/biopandas/pdb/tests/test_amino3to1.py
@@ -4,6 +4,7 @@
 # Project Website: http://rasbt.github.io/biopandas/
 # Code Repository: https://github.com/rasbt/biopandas
 
+import numpy as np
 from biopandas.pdb import PandasPdb
 import os
 
@@ -37,6 +38,38 @@ def test_defaults():
     assert expect_res == got_res
 
 
+def test_sameindex():
+    TESTDATA_1t48 = os.path.join(os.path.dirname(__file__), 'data',
+                                                            '1t48_995.pdb')
+    p1t48 = PandasPdb()
+    p1t48.read_pdb(TESTDATA_1t48)
+    print(p1t48)
+    p1t48.df['ATOM'].index = np.zeros(p1t48.df['ATOM'].shape[0], dtype=int)
+
+    expect_res = ['M', 'E', 'M', 'E', 'K', 'E', 'F', 'E', 'Q',
+                  'I', 'D', 'K', 'S', 'G', 'S', 'W', 'A', 'A',
+                  'I', 'Y', 'Q', 'D', 'I', 'R', 'H', 'E', 'A',
+                  'S', 'D', 'F', 'P', 'C', 'R', 'V', 'A', 'K',
+                  'L', 'P', 'K', 'N', 'K', 'N', 'R', 'N', 'R',
+                  'Y', 'R', 'D', 'V', 'S', 'P', 'F', 'D', 'H',
+                  'S', 'R', 'I', 'K', 'L', 'H', 'Q', 'E', 'D',
+                  'N', 'D', 'Y', 'I', 'N', 'A', 'S', 'L', 'I',
+                  'K', 'M', 'E', 'E', 'A', 'Q', 'R', 'S', 'Y',
+                  'I', 'L', 'T', 'Q', 'G', 'P', 'L', 'P', 'N',
+                  'T', 'C', 'G', 'H', 'F', 'W', 'E', 'M', 'V',
+                  'W', 'E', 'Q', 'K', 'S', 'R', 'G', 'V', 'V',
+                  'M', 'L', 'N', 'R', 'V', 'M', 'E', 'K', 'G',
+                  'S', 'L', 'K']
+
+    transl = p1t48.amino3to1()
+    expect_chain = ['A' for _ in range(transl.shape[0])]
+    got_chain = list(transl['chain_id'].values)
+    got_res = list(transl['residue_name'].values)
+
+    assert expect_chain == got_chain
+    assert expect_res == got_res
+
+
 def test_multichain():
     TESTDATA_5mtn = os.path.join(os.path.dirname(__file__),
                                  'data', '5mtn_multichain.pdb')
diff --git a/docs/sources/CHANGELOG.md b/docs/sources/CHANGELOG.md
@@ -17,11 +17,12 @@ The CHANGELOG for the current development version is available at
 ##### Changes
 
 - The `amino3to1` method of `biopandas.pdb.PandasPDB` objects now returns a pandas `DataFrame` instead of a pandas `Series` object. The returned data frame has two columns, `'chain_id'` and `'residue_name'`, where the former contains the chain ID of the amino acid and the latter contains the 1-letter amino acid code, respectively.
--  Significant speed improvements of the `distance` method of both `PandasPdb` and `PandasMol2` (now about 300 percent faster than previously)
+-  Significant speed improvements of the `distance` method of both `PandasPdb` and `PandasMol2` (now about 300 percent faster than previously).
 
 ##### Bug Fixes
 
 - The `amino3to1` method of `biopandas.pdb.PandasPDB` objects now handles multi-chain proteins correctly.
+- The `amino3to1` method of `biopandas.pdb.PandasPdb` objects now also works as expected if the `'ATOM'` DataFrame has an unordered index or duplicate index values.