Update tests for (1) Windows/Linux compatibility, (2) the AlphaFold (AF) model version change, and (3) restructured functions

gezmi · gezmi · commit 9da22dcf250f · 2025-11-05T18:59:46.000+02:00
diff --git a/biopandas/pdb/pandas_pdb.py b/biopandas/pdb/pandas_pdb.py
@@ -459,6 +459,7 @@ def _fetch_af2(uniprot_id: str, af2_version: int = 3):
         """Load PDB file from https://alphafold.ebi.ac.uk/."""
         txt = None
         url = f"https://alphafold.ebi.ac.uk/files/AF-{uniprot_id.upper()}-F1-model_v{af2_version}.pdb"
+        print(url)
         try:
             response = urlopen(url)
             txt = response.read()
diff --git a/biopandas/stack/stack.py b/biopandas/stack/stack.py
@@ -118,7 +118,7 @@ def fetch_pdb(self, key: str=None, pdb_id: str=None, uniprot_id: str=None):
             if key is None:
                 key = pdb_id
         elif uniprot_id:
-            pdb.fetch_pdb(uniprot_id=uniprot_id, source="alphafold2-v4")
+            pdb.fetch_pdb(uniprot_id=uniprot_id, source="alphafold2-v6")
             if key is None:
                 key = uniprot_id
         else:
diff --git a/tests/pdb/test_read_pdb.py b/tests/pdb/test_read_pdb.py
@@ -27,7 +27,7 @@
 TESTDATA_FILENAME = str(TEST_DATA.joinpath("3eiy.pdb"))
 TESTDATA_FILENAME2 = str(TEST_DATA.joinpath("4eiy_anisouchunk.pdb"))
 TESTDATA_FILENAME_GZ = str(TEST_DATA.joinpath("3eiy.pdb.gz"))
-TESTDATA_FILENAME_AF2_V4 = str(TEST_DATA.joinpath("AF-Q5VSL9-F1-model_v4.pdb"))
+TESTDATA_FILENAME_AF2_V4 = str(TEST_DATA.joinpath("AF-Q5VSL9-F1-model_v6.pdb"))
 
 TESTDATA_FILENAME_AF2_V3 = str(TEST_DATA.joinpath("AF-Q5VSL9-F1-model_v3.pdb"))
 
diff --git a/tests/stack/data/3eiy.pdb b/tests/stack/data/3eiy.pdb
diff --git a/tests/stack/data/4eiy_anisouchunk.pdb b/tests/stack/data/4eiy_anisouchunk.pdb
@@ -1,10 +1,5 @@
-HEADER    MEMBRANE PROTEIN                        06-APR-12   4EIY              
-ATOM    101  CG1 VAL A  12      -5.222   2.059   3.696  1.00 17.98           C  
-ANISOU  101  CG1 VAL A  12     2231   2335   2262    -42    326    285       C  
-ATOM    102  CG2 VAL A  12      -5.748   3.603   1.839  1.00 16.30           C  
-ANISOU  102  CG2 VAL A  12     2409   2061   1722     37    584    238       C  
-ATOM    103  N   GLU A  13      -8.338   2.518   5.470  1.00 16.24           N  
-ANISOU  103  N   GLU A  13     2362   2337   1469    -17      0      0       N  
-ATOM    104  CA  GLU A  13      -8.773   1.934   6.759  1.00 15.34           C  
-ANISOU  104  CA  GLU A  13     2354   2186   1285    -33    -18    -33       C  
-ATOM    105  C   GLU A  13      -9.966   1.026   6.560  1.00 16.90           C  
+ATOM    101  CG1 VAL A  12      -5.222   2.059   3.696  1.00 17.98           C
+ATOM    102  CG2 VAL A  12      -5.748   3.603   1.839  1.00 16.30           C
+ATOM    103  N   GLU A  13      -8.338   2.518   5.470  1.00 16.24           N
+ATOM    104  CA  GLU A  13      -8.773   1.934   6.759  1.00 15.34           C
+ATOM    105  C   GLU A  13      -9.966   1.026   6.560  1.00 16.90           C
diff --git a/tests/stack/data/AF-Q5VSL9-F1-model_v3.pdb b/tests/stack/data/AF-Q5VSL9-F1-model_v3.pdb
diff --git a/tests/stack/data/AF-Q5VSL9-F1-model_v4.pdb b/tests/stack/data/AF-Q5VSL9-F1-model_v4.pdb
diff --git a/tests/stack/test_read_stack.py b/tests/stack/test_read_stack.py
@@ -88,7 +88,7 @@ def test_add_multiple_structures_mixed():
     # There are multiple 3eiy, they overwrite each other, hence the length of dict is not 9
     assert len(stack.pdbs.keys()) == 6
     assert '3eiy' in stack.pdbs
-    assert 'AF-Q5VSL9-F1-model_v4' in stack.pdbs
+    assert 'AF-Q5VSL9-F1-model_v6' in stack.pdbs
     assert 'Q5VSL9' in stack.pdbs
     assert 'P99999' in stack.pdbs
 
diff --git a/tests/stack/test_stack.py b/tests/stack/test_stack.py
@@ -5,6 +5,7 @@
 # Code Repository: https://github.com/rasbt/biopandas
 
 from biopandas.stack.stack import PandasPdbStack
+from biopandas.align import TMAlign
 import os
 from nose.tools import assert_raises
 
@@ -89,12 +90,6 @@ def test_update_entry_nonexistent():
     assert '1YCR' in stack.pdbs
     assert '1A2B' in stack.pdbs
 
-def test_tmalign_inside_multiple_chains():
-    ppdb_stack = PandasPdbStack()
-    ppdb_stack.add_pdbs([TESTDATA_FILENAME, TESTDATA_FILENAME3, TESTDATA_FILENAME4])
-
-    assert_raises(ValueError, ppdb_stack.tmalign_inside)
-
 
 def filter_by_chains(key, pdb, chains):
     # Example function for applying filtering
@@ -111,10 +106,12 @@ def test_tmalign_inside_multiple_chains():
     filtered_stack = stack.apply_filter(filter_by_chains, keep_null=False, **args)
     chains_lens_filtered = filtered_stack.apply_calculation(calculate_chain_lengths)
     assert len(filtered_stack.pdbs) == 4
-    transformed_structures, tm_scores = filtered_stack.tmalign_inside()
 
-    assert tm_scores['3eiy'] == 0.37341
-    assert tm_scores['2d7t'] == 0.33733
+    tmalign = TMAlign()
+    _, transformed_structures, tm_scores = tmalign.tmalign_in_stack(filtered_stack, mobile_chains=args['chains'])
+
+    assert tm_scores['3eiy'] == 0.23483
+    assert tm_scores['2d7t'] == 0.27812
     assert tm_scores['1ycr_copy'] == 1
 
 def test_tmalign_inside_multiple_chains_specific_target():
@@ -126,8 +123,10 @@ def test_tmalign_inside_multiple_chains_specific_target():
     filtered_stack = stack.apply_filter(filter_by_chains, keep_null=False, **args)
     chains_lens_filtered = filtered_stack.apply_calculation(calculate_chain_lengths)
     assert len(filtered_stack.pdbs) == 4
-    transformed_structures, tm_scores = filtered_stack.tmalign_inside('3eiy')
 
-    assert tm_scores['1ycr'] == 0.23483
-    assert tm_scores['2d7t'] == 0.24401
+    tmalign = TMAlign()
+    _, transformed_structures, tm_scores = tmalign.tmalign_in_stack(filtered_stack, target='3eiy', mobile_chains=args['chains'])
+
+    assert tm_scores['1ycr'] == 0.37341
+    assert tm_scores['2d7t'] == 0.32871
     assert tm_scores['3eiy_copy'] == 1
diff --git a/tests/stack/test_write_stack.py b/tests/stack/test_write_stack.py
@@ -15,33 +15,34 @@
 )
 
 TESTDATA_FILENAME_GZ = os.path.join(os.path.dirname(__file__), "data", "3eiy.pdb.gz")
-TESTDATA_FILENAME_AF2_V4 = os.path.join(
-    os.path.dirname(__file__), "data", "AF-Q5VSL9-F1-model_v4.pdb"
+TESTDATA_FILENAME_AF2_V6 = os.path.join(
+    os.path.dirname(__file__), "data", "AF-Q5VSL9-F1-model_v6.pdb"
 )
 
 def test_write_pdb():
     stack = PandasPdbStack()
     stack.add_pdbs([TESTDATA_FILENAME, TESTDATA_FILENAME2,
-                   TESTDATA_FILENAME_AF2_V4])
+                   TESTDATA_FILENAME_AF2_V6])
     base_dir = os.path.dirname(__file__)
     stack.write_entries(base_dir)
 
     for key, pdb in stack.pdbs.items():
         outfile = os.path.join(base_dir, f'{key}.pdb')
         infile = os.path.join(base_dir, 'data', f'{key}.pdb')
         assert os.path.exists(outfile)
+
         with open(infile, "r") as f:
-            f1 = f.read().replace('\r\n', '\n').rstrip('\n')
+            f1 = [line.rstrip() for line in f.read().replace('\r', '\n').split('\n')]
         with open(outfile, "r") as f:
-            f2 = f.read().replace('\r\n', '\n').rstrip('\n')
+            f2 = [line.rstrip() for line in f.read().replace('\r', '\n').split('\n')]
         assert f1 == f2
 
         os.remove(outfile)
 
 def test_write_pdb_no_dir_exists():
     stack = PandasPdbStack()
     stack.add_pdbs([TESTDATA_FILENAME, TESTDATA_FILENAME2,
-                   TESTDATA_FILENAME_AF2_V4])
+                   TESTDATA_FILENAME_AF2_V6])
     base_dir = os.path.dirname(__file__)
     test_dir = os.path.join(base_dir, "test_dir")
     stack.write_entries(test_dir)
@@ -52,9 +53,9 @@ def test_write_pdb_no_dir_exists():
         assert os.path.exists(outfile)
 
         with open(infile, "r") as f:
-            f1 = f.read().replace('\r\n', '\n').rstrip('\n')
+            f1 = [line.rstrip() for line in f.read().replace('\r', '\n').split('\n')]
         with open(outfile, "r") as f:
-            f2 = f.read().replace('\r\n', '\n').rstrip('\n')
+            f2 = [line.rstrip() for line in f.read().replace('\r', '\n').split('\n')]
         assert f1 == f2
 
         os.remove(outfile)