Skip to content

Commit fcdfff9

Browse files
author
juliav
committed
Add support for writing CIF files, update tests and documentation
- Added support for writing mmCIF files
- Added CIF file support in stack and align modules
- Moved align and tmalign tests to tests/ directory
- Updated tests for Windows-Linux compatibility
- Updated AlphaFold database version to v6
- Removed non-atom lines from AF file
- Updated changelog and fixed documentation links
1 parent 02bff5b commit fcdfff9

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+21988
-18224
lines changed

biopandas/align/align.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,8 @@ def filter_and_validate_chain(self, pdb, chain_id):
4949
chain_col = 'label_asym_id'
5050
else:
5151
raise ValueError("No recognized chain identifier column found in the ATOM dataframe.")
52+
else:
53+
chain_col = 'chain_id'
5254

5355
filtered_pdb = deepcopy(pdb)
5456
filtered_atoms = pdb.df['ATOM'][pdb.df['ATOM'][chain_col].isin([chain_id])]

biopandas/align/tmalign.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,11 @@ def __init__(self, tmalign_path: str=None):
3535
self.tmalign_path = os.path.join(path_script, './USalign')
3636
elif os.path.exists(os.path.join(path_script, './USalign.exe')):
3737
self.tmalign_path = os.path.join(path_script, './USalign.exe')
38+
# if our script has 'tests' in it, the path changes to "../../biopandas/align/"
39+
elif os.path.exists(os.path.join(path_script, '../../biopandas/align/USalign')):
40+
self.tmalign_path = os.path.join(path_script, '../../biopandas/align/USalign')
41+
elif os.path.exists(os.path.join(path_script, '../../biopandas/align/USalign.exe')):
42+
self.tmalign_path = os.path.join(path_script, '../../biopandas/align/USalign.exe')
3843
else:
3944
raise ValueError("Please provide the path to the TMalign executable.")
4045

biopandas/mmcif/engines.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -83,7 +83,6 @@
8383
"occupancy": "occupancy",
8484
"B_iso_or_equiv": "b_factor",
8585
"type_symbol": "element_symbol",
86-
"label_asym_id": "chain_id"
8786
}
8887

8988
MMCIF_PDB_NONEFIELDS: List[str] = [

biopandas/mmcif/pandas_mmcif.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -166,8 +166,9 @@ def fetch_mmcif(
166166
uniprot_id is not None and source == "pdb"
167167
)
168168
invalid_input_combination_2 = pdb_code is not None and source in {
169-
"alphafold2-v3",
170-
"alphafold2-v4",
169+
"alphafold2-v6",
170+
# "alphafold2-v3", # deprecated
171+
# "alphafold2-v4", # deprecated
171172
}
172173

173174
if invalid_input_identifier_1 or invalid_input_identifier_2:
@@ -196,6 +197,11 @@ def fetch_mmcif(
196197
self.mmcif_path, self.mmcif_text = self._fetch_af2(
197198
uniprot_id, af2_version
198199
)
200+
elif source == "alphafold2-v6":
201+
af2_version = 6
202+
self.mmcif_path, self.mmcif_text = self._fetch_af2(
203+
uniprot_id, af2_version
204+
)
199205
else:
200206
raise ValueError(
201207
f"Invalid source: {source}."

biopandas/pdb/pandas_pdb.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -138,7 +138,8 @@ def fetch_pdb(
138138
139139
source : str
140140
The source to retrieve the structure from
141-
(`"pdb"`, `"alphafold2-v3"`, `"alphafold2-v4"`(latest)).
141+
# (`"pdb"`, `"alphafold2-v3"`, `"alphafold2-v4"`(latest)). #deprecated
142+
(`"pdb"`, `"alphafold2-v6"`(latest)).
142143
Defaults to `"pdb"`.
143144
144145
Returns
@@ -151,8 +152,9 @@ def fetch_pdb(
151152
invalid_input_identifier_2 = pdb_code is not None and uniprot_id is not None
152153
invalid_input_combination_1 = uniprot_id is not None and source == "pdb"
153154
invalid_input_combination_2 = pdb_code is not None and source in {
154-
"alphafold2-v3",
155-
"alphafold2-v4",
155+
"alphafold2-v6",
156+
# "alphafold2-v3",
157+
# "alphafold2-v4",
156158
}
157159

158160
if invalid_input_identifier_1 or invalid_input_identifier_2:
@@ -173,6 +175,9 @@ def fetch_pdb(
173175
elif source == "alphafold2-v4":
174176
af2_version = 4
175177
self.pdb_path, self.pdb_text = self._fetch_af2(uniprot_id, af2_version)
178+
elif source == "alphafold2-v6":
179+
af2_version = 6
180+
self.pdb_path, self.pdb_text = self._fetch_af2(uniprot_id, af2_version)
176181
elif source == "pdb":
177182
self.pdb_path, self.pdb_text = self._fetch_pdb(pdb_code)
178183
else:
@@ -454,6 +459,7 @@ def _fetch_af2(uniprot_id: str, af2_version: int = 3):
454459
"""Load PDB file from https://alphafold.ebi.ac.uk/."""
455460
txt = None
456461
url = f"https://alphafold.ebi.ac.uk/files/AF-{uniprot_id.upper()}-F1-model_v{af2_version}.pdb"
462+
print(url)
457463
try:
458464
response = urlopen(url)
459465
txt = response.read()

biopandas/stack/stack.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -118,7 +118,7 @@ def fetch_pdb(self, key: str=None, pdb_id: str=None, uniprot_id: str=None):
118118
if key is None:
119119
key = pdb_id
120120
elif uniprot_id:
121-
pdb.fetch_pdb(uniprot_id=uniprot_id, source="alphafold2-v4")
121+
pdb.fetch_pdb(uniprot_id=uniprot_id, source="alphafold2-v6")
122122
if key is None:
123123
key = uniprot_id
124124
else:

biopandas/stack/tests/data/4eiy_anisouchunk.pdb

Lines changed: 0 additions & 10 deletions
This file was deleted.

0 commit comments

Comments
 (0)