Skip to content

Commit ed3965f

Browse files
authored
Merge branch 'main' into patch-0.4.1
2 parents ef2c004 + c155226 commit ed3965f

12 files changed

+17660
-660
lines changed

.gitignore

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
site
2-
2+
docs/tutorials/*.md
3+
docs/tutorials/Working_with_mmCIF_Structures_in_DataFrames_files/*
4+
docs/tutorials/Working_with_MOL2_Structures_in_DataFrames_files/*
5+
docs/tutorials/Working_with_PDB_Structures_in_DataFrames_files/*
6+
docs/tutorials/data/3eiy_stripped.pdb.gz
37
docs/api_subpackages/
48
docs/api_modules
59
docs/py-docstring-parser/

biopandas/mmcif/pandas_mmcif.py

Lines changed: 60 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,20 +8,15 @@
88

99
import gzip
1010
import sys
11-
from typing import Dict
11+
import warnings
12+
from distutils.version import LooseVersion
13+
from typing import Dict, Optional
14+
from urllib.error import HTTPError, URLError
15+
from urllib.request import urlopen
1216

1317
import numpy as np
1418
import pandas as pd
1519

16-
try:
17-
from urllib.error import HTTPError, URLError
18-
from urllib.request import urlopen
19-
except ImportError:
20-
raise ValueError("Python 2.7 is no longer supported")
21-
22-
import warnings
23-
from distutils.version import LooseVersion
24-
2520
from ..pdb.engines import amino3to1dict
2621
from .engines import ANISOU_DF_COLUMNS, mmcif_col_types
2722
from .mmcif_parser import load_cif_data
@@ -72,20 +67,51 @@ def read_mmcif(self, path):
7267
self.code = self.data["entry"]["id"][0].lower()
7368
return self
7469

75-
def fetch_mmcif(self, pdb_code: str):
76-
"""Fetches mmCIF file contents from the Protein Databank at rcsb.org.
70+
def fetch_mmcif(self, pdb_code: Optional[str] = None, uniprot_id: Optional[str] = None, source: str = "pdb"):
    """Fetch mmCIF file contents from the Protein Databank at rcsb.org
    or the AlphaFold database at https://alphafold.ebi.ac.uk/.

    Exactly one of `pdb_code` and `uniprot_id` must be given, and it
    must match `source`. On success the fetched URL and text are stored
    in `self.mmcif_path` and `self.mmcif_text`, and `self._df` is rebuilt
    from the fetched text.

    Parameters
    ----------
    pdb_code : str, optional
        A 4-letter PDB code, e.g., `"3eiy"` to retrieve structures from
        the PDB. Defaults to `None`.

    uniprot_id : str, optional
        A UniProt Identifier, e.g., `"Q5VSL9"` to retrieve structures
        from the AF2 database. Defaults to `None`.

    source : str
        The source to retrieve the structure from (`"pdb"`,
        `"alphafold2-v1"` or `"alphafold2-v2"`). Defaults to `"pdb"`.

    Returns
    ---------
    self

    Raises
    ------
    ValueError
        If neither or both identifiers are given, if the identifier does
        not match `source`, or if `source` is not a supported value.

    """
    # Maps each AlphaFold source name to the model version passed to
    # `_fetch_af2`; also serves as the set of valid AlphaFold sources.
    af2_versions = {"alphafold2-v1": 1, "alphafold2-v2": 2}

    # Exactly one identifier is required: reject "neither" and "both".
    if (pdb_code is None) == (uniprot_id is None):
        raise ValueError("Please provide either a PDB code or a UniProt ID.")

    # The identifier kind has to match the requested source.
    if uniprot_id is not None and source == "pdb":
        raise ValueError("Please use a 'pdb_code' instead of 'uniprot_id' for source='pdb'.")
    if pdb_code is not None and source in af2_versions:
        raise ValueError(f"Please use a 'uniprot_id' instead of 'pdb_code' for source={source}.")

    if source == "pdb":
        self.mmcif_path, self.mmcif_text = self._fetch_mmcif(pdb_code)
    elif source in af2_versions:
        self.mmcif_path, self.mmcif_text = self._fetch_af2(uniprot_id, af2_versions[source])
    else:
        # The message lists the exact accepted spellings so it can be
        # copy-pasted by the caller.
        raise ValueError(f"Invalid source: {source}. Please use one of 'pdb', 'alphafold2-v1' or 'alphafold2-v2'.")

    self._df = self._construct_df(text=self.mmcif_text)
    return self
91117

@@ -121,6 +147,25 @@ def _fetch_mmcif(pdb_code):
121147
print(f"URL Error {e.args}")
122148
return url, txt
123149

150+
@staticmethod
def _fetch_af2(uniprot_id: str, af2_version: int = 2):
    """Load MMCIF file from https://alphafold.ebi.ac.uk/.

    Parameters
    ----------
    uniprot_id : str
        UniProt identifier; it is upper-cased before being placed in
        the download URL.

    af2_version : int
        Model version number inserted into the file name
        (`..._v{af2_version}.cif`). Defaults to `2`.

    Returns
    ---------
    (url, txt) : tuple
        The requested URL and the response body decoded as UTF-8.
        `txt` is `None` when the request failed; the error is printed
        rather than raised.

    """
    txt = None
    url = f"https://alphafold.ebi.ac.uk/files/AF-{uniprot_id.upper()}-F1-model_v{af2_version}.cif"

    try:
        # The context manager closes the HTTP response even if reading
        # or decoding fails.
        with urlopen(url) as response:
            txt = response.read().decode("utf-8")
    except HTTPError as e:
        print(f"HTTP Error {e.code}")
    except URLError as e:
        # `e.reason` is the single item held in `e.args`; using it avoids
        # %-formatting a tuple directly.
        print(f"URL Error {e.reason}")
    return url, txt
168+
124169
@staticmethod
125170
def _read_mmcif(path):
126171
"""Read MMCIF file from local drive."""

biopandas/mmcif/tests/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)