|
8 | 8 |
|
9 | 9 | import gzip |
10 | 10 | import sys |
11 | | -from typing import Dict |
| 11 | +import warnings |
| 12 | +from distutils.version import LooseVersion |
| 13 | +from typing import Dict, Optional |
| 14 | +from urllib.error import HTTPError, URLError |
| 15 | +from urllib.request import urlopen |
12 | 16 |
|
13 | 17 | import numpy as np |
14 | 18 | import pandas as pd |
15 | 19 |
|
16 | | -try: |
17 | | - from urllib.error import HTTPError, URLError |
18 | | - from urllib.request import urlopen |
19 | | -except ImportError: |
20 | | - raise ValueError("Python 2.7 is no longer supported") |
21 | | - |
22 | | -import warnings |
23 | | -from distutils.version import LooseVersion |
24 | | - |
25 | 20 | from ..pdb.engines import amino3to1dict |
26 | 21 | from .engines import ANISOU_DF_COLUMNS, mmcif_col_types |
27 | 22 | from .mmcif_parser import load_cif_data |
@@ -72,20 +67,51 @@ def read_mmcif(self, path): |
72 | 67 | self.code = self.data["entry"]["id"][0].lower() |
73 | 68 | return self |
74 | 69 |
|
75 | | - def fetch_mmcif(self, pdb_code: str): |
76 | | - """Fetches mmCIF file contents from the Protein Databank at rcsb.org. |
| 70 | + def fetch_mmcif(self, pdb_code: Optional[str] = None, uniprot_id: Optional[str] = None, source: str = "pdb"): |
| 71 | + """Fetches mmCIF file contents from the Protein Databank at rcsb.org or AlphaFold database at https://alphafold.ebi.ac.uk/. |
| 72 | +. |
77 | 73 |
|
78 | 74 | Parameters |
79 | 75 | ---------- |
80 | | - pdb_code : str |
81 | | - A 4-letter PDB code, e.g., "3eiy". |
| 76 | + pdb_code : str, optional |
| 77 | + A 4-letter PDB code, e.g., `"3eiy"` to retrieve structures from the PDB. Defaults to `None`. |
| 78 | +
|
| 79 | + uniprot_id : str, optional |
| 80 | + A UniProt Identifier, e.g., `"Q5VSL9"` to retrieve structures from the AF2 database. Defaults to `None`. |
| 81 | +
|
| 82 | + source : str |
| 83 | + The source to retrieve the structure from (`"pdb"`, `"alphafold2-v1"` or `"alphafold2-v2"`). Defaults to `"pdb"`. |
82 | 84 |
|
83 | 85 | Returns |
84 | 86 | --------- |
85 | 87 | self |
86 | 88 |
|
87 | 89 | """ |
88 | | - self.mmcif_path, self.mmcif_text = self._fetch_mmcif(pdb_code) |
| 90 | + # Sanitize input |
| 91 | + invalid_input_identifier_1 = pdb_code is None and uniprot_id is None |
| 92 | + invalid_input_identifier_2 = pdb_code is not None and uniprot_id is not None |
| 93 | + invalid_input_combination_1 = uniprot_id is not None and source == "pdb" |
| 94 | + invalid_input_combination_2 = pdb_code is not None and source in {"alphafold2-v1", "alphafold2-v2"} |
| 95 | + |
| 96 | + if invalid_input_identifier_1 or invalid_input_identifier_2: |
| 97 | + raise ValueError("Please provide either a PDB code or a UniProt ID.") |
| 98 | + |
| 99 | + if invalid_input_combination_1 : |
| 100 | + raise ValueError("Please use a 'pdb_code' instead of 'uniprot_id' for source='pdb'.") |
| 101 | + elif invalid_input_combination_2 : |
| 102 | + raise ValueError(f"Please use a 'uniprot_id' instead of 'pdb_code' for source={source}.") |
| 103 | + |
| 104 | + if source == "pdb": |
| 105 | + self.mmcif_path, self.mmcif_text = self._fetch_mmcif(pdb_code) |
| 106 | + elif source == "alphafold2-v1": |
| 107 | + af2_version = 1 |
| 108 | + self.mmcif_path, self.mmcif_text = self._fetch_af2(uniprot_id, af2_version) |
| 109 | + elif source == "alphafold2-v2": |
| 110 | + af2_version = 2 |
| 111 | + self.mmcif_path, self.mmcif_text = self._fetch_af2(uniprot_id, af2_version) |
| 112 | + else: |
| 113 | + raise ValueError(f"Invalid source: {source}. Please use one of 'pdb', 'alphafold2-v1' or 'alphafold-v2'.") |
| 114 | + |
89 | 115 | self._df = self._construct_df(text=self.mmcif_text) |
90 | 116 | return self |
91 | 117 |
|
@@ -121,6 +147,25 @@ def _fetch_mmcif(pdb_code): |
121 | 147 | print(f"URL Error {e.args}") |
122 | 148 | return url, txt |
123 | 149 |
|
| 150 | + @staticmethod |
| 151 | + def _fetch_af2(uniprot_id: str, af2_version: int = 2): |
| 152 | + """Load MMCIF file from https://alphafold.ebi.ac.uk/.""" |
| 153 | + txt = None |
| 154 | + url = f"https://alphafold.ebi.ac.uk/files/AF-{uniprot_id.upper()}-F1-model_v{af2_version}.cif" |
| 155 | + |
| 156 | + try: |
| 157 | + response = urlopen(url) |
| 158 | + txt = response.read() |
| 159 | + if sys.version_info[0] >= 3: |
| 160 | + txt = txt.decode('utf-8') |
| 161 | + else: |
| 162 | + txt = txt.encode('ascii') |
| 163 | + except HTTPError as e: |
| 164 | + print('HTTP Error %s' % e.code) |
| 165 | + except URLError as e: |
| 166 | + print('URL Error %s' % e.args) |
| 167 | + return url, txt |
| 168 | + |
124 | 169 | @staticmethod |
125 | 170 | def _read_mmcif(path): |
126 | 171 | """Read MMCIF file from local drive.""" |
|
0 commit comments