-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcif2pdb.py.bak
98 lines (83 loc) · 2.85 KB
/
cif2pdb.py.bak
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import sys
from Bio.PDB.MMCIFParser import MMCIFParser
from Bio.PDB import PDBIO
"""
Based on github @sbliven/cif2pdb.py
"""
def convert_cif_to_pdb(cif_file, pdb_file=None, verbose=False):
"""
Convert a CIF into a PDB
"""
if not pdb_file:
pdb_file = cif_file+'.pdb'
# Not sure why biopython needs this to read a cif file
strucid = cif_file[:4] if len(cif_file)>4 else "1xxx"
# Read file
parser = MMCIFParser()
structure = parser.get_structure(strucid, cif_file)
# Rename long chains
try:
chainmap = rename_chains(structure)
except OutOfChainsError:
logging.error("Too many chains to represent in PDB format")
sys.exit(1)
# Print info about renaming of chains
if verbose:
for new,old in chainmap.items():
if new != old:
logging.info("Renaming chain {0} to {1}".format(old,new))
# Write PDB
io = PDBIO()
io.set_structure(structure)
#TODO What happens with large structures?
io.save(pdb_file)
def int_to_chain(i,base=62):
"""
int_to_chain(int,int) -> str
Converts a positive integer to a chain ID. Chain IDs include uppercase
characters, numbers, and optionally lowercase letters.
i = a positive integer to convert
base = the alphabet size to include. Typically 36 or 62
"""
if i < 0:
raise ValueError("positive integers only")
if base < 0 or 62 < base:
raise ValueError("Invalid base")
quot = int(i)//base
rem = i%base
if rem < 26:
letter = chr( ord("A") + rem)
elif rem < 36:
letter = str( rem-26)
else:
letter = chr( ord("a") + rem - 36)
if quot == 0:
return letter
else:
return int_to_chain(quot-1,base) + letter
class OutOfChainsError(Exception): pass
def rename_chains(structure):
"""Renames chains to be one-letter chains
Existing one-letter chains will be kept. Multi-letter chains will be truncated
or renamed to the next available letter of the alphabet.
If more than 62 chains are present in the structure, raises an OutOfChainsError
Returns a map between new and old chain IDs, as well as modifying the input structure
"""
next_chain = 0 #
# single-letters stay the same
chainmap = {c.id:c.id for c in structure.get_chains() if len(c.id) == 1}
for o in structure.get_chains():
if len(o.id) != 1:
if o.id[0] not in chainmap:
chainmap[o.id[0]] = o.id
o.id = o.id[0]
else:
c = int_to_chain(next_chain)
while c in chainmap:
next_chain += 1
c = int_to_chain(next_chain)
if next_chain >= 62:
raise OutOfChainsError()
chainmap[c] = o.id
o.id = c
return chainmap