Skip to content

Commit 06a67a3

Browse files
authored
Merge pull request #8 from DeepRank/update_read_write
Update read and write
2 parents 1688039 + 87ba2b1 commit 06a67a3

13 files changed

+747
-306
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,7 @@ Byte-compiled / optimized
55
*.izone
66
*.lzone
77
*.ref_pairs
8+
.vscode/settings.json
9+
.vscode/launch.json
10+
.vscode/.ropeproject/config.py
11+
.vscode/.ropeproject/objectdb

pdb2sql/StructureSimilarity.py

Lines changed: 379 additions & 143 deletions
Large diffs are not rendered by default.

pdb2sql/__init__.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1-
#from pdb2sql import pdb2sql
2-
#from .interface import *
3-
#import transform
1+
from .pdb2sqlcore import pdb2sql
2+
from .interface import interface
3+
from .StructureSimilarity import StructureSimilarity
4+
from . import transform

pdb2sql/interface.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,7 @@ class interface(pdb2sql):
1111
def __init__(self, pdb):
1212
'''Identify interface between protein chains.'''
1313

14-
pdb2sql.__init__(self, pdb, no_extra=True)
15-
self._create_sql()
14+
pdb2sql.__init__(self, pdb)
1615
self.backbone_type = ['CA', 'C', 'N', 'O']
1716

1817
##########################################################################

pdb2sql/pdb2sqlAlchemy.py

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,18 @@ class ATOM(Base):
1818
__tablename__ = 'ATOM'
1919
rowID = Column(Integer, primary_key=True)
2020
serial = Column(Integer, nullable=False)
21-
name = Column(String(5), nullable=False)
22-
altLoc = Column(String(5), nullable=False)
23-
resName = Column(String(5), nullable=False)
24-
chainID = Column(String(5), nullable=False)
21+
name = Column(String(6), nullable=False)
22+
altLoc = Column(String(1), nullable=False)
23+
resName = Column(String(3), nullable=False)
24+
chainID = Column(String(1), nullable=False)
2525
resSeq = Column(Integer, nullable=False)
26-
iCode = Column(String(5), nullable=False)
26+
iCode = Column(String(1), nullable=False)
2727
x = Column(Float, nullable=False)
2828
y = Column(Float, nullable=False)
2929
z = Column(Float, nullable=False)
3030
occ = Column(Float, nullable=False)
3131
temp = Column(Float, nullable=False)
32+
element = Column(String(2), nullable=False)
3233
model = Column(Integer, nullable=False)
3334

3435

@@ -39,10 +40,9 @@ def __init__(
3940
pdbfile,
4041
sqlfile=None,
4142
fix_chainID=False,
42-
verbose=False,
43-
no_extra=True):
43+
verbose=False):
4444
'''Use sqlAlchemy to load the database.'''
45-
super().__init__(pdbfile, sqlfile, fix_chainID, verbose, no_extra)
45+
super().__init__(pdbfile, sqlfile, fix_chainID, verbose)
4646
self._create_sql()
4747

4848
def _create_sql(self):
@@ -92,14 +92,15 @@ def _create_sql(self):
9292
if colname in del_copy.keys():
9393
data = line[del_copy[colname][0]:del_copy[colname][1]].strip()
9494

95-
# convert it if necessary
96-
if coltype == 'INT':
97-
data = int(data)
98-
elif coltype == 'REAL':
99-
data = float(data)
95+
# convert it if necessary
96+
if coltype == 'INT':
97+
data = int(data)
98+
elif coltype == 'REAL':
99+
data = float(data)
100100

101-
# append to dict
102-
at[colname] = data
101+
102+
# append to dict
103+
at[colname] = data
103104

104105
# create a new ATOM
105106
newat = ATOM(
@@ -115,6 +116,7 @@ def _create_sql(self):
115116
z=at['z'],
116117
occ=at['occ'],
117118
temp=at['temp'],
119+
element=at['element'],
118120
model=self.nModel)
119121

120122
# add the atom to the data base
@@ -264,7 +266,7 @@ def update(self, attribute, values, **kwargs):
264266
'Wrong number of values for the ATOM selection')
265267

266268
# goes through all the rows
267-
for irow in range(nvalues):
269+
for irow in range(nrow):
268270

269271
# create a dict of values
270272
dict_values = {}

pdb2sql/pdb2sql_base.py

Lines changed: 99 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import sqlite3
22
import subprocess as sp
33
import os
4+
import warnings
45
import numpy as np
56
from time import time
67

@@ -12,23 +13,23 @@ def __init__(
1213
pdbfile,
1314
sqlfile=None,
1415
fix_chainID=False,
15-
verbose=False,
16-
no_extra=True):
16+
verbose=False):
1717
'''Base class for the definition of sql database.
1818
1919
Args:
20-
pdbbfile : name of the pdbfile
21-
sqlfile : name of the sql file (if None the db is stored in memeory)
22-
fix_chainID : bool to rename chain ID from A, B, C, ....
23-
verbose : bool verbose
24-
no_extra : bool don't consider the 'temp' and 'model' column
20+
pdbfile (str, list(str/bytes), ndarray) : name of pdbfile or
21+
list or ndarray containing the pdb data
22+
sqlfile (str, optional): name of the sqlfile.
23+
By default it is created in memory only.
24+
fix_chainID (bool, optional): check if the name of the chains
25+
are A,B,C, .... and fix it if not.
26+
verbose (bool): probably print stuff
2527
'''
2628

2729
self.pdbfile = pdbfile
2830
self.sqlfile = sqlfile
2931
self.is_valid = True
3032
self.verbose = verbose
31-
self.no_extra = no_extra
3233

3334
# column names and types
3435
self.col = {'serial': 'INT',
@@ -43,6 +44,7 @@ def __init__(
4344
'z': 'REAL',
4445
'occ': 'REAL',
4546
'temp': 'REAL',
47+
'element': 'TEXT',
4648
'model': 'INT'}
4749

4850
# delimiter of the column format
@@ -55,12 +57,13 @@ def __init__(
5557
'resName': [17, 20],
5658
'chainID': [21, 22],
5759
'resSeq': [22, 26],
58-
'iCode': [26, 26],
60+
'iCode': [26, 27],
5961
'x': [30, 38],
6062
'y': [38, 46],
6163
'z': [46, 54],
6264
'occ': [54, 60],
63-
'temp': [60, 66]}
65+
'temp': [60, 66],
66+
'element': [76,78]}
6467

6568
##########################################################################
6669
#
@@ -112,41 +115,109 @@ def add_column(self, colname, coltype='FLOAT', default=0):
112115
def exportpdb(self, fname, append=False, periodic=False, **kwargs):
113116
'''Export a PDB file with kwargs selection.'''
114117

115-
# get the data
116-
data = self.get('*', **kwargs)
117-
118-
# write each line
119-
# the PDB format is pretty strict
120-
# http://www.wwpdb.org/documentation/file-format-content/format33/sect9.html#ATOM
121118
if append:
122119
f = open(fname, 'a')
123120
else:
124121
f = open(fname, 'w')
125122

123+
lines = self.sql2pdb(**kwargs)
124+
for i in lines:
125+
f.write(i + '\n')
126+
127+
f.close()
128+
129+
def sql2pdb(self, **kwargs):
130+
"""Convert sql pdb data to PDB formatted lines
131+
132+
Returns:
133+
list: pdb-format lines
134+
"""
135+
data = self.get('*', **kwargs)
136+
pdb = []
137+
# the PDB format is pretty strict
138+
# http://www.wwpdb.org/documentation/file-format-content/format33/sect9.html#ATOM
126139
for d in data:
127140
line = 'ATOM '
128141
line += '{:>5}'.format(d[0]) # serial
129142
line += ' '
130-
line += '{:^4}'.format(d[1]) # name
143+
line += self._format_atomname(d) # name
131144
line += '{:>1}'.format(d[2]) # altLoc
132-
line += '{:>3}'.format(d[3]) # resname
145+
line += '{:>3}'.format(d[3]) # resname
133146
line += ' '
134147
line += '{:>1}'.format(d[4]) # chainID
135148
line += '{:>4}'.format(d[5]) # resSeq
136149
line += '{:>1}'.format(d[6]) # iCODE
137150
line += ' '
138-
line += '{: 8.3f}'.format(d[7]) # x
139-
line += '{: 8.3f}'.format(d[8]) # y
140-
line += '{: 8.3f}'.format(d[9]) # z
141-
if not self.no_extra:
142-
line += '{: 6.2f}'.format(d[10]) # occ
143-
line += '{: 6.2f}'.format(d[11]) # temp
144-
line += '\n'
151+
line += pdb2sql_base._format_xyz(d[7]) # x
152+
line += pdb2sql_base._format_xyz(d[8]) # y
153+
line += pdb2sql_base._format_xyz(d[9]) # z
154+
line += '{:>6.2f}'.format(d[10]) # occ
155+
line += '{:>6.2f}'.format(d[11]) # temp
156+
line += ' ' * 10
157+
line += '{:>2}'.format(d[12]) # element
158+
# line += '\n'
159+
pdb.append(line)
160+
161+
return pdb
162+
163+
def _format_atomname(self, data):
164+
"""Format atom name to align with PDB requirements:
165+
- alignment of one-letter atom name starts at column 14,
166+
- while two-letter atom name such as FE starts at column 13.
145167
146-
f.write(line)
168+
Args:
169+
data(list): sql output for one pdb line
170+
171+
Returns:
172+
str: formatted atom name
173+
"""
174+
name = data[1]
175+
lname = len(name)
176+
if lname in (1, 4):
177+
name = '{:^4}'.format(name)
178+
elif lname == 2:
179+
if name == data[12]: # name == element
180+
name = '{:<4}'.format(name)
181+
else:
182+
name = '{:^4}'.format(name)
183+
else:
184+
if name[0] in '0123456789':
185+
name = '{:<4}'.format(name)
186+
else:
187+
name = '{:>4}'.format(name)
188+
return name
147189

148-
# close
149-
f.close()
190+
@staticmethod
191+
def _format_xyz(i):
192+
"""Format a PDB coordinate value (x, y or z).
193+
194+
Note: PDB has a fixed 8-column space for x,y or z value.
195+
Thus the value should be in the range of (-1e7, 1e8).
196+
197+
Args:
198+
i(float): PDB coordinate value x, y or z.
199+
200+
Raises:
201+
ValueError: Exceed the range of (-1e7, 1e8)
202+
203+
Returns:
204+
str: formatted x, y or z value.
205+
"""
206+
207+
if i >= 1e8 - 0.5 or i <= -1e7 + 0.5:
208+
raise ValueError(
209+
f'PDB coordination {i} exceeds the range of (-1e7, 1e8) '
210+
f'after rounding.')
211+
elif i >= 1e6 - 0.5 or i <= -1e5 + 0.5:
212+
i = '{:>8.0f}'.format(i)
213+
elif i >= 1e5 - 0.5 or i <= -1e4 + 0.5:
214+
i = '{:>8.1f}'.format(i)
215+
elif i >= 1e4 - 0.5 or i <= -1e3 + 0.5:
216+
i = '{:>8.2f}'.format(i)
217+
else:
218+
i = '{:>8.3f}'.format(i)
219+
220+
return i
150221

151222
def close(self, rmdb=True):
152223

0 commit comments

Comments
 (0)