-
Notifications
You must be signed in to change notification settings - Fork 17
Expand file tree
/
Copy pathPFTLS_Chapter_06.py
More file actions
executable file
·112 lines (90 loc) · 3.57 KB
/
PFTLS_Chapter_06.py
File metadata and controls
executable file
·112 lines (90 loc) · 3.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/usr/bin/env python3
__author__ = 'Amber Biology LLC'
# Python For The Life Sciences
# By Alex Lancaster & Gordon Webster
# Chapter 6
# The text of the book is (c) Amber Biology LLC (www.amberbiology.com)
# The Python code from the book is released into the public domain, as follows:
# This is free and unencumbered software released into the public domain.
#
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
#
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
# For more information, please refer to <http://unlicense.org/>
import re
# Table of restriction enzymes keyed by name.  Each value is a two-item list:
# a regular expression describing the enzyme's site, followed by an integer
# that nothing in this section reads (presumably the cut position within the
# site -- TODO confirm).
restrictionEnzymes = {
    'bamH1': ['ggatcc', 0],
    'sma1': ['cccggg', 2],
    'nci1': ['cc[cg]gg', 2],
    'scrF1': ['cc[atcg]gg', 2],
}

# Short test sequence that is searched with the patterns above.
sequence1 = 'atatatccgggatatatcccggatatat'

# Print every non-overlapping hit of three of the site patterns.
print(re.compile(restrictionEnzymes['bamH1'][0]).findall(sequence1))
print(re.compile(restrictionEnzymes['nci1'][0]).findall(sequence1))
print(re.compile(restrictionEnzymes['scrF1'][0]).findall(sequence1))

# Replace scrF1's explicit character class with the '.' wildcard and search
# the same sequence again.
restrictionEnzymes.update(scrF1=['cc.gg', 2])
print(re.compile(restrictionEnzymes['scrF1'][0]).findall(sequence1))
# Promoter pattern, first version: 'ttgaca' and 'tataat' separated by a
# fixed-length run of '.' wildcards.  It is overwritten straight away by the
# more flexible version below and is kept only to show the progression.
promoter = 'ttgaca...................tataat'
# Second version: allow any 15 to 25 characters between the two motifs.
promoter = 'ttgaca.{15,25}tataat'

# Two test sequences that differ in the length of the run of 'c' between
# the two motifs.
sequence2 = 'cccccttgacaccccccccccccccccctataatccccc'
sequence3 = 'cccccttgacaccccccccccccccccccccctataatccccc'
print(re.findall(promoter, sequence2))
print(re.findall(promoter, sequence3))

# re.finditer() hands back an iterator of match objects instead of a list of
# strings, so printing it directly shows the iterator object, not the hits.
print(re.finditer(promoter, sequence2))

# Walk the iterator to get at each match's text and its start/end offsets.
matches = re.finditer(promoter, sequence2)
for m in matches:
    print(m.group())
    print(m.start(), m.end())
# Generating a randomized 250 million base chromosome may take a
# few minutes depending upon your computer's speed, so be patient.
# Searching it will be much (much) quicker :-)
import random

# Draw every base independently with random.choice() and join the draws into
# a single string.  Note that sequenceList keeps one list slot per base and
# stays referenced after the join, so the list and the string are both held
# in memory from here on.
bases = ['a', 't', 'c', 'g']
sequenceList = [random.choice(bases) for _ in range(250000000)]
chromosome = ''.join(sequenceList)
import time

# Time one call to re.finditer() on the whole chromosome and show the raw
# clock readings taken before and after it.
# NOTE: re.finditer() returns a lazy iterator -- nothing is scanned until
# matches are requested from it -- so this interval, and the average reported
# below, cover only the creation of the iterator, not a full search.
searchPattern = 'tataat'
t1 = time.time()
result = re.finditer(searchPattern, chromosome)
t2 = time.time()
print('Start time =', t1, 'seconds. End time =', t2, ' seconds.')

# Repeat the call many times and report the mean cost per call.
nsearch = 1000000
t1 = time.time()
for n in range(nsearch):
    result = re.finditer(searchPattern, chromosome)
t2 = time.time()
print('Average search time was ', (t2 - t1) / nsearch, ' seconds')

# Consuming the iterator is what actually walks the chromosome, so this pass
# is timed separately to show the real cost of one complete search.
nmatches = 0
scanStart = time.time()
for match in result:
    nmatches += 1
scanEnd = time.time()
print('Number of search hits = ', nmatches)
print('Time to scan for all hits was ', scanEnd - scanStart, ' seconds')
# Same benchmark again, this time with a pattern that contains a '.' wildcard.
# NOTE: re.finditer() returns a lazy iterator -- nothing is scanned until
# matches are requested from it -- so the average reported by the timed loop
# covers only the creation of the iterator, not a full search.
searchPattern = 'tat.at'
nsearch = 1000000
t1 = time.time()
for n in range(nsearch):
    result = re.finditer(searchPattern, chromosome)
t2 = time.time()
print('Average search time was ', (t2 - t1) / nsearch, ' seconds')

# Consuming the iterator is what actually walks the chromosome, so this pass
# is timed separately to show the real cost of one complete search.
nmatches = 0
scanStart = time.time()
for match in result:
    nmatches += 1
scanEnd = time.time()
print('Number of search hits = ', nmatches)
print('Time to scan for all hits was ', scanEnd - scanStart, ' seconds')