-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathRex.py
executable file
·59 lines (49 loc) · 2.14 KB
/
Rex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#=========================================================================
# This is OPEN SOURCE SOFTWARE governed by the Gnu General Public
# License (GPL) version 3, as described at www.opensource.org.
# Copyright (C)2016 William H. Majoros ([email protected]).
#=========================================================================
from __future__ import (absolute_import, division, print_function,
unicode_literals, generators, nested_scopes, with_statement)
from builtins import (bytes, dict, int, list, object, range, str, ascii,
chr, hex, input, next, oct, open, pow, round, super, filter, map, zip)
import re
#=========================================================================
# Attributes:
# match : returned from re.search()
# Instance Methods:
# rex=Rex()
# bool=rex.find("abc(\d+)def(\d+)ghi(\d+)",line)
# rex.findOrDie("abc(\d+)def(\d+)ghi(\d+)",line)
# x=rex[1]; y=rex[2]; z=rex[3]
#=========================================================================
class Rex:
    """Rex -- more compact regular expression matching similar to Perl.

    The object remembers the result of the most recent find()/findOrDie()
    call in ``self.match`` so that captured groups can be read afterwards
    with subscript syntax (``rex[1]``, ``rex[2]``, ...), much like Perl's
    ``$1``, ``$2``.

    Attributes:
        match: value returned by the last re.search() call, or None if
            no search has been performed yet or the last search failed.
    """

    def __init__(self):
        # Must be stored on the instance; a bare ``match=None`` would only
        # create a throw-away local and leave the attribute undefined.
        self.match = None

    def find(self, pattern, line):
        """Search `line` for `pattern`; return True if it matches.

        The match object (or None on failure) is saved in ``self.match``.
        """
        self.match = re.search(pattern, line)
        return self.match is not None

    def split(self, pattern, line):
        """Split `line` on `pattern` and return the fields, dropping
        every field that is an empty string."""
        return [field for field in re.split(pattern, line) if field != ""]

    def findOrDie(self, pattern, line):
        """Like find(), but raise Exception if `pattern` does not match.

        Raises:
            Exception: with message "can't parse: " + line on no match.
        """
        if not self.find(pattern, line):
            raise Exception("can't parse: " + line)

    def __getitem__(self, index):
        """Return group `index` of the most recent successful match.

        Index 0 is the whole match; 1..n are the capture groups.  If the
        last search failed (``self.match`` is None) the attribute access
        raises AttributeError.
        """
        return self.match.group(index)
def test_regex():
    """Smoke-test Rex.find() on a sample annotation record.

    Prints True if the transcript_id pattern is found in the sample
    line, False otherwise.
    """
    rex = Rex()
    line = "chr1 HAVANA initial-exon 34384 34457 . - 0 transcript_id=ENST00000361813.5;gene_id=ENSG00000198952.7;\n"
    # Raw string so that "\s" reaches the regex engine verbatim instead of
    # being treated as an (invalid) string escape by the Python parser.
    result = rex.find(r'transcript_id[:=]?\s*"?([^\s";]+)"?', line)
    print(result)
#x=y=z=None
#if(rex.find("abc(\d+)abc(\d+)abc","ab123abc456abc789")):
# x=rex[1]; y=rex[2]
#elif(rex.find("dog(\d+)cat(\d+)cow(\d+)chicken(\d+)",
# "dog1cat2cow8chicken100")):
# x=rex[1]; y=rex[4]
#print(x,y)