-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathFastqReader.py
executable file
·54 lines (48 loc) · 1.97 KB
/
FastqReader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#=========================================================================
# This is OPEN SOURCE SOFTWARE governed by the Gnu General Public
# License (GPL) version 3, as described at www.opensource.org.
# 2018 William H. Majoros ([email protected])
#=========================================================================
from __future__ import (absolute_import, division, print_function,
   unicode_literals, generators, nested_scopes, with_statement)
from builtins import (bytes, dict, int, list, object, range, str, ascii,
   chr, hex, input, next, oct, open, pow, round, super, filter, map, zip)
import gzip
import re
from Rex import Rex
rex=Rex()
#=========================================================================
# Attributes:
# fh : file handle
# Instance Methods:
# reader=FastqReader(filename) # can be gzipped!
# (ID,seq,qual,qualSeq,pair)=reader.nextSequence() # returns None at EOF
# * ID is the first whitespace-delimited token of the header line
# * pair indicates which read of the pair: 1 or 2
# * qual is a list of integer quality values (ASCII code minus 33)
# * qualSeq is the raw quality string
# reader.close()
# Class Methods:
#=========================================================================
class FastqReader:
    """Sequential reader for FASTQ files, plain or gzip-compressed.

    Attributes:
        fh: the open text-mode file handle, or None when the reader was
            constructed with filename=None (a handle may be assigned later).

    Usage:
        reader = FastqReader(filename)      # names ending in ".gz" are gunzipped
        rec = reader.nextSequence()         # tuple, or None at end of input
        reader.close()

    The reader can also be used as a context manager and iterated over:
        with FastqReader(filename) as reader:
            for (ID, seq, qual, qualSeq, pair) in reader:
                ...
    """

    # Compiled once at class-definition time; raw strings keep the
    # backslash escapes from being interpreted by the Python parser.
    _GZIP_SUFFIX = re.compile(r"\.gz$")
    _ID_TOKEN = re.compile(r"^(\S+)")
    _PAIR_DIGIT = re.compile(r"\s+(\d)")
    # Quality characters are converted to integers as ASCII code minus 33.
    _QUAL_OFFSET = 33

    def __init__(self, filename):
        """Open `filename` for reading as text.

        Args:
            filename: path to the FASTQ file; a name ending in ".gz" is
                opened through gzip.  If None, no file is opened and
                self.fh is left as None.
        """
        # Always define the attribute so close() is safe on an unopened reader.
        self.fh = None
        if filename is not None:
            if self._GZIP_SUFFIX.search(filename):
                self.fh = gzip.open(filename, "rt")
            else:
                self.fh = open(filename, "r")

    def __enter__(self):
        """Return the reader itself for use in a `with` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the underlying handle when leaving a `with` block."""
        self.close()

    def __iter__(self):
        """Iterate over records until nextSequence() returns None."""
        return iter(self.nextSequence, None)

    def close(self):
        """Close the underlying file handle, if one is attached."""
        if self.fh is not None:
            self.fh.close()

    def nextSequence(self):
        """Read the next four-line record.

        Returns:
            (ID, seq, qual, qualSeq, pair), or None at end of input, where
              * ID is the first whitespace-delimited token of the header line
              * seq is the sequence line with trailing whitespace stripped
              * qual is a list of ints: ord(char) - 33 for each quality char
              * qualSeq is the raw quality string, trailing whitespace stripped
              * pair is the first digit that follows whitespace in the header
                line, or 1 when there is none
        """
        fh = self.fh
        header = fh.readline()
        # readline() yields an empty string (never None) at end of file.
        if not header:
            return None
        id_match = self._ID_TOKEN.search(header)
        if id_match is None:
            # A header that does not start with a non-whitespace token
            # (e.g. a blank line) is treated the same as end of input.
            return None
        ID = id_match.group(1)
        pair_match = self._PAIR_DIGIT.search(header)
        pair = int(pair_match.group(1)) if pair_match else 1
        seq = fh.readline().rstrip()
        fh.readline()  # third line of the record is read and discarded
        qualSeq = fh.readline().rstrip()
        qual = [ord(ch) - self._QUAL_OFFSET for ch in qualSeq]
        return (ID, seq, qual, qualSeq, pair)