-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathBedGeneReader.py
executable file
·77 lines (72 loc) · 2.66 KB
/
BedGeneReader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#=========================================================================
# This is OPEN SOURCE SOFTWARE governed by the Gnu General Public
# License (GPL) version 3, as described at www.opensource.org.
# Copyright (C)2016 William H. Majoros ([email protected]).
#=========================================================================
from __future__ import (absolute_import, division, print_function,
unicode_literals, generators, nested_scopes, with_statement)
from builtins import (bytes, dict, int, list, object, range, str, ascii,
chr, hex, input, next, oct, open, pow, round, super, filter, map, zip)
from BedReader import BedReader
from BedGene import BedGene
#=========================================================================
# Attributes:
#
# Instance Methods:
# reader=BedGeneReader()
# genes=reader.read(CDS_filename,UTR_filename=None)
# Class Methods:
#
# Private Methods:
# genes=self.readCDS(filename)
# self.addUTR(filename,genes)
# hash=self.hashGenes(genes)
#=========================================================================
class BedGeneReader:
"""BedGeneReader reads BedGene objects from a BED file"""
def __init__(self):
pass
def read(self,CDS_filename,UTR_filename=None):
genes=self.readCDS(CDS_filename)
if(UTR_filename): self.addUTR(UTR_filename,genes)
return genes
def readCDS(self,filename):
reader=BedReader(filename)
genes=[]
genesByName={}
while(True):
record=reader.nextRecord()
if(not record): break
if(not record.isBed6()):
raise Exception("BED file has too few fields")
id=record.name
gene=genesByName.get(id,None)
if(not gene):
gene=BedGene(id,record.chr,record.strand)
genesByName[id]=gene
genes.append(gene)
gene.addCDS(record.interval)
reader.close()
return genes
def hashGenes(self,genes):
hash={}
for gene in genes:
hash[gene.ID]=gene
return hash
def addUTR(self,filename,genes):
hash=self.hashGenes(genes)
reader=BedReader(filename)
while(True):
record=reader.nextRecord()
if(not record): break
if(not record.isBed6()):
raise Exception("BED file has too few fields")
id=record.name
gene=hash.get(id,None)
if(not gene):
gene=BedGene(id,record.chr,record.strand)
genes.append(gene)
hash[id]=gene
gene.addUTR(record.interval)
for gene in genes:
gene.coalesce()