forked from bmajoros/python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest-transcript-reader.py
executable file
·68 lines (59 loc) · 2.29 KB
/
test-transcript-reader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/env python
#=========================================================================
# This is OPEN SOURCE SOFTWARE governed by the Gnu General Public
# License (GPL) version 3, as described at www.opensource.org.
# Copyright (C)2016 William H. Majoros ([email protected]).
#=========================================================================
from __future__ import (absolute_import, division, print_function,
unicode_literals, generators, nested_scopes, with_statement)
from builtins import (bytes, dict, int, list, object, range, str, ascii,
chr, hex, input, next, oct, open, pow, round, super, filter, map, zip)
# The above imports should allow this program to run in both Python 2 and
# Python 3. You might need to update your version of module "future".
from GffTranscriptReader import GffTranscriptReader
#filename="/home/bmajoros/1000G/assembly/local-genes.gff"
#filename="/home/bmajoros/1000G/assembly/tmp.gff"
#filename="test/data/tmp.gff"
#filename="test/data/local-genes.gff"
import sys  # needed only to read the optional path argument below

# Path of the GFF file to load.  The historical hard-coded path remains the
# default; an optional first command-line argument overrides it so the script
# can be pointed at another annotation file without editing the source.
filename="/home/bmajoros/ensembl/protein-coding.gff"
if len(sys.argv)>1:
    filename=sys.argv[1]

reader=GffTranscriptReader()
genes=reader.loadGenes(filename)

# For every gene, print the total number of raw exons summed over all of its
# transcripts next to the number of items returned by getMergedExons().
# NOTE(review): the loop nesting was reconstructed from a copy of this file
# whose indentation had been lost; the report is assumed to be printed once
# per gene -- confirm against the upstream script.
for gene in genes:
    exons=gene.getMergedExons()
    unmerged=sum(len(transcript.getRawExons())
                 for transcript in gene.transcripts)
    print(unmerged,"exons merged to",len(exons))
#for i in range(len(exons)):
# print("MERGED TO:",exons[i].begin,exons[i].end)
# print()
#transcripts=reader.loadGFF(filename)
#for transcript in transcripts:
#print(transcript.getID())
#gff=transcript.toGff()
#print(gff)
#genes=reader.loadGenes(filename)
#for gene in genes:
# print("gene",gene.getID())
# n=gene.getNumTranscripts()
# for i in range(n):
# transcript=gene.getIthTranscript(i)
# transID=transcript.getID()
# print("\t"+transID+"\t"+str(transcript.getBegin())+"\t"
# +str(transcript.getEnd()))
#hashTable=reader.hashBySubstrate(filename)
#keys=hashTable.keys()
#for key in keys:
# print(key)
#hashTable=reader.hashGenesBySubstrate(filename)
#keys=hashTable.keys()
#for key in keys:
# print(key)
#hashTable=reader.loadTranscriptIdHash(filename)
#keys=hashTable.keys()
#for key in keys:
# print(key)
#hashTable=reader.loadGeneIdHash(filename)
#keys=hashTable.keys()
#for key in keys:
# print(key)