-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGZilla.H
218 lines (200 loc) · 7.83 KB
/
GZilla.H
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
/****************************************************************
GeneZilla
Copyright (C)2013 William H. Majoros ([email protected]).
This is OPEN SOURCE SOFTWARE governed by the Gnu General Public
License (GPL) version 3, as described at www.opensource.org.
****************************************************************/
// Include guard.  The macro has to be defined right after it is tested;
// without the #define the test below is always true and a second inclusion
// of this header would redefine class GeneZilla.
#ifndef INCL_GeneZilla_H
#define INCL_GeneZilla_H
#include <math.h>
#include <iostream>
#include <fstream>
#include <typeinfo>
#include "BOOM/CommandLine.H"
#include "BOOM/DnaAlphabet.H"
#include "BOOM/Map.H"
#include "BOOM/Stack.H"
#include "BOOM/Constants.H"
#include "BOOM/Time.H"
#include "BOOM/Array1D.H"
#include "BOOM/GffReader.H"
#include "genezilla.H"
#include "SignalSensor.H"
#include "SignalQueue.H"
#include "SignalTypeProperties.H"
#include "NoncodingQueue.H"
#include "IntronQueue.H"
#include "EmpiricalDistribution.H"
#include "GeometricDistribution.H"
#include "Transitions.H"
#include "IsochoreFile.H"
//#include "GffReader.H"
#include "TopologyLoader.H"
#include "Edge.H"
#include "GarbageCollector.H"
#include "EdgeFactory.H"
#include "GffPathFromParseGraph.H"
#include "IsochoreTable.H"
#include "IsochoreInterval.H"
#include "EvidenceFilter.H"
#ifdef EXPLICIT_GRAPHS
#include "ParseGraph.H"
#endif
#ifdef REPORT_MEMORY_USAGE
#include "BOOM/MemoryProfiler.H"
#endif
/****************************************************************
class GeneZilla
****************************************************************/
/**
 * GeneZilla: gene-finder driver class.
 *
 * Declares the public entry points (main, processChunk and, when
 * EXPLICIT_GRAPHS is defined, the parse overloads), accessors for the
 * per-sequence state, and the protected working state and helper routines.
 * Only the one-line accessors are defined in this header; every other member
 * function is defined out of line.
 *
 * The class stores two reference members (transcriptId, edgeFactory) and
 * declares no assignment operator, so instances cannot be copy-assigned.
 *
 * Conditional sections:
 *   EXPLICIT_GRAPHS        - adds the ParseGraph-based interface and state.
 *   FORCE_SPECIFIC_SIGNALS - adds forceSignalCoords() and its backing set.
 */
class GeneZilla
{
protected:
  // Scoring switches.  Each public omit*/allow* method below clears exactly
  // one of them.  Their initial values are not set in this header.
  bool useSignalScores, useContentScores, useDurationScores, prohibitPTCs;

public:
  /// Sets useSignalScores to false.
  void omitSignalScores() { useSignalScores=false; }
  /// Sets useContentScores to false.
  void omitContentScores() { useContentScores=false; }
  /// Sets useDurationScores to false.
  void omitDurationScores() { useDurationScores=false; }
  /// Sets prohibitPTCs to false.
  void allowPTCs() { prohibitPTCs=false; }

  /****************************************************************
                          PUBLIC MEMBERS
   ****************************************************************/

  /**
   * @param PROGRAM_NAME  stored by name in the member of the same name
   *                      (presumably; the constructor body is not visible).
   * @param VERSION       likewise.
   * @param transcriptId  taken by reference; the class keeps an int&
   *                      member of the same name, so the referenced int
   *                      must outlive this object (assuming the constructor
   *                      binds the member to it -- TODO confirm).
   */
  GeneZilla(const BOOM::String &PROGRAM_NAME,const BOOM::String &VERSION,
            EdgeFactory &,int &transcriptId);
  virtual ~GeneZilla();

  /// Command-line style entry point; overridable.
  virtual int main(int argc,char *argv[]);

  /**
   * Processes one sequence chunk; overridable.
   * NOTE(review): ownership of the returned stack is not visible from this
   * header -- confirm against the definition before deleting/keeping it.
   *
   * @param psaFilename  presumably the file for dumping the intergenic
   *                     prefix sum array (see the member of the same name).
   */
  virtual BOOM::Stack<SignalPtr> * processChunk(const Sequence &seq,
                                                const BOOM::String &seqStr,
                                                const BOOM::String &isoFilename,
                                                const BOOM::String &substrateId,
                                                std::ostream &osGraph,
                                                bool dumpGraph,
                                                BOOM::String psaFilename);

  void loadIsochoreBoundaries(const BOOM::String &);
  void loadCpGislands(const BOOM::String &);
  int mapPhaseBack(int phase,SignalPtr right,SignalPtr left);
  DiscreteDistribution &getDistribution(ContentType);
  Transitions *getTransitionProbs();

  /// Returns a mutable reference to the intergenicSums member.
  BOOM::Array1D<double> &getIntergenicSums() {return intergenicSums;}
  /// Returns the substrateId member (read-only).
  const BOOM::String &getSubstrateId() const {return substrateId;}
  /// Returns the seqLen member.
  int getSeqLen() const {return seqLen;}

  static ContentType exonTypeToContentType(ExonType,Strand);
  ContentSensor &getContentSensor(ContentType);
  SignalSensor &getSignalSensor(SignalType);
  BOOM::StringMap<char> *getStopCodonConsensuses();

  /// Returns a mutable reference to the garbageCollector member.
  GarbageCollector &getGC() {return garbageCollector;}

  double scoreExon(SignalPtr left,SignalPtr right,int phase,
                   ContentType &);

  /// Stores the pointer as-is; nothing in this header takes ownership of it.
  void setEvidenceFilter(EvidenceFilter *e) { evidenceFilter=e; }

  static float getGCcontent(const BOOM::String &);

  /// Forwards to IsochoreTable::getIsochore on the isochores member.
  Isochore *getIsochore(float gc) {return isochores.getIsochore(gc);}

#ifdef EXPLICIT_GRAPHS
  ParseGraph &getParseGraph();

  // Three overloads of parse(); all return a ParseGraph reference and report
  // a GC content through the float& out-parameter.
  ParseGraph &parse(const BOOM::String &seqFile,const BOOM::String &isoFile,
                    float &gcContent);
  ParseGraph &parse(const BOOM::String &seqFile,const BOOM::String &isoFile,
                    const Sequence *&,const BOOM::String *&seqString,
                    float &gcContent);
  ParseGraph &parse(const BOOM::String &isoFile,const Sequence &,
                    const BOOM::String &seqString,float &gcContent);

  /// Returns the number of elements in rightTermini.
  int numRightTermini() {return rightTermini.size();}

  BOOM::Vector<SignalPtr> *getPathFromGff(BOOM::Vector<
                                          BOOM::GffTranscript*> &,
                                          Sequence &,
                                          const BOOM::String &seqStr,
                                          BOOM::Vector<BOOL> &found);
  void useOneTerminusOnly();
#endif

#ifdef FORCE_SPECIFIC_SIGNALS
  void forceSignalCoords(BOOM::Vector<int> &signalCoords);
#endif

  /****************************************************************
                    PRIVATE & PROTECTED MEMBERS
   ****************************************************************/
protected:
  // NOTE: the order of the data members below is kept exactly as it was;
  // it determines initialization order and object layout.
#ifdef FORCE_SPECIFIC_SIGNALS
  BOOM::Set<int> forcedSignalCoords;
#endif
#ifdef EXPLICIT_GRAPHS
  BOOM::Vector<SignalPtr> leftTermini, rightTermini;
  ParseGraph parseGraph;
  bool oneTerminusOnly;
  virtual void buildParseGraph(const Sequence &seq,const BOOM::String &str);
#endif
  EvidenceFilter *evidenceFilter; // assigned by setEvidenceFilter()
  const Sequence *seq;
  const BOOM::String *seqStr;
  IsochoreTable isochores;
  Isochore *isochore; // the currently active isochore
  //bool invertSignalProbs; // use P(signal|score), not P(sequence|signal)
  int &transcriptId;
  GarbageCollector garbageCollector;
  EdgeFactory &edgeFactory;
  BOOM::String PROGRAM_NAME, VERSION;
  int seqLen;
  BOOM::String substrateId;
  BOOM::Vector<SignalQueue*> signalQueues;
  BOOM::Vector<SignalQueue*> forwardCodingQueues, reverseCodingQueues;
  BOOM::Map<ContentType,SignalQueue*> contentToQueue;
  bool recentlyEclipsedPhases[3];
  BOOM::Array1D<double> intergenicSums; // for printing exon scores in GFF
  float gcContent;
  bool modelCpGislands;
  BOOM::Map< BOOM::String,BOOM::Vector<IsochoreInterval> >
    isochoreIntervals;
  IsochoreInterval nextIsochoreInterval;
  int nextIsochoreIndex;
  BOOM::Regex GCregex;
  BOOM::String psaFilename; // for dumping intergenic prefix sum array

  // Overridable core routine; takes the same graph-dump and prefix-sum-array
  // arguments as processChunk().
  virtual BOOM::Stack<SignalPtr> * mainAlgorithm(const Sequence &,
                                                 const BOOM::String &,
                                                 std::ostream &osGraph,
                                                 bool dumpGraph,
                                                 BOOM::String psaFilename);
  virtual void processIsochoreFile(const BOOM::String &filename,
                                   float gcContent);
  void computeIntergenicSums(const Sequence &,const BOOM::String &,
                             const char *);
  void instantiateLeftTermini();
  BOOM::Stack<SignalPtr> *instantiateRightTermini(const BOOM::String &,
                                                  int seqLen,
                                                  double &parseScore);
  virtual void updateAccumulators(const Sequence &,const BOOM::String &,
                                  int pos,Symbol,char);
  /*inline*/ void linkBack(const BOOM::String &,SignalPtr newSignal);

  // Predecessor selection.  bestScore and bestPred are three-element arrays
  // (declared with extent 3 in every signature below).
  // NOTE(review): three of these are declared inline but no definition is
  // visible in this header; a caller in another translation unit would need
  // the definition to be visible -- confirm where they are defined.
  inline void selectPredecessors(int newConsPos,SignalQueue &queue,
                                 ContentType contentType,Strand strand,
                                 double bestScore[3],SignalPtr bestPred[3],
                                 SignalType toType,
                                 const BOOM::String &substrate,
                                 SignalPtr);
  inline void selectIntergenicPred(int newConsPos,SignalQueue &queue,
                                   Strand strand,double bestScore[3],
                                   SignalPtr bestPred[3],ContentType,
                                   SignalType toType,SignalPtr);
  virtual void selectCodingPred(int newConsPos,SignalQueue &queue,
                                Strand strand,double bestScore[3],
                                SignalPtr bestPred[3],ContentType,
                                SignalType toType,SignalPtr);
  inline void selectIntronPred(int newConsPos,SignalQueue &queue,
                               Strand strand,
                               double bestScore[3],SignalPtr bestPred[3],
                               ContentType,SignalType toType,
                               const BOOM::String &substrate,SignalPtr);

  void enqueue(SignalPtr);
  void handleStopCodons(const BOOM::String &,int pos);
  void terminateForwardORFs(int position);
  void terminateReverseORFs(int position);
  void loadTransProbs(const BOOM::String &transFile,float optimism,
                      float intronOptimism);
  BOOM::Stack<SignalPtr> *traceBack(SignalPtr rightTerminus,int phase);
  void generateGff(BOOM::Stack<SignalPtr> *path,int seqLen,
                   double parseScore);
  double scoreIntronPhases(SignalType predType,SignalType toType,
                           int oldPhase,int newPhase);
  int getPhase(SignalPtr);
  void observeRecentStopCodons(const BOOM::String &,SignalPtr);
  void createQueue(ContentType);
  void switchIsochore(float gcContent,int pos);
  void resetAccumulatorPositions();
  void crossIsochoreBoundary(int pos);
};
#endif