1
+ # ' @include union.R
2
+ NULL
3
+
4
#' Add meta data column to anchors based on .bed file
#'
#' \code{annotateAnchors} adds a logical variable to meta data columns in the
#' anchors based on a GRanges object of features' genomic coordinates
#'
#' This function adds a column of TRUE/FALSE values to the anchors of the
#' loopdata object, indicating whether a feature in \code{features} is
#' observed nearby. The name of this column in the anchors GRanges object is
#' specified by a user defined string \code{featureName}. Gap tolerance
#' between a feature and an anchor is specified by \code{maxgap}, where the
#' default is 1,000bp.
#'
#' @param dlo A loopdata object whose anchors will be annotated
#' @param features A GRanges object corresponding to locations of interest
#' @param featureName A string that will be the mcol name in anchors
#' @param maxgap A value of max permissible gap between a feature and anchor
#'
#' @return A loopdata object with new meta data column in anchors
#'
#' @examples
#' # Annotate whether anchors are near a gene body; within 1kb
#' rda<-paste(system.file('rda',package='diffloop'),'jpn_chr1reg.rda',sep='/')
#' load(rda)
#' gb <-getHumanGenes()
#' jpn_chr1reg <- annotateAnchors(jpn_chr1reg,gb,'nearGeneBody')
#'
#' # Adding close to gene bodies with no gap tolerance
#' jpn_chr1reg <- annotateAnchors(jpn_chr1reg,gb,'inGeneBody',0)
#'
#' @import GenomicRanges
#'
#' @export
setGeneric(name = "annotateAnchors", def = function(dlo, features,
    featureName, maxgap) standardGeneric("annotateAnchors"))

# Internal worker shared by both annotateAnchors methods.
# Finds which anchors of `dlo` are hit by `features` (within `maxgap`) and
# appends one logical metadata column, named `featureName`, to the anchors.
# Returns the modified `dlo`.
.annotateAnchors <- function(dlo, features, featureName, maxgap) {
    # NOTE(review): suppressWarnings() hides every warning raised by
    # findOverlaps(); presumably aimed at seqlevel mismatches -- confirm.
    hits <- suppressWarnings(findOverlaps(features, dlo@anchors,
        maxgap = maxgap))

    # Anchors are the subject of the overlap query, so subjectHits() indexes
    # into the anchors; start with all FALSE and flag the ones that were hit.
    flagged <- logical(length(ranges(dlo@anchors)))
    flagged[unique(subjectHits(hits))] <- TRUE

    values <- data.frame(flagged)
    colnames(values) <- featureName

    # Append the new column after the existing metadata columns.
    mcols(dlo@anchors) <- c(mcols(dlo@anchors), values)
    dlo
}

#' @rdname annotateAnchors
setMethod(f = "annotateAnchors", signature = c("loopdata", "GRanges",
    "character", "missing"), definition = function(dlo, features,
    featureName, maxgap = 1000) {
    # The generic declares no default for maxgap, so the 1000bp default is
    # assigned explicitly here instead of relying on the method's formal
    # default being honoured during dispatch.
    maxgap <- 1000
    .annotateAnchors(dlo, features, featureName, maxgap)
})

#' @rdname annotateAnchors
setMethod(f = "annotateAnchors", signature = c("loopdata", "GRanges",
    "character", "numeric"), definition = function(dlo, features,
    featureName, maxgap) {
    .annotateAnchors(dlo, features, featureName, maxgap)
})
63
+
64
+
65
#' Get protein coding gene regions
#'
#' \code{getHumanGenes} returns a \code{GRanges} object of all protein
#' coding genes genome-wide or within specified chromosomes
#'
#' This function returns a \code{GRanges} object with the coordinates and
#' gene IDs of all protein coding genes either genome-wide
#' (by default) or specified within a particular chromosome.
#'
#' @param chr A vector of chromosomes
#'
#' @return A GRanges object
#'
#' @examples
#' # Grab all protein coding gene locations genome-wide
#' pc.genes <- getHumanGenes()
#' # Grab all protein coding gene locations on chromosome 1
#' chr1 <- getHumanGenes(c('1'))
#' @import GenomicRanges
#' @import biomaRt
#' @importFrom GenomeInfoDb sortSeqlevels seqlevels seqlevels<-
#' @importFrom S4Vectors queryHits subjectHits
#'
#' @export
setGeneric(name = "getHumanGenes", def = function(chr) standardGeneric("getHumanGenes"))

# Internal worker shared by both getHumanGenes methods.
# Queries the "ensembl" mart (dataset "hsapiens_gene_ensembl") through biomaRt
# for protein coding genes on the chromosomes in `chr` and returns them as a
# sorted GRanges with the gene name kept in the `id` metadata column.
#' @import GenomicRanges
.getHumanGenes <- function(chr) {
    # Filter values are positional: they pair up with `filters` below
    # (chromosome_name first, then biotype).
    vals <- list(chr, "protein_coding")
    mart <- useMart(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")
    geneinfo <- getBM(attributes = c("chromosome_name", "start_position",
        "end_position", "external_gene_name"), filters = c("chromosome_name",
        "biotype"), values = vals, mart = mart)

    # Rename to the column names handed to makeGRangesFromDataFrame() below.
    colnames(geneinfo) <- c("chr", "start", "end", "id")
    raw <- makeGRangesFromDataFrame(geneinfo, keep.extra.columns = TRUE,
        ignore.strand = TRUE, seqnames.field = "chr", start.field = "start",
        end.field = "end", starts.in.df.are.0based = FALSE)

    # Order the seqlevels first so the subsequent sort follows that order.
    sort(sortSeqlevels(raw))
}

#' @rdname getHumanGenes
setMethod(f = "getHumanGenes", signature = c("missing"), definition = function(chr) {
    # No chromosomes supplied: use autosomes 1-22 plus X and Y.
    all_chr <- c(as.character(1:22), "X", "Y")
    .getHumanGenes(all_chr)
})

#' @rdname getHumanGenes
setMethod(f = "getHumanGenes", signature = c("character"), definition = function(chr) {
    .getHumanGenes(chr)
})
0 commit comments