Skip to content

Commit

Permalink
Merge pull request #77 from PF2-pasteur-fr/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
hvaret authored Apr 29, 2020
2 parents d04b76c + e88c235 commit 2b95eaa
Show file tree
Hide file tree
Showing 52 changed files with 209 additions and 107 deletions.
8 changes: 4 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
Package: SARTools
Type: Package
Title: Statistical Analysis of RNA-Seq Tools
Version: 1.7.2
Date: 2020-01-13
Version: 1.7.3
Date: 2020-04-29
Author: Marie-Agnes Dillies and Hugo Varet
Maintainer: Hugo Varet <[email protected]>
Depends: R (>= 3.3.0),
DESeq2 (>= 1.12.0),
edgeR (>= 3.12.0),
ggplot2,
ggplot2 (>= 3.3.0),
kableExtra
Imports: genefilter (>= 1.44.0),
GGally,
Expand All @@ -33,4 +33,4 @@ VignetteBuilder: knitr, rmarkdown
Encoding: latin1
Description: Provide R tools and an environment for the statistical analysis of RNA-Seq projects: load and clean data, produce figures, perform statistical analysis/testing with DESeq2 or edgeR, export results and create final report.
License: GPL-2
RoxygenNote: 7.0.0
RoxygenNote: 7.1.0
7 changes: 7 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
CHANGES IN VERSION 1.7.3
------------------------
o added some parameters to change the ggplot theme
o use breaks instead of binwidth for the raw p-values histogram
o updated vignette with ggplot2 figures
o now requires ggplot2 >= 3.3.0 (cf issue #76 on GitHub)

CHANGES IN VERSION 1.7.2
------------------------
o the levels of the variable of interest are now ordered as they appear in the target file (after the reference level set as parameter)
Expand Down
6 changes: 4 additions & 2 deletions R/BCVPlot.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@
#'
#' @param dge a \code{DGEList} object
#' @param outfile TRUE to export the figure in a png file
#' @param ggplot_theme ggplot2 theme function (\code{theme_gray()} by default)
#' @return A file named BCV.png in the figures directory with a BCV plot produced by the \code{plotBCV()} function of the edgeR package
#' @author Marie-Agnes Dillies and Hugo Varet

BCVPlot <- function(dge, outfile=TRUE){
BCVPlot <- function(dge, outfile=TRUE, ggplot_theme=theme_gray()){
if (outfile) png(filename="figures/BCV.png", width=2100, height=1800, res=300)
A <- dge$AveLogCPM
if (is.null(A)) A <- aveLogCPM(dge$counts, offset = getOffset(dge))
Expand All @@ -26,6 +27,7 @@ BCVPlot <- function(dge, outfile=TRUE){
geom_hline(data=d, aes(yintercept=.data$sqrtcommon, color="c")) +
xlab("Average log CPM") +
ylab("Biological coefficient of variation") +
ggtitle("BCV plot"))
ggtitle("BCV plot") +
ggplot_theme)
if (outfile) dev.off()
}
8 changes: 5 additions & 3 deletions R/MAPlot.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@
#' @param alpha cut-off to apply on each adjusted p-value
#' @param outfile TRUE to export the figure in a png file
#' @param log2FClim numeric vector containing both upper and lower y-axis limits for all the MA-plots produced (NULL by default to set them automatically)
#' @param ggplot_theme ggplot2 theme function (\code{theme_gray()} by default)
#' @return A file named MAPlot.png in the figures directory containing one MA-plot per comparison
#' @author Marie-Agnes Dillies and Hugo Varet

MAPlot <- function(complete, alpha=0.05, outfile=TRUE, log2FClim=NULL){
MAPlot <- function(complete, alpha=0.05, outfile=TRUE, log2FClim=NULL, ggplot_theme=theme_gray()){
ncol <- min(2, length(complete))
nrow <- ceiling(length(complete)/ncol)
if (outfile) png(filename="figures/MAPlot.png", width=cairoSizeWrapper(1800*ncol), height=cairoSizeWrapper(1800*nrow), res=300)
Expand All @@ -34,10 +35,11 @@ MAPlot <- function(complete, alpha=0.05, outfile=TRUE, log2FClim=NULL){
scale_colour_manual(values=c("no"="black", "yes"="red"), drop=FALSE) +
scale_shape_manual(values=c("bottom"=25, "in"=21, "top"=24), drop=FALSE) +
scale_fill_manual(values=c("no"="black", "yes"="red"), drop=FALSE) +
scale_y_continuous(expand=expand_scale(mult=c(0.03, 0.03))) +
scale_y_continuous(expand=expansion(mult=c(0.03, 0.03))) +
xlab("Mean of normalized counts") +
ylab(expression(log[2]~fold~change)) +
ggtitle(paste0("MA-plot - ", gsub("_"," ",name)))
ggtitle(paste0("MA-plot - ", gsub("_"," ",name))) +
ggplot_theme
}
tmpfun <- function(...) grid.arrange(..., nrow=nrow, ncol=ncol)
do.call(tmpfun, p)
Expand Down
14 changes: 7 additions & 7 deletions R/MDSPlot.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,24 @@
#' @param gene.selection \code{"pairwise"} to choose the top features separately for each pairwise comparison between the samples or \code{"common"} to select the same features for all comparisons. Only used when \code{method="logFC"}
#' @param col colors to use (one per biological condition)
#' @param outfile TRUE to export the figure in a png file
#' @param ggplot_theme ggplot2 theme function (\code{theme_gray()} by default)
#' @return A file named MDS.png in the figures directory
#' @author Marie-Agnes Dillies and Hugo Varet

MDSPlot <- function(dge, group, n=min(500, nrow(dge$counts)), gene.selection=c("pairwise", "common"),
col=c("lightblue","orange","MediumVioletRed","SpringGreen"), outfile=TRUE){
MDSPlot <- function(dge, group, n=min(500, nrow(dge$counts)), gene.selection=c("pairwise", "common"),
col=c("lightblue","orange","MediumVioletRed","SpringGreen"), outfile=TRUE, ggplot_theme=theme_gray()){
if (outfile) png(filename="figures/MDS.png", width=1800, height=1800, res=300)
coord <- plotMDS(dge, top=n, method="logFC", gene.selection=gene.selection[1], plot=FALSE)
coord <- as.data.frame(coord)
d <- data.frame(coord[,c("x", "y")],
group=group,
sample=factor(row.names(coord), levels=row.names(coord)))
d <- data.frame(x=coord$x, y=coord$y, group = group,
sample = factor(names(coord$x), levels = names(coord$x)))
print(ggplot(data=d, aes(x=.data$x, y=.data$y, color=group, label=sample)) +
geom_point(show.legend=TRUE, size=3) +
labs(color="") +
scale_colour_manual(values=col) +
geom_text_repel(show.legend=FALSE, size=5, point.padding=0.2) +
xlab("Leading logFC dimension 1") +
ylab("Leading logFC dimension 2") +
ggtitle("Multi-Dimensional Scaling plot"))
ggtitle("Multi-Dimensional Scaling plot") +
ggplot_theme)
if (outfile) dev.off()
}
6 changes: 4 additions & 2 deletions R/PCAPlot.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@
#' @param n number of features to keep among the most variant
#' @param col colors to use (one per biological condition)
#' @param outfile TRUE to export the figure in a png file
#' @param ggplot_theme ggplot2 theme function (\code{theme_gray()} by default)
#' @return A file named PCA.png in the figures directory with a pairwise plot of the three first principal components
#' @author Marie-Agnes Dillies and Hugo Varet

PCAPlot <- function(counts.trans, group, n=min(500, nrow(counts.trans)),
col=c("lightblue","orange","MediumVioletRed","SpringGreen"),
outfile=TRUE){
outfile=TRUE, ggplot_theme=theme_gray()){
# PCA on the n most variable features (at most 500 by default)
rv = apply(counts.trans, 1, var, na.rm=TRUE)
pca = prcomp(t(counts.trans[order(rv, decreasing = TRUE), ][1:n,]))
Expand All @@ -33,7 +34,8 @@ PCAPlot <- function(counts.trans, group, n=min(500, nrow(counts.trans)),
scale_colour_manual(values=col) +
geom_text_repel(show.legend=FALSE, size=5, point.padding=0.2) +
xlab(paste0("PC", index1, " (",prp[index1],"%)")) +
ylab(paste0("PC", index2, " (",prp[index2],"%)"))
ylab(paste0("PC", index2, " (",prp[index2],"%)")) +
ggplot_theme
}
p1 <- tmpFunction(c(1, 2))
p2 <- tmpFunction(c(1, 3))
Expand Down
6 changes: 4 additions & 2 deletions R/barplotNull.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@
#' @param group factor vector of the condition to which each sample belongs
#' @param col colors of the bars (one color per biological condition)
#' @param outfile TRUE to export the figure in a png file
#' @param ggplot_theme ggplot2 theme function (\code{theme_gray()} by default)
#' @return A file named barplotNull.png in the figures directory
#' @author Marie-Agnes Dillies and Hugo Varet

barplotNull <- function(counts, group, col=c("lightblue","orange","MediumVioletRed","SpringGreen"), outfile=TRUE){
barplotNull <- function(counts, group, col=c("lightblue","orange","MediumVioletRed","SpringGreen"), outfile=TRUE, ggplot_theme=theme_gray()){
if (outfile) png(filename="figures/barplotNull.png", width=min(3600, 1800+800*ncol(counts)/10), height=1800, res=300)
percentage <- apply(counts, 2, function(x){sum(x == 0)})*100/nrow(counts)
percentage.allNull <- (nrow(counts) - nrow(removeNull(counts)))*100/nrow(counts)
Expand All @@ -20,9 +21,10 @@ barplotNull <- function(counts, group, col=c("lightblue","orange","MediumVioletR
scale_fill_manual(values=col) +
xlab("Samples") +
ylab("Percentage of null counts") +
scale_y_continuous(expand=expand_scale(mult=c(0.01, 0.05))) +
ggtitle("Percentage of null counts per sample") +
ggplot_theme +
theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) +
scale_y_continuous(expand=expansion(mult=c(0.01, 0.05))) +
geom_hline(yintercept=percentage.allNull, linetype="dashed", color="black", size=1))
if (outfile) dev.off()
}
8 changes: 5 additions & 3 deletions R/barplotTotal.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@
#' @param group factor vector of the condition to which each sample belongs
#' @param col colors of the bars (one color per biological condition)
#' @param outfile TRUE to export the figure in a png file
#' @param ggplot_theme ggplot2 theme function (\code{theme_gray()} by default)
#' @return A file named barplotTotal.png in the figures directory
#' @author Marie-Agnes Dillies and Hugo Varet

barplotTotal <- function(counts, group, col=c("lightblue","orange","MediumVioletRed","SpringGreen"), outfile=TRUE){
barplotTotal <- function(counts, group, col=c("lightblue","orange","MediumVioletRed","SpringGreen"), outfile=TRUE, ggplot_theme=theme_gray()){
if (outfile) png(filename="figures/barplotTotal.png", width=min(3600, 1800+800*ncol(counts)/10), height=1800, res=300)
d <- data.frame(tc=colSums(counts)/1e6, sample=factor(colnames(counts), colnames(counts)), group)
print(ggplot(d, aes(x=.data$sample, y=.data$tc, fill=.data$group)) +
Expand All @@ -18,8 +19,9 @@ barplotTotal <- function(counts, group, col=c("lightblue","orange","MediumViolet
scale_fill_manual(values=col) +
xlab("Samples") +
ylab("Total read count (million)") +
scale_y_continuous(expand=expand_scale(mult=c(0.01, 0.05))) +
ggtitle("Total read count per sample (million)") +
theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)))
ggplot_theme +
theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) +
scale_y_continuous(expand=expansion(mult=c(0.01, 0.05))))
if (outfile) dev.off()
}
6 changes: 4 additions & 2 deletions R/clusterPlot.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,20 @@
#' @param counts.trans a matrix of transformed counts (VST- or rlog-counts if use of DESeq2 or cpm-counts if use of edgeR)
#' @param group factor vector of the condition to which each sample belongs
#' @param outfile TRUE to export the figure in a png file
#' @param ggplot_theme ggplot2 theme function (\code{theme_gray()} by default)
#' @return A file named cluster.png in the figures directory with the dendrogram of the clustering
#' @author Marie-Agnes Dillies and Hugo Varet

clusterPlot <- function(counts.trans, group, outfile=TRUE){
clusterPlot <- function(counts.trans, group, outfile=TRUE, ggplot_theme=theme_gray()){
hc <- hclust(dist(t(counts.trans)), method="ward.D")
if (outfile) png(filename="figures/cluster.png", width=1800, height=1800, res=300)
print(ggdendrogram(hc, theme_dendro=FALSE) +
xlab("Samples") +
ylab("Height") +
ggtitle("Cluster dendrogram\nEuclidean distance, Ward criterion") +
ggplot_theme +
theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5),
axis.text.y=element_text(angle=0)) +
scale_y_continuous(expand=expand_scale(mult=c(0.01, 0.05))))
scale_y_continuous(expand=expansion(mult=c(0.01, 0.05))))
if (outfile) dev.off()
}
21 changes: 12 additions & 9 deletions R/countsBoxplots.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@
#' @param group factor vector of the condition to which each sample belongs
#' @param col colors of the boxplots (one per biological condition)
#' @param outfile TRUE to export the figure in a png file
#' @param ggplot_theme ggplot2 theme function (\code{theme_gray()} by default)
#' @return A file named countsBoxplots.png in the figures directory containing boxplots of the raw and normalized counts
#' @author Marie-Agnes Dillies and Hugo Varet

countsBoxplots <- function(object, group, col = c("lightblue","orange","MediumVioletRed","SpringGreen"), outfile=TRUE){
countsBoxplots <- function(object, group, col = c("lightblue","orange","MediumVioletRed","SpringGreen"), outfile=TRUE, ggplot_theme=theme_gray()){
if (class(object)=="DESeqDataSet"){
counts <- counts(object)
counts <- removeNull(counts)
Expand All @@ -31,28 +32,30 @@ countsBoxplots <- function(object, group, col = c("lightblue","orange","MediumVi
p1 <- ggplot(d) +
geom_boxplot(aes(x=.data$ind, y=.data$values+1, fill=.data$group), show.legend=TRUE) +
labs(fill="") +
scale_y_continuous(trans = log10_trans(),
breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(~10^.x))) +
scale_fill_manual(values=col) +
xlab("Samples") +
ylab("Raw counts") +
ggtitle("Raw counts distribution") +
theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5))
ggplot_theme +
theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) +
scale_y_continuous(trans = log10_trans(),
breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(~10^.x)))

d <- stack(as.data.frame(norm.counts))
d$group <- rep(group, each=nrow(norm.counts))
p2 <- ggplot(d) +
geom_boxplot(aes(x=.data$ind, y=.data$values+1, fill=.data$group), show.legend=TRUE) +
labs(fill="") +
scale_y_continuous(trans = log10_trans(),
breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(~10^.x))) +
scale_fill_manual(values=col) +
xlab("Samples") +
ylab("Normalized counts") +
ggtitle("Normalized counts distribution") +
theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5))
ggplot_theme +
theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) +
scale_y_continuous(trans = log10_trans(),
breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(~10^.x)))

grid.arrange(p1, p2, nrow=1, ncol=2)
if (outfile) dev.off()
Expand Down
6 changes: 4 additions & 2 deletions R/densityPlot.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@
#' @param group factor vector of the condition to which each sample belongs
#' @param col colors of the curves (one per biological condition)
#' @param outfile TRUE to export the figure in a png file
#' @param ggplot_theme ggplot2 theme function (\code{theme_gray()} by default)
#' @return A file named densplot.png in the figures directory
#' @author Marie-Agnes Dillies and Hugo Varet

densityPlot <- function(counts, group, col=c("lightblue","orange","MediumVioletRed","SpringGreen"), outfile=TRUE){
densityPlot <- function(counts, group, col=c("lightblue","orange","MediumVioletRed","SpringGreen"), outfile=TRUE, ggplot_theme=theme_gray()){
if (outfile) png(filename="figures/densplot.png", width=2000, height=1800, res=300)
counts <- removeNull(counts)
d <- stack(data.frame(counts))
Expand All @@ -23,6 +24,7 @@ densityPlot <- function(counts, group, col=c("lightblue","orange","MediumVioletR
scale_colour_manual(values=col) +
xlab("Raw counts") +
ylab("Density") +
ggtitle("Density of counts distribution"))
ggtitle("Density of counts distribution") +
ggplot_theme)
if (outfile) dev.off()
}
13 changes: 7 additions & 6 deletions R/descriptionPlots.r
Original file line number Diff line number Diff line change
Expand Up @@ -5,29 +5,30 @@
#' @param counts \code{matrix} of counts
#' @param group factor vector of the condition to which each sample belongs
#' @param col colors for the plots (one per biological condition)
#' @param ggplot_theme ggplot2 theme function (\code{theme_gray()} by default)
#' @return PNG files in the "figures" directory and the matrix of the most expressed sequences
#' @author Hugo Varet

descriptionPlots <- function(counts, group, col=c("lightblue","orange","MediumVioletRed","SpringGreen")){
descriptionPlots <- function(counts, group, col=c("lightblue","orange","MediumVioletRed","SpringGreen"), ggplot_theme=theme_gray()){
# create the figures directory if it does not exist
if (!I("figures" %in% dir())) dir.create("figures", showWarnings=FALSE)

# total number of reads per sample
barplotTotal(counts=counts, group=group, col=col)
barplotTotal(counts=counts, group=group, col=col, ggplot_theme=ggplot_theme)

# percentage of null counts per sample
barplotNull(counts=counts, group=group, col=col)
barplotNull(counts=counts, group=group, col=col, ggplot_theme=ggplot_theme)

# distribution of counts per sample
densityPlot(counts=counts, group=group, col=col)
densityPlot(counts=counts, group=group, col=col, ggplot_theme=ggplot_theme)

# features which capture the largest number of reads
majSequences <- majSequences(counts=counts, group=group, col=col)
majSequences <- majSequences(counts=counts, group=group, col=col, ggplot_theme=ggplot_theme)

# SERE and pairwise scatter plots
cat("Matrix of SERE statistics:\n")
print(tabSERE(counts))
pairwiseScatterPlots(counts=counts)
pairwiseScatterPlots(counts=counts, ggplot_theme=ggplot_theme)

return(majSequences)
}
13 changes: 7 additions & 6 deletions R/diagSizeFactorsPlots.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@
#' @param col colors for the plots
#' @param outfile TRUE to export the figure in a png file
#' @param plots vector of plots to generate
#' @param ggplot_theme ggplot2 theme function (\code{theme_gray()} by default)
#' @return Two files in the figures directory: diagSizeFactorsHist.png containing one histogram per sample and diagSizeFactorsTC.png for a plot of the size factors vs the total number of reads
#' @author Marie-Agnes Dillies and Hugo Varet

diagSizeFactorsPlots <- function(dds, group, col=c("lightblue","orange","MediumVioletRed","SpringGreen"),
outfile=TRUE, plots=c("diag","sf_libsize")){
outfile=TRUE, plots=c("diag","sf_libsize"), ggplot_theme=theme_gray()){
# histograms
if ("diag" %in% plots){
ncol <- 2
Expand All @@ -26,11 +27,12 @@ diagSizeFactorsPlots <- function(dds, group, col=c("lightblue","orange","MediumV
d <- data.frame(x=counts.trans[,j])
p[[j]] <- ggplot(data=d, aes(x=.data$x)) +
geom_histogram(bins=100) +
scale_y_continuous(expand=expand_scale(mult=c(0.01, 0.05))) +
scale_y_continuous(expand=expansion(mult=c(0.01, 0.05))) +
xlab(expression(log[2]~(counts/geometric~mean))) +
ylab("") +
ggtitle(paste0("Size factor diagnostic - ", samples[j])) +
geom_vline(xintercept=log2(sizeFactors(dds)[j]), linetype="dashed", color="red", size=1)
geom_vline(xintercept=log2(sizeFactors(dds)[j]), linetype="dashed", color="red", size=1) +
ggplot_theme
}
tmpfun <- function(...) grid.arrange(..., nrow=nrow, ncol=ncol)
do.call(tmpfun, p)
Expand All @@ -50,9 +52,8 @@ diagSizeFactorsPlots <- function(dds, group, col=c("lightblue","orange","MediumV
xlab("Size factors") +
ylab("Total number of reads (millions)") +
ggtitle("Diagnostic: size factors vs total number of reads") +
geom_abline(slope=coefficients(lm(libsize ~ sf + 0, data=d)), intercept=0, show.legend=FALSE, linetype="dashed", color="grey"))
geom_abline(slope=coefficients(lm(libsize ~ sf + 0, data=d)), intercept=0, show.legend=FALSE, linetype="dashed", color="grey") +
ggplot_theme)
if (outfile) dev.off()
}
}


Loading

0 comments on commit 2b95eaa

Please sign in to comment.