Skip to content

Commit

Permalink
Merge pull request #31 from PF2-pasteur-fr/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
hvaret authored May 2, 2017
2 parents 651598e + 9890059 commit ffd6d18
Show file tree
Hide file tree
Showing 47 changed files with 95 additions and 90 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: SARTools
Type: Package
Title: Statistical Analysis of RNA-Seq Tools
Version: 1.4.0
Date: 2016-11-29
Version: 1.4.1
Date: 2017-05-02
Author: Marie-Agnes Dillies and Hugo Varet
Maintainer: Hugo Varet <[email protected]>
Depends: R (>= 3.3.0), DESeq2 (>= 1.12.0), edgeR (>= 3.12.0), xtable
Expand Down
5 changes: 5 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
CHANGES IN VERSION 1.4.1
------------------------
o SARTools now accepts count files generated by featureCounts (still one count file per sample)
o added sample names and colors to the diagnostic of the size factors with DESeq2

CHANGES IN VERSION 1.4.0
------------------------
o added a parameter in exportResults.DESeq2() and exportResults.edgeR() to avoid exporting the results
Expand Down
24 changes: 18 additions & 6 deletions R/diagSizeFactorsPlots.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@
#' Plots to assess the estimations of the size factors
#'
#' @param dds a \code{DESeqDataSet} object
#' @param group factor vector of the condition to which each sample belongs
#' @param col colors for the plots
#' @param outfile TRUE to export the figure in a png file
#' @param plots vector of plots to generate
#' @return Two files in the figures directory: diagSizeFactorsHist.png containing one histogram per sample and diagSizeFactorsTC.png for a plot of the size factors vs the total number of reads
#' @author Marie-Agnes Dillies and Hugo Varet

diagSizeFactorsPlots <- function(dds, outfile=TRUE, plots=c("diag","sf_libsize")){
diagSizeFactorsPlots <- function(dds, group, col=c("lightblue","orange","MediumVioletRed","SpringGreen"),
outfile=TRUE, plots=c("diag","sf_libsize")){
# histograms
if ("diag" %in% plots){
ncol <- ifelse(ncol(counts(dds))<=4, ceiling(sqrt(ncol(counts(dds)))), 3)
Expand All @@ -27,13 +30,22 @@ diagSizeFactorsPlots <- function(dds, outfile=TRUE, plots=c("diag","sf_libsize")
}
if (outfile) dev.off()
}

# total read counts vs size factors
if ("sf_libsize" %in% plots){
if (outfile) png(filename="figures/diagSizeFactorsTC.png",width=1800,height=1800,res=300)
plot(sizeFactors(dds), colSums(counts(dds)), pch=19, las=1, xlab="Size factors",
ylab="Total number of reads",main="Diagnostic: size factors vs total number of reads")
abline(lm(colSums(counts(dds)) ~ sizeFactors(dds) + 0), lty=2, col="grey")
if (outfile) png(filename="figures/diagSizeFactorsTC.png", width=1800, height=1800, res=300)
sf <- sizeFactors(dds)
libsize <- colSums(counts(dds))/1e6
plot(sf, libsize, pch=16, las=1,
col = col[as.integer(group)],
xlab="Size factors", ylab="Total number of reads (millions)",
main="Diagnostic: size factors vs total number of reads")
abs <- range(sf); meanAbs <- mean(abs); abs <- abs(abs[2]-abs[1])/25;
ord <- range(libsize); meanOrd <- mean(ord); ord <- abs(ord[2]-ord[1])/25;
text(sf - ifelse(sf > meanAbs, abs, -abs),
libsize - ifelse(libsize > meanOrd, ord, -ord),
colnames(dds), col=col[as.integer(group)])
abline(lm(libsize ~ sf + 0), lty=2, col="grey")
if (outfile) dev.off()
}
}
Expand Down
50 changes: 33 additions & 17 deletions R/loadCountData.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,48 @@
#'
#' @param target target \code{data.frame} of the project returned by \code{loadTargetFile()}
#' @param rawDir path to the directory containing the count files
#' @param header a logical value indicating whether the file contains the names of the variables as its first line
#' @param skip number of lines of the data file to skip before beginning to read data
#' @param featuresToRemove vector of feature Ids (or character string common to feature Ids) to remove from the counts
#' @return The \code{matrix} of raw counts with row names corresponding to the feature Ids and column names to the sample names as provided in the first column of the target.
#' @details If \code{featuresToRemove} is equal to \code{"rRNA"}, all the features containing the character string "rRNA" will be removed from the counts.
#' @author Marie-Agnes Dillies and Hugo Varet

loadCountData <- function(target, rawDir="raw", header=FALSE, skip=0,
featuresToRemove=c("alignment_not_unique", "ambiguous", "no_feature", "not_aligned", "too_low_aQual")){

loadCountData <- function(target, rawDir="raw", skip=0, featuresToRemove=c("alignment_not_unique", "ambiguous", "no_feature", "not_aligned", "too_low_aQual")){

labels <- as.character(target[,1])
files <- as.character(target[,2])

rawCounts <- read.table(paste(rawDir,files[1],sep="/"), sep="\t", quote="\"", header=header, skip=skip)
rawCounts <- rawCounts[,1:2]

# detect if input count files are from featureCounts or HTSeq-count
f1 <- read.table(paste(rawDir,files[1],sep="/"), sep="\t", quote="\"", header=FALSE, nrows=1, stringsAsFactors=FALSE)
if (ncol(f1) >= 6 && all(apply(f1[1,1:6], 2, is.character))){
# count files generated by featureCounts: feature Ids in column 1, counts in column 7, with a header line
idCol <- 1
countsCol <- 7
header <- TRUE
} else{
if (ncol(f1) >= 2 && is.character(f1[1,1]) & is.numeric(f1[1,2])){
# count files generated by HTSeq-count: feature Ids in column 1, counts in column 2, no header line
idCol <- 1
countsCol <- 2
header <- FALSE
} else{
stop("Can't determine if count files come from HTSeq-count or featureCounts")
}
}

rawCounts <- read.table(paste(rawDir,files[1],sep="/"), sep="\t", quote="\"", header=header, skip=skip, stringsAsFactors=FALSE)
rawCounts <- rawCounts[,c(idCol, countsCol)]
colnames(rawCounts) <- c("Id", labels[1])
if (any(duplicated(rawCounts$Id))) stop("Duplicated feature names in ", files[1])
cat("Loading files:\n")
cat(files[1],": ",length(rawCounts[,labels[1]])," rows and ",sum(rawCounts[,labels[1]]==0)," null count(s)\n",sep="")

for (i in 2:length(files)){
tmp <- read.table(paste(rawDir,files[i],sep="/"), sep="\t", header=header, skip=skip)
tmp <- tmp[,1:2]
colnames(tmp) <- c("Id", labels[i])
if (any(duplicated(tmp$Id))) stop("Duplicated feature names in ", files[i])
rawCounts <- merge(rawCounts, tmp, by="Id", all=TRUE)
tmp <- read.table(paste(rawDir,files[i],sep="/"), sep="\t", header=header, skip=skip, stringsAsFactors=FALSE)
tmp <- tmp[,c(idCol, countsCol)]
colnames(tmp) <- c("Id", labels[i])
if (any(duplicated(tmp$Id))) stop("Duplicated feature names in ", files[i])
rawCounts <- merge(rawCounts, tmp, by="Id", all=TRUE)
cat(files[i],": ",length(tmp[,labels[i]])," rows and ",sum(tmp[,labels[i]]==0)," null count(s)\n",sep="")
}

Expand All @@ -45,11 +61,11 @@ loadCountData <- function(target, rawDir="raw", header=FALSE, skip=0,
for (f in setdiff(featuresToRemove,"")){
match <- grep(f, rownames(counts))
if (length(match)>0){
cat(rownames(counts)[match],sep="\n")
counts <- counts[-match,]
}
cat(rownames(counts)[match],sep="\n")
counts <- counts[-match,]
}
}

cat("\nTop of the counts matrix:\n")
print(head(counts))
cat("\nBottom of the counts matrix:\n")
Expand Down
1 change: 1 addition & 0 deletions R/loadTargetFile.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ loadTargetFile <- function(targetFile, varInt, condRef, batch){
if (any(is.na(cbind(target[,c(varInt, batch)], target[,1:2])))) stop("NA are present in the target file")
# warning message if batch is numeric
if (!is.null(batch) && is.numeric(target[,batch])) warning(paste("The", batch, "variable is numeric. Use factor() or rename the levels with letters to convert it into a factor"))
if (any(grepl("[[:punct:]]", as.character(target[,varInt])))) stop(paste("The", varInt, "variable contains punctuation characters, please remove them"))
cat("Target file:\n")
print(target)
return(target)
Expand Down
2 changes: 1 addition & 1 deletion R/summarizeResults.DESeq2.r
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ summarizeResults.DESeq2 <- function(out.DESeq2, group, independentFiltering=TRUE
results <- out.DESeq2$results

# diagnostic of the size factors
diagSizeFactorsPlots(dds=dds)
diagSizeFactorsPlots(dds=dds, group=group, col=col)

# boxplots before and after normalisation
countsBoxplots(dds, group=group, col=col)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ To install the SARTools package from GitHub, open an R session and:
- install DESeq2, edgeR and genefilter with `source("http://bioconductor.org/biocLite.R")` and `biocLite(c("DESeq2", "edgeR", "genefilter"))` (if not installed yet)
- install devtools with `install.packages("devtools")` (if not installed yet)
- Note: Ubuntu users may have to install some libraries (libxml2-dev, libcurl4-openssl-dev and libssl-dev) to be able to install DESeq2 and devtools
- for Windows users only, install [Rtools](http://cran.r-project.org/bin/windows/Rtools/) or check that it is already installed (needed to build the package)
- for Windows users only, install [Rtools](https://cran.r-project.org/bin/windows/Rtools/) or check that it is already installed (needed to build the package)
- load the devtools R package with `library(devtools)`
- run `install_github("PF2-pasteur-fr/SARTools", build_vignettes=TRUE)`

Expand Down
1 change: 0 additions & 1 deletion man/BCVPlot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/MAPlot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/MDSPlot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/PCAPlot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/SARTools-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 3 additions & 4 deletions man/SERE.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/barplotNull.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/barplotTotal.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/checkParameters.DESeq2.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/checkParameters.edgeR.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/clusterPlot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/countsBoxplots.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/densityPlot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/descriptionPlots.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 7 additions & 2 deletions man/diagSizeFactorsPlots.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/dispersionsPlot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/exploreCounts.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/exportResults.DESeq2.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/exportResults.edgeR.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 1 addition & 4 deletions man/loadCountData.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/loadTargetFile.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/majSequences.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/nDiffTotal.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/pairwiseScatterPlots.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/rawpHist.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/removeNull.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/run.DESeq2.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/run.edgeR.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/summarizeResults.DESeq2.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/summarizeResults.edgeR.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/tabIndepFiltering.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/tabSERE.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/volcanoPlot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/writeReport.DESeq2.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/writeReport.edgeR.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit ffd6d18

Please sign in to comment.