From b2612841aaa585ea819f00645a0130118b6a6783 Mon Sep 17 00:00:00 2001
From: hvaret <hugo.varet@pasteur.fr>
Date: Mon, 20 Mar 2017 13:52:54 +0100
Subject: [PATCH 1/2] featureCounts

---
 DESCRIPTION            |  4 ++--
 NEWS                   |  4 ++++
 R/loadCountData.R      | 50 ++++++++++++++++++++++++++++--------------
 R/loadTargetFile.R     |  1 +
 man/loadCountData.Rd   |  4 +---
 vignettes/SARTools.rmd | 28 ++++++++++++-----------
 6 files changed, 56 insertions(+), 35 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index c56ddbf..4a671f5 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: SARTools
 Type: Package
 Title: Statistical Analysis of RNA-Seq Tools
-Version: 1.4.0
-Date: 2016-11-29
+Version: 1.4.1
+Date: 2017-03-21
 Author: Marie-Agnes Dillies and Hugo Varet
 Maintainer: Hugo Varet <hugo.varet@pasteur.fr>
 Depends: R (>= 3.3.0), DESeq2 (>= 1.12.0), edgeR (>= 3.12.0), xtable
diff --git a/NEWS b/NEWS
index 2231554..f9a0ebd 100755
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,7 @@
+CHANGES IN VERSION 1.4.1
+------------------------
+	o SARTools now accepts count files generated by featureCounts (still one count file per sample)
+
 CHANGES IN VERSION 1.4.0
 ------------------------
 	o added a parameter in exportResults.DESeq2() and exportResults.edgeR() to avoid exporting the results
diff --git a/R/loadCountData.R b/R/loadCountData.R
index 701bb19..e413cdf 100755
--- a/R/loadCountData.R
+++ b/R/loadCountData.R
@@ -4,32 +4,48 @@
 #'
 #' @param target target \code{data.frame} of the project returned by \code{loadTargetFile()}
 #' @param rawDir path to the directory containing the count files
-#' @param header a logical value indicating whether the file contains the names of the variables as its first line
 #' @param skip number of lines of the data file to skip before beginning to read data
 #' @param featuresToRemove vector of feature Ids (or character string common to feature Ids) to remove from the counts
 #' @return The \code{matrix} of raw counts with row names corresponding to the feature Ids and column names to the sample names as provided in the first column of the target.
 #' @details If \code{featuresToRemove} is equal to \code{"rRNA"}, all the features containing the character string "rRNA" will be removed from the counts.
 #' @author Marie-Agnes Dillies and Hugo Varet
 
-loadCountData <- function(target, rawDir="raw", header=FALSE, skip=0,
-                          featuresToRemove=c("alignment_not_unique", "ambiguous", "no_feature", "not_aligned", "too_low_aQual")){
-
+loadCountData <- function(target, rawDir="raw", skip=0, featuresToRemove=c("alignment_not_unique", "ambiguous", "no_feature", "not_aligned", "too_low_aQual")){
+  
   labels <- as.character(target[,1])
   files <- as.character(target[,2])
-
-  rawCounts <- read.table(paste(rawDir,files[1],sep="/"), sep="\t", quote="\"", header=header, skip=skip)
-  rawCounts <- rawCounts[,1:2]
+  
+  # detect if input count files are from featureCounts or HTSeq-count
+  f1 <- read.table(paste(rawDir,files[1],sep="/"), sep="\t", quote="\"", header=FALSE, nrows=1, stringsAsFactors=FALSE)
+  if (ncol(f1) >= 6 && all(apply(f1[1,1:6], 2, is.character))){
+    # counter featurecounts
+    idCol <- 1
+    countsCol <- 7
+    header <- TRUE
+  } else{
+    if (ncol(f1) >= 2 && is.character(f1[1,1]) & is.numeric(f1[1,2])){
+      # counter htseq-count
+      idCol <- 1
+      countsCol <- 2
+      header <- FALSE
+    } else{
+      stop("Can't determine if count files come from HTSeq-count or featureCounts")
+    }
+  }
+  
+  rawCounts <- read.table(paste(rawDir,files[1],sep="/"), sep="\t", quote="\"", header=header, skip=skip, stringsAsFactors=FALSE)
+  rawCounts <- rawCounts[,c(idCol, countsCol)]
   colnames(rawCounts) <- c("Id", labels[1])
   if (any(duplicated(rawCounts$Id))) stop("Duplicated feature names in ", files[1])
   cat("Loading files:\n")
   cat(files[1],": ",length(rawCounts[,labels[1]])," rows and ",sum(rawCounts[,labels[1]]==0)," null count(s)\n",sep="")
-
+  
   for (i in 2:length(files)){
-  	tmp <- read.table(paste(rawDir,files[i],sep="/"), sep="\t", header=header, skip=skip)
-    tmp <- tmp[,1:2]
-  	colnames(tmp) <- c("Id", labels[i])
-  	if (any(duplicated(tmp$Id))) stop("Duplicated feature names in ", files[i])
-	rawCounts <- merge(rawCounts, tmp, by="Id", all=TRUE)
+    tmp <- read.table(paste(rawDir,files[i],sep="/"), sep="\t", header=header, skip=skip, stringsAsFactors=FALSE)
+    tmp <- tmp[,c(idCol, countsCol)]
+    colnames(tmp) <- c("Id", labels[i])
+    if (any(duplicated(tmp$Id))) stop("Duplicated feature names in ", files[i])
+    rawCounts <- merge(rawCounts, tmp, by="Id", all=TRUE)
     cat(files[i],": ",length(tmp[,labels[i]])," rows and ",sum(tmp[,labels[i]]==0)," null count(s)\n",sep="")
   }
   
@@ -45,11 +61,11 @@ loadCountData <- function(target, rawDir="raw", header=FALSE, skip=0,
   for (f in setdiff(featuresToRemove,"")){
     match <- grep(f, rownames(counts))
     if (length(match)>0){
-	  cat(rownames(counts)[match],sep="\n")
-	  counts <- counts[-match,]
-	}
+      cat(rownames(counts)[match],sep="\n")
+      counts <- counts[-match,]
+    }
   }
-
+  
   cat("\nTop of the counts matrix:\n")
   print(head(counts))
   cat("\nBottom of the counts matrix:\n")
diff --git a/R/loadTargetFile.R b/R/loadTargetFile.R
index a89707e..65c7934 100755
--- a/R/loadTargetFile.R
+++ b/R/loadTargetFile.R
@@ -25,6 +25,7 @@ loadTargetFile <- function(targetFile, varInt, condRef, batch){
   if (any(is.na(cbind(target[,c(varInt, batch)], target[,1:2])))) stop("NA are present in the target file")
   # warning message if batch is numeric
   if (!is.null(batch) && is.numeric(target[,batch])) warning(paste("The", batch, "variable is numeric. Use factor() or rename the levels with letters to convert it into a factor"))
+  if (any(grepl("[[:punct:]]", as.character(target[,varInt])))) stop(paste("The", varInt, "variable contains punctuation characters, please remove them"))
   cat("Target file:\n")
   print(target)
   return(target)
diff --git a/man/loadCountData.Rd b/man/loadCountData.Rd
index dd2c3ab..6a7175c 100644
--- a/man/loadCountData.Rd
+++ b/man/loadCountData.Rd
@@ -4,7 +4,7 @@
 \alias{loadCountData}
 \title{Load count files}
 \usage{
-loadCountData(target, rawDir = "raw", header = FALSE, skip = 0,
+loadCountData(target, rawDir = "raw", skip = 0,
   featuresToRemove = c("alignment_not_unique", "ambiguous", "no_feature",
   "not_aligned", "too_low_aQual"))
 }
@@ -13,8 +13,6 @@ loadCountData(target, rawDir = "raw", header = FALSE, skip = 0,
 
 \item{rawDir}{path to the directory containing the count files}
 
-\item{header}{a logical value indicating whether the file contains the names of the variables as its first line}
-
 \item{skip}{number of lines of the data file to skip before beginning to read data}
 
 \item{featuresToRemove}{vector of feature Ids (or character string common to feature Ids) to remove from the counts}
diff --git a/vignettes/SARTools.rmd b/vignettes/SARTools.rmd
index 093c1b2..86d9bdc 100755
--- a/vignettes/SARTools.rmd
+++ b/vignettes/SARTools.rmd
@@ -30,14 +30,14 @@ The only file the user has to deal with for an analysis is either `template_scri
 
 ### 2.2 Data files
 
-The statistical analysis assumes that reads have already been mapped and that counts per feature (gene or transcript) are available. If counting has been done with HTSeq-count [4], output files are ready to be loaded in R with the dedicated SARTools function. If not, the user must supply one count file per sample with two tab delimited columns without header:
+The statistical analysis assumes that reads have already been mapped and that counts per feature (gene or transcript) are available. If counting has been done with HTSeq-count [4] or featureCounts [5], output files are ready to be loaded in R with the dedicated SARTools function. If not, the user must supply one count file per sample with two tab delimited columns without header:
 
  - the unique IDs of the features in the first column;
  - the raw counts associated with these features in the second column (null or positive integers).
 
 All the count data files have to be placed in a directory whose name will be passed as a parameter at the beginning of the R script. 
 
-The user has to supply another tab delimited file which describes the experiment, i.e. which contains the name of the biological condition associated with each sample. This file is called "target" as a reference to the target file needed when using the limma package [5]. This file has one row per sample and is composed of at least three columns with headers: 
+The user has to supply another tab delimited file which describes the experiment, i.e. which contains the name of the biological condition associated with each sample. This file is called "target" as a reference to the target file needed when using the limma package [6]. This file has one row per sample and is composed of at least three columns with headers: 
 
  - first column: unique names of the samples (short but informative as they will be displayed on all the figures);
  - second column: name of the count files;
@@ -73,10 +73,10 @@ All the parameters that can be modified by the user are at the beginning of the
  - `condRef`: reference biological condition used to compute fold-changes (no default, must be one of the levels of `varInt`);
  - `batch`: adjustment variable to use as a batch effect, must be a column of the target file (`"day"` for example, or `NULL` if no batch effect needs to be taken into account);
  - `fitType`: (if use of DESeq2) type of model for the mean-dispersion relationship (`"parametric"` by default, or `"local"`);
- - `cooksCutoff`: (if use of DESeq2) `TRUE` (default) of `FALSE` to execute or not the detection of the outliers [6];
- - `independentFiltering`: (if use of DESeq2) `TRUE` (default) of `FALSE` to execute or not the independent filtering [7];
+ - `cooksCutoff`: (if use of DESeq2) `TRUE` (default) or `FALSE` to execute or not the detection of the outliers [7];
+ - `independentFiltering`: (if use of DESeq2) `TRUE` (default) or `FALSE` to execute or not the independent filtering [8];
  - `alpha`: significance threshold applied to the adjusted p-values to select the differentially expressed features (default is `0.05`);
- - `pAdjustMethod`: p-value adjustment method for multiple testing (`"BH"` by default, `"BY"` or any value of `p.adjust.methods`) [8,9];
+ - `pAdjustMethod`: p-value adjustment method for multiple testing (`"BH"` by default, `"BY"` or any value of `p.adjust.methods`) [9,10];
  - `typeTrans`: (if use of DESeq2) method of transformation of the counts for the clustering and the PCA (default is `"VST"` for Variance Stabilizing Transformation, or `"rlog"` for Regularized Log Transformation);
  - `locfunc`: (if use of DESeq2) function used for the estimation of the size factors (default is `"median"`, or `"shorth"` from the genefilter` package);
  - `cpmCutoff`: (if use of edgeR) counts-per-million cut-off to filter low counts (default is 1, set to 0 to disable filtering);
@@ -94,7 +94,7 @@ When the parameters have been defined, the user can run all the R code, either s
  - target with the count files loaded and the biological condition associated with each sample;
  - number of features and null counts in each file;
  - top and bottom of the count matrix;
- - SERE coefficients computed between each pair of samples [10];
+ - SERE coefficients computed between each pair of samples [11];
  - normalization factors (TMM for edgeR and size factors for DESeq2);
  - number of features discarded by the independent filtering (if use of DESeq2);
  - number of differentially expressed features.
@@ -139,7 +139,7 @@ This section aims at listing some problems that the user can face when analyzing
 ### 4.1 Inversion of samples
 For a variety of reasons, it might happen that some sample names are erroneously switched at a step of the experiment. This can be detected during the statistical analysis in several ways. Here, we have intentionally inverted two file names in a target file, such that the counts associated with these two samples (WT3 and KO3) are inverted. 
 
-The first tool to detect the inversion is the SERE statistic [10] since its goal is to measure the similarity between samples. The SERE values obtained are displayed on the lower triangle of the figure 1. We clearly observe that KO3 is more similar to WT1 (SERE=1.7) than to KO2 (3.4), which potentially reveals a problem within the samples under study. The same phenomenon happens with WT3 which is more similar to KO1 (1.6) than to WT1 (4.59).
+The first tool to detect the inversion is the SERE statistic [11] since its goal is to measure the similarity between samples. The SERE values obtained are displayed on the lower triangle of the figure 1. We clearly observe that KO3 is more similar to WT1 (SERE=1.7) than to KO2 (3.4), which potentially reveals a problem within the samples under study. The same phenomenon happens with WT3 which is more similar to KO1 (1.6) than to WT1 (4.59).
 
 ![figures/inversionpairwiseScatter.png](figures/inversionpairwiseScatter.png)
 
@@ -214,15 +214,17 @@ The user can try the R scripts `template_script_DESeq2.r` and `template_script_e
 
 [4] Anders S, Pyl TP, Huber W. **HTSeq - A Python framework to work with high-throughput sequencing data**. *Bioinformatics*. 2014; doi:10.1093/bioinformatics/btu638.
 
-[5] Ritchie ME, Phipson B, Wu D, et al. **limma powers differential expression analyses for RNA-sequencing and microarray studies**. *Nucleic Acids Research*. 2015; doi:10.1093/nar/gkv007.
+[5] Liao Y, Smyth GK and Shi W. **featureCounts: an efficient general purpose program for assigning sequence reads to genomic features**. *Bioinformatics*. 2014; doi:10.1093/bioinformatics/btt656.
 
-[6] Cook RD. **Detection of Influential Observation in Linear Regression**. *Technometrics*. 1977; DOI:10.1080/00401706.2000.10485981.
+[6] Ritchie ME, Phipson B, Wu D, et al. **limma powers differential expression analyses for RNA-sequencing and microarray studies**. *Nucleic Acids Research*. 2015; doi:10.1093/nar/gkv007.
 
-[7] Bourgon R, Gentleman R and Huber W. **Independent filtering increases detection power for high-throughput experiments**. *PNAS*. 2010; doi:10.1073/pnas.0914005107.
+[7] Cook RD. **Detection of Influential Observation in Linear Regression**. *Technometrics*. 1977; DOI:10.1080/00401706.2000.10485981.
 
-[8] Benjamini Y and Hochberg Y. **Controlling the false discovery rate: a practical and powerful approach to multiple testing**. *Journal of the Royal Statistical Society B*. 1995; doi:10.2307/2346101.
+[8] Bourgon R, Gentleman R and Huber W. **Independent filtering increases detection power for high-throughput experiments**. *PNAS*. 2010; doi:10.1073/pnas.0914005107.
 
-[9] Benjamini Y and Yekutieli D. **The control of the false discovery rate in multiple testing under dependency**. *Annals of Statistics*. 2001.
+[9] Benjamini Y and Hochberg Y. **Controlling the false discovery rate: a practical and powerful approach to multiple testing**. *Journal of the Royal Statistical Society B*. 1995; doi:10.2307/2346101.
 
-[10] Schulze SK, Kanwar R, Golzenleuchter M, et al. **SERE: Single-parameter quality control and sample comparison for RNA-Seq**. *BMC Genomics*. 2012; doi:10.1186/1471-2164-13-524.
+[10] Benjamini Y and Yekutieli D. **The control of the false discovery rate in multiple testing under dependency**. *Annals of Statistics*. 2001.
+
+[11] Schulze SK, Kanwar R, Golzenleuchter M, et al. **SERE: Single-parameter quality control and sample comparison for RNA-Seq**. *BMC Genomics*. 2012; doi:10.1186/1471-2164-13-524.
 

From 9890059b1d8b190feda4e4accdfd6e0346644638 Mon Sep 17 00:00:00 2001
From: hvaret <hugo.varet@pasteur.fr>
Date: Tue, 2 May 2017 10:16:32 +0200
Subject: [PATCH 2/2] 1.4.1

---
 DESCRIPTION                    |  2 +-
 NEWS                           |  1 +
 R/diagSizeFactorsPlots.R       | 24 ++++++++++++++++++------
 R/summarizeResults.DESeq2.r    |  2 +-
 README.md                      |  2 +-
 man/BCVPlot.Rd                 |  1 -
 man/MAPlot.Rd                  |  1 -
 man/MDSPlot.Rd                 |  1 -
 man/PCAPlot.Rd                 |  1 -
 man/SARTools-package.Rd        |  1 -
 man/SERE.Rd                    |  7 +++----
 man/barplotNull.Rd             |  1 -
 man/barplotTotal.Rd            |  1 -
 man/checkParameters.DESeq2.Rd  |  1 -
 man/checkParameters.edgeR.Rd   |  1 -
 man/clusterPlot.Rd             |  1 -
 man/countsBoxplots.Rd          |  1 -
 man/densityPlot.Rd             |  1 -
 man/descriptionPlots.Rd        |  1 -
 man/diagSizeFactorsPlots.Rd    |  9 +++++++--
 man/dispersionsPlot.Rd         |  1 -
 man/exploreCounts.Rd           |  1 -
 man/exportResults.DESeq2.Rd    |  1 -
 man/exportResults.edgeR.Rd     |  1 -
 man/loadCountData.Rd           |  1 -
 man/loadTargetFile.Rd          |  1 -
 man/majSequences.Rd            |  1 -
 man/nDiffTotal.Rd              |  1 -
 man/pairwiseScatterPlots.Rd    |  1 -
 man/rawpHist.Rd                |  1 -
 man/removeNull.Rd              |  1 -
 man/run.DESeq2.Rd              |  1 -
 man/run.edgeR.Rd               |  1 -
 man/summarizeResults.DESeq2.Rd |  1 -
 man/summarizeResults.edgeR.Rd  |  1 -
 man/tabIndepFiltering.Rd       |  1 -
 man/tabSERE.Rd                 |  1 -
 man/volcanoPlot.Rd             |  1 -
 man/writeReport.DESeq2.Rd      |  1 -
 man/writeReport.edgeR.Rd       |  1 -
 template_script_DESeq2.r       |  4 ++--
 template_script_DESeq2_CL.r    |  4 ++--
 template_script_edgeR.r        |  4 ++--
 template_script_edgeR_CL.r     |  4 ++--
 44 files changed, 40 insertions(+), 56 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 4a671f5..277186d 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -2,7 +2,7 @@ Package: SARTools
 Type: Package
 Title: Statistical Analysis of RNA-Seq Tools
 Version: 1.4.1
-Date: 2017-03-21
+Date: 2017-05-02
 Author: Marie-Agnes Dillies and Hugo Varet
 Maintainer: Hugo Varet <hugo.varet@pasteur.fr>
 Depends: R (>= 3.3.0), DESeq2 (>= 1.12.0), edgeR (>= 3.12.0), xtable
diff --git a/NEWS b/NEWS
index f9a0ebd..0cbef0a 100755
--- a/NEWS
+++ b/NEWS
@@ -1,6 +1,7 @@
 CHANGES IN VERSION 1.4.1
 ------------------------
 	o SARTools now accepts count files generated by featureCounts (still one count file per sample)
+	o added sample names and colors to the diagnostic of the size factors with DESeq2
 
 CHANGES IN VERSION 1.4.0
 ------------------------
diff --git a/R/diagSizeFactorsPlots.R b/R/diagSizeFactorsPlots.R
index 1a53ac9..cf3255d 100755
--- a/R/diagSizeFactorsPlots.R
+++ b/R/diagSizeFactorsPlots.R
@@ -3,12 +3,15 @@
 #' Plots to assess the estimations of the size factors
 #'
 #' @param dds a \code{DESeqDataSet} object
+#' @param group factor vector of the condition from which each sample belongs
+#' @param col colors for the plots
 #' @param outfile TRUE to export the figure in a png file
 #' @param plots vector of plots to generate
 #' @return Two files in the figures directory: diagSizeFactorsHist.png containing one histogram per sample and diagSizeFactorsTC.png for a plot of the size factors vs the total number of reads
 #' @author Marie-Agnes Dillies and Hugo Varet
 
-diagSizeFactorsPlots <- function(dds, outfile=TRUE, plots=c("diag","sf_libsize")){
+diagSizeFactorsPlots <- function(dds, group, col=c("lightblue","orange","MediumVioletRed","SpringGreen"), 
+                                 outfile=TRUE, plots=c("diag","sf_libsize")){
   # histograms
   if ("diag" %in% plots){
     ncol <- ifelse(ncol(counts(dds))<=4, ceiling(sqrt(ncol(counts(dds)))), 3)
@@ -27,13 +30,22 @@ diagSizeFactorsPlots <- function(dds, outfile=TRUE, plots=c("diag","sf_libsize")
     }
     if (outfile) dev.off()
   }
-
+  
   # total read counts vs size factors
   if ("sf_libsize" %in% plots){
-    if (outfile) png(filename="figures/diagSizeFactorsTC.png",width=1800,height=1800,res=300)  
-    plot(sizeFactors(dds), colSums(counts(dds)), pch=19, las=1, xlab="Size factors",
-	 ylab="Total number of reads",main="Diagnostic: size factors vs total number of reads")
-    abline(lm(colSums(counts(dds)) ~ sizeFactors(dds) + 0), lty=2, col="grey")
+    if (outfile) png(filename="figures/diagSizeFactorsTC.png", width=1800, height=1800, res=300)  
+    sf <- sizeFactors(dds)
+    libsize <- colSums(counts(dds))/1e6
+    plot(sf, libsize, pch=16, las=1,
+         col = col[as.integer(group)],
+         xlab="Size factors", ylab="Total number of reads (millions)",
+         main="Diagnostic: size factors vs total number of reads")
+    abs <- range(sf); meanAbs <- mean(abs); abs <- abs(abs[2]-abs[1])/25;
+    ord <- range(libsize); meanOrd <- mean(ord); ord <- abs(ord[2]-ord[1])/25;
+    text(sf - ifelse(sf > meanAbs, abs, -abs), 
+         libsize - ifelse(libsize > meanOrd, ord, -ord),
+         colnames(dds), col=col[as.integer(group)])
+    abline(lm(libsize ~ sf + 0), lty=2, col="grey")
     if (outfile) dev.off()
   }
 }
diff --git a/R/summarizeResults.DESeq2.r b/R/summarizeResults.DESeq2.r
index 6fca5bb..393792a 100755
--- a/R/summarizeResults.DESeq2.r
+++ b/R/summarizeResults.DESeq2.r
@@ -21,7 +21,7 @@ summarizeResults.DESeq2 <- function(out.DESeq2, group, independentFiltering=TRUE
   results <- out.DESeq2$results
   
   # diagnostic of the size factors
-  diagSizeFactorsPlots(dds=dds)
+  diagSizeFactorsPlots(dds=dds, group=group, col=col)
   
   # boxplots before and after normalisation
   countsBoxplots(dds, group=group, col=col)
diff --git a/README.md b/README.md
index 147abfa..9c7bd97 100755
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ To install the SARTools package from GitHub, open a R session and:
 - install DESeq2, edgeR and genefilter with `source("http://bioconductor.org/biocLite.R")` and `biocLite(c("DESeq2", "edgeR", "genefilter"))` (if not installed yet)
 - install devtools with `install.packages("devtools")` (if not installed yet)
 - Note: Ubuntu users may have to install some libraries (libxml2-dev, libcurl4-openssl-dev and libssl-dev) to be able to install DESeq2 and devtools
-- for Windows users only, install [Rtools](http://cran.r-project.org/bin/windows/Rtools/) or check that it is already installed (needed to build the package)
+- for Windows users only, install [Rtools](https://cran.r-project.org/bin/windows/Rtools/) or check that it is already installed (needed to build the package)
 - load the devtools R package with `library(devtools)`
 - run `install_github("PF2-pasteur-fr/SARTools", build_vignettes=TRUE)`
 
diff --git a/man/BCVPlot.Rd b/man/BCVPlot.Rd
index 8d5a177..e4904dd 100644
--- a/man/BCVPlot.Rd
+++ b/man/BCVPlot.Rd
@@ -20,4 +20,3 @@ Biological Coefficient of Variation plot (for edgeR objects)
 \author{
 Marie-Agnes Dillies and Hugo Varet
 }
-
diff --git a/man/MAPlot.Rd b/man/MAPlot.Rd
index 42a9a34..cd4145d 100644
--- a/man/MAPlot.Rd
+++ b/man/MAPlot.Rd
@@ -22,4 +22,3 @@ MA-plot for each comparison: log2(FC) vs mean of normalized counts with one dot
 \author{
 Marie-Agnes Dillies and Hugo Varet
 }
-
diff --git a/man/MDSPlot.Rd b/man/MDSPlot.Rd
index 9a089a7..ef09cc7 100644
--- a/man/MDSPlot.Rd
+++ b/man/MDSPlot.Rd
@@ -30,4 +30,3 @@ Multi-Dimensional Scaling plot of samples based on the 500 most variant features
 \author{
 Marie-Agnes Dillies and Hugo Varet
 }
-
diff --git a/man/PCAPlot.Rd b/man/PCAPlot.Rd
index 061ddd2..dea93db 100644
--- a/man/PCAPlot.Rd
+++ b/man/PCAPlot.Rd
@@ -28,4 +28,3 @@ Principal Component Analysis of samples based on the 500 most variant features o
 \author{
 Marie-Agnes Dillies and Hugo Varet
 }
-
diff --git a/man/SARTools-package.Rd b/man/SARTools-package.Rd
index c552ce6..e2a3d07 100644
--- a/man/SARTools-package.Rd
+++ b/man/SARTools-package.Rd
@@ -10,4 +10,3 @@ Provide R tools and an environment for the statistical analysis of RNA-Seq proje
 \author{
 Marie-Agnes Dillies and Hugo Varet
 }
-
diff --git a/man/SERE.Rd b/man/SERE.Rd
index f4bbe01..97e0726 100644
--- a/man/SERE.Rd
+++ b/man/SERE.Rd
@@ -15,10 +15,9 @@ The SERE coefficient for the two samples
 \description{
 Compute the SERE coefficient for two samples
 }
-\author{
-See paper published
-}
 \references{
 Schulze, Kanwar, Golzenleuchter et al, SERE: Single-parameter quality control and sample comparison for RNA-Seq, BMC Genomics, 2012
 }
-
+\author{
+See paper published
+}
diff --git a/man/barplotNull.Rd b/man/barplotNull.Rd
index 82c0dc5..31315e3 100644
--- a/man/barplotNull.Rd
+++ b/man/barplotNull.Rd
@@ -25,4 +25,3 @@ Bar plot of the percentage of null counts per sample
 \author{
 Marie-Agnes Dillies and Hugo Varet
 }
-
diff --git a/man/barplotTotal.Rd b/man/barplotTotal.Rd
index 38c8e27..109080d 100644
--- a/man/barplotTotal.Rd
+++ b/man/barplotTotal.Rd
@@ -25,4 +25,3 @@ Bar plot of the total number of reads per sample
 \author{
 Marie-Agnes Dillies and Hugo Varet
 }
-
diff --git a/man/checkParameters.DESeq2.Rd b/man/checkParameters.DESeq2.Rd
index db4c3e5..eb0d7cd 100644
--- a/man/checkParameters.DESeq2.Rd
+++ b/man/checkParameters.DESeq2.Rd
@@ -50,4 +50,3 @@ Check the format and the validity of the parameters which will be used for the a
 \author{
 Hugo Varet
 }
-
diff --git a/man/checkParameters.edgeR.Rd b/man/checkParameters.edgeR.Rd
index e252cdd..468e8e6 100644
--- a/man/checkParameters.edgeR.Rd
+++ b/man/checkParameters.edgeR.Rd
@@ -46,4 +46,3 @@ Check the format and the validity of the parameters which will be used for the a
 \author{
 Hugo Varet
 }
-
diff --git a/man/clusterPlot.Rd b/man/clusterPlot.Rd
index 3b57fed..61841fb 100644
--- a/man/clusterPlot.Rd
+++ b/man/clusterPlot.Rd
@@ -22,4 +22,3 @@ Clustering of the samples based on VST- or rlog-counts (if use of DESeq2) or cpm
 \author{
 Marie-Agnes Dillies and Hugo Varet
 }
-
diff --git a/man/countsBoxplots.Rd b/man/countsBoxplots.Rd
index 6091b49..1f1d5e0 100644
--- a/man/countsBoxplots.Rd
+++ b/man/countsBoxplots.Rd
@@ -25,4 +25,3 @@ Box-plots of raw and normalized counts distributions per sample to assess the ef
 \author{
 Marie-Agnes Dillies and Hugo Varet
 }
-
diff --git a/man/densityPlot.Rd b/man/densityPlot.Rd
index 298e746..2202bba 100644
--- a/man/densityPlot.Rd
+++ b/man/densityPlot.Rd
@@ -25,4 +25,3 @@ Estimation the counts density for each sample
 \author{
 Marie-Agnes Dillies and Hugo Varet
 }
-
diff --git a/man/descriptionPlots.Rd b/man/descriptionPlots.Rd
index 6886bc8..308668e 100644
--- a/man/descriptionPlots.Rd
+++ b/man/descriptionPlots.Rd
@@ -23,4 +23,3 @@ Description plots of the counts according to the biological condition
 \author{
 Hugo Varet
 }
-
diff --git a/man/diagSizeFactorsPlots.Rd b/man/diagSizeFactorsPlots.Rd
index edb36eb..72060a8 100644
--- a/man/diagSizeFactorsPlots.Rd
+++ b/man/diagSizeFactorsPlots.Rd
@@ -4,11 +4,17 @@
 \alias{diagSizeFactorsPlots}
 \title{Assess the estimations of the size factors}
 \usage{
-diagSizeFactorsPlots(dds, outfile = TRUE, plots = c("diag", "sf_libsize"))
+diagSizeFactorsPlots(dds, group, col = c("lightblue", "orange",
+  "MediumVioletRed", "SpringGreen"), outfile = TRUE, plots = c("diag",
+  "sf_libsize"))
 }
 \arguments{
 \item{dds}{a \code{DESeqDataSet} object}
 
+\item{group}{factor vector of the condition from which each sample belongs}
+
+\item{col}{colors for the plots}
+
 \item{outfile}{TRUE to export the figure in a png file}
 
 \item{plots}{vector of plots to generate}
@@ -22,4 +28,3 @@ Plots to assess the estimations of the size factors
 \author{
 Marie-Agnes Dillies and Hugo Varet
 }
-
diff --git a/man/dispersionsPlot.Rd b/man/dispersionsPlot.Rd
index 8a0537d..e93815e 100644
--- a/man/dispersionsPlot.Rd
+++ b/man/dispersionsPlot.Rd
@@ -20,4 +20,3 @@ A plot of the mean-dispersion relationship and a diagnostic of log normality of
 \author{
 Marie-Agnes Dillies and Hugo Varet
 }
-
diff --git a/man/exploreCounts.Rd b/man/exploreCounts.Rd
index f47ff3e..948ca2d 100644
--- a/man/exploreCounts.Rd
+++ b/man/exploreCounts.Rd
@@ -27,4 +27,3 @@ Explore counts structure: PCA (DESeq2) or MDS (edgeR) and clustering
 \author{
 Hugo Varet
 }
-
diff --git a/man/exportResults.DESeq2.Rd b/man/exportResults.DESeq2.Rd
index 1cb3769..d42c891 100644
--- a/man/exportResults.DESeq2.Rd
+++ b/man/exportResults.DESeq2.Rd
@@ -24,4 +24,3 @@ Export counts and DESeq2 results
 \author{
 Marie-Agnes Dillies and Hugo Varet
 }
-
diff --git a/man/exportResults.edgeR.Rd b/man/exportResults.edgeR.Rd
index e6b4eee..e7c1b2c 100644
--- a/man/exportResults.edgeR.Rd
+++ b/man/exportResults.edgeR.Rd
@@ -29,4 +29,3 @@ Export counts and edgeR results
 \author{
 Marie-Agnes Dillies and Hugo Varet
 }
-
diff --git a/man/loadCountData.Rd b/man/loadCountData.Rd
index 6a7175c..71194d4 100644
--- a/man/loadCountData.Rd
+++ b/man/loadCountData.Rd
@@ -29,4 +29,3 @@ If \code{featuresToRemove} is equal to \code{"rRNA"}, all the features containin
 \author{
 Marie-Agnes Dillies and Hugo Varet
 }
-
diff --git a/man/loadTargetFile.Rd b/man/loadTargetFile.Rd
index 2b76f1d..4176468 100644
--- a/man/loadTargetFile.Rd
+++ b/man/loadTargetFile.Rd
@@ -27,4 +27,3 @@ The \code{batch} parameter is used only to check if it is available in the targe
 \author{
 Marie-Agnes Dillies and Hugo Varet
 }
-
diff --git a/man/majSequences.Rd b/man/majSequences.Rd
index 41bc11c..4cc2f7a 100644
--- a/man/majSequences.Rd
+++ b/man/majSequences.Rd
@@ -27,4 +27,3 @@ Proportion of reads associated with the three most expressed sequences per sampl
 \author{
 Marie-Agnes Dillies and Hugo Varet
 }
-
diff --git a/man/nDiffTotal.Rd b/man/nDiffTotal.Rd
index e8a1a9d..cfb10c5 100644
--- a/man/nDiffTotal.Rd
+++ b/man/nDiffTotal.Rd
@@ -20,4 +20,3 @@ Number of down- and up-regulated features per comparison
 \author{
 Marie-Agnes Dillies and Hugo Varet
 }
-
diff --git a/man/pairwiseScatterPlots.Rd b/man/pairwiseScatterPlots.Rd
index cae97a6..62a4632 100644
--- a/man/pairwiseScatterPlots.Rd
+++ b/man/pairwiseScatterPlots.Rd
@@ -22,4 +22,3 @@ Scatter plots for pairwise comparaisons of log counts
 \author{
 Marie-Agnes Dillies and Hugo Varet
 }
-
diff --git a/man/rawpHist.Rd b/man/rawpHist.Rd
index 97f4349..663d1d7 100644
--- a/man/rawpHist.Rd
+++ b/man/rawpHist.Rd
@@ -20,4 +20,3 @@ Histogram of raw p-values for each comparison
 \author{
 Marie-Agnes Dillies and Hugo Varet
 }
-
diff --git a/man/removeNull.Rd b/man/removeNull.Rd
index aa4b21d..571ecf6 100644
--- a/man/removeNull.Rd
+++ b/man/removeNull.Rd
@@ -18,4 +18,3 @@ Remove features with null counts in all samples. These features do not contain a
 \author{
 Marie-Agnes Dillies and Hugo Varet
 }
-
diff --git a/man/run.DESeq2.Rd b/man/run.DESeq2.Rd
index a839629..8378a51 100644
--- a/man/run.DESeq2.Rd
+++ b/man/run.DESeq2.Rd
@@ -40,4 +40,3 @@ Wrapper to run DESeq2: create the \code{DESeqDataSet}, normalize data, estimate
 \author{
 Hugo Varet
 }
-
diff --git a/man/run.edgeR.Rd b/man/run.edgeR.Rd
index b22a4b4..4720544 100644
--- a/man/run.edgeR.Rd
+++ b/man/run.edgeR.Rd
@@ -35,4 +35,3 @@ Wrapper to run edgeR: create the \code{dge} object, normalize data, estimate dis
 \author{
 Hugo Varet
 }
-
diff --git a/man/summarizeResults.DESeq2.Rd b/man/summarizeResults.DESeq2.Rd
index 37229a9..63d7f95 100644
--- a/man/summarizeResults.DESeq2.Rd
+++ b/man/summarizeResults.DESeq2.Rd
@@ -30,4 +30,3 @@ Summarize DESeq2 analysis: diagnotic plots, dispersions plot, summary of the ind
 \author{
 Hugo Varet
 }
-
diff --git a/man/summarizeResults.edgeR.Rd b/man/summarizeResults.edgeR.Rd
index 6e64f83..627f3ab 100644
--- a/man/summarizeResults.edgeR.Rd
+++ b/man/summarizeResults.edgeR.Rd
@@ -27,4 +27,3 @@ Summarize edgeR analysis: diagnotic plots, dispersions plot, summary of the inde
 \author{
 Hugo Varet
 }
-
diff --git a/man/tabIndepFiltering.Rd b/man/tabIndepFiltering.Rd
index 0c1ddae..98d8764 100644
--- a/man/tabIndepFiltering.Rd
+++ b/man/tabIndepFiltering.Rd
@@ -18,4 +18,3 @@ Compute the number of features discarded by the independent filtering for each c
 \author{
 Marie-Agnes Dillies and Hugo Varet
 }
-
diff --git a/man/tabSERE.Rd b/man/tabSERE.Rd
index 3a0e002..97323ca 100644
--- a/man/tabSERE.Rd
+++ b/man/tabSERE.Rd
@@ -18,4 +18,3 @@ Compute the SERE statistic for each pair of samples
 \author{
 Marie-Agnes Dillies and Hugo Varet
 }
-
diff --git a/man/volcanoPlot.Rd b/man/volcanoPlot.Rd
index 2460f02..2243243 100644
--- a/man/volcanoPlot.Rd
+++ b/man/volcanoPlot.Rd
@@ -22,4 +22,3 @@ Volcano plot for each comparison: -log10(adjusted P value) vs log2(FC) with one
 \author{
 Hugo Varet
 }
-
diff --git a/man/writeReport.DESeq2.Rd b/man/writeReport.DESeq2.Rd
index 94576ef..8231b6d 100644
--- a/man/writeReport.DESeq2.Rd
+++ b/man/writeReport.DESeq2.Rd
@@ -63,4 +63,3 @@ This function generates the HTML report for a statistical analysis with DESeq2.
 \author{
 Hugo Varet
 }
-
diff --git a/man/writeReport.edgeR.Rd b/man/writeReport.edgeR.Rd
index c743a1c..c002334 100644
--- a/man/writeReport.edgeR.Rd
+++ b/man/writeReport.edgeR.Rd
@@ -57,4 +57,3 @@ This function generates the HTML report for a statistical analysis with edgeR. I
 \author{
 Hugo Varet
 }
-
diff --git a/template_script_DESeq2.r b/template_script_DESeq2.r
index eefc4b9..afc2dfe 100755
--- a/template_script_DESeq2.r
+++ b/template_script_DESeq2.r
@@ -1,8 +1,8 @@
 ################################################################################
 ### R script to compare several conditions with the SARTools and DESeq2 packages
 ### Hugo Varet
-### May 9th, 2016
-### designed to be executed with SARTools 1.4.0
+### May 2nd, 2017
+### designed to be executed with SARTools 1.4.1
 ################################################################################
 
 ################################################################################
diff --git a/template_script_DESeq2_CL.r b/template_script_DESeq2_CL.r
index 05526c4..2b29d45 100755
--- a/template_script_DESeq2_CL.r
+++ b/template_script_DESeq2_CL.r
@@ -1,8 +1,8 @@
 ################################################################################
 ### R script to compare several conditions with the SARTools and DESeq2 packages
 ### Hugo Varet
-### November 28th, 2016
-### designed to be executed with SARTools 1.4.0
+### May 2nd, 2017
+### designed to be executed with SARTools 1.4.1
 ### run "Rscript template_script_DESeq2_CL.r --help" to get some help
 ################################################################################
 
diff --git a/template_script_edgeR.r b/template_script_edgeR.r
index 761dbc7..490c10b 100755
--- a/template_script_edgeR.r
+++ b/template_script_edgeR.r
@@ -1,8 +1,8 @@
 ################################################################################
 ### R script to compare several conditions with the SARTools and edgeR packages
 ### Hugo Varet
-### May 9th, 2016
-### designed to be executed with SARTools 1.4.0
+### May 2nd, 2017
+### designed to be executed with SARTools 1.4.1
 ################################################################################
 
 ################################################################################
diff --git a/template_script_edgeR_CL.r b/template_script_edgeR_CL.r
index 09d582d..8af7f3a 100755
--- a/template_script_edgeR_CL.r
+++ b/template_script_edgeR_CL.r
@@ -1,8 +1,8 @@
 ################################################################################
 ### R script to compare several conditions with the SARTools and edgeR packages
 ### Hugo Varet
-### November 28th, 2016
-### designed to be executed with SARTools 1.4.0
+### May 2nd, 2017
+### designed to be executed with SARTools 1.4.1
 ### run "Rscript template_script_edgeR_CL.r --help" to get some help
 ################################################################################