diff --git a/README.md b/README.md index 9065bb3b8..3a29d6a3c 100644 --- a/README.md +++ b/README.md @@ -1,38 +1,8 @@ # ![nfcore/test-datasets](docs/images/test-datasets_logo.png) -Test data to be used for automated testing with the nf-core pipelines -> ⚠️ **Do not merge your test data to `master`! Each pipeline has a dedicated branch (and a special one for modules)** +# test-datasets: stableexpression +This branch contains test data to be used for automated testing with the nf-core/stableexpression pipeline. -## Introduction +## Content of this repository -nf-core is a collection of high quality Nextflow pipelines. This repository contains various files for CI and unit testing of nf-core pipelines and infrastructure. - -The principle for nf-core test data is as small as possible, as large as necessary. Please see the [guidelines](https://nf-co.re/docs/contributing/test_data_guidelines) for more detailed information. Always ask for guidance on the [nf-core slack](https://nf-co.re/join) before adding new test data. - -## Documentation - -nf-core/test-datasets comes with documentation in the `docs/` directory: - -01. [Add a new test dataset](https://github.com/nf-core/test-datasets/blob/master/docs/ADD_NEW_DATA.md) -02. [Use an existing test dataset](https://github.com/nf-core/test-datasets/blob/master/docs/USE_EXISTING_DATA.md) - -## Downloading test data - -Due the large number of large files in this repository for each pipeline, we highly recommend cloning only the branches you would use. - -```bash -git clone --single-branch --branch -``` - -To subsequently clone other branches[^1] - -```bash -git remote set-branches --add origin [remote-branch] -git fetch -``` - -## Support - -For further information or help, don't hesitate to get in touch on our [Slack organisation](https://nf-co.re/join/slack) (a tool for instant messaging). - -[^1]: From [stackoverflow](https://stackoverflow.com/a/60846265/11502856) +All the data contained here were subsampled from datasets collected from Expression. In some cases, data were also generated randomly. diff --git a/test_data/input_datasets/input.csv b/test_data/input_datasets/input.csv new file mode 100644 index 000000000..73278d53e --- /dev/null +++ b/test_data/input_datasets/input.csv @@ -0,0 +1,3 @@ +counts,design,platform,normalised +https://raw.githubusercontent.com/nf-core/test-datasets/stableexpression/test_data/input_datasets/microarray.normalised.csv,https://raw.githubusercontent.com/nf-core/test-datasets/stableexpression/test_data/input_datasets/microarray.normalised.design.csv,microarray,true +https://raw.githubusercontent.com/nf-core/test-datasets/stableexpression/test_data/input_datasets/rnaseq.raw.csv,https://raw.githubusercontent.com/nf-core/test-datasets/stableexpression/test_data/input_datasets/rnaseq.raw.design.csv,rnaseq,false diff --git a/test_data/input_datasets/input_big.yaml b/test_data/input_datasets/input_big.yaml new file mode 100644 index 000000000..f54577bbf --- /dev/null +++ b/test_data/input_datasets/input_big.yaml @@ -0,0 +1,4 @@ +- counts: https://raw.githubusercontent.com/nf-core/test-datasets/differentialabundance/modules_testdata/SRP254919.salmon.merged.gene_counts.top1000cov.assay.tsv + design: https://raw.githubusercontent.com/nf-core/test-datasets/stableexpression/test_data/input_datasets/rnaseq_big.design.csv + platform: rnaseq + normalised: false diff --git a/test_data/input_datasets/mapping.csv b/test_data/input_datasets/mapping.csv new file mode 100644 index 000000000..5eac321ff --- /dev/null +++ b/test_data/input_datasets/mapping.csv @@ -0,0 +1,10 @@ +original_gene_id,ensembl_gene_id +ENSRNA049453121,SNSRNA049434199 +ENSRNA049453138,SNSRNA049434246 +ENSRNA049454388,SNSRNA049434252 +ENSRNA049454416,SNSRNA049434253 +ENSRNA049454647,SNSRNA049434254 +ENSRNA049454661,SNSRNA049434255 +ENSRNA049454747,SNSRNA049434256 +ENSRNA049454887,SNSRNA049434257 +ENSRNA049454931,SNSRNA049434258 diff --git a/test_data/input_datasets/metadata.csv b/test_data/input_datasets/metadata.csv new file mode 100644 index 000000000..a7cd3a847 --- /dev/null +++ b/test_data/input_datasets/metadata.csv @@ -0,0 +1,10 @@ +ensembl_gene_id,name,description +ENSRNA049453121,geneA,descriptionA +ENSRNA049453138,geneB,descriptionB +ENSRNA049454388,geneC,descriptionC +ENSRNA049454416,geneD,descriptionD +ENSRNA049454647,geneE,descriptionE +ENSRNA049454661,geneF,descriptionF +ENSRNA049454747,geneG,descriptionG +ENSRNA049454887,geneH,descriptionH +ENSRNA049454931,geneI,descriptionI diff --git a/test_data/input_datasets/microarray.normalised.csv b/test_data/input_datasets/microarray.normalised.csv new file mode 100644 index 000000000..1f93b0ca1 --- /dev/null +++ b/test_data/input_datasets/microarray.normalised.csv @@ -0,0 +1,10 @@ +ensembl_gene_id,GSM1528575,GSM1528576,GSM1528579,GSM1528583,GSM1528584,GSM1528585,GSM1528580,GSM1528586,GSM1528582,GSM1528578,GSM1528581,GSM1528577 +ENSRNA049453121,20925.1255070264,136184.261516502,144325.370645564,89427.0987612997,164143.182734208,34178.6378088171,28842.7323281157,76973.395782103,41906.9367255656,44756.5602263121,252562.049703724,6953.65643340122 +ENSRNA049453138,196173.051628372,16607.8367703051,344972.83715281,22602.4535330758,13678.598561184,104546.421532852,15451.4637472048,71664.8857281649,160643.257448002,91459.0578537683,88396.7173963033,281623.08555275 +ENSRNA049454388,91547.4240932405,11625.4857392136,84483.143792525,80582.6604222701,218857.576978944,58304.7350856292,42234.0009090266,88475.1675656357,87306.1181782617,17513.436610296,90922.3378933406,76490.2207674135 +ENSRNA049454416,20925.1255070264,106290.155329953,193607.204524536,47170.3378081581,392119.825420608,190998.270108096,90648.5873169351,81397.1541603848,83813.8734511313,165404.67909724,111127.301869638,194702.380135234 +ENSRNA049454647,99394.3461583754,91343.1022366783,3520.13099135521,71738.2220832404,118547.854196928,20105.0810640101,81377.7090686122,15040.7784861581,66352.6498154789,110918.431865208,55563.6509348192,111258.50293442 +ENSRNA049454661,175247.926121346,66431.3470812206,24640.9169394865,52083.9146631746,360203.095444512,36189.1459152181,70046.6356539953,85820.9125386666,13968.9789085219,50594.3724297441,25256.2049703724,52152.4232505092 +ENSRNA049454747,117703.830977024,154452.881963838,281610.479308417,29481.4611300988,191500.379856576,152798.616086476,53565.0743236435,14156.0268105017,293348.557078959,155674.99209152,63140.5124259309,243377.975169043 +ENSRNA049454887,2615.6406883783,164417.584026021,28161.0479308417,82548.0911642767,50154.861391008,136714.551235268,97859.270398964,64586.872322914,328271.004350264,159566.866893808,151537.229822234,86920.7054175153 +ENSRNA049454931,177863.566809724,81378.4001744952,235848.776420799,88444.3833902964,18238.131414912,120630.48638406,82407.8066517592,50430.8455124123,118736.320722436,68107.8090400402,232357.085727426,163410.926184929 diff --git a/test_data/input_datasets/microarray.normalised.design.csv b/test_data/input_datasets/microarray.normalised.design.csv new file mode 100644 index 000000000..d31e5cef6 --- /dev/null +++ b/test_data/input_datasets/microarray.normalised.design.csv @@ -0,0 +1,13 @@ +sample,condition +GSM1528575,g1 +GSM1528576,g1 +GSM1528579,g1 +GSM1528583,g2 +GSM1528584,g2 +GSM1528585,g2 +GSM1528580,g3 +GSM1528586,g3 +GSM1528582,g3 +GSM1528578,g4 +GSM1528581,g4 +GSM1528577,g4 diff --git a/test_data/input_datasets/rnaseq.raw.csv b/test_data/input_datasets/rnaseq.raw.csv new file mode 100644 index 000000000..4d558cc2b --- /dev/null +++ b/test_data/input_datasets/rnaseq.raw.csv @@ -0,0 +1,10 @@ +ensembl_gene_id,ESM1528575,ESM1528576,ESM1528579,ESM1528583,ESM1528584,ESM1528585,ESM1528580,ESM1528586,ESM1528582,ESM1528578,ESM1528581,ESM1528577 +ENSRNA049453121,1,82,8,82,4,68,88,73,46,57,25,22 +ENSRNA049453138,68,93,41,84,36,18,28,92,84,85,92,32 +ENSRNA049454388,38,10,0,23,11,17,95,57,25,82,10,70 +ENSRNA049454416,75,55,7,30,79,60,15,97,12,35,60,56 +ENSRNA049454647,35,64,55,91,48,95,68,100,24,26,100,47 +ENSRNA049454661,8,99,80,48,86,29,80,17,19,9,44,2 +ENSRNA049454747,67,7,98,53,3,10,52,87,4,80,22,15 +ENSRNA049454887,8,40,24,90,42,52,79,81,94,23,35,81 +ENSRNA049454931,45,49,67,73,26,76,41,16,34,47,36,25 diff --git a/test_data/input_datasets/rnaseq.raw.design.csv b/test_data/input_datasets/rnaseq.raw.design.csv new file mode 100644 index 000000000..469751d2f --- /dev/null +++ b/test_data/input_datasets/rnaseq.raw.design.csv @@ -0,0 +1,13 @@ +sample,condition +ESM1528575,g1 +ESM1528576,g1 +ESM1528579,g1 +ESM1528583,g2 +ESM1528584,g2 +ESM1528585,g2 +ESM1528580,g3 +ESM1528586,g3 +ESM1528582,g3 +ESM1528578,g4 +ESM1528581,g4 +ESM1528577,g4 diff --git a/test_data/input_datasets/rnaseq_big.design.csv b/test_data/input_datasets/rnaseq_big.design.csv new file mode 100644 index 000000000..e8de12df2 --- /dev/null +++ b/test_data/input_datasets/rnaseq_big.design.csv @@ -0,0 +1,7 @@ +sample,condition +SRX8042381,control +SRX8042382,control +SRX8042383,control +SRX8042384,treatment +SRX8042385,treatment +SRX8042386,treatment