work_representative-non-coding-transcriptome_part-1.md
Table of contents
Code: Get situated
#!/bin/bash

# Get situated: move into the 2023-0215 results directory, make sure one input
# subdirectory exists per annotation set, and activate the gff3_env
# environment.
if cd "${HOME}/tsukiyamalab/kalavatt/2022-2023_RRP6-NAB3/results/2023-0215"; then
    # mkdir -p is a no-op for directories that already exist, so it is run
    # unconditionally: guarding on the parent directory alone would skip
    # creating any subdirectory that is missing from an existing parent.
    mkdir -p infiles_gtf-gff3/representation/{NUTs,CUTs_SUTs,CUTs-HMM_CUTs-4X,XUTs,SRATs,ncRNAs}
else
    # Never build the directory tree relative to some other working directory.
    echo "Error: could not cd into results/2023-0215; no directories created." >&2
fi

source activate gff3_env
Notes, code: Download NUTs
Manually download the NUTs annotation file (received by email from Michael Lidschreiber) to `infiles_gtf-gff3/representation/NUTs`.
Code: Download NUTs
#!/bin/bash

# Directory holding the manually downloaded NUTs annotation
dir_NUTs="infiles_gtf-gff3/representation/NUTs"

# Keep the file as received and add a copy under a short, descriptive name
cp "${dir_NUTs}/Sc.cerevisiae.feature.anno_Schulz_2013.gtf" "${dir_NUTs}/NUTs.gtf"
#NOTE Already in R64 coordinates
Code: Download CUTs, SUTs
#!/bin/bash

# Download the CUTs/SUTs supplementary spreadsheet and the liftOver chain file
# (V56 -> V64) that goes with it. Each download is kept under its original
# name and also copied to a shorter, more descriptive name.
#
# curl options used below:
#   --fail      return non-zero on an HTTP error instead of saving the
#               server's error page as if it were the requested file
#   --location  follow redirects instead of saving the redirect response
# Each cp is chained with && so that a failed download is never copied to the
# working file name.

# Get the list of CUTs, SUTs, then give the file a descriptive name
curl --fail --location \
    --output infiles_gtf-gff3/representation/CUTs_SUTs/41586_2009_BFnature07728_MOESM276_ESM.xls \
    https://static-content.springer.com/esm/art%3A10.1038%2Fnature07728/MediaObjects/41586_2009_BFnature07728_MOESM276_ESM.xls \
    && cp \
        infiles_gtf-gff3/representation/CUTs_SUTs/41586_2009_BFnature07728_MOESM276_ESM.xls \
        infiles_gtf-gff3/representation/CUTs_SUTs/CUTs_SUTs.xls

# Get necessary liftOver file, and give it a helpful name
# (scheme written out explicitly; curl assumed http:// for the bare host name)
curl --fail --location \
    --output infiles_gtf-gff3/representation/CUTs_SUTs/V56_2007_04_06_V64_2011_02_03.over.chain \
    http://sgd-archive.yeastgenome.org/sequence/S288C_reference/genome_releases/liftover/V56_2007_04_06_V64_2011_02_03.over.chain \
    && cp \
        infiles_gtf-gff3/representation/CUTs_SUTs/V56_2007_04_06_V64_2011_02_03.over.chain \
        infiles_gtf-gff3/representation/CUTs_SUTs/liftOver_R56-to-R64.chain
Code: Download CUTs-4X, CUTs-HMM
#!/bin/bash

# Download the CUTs-4x spreadsheet and the CUTs-HMM table. Each download is
# kept under its original name and also copied to a shorter, more descriptive
# name.
#
# curl options used below:
#   --fail      return non-zero on an HTTP error instead of saving the
#               server's error page as if it were the requested file
#   --location  follow redirects instead of saving the redirect response
# Each cp is chained with && so that a failed download is never copied to the
# working file name.

# Get CUTs-4x, then give the file a descriptive name
curl --fail --location \
    --output infiles_gtf-gff3/representation/CUTs-HMM_CUTs-4X/12864_2016_2622_MOESM5_ESM.xlsx \
    https://static-content.springer.com/esm/art%3A10.1186%2Fs12864-016-2622-5/MediaObjects/12864_2016_2622_MOESM5_ESM.xlsx \
    && cp \
        infiles_gtf-gff3/representation/CUTs-HMM_CUTs-4X/12864_2016_2622_MOESM5_ESM.xlsx \
        infiles_gtf-gff3/representation/CUTs-HMM_CUTs-4X/CUTs-4x.xlsx

# Get CUTs-HMM, then give the file a descriptive name
curl --fail --location \
    --output infiles_gtf-gff3/representation/CUTs-HMM_CUTs-4X/GSE74028_S288c.CUTs.txt.gz \
    https://ftp.ncbi.nlm.nih.gov/geo/series/GSE74nnn/GSE74028/suppl/GSE74028_S288c.CUTs.txt.gz \
    && cp \
        infiles_gtf-gff3/representation/CUTs-HMM_CUTs-4X/GSE74028_S288c.CUTs.txt.gz \
        infiles_gtf-gff3/representation/CUTs-HMM_CUTs-4X/CUTs-HMM.txt.gz
#NOTE Already in R64 coordinates
Code: Download XUTs
#!/bin/bash

# Download the XUTs annotation and the liftOver chain file (V63 -> V64) that
# goes with it. Each download is kept under its original name and also copied
# to a shorter, more descriptive name.
#
# curl options used below:
#   --fail      return non-zero on an HTTP error instead of saving the
#               server's error page as if it were the requested file
#   --location  follow redirects instead of saving the redirect response
# Each cp is chained with && so that a failed download is never copied to the
# working file name.

# Get XUTs, then give the file a descriptive name
curl --fail --location \
    --output infiles_gtf-gff3/representation/XUTs/XUTs_Van_Dijk_et_al_2011.gff \
    http://vm-gb.curie.fr/XUT/XUTs_Van_Dijk_et_al_2011.gff \
    && cp \
        infiles_gtf-gff3/representation/XUTs/XUTs_Van_Dijk_et_al_2011.gff \
        infiles_gtf-gff3/representation/XUTs/XUTs.gff

# Get necessary liftOver file, and give it a helpful name
curl --fail --location \
    --output infiles_gtf-gff3/representation/XUTs/V63_2010_01_05_V64_2011_02_03.over.chain \
    http://sgd-archive.yeastgenome.org/sequence/S288C_reference/genome_releases/liftover/V63_2010_01_05_V64_2011_02_03.over.chain \
    && cp \
        infiles_gtf-gff3/representation/XUTs/V63_2010_01_05_V64_2011_02_03.over.chain \
        infiles_gtf-gff3/representation/XUTs/liftOver_R63-to-R64.chain
Code: Download SRATs
#!/bin/bash

# Download the SRATs supplementary table, keep it under its original name, and
# copy it to a shorter, more descriptive name.
#
# curl options used below:
#   --fail      return non-zero on an HTTP error instead of saving the
#               server's error page as if it were the requested file
#   --location  follow redirects instead of saving the redirect response
# The URL now carries an explicit https:// scheme, matching the other
# static-content.springer.com downloads in this notebook; without a scheme
# curl falls back to plain http://.
# The cp is chained with && so that a failed download is never copied to the
# working file name.

# Get SRATs, then give the file a descriptive name
curl --fail --location \
    --output infiles_gtf-gff3/representation/SRATs/41467_2016_BFncomms13610_MOESM1735_ESM.csv \
    https://static-content.springer.com/esm/art%3A10.1038%2Fncomms13610/MediaObjects/41467_2016_BFncomms13610_MOESM1735_ESM.csv \
    && cp \
        infiles_gtf-gff3/representation/SRATs/41467_2016_BFncomms13610_MOESM1735_ESM.csv \
        infiles_gtf-gff3/representation/SRATs/SRATs.csv
#NOTE Already in R64 coordinates
Notes, code: Include SGD R64-1-1 ncRNAs
A `gtf` of SGD R64-1-1 ncRNAs was processed/isolated from `saccharomyces_cerevisiae_R64-1-1_20110208.gff` in `work_assess-process_R64-1-1-gff3_categorize-Trinity-transfrags_part-1.Rmd`.
#!/bin/bash

# Input directory for the ncRNA annotation within this experiment
dir_ncRNAs="infiles_gtf-gff3/representation/ncRNAs"

# Bring the ncRNA gtf from the outfiles tree into the experiment's input tree
cp "outfiles_gtf-gff3/representation/Greenlaw-et-al_ncRNAs.gtf" "${dir_ncRNAs}/processed_ncRNA_sense.gtf"

# Also provide the same file under a shorter name
cp "${dir_ncRNAs}/processed_ncRNA_sense.gtf" "${dir_ncRNAs}/ncRNAs.gtf"
#NOTE Already in R64 coordinates
Go to work_representative-non-coding-transcriptome_part-2.Rmd