Skip to content

Latest commit

 

History

History
233 lines (189 loc) · 6.58 KB

work_representative-non-coding-transcriptome_part-1.md

File metadata and controls

233 lines (189 loc) · 6.58 KB

work_representative-non-coding-transcriptome_part-1.md

Table of contents
  1. Obtain the data
    1. Get situated
      1. Code
    2. Download NUTs
      1. Notes, code
    3. Download CUTs, SUTs
      1. Code
    4. Download CUTs-4X, CUTs-HMM
      1. Code
    5. Download XUTs
      1. Code
    6. Download SRATs
      1. Code
    7. Include SGD R64-1-1 ncRNAs
      1. Notes, code
  2. Next step

Obtain the data

Get situated

Code

Code: Get situated
#!/bin/bash

#  Results directory for this experiment; the relative paths used below are
#+ resolved against it
dir_work="${HOME}/tsukiyamalab/kalavatt/2022-2023_RRP6-NAB3/results/2023-0215"

#  Create one input subdirectory per annotation source, but only after
#+ successfully moving into the results directory, so that the tree is never
#+ created somewhere unintended
#+
#+ mkdir -p is a no-op for directories that already exist, so it is run
#+ without first testing for the parent directory; testing only the parent
#+ would skip creation of any subdirectory that happens to be missing
if cd "${dir_work}"; then
    mkdir -p infiles_gtf-gff3/representation/{NUTs,CUTs_SUTs,CUTs-HMM_CUTs-4X,XUTs,SRATs,ncRNAs}
else
    echo "Error: could not cd into ${dir_work}; subdirectories not created" >&2
fi

#  Activate the gff3_env environment
source activate gff3_env

Download NUTs

Notes, code

Notes, code: Download NUTs

Manually download the NUTs annotation file, which was received by email from Michael Lidschreiber, and save it to infiles_gtf-gff3/representation/NUTs.

Code: Download NUTs
#!/bin/bash

#  Directory that holds the manually downloaded NUTs annotation
dir_NUTs="infiles_gtf-gff3/representation/NUTs"

#  Copy the annotation to a shorter, more descriptive name (the original file
#+ is left in place)
cp "${dir_NUTs}/Sc.cerevisiae.feature.anno_Schulz_2013.gtf" "${dir_NUTs}/NUTs.gtf"

#NOTE The file is already in R64 coordinates


Download CUTs, SUTs

Code

Code: Download CUTs, SUTs
#!/bin/bash

#  Directory that holds the CUT/SUT inputs
dir_CUTs_SUTs="infiles_gtf-gff3/representation/CUTs_SUTs"

#  Get the list of CUTs, SUTs, then copy it to a shorter, more descriptive name
#+
#+ --fail makes curl exit non-zero on an HTTP error instead of saving the
#+ server's error page as if it were the data; --location follows redirects;
#+ the copy runs only if the download succeeded
curl --fail --location \
    --output "${dir_CUTs_SUTs}/41586_2009_BFnature07728_MOESM276_ESM.xls" \
    "https://static-content.springer.com/esm/art%3A10.1038%2Fnature07728/MediaObjects/41586_2009_BFnature07728_MOESM276_ESM.xls" \
        && cp \
            "${dir_CUTs_SUTs}/41586_2009_BFnature07728_MOESM276_ESM.xls" \
            "${dir_CUTs_SUTs}/CUTs_SUTs.xls"

#  Get the necessary liftOver chain file, then copy it to a helpful name
#+
#+ The http:// scheme is written out explicitly; it is what curl assumes when
#+ a URL is given without one
curl --fail --location \
    --output "${dir_CUTs_SUTs}/V56_2007_04_06_V64_2011_02_03.over.chain" \
    "http://sgd-archive.yeastgenome.org/sequence/S288C_reference/genome_releases/liftover/V56_2007_04_06_V64_2011_02_03.over.chain" \
        && cp \
            "${dir_CUTs_SUTs}/V56_2007_04_06_V64_2011_02_03.over.chain" \
            "${dir_CUTs_SUTs}/liftOver_R56-to-R64.chain"

Download CUTs-4X, CUTs-HMM

Code

Code: Download CUTs-4X, CUTs-HMM
#!/bin/bash

#  Directory that holds the CUTs-4X and CUTs-HMM inputs
dir_CUTs_HMM_4X="infiles_gtf-gff3/representation/CUTs-HMM_CUTs-4X"

#  Get CUTs-4X, then copy the file to a shorter, more descriptive name
#+
#+ --fail makes curl exit non-zero on an HTTP error instead of saving the
#+ server's error page as if it were the data; --location follows redirects;
#+ the copy runs only if the download succeeded
curl --fail --location \
    --output "${dir_CUTs_HMM_4X}/12864_2016_2622_MOESM5_ESM.xlsx" \
    "https://static-content.springer.com/esm/art%3A10.1186%2Fs12864-016-2622-5/MediaObjects/12864_2016_2622_MOESM5_ESM.xlsx" \
        && cp \
            "${dir_CUTs_HMM_4X}/12864_2016_2622_MOESM5_ESM.xlsx" \
            "${dir_CUTs_HMM_4X}/CUTs-4x.xlsx"

#  Get CUTs-HMM, then copy the file to a shorter, more descriptive name
curl --fail --location \
    --output "${dir_CUTs_HMM_4X}/GSE74028_S288c.CUTs.txt.gz" \
    "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE74nnn/GSE74028/suppl/GSE74028_S288c.CUTs.txt.gz" \
        && cp \
            "${dir_CUTs_HMM_4X}/GSE74028_S288c.CUTs.txt.gz" \
            "${dir_CUTs_HMM_4X}/CUTs-HMM.txt.gz"

#NOTE Already in R64 coordinates

Download XUTs

Code

Code: Download XUTs
#!/bin/bash

#  Directory that holds the XUT inputs
dir_XUTs="infiles_gtf-gff3/representation/XUTs"

#  Get XUTs, then copy the file to a shorter, more descriptive name
#+
#+ --fail makes curl exit non-zero on an HTTP error instead of saving the
#+ server's error page as if it were the data; --location follows redirects;
#+ the copy runs only if the download succeeded
curl --fail --location \
    --output "${dir_XUTs}/XUTs_Van_Dijk_et_al_2011.gff" \
    "http://vm-gb.curie.fr/XUT/XUTs_Van_Dijk_et_al_2011.gff" \
        && cp \
            "${dir_XUTs}/XUTs_Van_Dijk_et_al_2011.gff" \
            "${dir_XUTs}/XUTs.gff"

#  Get the necessary liftOver chain file, then copy it to a helpful name
curl --fail --location \
    --output "${dir_XUTs}/V63_2010_01_05_V64_2011_02_03.over.chain" \
    "http://sgd-archive.yeastgenome.org/sequence/S288C_reference/genome_releases/liftover/V63_2010_01_05_V64_2011_02_03.over.chain" \
        && cp \
            "${dir_XUTs}/V63_2010_01_05_V64_2011_02_03.over.chain" \
            "${dir_XUTs}/liftOver_R63-to-R64.chain"

Download SRATs

Code

Code: Download SRATs
#!/bin/bash

#  Directory that holds the SRAT inputs
dir_SRATs="infiles_gtf-gff3/representation/SRATs"

#  Get SRATs, then copy the file to a shorter, more descriptive name
#+
#+ The https:// scheme is written out explicitly (without a scheme, curl
#+ falls back to plain http and, lacking --location, would save any redirect
#+ response rather than the data)
#+
#+ --fail makes curl exit non-zero on an HTTP error instead of saving the
#+ server's error page as if it were the data; --location follows redirects;
#+ the copy runs only if the download succeeded
curl --fail --location \
    --output "${dir_SRATs}/41467_2016_BFncomms13610_MOESM1735_ESM.csv" \
    "https://static-content.springer.com/esm/art%3A10.1038%2Fncomms13610/MediaObjects/41467_2016_BFncomms13610_MOESM1735_ESM.csv" \
        && cp \
            "${dir_SRATs}/41467_2016_BFncomms13610_MOESM1735_ESM.csv" \
            "${dir_SRATs}/SRATs.csv"

#NOTE Already in R64 coordinates

Include SGD R64-1-1 ncRNAs

Notes, code

Notes, code: Include SGD R64-1-1 ncRNAs

A gtf of SGD R64-1-1 ncRNAs was processed/isolated from saccharomyces_cerevisiae_R64-1-1_20110208.gff in work_assess-process_R64-1-1-gff3_categorize-Trinity-transfrags_part-1.Rmd.

#!/bin/bash

#  Directory that holds the ncRNA inputs for this experiment
dir_ncRNAs="infiles_gtf-gff3/representation/ncRNAs"

#  Bring the ncRNA gtf from the outfiles directory into the experiment
#+ directory
cp outfiles_gtf-gff3/representation/Greenlaw-et-al_ncRNAs.gtf "${dir_ncRNAs}/processed_ncRNA_sense.gtf"

#  Make a second copy of the file under a shorter name
cp "${dir_ncRNAs}/processed_ncRNA_sense.gtf" "${dir_ncRNAs}/ncRNAs.gtf"

#NOTE The file is already in R64 coordinates


Next step

Go to work_representative-non-coding-transcriptome_part-2.Rmd