Skip to content

Latest commit

 

History

History
1502 lines (1274 loc) · 51.3 KB

work_count_features_htseq-count.md

File metadata and controls

1502 lines (1274 loc) · 51.3 KB

#work_count_features_htseq-count.md

Table of Contents
  1. Get situated
    1. Code
  2. Run htseq-count on bams in bams_renamed/ with combined_SC_KL_20S.gff3
    1. Set up outfile directories
      1. Code
    2. Set up arrays of bams
      1. Code
    3. Index bams
      1. Code
    4. Run htseq-count with combined_SC_KL_20S.gff3
      1. Set up necessary variables
        1. Code
      2. Set up and submit htseq-count jobs
        1. Code
    5. Concatenate files, copy pertinent files to AG
      1. Code
  3. Run htseq-count on bams in bams_renamed/ with combined_AG.sans-chr.gtf
    1. Set up outfile directories
      1. Code
    2. Strip "chr" from chromosome names in combined_AG.gtf
      1. Code
    3. Set up arrays of bams
      1. Code
    4. Index bams
      1. Code
    5. Run htseq-count with combined_AG.sans-chr.gtf
      1. Set up necessary variables
        1. Code
      2. Set up and submit htseq-count jobs
        1. Code
    6. Concatenate files, copy pertinent files to AG
      1. Code

Get situated

Code

Code: Get situated
#!/bin/bash

#  Optional: do this work inside a persistent tmux session
#+     tmux new -s htseq    (start the session)
#+     tmux a -t htseq      (reattach to it)

#  "transcriptome" is a shortcut defined outside this notebook; only when it
#+ succeeds do we descend into this analysis' results directory
if transcriptome; then
    cd "results/2023-0215/" || echo "cd'ing failed; check on this..."
fi

#  Activate the gff3_env environment
source activate gff3_env

#  "., " is another shortcut defined outside this notebook; per the printed
#+ transcript it gives a long listing of the current directory
.,

Printed: Get situated
❯ transcriptome &&
>     {
>         cd "results/2023-0215/" \
>             || echo "cd'ing failed; check on this..."
>     }
/home/kalavatt/tsukiyamalab/kalavatt/2022-2023_RRP6-NAB3/results/2023-0215


❯ source activate gff3_env


❯ .,
total 22M
drwxrws---  8 kalavatt 2.0K Mar 30 12:24 ./
drwxrws--- 12 kalavatt  270 Mar 27 11:06 ../
drwxrws---  2 kalavatt  259 Mar 14 15:52 bams/
drwxrws---  8 kalavatt  175 Mar 14 15:54 bams_renamed/
drwxrws---  4 kalavatt   53 Mar 27 14:01 infiles_gtf-gff3/
drwxrws---  4 kalavatt  170 Mar 30 12:24 notebook/
drwxrws---  4 kalavatt   53 Mar 30 11:19 outfiles_gtf-gff3/
drwxrws---  4 kalavatt   53 Mar 28 12:32 outfiles_htseq-count/
-rw-rw----  1 kalavatt  31K Mar  3 09:16 test_count_features.md
-rw-rw----  1 kalavatt  25K Mar 29 10:48 work_assessment-processing_gtfs.md
-rw-rw----  1 kalavatt 736K Mar 30 12:24 work_assessment-processing_gtfs.nb.html
-rw-rw----  1 kalavatt  21K Mar 30 12:24 work_assessment-processing_gtfs.Rmd
-rw-rw----  1 kalavatt  78K Mar 29 10:48 work_count_features_featureCounts.md
-rw-rw----  1 kalavatt  12K Mar 29 10:48 work_count_features_htseq-count.md
-rw-rw----  1 kalavatt 217K Mar 30 12:24 work_env-building.md
-rw-rw----  1 kalavatt 3.2M Mar 29 10:48 work_evaluation-etc_rough-draft_Rrp6-WT_SS_timecourse_groupwise.nb.html
-rw-rw----  1 kalavatt  51K Mar 29 10:48 work_evaluation-etc_rough-draft_Rrp6-WT_SS_timecourse_groupwise.Rmd
-rw-rw----  1 kalavatt 1.9M Mar 29 10:48 work_evaluation-etc_variables_pairwise-groupwise.nb.html
-rw-rw----  1 kalavatt  50K Mar 29 10:48 work_evaluation-etc_variables_pairwise-groupwise.Rmd
-rw-rw----  1 kalavatt  39K Mar 29 10:48 work_evaluation-etc_variables_pairwise-groupwise.tmp-gw.R
-rw-rw----  1 kalavatt  33K Mar 29 10:48 work_evaluation-etc_variables_pairwise-groupwise.tmp-pw.R
-rw-rw----  1 kalavatt 6.7K Mar 29 10:48 work_evaluation-etc_variables_pairwise-groupwise.TODOs-scraps-etc.txt
-rw-rw----  1 kalavatt 656K Mar  3 09:16 work_gff3_convert-strand-designations.nb.html
-rw-rw----  1 kalavatt 2.0K Mar  3 09:16 work_gff3_convert-strand-designations.Rmd
-rw-rw----  1 kalavatt 6.8K Feb 22 16:21 work_gff3_include-20S.md
-rw-rw----  1 kalavatt 5.6K Feb 22 16:21 work_model-variables.md
-rw-rw----  1 kalavatt 2.5M Mar 13 16:33 work_normalization-etc_rough-draft_NNS_vary-on-transcription.nb.html
-rw-rw----  1 kalavatt  33K Mar 13 16:33 work_normalization-etc_rough-draft_NNS_vary-on-transcription.Rmd
-rw-rw----  1 kalavatt 1.1M Mar 29 10:48 work_normalization-etc_rough-draft_OsTIR-NNS_vary-on-strain.nb.html
-rw-rw----  1 kalavatt  59K Mar 29 10:48 work_normalization-etc_rough-draft_OsTIR-NNS_vary-on-strain.Rmd
-rw-rw----  1 kalavatt 4.5M Mar 29 10:48 work_normalization-etc_rough-draft_wild-type_vary-on-state_antisense.nb.html
-rw-rw----  1 kalavatt  64K Mar 29 10:48 work_normalization-etc_rough-draft_wild-type_vary-on-state_antisense.Rmd
-rw-rw----  1 kalavatt 803K Mar 29 10:48 work_normalization-etc_rough-draft_wild-type_vary-on-state.nb.html
-rw-rw----  1 kalavatt  53K Mar 29 10:48 work_normalization-etc_rough-draft_wild-type_vary-on-state.Rmd


Run htseq-count on bams in bams_renamed/ with combined_SC_KL_20S.gff3

Set up outfile directories

Code

Code: Set up outfile directories
#!/bin/bash

#  Prerequisite (see "Get situated"): be in results/2023-0215/ with gff3_env
#+ activated, i.e.,
#+     transcriptome && cd "results/2023-0215/"
#+     source activate gff3_env

#  Make one outfile directory, each with an err_out/ subdirectory, per bam
#+ category, unless a U* directory is already there. Only the first glob
#+ result is examined: when nothing matches, the pattern is left unexpanded,
#+ the -e test fails, and the directories are created
for first_hit in ./outfiles_htseq-count/already/combined-SC-KL-20S/U*; do
    if [[ -e "${first_hit}" ]]; then
        echo "Directories present; skipping mkdir'ing of outfile directories"
        break
    fi

    for category in \
        UTK_prim_no UTK_prim_pos UTK_prim_UMI \
        UT_prim_no UT_prim_pos UT_prim_UMI; do
        mkdir -p "outfiles_htseq-count/already/combined-SC-KL-20S/${category}/err_out"
    done

    break
done

Set up arrays of bams

Code

Code: Set up arrays
#!/bin/bash

#  Emit, NUL-delimited and sorted, every symlink named *.bam found under
#+ bams_renamed/<$1>
list_bam_links() {
    find "bams_renamed/${1}" \
        -type l \
        -name "*.bam" \
        -print0 \
            | sort -z
}


#  Bams for category UT_prim_UMI
unset UT_prim_UMI
declare -a UT_prim_UMI
while IFS= read -r -d '' bam; do
    UT_prim_UMI+=( "${bam}" )
done < <(list_bam_links "UT_prim_UMI")

#  Bams for category UTK_prim_UMI
unset UTK_prim_UMI
declare -a UTK_prim_UMI
while IFS= read -r -d '' bam; do
    UTK_prim_UMI+=( "${bam}" )
done < <(list_bam_links "UTK_prim_UMI")

#  Four more categories are currently switched off: UT_prim_pos, UTK_prim_pos,
#+ UT_prim_no, and UTK_prim_no. To bring one back, repeat the pattern above
#+ with its name, e.g.,
#+     unset UT_prim_pos
#+     declare -a UT_prim_pos
#+     while IFS= read -r -d '' bam; do
#+         UT_prim_pos+=( "${bam}" )
#+     done < <(list_bam_links "UT_prim_pos")

#  Show what was collected (echo_test is defined outside this notebook)
echo_test "${UT_prim_UMI[@]}"
echo_test "${UTK_prim_UMI[@]}"
# echo_test "${UT_prim_pos[@]}"
# echo_test "${UTK_prim_pos[@]}"
# echo_test "${UT_prim_no[@]}"
# echo_test "${UTK_prim_no[@]}"

#  ...and how many bams each array holds
echo "${#UT_prim_UMI[@]}"
echo "${#UTK_prim_UMI[@]}"
# echo "${#UT_prim_pos[@]}"
# echo "${#UTK_prim_pos[@]}"
# echo "${#UT_prim_no[@]}"
# echo "${#UTK_prim_no[@]}"

Index bams

Code

Code: Index all bams in arrays
#!/bin/bash

#  Index every bam in UT_prim_UMI and UTK_prim_UMI with samtools, unless a
#+ .bai file is already present in bams_renamed/UT_prim_UMI/
#+
#+ Reads:  UT_prim_UMI, UTK_prim_UMI (arrays of bam paths),
#+         SLURM_CPUS_ON_NODE (falls back to 1 when unset or empty, e.g.,
#+         outside a Slurm allocation)
#+
#+ Only the first glob result is examined: when no .bai exists, the pattern
#+ is left unexpanded, the -e test fails, and indexing runs
for h in ./bams_renamed/UT_prim_UMI/*.bai; do
    if [[ ! -e "${h}" ]]; then
        ml SAMtools/1.16.1-GCC-11.2.0

        #  To index the other categories too, extend the list below with
        #+     "${UT_prim_pos[@]}" "${UTK_prim_pos[@]}"
        #+     "${UT_prim_no[@]}"  "${UTK_prim_no[@]}"
        for i in \
            "${UT_prim_UMI[@]}" \
            "${UTK_prim_UMI[@]}"; do
            echo "${i}"
            samtools index -@ "${SLURM_CPUS_ON_NODE:-1}" "${i}"
        done

        #  Unload only after all bams are indexed; doing this inside the loop
        #+ would remove the module after the first bam
        module purge SAMtools/1.16.1-GCC-11.2.0
    else
        echo "Bam indices exist; skipping the running of samtools index"
    fi

    break
done

Run htseq-count with combined_SC_KL_20S.gff3

Set up necessary variables

Code
Code: Set up necessary variables
#!/bin/bash

#  Settings read by the htseq-count submission loop. To inspect a value, echo
#+ it (e.g., echo "${threads}"); to list the annotation file, ., "${gtf}"

#  Slurm: cap on queued jobs, CPUs per job, and the name given to each job
job_no_max='24'
threads='8'
job_name='run_htseq-count'

#  Annotation file handed to htseq-count
gtf='infiles_gtf-gff3/already/combined_SC_KL_20S.gff3'

#  To double-check the bam array before submitting:
#+     echo_test "${UT_prim_UMI[@]}"
#+     echo "${#UT_prim_UMI[@]}"

Set up and submit htseq-count jobs

Code
Code: Set up and submit htseq-count jobs
#!/bin/bash

#  Submit one htseq-count job to Slurm for each combination of strandedness
#+ label ("strd-eq", "strd-op") and bam in UT_prim_UMI, waiting whenever the
#+ user already has job_no_max or more jobs listed by squeue
#+
#+ Reads:  UT_prim_UMI (array of bam paths), gtf, job_name, threads,
#+         job_no_max
#+ Writes: h, i, j, in, out, err_out, iter, hc_strd, tally, cmd


#  Print the htseq-count outfile path for a bam
#+     $1  bam path containing "bams_renamed"
#+         (e.g., bams_renamed/UT_prim_UMI/sample.bam)
#+     $2  strandedness label (e.g., strd-eq)
#+ Only the first "bams_renamed" and a trailing ".bam" are rewritten, so "bam"
#+ appearing elsewhere in a file name is left untouched
_hc_outfile_sc_kl() {
    local dir_out="outfiles_htseq-count/already/combined-SC-KL-20S"
    local stem="${1/bams_renamed/${dir_out}}"

    printf '%s\n' "${stem%.bam}.hc-${2}.tsv"
}


#  Print the htseq-count --stranded value for a strandedness label; return 1
#+ for any label other than strd-eq and strd-op
#+     $1  strandedness label
_hc_stranded() {
    case "${1}" in
        strd-eq) printf '%s\n' "yes" ;;
        strd-op) printf '%s\n' "reverse" ;;
        *)
            printf 'Error: unrecognized strandedness label "%s"\n' "${1}" >&2
            return 1
            ;;
    esac
}


h=0
# for i in "strd-eq"; do
for i in "strd-eq" "strd-op"; do
    #  Resolved once per label; an unknown label stops the submissions
    #+ instead of silently reusing the previous --stranded value
    hc_strd="$(_hc_stranded "${i}")" || break

    #  For a test run, restrict to a few elements, e.g., "${UT_prim_UMI[0]}"
    for j in "${UT_prim_UMI[@]}"; do
        #  -------------------------------------
        in="${j}"
        out="$(_hc_outfile_sc_kl "${in}" "${i}")"
        err_out="$(dirname "${out}")/err_out/$(basename "${out}" .tsv)"


        #  -------------------------------------
        h=$(( h + 1 ))
        iter="${h}"
        echo "        #  -------------------------------------"
        printf "        Iteration '%d'\n" "${iter}"

        echo "
        Running htseq-count
                    directory                                                            file
                in  $(dirname "${in}")                                             $(basename "${in}")
               out  $(dirname "${out}")          $(basename "${out}")
            stdout  $(dirname "${err_out}")  $(basename "${err_out}").stdout.txt
            stderr  $(dirname "${err_out}")  $(basename "${err_out}").stderr.txt
        "


        #  -------------------------------------
        #  Build the submission once so that what is logged is exactly what
        #+ is run
        cmd=(
            sbatch
                --job-name="${job_name}"
                --nodes=1
                --cpus-per-task="${threads}"
                --error="${err_out}.%A.stderr.txt"
                --output="${err_out}.%A.stdout.txt"
                htseq-count
                    --order "pos"
                    --stranded "${hc_strd}"
                    --nonunique "none"
                    --type "mRNA"
                    --idattr "ID"
                    --nprocesses "${threads}"
                    --counts_output "${out}"
                    --with-header
                    "${in}"
                    "${gtf}"
        )
        printf '        '
        printf '%q ' "${cmd[@]}"
        printf '\n\n'


        #  -------------------------------------
        #  Throttle: squeue prints one header line plus one line per job, so
        #+ (line count - 1) is the number of this user's jobs
        #TODO Count only jobs named ${job_name}, e.g., with grep -c
        tally="$(squeue -u "$(whoami)" | wc -l)"
        while (( tally - 1 >= job_no_max )); do
            sleep 5
            printf "."
            tally="$(squeue -u "$(whoami)" | wc -l)"
        done

        "${cmd[@]}"

        sleep 0.5
        echo ""
    done
done

Concatenate files, copy pertinent files to AG

Code

Code: Concatenate files, copy pertinent files to AG
#!/bin/bash

#  In the UT_prim_UMI outfile directory for combined-SC-KL-20S, run
#+ process_htseq-count_outfiles.sh once per strandedness label to write one
#+ "all-samples" table each, then copy both tables to the shared AG directory
#+
#+ Nothing is run if the outfile directory cannot be entered, since the
#+ script path and the table names are all relative to it
dir_hc="${HOME}/tsukiyamalab/kalavatt/2022-2023_RRP6-NAB3/results/2023-0215/outfiles_htseq-count/already/combined-SC-KL-20S/UT_prim_UMI"
dir_AG="${HOME}/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331"

if cd "${dir_hc}"; then
    for s in "hc-strd-eq" "hc-strd-op"; do
        bash ../../../../../../bin/process_htseq-count_outfiles.sh \
            -u FALSE \
            -q "." \
            -o "./all-samples.combined-SC-KL-20S.${s}.mRNA.tsv" \
            -s "${s}"
    done

    ., all-samples*


    #TODO Move this to its own location?
    #  Create the AG directory tree only when the AG directory is empty or
    #+ absent (the unmatched glob stays literal and fails the -e test); -p
    #+ makes the missing parent directories as well
    for i in "${dir_AG}"/*; do
        if [[ ! -e "${i}" ]]; then
            mkdir -p "${dir_AG}/already/combined-SC-KL-20S/UT_prim_UMI"
            mkdir -p "${dir_AG}/already/combined-AG/"{antisense_transcript,CUT,CUT_2016,CUT_4X,mRNA,ncRNA,NUTs,rRNA,snoRNA,snRNA,SRAT,SUT,tRNA,XUT}"/UT_prim_UMI"
        fi

        break
    done

    cp \
        all-samples.combined-SC-KL-20S.hc-strd-eq.mRNA.tsv \
        all-samples.combined-SC-KL-20S.hc-strd-op.mRNA.tsv \
        "${dir_AG}/already/combined-SC-KL-20S/UT_prim_UMI"
else
    echo "cd'ing failed; check on this..."
fi

Printed: Concatenate files, copy pertinent files to AG
❯ cd "${HOME}/tsukiyamalab/kalavatt/2022-2023_RRP6-NAB3/results/2023-0215/outfiles_htseq-count/already/combined-SC-KL-20S/UT_prim_UMI"


❯ bash ../../../../../../bin/process_htseq-count_outfiles.sh \
>     -u FALSE \
>     -q "." \
>     -o "./all-samples.combined-SC-KL-20S.hc-strd-eq.mRNA.tsv" \
>     -s "hc-strd-eq"
"Safe mode" is FALSE.


❯ bash ../../../../../../bin/process_htseq-count_outfiles.sh \
>     -u FALSE \
>     -q "." \
>     -o "./all-samples.combined-SC-KL-20S.hc-strd-op.mRNA.tsv" \
>     -s "hc-strd-op"
"Safe mode" is FALSE.


❯ ., all-samples*
-rw-rw---- 1 kalavatt 2.8M Mar 31 14:19 all-samples.combined-SC-KL-20S.hc-strd-eq.mRNA.tsv
-rw-rw---- 1 kalavatt 2.1M Mar 31 14:19 all-samples.combined-SC-KL-20S.hc-strd-op.mRNA.tsv


❯ for i in ~/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/*; do
>     if [[ ! -e "${i}" ]]; then
>         mkdir ~/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-SC-KL-20S/UT_prim_UMI
>         mkdir ~/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/{antisense_transcript,CUT,CUT_2016,CUT_4X,mRNA,ncRNA,NUTs,rRNA,snoRNA,snRNA,SRAT,SUT,tRNA,XUT}/UT_prim_UMI
>     fi
> 
>     break
> done
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-SC-KL-20S'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-SC-KL-20S/UT_prim_UMI'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/antisense_transcript'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/antisense_transcript/UT_prim_UMI'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/CUT'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/CUT/UT_prim_UMI'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/CUT_2016'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/CUT_2016/UT_prim_UMI'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/CUT_4X'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/CUT_4X/UT_prim_UMI'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/mRNA'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/mRNA/UT_prim_UMI'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/ncRNA'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/ncRNA/UT_prim_UMI'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/NUTs'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/NUTs/UT_prim_UMI'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/rRNA'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/rRNA/UT_prim_UMI'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/snoRNA'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/snoRNA/UT_prim_UMI'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/snRNA'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/snRNA/UT_prim_UMI'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/SRAT'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/SRAT/UT_prim_UMI'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/SUT'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/SUT/UT_prim_UMI'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/tRNA'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/tRNA/UT_prim_UMI'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/XUT'
mkdir: created directory '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/XUT/UT_prim_UMI'


❯ cp \
>     all-samples.combined-SC-KL-20S.hc-strd-eq.mRNA.tsv \
>     all-samples.combined-SC-KL-20S.hc-strd-op.mRNA.tsv \
>     /home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-SC-KL-20S/UT_prim_UMI
'all-samples.combined-SC-KL-20S.hc-strd-eq.mRNA.tsv' -> '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-SC-KL-20S/UT_prim_UMI/all-samples.combined-SC-KL-20S.hc-strd-eq.mRNA.tsv'
'all-samples.combined-SC-KL-20S.hc-strd-op.mRNA.tsv' -> '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-SC-KL-20S/UT_prim_UMI/all-samples.combined-SC-KL-20S.hc-strd-op.mRNA.tsv'


Run htseq-count on bams in bams_renamed/ with combined_AG.sans-chr.gtf

Set up outfile directories

Code

Code: Set up outfile directories
#!/bin/bash

#  Prerequisite (see "Get situated"): be in results/2023-0215/ with gff3_env
#+ activated, i.e.,
#+     transcriptome && cd "results/2023-0215/"
#+     source activate gff3_env

#  Make, for every feature type and both bam categories, an outfile directory
#+ with an err_out/ subdirectory, unless a directory starting with "a" is
#+ already there. Only the first glob result is examined: when nothing
#+ matches, the pattern is left unexpanded, the -e test fails, and the
#+ directories are created
for first_hit in ./outfiles_htseq-count/already/combined-AG/a*; do
    if [[ -e "${first_hit}" ]]; then
        echo "Directories present; skipping mkdir'ing of outfile directories"
        break
    fi

    for feature in \
        antisense_transcript CUT CUT_4X mRNA ncRNA rRNA snoRNA \
        snRNA SUT tRNA XUT CUT_2016 SRAT NUTs; do
        for category in UT_prim_UMI UTK_prim_UMI; do
            mkdir -p "outfiles_htseq-count/already/combined-AG/${feature}/${category}/err_out"
        done
    done

    break
done

Strip "chr" from chromosome names in combined_AG.gtf

Code

Code: Strip "chr" from chromosome names in combined_AG.gtf

#TODO Get this work into another location

#!/bin/bash

#  Write a copy of combined_AG.gtf in which a leading "chr" is removed from
#+ every line, then copy the result into this analysis' infiles directory
#+ and finish in the results directory
#+
#+ Absolute paths are used throughout so that no output file is created in
#+ whatever directory the shell happens to be in
dir_gtf="${HOME}/genomes/combined_AG/gtf"
dir_work="${HOME}/tsukiyamalab/kalavatt/2022-2023_RRP6-NAB3/results/2023-0215"
gtf_with_chr="${dir_gtf}/combined_AG.gtf"
gtf_sans_chr="${dir_gtf}/combined_AG.sans-chr.gtf"
gtf_copy="${dir_work}/infiles_gtf-gff3/already/combined_AG.sans-chr.gtf"

if [[ -f "${gtf_with_chr}" ]]; then
    sed 's/^chr//' "${gtf_with_chr}" > "${gtf_sans_chr}"
    head "${gtf_sans_chr}"
    tail "${gtf_sans_chr}"
else
    echo "Error: ${gtf_with_chr} not found; skipping the stripping of \"chr\"" >&2
fi

cd "${dir_work}" || echo "cd'ing failed; check on this..."

if [[ -f "${gtf_sans_chr}" ]]; then
    cp "${gtf_sans_chr}" "${gtf_copy}"

    #  Inspect the copy (not the source) to confirm that it arrived intact
    head "${gtf_copy}"
    tail "${gtf_copy}"
fi

Set up arrays of bams

Code

Code: Set up arrays
#!/bin/bash

#  Emit, NUL-delimited and sorted, every symlink named *.bam found under
#+ bams_renamed/<$1>
list_bam_links() {
    find "bams_renamed/${1}" \
        -type l \
        -name "*.bam" \
        -print0 \
            | sort -z
}


#  Bams for category UT_prim_UMI
unset UT_prim_UMI
declare -a UT_prim_UMI
while IFS= read -r -d '' bam; do
    UT_prim_UMI+=( "${bam}" )
done < <(list_bam_links "UT_prim_UMI")

#  Bams for category UTK_prim_UMI
unset UTK_prim_UMI
declare -a UTK_prim_UMI
while IFS= read -r -d '' bam; do
    UTK_prim_UMI+=( "${bam}" )
done < <(list_bam_links "UTK_prim_UMI")

#  Four more categories are currently switched off: UT_prim_pos, UTK_prim_pos,
#+ UT_prim_no, and UTK_prim_no. To bring one back, repeat the pattern above
#+ with its name, e.g.,
#+     unset UT_prim_pos
#+     declare -a UT_prim_pos
#+     while IFS= read -r -d '' bam; do
#+         UT_prim_pos+=( "${bam}" )
#+     done < <(list_bam_links "UT_prim_pos")

#  Show what was collected (echo_test is defined outside this notebook)
echo_test "${UT_prim_UMI[@]}"
echo_test "${UTK_prim_UMI[@]}"
# echo_test "${UT_prim_pos[@]}"
# echo_test "${UTK_prim_pos[@]}"
# echo_test "${UT_prim_no[@]}"
# echo_test "${UTK_prim_no[@]}"

#  ...and how many bams each array holds
echo "${#UT_prim_UMI[@]}"
echo "${#UTK_prim_UMI[@]}"
# echo "${#UT_prim_pos[@]}"
# echo "${#UTK_prim_pos[@]}"
# echo "${#UT_prim_no[@]}"
# echo "${#UTK_prim_no[@]}"

Index bams

Code

Code: Index all bams in arrays
#!/bin/bash

#  Index every bam in UT_prim_UMI and UTK_prim_UMI with samtools, unless a
#+ .bai file is already present in bams_renamed/UT_prim_UMI/
#+
#+ Reads:  UT_prim_UMI, UTK_prim_UMI (arrays of bam paths),
#+         SLURM_CPUS_ON_NODE (falls back to 1 when unset or empty, e.g.,
#+         outside a Slurm allocation)
#+
#+ Only the first glob result is examined: when no .bai exists, the pattern
#+ is left unexpanded, the -e test fails, and indexing runs
for h in ./bams_renamed/UT_prim_UMI/*.bai; do
    if [[ ! -e "${h}" ]]; then
        ml SAMtools/1.16.1-GCC-11.2.0

        #  To index the other categories too, extend the list below with
        #+     "${UT_prim_pos[@]}" "${UTK_prim_pos[@]}"
        #+     "${UT_prim_no[@]}"  "${UTK_prim_no[@]}"
        for i in \
            "${UT_prim_UMI[@]}" \
            "${UTK_prim_UMI[@]}"; do
            echo "${i}"
            samtools index -@ "${SLURM_CPUS_ON_NODE:-1}" "${i}"
        done

        #  Unload only after all bams are indexed; doing this inside the loop
        #+ would remove the module after the first bam
        module purge SAMtools/1.16.1-GCC-11.2.0
    else
        echo "Bam indices exist; skipping the running of samtools index"
    fi

    break
done

Run htseq-count with combined_AG.sans-chr.gtf

Set up necessary variables

Code
Code: Set up necessary variables
#!/bin/bash

#  Settings read by the htseq-count submission loop. To inspect a value, echo
#+ it (e.g., echo "${threads}"); for the array, echo_test "${features[@]}" and
#+ echo "${#features[@]}"; to list the annotation file, ., "${gtf}"

#  Feature types to count, one round of jobs per type
unset features
declare -a features
features+=( antisense_transcript CUT CUT_4X mRNA ncRNA )
features+=( rRNA snoRNA snRNA SUT tRNA )
features+=( XUT CUT_2016 SRAT NUTs )

#  Slurm: cap on queued jobs, CPUs per job, and the name given to each job
job_no_max='120'
threads='8'
job_name='run_htseq-count'

#  Annotation file handed to htseq-count
gtf='infiles_gtf-gff3/already/combined_AG.sans-chr.gtf'

Set up and submit htseq-count jobs

Code
Code: Set up and submit htseq-count jobs
#!/bin/bash

#  Submit one htseq-count job to Slurm for each combination of feature type
#+ in "features", strandedness label ("strd-eq"), and bam in UT_prim_UMI,
#+ waiting whenever the user already has job_no_max or more jobs listed by
#+ squeue
#+
#+ Reads:  features (array), UT_prim_UMI (array of bam paths), gtf, job_name,
#+         threads, job_no_max
#+ Writes: g, h, i, j, type, strd, in, out, err_out, iter, hc_strd, tally,
#+         cmd


#  Print the htseq-count outfile path for a bam
#+     $1  bam path containing "bams_renamed"
#+         (e.g., bams_renamed/UT_prim_UMI/sample.bam)
#+     $2  feature type (e.g., mRNA)
#+     $3  strandedness label (e.g., strd-eq)
#+ Only the first "bams_renamed" and a trailing ".bam" are rewritten, so "bam"
#+ appearing elsewhere in a file name is left untouched
_hc_outfile_ag() {
    local dir_out="outfiles_htseq-count/already/combined-AG/${2}"
    local stem="${1/bams_renamed/${dir_out}}"

    printf '%s\n' "${stem%.bam}.hc-${3}.tsv"
}


#  Print the htseq-count --stranded value for a strandedness label; return 1
#+ for any label other than strd-eq and strd-op
#+     $1  strandedness label
_hc_stranded() {
    case "${1}" in
        strd-eq) printf '%s\n' "yes" ;;
        strd-op) printf '%s\n' "reverse" ;;
        *)
            printf 'Error: unrecognized strandedness label "%s"\n' "${1}" >&2
            return 1
            ;;
    esac
}


g=0
for h in "${features[@]}"; do
    for i in "strd-eq"; do
        #  Resolved once per label; an unknown label is skipped instead of
        #+ silently reusing the previous --stranded value
        hc_strd="$(_hc_stranded "${i}")" || continue

        #  For a test run, restrict to, e.g., "${features[0]}" and
        #+ "${UT_prim_UMI[0]}"
        for j in "${UT_prim_UMI[@]}"; do
            #  -------------------------------------
            type="${h}"
            strd="${i}"
            in="${j}"
            out="$(_hc_outfile_ag "${in}" "${type}" "${i}")"
            err_out="$(dirname "${out}")/err_out/$(basename "${out}" .tsv)"


            #  -------------------------------------
            g=$(( g + 1 ))
            iter="${g}"
            echo "        #  -------------------------------------"
            printf "        Iteration '%d'\n" "${iter}"

            echo "
            Running htseq-count
                        directory                                                            file
                    in  $(dirname "${in}")                                             $(basename "${in}")
                   out  $(dirname "${out}")          $(basename "${out}")
                stdout  $(dirname "${err_out}")  $(basename "${err_out}").stdout.txt
                stderr  $(dirname "${err_out}")  $(basename "${err_out}").stderr.txt
            "


            #  -------------------------------------
            #  Build the submission once so that what is logged is exactly
            #+ what is run
            cmd=(
                sbatch
                    --job-name="${job_name}"
                    --nodes=1
                    --cpus-per-task="${threads}"
                    --error="${err_out}.%A.stderr.txt"
                    --output="${err_out}.%A.stdout.txt"
                    htseq-count
                        --order "pos"
                        --stranded "${hc_strd}"
                        --nonunique "none"
                        --type "${type}"
                        --idattr "gene_id"
                        --nprocesses "${threads}"
                        --counts_output "${out}"
                        --with-header
                        "${in}"
                        "${gtf}"
            )
            printf '            '
            printf '%q ' "${cmd[@]}"
            printf '\n\n'


            #  -------------------------------------
            #  Throttle: squeue prints one header line plus one line per job,
            #+ so (line count - 1) is the number of this user's jobs
            tally="$(squeue -u "$(whoami)" | wc -l)"
            while (( tally - 1 >= job_no_max )); do
                sleep 5
                printf "."
                tally="$(squeue -u "$(whoami)" | wc -l)"
            done

            "${cmd[@]}"

            sleep 0.15
            echo ""
        done
    done
done

Concatenate files, copy pertinent files to AG

Code

Code: Concatenate files, copy pertinent files to AG
#!/bin/bash

#  For each feature type in "features": enter its UT_prim_UMI outfile
#+ directory, run process_htseq-count_outfiles.sh to write one "all-samples"
#+ table, copy that table to the shared AG directory, and list the AG
#+ directory. Each command is printed before it is run
#+
#+ Writes: features, dir_hc, dir_AG, i, outfile

unset features
typeset -a features=(
    # antisense_transcript
    # CUT
    # CUT_4X
    # mRNA
    # ncRNA
    # rRNA
    # snoRNA
    # snRNA
    # SUT
    # tRNA
    # XUT
    # CUT_2016
    SRAT
    NUTs
)
# echo_test "${features[@]}"
# echo "${#features[@]}"

dir_hc="${HOME}/tsukiyamalab/kalavatt/2022-2023_RRP6-NAB3/results/2023-0215/outfiles_htseq-count/already/combined-AG"
dir_AG="${HOME}/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG"


#  Print, for the log, the process_htseq-count_outfiles.sh call that is made
#+ for a merged outfile
#+     $1  name of the merged outfile
_print_process_call() {
    printf '%s\n' \
        "" \
        "    bash ../../../../../../../bin/process_htseq-count_outfiles.sh \\" \
        "        -u FALSE \\" \
        "        -q \".\" \\" \
        "        -o \"./${1}\" \\" \
        "        -s \"hc-strd-eq\""
}


cd "${dir_hc}" || echo "cd'ing failed; check on this..."

for i in "${features[@]}"; do
    echo "#  -------------------------------------"
    echo "#+ combined-AG/${i}/UT_prim_UMI"

    #  The script path and the outfile are relative to this directory, so
    #+ skip the feature type if the directory cannot be entered
    if ! cd "${dir_hc}/${i}/UT_prim_UMI"; then
        echo "Error: could not cd to ${dir_hc}/${i}/UT_prim_UMI; skipping" >&2
        continue
    fi
    # .,
    # ls *.tsv | wc -l
    pwd

    outfile="all-samples.combined-AG.hc-strd-eq.${i}.tsv"

    _print_process_call "${outfile}"

    bash ../../../../../../../bin/process_htseq-count_outfiles.sh \
        -u FALSE \
        -q "." \
        -o "./${outfile}" \
        -s "hc-strd-eq"

    printf '%s\n' \
        "" \
        "    cp \\" \
        "        \"${outfile}\" \\" \
        "        \"${dir_AG}/${i}/UT_prim_UMI\"" \
        ""

    if [[ -f "${outfile}" ]]; then
        cp \
            "${outfile}" \
            "${dir_AG}/${i}/UT_prim_UMI"
    else
        #  The loop deliberately carries on with the next feature type
        echo "Error: Some problem with generation of 'all-samples.*' file; skipping copy" >&2
    fi
    echo ""

    ., "${dir_AG}/${i}/UT_prim_UMI"
    echo ""
    echo ""
done

Printed: Concatenate files, copy pertinent files to AG
❯ for i in "${features[@]}"; do
>     echo "#  -------------------------------------"
>     echo "#+ combined-AG/${i}/UT_prim_UMI"
> 
>     cd "${HOME}/tsukiyamalab/kalavatt/2022-2023_RRP6-NAB3/results/2023-0215/outfiles_htseq-count/already/combined-AG/${i}/UT_prim_UMI"
>     # .,
>     # ls *.tsv | wc -l
>     pwd
> 
>     echo """
>     bash ../../../../../../../bin/process_htseq-count_outfiles.sh \\
>         -u FALSE \\
>         -q \".\" \\
>         -o \"./all-samples.combined-AG.hc-strd-eq.${i}.tsv\" \\
>         -s \"hc-strd-eq\""""
> 
>     bash ../../../../../../../bin/process_htseq-count_outfiles.sh \
>         -u FALSE \
>         -q "." \
>         -o "./all-samples.combined-AG.hc-strd-eq.${i}.tsv" \
>         -s "hc-strd-eq"
> 
>     echo """
>     cp \\
>         \"all-samples.combined-AG.hc-strd-eq.${i}.tsv\" \\
>         \"${HOME}/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/${i}/UT_prim_UMI\"
>     """
> 
>     if [[ -f "all-samples.combined-AG.hc-strd-eq.${i}.tsv" ]]; then
>         cp \
>             "all-samples.combined-AG.hc-strd-eq.${i}.tsv" \
>             "${HOME}/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/${i}/UT_prim_UMI"
>     else
>         echo "Error: Some problem with generation of 'all-samples.*' file; breaking"
>         # break
>     fi
>     echo ""
> 
>     ., "${HOME}/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/${i}/UT_prim_UMI"
>     echo ""
>     echo ""
> done
#  -------------------------------------
#+ combined-AG/antisense_transcript/UT_prim_UMI
/home/kalavatt/tsukiyamalab/kalavatt/2022-2023_RRP6-NAB3/results/2023-0215/outfiles_htseq-count/already/combined-AG/antisense_transcript/UT_prim_UMI

    bash ../../../../../../../bin/process_htseq-count_outfiles.sh \
        -u FALSE \
        -q "." \
        -o "./all-samples.combined-AG.hc-strd-eq.antisense_transcript.tsv" \
        -s "hc-strd-eq"
"Safe mode" is FALSE.

    cp \
        "all-samples.combined-AG.hc-strd-eq.antisense_transcript.tsv" \
        "/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/antisense_transcript/UT_prim_UMI"

'all-samples.combined-AG.hc-strd-eq.antisense_transcript.tsv' -> '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/antisense_transcript/UT_prim_UMI/all-samples.combined-AG.hc-strd-eq.antisense_transcript.tsv'

total 496K
drwxrws--- 2 kalavatt  77 Mar 31 15:20 ./
drwxrws--- 3 kalavatt  29 Mar 31 14:33 ../
-rw-rw---- 1 kalavatt 94K Mar 31 15:20 all-samples.combined-AG.hc-strd-eq.antisense_transcript.tsv


#  -------------------------------------
#+ combined-AG/CUT/UT_prim_UMI
/home/kalavatt/tsukiyamalab/kalavatt/2022-2023_RRP6-NAB3/results/2023-0215/outfiles_htseq-count/already/combined-AG/CUT/UT_prim_UMI

    bash ../../../../../../../bin/process_htseq-count_outfiles.sh \
        -u FALSE \
        -q "." \
        -o "./all-samples.combined-AG.hc-strd-eq.CUT.tsv" \
        -s "hc-strd-eq"
"Safe mode" is FALSE.

    cp \
        "all-samples.combined-AG.hc-strd-eq.CUT.tsv" \
        "/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/CUT/UT_prim_UMI"

'all-samples.combined-AG.hc-strd-eq.CUT.tsv' -> '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/CUT/UT_prim_UMI/all-samples.combined-AG.hc-strd-eq.CUT.tsv'

total 296K
drwxrws--- 2 kalavatt   60 Mar 31 15:20 ./
drwxrws--- 3 kalavatt   29 Mar 31 14:33 ../
-rw-rw---- 1 kalavatt 172K Mar 31 15:20 all-samples.combined-AG.hc-strd-eq.CUT.tsv


#  -------------------------------------
#+ combined-AG/CUT_4X/UT_prim_UMI
/home/kalavatt/tsukiyamalab/kalavatt/2022-2023_RRP6-NAB3/results/2023-0215/outfiles_htseq-count/already/combined-AG/CUT_4X/UT_prim_UMI

    bash ../../../../../../../bin/process_htseq-count_outfiles.sh \
        -u FALSE \
        -q "." \
        -o "./all-samples.combined-AG.hc-strd-eq.CUT_4X.tsv" \
        -s "hc-strd-eq"
"Safe mode" is FALSE.

    cp \
        "all-samples.combined-AG.hc-strd-eq.CUT_4X.tsv" \
        "/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/CUT_4X/UT_prim_UMI"

'all-samples.combined-AG.hc-strd-eq.CUT_4X.tsv' -> '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/CUT_4X/UT_prim_UMI/all-samples.combined-AG.hc-strd-eq.CUT_4X.tsv'

total 320K
drwxrws--- 2 kalavatt   63 Mar 31 15:20 ./
drwxrws--- 3 kalavatt   29 Mar 31 14:33 ../
-rw-rw---- 1 kalavatt 172K Mar 31 15:20 all-samples.combined-AG.hc-strd-eq.CUT_4X.tsv


#  -------------------------------------
#+ combined-AG/mRNA/UT_prim_UMI
/home/kalavatt/tsukiyamalab/kalavatt/2022-2023_RRP6-NAB3/results/2023-0215/outfiles_htseq-count/already/combined-AG/mRNA/UT_prim_UMI

    bash ../../../../../../../bin/process_htseq-count_outfiles.sh \
        -u FALSE \
        -q "." \
        -o "./all-samples.combined-AG.hc-strd-eq.mRNA.tsv" \
        -s "hc-strd-eq"
"Safe mode" is FALSE.

    cp \
        "all-samples.combined-AG.hc-strd-eq.mRNA.tsv" \
        "/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/mRNA/UT_prim_UMI"

'all-samples.combined-AG.hc-strd-eq.mRNA.tsv' -> '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/mRNA/UT_prim_UMI/all-samples.combined-AG.hc-strd-eq.mRNA.tsv'

total 2.0M
drwxrws--- 2 kalavatt   61 Mar 31 15:20 ./
drwxrws--- 3 kalavatt   29 Mar 31 14:33 ../
-rw-rw---- 1 kalavatt 1.6M Mar 31 15:20 all-samples.combined-AG.hc-strd-eq.mRNA.tsv


#  -------------------------------------
#+ combined-AG/ncRNA/UT_prim_UMI
/home/kalavatt/tsukiyamalab/kalavatt/2022-2023_RRP6-NAB3/results/2023-0215/outfiles_htseq-count/already/combined-AG/ncRNA/UT_prim_UMI

    bash ../../../../../../../bin/process_htseq-count_outfiles.sh \
        -u FALSE \
        -q "." \
        -o "./all-samples.combined-AG.hc-strd-eq.ncRNA.tsv" \
        -s "hc-strd-eq"
"Safe mode" is FALSE.

    cp \
        "all-samples.combined-AG.hc-strd-eq.ncRNA.tsv" \
        "/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/ncRNA/UT_prim_UMI"

'all-samples.combined-AG.hc-strd-eq.ncRNA.tsv' -> '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/ncRNA/UT_prim_UMI/all-samples.combined-AG.hc-strd-eq.ncRNA.tsv'

total 176K
drwxrws--- 2 kalavatt  62 Mar 31 15:20 ./
drwxrws--- 3 kalavatt  29 Mar 31 14:33 ../
-rw-rw---- 1 kalavatt 15K Mar 31 15:20 all-samples.combined-AG.hc-strd-eq.ncRNA.tsv


#  -------------------------------------
#+ combined-AG/rRNA/UT_prim_UMI
/home/kalavatt/tsukiyamalab/kalavatt/2022-2023_RRP6-NAB3/results/2023-0215/outfiles_htseq-count/already/combined-AG/rRNA/UT_prim_UMI

    bash ../../../../../../../bin/process_htseq-count_outfiles.sh \
        -u FALSE \
        -q "." \
        -o "./all-samples.combined-AG.hc-strd-eq.rRNA.tsv" \
        -s "hc-strd-eq"
"Safe mode" is FALSE.

    cp \
        "all-samples.combined-AG.hc-strd-eq.rRNA.tsv" \
        "/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/rRNA/UT_prim_UMI"

'all-samples.combined-AG.hc-strd-eq.rRNA.tsv' -> '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/rRNA/UT_prim_UMI/all-samples.combined-AG.hc-strd-eq.rRNA.tsv'

total 176K
drwxrws--- 2 kalavatt  61 Mar 31 15:20 ./
drwxrws--- 3 kalavatt  29 Mar 31 14:33 ../
-rw-rw---- 1 kalavatt 14K Mar 31 15:20 all-samples.combined-AG.hc-strd-eq.rRNA.tsv


#  -------------------------------------
#+ combined-AG/snoRNA/UT_prim_UMI
/home/kalavatt/tsukiyamalab/kalavatt/2022-2023_RRP6-NAB3/results/2023-0215/outfiles_htseq-count/already/combined-AG/snoRNA/UT_prim_UMI

    bash ../../../../../../../bin/process_htseq-count_outfiles.sh \
        -u FALSE \
        -q "." \
        -o "./all-samples.combined-AG.hc-strd-eq.snoRNA.tsv" \
        -s "hc-strd-eq"
"Safe mode" is FALSE.

    cp \
        "all-samples.combined-AG.hc-strd-eq.snoRNA.tsv" \
        "/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/snoRNA/UT_prim_UMI"

'all-samples.combined-AG.hc-strd-eq.snoRNA.tsv' -> '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/snoRNA/UT_prim_UMI/all-samples.combined-AG.hc-strd-eq.snoRNA.tsv'

total 240K
drwxrws--- 2 kalavatt  63 Mar 31 15:20 ./
drwxrws--- 3 kalavatt  29 Mar 31 14:33 ../
-rw-rw---- 1 kalavatt 31K Mar 31 15:20 all-samples.combined-AG.hc-strd-eq.snoRNA.tsv


#  -------------------------------------
#+ combined-AG/snRNA/UT_prim_UMI
/home/kalavatt/tsukiyamalab/kalavatt/2022-2023_RRP6-NAB3/results/2023-0215/outfiles_htseq-count/already/combined-AG/snRNA/UT_prim_UMI

    bash ../../../../../../../bin/process_htseq-count_outfiles.sh \
        -u FALSE \
        -q "." \
        -o "./all-samples.combined-AG.hc-strd-eq.snRNA.tsv" \
        -s "hc-strd-eq"
"Safe mode" is FALSE.

    cp \
        "all-samples.combined-AG.hc-strd-eq.snRNA.tsv" \
        "/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/snRNA/UT_prim_UMI"

'all-samples.combined-AG.hc-strd-eq.snRNA.tsv' -> '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/snRNA/UT_prim_UMI/all-samples.combined-AG.hc-strd-eq.snRNA.tsv'

total 176K
drwxrws--- 2 kalavatt  62 Mar 31 15:20 ./
drwxrws--- 3 kalavatt  29 Mar 31 14:33 ../
-rw-rw---- 1 kalavatt 13K Mar 31 15:20 all-samples.combined-AG.hc-strd-eq.snRNA.tsv


#  -------------------------------------
#+ combined-AG/SUT/UT_prim_UMI
/home/kalavatt/tsukiyamalab/kalavatt/2022-2023_RRP6-NAB3/results/2023-0215/outfiles_htseq-count/already/combined-AG/SUT/UT_prim_UMI

    bash ../../../../../../../bin/process_htseq-count_outfiles.sh \
        -u FALSE \
        -q "." \
        -o "./all-samples.combined-AG.hc-strd-eq.SUT.tsv" \
        -s "hc-strd-eq"
"Safe mode" is FALSE.

    cp \
        "all-samples.combined-AG.hc-strd-eq.SUT.tsv" \
        "/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/SUT/UT_prim_UMI"

'all-samples.combined-AG.hc-strd-eq.SUT.tsv' -> '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/SUT/UT_prim_UMI/all-samples.combined-AG.hc-strd-eq.SUT.tsv'

total 320K
drwxrws--- 2 kalavatt   60 Mar 31 15:20 ./
drwxrws--- 3 kalavatt   29 Mar 31 14:33 ../
-rw-rw---- 1 kalavatt 173K Mar 31 15:20 all-samples.combined-AG.hc-strd-eq.SUT.tsv


#  -------------------------------------
#+ combined-AG/tRNA/UT_prim_UMI
/home/kalavatt/tsukiyamalab/kalavatt/2022-2023_RRP6-NAB3/results/2023-0215/outfiles_htseq-count/already/combined-AG/tRNA/UT_prim_UMI

    bash ../../../../../../../bin/process_htseq-count_outfiles.sh \
        -u FALSE \
        -q "." \
        -o "./all-samples.combined-AG.hc-strd-eq.tRNA.tsv" \
        -s "hc-strd-eq"
"Safe mode" is FALSE.

    cp \
        "all-samples.combined-AG.hc-strd-eq.tRNA.tsv" \
        "/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/tRNA/UT_prim_UMI"

'all-samples.combined-AG.hc-strd-eq.tRNA.tsv' -> '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/tRNA/UT_prim_UMI/all-samples.combined-AG.hc-strd-eq.tRNA.tsv'

total 336K
drwxrws--- 2 kalavatt  61 Mar 31 15:20 ./
drwxrws--- 3 kalavatt  29 Mar 31 14:33 ../
-rw-rw---- 1 kalavatt 53K Mar 31 15:20 all-samples.combined-AG.hc-strd-eq.tRNA.tsv


#  -------------------------------------
#+ combined-AG/XUT/UT_prim_UMI
/home/kalavatt/tsukiyamalab/kalavatt/2022-2023_RRP6-NAB3/results/2023-0215/outfiles_htseq-count/already/combined-AG/XUT/UT_prim_UMI

    bash ../../../../../../../bin/process_htseq-count_outfiles.sh \
        -u FALSE \
        -q "." \
        -o "./all-samples.combined-AG.hc-strd-eq.XUT.tsv" \
        -s "hc-strd-eq"
"Safe mode" is FALSE.

    cp \
        "all-samples.combined-AG.hc-strd-eq.XUT.tsv" \
        "/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/XUT/UT_prim_UMI"

'all-samples.combined-AG.hc-strd-eq.XUT.tsv' -> '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/XUT/UT_prim_UMI/all-samples.combined-AG.hc-strd-eq.XUT.tsv'

total 816K
drwxrws--- 2 kalavatt   60 Mar 31 15:20 ./
drwxrws--- 3 kalavatt   29 Mar 31 14:33 ../
-rw-rw---- 1 kalavatt 313K Mar 31 15:20 all-samples.combined-AG.hc-strd-eq.XUT.tsv


❯ for i in "${features[@]}"; do
>     echo "#  -------------------------------------"
>     echo "#+ combined-AG/${i}/UT_prim_UMI"
> 
>     cd "${HOME}/tsukiyamalab/kalavatt/2022-2023_RRP6-NAB3/results/2023-0215/outfiles_htseq-count/already/combined-AG/${i}/UT_prim_UMI"
>     # .,
>     # ls *.tsv | wc -l
>     pwd
> 
>     echo """
>     bash ../../../../../../../bin/process_htseq-count_outfiles.sh \\
>         -u FALSE \\
>         -q \".\" \\
>         -o \"./all-samples.combined-AG.hc-strd-eq.${i}.tsv\" \\
>         -s \"hc-strd-eq\""""
> 
>     bash ../../../../../../../bin/process_htseq-count_outfiles.sh \
>         -u FALSE \
>         -q "." \
>         -o "./all-samples.combined-AG.hc-strd-eq.${i}.tsv" \
>         -s "hc-strd-eq"
> 
>     echo """    cp \\
>         \"all-samples.combined-AG.hc-strd-eq.${i}.tsv\" \\
>         \"${HOME}/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/${i}/UT_prim_UMI\"
>     """
> 
>     if [[ -f "all-samples.combined-AG.hc-strd-eq.${i}.tsv" ]]; then
>         cp \
>             "all-samples.combined-AG.hc-strd-eq.${i}.tsv" \
>             "${HOME}/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/${i}/UT_prim_UMI"
>     else
>         echo "Error: Some problem with generation of 'all-samples.*' file; breaking"
>         # break
>     fi
>     echo ""
> 
>     ., "${HOME}/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/${i}/UT_prim_UMI"
>     echo ""
>     echo ""
> done
#  -------------------------------------
#+ combined-AG/CUT_2016/UT_prim_UMI
/home/kalavatt/tsukiyamalab/kalavatt/2022-2023_RRP6-NAB3/results/2023-0215/outfiles_htseq-count/already/combined-AG/CUT_2016/UT_prim_UMI

    bash ../../../../../../../bin/process_htseq-count_outfiles.sh \
        -u FALSE \
        -q "." \
        -o "./all-samples.combined-AG.hc-strd-eq.CUT_2016.tsv" \
        -s "hc-strd-eq"
"Safe mode" is FALSE.

    cp \
        "all-samples.combined-AG.hc-strd-eq.CUT_2016.tsv" \
        "/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/CUT_2016/UT_prim_UMI"

'all-samples.combined-AG.hc-strd-eq.CUT_2016.tsv' -> '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/CUT_2016/UT_prim_UMI/all-samples.combined-AG.hc-strd-eq.CUT_2016.tsv'

total 672K
drwxrws--- 2 kalavatt   65 Mar 31 15:51 ./
drwxrws--- 3 kalavatt   29 Mar 31 14:33 ../
-rw-rw---- 1 kalavatt 172K Mar 31 15:51 all-samples.combined-AG.hc-strd-eq.CUT_2016.tsv


❯ for i in "${features[@]}"; do
>     echo "#  -------------------------------------"
>     echo "#+ combined-AG/${i}/UT_prim_UMI"
> 
>     cd "${HOME}/tsukiyamalab/kalavatt/2022-2023_RRP6-NAB3/results/2023-0215/outfiles_htseq-count/already/combined-AG/${i}/UT_prim_UMI"
>     # .,
>     # ls *.tsv | wc -l
>     pwd
> 
>     echo """
>     bash ../../../../../../../bin/process_htseq-count_outfiles.sh \\
>         -u FALSE \\
>         -q \".\" \\
>         -o \"./all-samples.combined-AG.hc-strd-eq.${i}.tsv\" \\
>         -s \"hc-strd-eq\""""
> 
>     bash ../../../../../../../bin/process_htseq-count_outfiles.sh \
>         -u FALSE \
>         -q "." \
>         -o "./all-samples.combined-AG.hc-strd-eq.${i}.tsv" \
>         -s "hc-strd-eq"
> 
>     echo """    cp \\
>         \"all-samples.combined-AG.hc-strd-eq.${i}.tsv\" \\
>         \"${HOME}/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/${i}/UT_prim_UMI\"
>     """
> 
>     if [[ -f "all-samples.combined-AG.hc-strd-eq.${i}.tsv" ]]; then
>         cp \
>             "all-samples.combined-AG.hc-strd-eq.${i}.tsv" \
>             "${HOME}/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/${i}/UT_prim_UMI"
>     else
>         echo "Error: Some problem with generation of 'all-samples.*' file; breaking"
>         # break
>     fi
>     echo ""
> 
>     ., "${HOME}/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/${i}/UT_prim_UMI"
>     echo ""
>     echo ""
> done
#  -------------------------------------
#+ combined-AG/SRAT/UT_prim_UMI
/home/kalavatt/tsukiyamalab/kalavatt/2022-2023_RRP6-NAB3/results/2023-0215/outfiles_htseq-count/already/combined-AG/SRAT/UT_prim_UMI

    bash ../../../../../../../bin/process_htseq-count_outfiles.sh \
        -u FALSE \
        -q "." \
        -o "./all-samples.combined-AG.hc-strd-eq.SRAT.tsv" \
        -s "hc-strd-eq"
"Safe mode" is FALSE.

    cp \
        "all-samples.combined-AG.hc-strd-eq.SRAT.tsv" \
        "/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/SRAT/UT_prim_UMI"

'all-samples.combined-AG.hc-strd-eq.SRAT.tsv' -> '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/SRAT/UT_prim_UMI/all-samples.combined-AG.hc-strd-eq.SRAT.tsv'

total 520K
drwxrws--- 2 kalavatt  61 Apr  1 10:16 ./
drwxrws--- 3 kalavatt  29 Mar 31 14:33 ../
-rw-rw---- 1 kalavatt 99K Apr  1 10:16 all-samples.combined-AG.hc-strd-eq.SRAT.tsv


#  -------------------------------------
#+ combined-AG/NUTs/UT_prim_UMI
/home/kalavatt/tsukiyamalab/kalavatt/2022-2023_RRP6-NAB3/results/2023-0215/outfiles_htseq-count/already/combined-AG/NUTs/UT_prim_UMI

    bash ../../../../../../../bin/process_htseq-count_outfiles.sh \
        -u FALSE \
        -q "." \
        -o "./all-samples.combined-AG.hc-strd-eq.NUTs.tsv" \
        -s "hc-strd-eq"
"Safe mode" is FALSE.

    cp \
        "all-samples.combined-AG.hc-strd-eq.NUTs.tsv" \
        "/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/NUTs/UT_prim_UMI"

'all-samples.combined-AG.hc-strd-eq.NUTs.tsv' -> '/home/kalavatt/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331/already/combined-AG/NUTs/UT_prim_UMI/all-samples.combined-AG.hc-strd-eq.NUTs.tsv'

total 384K
drwxrws--- 2 kalavatt   61 Apr  1 10:16 ./
drwxrws--- 3 kalavatt   29 Mar 31 14:33 ../
-rw-rw---- 1 kalavatt 313K Apr  1 10:16 all-samples.combined-AG.hc-strd-eq.NUTs.tsv

Code

#TODO Better place or different notebook for this (and related) code chunks?

#!/bin/bash

#  Reorganize the shared directory of htseq-count tsvs:
#+     1. copy the two combined annotation files (gtf and gff3) into
#+        gtf-gff3/combined/
#+     2. rename already/ to combined/ and nest it under matrices/
#+
#+ Everything after the cd uses relative paths, so nothing is run unless the
#+ cd succeeds; otherwise, the mkdir/cp/mv calls would act on whatever
#+ directory the shell happens to be in. No 'exit' is used because this chunk
#+ is meant to be pasted into an interactive (tmux) shell.
dir_share="${HOME}/tsukiyamalab/alisong/tsvs_htseq-count_2023-0331"
dir_infiles="${HOME}/tsukiyamalab/kalavatt/2022-2023_RRP6-NAB3/results/2023-0215/infiles_gtf-gff3/already"

if cd "${dir_share}"; then
    mkdir -p gtf-gff3/combined

    #  Brace expansion is left outside the quotes so that it still expands to
    #+ the two annotation files
    cp \
        "${dir_infiles}/"{combined_AG.sans-chr.gtf,combined_SC_KL_20S.gff3} \
        gtf-gff3/combined

    mv already/ combined/
    mkdir -p matrices  # -p: do not error if matrices/ is already present
    mv combined/ matrices/
else
    echo "cd'ing to ${dir_share} failed; check on this..." >&2
fi