Skip to content

Commit

Permalink
See time_FHCC.xlsx for 2023-0613 and 2023-0614
Browse files Browse the repository at this point in the history
  • Loading branch information
Kris Alavattam committed Jun 15, 2023
1 parent 00914d9 commit ce000c1
Show file tree
Hide file tree
Showing 6 changed files with 1,886 additions and 485 deletions.
203 changes: 203 additions & 0 deletions results/2023-0215/collate_sea-tsv.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@

# collate_sea-tsv.sh
# KA

# Note: Change (hardcode) variables labeled #ARGUMENT as necessary


# Initialize variables, arrays -----------------------------------------------
# Run "check"/safety code below
checks=TRUE #ARGUMENT

# Include only the first part of the directory names; below, we use wildcard
#+ matching (*) to find all related directories
p_nab3="${HOME}/tsukiyamalab/alisong/KA.2023-0315.sacCer3_fasta/automate_try1/nab3" #ARGUMENT
p_nrd1="${HOME}/tsukiyamalab/alisong/KA.2023-0315.sacCer3_fasta/automate_try1/nrd1" #ARGUMENT

# Path to, and including, outfile directory
p_out="${HOME}/tsukiyamalab/alisong/KA.2023-0315.sacCer3_fasta/automate_try1" #ARGUMENT
[[ "${checks}" == TRUE ]] &&
{
if [[ ! -d "${p_out}" ]]; then
echo "Outfile dirctory \"p_out\" does not exist"
exit 1
fi
}

# Names of outfile dataframes
df_nab3="df_sea_nab3.tsv" #ARGUMENT
df_nrd1="df_sea_nrd1.tsv" #ARGUMENT

# Initialize an array of all sea.tsv files assoc. with the Nab3 experiments
unset sea_nab3
typeset -a sea_nab3
while IFS=" " read -r -d $'\0'; do
sea_nab3+=( "${REPLY}" )
done < <(
find "${p_nab3}"* \
-type f \
-name "sea.tsv" \
-print0 \
| sort -z
)
[[ "${checks}" == TRUE ]] &&
{
if [[ ${#sea_nab3[@]} -gt 0 ]]; then
echo "Array \"sea_nab3\" contains the following elements:"
for i in "${sea_nab3[@]}"; do echo " - ${i}"; done
echo ""
else
echo "Oops, something went wrong... Array \"sea_nab3\" is empty"
exit 1
fi

}

# Initialize an array of all sea.tsv files assoc. with the Nrd1 experiments
unset sea_nrd1
typeset -a sea_nrd1
while IFS=" " read -r -d $'\0'; do
sea_nrd1+=( "${REPLY}" )
done < <(
find "${p_nrd1}"* \
-type f \
-name "sea.tsv" \
-print0 \
| sort -z
)
[[ "${checks}" == TRUE ]] &&
{
if [[ ${#sea_nrd1[@]} -gt 0 ]]; then
echo "Array \"sea_nrd1\" contains the following elements:"
for i in "${sea_nrd1[@]}"; do echo " - ${i}"; done
echo ""
else
echo "Oops, something went wrong... Array \"sea_nrd1\" is empty"
exit 1
fi

}


# Write dataframe for Nab3 information ---------------------------------------
echo "Begin work with Nab3"

# If a "${df_nab3}" file already exists in "${p_out}", delete it
if [[ -f "${p_out}/${df_nab3}" ]]; then rm "${p_out}/${df_nab3}"; fi

# Initialize a new, empty "${df_nab3}" in "${p_out}"
touch "${p_out}/${df_nab3}"
[[ "${checks}" == TRUE ]] \
&& ls -lhaFG "${p_out}/${df_nab3}" \
&& cat "${p_out}/${df_nab3}" \
&& printf "\n"

h=0
for i in "${sea_nab3[@]}"; do
# Initialize changing variables
tmp_dirname="$(dirname "${i}")"
samp="$(basename "${tmp_dirname}")"

# Count iterations, printing steps in pipeline as they begin
let h++
echo "# -------------------------------------"
printf "Iteration '%d'\n" "${h}"

echo " - Adding column of sample names (written to disc)"
if [[ -f "${p_out}/tmp_A.txt" ]]; then rm "${p_out}/tmp_A.txt"; fi
echo "${samp}"$'\n'"${samp}"$'\n'"${samp}"$'\n'"${samp}"$'\n'"${samp}"$'\n'"${samp}"$'\n'"${samp}"$'\n'"${samp}"$'\n'"${samp}"$'\n'"${samp}" \
> "${p_out}/tmp_A.txt"

echo " - Adding columns of sample values, metrics (written to disc)"
if [[ -f "${p_out}/tmp_B.txt" ]]; then rm "${p_out}/tmp_B.txt"; fi
cat "${i}" | head -n -4 | tail -n -10 | cut -f 1-3,5-17 \
> "${p_out}/tmp_B.txt"

echo " - Binding columns and appending to \"\${p_out}/\${df_nab3}\""
paste "${p_out}/tmp_A.txt" "${p_out}/tmp_B.txt" >> "${p_out}/${df_nab3}"

echo " - Removing temporary files"
rm "${p_out}/tmp_A.txt" "${p_out}/tmp_B.txt"

printf "\n\n"
done

# Give "${df_nab3}" a header of column names
echo "Appending header of column names to \"\${p_out}/\${df_nab3}\""
echo ""

if [[ -f "${p_out}/tmp.tsv" ]]; then rm "${p_out}/tmp.tsv"; fi
echo "sample"$'\t'"rank"$'\t'"db"$'\t'"id"$'\t'"consensus"$'\t'"tp"$'\t'"tp%"$'\t'"fp"$'\t'"fp%"$'\t'"enr_ratio"$'\t'"score_thr"$'\t'"pvalue"$'\t'"log_pvalue"$'\t'"evalue"$'\t'"log_evalue"$'\t'"qvalue"$'\t'"log_qvalue" \
| cat - "${p_out}/${df_nab3}" \
>> "${p_out}/tmp.tsv"

[[ -s "${p_out}/tmp.tsv" ]] && exit_no="$(echo $?)"
if [[ ${exit_no} -eq 0 ]]; then
mv -f "${p_out}/tmp.tsv" "${p_out}/${df_nab3}"
else
echo "Oops, something went wrong... File \"\${p_out}/tmp.tsv\" is empty"
exit 1
fi


# Write dataframe for Nrd1 information ---------------------------------------
echo "Begin work with Nrd1"

# If a "${df_nrd1}" file already exists in "${p_out}", delete it
if [[ -f "${p_out}/${df_nrd1}" ]]; then rm "${p_out}/${df_nrd1}"; fi

# Initialize a new, empty "${df_nrd1}" in "${p_out}"
touch "${p_out}/${df_nrd1}"
[[ "${checks}" == TRUE ]] \
&& ls -lhaFG "${p_out}/${df_nrd1}" \
&& cat "${p_out}/${df_nrd1}" \
&& printf "\n"

h=0
for i in "${sea_nrd1[@]}"; do
# Initialize changing variables
tmp_dirname="$(dirname "${i}")"
samp="$(basename "${tmp_dirname}")"

# Count iterations, printing steps in pipeline as they begin
let h++
echo "# -------------------------------------"
printf "Iteration '%d'\n" "${h}"

echo " - Adding column of sample names (written to disc)"
if [[ -f "${p_out}/tmp_A.txt" ]]; then rm "${p_out}/tmp_A.txt"; fi
echo "${samp}"$'\n'"${samp}"$'\n'"${samp}"$'\n'"${samp}"$'\n'"${samp}"$'\n'"${samp}"$'\n'"${samp}"$'\n'"${samp}"$'\n'"${samp}"$'\n'"${samp}" \
> "${p_out}/tmp_A.txt"

echo " - Adding columns of sample values, metrics (written to disc)"
if [[ -f "${p_out}/tmp_B.txt" ]]; then rm "${p_out}/tmp_B.txt"; fi
cat "${i}" | head -n -4 | tail -n -10 | cut -f 1-3,5-17 \
> "${p_out}/tmp_B.txt"

echo " - Binding columns and appending to \"\${p_out}/\${df_nrd1}\""
paste "${p_out}/tmp_A.txt" "${p_out}/tmp_B.txt" >> "${p_out}/${df_nrd1}"

echo " - Removing temporary files"
rm "${p_out}/tmp_A.txt" "${p_out}/tmp_B.txt"

printf "\n\n"
done

# Give "${df_nrd1}" a header of column names
echo "Appending header of column names to \"\${p_out}/\${df_nrd1}\""
echo ""

if [[ -f "${p_out}/tmp.tsv" ]]; then rm "${p_out}/tmp.tsv"; fi
echo "sample"$'\t'"rank"$'\t'"db"$'\t'"id"$'\t'"consensus"$'\t'"tp"$'\t'"tp%"$'\t'"fp"$'\t'"fp%"$'\t'"enr_ratio"$'\t'"score_thr"$'\t'"pvalue"$'\t'"log_pvalue"$'\t'"evalue"$'\t'"log_evalue"$'\t'"qvalue"$'\t'"log_qvalue" \
| cat - "${p_out}/${df_nrd1}" \
>> "${p_out}/tmp.tsv"

[[ -s "${p_out}/tmp.tsv" ]] && exit_no="$(echo $?)"
if [[ ${exit_no} -eq 0 ]]; then
mv -f "${p_out}/tmp.tsv" "${p_out}/${df_nrd1}"
else
echo "Oops, something went wrong... File \"\${p_out}/tmp.tsv\" is empty"
exit 1
fi

exit 0
Loading

0 comments on commit ce000c1

Please sign in to comment.