-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcrossalign.sh
executable file
·90 lines (79 loc) · 4.33 KB
/
crossalign.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/bin/bash
# crossalign.sh <file> <file2> <network> <random>
#
# Runs the cross-alignment pipeline inside the working directory tmp/<random>.
#   file    - FASTA-style input; flattened into input.fasta below.
#   file2   - second input; how it is consumed depends on <network>.
#   network - mode selector: "normal", "obe", "fragment", or any value
#             containing "dataset" (optionally also containing "custom").
#   random  - name of the per-run sub-directory under tmp/.
#
# bash is named explicitly so that bash-only constructs in this script do not
# depend on what /bin/sh happens to point to.
file=$1
file2=$2
network=$3
random=$4

# Every later path is relative to the working directory, so stop here rather
# than scatter output files into the caller's directory when it is missing.
cd "tmp/$random" || {
  printf 'crossalign.sh: cannot cd to tmp/%s\n' "$random" >&2
  exit 1
}

# Flatten the input to one record per line: a line whose first field contains
# ">" starts a new output line ("<header><TAB>"); the first field of every
# other line is appended to it. The second awk keeps only lines with more than
# one field, which drops the empty line emitted before the first header.
# printf gets an explicit "%s" so sequence text is never parsed as a format.
awk '{ if ($1 ~ />/) { printf "\n%s\t", $1 } else { printf "%s", $1 } }' "$file" \
  | awk '(NF>1)' > input.fasta
#cat input.fasta
# "normal" mode: run crossalignpipe.py on the two inputs and extract the
# score, p-value and match positions from its output.
# The test quotes $network and uses "=", so an empty value or a strict POSIX
# "[" no longer produces a test error.
if [ "$network" = "normal" ]; then
  echo "normal"
  cp input.fasta input_bis.fasta

  # Flatten the second input the same way as the first one (header<TAB>sequence
  # per line); "%s" keeps sequence text from being used as a printf format.
  awk '{ if ($1 ~ />/) { printf "\n%s\t", $1 } else { printf "%s", $1 } }' "$file2" \
    | awk '(NF>1)' > input2.fasta

  python crossalignpipe.py "$network" "$file2" > dtw_output.tmp

  # Score: second field of two-field lines, printed with three decimals.
  # NOTE(review): the "." in /0./ is unescaped and matches any character;
  # it looks like "0\." was intended - confirm before tightening.
  awk '(NF==2 && $2~/0./){printf "%.3f\n",$2}' dtw_output.tmp > outputs/score.txt

  python pvalue.py > outputs/pval.txt

  # Matches: turn every "]" into "-", keep lines with more than two fields
  # whose first field then contains "-", and blank that first field.
  sed 's/]/-/g' dtw_output.tmp \
    | awk '(NF>2 && $1~/-/){$1=""; print $0}' > outputs/matches.txt
fi
# "obe" mode: same scoring steps as "normal", then builds the aligned
# profile table (outputs/aligned.profiles.txt) and its HTML rendering.
# The test quotes $network and uses "=", so an empty value or a strict POSIX
# "[" no longer produces a test error.
if [ "$network" = "obe" ]; then
  echo "obe"
  cp input.fasta input_bis.fasta

  # Flatten the second input to header<TAB>sequence per line; "%s" keeps
  # sequence text from being used as a printf format.
  awk '{ if ($1 ~ />/) { printf "\n%s\t", $1 } else { printf "%s", $1 } }' "$file2" \
    | awk '(NF>1)' > input2.fasta

  python crossalignpipe.py "$network" "$file2" > dtw_output.tmp

  # Score: second field of two-field lines, printed with three decimals.
  # NOTE(review): the "." in /0./ is unescaped and matches any character;
  # it looks like "0\." was intended - confirm before tightening.
  awk '(NF==2 && $2~/0./){printf "%.3f\n",$2}' dtw_output.tmp > outputs/score.txt

  python pvalue.py > outputs/pval.txt

  # Matches: turn every "]" into "-", keep lines with more than two fields
  # whose first field then contains "-", and blank that first field.
  sed 's/]/-/g' dtw_output.tmp \
    | awk '(NF>2 && $1~/-/){$1=""; print $0}' > outputs/matches.txt

  #for i in `awk '{print $0}' ./outputs/matches.txt | tr " " "\n" | awk '($1!~/]/)' | awk '(length($1)>0)'`; do awk '(NR=="'$i'")' shorter.txt; done > cross_short.txt
  # shorter.txt / longer.txt are not created in this script; presumably they
  # are written by one of the python steps above - confirm.
  cp shorter.txt cross_short.txt
  length=$(wc cross_short.txt | awk '{print $1}')

  # One matched position per line.
  awk '{for(i=1;i<=NF;i++){print $i}}' outputs/matches.txt > matches.col

  # For i = 1..length, take the i-th matched position, look up that line of
  # longer.txt and print "i <second field of that line>". Done in a single
  # awk pass instead of re-reading longer.txt once per row; positions that
  # are missing or beyond the end of longer.txt produce no output line.
  awk -v n="$length" '
    FILENAME == ARGV[1] { pos[FNR] = $1 + 0; next }
    { second[FNR] = $2 }
    END {
      total = n + 0
      for (i = 1; i <= total; i++) {
        if (pos[i] in second) {
          print i, second[pos[i]]
        }
      }
    }' matches.col longer.txt > cross_long.txt

  start0=$(head -n 1 ./outputs/matches.txt | awk '{print $1}')
  end0=$length
  # Defaults keep the arithmetic valid when either value came back empty.
  final0=$(( ${end0:-0} + ${start0:-0} ))
  echo "$final0" > outputs/end.txt
  #awk -v start=$start0 -v end=$final0 '($1>=start && $1<=end)' longer.txt > cross_long.txt
  head -n 1 ./outputs/matches.txt | awk '{print $1}' > outputs/start.txt

  Rscript overlap.r

  # Join the two profiles side by side and shift the third column by the
  # first matched position; start0 is passed with -v rather than spliced
  # into the awk source.
  paste -d " " cross_short.txt cross_long.txt \
    | awk -v start0="$start0" '{printf "%s\t%s\t%s\t%s\n", $1, $2, start0+$3, $4}' \
    > outputs/aligned.profiles.txt

  # Render the tab-separated table as HTML rows.
  awk -F '\t' 'BEGIN{printf "<tbody>\n"}{printf "\t<tr>\n\t\t<td>%s</td>\n\t\t<td>%s</td>\n\t\t<td>%s</td>\n\t\t<td>%s</td>\n",$1, $2, $3, $4}END{printf "</tbody>\n"}' outputs/aligned.profiles.txt > ./outputs/table.html
fi
# "fragment" mode: run crossalignpipe.py, collect scores and matches, build
# outputs/table_final.txt and render outputs/table.html.
# The test quotes $network and uses "=", so an empty value or a strict POSIX
# "[" no longer produces a test error.
if [ "$network" = "fragment" ]; then
  echo "fragment"
  cp input.fasta input_bis.fasta

  # Flatten the second input to header<TAB>sequence per line; "%s" keeps
  # sequence text from being used as a printf format.
  awk '{ if ($1 ~ />/) { printf "\n%s\t", $1 } else { printf "%s", $1 } }' "$file2" \
    | awk '(NF>1)' > input2.fasta

  python crossalignpipe.py "$network" "$file2" > dtw_output.tmp

  # Score: second field of two-field lines, printed with three decimals.
  # NOTE(review): the "." in /0./ is unescaped and matches any character;
  # it looks like "0\." was intended - confirm before tightening.
  awk '(NF==2 && $2~/0./){printf "%.3f\n",$2}' dtw_output.tmp > outputs/score.txt

  # Matches: turn every "]" into "-", keep lines with more than two fields
  # whose first field then contains "-", and blank that first field.
  sed 's/]/-/g' dtw_output.tmp \
    | awk '(NF>2 && $1~/-/){$1=""; print $0}' > outputs/matches.txt

  # Lines starting with "[1]": a two-field line contributes "<value><TAB>",
  # a longer line contributes "<value><TAB><value+200>" and ends the row.
  # Double quotes are stripped afterwards.
  # NOTE(review): the meaning of the +200 offset is not visible here.
  awk '(NF==2 && $1=="[1]"){printf "%s\t",$2} (NF>2 && $1=="[1]"){printf "%s\t%s\n",$2,$2+200}' dtw_output.tmp \
    | sed 's/"//g' > outputs/table_final.txt

  # table_final2.txt is read below but not written in this script;
  # presumably multipval.py produces it - confirm.
  python multipval.py

  # Render columns 1, 3, 4, 5 and 6 of the tab-separated table as HTML rows.
  awk -F '\t' 'BEGIN{printf "<tbody>\n"}{printf "\t<tr>\n\t\t<td>%s</td>\n\t\t<td>%s</td>\n\t\t<td>%s</td>\n\t\t<td>%s</td>\n\t\t<td>%s</td>\n",$1, $3, $4, $5, $6}END{printf "</tbody>\n"}' ./outputs/table_final2.txt > outputs/table.html
fi
# Dataset modes: any <network> value containing "dataset". When it also
# contains "custom", <file2> is first turned into a custom dataset.
# "case" performs the same substring match as [[ $network = *dataset* ]] but
# is understood by every POSIX shell.
case "$network" in
  *dataset*)
    case "$network" in
      *custom*)
        echo "custom_dataset"
        printf '%s\n' "$file2"
        # -p: do not fail if the directory is already there.
        mkdir -p custom_dataset
        # Flatten the uploaded set to header<TAB>sequence per line; "%s" keeps
        # sequence text from being used as a printf format.
        awk '{ if ($1 ~ />/) { printf "\n%s\t", $1 } else { printf "%s", $1 } }' "$file2" \
          | awk '(NF>1)' > multi.input.fasta
        echo "creating custom dataset"
        python multicrosspipeline.py global
        echo "dataset done"
        python crossalignpipe.py "$network" > dtw_output.tmp
        ;;
      *)
        echo "dataset"
        printf '%s\n' "$file2"
        python crossalignpipe.py "$network" "$file2" > dtw_output.tmp
        ;;
    esac

    # Lines starting with "[1]": a two-field line contributes "<value><TAB>",
    # a longer line contributes "<value><TAB><value+200>" and ends the row.
    # Double quotes are stripped afterwards.
    # NOTE(review): the meaning of the +200 offset is not visible here.
    awk '(NF==2 && $1=="[1]"){printf "%s\t",$2} (NF>2 && $1=="[1]"){printf "%s\t%s\n",$2,$2+200}' dtw_output.tmp \
      | sed 's/"//g' > outputs/table_final.txt

    # leng.txt and table_final2.txt are not written in this script;
    # presumably the python steps produce them - confirm.
    paste ./outputs/table_final.txt leng.txt > table_big.txt
    python multipval_dat.py

    echo "#short_RNA large_RNA Structural_Score Starting_match p-value" > ./outputs/output_table.txt
    # Order rows by column 6 via a temporary log($6) sort key, then drop the
    # key again. Presumably the log is there because "sort -n" does not parse
    # exponent notation - confirm.
    awk '{print $1,$2,$3,$4,$6, log($6)}' ./outputs/table_final2.txt \
      | sort -nk6 \
      | awk '{print $1, $2, $3, $4, $5}' >> ./outputs/output_table.txt
    #awk '{print $1,$2,$3,$4,$6}' ./outputs/table_final2.txt > ./outputs/output_table.txt

    zip -r ./outputs/Submission ./outputs/output_table.txt

    # HTML table of the first 20 data rows (line 1 is the header, skipped by NR>1).
    head -21 ./outputs/output_table.txt \
      | awk 'BEGIN{printf "<tbody>\n"}NR>1{printf "\t<tr>\n\t\t<td>%s</td>\n\t\t<td>%s</td>\n\t\t<td>%s</td>\n\t\t<td>%s</td>\n\t\t<td>%s</td>\n\t</tr>\n",$1, $2, $3, $4, $5}END{printf "</tbody>\n"}' > outputs/table.html
    ;;
esac

# Leave the working directory: two levels up, undoing the cd into
# tmp/<random> performed at the start of the script.
cd ../..