generated from snakemake-workflows/snakemake-workflow-template
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
adding arima mapping pipeline scripts
- Loading branch information
Showing
3 changed files
with
269 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
#!/usr/bin/perl | ||
use strict; | ||
use warnings; | ||
|
||
my $prev_id = ""; | ||
my @five; | ||
my @three; | ||
my @unmap; | ||
my @mid; | ||
my @all; | ||
my $counter = 0; | ||
|
||
while (<STDIN>){ | ||
chomp; | ||
if (/^@/){ | ||
print($_ . "\n"); | ||
next; | ||
} | ||
my ($id, $flag, $chr_from, $loc_from, $mapq, $cigar, $d1, $d2, $d3, $read, $read_qual, @rest) = split(/\t/); | ||
my $bin = reverse(dec2bin($flag)); | ||
my @binary = split(//, $bin); | ||
if ($prev_id ne $id && $prev_id ne ""){ | ||
if ($counter == 1){ | ||
if (@five == 1){ | ||
print($five[0] . "\n"); | ||
} | ||
else{ | ||
my ($id_1, $flag_1, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1) = split(/\t/, $all[0]); | ||
my $bin_1 = reverse(dec2bin($flag_1)); | ||
my @binary_1 = split(//, $bin_1); | ||
$binary_1[2] = 1; | ||
my $bin_1_new = reverse(join("",@binary_1)); | ||
my $flag_1_new = bin2dec($bin_1_new); | ||
print(join("\t", $id_1, $flag_1_new, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1) . "\n"); | ||
} | ||
} | ||
elsif ($counter == 2 && @five == 1){ | ||
print($five[0] . "\n"); | ||
} | ||
else{ | ||
my ($id_1, $flag_1, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1) = split(/\t/, $all[0]); | ||
my $bin_1 = reverse(dec2bin($flag_1)); | ||
my @binary_1 = split(//, $bin_1); | ||
$binary_1[2] = 1; | ||
my $bin_1_new = reverse(join("", @binary_1)); | ||
my $flag_1_new = bin2dec($bin_1_new); | ||
print(join("\t", $id_1, $flag_1_new, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1) . "\n"); | ||
} | ||
|
||
$counter = 0; | ||
undef @unmap; | ||
undef @five; | ||
undef @three; | ||
undef @mid; | ||
undef @all; | ||
} | ||
|
||
$counter++; | ||
$prev_id = $id; | ||
push(@all, $_); | ||
if ($binary[2] == 1){ | ||
push @unmap,$_; | ||
} | ||
elsif ($binary[4] == 0 && $cigar =~ m/^[0-9]*M/ || $binary[4] == 1 && $cigar =~ m/.*M$/){ | ||
push(@five, $_); | ||
} | ||
elsif ($binary[4] == 1 && $cigar =~ m/^[0-9]*M/ || $binary[4] == 0 && $cigar =~ m/.*M$/){ | ||
push(@three, $_); | ||
} | ||
elsif ($cigar =~ m/^[0-9]*[HS].*M.*[HS]$/){ | ||
push(@mid, $_); | ||
} | ||
} | ||
|
||
if ($counter == 1){ | ||
if (@five == 1){ | ||
print($five[0] . "\n"); | ||
} | ||
else{ | ||
my ($id_1, $flag_1, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1) = split(/\t/, $all[0]); | ||
my $bin_1 = reverse(dec2bin($flag_1)); | ||
my @binary_1 = split(//, $bin_1); | ||
$binary_1[2] = 1; | ||
my $bin_1_new = reverse(join("", @binary_1)); | ||
my $flag_1_new = bin2dec($bin_1_new); | ||
print(join("\t", $id_1, $flag_1_new, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1) . "\n"); | ||
} | ||
} | ||
elsif ($counter == 2 && @five == 1){ | ||
print($five[0] . "\n"); | ||
} | ||
else{ | ||
my ($id_1, $flag_1, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1) = split(/\t/, $all[0]); | ||
my $bin_1 = reverse(dec2bin($flag_1)); | ||
my @binary_1 = split(//, $bin_1); | ||
$binary_1[2] = 1; | ||
my $bin_1_new = reverse(join("", @binary_1)); | ||
my $flag_1_new = bin2dec($bin_1_new); | ||
print(join("\t", $id_1, $flag_1_new, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1) . "\n"); | ||
} | ||
|
||
sub dec2bin { | ||
return unpack("B32", pack("N", shift)); | ||
} | ||
|
||
sub bin2dec { | ||
return unpack("N", pack("B32", substr("0" x 32 . shift, -32))); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
#!/usr/bin/perl | ||
use strict; | ||
|
||
if ((scalar @ARGV) != 1){ | ||
print("Please provide the <bamfile>\n"); | ||
exit; | ||
} | ||
open(FILE, "samtools view $ARGV[0] |"); | ||
|
||
my ($all, $intra, $inter); | ||
my ($intra_1, $intra_10, $intra_15, $intra_20); | ||
while (my $line = <FILE>){ | ||
chomp $line; | ||
my @liner = split('\t', $line); | ||
$all++; | ||
|
||
if ($liner[6] eq '='){ | ||
$intra++; | ||
if (abs($liner[8]) >= 1000) {$intra_1++;} | ||
if (abs($liner[8]) >= 10000) {$intra_10++;} | ||
if (abs($liner[8]) >= 15000) {$intra_15++;} | ||
if (abs($liner[8]) >= 20000) {$intra_20++;} | ||
} | ||
else{ | ||
$inter++; | ||
} | ||
} | ||
close(FILE); | ||
|
||
$all = $all/2; | ||
$intra = $intra/2; | ||
$intra_1 = $intra_1/2; | ||
$intra_10 = $intra_10/2; | ||
$intra_15 = $intra_15/2; | ||
$intra_20 = $intra_20/2; | ||
$inter = $inter/2; | ||
|
||
print("All\t$all\n"); | ||
print("All intra\t$intra\n"); | ||
print("All intra 1kb\t$intra_1\n"); | ||
print("All intra 10kb\t$intra_10\n"); | ||
print("All intra 15kb\t$intra_15\n"); | ||
print("All intra 20kb\t$intra_20\n"); | ||
print("All inter\t$inter\n"); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
#!/usr/bin/perl | ||
use strict; | ||
|
||
MAIN : { | ||
my ($read1_bam, $read2_bam, $samtools, $mq) = @ARGV; | ||
|
||
if ( (! defined($read1_bam)) || (! defined($read2_bam)) ){ | ||
die ("Usage: ./two_read_bam_combiner.pl <read 1 bam> <read 2 bam> <path to samtools> <minimum map quality filter>\n"); | ||
} | ||
|
||
open(FILE1, "$samtools view -h $read1_bam |"); | ||
open(FILE2, "$samtools view -h $read2_bam |"); | ||
|
||
my $line1 = <FILE1>; | ||
my $line2 = <FILE2>; | ||
|
||
my $counter = 0; | ||
my $new_counter = 0; | ||
|
||
while (defined($line1)){ | ||
if ($line1 =~ /^(\@)SQ/){ | ||
if ($line1 ne $line2){ | ||
print($line1); | ||
print($line2); | ||
die ("Inconsistent BAM headers. BAM files must be aligned to same reference."); | ||
} | ||
else{ | ||
print($line1); | ||
} | ||
$line1 = <FILE1>; | ||
$line2 = <FILE2>; | ||
next; | ||
} | ||
|
||
$counter++; | ||
if ($counter == ($new_counter + 1000000)){ | ||
print(STDERR $counter . "\n"); | ||
$new_counter = $counter; | ||
} | ||
|
||
chomp $line1; | ||
chomp $line2; | ||
|
||
my ($id1, $flag1, $chr_from1, $loc_from1, $mapq1, $cigar1, $d1_1, $d2_1, $d3_1, $read1, $read_qual1, @rest1) = split(/\t/, $line1); | ||
my ($id2, $flag2, $chr_from2, $loc_from2, $mapq2, $cigar2, $d1_2, $d2_2, $d3_2, $read2, $read_qual2, @rest2) = split(/\t/, $line2); | ||
|
||
if ($id1 ne $id2){ | ||
die ("The read id's of the two files do not match up at line number $counter. Files should be from the same sample and sorted in identical order.\n"); | ||
} | ||
|
||
my $bin1 = reverse(dec2bin($flag1)); | ||
my $bin2 = reverse(dec2bin($flag2)); | ||
|
||
my @binary1 = split(//, $bin1); | ||
my @binary2 = split(//, $bin2); | ||
|
||
my $trouble = 0; | ||
if (($binary1[2] == 1) || ($mapq1 < $mq)){ | ||
$trouble = 1; | ||
} | ||
if (($binary2[2]== 1) || ($mapq2 < $mq)) { | ||
$trouble = 1; | ||
} | ||
|
||
my $proper_pair1; | ||
my $proper_pair2; | ||
my $dist1; | ||
my $dist2; | ||
|
||
if (($binary1[2] == 0) && ($binary2[2] == 0)){ | ||
$proper_pair1 = 1; | ||
$proper_pair2 = 1; | ||
if ($chr_from1 eq $chr_from2){ | ||
my $dist = abs($loc_from1 - $loc_from2); | ||
if ($loc_from1 >= $loc_from2){ | ||
$dist1 = -1 * $dist; | ||
$dist2 = $dist; | ||
} | ||
else{ | ||
$dist1 = $dist; | ||
$dist2 = -1 * $dist; | ||
} | ||
} | ||
else{ | ||
$dist1 = 0; | ||
$dist2 = 0; | ||
} | ||
} | ||
else{ | ||
$proper_pair1 = 0; | ||
$proper_pair2 = 0; | ||
$dist1 = 0; | ||
$dist2 = 0; | ||
} | ||
|
||
my $new_bin1 = join("", "0" x 21, $binary1[10], $binary1[9], "001", $binary2[4], $binary1[4], $binary2[2], $binary1[2], $proper_pair1, "1"); | ||
my $new_bin2 = join("", "0" x 21, $binary2[10], $binary2[9], "010", $binary1[4], $binary2[4], $binary1[2], $binary2[2], $proper_pair2, "1"); | ||
|
||
my $new_flag1 = bin2dec($new_bin1); | ||
my $new_flag2 = bin2dec($new_bin2); | ||
|
||
unless ($trouble == 1){ | ||
print(join("\t", $id1, $new_flag1, $chr_from1, $loc_from1, $mapq1, $cigar1, $chr_from2, $loc_from2, $dist1, $read1, $read_qual1, @rest1) . "\n"); | ||
print(join("\t", $id2, $new_flag2, $chr_from2, $loc_from2, $mapq2, $cigar2, $chr_from1, $loc_from1, $dist2, $read2, $read_qual2, @rest2) . "\n"); | ||
} | ||
$line1 = <FILE1>; | ||
$line2 = <FILE2>; | ||
} | ||
} | ||
|
||
sub dec2bin { | ||
return unpack("B32", pack("N", shift)); | ||
} | ||
|
||
sub bin2dec { | ||
return unpack("N", pack("B32", substr("0" x 32 . shift, -32))); | ||
} |