Skip to content

Commit

Permalink
adding arima mapping pipeline scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
LiaOb21 committed Jan 3, 2024
1 parent 6f04029 commit 706406e
Show file tree
Hide file tree
Showing 3 changed files with 269 additions and 0 deletions.
108 changes: 108 additions & 0 deletions scripts/filter_five_end.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
#!/usr/bin/perl
use strict;
use warnings;

my $prev_id = "";
my @five;
my @three;
my @unmap;
my @mid;
my @all;
my $counter = 0;

while (<STDIN>){
chomp;
if (/^@/){
print($_ . "\n");
next;
}
my ($id, $flag, $chr_from, $loc_from, $mapq, $cigar, $d1, $d2, $d3, $read, $read_qual, @rest) = split(/\t/);
my $bin = reverse(dec2bin($flag));
my @binary = split(//, $bin);
if ($prev_id ne $id && $prev_id ne ""){
if ($counter == 1){
if (@five == 1){
print($five[0] . "\n");
}
else{
my ($id_1, $flag_1, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1) = split(/\t/, $all[0]);
my $bin_1 = reverse(dec2bin($flag_1));
my @binary_1 = split(//, $bin_1);
$binary_1[2] = 1;
my $bin_1_new = reverse(join("",@binary_1));
my $flag_1_new = bin2dec($bin_1_new);
print(join("\t", $id_1, $flag_1_new, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1) . "\n");
}
}
elsif ($counter == 2 && @five == 1){
print($five[0] . "\n");
}
else{
my ($id_1, $flag_1, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1) = split(/\t/, $all[0]);
my $bin_1 = reverse(dec2bin($flag_1));
my @binary_1 = split(//, $bin_1);
$binary_1[2] = 1;
my $bin_1_new = reverse(join("", @binary_1));
my $flag_1_new = bin2dec($bin_1_new);
print(join("\t", $id_1, $flag_1_new, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1) . "\n");
}

$counter = 0;
undef @unmap;
undef @five;
undef @three;
undef @mid;
undef @all;
}

$counter++;
$prev_id = $id;
push(@all, $_);
if ($binary[2] == 1){
push @unmap,$_;
}
elsif ($binary[4] == 0 && $cigar =~ m/^[0-9]*M/ || $binary[4] == 1 && $cigar =~ m/.*M$/){
push(@five, $_);
}
elsif ($binary[4] == 1 && $cigar =~ m/^[0-9]*M/ || $binary[4] == 0 && $cigar =~ m/.*M$/){
push(@three, $_);
}
elsif ($cigar =~ m/^[0-9]*[HS].*M.*[HS]$/){
push(@mid, $_);
}
}

if ($counter == 1){
if (@five == 1){
print($five[0] . "\n");
}
else{
my ($id_1, $flag_1, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1) = split(/\t/, $all[0]);
my $bin_1 = reverse(dec2bin($flag_1));
my @binary_1 = split(//, $bin_1);
$binary_1[2] = 1;
my $bin_1_new = reverse(join("", @binary_1));
my $flag_1_new = bin2dec($bin_1_new);
print(join("\t", $id_1, $flag_1_new, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1) . "\n");
}
}
elsif ($counter == 2 && @five == 1){
print($five[0] . "\n");
}
else{
my ($id_1, $flag_1, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1) = split(/\t/, $all[0]);
my $bin_1 = reverse(dec2bin($flag_1));
my @binary_1 = split(//, $bin_1);
$binary_1[2] = 1;
my $bin_1_new = reverse(join("", @binary_1));
my $flag_1_new = bin2dec($bin_1_new);
print(join("\t", $id_1, $flag_1_new, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1) . "\n");
}

sub dec2bin {
return unpack("B32", pack("N", shift));
}

sub bin2dec {
return unpack("N", pack("B32", substr("0" x 32 . shift, -32)));
}
44 changes: 44 additions & 0 deletions scripts/get_stats.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/usr/bin/perl
use strict;

if ((scalar @ARGV) != 1){
print("Please provide the <bamfile>\n");
exit;
}
open(FILE, "samtools view $ARGV[0] |");

my ($all, $intra, $inter);
my ($intra_1, $intra_10, $intra_15, $intra_20);
while (my $line = <FILE>){
chomp $line;
my @liner = split('\t', $line);
$all++;

if ($liner[6] eq '='){
$intra++;
if (abs($liner[8]) >= 1000) {$intra_1++;}
if (abs($liner[8]) >= 10000) {$intra_10++;}
if (abs($liner[8]) >= 15000) {$intra_15++;}
if (abs($liner[8]) >= 20000) {$intra_20++;}
}
else{
$inter++;
}
}
close(FILE);

$all = $all/2;
$intra = $intra/2;
$intra_1 = $intra_1/2;
$intra_10 = $intra_10/2;
$intra_15 = $intra_15/2;
$intra_20 = $intra_20/2;
$inter = $inter/2;

print("All\t$all\n");
print("All intra\t$intra\n");
print("All intra 1kb\t$intra_1\n");
print("All intra 10kb\t$intra_10\n");
print("All intra 15kb\t$intra_15\n");
print("All intra 20kb\t$intra_20\n");
print("All inter\t$inter\n");
117 changes: 117 additions & 0 deletions scripts/two_read_bam_combiner.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
#!/usr/bin/perl
use strict;

MAIN : {
my ($read1_bam, $read2_bam, $samtools, $mq) = @ARGV;

if ( (! defined($read1_bam)) || (! defined($read2_bam)) ){
die ("Usage: ./two_read_bam_combiner.pl <read 1 bam> <read 2 bam> <path to samtools> <minimum map quality filter>\n");
}

open(FILE1, "$samtools view -h $read1_bam |");
open(FILE2, "$samtools view -h $read2_bam |");

my $line1 = <FILE1>;
my $line2 = <FILE2>;

my $counter = 0;
my $new_counter = 0;

while (defined($line1)){
if ($line1 =~ /^(\@)SQ/){
if ($line1 ne $line2){
print($line1);
print($line2);
die ("Inconsistent BAM headers. BAM files must be aligned to same reference.");
}
else{
print($line1);
}
$line1 = <FILE1>;
$line2 = <FILE2>;
next;
}

$counter++;
if ($counter == ($new_counter + 1000000)){
print(STDERR $counter . "\n");
$new_counter = $counter;
}

chomp $line1;
chomp $line2;

my ($id1, $flag1, $chr_from1, $loc_from1, $mapq1, $cigar1, $d1_1, $d2_1, $d3_1, $read1, $read_qual1, @rest1) = split(/\t/, $line1);
my ($id2, $flag2, $chr_from2, $loc_from2, $mapq2, $cigar2, $d1_2, $d2_2, $d3_2, $read2, $read_qual2, @rest2) = split(/\t/, $line2);

if ($id1 ne $id2){
die ("The read id's of the two files do not match up at line number $counter. Files should be from the same sample and sorted in identical order.\n");
}

my $bin1 = reverse(dec2bin($flag1));
my $bin2 = reverse(dec2bin($flag2));

my @binary1 = split(//, $bin1);
my @binary2 = split(//, $bin2);

my $trouble = 0;
if (($binary1[2] == 1) || ($mapq1 < $mq)){
$trouble = 1;
}
if (($binary2[2]== 1) || ($mapq2 < $mq)) {
$trouble = 1;
}

my $proper_pair1;
my $proper_pair2;
my $dist1;
my $dist2;

if (($binary1[2] == 0) && ($binary2[2] == 0)){
$proper_pair1 = 1;
$proper_pair2 = 1;
if ($chr_from1 eq $chr_from2){
my $dist = abs($loc_from1 - $loc_from2);
if ($loc_from1 >= $loc_from2){
$dist1 = -1 * $dist;
$dist2 = $dist;
}
else{
$dist1 = $dist;
$dist2 = -1 * $dist;
}
}
else{
$dist1 = 0;
$dist2 = 0;
}
}
else{
$proper_pair1 = 0;
$proper_pair2 = 0;
$dist1 = 0;
$dist2 = 0;
}

my $new_bin1 = join("", "0" x 21, $binary1[10], $binary1[9], "001", $binary2[4], $binary1[4], $binary2[2], $binary1[2], $proper_pair1, "1");
my $new_bin2 = join("", "0" x 21, $binary2[10], $binary2[9], "010", $binary1[4], $binary2[4], $binary1[2], $binary2[2], $proper_pair2, "1");

my $new_flag1 = bin2dec($new_bin1);
my $new_flag2 = bin2dec($new_bin2);

unless ($trouble == 1){
print(join("\t", $id1, $new_flag1, $chr_from1, $loc_from1, $mapq1, $cigar1, $chr_from2, $loc_from2, $dist1, $read1, $read_qual1, @rest1) . "\n");
print(join("\t", $id2, $new_flag2, $chr_from2, $loc_from2, $mapq2, $cigar2, $chr_from1, $loc_from1, $dist2, $read2, $read_qual2, @rest2) . "\n");
}
$line1 = <FILE1>;
$line2 = <FILE2>;
}
}

sub dec2bin {
return unpack("B32", pack("N", shift));
}

sub bin2dec {
return unpack("N", pack("B32", substr("0" x 32 . shift, -32)));
}

0 comments on commit 706406e

Please sign in to comment.