-
Notifications
You must be signed in to change notification settings - Fork 1
/
dissect_similarity.pl
66 lines (61 loc) · 1.63 KB
/
dissect_similarity.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
use strict;
use warnings;
my($in,$out) = @ARGV;
open(IN,"<$in") or die $!;
open(OUT,">$out");
open(OUT1,">$out.stat");
my @samples;
my %hash_stat;
while(<IN>){
chomp;
my($chr,$start,$end,$mknum,@datas) = split/\t/;
if($_ =~ /^#/){
@samples = @datas;
next;
}
next if($mknum < 1);
my %hash_sample;
for(my $i = 0; $i < @samples; $i++){
my $sample = $samples[$i];
my($idn,$cov) = split/\|/, $datas[$i];
next unless($cov/$mknum > 1/3);
$hash_sample{$sample}{idn} = $idn/$cov;
$hash_sample{$sample}{cov} = $cov/$mknum;
}
my @samples_sorted = sort {$hash_sample{$b}{idn} <=> $hash_sample{$a}{idn} or $hash_sample{$b}{idn} <=> $hash_sample{$a}{idn}} keys %hash_sample;
my @topSamples = ();
my @topCovers = ();
my $top;
for(my $i = 0; $i < @samples_sorted; $i++){
my $sample = $samples_sorted[$i];
if($i == 0){
push @topSamples, $sample;
push @topCovers, $hash_sample{$sample}{cov};
$top = $hash_sample{$sample}{idn};
next;
}
last if($hash_sample{$sample}{idn} < $top);
push @topSamples, $sample;
push @topCovers, $hash_sample{$sample}{cov};
}
next if(@topSamples == 0);
my $score = 1/@topSamples;
foreach my $sample(@topSamples){
$hash_stat{$sample}{abscore}++;
$hash_stat{$sample}{rescore} += $score;
}
print OUT "$chr\t$start\t$end\t$mknum\t$top\t".join(",",@topSamples)."\t".join(",",@topCovers)."\n";
}
close IN;
close OUT;
foreach my $sample(@samples){
my $abscore = 0;
my $rescore = 0;
if(exists $hash_stat{$sample}){
$abscore = $hash_stat{$sample}{abscore};
$rescore = $hash_stat{$sample}{rescore};
}
$rescore = sprintf("%.3f",$rescore);
print OUT1 "$sample\t$abscore\t$rescore\n";
}
close OUT1;