-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmksapTranslate
executable file
·131 lines (120 loc) · 3.53 KB
/
mksapTranslate
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/usr/local/bin/perl
############################################################################
# SCRIPT NAME: mksapTranslate
# DESCRIPTION: read in aligned HLADB files; output sap files
#
# DATE WRITTEN: April 28, 2002
# WRITTEN BY: Martin Maiers
#
# REVISION HISTORY:
# REVISION DATE REVISED BY DESCRIPTION
# ------- ---------- -------------- -------------------------------------
#
##############################################################################
use strict;
use warnings;
foreach my $file (`/bin/ls -1 *.db`) {
chomp $file;
if($file=~/(HLA-\w+).db/) {
my $locus = $1;
mksap($locus);
}
}
exit 0;
sub mksap {
my $locus = shift;
my $dbfile = "$locus.db";
my $sapfile = "$locus.sap";
print "creating $sapfile from $dbfile\n";
my %Seq;
open PEP, $dbfile or die "$0: $dbfile: $!\n";
my $maxlength = 0;
while(<PEP>) {
chomp;
my($loc, $allele, $seq) = split / /; # tab
$loc = "C" if $loc eq "Cw";
next unless $locus =~/^$loc/i;
next if $allele =~/N/; # skip nulls
#$allele =substr($allele, 0, 4); # ignore slient mutations
my $newallele = getAA($allele);
#print STDERR "$allele becomes $newallele\n";
$allele = $newallele;
my $len = length($seq);
$maxlength = $len if $maxlength < $len;
$Seq{$allele} = $seq;
}
#
# count amino acids that vary
#
my @pos;
my ($offset, $s_codon, $e_codon);
if($locus =~/^HLA-A/i || $locus =~/^HLA-B/i || $locus=~/^HLA-C/i) {
print STDERR "class 1 style\n";
$offset = 1; # -23 # 1
$s_codon = 1; # 24 # 1
$e_codon = 183; # 180 # 183
} elsif ($locus =~/^HLA-DRB[1345]/i) {
print STDERR "class 2 style\n";
$offset = 1; #-28; # because the leader is 29 position -28 to 0
$s_codon = 1; #9;
$e_codon = 95;
} elsif ($locus =~/^HLA-DQB1/i) {
print STDERR "class 2 style\n";
$offset = 5; # 5
$s_codon = 1; # 1
$e_codon = 95; # 95
} elsif ($locus =~/^HLA-DQA1/i) {
print STDERR "class 2 style\n";
$offset = 1; # 1
$s_codon = 1; # 1
$e_codon = 88; # 88
} elsif ($locus =~/^HLA-DPB1/i) {
# imgt/HLA nucleotide CDS in blocks
print STDERR "class 2 style\n";
$offset = 1; # 1
$s_codon = 1; # 1
$e_codon = 93; # 93
} elsif ($locus =~/^HLA-DPA1/i) {
# imgt/HLA nucleotide CDS in blocks
print STDERR "class 2 style\n";
$offset = 1; # 1
$s_codon = 1; # 1
$e_codon = 85; # 85
} else {
die "unable to process locus: $locus\n";
}
for(my $i=0; $i<$maxlength; $i++) {
my %CTaa;
foreach my $allele (keys %Seq) {
my $seq = $Seq{$allele};
next if $i > length($seq);
my $aa = substr($seq, $i, 1);
next if $aa eq "*";
next if $aa eq "X";
$CTaa{$aa}++;
}
my $codon = $i + $offset;
next unless $codon >=$s_codon;
next unless $codon <=$e_codon;
next unless scalar(keys %CTaa)>1; # uncomment this line to have all positions be treated as polymorphic
push @pos, $codon;
}
my %sap;
foreach my $allele (keys %Seq) {
foreach my $aapos (@pos) {
next if ($aapos - $offset) > length($Seq{$allele});
my $s = substr($Seq{$allele}, $aapos - $offset,1).$aapos;
push @{$sap{$allele}}, $s;
}
}
open SAPFILE, ">$locus.sap" or die "$0: $! $locus.sap\n";
foreach (sort keys %sap) {
print SAPFILE join (' ', $locus, $_, join (' ', @{$sap{$_}})), "\n";
}
close SAPFILE;
}
sub getAA {
my $allele = shift;
my (@blocks) = split /:/, $allele;
return join (':', $blocks[0], $blocks[1]);
}