-
Notifications
You must be signed in to change notification settings - Fork 2
/
eGIFT_parser.pl
executable file
·75 lines (73 loc) · 2.1 KB
/
eGIFT_parser.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/perl
# eGIFT_parser.pl -- convert an eGIFT input file into gene/iterm tables.
#
# Usage: eGIFT_parser.pl PREFIX
#
# Reads PREFIX.txt (comma-separated, first line skipped as a header) and
# blacklist.txt from the current directory, then writes:
#   PREFIX_gene_iterm.txt           gene <TAB> iterm <TAB> category
#   PREFIX_gene_iterm_original.txt  gene <TAB> iterm
#   PREFIX_gene_iterm_modified.txt  gene <TAB> iterm, blacklisted iterms omitted
#
# Rows whose category is "species", "technique" or "cell (type/line)" are
# removed from all three outputs.
use strict;
use warnings;

my $BLACKLIST_FILE = 'blacklist.txt';

# Categories that never reach any output file.
my %is_excluded_category = map { $_ => 1 }
    ('species', 'technique', 'cell (type/line)');

my $prefix = $ARGV[0];
die "Usage: $0 PREFIX   (reads PREFIX.txt and $BLACKLIST_FILE)\n"
    unless defined $prefix && length $prefix;

# --- Load the blacklist ----------------------------------------------------
# One term per line.  Text starting at whitespace followed by "--" is treated
# as a trailing comment and dropped together with the whitespace before it.
# A "--" that is not preceded by whitespace is kept as part of the term.
my %is_blacklisted;
open(my $black_fh, '<', $BLACKLIST_FILE)
    or die "can not open $BLACKLIST_FILE: $!";
while (my $entry = <$black_fh>) {
    $entry =~ s/\R//;    # \R instead of chomp so CRLF files work too
    if ($entry =~ /\A(.*)\s--/) {
        $entry = $1;
        $entry =~ s/\s+\z//;
    }
    $is_blacklisted{$entry} = 1;
}
close($black_fh);

# --- Open input and outputs ------------------------------------------------
my $in_path       = $prefix . '.txt';
my $all_path      = $prefix . '_gene_iterm.txt';
my $original_path = $prefix . '_gene_iterm_original.txt';
my $modified_path = $prefix . '_gene_iterm_modified.txt';

open(my $in_fh, '<', $in_path)
    or die "can not open $in_path: $!";
open(my $all_fh, '>', $all_path)
    or die "can not open $all_path: $!";
open(my $original_fh, '>', $original_path)
    or die "can not open $original_path: $!";
open(my $modified_fh, '>', $modified_path)
    or die "can not open $modified_path: $!";

# --- Convert ---------------------------------------------------------------
while (my $line = <$in_fh>) {
    next if $. == 1;    # skip the header row
    $line =~ s/\R\z//;

    # Plain comma split: a quoted field that itself contains a comma is cut
    # at that comma.  Only the first (iterm) and third (category) fields are
    # used; surrounding double quotes are removed from each field.
    my ($iterm, undef, $cat) = map {
        my $field = $_;
        $field =~ s/^"(.*)"$/$1/;
        $field;
    } split /,/, $line;
    next unless defined $iterm;
    $cat = '' unless defined $cat;

    # Any double quote still left in the iterm becomes a dash.
    $iterm =~ tr/"/-/;

    next if $is_excluded_category{$cat};
    my $keep_in_modified = !exists $is_blacklisted{$iterm};

    # Every "NAME (digits)" occurrence anywhere on the line yields one gene.
    while ($line =~ /(\S+) \(\d*\)/g) {
        my $gene = $1;
        $gene =~ tr/a-z/A-Z/;
        print {$all_fh}      "$gene\t$iterm\t$cat\n";
        print {$original_fh} "$gene\t$iterm\n";
        print {$modified_fh} "$gene\t$iterm\n" if $keep_in_modified;
    }
}
close($in_fh);

# Buffered write errors only surface at close, so check each output handle.
for my $out (
    [ $all_fh,      $all_path ],
    [ $original_fh, $original_path ],
    [ $modified_fh, $modified_path ],
) {
    close($out->[0]) or die "can not close $out->[1]: $!";
}