cogclass2018.pl
COGclass2018.pl
Written by Haeyoung Jeong.
#!/usr/bin/perl
# COGclass2018.pl
#
# Appends a COG functional-category code and the COG name to every row of a
# comma-separated best-hit file, then prints a per-category summary.
#
# Usage:  COGclass2018.pl GenQuery.COG.csv.bestHit > annotated.csv
#
# Input:  $ARGV[0] - CSV file ('-' reads STDIN) whose 6th column (index 5)
#         is looked up in the COG names table; rows whose 6th column is -1
#         are counted as "Not assigned".
# Output: STDOUT - every input row followed by ",<code>,<name>", then a
#                  block of '#'-prefixed summary lines.
#         STDERR - one note per COG whose category was picked at random,
#                  and one warning per row whose COG id is not in the table.
use strict;
use warnings;

# Reference tables: category code -> description, and COG id -> code(s)/name.
my $FUN_FILE  = '/data/Utilities/DB/COG/COG2014/fun2003-2014.tab';
my $NAME_FILE = '/data/Utilities/DB/COG/COG2014/cognames2003-2014.tab';

@ARGV or die "Usage: $0 GenQuery.COG.csv.bestHit\n";
my $hit_file = $ARGV[0];

my ($codes,   $fun_of)  = load_function_classes($FUN_FILE);
my ($code_of, $name_of) = load_cog_names($NAME_FILE);

my %count_of;            # category code -> number of rows assigned to it
my $not_assigned = 0;    # rows whose COG column is -1
my $total        = 0;    # rows counted (assigned + not assigned + unknown)
my $lines        = 0;    # rows read from the input file

my $hits = open_for_reading($hit_file);
while (my $row = <$hits>) {
    $row =~ s/\r?\n\z//;    # tolerate CRLF input as well as LF
    my @fields = split /,/, $row;
    my $cog_id = defined $fields[5] ? $fields[5] : '';

    my $code = defined $code_of->{$cog_id} ? $code_of->{$cog_id} : '';
    my $name = defined $name_of->{$cog_id} ? $name_of->{$cog_id} : '';

    # COG ids are text, so test for the -1 sentinel as a string rather than
    # relying on a non-numeric id evaluating to 0 under '=='.
    if ($cog_id =~ /\A\s*-1\s*\z/) {
        $not_assigned++;
    }
    elsif (length $code) {
        $count_of{$code}++;
    }
    else {
        # Still part of the total, but belongs to no reported category.
        warn "$hit_file line $.: COG '$cog_id' not found in $NAME_FILE; "
           . "not counted in any category\n";
    }
    $total++;

    print $row, ',', $code, ',', $name, "\n";
    $lines++;
}
close $hits;

print "#--------------------------------------------\n";
for my $letter (@$codes) {
    my $count = $count_of{$letter} || 0;
    print join("\t", '#', $fun_of->{$letter}, $count), "\n";
}
print join("\t", '#', 'Not assigned', $not_assigned), "\n";
print "#--------------------------------------------\n";
print "#total: $total (line numbers of $hit_file file: $lines)\n";

# Open $path for reading and return the handle; '-' means STDIN.
# Dies with the system error message when the file cannot be opened.
sub open_for_reading {
    my ($path) = @_;
    return \*STDIN if $path eq '-';
    open my $fh, '<', $path
        or die "Cannot open '$path' for reading: $!\n";
    return $fh;
}

# Read the tab-separated category table (code, description).
# Returns (\@codes_in_file_order, \%description_of_code).
# Lines starting with '#' and blank lines are skipped.
sub load_function_classes {
    my ($path) = @_;
    my (@codes, %fun_of);

    my $fh = open_for_reading($path);
    while (my $line = <$fh>) {
        $line =~ s/\r?\n\z//;
        next if $line =~ /^#/ or $line !~ /\S/;
        my ($code, $description) = split /\t/, $line;
        $fun_of{$code} = defined $description ? $description : '';
        push @codes, $code;    # keep file order for the final report
    }
    close $fh;

    return (\@codes, \%fun_of);
}

# Read the tab-separated COG table (COG id, category code(s), COG name).
# Returns (\%category_code_of_cog, \%name_of_cog).
# A COG carrying several category letters is assigned ONE of them at random,
# so counts for such COGs can differ between runs; each pick is reported on
# STDERR.
sub load_cog_names {
    my ($path) = @_;
    my (%code_of, %name_of);

    my $fh = open_for_reading($path);
    while (my $line = <$fh>) {
        $line =~ s/\r?\n\z//;
        next if $line =~ /^#/ or $line !~ /\S/;
        my ($cog_id, $letters, $cog_name) = split /\t/, $line;
        $letters = '' unless defined $letters;

        if (length($letters) > 1) {
            my @choices = split //, $letters;
            my $picked  = $choices[ rand @choices ];
            $code_of{$cog_id} = $picked;
            print STDERR "$cog_id $letters (random selection) ===> $picked\n";
        }
        else {
            $code_of{$cog_id} = $letters;
        }
        $name_of{$cog_id} = defined $cog_name ? $cog_name : '';
    }
    close $fh;

    return (\%code_of, \%name_of);
}
cogclass2018.pl.txt · Last modified: by 127.0.0.1
