User Tools

Site Tools


cogclass2018.pl

COGclass2018.pl

Written by Haeyoung Jeong.

#!/usr/bin/perl
# COGclass2018.pl -- append COG functional codes and names to a best-hit table
# and print a per-category summary.
#
# Usage: COGclass2018.pl GenQuery.COG.csv.bestHit
#
#   $ARGV[0]  comma-separated best-hit table. The 6th column is used as the
#             COG id; the value -1 there is counted as "Not assigned".
#
# STDOUT: every input line with ",<one-letter code>,<COG name>" appended,
#         followed by a '#'-prefixed summary (one row per functional
#         category, the "Not assigned" row and the grand total).
# STDERR: one note per COG that has a multi-letter code (a single letter is
#         picked at random for it), and a warning if some COG ids were not
#         found in the names table.

use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

# Lookup tables (tab-separated, lines starting with '#' are skipped):
#   $fun  : one-letter code <TAB> description of the functional category
#   $name : COG id <TAB> functional code letter(s) <TAB> COG name
my $fun  = '/data/Utilities/DB/COG/COG2014/fun2003-2014.tab';
my $name = '/data/Utilities/DB/COG/COG2014/cognames2003-2014.tab';

@ARGV or die "Usage: $0 GenQuery.COG.csv.bestHit\n";
my $cog_file = $ARGV[0];

# --- Functional categories ------------------------------------------------
my %code2fun;     # one-letter code => category description
my @oneLetter;    # codes in file order; this fixes the order of the summary

open my $fun_fh, '<', $fun or die "Cannot open $fun: $!\n";
while (my $line = <$fun_fh>) {
    chomp $line;
    next if $line =~ /^#/;
    my ($code, $description) = split /\t/, $line;
    # A blank line would otherwise create a bogus, empty summary row.
    next unless defined $code && length $code;
    $code2fun{$code} = defined $description ? $description : '';
    push @oneLetter, $code;
}
close $fun_fh;

# --- COG id => code / name ------------------------------------------------
my %cog2code;     # COG id => single functional code letter
my %cog2name;     # COG id => COG name

open my $name_fh, '<', $name or die "Cannot open $name: $!\n";
while (my $line = <$name_fh>) {
    chomp $line;
    next if $line =~ /^#/;
    my ($cog, $codes, $cog_name) = split /\t/, $line;
    next unless defined $cog && length $cog;
    $codes = '' unless defined $codes;

    if (length($codes) > 1) {
        # A COG with several functional letters is counted only once, under
        # one letter picked at random. NOTE: this makes the per-category
        # counts vary between runs; every pick is logged to STDERR.
        my @letters = split //, $codes;
        my $picked  = $letters[ rand @letters ];
        $cog2code{$cog} = $picked;
        print STDERR "$cog $codes (random selection) ===> $picked\n";
    } else {
        $cog2code{$cog} = $codes;
    }
    $cog2name{$cog} = defined $cog_name ? $cog_name : '';
}
close $name_fh;

# --- Annotate the best-hit table -------------------------------------------
my %codeNum;                 # one-letter code => number of hits
my $notAssigned = 0;         # lines whose COG column is -1
my $unknown     = 0;         # lines whose COG id has no usable code
my $num         = 0;         # lines processed
my $lines       = 0;         # lines read (kept separate for the report)

open my $cog_fh, '<', $cog_file or die "Cannot open $cog_file: $!\n";
while (my $line = <$cog_fh>) {
    chomp $line;
    my @field = split /,/, $line;
    my $cog   = $field[5];

    # Unknown or missing ids are printed with empty code/name fields.
    my $code     = (defined $cog && exists $cog2code{$cog}) ? $cog2code{$cog} : '';
    my $cog_name = (defined $cog && exists $cog2name{$cog}) ? $cog2name{$cog} : '';

    # Compare numerically only when the field really is a number, so that
    # ids such as "COG0001" are never fed to '=='.
    if (defined $cog && looks_like_number($cog) && $cog == -1) {
        $notAssigned++;
    } elsif (length $code) {
        $codeNum{$code}++;
    } else {
        $unknown++;
    }
    $num++;

    print $line, ',', $code, ',', $cog_name, "\n";
    $lines++;
}
close $cog_fh;

# --- Summary ---------------------------------------------------------------
print "#--------------------------------------------\n";
foreach my $code (@oneLetter) {
    my $count = exists $codeNum{$code} ? $codeNum{$code} : 0;
    print join("\t", '#', $code2fun{$code}, $count), "\n";
}
print join("\t", '#', 'Not assigned', $notAssigned), "\n";
print "#--------------------------------------------\n";
print "#total: $num (line numbers of $cog_file file: $lines)\n";

# These lines are part of the total but of no category row above.
print STDERR "Warning: $unknown line(s) had a COG id without a functional code in $name\n"
    if $unknown;
cogclass2018.pl.txt · Last modified: 2021/03/17 13:09 by 127.0.0.1