#!/usr/bin/perl
# ====== COGclass2018.pl ======
# Written by Haeyoung Jeong.
#
# Usage: COGclass2018.pl GenQuery.COG.csv.bestHit > annotated.csv
#
# $ARGV[0] is a comma-separated best-hit table whose 6th column (index 5)
# holds a COG identifier, or -1 when the query has no COG assignment.
#
# For every input line the script prints the line followed by the one-letter
# functional category and the COG name, then appends a '#'-prefixed summary
# counting the hits per functional category.
#
# A COG annotated with several category letters is reduced to ONE letter
# chosen at random (reported on STDERR), so the per-category counts may
# differ between runs.

use strict;
use warnings;

# COG 2014 reference tables (tab-separated, '#' marks header/comment lines).
my $fun_file  = '/data/Utilities/DB/COG/COG2014/fun2003-2014.tab';
my $name_file = '/data/Utilities/DB/COG/COG2014/cognames2003-2014.tab';

my $hits_file = $ARGV[0];
die "Usage: $0 GenQuery.COG.csv.bestHit\n" unless defined $hits_file;

# --- Functional categories: one-letter code => description -----------------
my %code2fun;
my @one_letter;    # category codes in file order; drives the summary order

open my $fun_fh, '<', $fun_file or die "Cannot open $fun_file: $!\n";
while (my $line = <$fun_fh>) {
    chomp $line;
    next if $line =~ /^#/;
    my ($code, $description) = split /\t/, $line;
    next unless defined $code;    # blank line: nothing to record
    $code2fun{$code} = defined $description ? $description : '';
    push @one_letter, $code;
}
close $fun_fh;

# --- COG definitions: COG id => category letter and COG name ---------------
my %cog2code;
my %cog2name;

open my $name_fh, '<', $name_file or die "Cannot open $name_file: $!\n";
while (my $line = <$name_fh>) {
    chomp $line;
    next if $line =~ /^#/;
    my ($cog_id, $codes, $cog_name) = split /\t/, $line;
    next unless defined $cog_id && defined $codes;    # malformed/blank line

    if (length($codes) > 1) {
        # Multi-category COG: keep a single, randomly chosen letter so each
        # hit is counted exactly once in the summary.
        my @letters = split //, $codes;
        my $picked  = $letters[ rand @letters ];
        $cog2code{$cog_id} = $picked;
        print STDERR "$cog_id $codes (random selection) ===> $picked\n";
    }
    else {
        $cog2code{$cog_id} = $codes;
    }
    $cog2name{$cog_id} = $cog_name;
}
close $name_fh;

# --- Annotate the best-hit table and tally categories ----------------------
my %code_num;
my $not_assigned = 0;
my $num          = 0;
my $lines        = 0;

open my $cog_fh, '<', $hits_file or die "Cannot open $hits_file: $!\n";
while (my $line = <$cog_fh>) {
    chomp $line;
    my @fields = split /,/, $line;
    my $cog_id = $fields[5];

    # Look up only when the column exists; unknown COG ids stay undef.
    my $code = defined $cog_id ? $cog2code{$cog_id} : undef;
    my $name = defined $cog_id ? $cog2name{$cog_id} : undef;

    # String match rather than '==': COG ids such as 'COG0001' are not
    # numbers, and a numeric comparison only works by accident (and warns).
    if (defined $cog_id && $cog_id =~ /\A\s*-1\s*\z/) {
        $not_assigned++;
    }
    elsif (defined $code) {
        $code_num{$code}++;
    }
    $num++;

    print $line, ',',
        (defined $code ? $code : ''), ',',
        (defined $name ? $name : ''), "\n";
    $lines++;
}
close $cog_fh;

# --- Summary ---------------------------------------------------------------
print "#--------------------------------------------\n";
foreach my $code (@one_letter) {
    my $count = exists $code_num{$code} ? $code_num{$code} : 0;
    print join("\t", '#', $code2fun{$code}, $count) . "\n";
}
print join("\t", '#', 'Not assigned', $not_assigned) . "\n";
print "#--------------------------------------------\n";
print "#total: $num (line numbers of $hits_file file: $lines)\n";