cogclass2018.pl
COGclass2018.pl
written by Haeyoung Jeong.
#!/usr/bin/perl
#
# COGclass2018.pl - annotate COG best-hit records with their functional
# category and COG name, then print a per-category summary.
#
# Usage: COGclass2018.pl GenQuery.COG.csv.bestHit
#
# Input  ($ARGV[0]): comma-separated file whose 6th field is a COG ID, or -1
#                    when the record has no COG assignment. '-' reads STDIN.
# Output (STDOUT):   every input line with ",<category letter>,<COG name>"
#                    appended, followed by a '#'-prefixed summary table.
# Diagnostics (STDERR): one line per COG that carries several category
#                    letters, showing which letter was picked at random.
#
# NOTE: the pick for multi-letter COGs uses rand(), so category counts can
# differ between runs on the same input.

use strict;
use warnings;

# Tab-separated lookup tables; lines starting with '#' are skipped.
#   $fun : column 1 = one-letter category code, column 2 = its description
#   $name: column 1 = COG ID, column 2 = category letter(s), column 3 = COG name
my $fun  = '/data/Utilities/DB/COG/COG2014/fun2003-2014.tab';
my $name = '/data/Utilities/DB/COG/COG2014/cognames2003-2014.tab';

my $bestHit = $ARGV[0];
die "Usage: $0 GenQuery.COG.csv.bestHit\n"
    unless defined $bestHit && length $bestHit;

# --- category code -> description (file order is kept for the summary) ------
my %code2fun;
my @oneLetter;

open my $funFh, '<', $fun or die "Cannot open $fun: $!\n";
while (my $line = <$funFh>) {
    chomp $line;
    next if $line =~ /^#/;
    my @temp = split /\t/, $line;
    next unless @temp;    # blank line: would otherwise add an undefined category
    $code2fun{ $temp[0] } = defined $temp[1] ? $temp[1] : '';
    push @oneLetter, $temp[0];
}
close $funFh;

# --- COG ID -> single category letter, COG ID -> COG name ------------------
my %cog2code;
my %cog2name;

open my $nameFh, '<', $name or die "Cannot open $name: $!\n";
while (my $line = <$nameFh>) {
    chomp $line;
    next if $line =~ /^#/;
    my @temp = split /\t/, $line;
    next unless @temp;

    my ($cog, $codes, $cogName) = @temp;
    $codes = '' unless defined $codes;

    if (length($codes) > 1) {
        # A COG with several category letters is reduced to one of them,
        # chosen at random, so every record is counted exactly once.
        my @letters = split //, $codes;
        my $item    = $letters[ rand @letters ];
        $cog2code{$cog} = $item;
        print STDERR "$cog $codes (random selection) ===> $item\n";
    }
    else {
        $cog2code{$cog} = $codes;
    }
    $cog2name{$cog} = defined $cogName ? $cogName : '';
}
close $nameFh;

# --- annotate the best-hit records ------------------------------------------
my $cogFh;
if ($bestHit eq '-') {
    # The original 2-arg open treated '-' as STDIN; keep that working.
    $cogFh = \*STDIN;
}
else {
    open $cogFh, '<', $bestHit or die "Cannot open $bestHit: $!\n";
}

my %codeNum;
my $notAssigned = 0;    # records whose COG field is -1
my $unknownCog  = 0;    # records whose COG ID is absent from $name
my $num         = 0;    # records counted (assigned + not assigned)
my $lines       = 0;    # lines read from the best-hit file

while (my $line = <$cogFh>) {
    chomp $line;
    my @temp = split /,/, $line;
    my $cog  = defined $temp[5] ? $temp[5] : '';

    # Unknown or unassigned IDs yield empty strings rather than undef.
    my $code    = exists $cog2code{$cog} ? $cog2code{$cog} : '';
    my $cogName = exists $cog2name{$cog} ? $cog2name{$cog} : '';

    # Pattern match instead of '== -1': COG IDs are strings, and a numeric
    # comparison on them raises "isn't numeric" warnings.
    if ($cog =~ /^\s*-1\s*$/) {
        $notAssigned++;
    }
    else {
        $unknownCog++ unless exists $cog2code{$cog};
        $codeNum{$code}++;
    }
    $num++;

    print $line, ',', $code, ',', $cogName, "\n";
    $lines++;
}
close $cogFh unless $bestHit eq '-';

# --- summary -----------------------------------------------------------------
print "#--------------------------------------------\n";
foreach my $letter (@oneLetter) {
    $codeNum{$letter} = 0 unless exists $codeNum{$letter};
    print join("\t", '#', $code2fun{$letter}, $codeNum{$letter}), "\n";
}
print join("\t", '#', 'Not assigned', $notAssigned), "\n";
print "#--------------------------------------------\n";
print "#total: $num (line numbers of $bestHit file: $lines)\n";

# Such records are part of the total but of no category row above.
print STDERR "warning: $unknownCog record(s) had a COG ID not found in $name\n"
    if $unknownCog;
cogclass2018.pl.txt · Last modified: 2021/03/17 13:09 by 127.0.0.1