User Tools

Site Tools


n50_simple.pl
#!/usr/bin/perl
#
# Source: 
#    http://genomics-array.blogspot.com/2011/02/calculating-n50-of-contig-assembly-file.html
#    Corrected by Haeyoung Jeong
## Read Fasta File and compute length ###
#

my $length;
my $totalLength; 
my @arr;
my $num;

while(<>){
   chomp; 
   if(/^>([^\s]+)/){
       $num++;
       push (@arr, $length);
       $totalLength += $length; 
       $length=0;
  } else {
  s/\s//g;
  s/\t//g;
  $length += length($_);
  }
}
push (@arr, $length); # for the last contig
$totalLength += $length;

close(FH);

my @sort = sort {$b <=> $a} @arr;
my $n50; 
#print "Total $num contigs ($totalLength)\n";
foreach my $val(@sort){
     $n50+=$val;
      if($n50 >= $totalLength/2){
#         print "N50 contig length is $n50 and N50 value is: $val\n";
         $n50val = $val;
         last; 
     }
}
#print "Average contig length is ", $totalLength / $num, "\n";
#print "Max. contig length is ", $sort[0], "\n"; 

$avg = sprintf("%d", $totalLength / $num);

# number, total length, max, N50, average length
print join " ", $num, $totalLength, $sort[0], $n50val, $avg, " (count, total, max, n50, average)\n";
n50_simple.pl.txt · Last modified: 2021/03/17 13:09 by 127.0.0.1