n50_simple.pl
#!/usr/bin/perl
#
# Source:
# http://genomics-array.blogspot.com/2011/02/calculating-n50-of-contig-assembly-file.html
# Corrected by Haeyoung Jeong
## Read Fasta File and compute length ###
#
my $length;
my $totalLength;
my @arr;
my $num;
while(<>){
chomp;
if(/^>([^\s]+)/){
$num++;
push (@arr, $length);
$totalLength += $length;
$length=0;
} else {
s/\s//g;
s/\t//g;
$length += length($_);
}
}
push (@arr, $length); # for the last contig
$totalLength += $length;
close(FH);
my @sort = sort {$b <=> $a} @arr;
my $n50;
#print "Total $num contigs ($totalLength)\n";
foreach my $val(@sort){
$n50+=$val;
if($n50 >= $totalLength/2){
# print "N50 contig length is $n50 and N50 value is: $val\n";
$n50val = $val;
last;
}
}
#print "Average contig length is ", $totalLength / $num, "\n";
#print "Max. contig length is ", $sort[0], "\n";
$avg = sprintf("%d", $totalLength / $num);
# number, total length, max, N50, average length
print join " ", $num, $totalLength, $sort[0], $n50val, $avg, " (count, total, max, n50, average)\n";
n50_simple.pl.txt · Last modified: by 127.0.0.1
