Hello, I've noticed some strange behaviour when parsing BLAST .xml output files (-outfmt 5) using BioPerl's Bio::SearchIO library.
I have a simple parser script that looks something like:
#!/usr/bin/perl -w
use strict;
use Bio::SearchIO;
# Create the Bio::SearchIO parser for the BLAST XML report.
my $in = Bio::SearchIO->new(
    -format => 'blastxml',
    -file   => "consensusSeqs.BLASTp.xml",
);
# Open the output file with an explicit write mode and stop with the OS error
# if that fails; a failed open would otherwise leave OUT unopened and every
# later write to it would be lost. The bareword OUT handle is kept because
# the rest of the script prints to it by that name.
open(OUT, '>', "consensusSeqs.parse.OUT")
    or die "Cannot open consensusSeqs.parse.OUT for writing: $!";
# Counter printed as "Query count" for each result; starts at 1.
my $query_count = 1;
while( my $result = $in->next_result ) {
print "Query count: ".$query_count."\n";
print "Query name: ".$result->query_description."\n";
print "Number of hits: ".$result->num_hits."\n";
my $hit_count = 1;
# Report queries that have no hits. Test num_hits instead of calling next_hit:
# next_hit advances the result's hit iterator, so probing with it here would
# consume the first hit and the hit loop below would start at the second one.
if ($result->num_hits == 0) {
    print OUT $result->query_description."\tNo hit\n";
}
while ( my $hit = $result->next_hit ) {
print "\tHit count: ".$hit_count."\n";
while ( my $hsp = $hit->next_hsp ) {
my @a = split /\|/, $hit->name;
my $hit_accession = $a[3];
# print "Hit name: ".$hit->name."\n";
print "\tHit accession: ".$hit_accession."\n";
## get some stats of hit
my $percent_id = sprintf("%.2f", $hsp->percent_identity);
my $percent_q_coverage = sprintf("%.2f", ((($hsp->length('query')/($result->query_length)))*100));
my @b = split /[\[\]]/, $hit->description;
my $organism = $b[1];
(my $short_desc = $hit->description) =~ s/\[.*//;
print ...