1
###############################################################################
2
# Bio::Tools::BPlite::HSP
3
###############################################################################
4
# HSP = High Scoring Pair (to all non-experts as I am)
6
# The original BPlite.pm module has been written by Ian Korf !
7
# see http://sapiens.wustl.edu/~ikorf
9
# You may distribute this module under the same terms as perl itself
13
# BioPerl module for Bio::Tools::BPlite::HSP
15
# Cared for by Peter Schattner <schattner@alum.mit.edu>
17
# Copyright Peter Schattner
19
# You may distribute this module under the same terms as perl itself
21
# POD documentation - main docs before the code
25
Bio::Tools::BPlite::HSP - Blast report High Scoring Pair (HSP)
29
use Bio::Tools::BPlite;
30
my $report = new Bio::Tools::BPlite(-fh=>\*STDIN);
32
while(my $sbjct = $report->nextSbjct) {
33
while (my $hsp = $sbjct->nextHSP) {
49
$hsp->hit->overlaps($exon);
53
# the following line takes you to the next report in the stream/file
54
# it will return 0 if that report is empty,
55
# but that is valid for an empty blast report.
58
last if $report->_parseHeader == -1;
65
This object handles the High Scoring Pair data for a Blast report.
66
This is where the percent identity, query and hit sequence length,
67
P value, etc are stored and where most of the necessary information is located when building logic around parsing a Blast report.
69
See L<Bio::Tools::BPlite> for more detailed information on the entire
70
BPlite Blast parsing system.
76
User feedback is an integral part of the evolution of this and other
77
Bioperl modules. Send your comments and suggestions preferably to
78
the Bioperl mailing list. Your participation is much appreciated.
80
bioperl-l@bioperl.org - General discussion
81
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
85
Report bugs to the Bioperl bug tracking system to help us keep track
86
of the bugs and their resolution. Bug reports can be submitted via
89
http://bugzilla.open-bio.org/
91
=head1 AUTHOR - Peter Schattner
93
Email: schattner@alum.mit.edu
97
Jason Stajich, jason@bioperl.org
101
The rest of the documentation details each of the object methods.
102
Internal methods are usually preceded with a _
106
# Let the code begin...
108
package Bio::Tools::BPlite::HSP;
112
# to disable overloading comment this out:
113
#use overload '""' => '_overload';
115
# Object preamble - inheriets from Bio::SeqFeature::SimilarityPair
117
use Bio::SeqFeature::Similarity;
119
use base qw(Bio::SeqFeature::SimilarityPair);
122
my ($class, @args) = @_;
124
# workaround to make sure frame is not set before strand is
125
# interpreted from query/hit info
126
# this workaround removes the key from the hash
127
# so the superclass does not try and work with it
128
# we'll take care of setting it in this module later on
131
foreach ( keys %newargs ) {
136
# done with workaround
138
my $self = $class->SUPER::new(%newargs);
140
my ($score,$bits,$match,$hsplength,$positive,$gaps,$p,$exp,$qb,$qe,$sb,
141
$se,$qs,$ss,$hs,$qname,$sname,$qlength,$slength,$qframe,$sframe,
143
$self->_rearrange([qw(SCORE
167
$blasttype = 'UNKNOWN' unless $blasttype;
168
$self->report_type($blasttype);
169
# Determine strand meanings
170
my ($queryfactor, $sbjctfactor) = (1,0); # default
171
if ($blasttype eq 'BLASTP' || $blasttype eq 'TBLASTN' ) {
174
if ($blasttype eq 'TBLASTN' || $blasttype eq 'TBLASTX' ||
175
$blasttype eq 'BLASTN' ) {
180
$self->{'BLAST_TYPE'} = $blasttype;
182
# Store the aligned query as sequence feature
184
if ($qe > $qb) { # normal query: start < end
185
if ($queryfactor) { $strand = 1; } else { $strand = undef; }
186
$self->query( Bio::SeqFeature::Similarity->new
187
(-start=>$qb, -end=>$qe, -strand=>$strand,
188
-source=>"BLAST" ) ) }
189
else { # reverse query (i dont know if this is possible, but feel free to correct)
190
if ($queryfactor) { $strand = -1; } else { $strand = undef; }
191
$self->query( Bio::SeqFeature::Similarity->new
192
(-start=>$qe, -end=>$qb, -strand=>$strand,
193
-source=>"BLAST" ) ) }
195
# store the aligned hit as sequence feature
196
if ($se > $sb) { # normal hit
197
if ($sbjctfactor) { $strand = 1; } else { $strand = undef; }
198
$self->hit( Bio::SeqFeature::Similarity->new
199
(-start=>$sb, -end=>$se, -strand=>$strand,
200
-source=>"BLAST" ) ) }
201
else { # reverse hit: start bigger than end
202
if ($sbjctfactor) { $strand = -1; } else { $strand = undef; }
203
$self->hit( Bio::SeqFeature::Similarity->new
204
(-start=>$se, -end=>$sb, -strand=>$strand,
205
-source=>"BLAST" ) ) }
208
$self->query->seq_id($qname); # query name
209
$self->hit->seq_id($sname); # hit name
212
$self->query->seqlength($qlength); # query length
213
$self->hit->seqlength($slength); # hit length
216
$self->score($score);
219
$self->significance($p);
220
$self->{'EXP'} = $exp;
222
$self->query->frac_identical($match);
223
$self->hit->frac_identical($match);
224
$self->{'HSPLENGTH'} = $hsplength;
225
$self->{'PERCENT'} = int((1000 * $match)/$hsplength)/10;
226
$self->{'POSITIVE'} = $positive;
227
$self->{'GAPS'} = $gaps;
232
$self->frame($qframe, $sframe);
233
return $self; # success - we hope!
236
# to disable overloading comment this out:
239
return $self->start."..".$self->end." ".$self->bits;
245
Usage : $type = $sbjct->report_type()
246
Function : Returns the type of report from which this hit was obtained.
247
This usually pertains only to BLAST and friends reports, for which
248
the report type denotes what type of sequence was aligned against
249
what (BLASTN: dna-dna, BLASTP prt-prt, BLASTX translated dna-prt,
250
TBLASTN prt-translated dna, TBLASTX translated dna-translated dna).
252
Returns : A string (BLASTN, BLASTP, BLASTX, TBLASTN, TBLASTX, UNKNOWN)
253
Args : a string on set (you should know what you are doing)
258
my ($self, $rpt) = @_;
260
$self->{'_report_type'} = $rpt;
262
return $self->{'_report_type'};
268
Usage : my $exp = $hsp->EXP;
269
Function: returns the EXP value for the HSP
270
Returns : string value
272
Note : Patch provided by Sami Ashour for BTK parsing
278
return $_[0]->{'EXP'};
286
Function : returns the P (significance) value for a HSP
287
Returns : (double) significance value
293
my ($self, @args) = @_;
294
my $float = $self->significance(@args);
295
my $match = '([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?'; # Perl Cookbook 2.1
296
if ($float =~ /^$match$/) {
299
} elsif ("1$float" =~ /^$match$/) {
300
# Almost C float, Jitterbug 974
303
$self->warn("[HSP::P()] '$float' is not a known number format. Returning zero (0) instead.");
311
Usage : $hsp->percent();
312
Function : returns the percent matching
313
Returns : (double) percent matching
318
sub percent {shift->{'PERCENT'}}
324
Usage : $hsp->match();
325
Function : returns the match
327
Returns : (double) frac_identical
332
sub match {shift->query->frac_identical(@_)}
337
Usage : $hsp->hsplength();
338
Function : returns the HSP length (including gaps)
339
Returns : (integer) HSP length
344
sub hsplength {shift->{'HSPLENGTH'}}
349
Usage : $hsp->positive();
350
Function : returns the number of positive matches (symbols in the alignment
351
with a positive score)
352
Returns : (int) number of positive matches in the alignment
357
sub positive {shift->{'POSITIVE'}}
362
Usage : $hsp->gaps();
363
Function : returns the number of gaps or 0 if none
364
Returns : (int) number of gaps or 0 if none
369
sub gaps {shift->{'GAPS'}}
374
Usage : $hsp->querySeq();
375
Function : returns the query sequence
376
Returns : (string) the Query Sequence
381
sub querySeq {shift->{'QS'}}
386
Usage : $hsp->sbjctSeq();
387
Function : returns the Sbjct sequence
388
Returns : (string) the Sbjct Sequence
393
sub sbjctSeq {shift->{'SS'}}
398
Usage : $hsp->homologySeq();
399
Function : returns the homologous sequence
400
Returns : (string) homologous sequence
405
sub homologySeq {shift->{'HS'}}
411
Function : returns the Query Sequence (same as querySeq)
412
Returns : (string) query Sequence
417
sub qs {shift->{'QS'}}
423
Function : returns the subject sequence ( same as sbjctSeq)
424
Returns : (string) Sbjct Sequence
429
sub ss {shift->{'SS'}}
435
Function : returns the Homologous Sequence (same as homologySeq )
436
Returns : (string) Homologous Sequence
441
sub hs {shift->{'HS'}}
444
my ($self, $qframe, $sframe) = @_;
445
if( defined $qframe ) {
448
} elsif( $qframe !~ /^([+-])?([1-3])/ ) {
449
$self->warn("Specifying an invalid query frame ($qframe)");
452
if( ($1 eq '-' && $self->query->strand >= 0) ||
453
($1 eq '+' && $self->query->strand <= 0) ) {
454
$self->warn("Query frame ($qframe) did not match strand of query (". $self->query->strand() . ")");
456
# Set frame to GFF [0-2]
459
$self->{'QFRAME'} = $qframe;
461
if( defined $sframe ) {
464
} elsif( $sframe !~ /^([+-])?([1-3])/ ) {
465
$self->warn("Specifying an invalid hit frame ($sframe)");
468
if( ($1 eq '-' && $self->hit->strand >= 0) ||
469
($1 eq '+' && $self->hit->strand <= 0) )
471
$self->warn("Hit frame ($sframe) did not match strand of hit (". $self->hit->strand() . ")");
474
# Set frame to GFF [0-2]
477
$self->{'SFRAME'} = $sframe;
480
(defined $qframe && $self->SUPER::frame($qframe) &&
481
($self->{'FRAME'} = $qframe)) ||
482
(defined $sframe && $self->SUPER::frame($sframe) &&
483
($self->{'FRAME'} = $sframe));
486
$self->{'BLAST_TYPE'} eq 'TBLASTX')
488
return ($self->{'QFRAME'}, $self->{'SFRAME'});
489
} elsif (wantarray()) {
490
(defined $self->{'QFRAME'} &&
491
return ($self->{'QFRAME'}, undef)) ||
492
(defined $self->{'SFRAME'} &&
493
return (undef, $self->{'SFRAME'}));
495
(defined $self->{'QFRAME'} &&
496
return $self->{'QFRAME'}) ||
497
(defined $self->{'SFRAME'} &&
498
return $self->{'SFRAME'});