340
477
$self->{'_iterator'} = 0;
482
Usage : $ambig_code = $hit_object->ambiguous_aln();
483
Purpose : Sets/Gets ambiguity code data member.
484
Example : (see usage)
485
Returns : String = 'q', 's', 'qs', '-'
486
: 'q' = query sequence contains overlapping sub-sequences
487
: while sbjct does not.
488
: 's' = sbjct sequence contains overlapping sub-sequences
489
: while query does not.
490
: 'qs' = query and sbjct sequences contain overlapping sub-sequences
491
: relative to each other.
492
: '-' = query and sbjct sequences do not contain multiple domains
493
: relative to each other OR both contain the same distribution
494
: of similar domains.
497
Comment : Note: "sbjct" is synonymous with "hit"
501
#--------------------
503
#--------------------
505
if(@_) { $self->{'_ambiguous_aln'} = shift; }
506
$self->{'_ambiguous_aln'} || '-';
511
See documentation in L<Bio::Search::Hit::HitI::overlap()|Bio::Search::Hit::HitI>
519
if(@_) { $self->{'_overlap'} = shift; }
520
defined $self->{'_overlap'} ? $self->{'_overlap'} : 0;
526
Usage : $hit_object->n();
527
Purpose : Gets the N number for the current hit.
528
: This is the number of HSPs in the set which was ascribed
529
: the lowest P-value (listed on the description line).
530
: This number is not the same as the total number of HSPs.
531
: To get the total number of HSPs, use num_hsps().
532
Example : $n = $hit_object->n();
535
Throws : Exception if HSPs have not been set (BLAST2 reports).
536
Comments : Note that the N parameter is not reported in gapped BLAST2.
537
: Calling n() on such reports will result in a call to num_hsps().
538
: The num_hsps() method will count the actual number of
539
: HSPs in the alignment listing, which may exceed N in
542
See Also : L<num_hsps()|num_hsps>
551
# The check for $self->{'_n'} is a remnant from the 'query' mode days
552
# in which the sbjct object would collect data from the description
556
if(not defined($self->{'_n'})) {
563
$n ||= $self->num_hsps;
570
Usage : $hit_object->p( [format] );
571
Purpose : Get the P-value for the best HSP of the given BLAST hit.
572
: (Note that P-values are not provided with NCBI Blast2 reports).
573
Example : $p = $sbjct->p;
574
: $p = $sbjct->p('exp'); # get exponent only.
575
: ($num, $exp) = $sbjct->p('parts'); # split sci notation into parts
576
Returns : Float or scientific notation number (the raw P-value, DEFAULT).
577
: Integer if format == 'exp' (the magnitude of the base 10 exponent).
578
: 2-element list (float, int) if format == 'parts' and P-value
579
: is in scientific notation (See Comments).
580
Argument : format: string of 'raw' | 'exp' | 'parts'
581
: 'raw' returns value given in report. Default. (1.2e-34)
582
: 'exp' returns exponent value only (34)
583
: 'parts' returns the decimal and exponent as a
584
: 2-element list (1.2, -34) (See Comments).
585
Throws : Warns if no P-value is defined. Uses expect instead.
586
Comments : Using the 'parts' argument is not recommended since it will not
587
: work as expected if the P-value is not in scientific notation.
588
: That is, floats are not converted into sci notation before
589
: splitting into parts.
591
See Also : L<expect()|expect>, L<signif()|signif>, L<Bio::Search::SearchUtils::get_exponent()|Bio::Search::SearchUtils>
598
# Some duplication of logic for p(), expect() and signif() for the sake of performance.
599
my ($self, $fmt) = @_;
601
my $val = $self->{'_p'};
604
if(not defined $val) {
605
# P-value not defined, must be a NCBI Blast2 report.
606
# Use expect instead.
607
$self->warn( "P-value not defined. Using expect() instead.");
608
$val = $self->{'_expect'};
611
return $val if not $fmt or $fmt =~ /^raw/i;
612
## Special formats: exponent-only or as list.
613
return &Bio::Search::SearchUtils::get_exponent($val) if $fmt =~ /^exp/i;
614
# 'parts' format: split the value on the exponent marker (either case),
# e.g. "1.2e-34" -> ("1.2", "-34"). A value that is not in scientific
# notation contains no marker and comes back as a one-element list.
return (split (/[eE]/, $val)) if $fmt =~ /^parts/i;
616
## Default: return the raw P-value.
622
Usage : $hit_object->hsp( [string] );
623
Purpose : Get a single HSPI object for the present HitI object.
624
Example : $hspObj = $hit_object->hsp; # same as 'best'
625
: $hspObj = $hit_object->hsp('best');
626
: $hspObj = $hit_object->hsp('worst');
627
Returns : Object reference for a Bio::Search::HSP::BlastHSP.pm object.
628
Argument : String (or no argument).
629
: No argument (default) = highest scoring HSP (same as 'best').
630
: 'best' or 'first' = highest scoring HSP.
631
: 'worst' or 'last' = lowest scoring HSP.
632
Throws : Exception if the HSPs have not been collected.
633
: Exception if an unrecognized argument is used.
635
See Also : L<hsps()|hsps>, L<num_hsps()|num_hsps>
642
my( $self, $option ) = @_;
645
if (not ref $self->{'_hsps'}) {
646
$self->throw("Can't get HSPs: data not collected.");
649
my @hsps = @{$self->{'_hsps'}};
651
return $hsps[0] if $option =~ /best|first|1/i;
652
return $hsps[$#hsps] if $option =~ /worst|last/i;
654
$self->throw("Can't get HSP for: $option\n" .
655
"Valid arguments: 'best', 'worst'");
658
=head2 logical_length
660
Usage : $hit_object->logical_length( [seq_type] );
661
: (mostly intended for internal use).
662
Purpose : Get the logical length of the hit sequence.
663
: If the Blast is a TBLASTN or TBLASTX, the returned length
664
: is the length of the would-be amino acid sequence (length/3).
665
: For all other BLAST flavors, this function is the same as length().
666
Example : $len = $hit_object->logical_length();
668
Argument : seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
669
('sbjct' is synonymous with 'hit')
671
Comments : This is important for functions like frac_aligned_query()
672
: which need to operate in amino acid coordinate space when dealing
673
: with [T]BLAST[NX] type reports.
675
See Also : L<length()|length>, L<frac_aligned_query()|frac_aligned_query>, L<frac_aligned_hit()|frac_aligned_hit>
679
#--------------------
681
#--------------------
683
my $seqType = shift || 'query';
684
$seqType = 'sbjct' if $seqType eq 'hit';
688
# For the sbjct, return logical sbjct length
689
if( $seqType eq 'sbjct' ) {
690
$length = $self->length;
691
# Adjust length based on BLAST flavor.
692
if($self->algorithm =~ /TBLAST[NX]/ ) {
696
# Otherwise, return logical query length
697
$length = $self->query_length();
698
$self->throw("Must have defined query_len") unless ( $length );
700
# Adjust length based on BLAST flavor.
701
if($self->algorithm =~ /T?BLASTX/ ) {
710
Usage : $hit_object->length_aln( [seq_type] );
711
Purpose : Get the total length of the aligned region for query or sbjct seq.
712
: This number will include all HSPs
713
Example : $len = $hit_object->length_aln(); # default = query
714
: $lenAln = $hit_object->length_aln('query');
716
Argument : seq_type = 'query' or 'hit' or 'sbjct' (Default = 'query')
717
('sbjct' is synonymous with 'hit')
718
Throws : Exception if the argument is not recognized.
719
Comments : This method will report the logical length of the alignment,
720
: meaning that for TBLAST[NX] reports, the length is reported
721
: using amino acid coordinate space (i.e., nucleotides / 3).
723
: This method requires that all HSPs be tiled. If they have not
724
: already been tiled, they will be tiled first automatically.
725
: If you don't want the tiled data, iterate through each HSP
726
: calling length() on each (use hsps() to get all HSPs).
728
See Also : L<length()|length>, L<frac_aligned_query()|frac_aligned_query>, L<frac_aligned_hit()|frac_aligned_hit>, L<gaps()|gaps>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>, L<Bio::Search::HSP::BlastHSP::length()|Bio::Search::HSP::BlastHSP>
735
my( $self, $seqType, $num ) = @_;
737
$seqType ||= 'query';
738
$seqType = 'sbjct' if $seqType eq 'hit';
740
Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
743
return $self->{'_length_aln_'.$seqType} = $num;
746
my $data = $self->{'_length_aln_'.$seqType};
748
## If we don't have data, figure out what went wrong.
750
$self->throw("Can't get length aln for sequence type \"$seqType\". " .
751
"Valid types are 'query', 'hit', 'sbjct' ('sbjct' = 'hit')");
758
Usage : $hit_object->gaps( [seq_type] );
759
Purpose : Get the number of gaps in the aligned query, hit, or both sequences.
760
: Data is summed across all HSPs.
761
Example : $qgaps = $hit_object->gaps('query');
762
: $hgaps = $hit_object->gaps('hit');
763
: $tgaps = $hit_object->gaps(); # default = total (query + hit)
764
Returns : scalar context: integer
765
: array context without args: two-element list of integers
766
: (queryGaps, hitGaps)
767
: Array context can be forced by providing an argument of 'list' or 'array'.
769
: CAUTION: Calling this method within printf or sprintf imposes array context.
770
: So this function may not give you what you expect. For example:
771
: printf "Total gaps: %d", $hit->gaps();
772
: Actually returns a two-element array, so what gets printed
773
: is the number of gaps in the query, not the total
775
Argument : seq_type: 'query' | 'hit' or 'sbjct' | 'total' | 'list' (default = 'total')
776
('sbjct' is synonymous with 'hit')
778
Comments : If you need data for each HSP, use hsps() and then iterate
779
: through each HSP object.
780
: This method requires that all HSPs be tiled. If they have not
781
: already been tiled, they will be tiled first automatically.
782
: Not relying on wantarray since that will fail in situations
783
: such as printf "%d", $hit->gaps() in which you might expect to
784
: be printing the total gaps, but evaluates to array context.
786
See Also : L<length_aln()|length_aln>
793
my( $self, $seqType, $num ) = @_;
795
$seqType ||= (wantarray ? 'list' : 'total');
796
$seqType = 'sbjct' if $seqType eq 'hit';
798
Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
800
$seqType = lc($seqType);
803
$self->throw("Can't set gaps for seqType '$seqType'. Must be 'query' or 'hit'\n") unless ($seqType eq 'sbjct' or $seqType eq 'query');
805
return $self->{'_gaps_'.$seqType} = $num;
807
elsif($seqType =~ /list|array/i) {
808
return ($self->{'_gaps_query'}, $self->{'_gaps_sbjct'});
810
elsif($seqType eq 'total') {
811
return ($self->{'_gaps_query'} + $self->{'_gaps_sbjct'}) || 0;
813
return $self->{'_gaps_'.$seqType} || 0;
820
See documentation in L<Bio::Search::Hit::HitI::matches()|Bio::Search::Hit::HitI>
827
my( $self, $arg1, $arg2) = @_;
830
Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
833
@data = ($self->{'_totalIdentical'}, $self->{'_totalConserved'});
835
return @data if @data;
838
if( defined $arg2 ) {
839
$self->{'_totalIdentical'} = $arg1;
840
$self->{'_totalConserved'} = $arg2;
841
return ( $arg1, $arg2 );
843
elsif($arg1 =~ /^id/i) {
844
$data = $self->{'_totalIdentical'};
846
$data = $self->{'_totalConserved'};
848
return $data if $data;
851
## Something went wrong if we make it to here.
852
$self->throw("Can't get identical or conserved data: no data.");
858
Usage : $sbjct->start( [seq_type] );
859
Purpose : Gets the start coordinate for the query, sbjct, or both sequences
860
: in the BlastHit object. If there is more than one HSP, the lowest start
861
: value of all HSPs is returned.
862
Example : $qbeg = $sbjct->start('query');
863
: $sbeg = $sbjct->start('hit');
864
: ($qbeg, $sbeg) = $sbjct->start();
865
Returns : scalar context: integer
866
: array context without args: list of two integers (queryStart, sbjctStart)
867
: Array context can be "induced" by providing an argument of 'list' or 'array'.
868
Argument : In scalar context: seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
869
('sbjct' is synonymous with 'hit')
871
Comments : This method requires that all HSPs be tiled. If there is more than one
872
: HSP and they have not already been tiled, they will be tiled first automatically.
873
: Remember that the start and end coordinates of all HSPs are
874
: normalized so that start < end. Strand information can be
875
: obtained by calling $hit->strand().
877
See Also : L<end()|end>, L<range()|range>, L<strand()|strand>,
878
L<Bio::Search::HSP::BlastHSP::start|Bio::Search::HSP::BlastHSP>
885
my ($self, $seqType, $num) = @_;
887
$seqType ||= (wantarray ? 'list' : 'query');
888
$seqType = 'sbjct' if $seqType eq 'hit';
891
$seqType = "_\L$seqType\E";
892
return $self->{$seqType.'Start'} = $num;
895
# If there is only one HSP, defer this call to the solitary HSP.
896
if($self->num_hsps == 1) {
897
return $self->hsp->start($seqType);
899
&Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
900
if($seqType =~ /list|array/i) {
901
return ($self->{'_queryStart'}, $self->{'_sbjctStart'});
903
## Sensitive to member name changes.
904
$seqType = "_\L$seqType\E";
905
return $self->{$seqType.'Start'};
913
Usage : $sbjct->end( [seq_type] );
914
Purpose : Gets the end coordinate for the query, sbjct, or both sequences
915
: in the BlastHit object. If there is more than one HSP,
917
: value of all HSPs is returned.
918
Example : $qend = $sbjct->end('query');
919
: $send = $sbjct->end('hit');
920
: ($qend, $send) = $sbjct->end();
921
Returns : scalar context: integer
922
: array context without args: list of two integers
923
: (queryEnd, sbjctEnd)
924
: Array context can be "induced" by providing an argument
925
: of 'list' or 'array'.
926
Argument : In scalar context: seq_type = 'query' or 'sbjct'
927
: (case insensitive). If not supplied, 'query' is used.
929
Comments : This method requires that all HSPs be tiled. If there is
930
: more than one HSP and they have not already been tiled,
931
: they will be tiled first automatically.
932
: Remember that the start and end coordinates of all HSPs are
933
: normalized so that start < end. Strand information can be
934
: obtained by calling $hit->strand().
936
See Also : L<start()|start>, L<range()|range>, L<strand()|strand>
943
my ($self, $seqType, $num) = @_;
945
$seqType ||= (wantarray ? 'list' : 'query');
946
$seqType = 'sbjct' if $seqType eq 'hit';
949
$seqType = "_\L$seqType\E";
950
return $self->{$seqType.'Stop'} = $num;
953
# If there is only one HSP, defer this call to the solitary HSP.
954
if($self->num_hsps == 1) {
955
return $self->hsp->end($seqType);
957
Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
958
if($seqType =~ /list|array/i) {
959
return ($self->{'_queryStop'}, $self->{'_sbjctStop'});
961
## Sensitive to member name changes.
962
$seqType = "_\L$seqType\E";
963
return $self->{$seqType.'Stop'};
970
Usage : $sbjct->range( [seq_type] );
971
Purpose : Gets the (start, end) coordinates for the query or sbjct sequence
972
: in the HSP alignment.
973
Example : ($qbeg, $qend) = $sbjct->range('query');
974
: ($sbeg, $send) = $sbjct->range('hit');
975
Returns : Two-element array of integers
976
Argument : seq_type = string, 'query' or 'hit' or 'sbjct' (default = 'query')
977
('sbjct' is synonymous with 'hit')
980
See Also : L<start()|start>, L<end()|end>
987
my ($self, $seqType) = @_;
988
$seqType ||= 'query';
989
$seqType = 'sbjct' if $seqType eq 'hit';
990
return ($self->start($seqType), $self->end($seqType));
994
=head2 frac_identical
996
Usage : $hit_object->frac_identical( [seq_type] );
997
Purpose : Get the overall fraction of identical positions across all HSPs.
998
: The number refers to only the aligned regions and does not
999
: account for unaligned regions in between the HSPs, if any.
1000
Example : $frac_iden = $hit_object->frac_identical('query');
1001
Returns : Float (3-decimal precision, e.g., 0.750).
1002
Argument : seq_type: 'query' | 'hit' or 'sbjct' | 'total'
1003
: default = 'query' (but see comments below).
1004
: ('sbjct' is synonymous with 'hit')
1006
Comments : Different versions of Blast report different values for the total
1007
: length of the alignment. This is the number reported in the
1008
: denominators in the stats section:
1009
: "Identical = 34/120 Positives = 67/120".
1010
: NCBI BLAST uses the total length of the alignment (with gaps)
1011
: WU-BLAST uses the length of the query sequence (without gaps).
1013
: Therefore, when called with an argument of 'total',
1014
: this method will report different values depending on the
1015
: version of BLAST used. Total does NOT take into account HSP
1016
: tiling, so it should not be used.
1018
: To get the fraction identical among only the aligned residues,
1019
: ignoring the gaps, call this method without an argument or
1020
: with an argument of 'query' or 'hit'.
1022
: If you need data for each HSP, use hsps() and then iterate
1023
: through the HSP objects.
1024
: This method requires that all HSPs be tiled. If they have not
1025
: already been tiled, they will be tiled first automatically.
1027
See Also : L<frac_conserved()|frac_conserved>, L<frac_aligned_query()|frac_aligned_query>, L<matches()|matches>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
1032
sub frac_identical {
1034
my ($self, $seqType) = @_;
1035
$seqType ||= 'query';
1036
$seqType = 'sbjct' if $seqType eq 'hit';
1038
## Sensitive to member name format.
1039
$seqType = lc($seqType);
1041
Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1043
my $ident = $self->matches('id');
1044
my $total = $self->length_aln($seqType);
1045
my $ratio = $ident / $total;
1046
my $ratio_rounded = sprintf( "%.3f", $ratio);
1048
# Round down iff normal rounding yields 1 (just like blast)
1049
$ratio_rounded = 0.999 if (($ratio_rounded == 1) && ($ratio < 1));
1050
return $ratio_rounded;
1054
=head2 frac_conserved
1056
Usage : $hit_object->frac_conserved( [seq_type] );
1057
Purpose : Get the overall fraction of conserved positions across all HSPs.
1058
: The number refers to only the aligned regions and does not
1059
: account for unaligned regions in between the HSPs, if any.
1060
Example : $frac_cons = $hit_object->frac_conserved('hit');
1061
Returns : Float (3-decimal precision, e.g., 0.750).
1062
Argument : seq_type: 'query' | 'hit' or 'sbjct' | 'total'
1063
: default = 'query' (but see comments below).
1064
: ('sbjct' is synonymous with 'hit')
1066
Comments : Different versions of Blast report different values for the total
1067
: length of the alignment. This is the number reported in the
1068
: denominators in the stats section:
1069
: "Identical = 34/120 Positives = 67/120".
1070
: NCBI BLAST uses the total length of the alignment (with gaps)
1071
: WU-BLAST uses the length of the query sequence (without gaps).
1073
: Therefore, when called with an argument of 'total',
1074
: this method will report different values depending on the
1075
: version of BLAST used. Total does NOT take into account HSP
1076
: tiling, so it should not be used.
1078
: To get the fraction conserved among only the aligned residues,
1079
: ignoring the gaps, call this method without an argument or
1080
: with an argument of 'query' or 'hit'.
1082
: If you need data for each HSP, use hsps() and then iterate
1083
: through the HSP objects.
1084
: This method requires that all HSPs be tiled. If they have not
1085
: already been tiled, they will be tiled first automatically.
1087
See Also : L<frac_identical()|frac_identical>, L<matches()|matches>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
1091
#--------------------
1092
sub frac_conserved {
1093
#--------------------
1094
my ($self, $seqType) = @_;
1095
$seqType ||= 'query';
1096
$seqType = 'sbjct' if $seqType eq 'hit';
1098
## Sensitive to member name format.
1099
$seqType = lc($seqType);
1101
Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1103
my $consv = $self->matches('cons');
1104
my $total = $self->length_aln($seqType);
1105
my $ratio = $consv / $total;
1106
my $ratio_rounded = sprintf( "%.3f", $ratio);
1108
# Round down iff normal rounding yields 1 (just like blast)
1109
$ratio_rounded = 0.999 if (($ratio_rounded == 1) && ($ratio < 1));
1110
return $ratio_rounded;
1116
=head2 frac_aligned_query
1118
Usage : $hit_object->frac_aligned_query();
1119
Purpose : Get the fraction of the query sequence which has been aligned
1120
: across all HSPs (not including intervals between non-overlapping
1122
Example : $frac_alnq = $hit_object->frac_aligned_query();
1123
Returns : Float (2-decimal precision, e.g., 0.75).
1126
Comments : If you need data for each HSP, use hsps() and then iterate
1127
: through the HSP objects.
1128
: To compute the fraction aligned, the logical length of the query
1129
: sequence is used, meaning that for [T]BLASTX reports, the
1130
: full length of the query sequence is converted into amino acids
1131
: by dividing by 3. This is necessary because of the way
1132
: the lengths of aligned sequences are computed.
1133
: This method requires that all HSPs be tiled. If they have not
1134
: already been tiled, they will be tiled first automatically.
1136
See Also : L<frac_aligned_hit()|frac_aligned_hit>, L<logical_length()|logical_length>, L<length_aln()|length_aln>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
1140
#----------------------
1141
sub frac_aligned_query {
1142
#----------------------
1145
Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1146
sprintf( "%.2f", $self->length_aln('query') /
1147
$self->logical_length('query'));
1152
=head2 frac_aligned_hit
1154
Usage : $hit_object->frac_aligned_hit();
1155
Purpose : Get the fraction of the hit (sbjct) sequence which has been aligned
1156
: across all HSPs (not including intervals between non-overlapping
1158
Example : $frac_alnq = $hit_object->frac_aligned_hit();
1159
Returns : Float (2-decimal precision, e.g., 0.75).
1162
Comments : If you need data for each HSP, use hsps() and then iterate
1163
: through the HSP objects.
1164
: To compute the fraction aligned, the logical length of the sbjct
1165
: sequence is used, meaning that for TBLAST[NX] reports, the
1166
: full length of the sbjct sequence is converted into amino acids
1167
: by dividing by 3. This is necessary because of the way
1168
: the lengths of aligned sequences are computed.
1169
: This method requires that all HSPs be tiled. If they have not
1170
: already been tiled, they will be tiled first automatically.
1172
See Also : L<frac_aligned_query()|frac_aligned_query>, L<matches()|matches>, L<logical_length()|logical_length>, L<length_aln()|length_aln>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
1176
#--------------------
1177
sub frac_aligned_hit {
1178
#--------------------
1181
Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1182
sprintf( "%.2f", $self->length_aln('sbjct') / $self->logical_length('sbjct'));
1186
## These methods are being maintained for backward compatibility.
1188
=head2 frac_aligned_sbjct
1190
Same as L<frac_aligned_hit()|frac_aligned_hit>
1194
# Backward-compatibility alias: frac_aligned_sbjct() runs frac_aligned_hit().
# The glob must reference the sub by its exact name; a misspelled target
# leaves the alias pointing at an undefined subroutine.
*frac_aligned_sbjct = \&frac_aligned_hit;
1196
=head2 num_unaligned_sbjct
1198
Same as L<num_unaligned_hit()|num_unaligned_hit>
1202
# Backward-compatibility alias: num_unaligned_sbjct() runs num_unaligned_hit().
*num_unaligned_sbjct = \&num_unaligned_hit;
1205
=head2 num_unaligned_hit
1207
Usage : $hit_object->num_unaligned_hit();
1208
Purpose : Get the number of the unaligned residues in the hit sequence.
1209
: Sums across all HSPs.
1210
Example : $num_unaln = $hit_object->num_unaligned_hit();
1214
Comments : See notes regarding logical lengths in the comments for frac_aligned_hit().
1215
: They apply here as well.
1216
: If you need data for each HSP, use hsps() and then iterate
1217
: through the HSP objects.
1218
: This method requires that all HSPs be tiled. If they have not
1219
: already been tiled, they will be tiled first automatically.
1221
See Also : L<num_unaligned_query()|num_unaligned_query>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>, L<frac_aligned_hit()|frac_aligned_hit>
1225
#---------------------
1226
sub num_unaligned_hit {
1227
#---------------------
1230
Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1232
my $num = $self->logical_length('sbjct') - $self->length_aln('sbjct');
1233
($num < 0 ? 0 : $num );
1237
=head2 num_unaligned_query
1239
Usage : $hit_object->num_unaligned_query();
1240
Purpose : Get the number of the unaligned residues in the query sequence.
1241
: Sums across all HSPs.
1242
Example : $num_unaln = $hit_object->num_unaligned_query();
1246
Comments : See notes regarding logical lengths in the comments for frac_aligned_query().
1247
: They apply here as well.
1248
: If you need data for each HSP, use hsps() and then iterate
1249
: through the HSP objects.
1250
: This method requires that all HSPs be tiled. If they have not
1251
: already been tiled, they will be tiled first automatically.
1253
See Also : L<num_unaligned_hit()|num_unaligned_hit>, L<frac_aligned_query()|frac_aligned_query>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
1257
#-----------------------
1258
sub num_unaligned_query {
1259
#-----------------------
1262
Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1264
my $num = $self->logical_length('query') - $self->length_aln('query');
1265
($num < 0 ? 0 : $num );
1272
Usage : $hit->seq_inds( seq_type, class, collapse );
1273
Purpose : Get a list of residue positions (indices) across all HSPs
1274
: for identical or conserved residues in the query or sbjct sequence.
1275
Example : @s_ind = $hit->seq_inds('query', 'identical');
1276
: @h_ind = $hit->seq_inds('hit', 'conserved');
1277
: @h_ind = $hit->seq_inds('hit', 'conserved', 1);
1278
Returns : Array of integers
1279
: May include ranges if collapse is non-zero.
1280
Argument : [0] seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
1281
: ('sbjct' is synonymous with 'hit')
1282
: [1] class = 'identical' or 'conserved' (default = 'identical')
1283
: (can be shortened to 'id' or 'cons')
1284
: (actually, anything not 'id' will evaluate to 'conserved').
1285
: [2] collapse = boolean, if non-zero, consecutive positions are merged
1286
: using a range notation, e.g., "1 2 3 4 5 7 9 10 11"
1287
: collapses to "1-5 7 9-11". This is useful for
1288
: consolidating long lists. Default = no collapse.
1291
See Also : L<Bio::Search::HSP::BlastHSP::seq_inds()|Bio::Search::HSP::BlastHSP>
1298
my ($self, $seqType, $class, $collapse) = @_;
1300
$seqType ||= 'query';
1301
$class ||= 'identical';
1304
$seqType = 'sbjct' if $seqType eq 'hit';
1307
foreach $hsp ($self->hsps) {
1308
# This will merge data for all HSPs together.
1309
push @inds, $hsp->seq_inds($seqType, $class);
1312
# Need to remove duplicates and sort the merged positions.
1314
my %tmp = map { $_, 1 } @inds;
1315
@inds = sort {$a <=> $b} keys %tmp;
1318
$collapse ? &Bio::Search::SearchUtils::collapse_nums(@inds) : @inds;
1324
See documentation in L<Bio::Search::Hit::HitI::strand()|Bio::Search::Hit::HitI>
1331
my ($self, $seqType, $strnd) = @_;
1333
Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1335
$seqType ||= (wantarray ? 'list' : 'query');
1336
$seqType = 'sbjct' if $seqType eq 'hit';
1338
$seqType = lc($seqType);
1340
if( defined $strnd ) {
1341
$self->throw("Can't set strand for seqType '$seqType'. Must be 'query' or 'hit'\n") unless ($seqType eq 'sbjct' or $seqType eq 'query');
1343
return $self->{'_strand_'.$seqType} = $strnd;
1347
# If there is only one HSP, defer this call to the solitary HSP.
1348
if($self->num_hsps == 1) {
1349
return $self->hsp->strand($seqType);
1351
elsif( defined $self->{'_strand_query'}) {
1352
# Get the data computed during hsp tiling.
1353
$qstr = $self->{'_strand_query'};
1354
$hstr = $self->{'_strand_sbjct'}
1357
# otherwise, iterate through all HSPs collecting strand info.
1358
# This will return the string "-1/1" if there are HSPs on different strands.
1359
# NOTE: This was the pre-10/21/02 procedure which will no longer be used,
1360
# (unless the above elsif{} is commented out).
1362
foreach my $hsp( $self->hsps ) {
1363
my ( $q, $h ) = $hsp->strand();
1367
$qstr = join( '/', sort keys %qstr);
1368
$hstr = join( '/', sort keys %hstr);
1371
if($seqType =~ /list|array/i) {
1372
return ($qstr, $hstr);
1373
} elsif( $seqType eq 'query' ) {
1382
See documentation in L<Bio::Search::Hit::HitI::frame()|Bio::Search::Hit::HitI>
1389
my( $self, $frm ) = @_;
1391
Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1393
if( defined $frm ) {
1394
return $self->{'_frame'} = $frm;
1397
# The check for $self->{'_frame'} is a remnant from the 'query' mode days
1398
# in which the sbjct object would collect data from the description line only.
1401
if(not defined($self->{'_frame'})) {
1402
$frame = $self->hsp->frame;
1404
$frame = $self->{'_frame'};
1412
Usage : $obj->rank($newval)
1413
Function: Get/Set the rank of this Hit in the Query search list
1414
i.e. this is the Nth hit for a specific query
1415
Returns : value of rank
1416
Args : newvalue (optional)
1423
return $self->{'_rank'} = shift if @_;
1424
return $self->{'_rank'} || 1;
1430
Usage : $locus = $hit->locus();
1431
Function: Retrieve the locus (if available) for the hit
1432
Returns : a scalar string (empty string if not set)
1438
my ($self,$value) = @_;
1439
my $previous = $self->{'_locus'};
1440
if( defined $value || ! defined $previous ) {
1441
unless (defined $value) {
1442
if ($self->{'_name'} =~/(gb|emb|dbj|ref)\|(.*)\|(.*)/) {
1443
$value = $previous = $3;
1445
$value = $previous = '';
1448
$self->{'_locus'} = $value;
1453
=head2 each_accession_number
1455
Title : each_accession_number
1456
Usage : @each_accession_number = $hit->each_accession_number();
1457
Function: Get each accession number listed in the description of the hit.
1458
If there are no alternatives, then only the primary accession will
1460
Returns : list of all accession numbers in the description
1465
sub each_accession_number {
1466
my ($self,$value) = @_;
1467
my $desc = $self->{'_description'};
1468
#put primary accnum on the list
1470
push (@accnums,$self->{'_accession'});
1471
if( defined $desc ) {
1472
while ($desc =~ /(\b\S+\|\S*\|\S*\s?)/g) {
1474
my ($acc, $version);
1475
if ($id =~ /(gb|emb|dbj|sp|pdb|bbs|ref|tp[gde])\|(.*)\|(.*)/) {
1476
($acc, $version) = split /\./, $2;
1477
} elsif ($id =~ /(pir|prf|pat|gnl)\|(.*)\|(.*)/) {
1478
($acc, $version) = split /\./, $3;
1479
} elsif( $id =~ /(gim|gi|bbm|bbs|lcl)\|(\d*)/) {
1481
} elsif( $id =~ /(oth)\|(.*)\|(.*)\|(.*)/ ) { # discontinued...
1482
($acc,$version) = ($2);
1484
#punt, not matching the db's at ftp://ftp.ncbi.nih.gov/blast/db/README
1485
#Database Name Identifier Syntax
1486
#============================ ========================
1487
#GenBank gb|accession|locus
1488
#EMBL Data Library emb|accession|locus
1489
#DDBJ, DNA Database of Japan dbj|accession|locus
1490
#NBRF PIR pir||entry
1491
#Protein Research Foundation prf||name
1492
#SWISS-PROT sp|accession|entry name
1493
#Brookhaven Protein Data Bank pdb|entry|chain
1494
#Patents pat|country|number
1495
#GenInfo Backbone Id bbs|number
1496
#General database identifier gnl|database|identifier
1497
#NCBI Reference Sequence ref|accession|locus
1498
#Local Sequence identifier lcl|identifier
1501
push(@accnums, $acc);
1509
See documentation in L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
1515
return $self->{'_tiled_hsps'} = shift if @_;
1516
return $self->{'_tiled_hsps'};
1521
Title : query_length
1522
Usage : $obj->query_length($newval)
1523
Function: Get/Set the query_length
1524
Returns : value of query_length (a scalar)
1525
Args : on set, new value (a scalar or undef, optional)
1533
return $self->{'_query_length'} = shift if @_;
1534
return $self->{'_query_length'};