~ubuntu-branches/ubuntu/saucy/bioperl/saucy-proposed

« back to all changes in this revision

Viewing changes to Bio/SearchIO/fasta.pm

  • Committer: Bazaar Package Importer
  • Author(s): Charles Plessy
  • Date: 2009-03-10 07:19:11 UTC
  • mfrom: (1.2.3 upstream)
  • Revision ID: james.westby@ubuntu.com-20090310071911-fukqzw54pyb1f0bd
Tags: 1.6.0-2
* Removed patch system (not used):
  - removed instuctions in debian/rules;
  - removed quilt from Build-Depends in debian/control.
* Re-enabled tests:
  - uncommented test command in debian/rules;
  - uncommented previously missing build-dependencies in debian/control.
  - Re-enabled tests and uncommented build-dependencies accordingly.
* Removed libmodule-build-perl and libtest-harness-perl from
  Build-Depends-Indep (provided by perl-modules).
* Better cleaning of empty directories using find -type d -empty -delete
  instead of rmdir in debian/rules (LP: #324001).

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# $Id: fasta.pm,v 1.50.4.1 2006/10/02 23:10:26 sendu Exp $
 
1
# $Id: fasta.pm 15164 2008-12-15 19:30:46Z cjfields $
2
2
#
3
3
# BioPerl module for Bio::SearchIO::fasta
4
4
#
18
18
 
19
19
  # Do not use this object directly, use it through the SearchIO system
20
20
   use Bio::SearchIO;
21
 
   my $searchio = new Bio::SearchIO(-format => 'fasta',
22
 
                                    -file   => 'report.FASTA');
 
21
   my $searchio = Bio::SearchIO->new(-format => 'fasta',
 
22
                    -file   => 'report.FASTA');
23
23
   while( my $result = $searchio->next_result ) {
24
 
        # ... do what you would normally doi with Bio::SearchIO.
 
24
    # ... do what you would normally doi with Bio::SearchIO.
25
25
   }
26
26
 
27
27
=head1 DESCRIPTION
66
66
 
67
67
=cut
68
68
 
69
 
 
70
69
# Let the code begin...
71
70
 
72
 
 
73
71
package Bio::SearchIO::fasta;
74
72
use vars qw(%MODEMAP %MAPPING $IDLENGTH);
75
73
use strict;
77
75
# Object preamble - inherits from Bio::Root::RootI
78
76
 
79
77
use Bio::Factory::ObjectFactory;
80
 
use POSIX;
81
 
 
82
 
BEGIN { 
 
78
 
 
79
BEGIN {
 
80
 
83
81
    # Set IDLENGTH to a new value if you have
84
82
    # compile FASTA with a different ID length
85
83
    # (actually newest FASTA allows the setting of this
87
85
    $IDLENGTH = 6;
88
86
 
89
87
    # mapping of NCBI Blast terms to Bioperl hash keys
90
 
    %MODEMAP = ('FastaOutput' => 'result',
91
 
                'Hit'         => 'hit',
92
 
                'Hsp'         => 'hsp'
93
 
                );
 
88
    %MODEMAP = (
 
89
        'FastaOutput' => 'result',
 
90
        'Hit'         => 'hit',
 
91
        'Hsp'         => 'hsp'
 
92
    );
94
93
 
95
94
    # This should really be done more intelligently, like with
96
95
    # XSLT
97
96
 
98
 
    %MAPPING = 
99
 
        ( 
100
 
          'Hsp_bit-score' => 'HSP-bits',
101
 
          'Hsp_score'     => 'HSP-score',
102
 
          'Hsp_sw-score'  => 'HSP-swscore',
103
 
          'Hsp_evalue'    => 'HSP-evalue',
104
 
          'Hsp_query-from'=> 'HSP-query_start',
105
 
          'Hsp_query-to'  => 'HSP-query_end',
106
 
          'Hsp_hit-from'  => 'HSP-hit_start',
107
 
          'Hsp_hit-to'    => 'HSP-hit_end',
108
 
          'Hsp_positive'  => 'HSP-conserved',
109
 
          'Hsp_identity'  => 'HSP-identical',
110
 
          'Hsp_gaps'      => 'HSP-hsp_gaps',
111
 
          'Hsp_hitgaps'   => 'HSP-hit_gaps',
112
 
          'Hsp_querygaps' => 'HSP-query_gaps',
113
 
          'Hsp_qseq'      => 'HSP-query_seq',
114
 
          'Hsp_hseq'      =>  'HSP-hit_seq',
115
 
          'Hsp_midline'   =>  'HSP-homology_seq',
116
 
          'Hsp_align-len' =>  'HSP-hsp_length',
117
 
          'Hsp_query-frame'=> 'HSP-query_frame',
118
 
          'Hsp_hit-frame'  => 'HSP-hit_frame',
119
 
 
120
 
          'Hit_id'        => 'HIT-name',
121
 
          'Hit_len'       => 'HIT-length',
122
 
          'Hit_accession' => 'HIT-accession',
123
 
          'Hit_def'       => 'HIT-description',
124
 
          'Hit_signif'    => 'HIT-significance',
125
 
          'Hit_score'     => 'HIT-score',
126
 
 
127
 
          'FastaOutput_program'  => 'RESULT-algorithm_name',
128
 
          'FastaOutput_version'  => 'RESULT-algorithm_version',
129
 
          'FastaOutput_query-def'=> 'RESULT-query_name',
130
 
          'FastaOutput_querydesc'=> 'RESULT-query_description',
131
 
          'FastaOutput_query-len'=> 'RESULT-query_length',
132
 
          'FastaOutput_db'       => 'RESULT-database_name',
133
 
          'FastaOutput_db-len'   => 'RESULT-database_entries',
134
 
          'FastaOutput_db-let'   => 'RESULT-database_letters',
135
 
 
136
 
          'Parameters_matrix'    => { 'RESULT-parameters' => 'matrix'},
137
 
          'Parameters_expect'    => { 'RESULT-parameters' => 'expect'},
138
 
          'Parameters_include'   => { 'RESULT-parameters' => 'include'},
139
 
          'Parameters_sc-match'  => { 'RESULT-parameters' => 'match'},
140
 
          'Parameters_sc-mismatch' => { 'RESULT-parameters' => 'mismatch'},
141
 
          'Parameters_gap-open'  => { 'RESULT-parameters' => 'gapopen'},
142
 
          'Parameters_gap-ext'   => { 'RESULT-parameters' => 'gapext'},
143
 
          'Parameters_word-size' => { 'RESULT-parameters' => 'wordsize'},
144
 
          'Parameters_ktup'      => { 'RESULT-parameters' => 'ktup'},
145
 
          'Parameters_filter'    => {'RESULT-parameters' => 'filter'},
146
 
          'Statistics_db-num'    => { 'RESULT-statistics' => 'dbentries'},
147
 
          'Statistics_db-len'    => { 'RESULT-statistics' => 'dbletters'},
148
 
          'Statistics_hsp-len'   => { 'RESULT-statistics' => 'hsplength'},
149
 
          'Statistics_eff-space' => { 'RESULT-statistics' => 'effectivespace'},
150
 
          'Statistics_kappa'     => { 'RESULT-statistics' => 'kappa' },
151
 
          'Statistics_lambda'    => { 'RESULT-statistics' => 'lambda' },
152
 
          'Statistics_entropy'   => { 'RESULT-statistics' => 'entropy'},
153
 
          );
 
97
    %MAPPING = (
 
98
        'Hsp_bit-score'   => 'HSP-bits',
 
99
        'Hsp_score'       => 'HSP-score',
 
100
        'Hsp_sw-score'    => 'HSP-swscore',
 
101
        'Hsp_evalue'      => 'HSP-evalue',
 
102
        'Hsp_query-from'  => 'HSP-query_start',
 
103
        'Hsp_query-to'    => 'HSP-query_end',
 
104
        'Hsp_hit-from'    => 'HSP-hit_start',
 
105
        'Hsp_hit-to'      => 'HSP-hit_end',
 
106
        'Hsp_positive'    => 'HSP-conserved',
 
107
        'Hsp_identity'    => 'HSP-identical',
 
108
        'Hsp_gaps'        => 'HSP-hsp_gaps',
 
109
        'Hsp_hitgaps'     => 'HSP-hit_gaps',
 
110
        'Hsp_querygaps'   => 'HSP-query_gaps',
 
111
        'Hsp_qseq'        => 'HSP-query_seq',
 
112
        'Hsp_hseq'        => 'HSP-hit_seq',
 
113
        'Hsp_midline'     => 'HSP-homology_seq',
 
114
        'Hsp_align-len'   => 'HSP-hsp_length',
 
115
        'Hsp_query-frame' => 'HSP-query_frame',
 
116
        'Hsp_hit-frame'   => 'HSP-hit_frame',
 
117
 
 
118
        'Hit_id'        => 'HIT-name',
 
119
        'Hit_len'       => 'HIT-length',
 
120
        'Hit_accession' => 'HIT-accession',
 
121
        'Hit_def'       => 'HIT-description',
 
122
        'Hit_signif'    => 'HIT-significance',
 
123
        'Hit_score'     => 'HIT-score',
 
124
 
 
125
        'FastaOutput_program'   => 'RESULT-algorithm_name',
 
126
        'FastaOutput_version'   => 'RESULT-algorithm_version',
 
127
        'FastaOutput_query-def' => 'RESULT-query_name',
 
128
        'FastaOutput_querydesc' => 'RESULT-query_description',
 
129
        'FastaOutput_query-len' => 'RESULT-query_length',
 
130
        'FastaOutput_db'        => 'RESULT-database_name',
 
131
        'FastaOutput_db-len'    => 'RESULT-database_entries',
 
132
        'FastaOutput_db-let'    => 'RESULT-database_letters',
 
133
 
 
134
        'Parameters_matrix'      => { 'RESULT-parameters' => 'matrix' },
 
135
        'Parameters_expect'      => { 'RESULT-parameters' => 'expect' },
 
136
        'Parameters_include'     => { 'RESULT-parameters' => 'include' },
 
137
        'Parameters_sc-match'    => { 'RESULT-parameters' => 'match' },
 
138
        'Parameters_sc-mismatch' => { 'RESULT-parameters' => 'mismatch' },
 
139
        'Parameters_gap-open'    => { 'RESULT-parameters' => 'gapopen' },
 
140
        'Parameters_gap-ext'     => { 'RESULT-parameters' => 'gapext' },
 
141
        'Parameters_word-size'   => { 'RESULT-parameters' => 'wordsize' },
 
142
        'Parameters_ktup'        => { 'RESULT-parameters' => 'ktup' },
 
143
        'Parameters_filter'      => { 'RESULT-parameters' => 'filter' },
 
144
        'Statistics_db-num'      => { 'RESULT-statistics' => 'dbentries' },
 
145
        'Statistics_db-len'      => { 'RESULT-statistics' => 'dbletters' },
 
146
        'Statistics_hsp-len'     => { 'RESULT-statistics' => 'hsplength' },
 
147
        'Statistics_eff-space'   => { 'RESULT-statistics' => 'effectivespace' },
 
148
        'Statistics_kappa'       => { 'RESULT-statistics' => 'kappa' },
 
149
        'Statistics_lambda'      => { 'RESULT-statistics' => 'lambda' },
 
150
        'Statistics_entropy'     => { 'RESULT-statistics' => 'entropy' },
 
151
    );
154
152
}
155
153
 
156
 
 
157
154
use base qw(Bio::SearchIO);
158
155
 
159
156
=head2 new
160
157
 
161
158
 Title   : new
162
 
 Usage   : my $obj = new Bio::SearchIO::fasta();
 
159
 Usage   : my $obj = Bio::SearchIO::fasta->new();
163
160
 Function: Builds a new Bio::SearchIO::fasta object 
164
161
 Returns : Bio::SearchIO::fasta
165
162
 Args    : -idlength - set ID length to something other 
171
168
=cut
172
169
 
173
170
sub _initialize {
174
 
  my($self,@args) = @_;
175
 
  $self->SUPER::_initialize(@args);
176
 
  return unless @args;
177
 
  my ($idlength) = $self->_rearrange([qw(IDLENGTH)],@args);
178
 
  $self->idlength($idlength || $IDLENGTH);
179
 
  $self->_eventHandler->register_factory('hsp', 
180
 
                                         Bio::Factory::ObjectFactory->new(
181
 
                                            -type      => 'Bio::Search::HSP::FastaHSP',
182
 
                                            -interface => 'Bio::Search::HSP::HSPI'));
183
 
  return 1;
 
171
    my ( $self, @args ) = @_;
 
172
    $self->SUPER::_initialize(@args);
 
173
    return unless @args;
 
174
    my ($idlength) = $self->_rearrange( [qw(IDLENGTH)], @args );
 
175
    $self->idlength( $idlength || $IDLENGTH );
 
176
    $self->_eventHandler->register_factory(
 
177
        'hsp',
 
178
        Bio::Factory::ObjectFactory->new(
 
179
            -type      => 'Bio::Search::HSP::FastaHSP',
 
180
            -interface => 'Bio::Search::HSP::HSPI'
 
181
        )
 
182
    );
 
183
    return 1;
184
184
}
185
185
 
186
186
=head2 next_result
193
193
 
194
194
=cut
195
195
 
196
 
sub next_result{
197
 
   my ($self) = @_;
198
 
   local $/ = "\n";
199
 
   local $_;
200
 
 
201
 
   my $data = '';
202
 
   my $seentop = 0;
203
 
   my $current_hsp;
204
 
   $self->start_document();
205
 
   my @hit_signifs;
206
 
   while( defined ($_ = $self->_readline )) {
207
 
       next if( ! $self->in_element('hsp')  &&
208
 
                /^\s+$/); # skip empty lines
209
 
       if( m/(\S+)\s+searches\s+a\s+(protein\s+or\s+DNA\s+)?sequence/oxi || 
210
 
           /(\S+)\s+compares\s+a/ ||
211
 
           ( m/^\#\s+/ && 
212
 
             ($_ = $self->_readline) &&
213
 
             /(\S+)\s+searches\s+a\s+(protein\s+or\s+DNA\s+)?sequence/oxi ||
214
 
             /(\S+)\s+compares\s+a/
215
 
           )
216
 
         ) {
217
 
           if( $seentop ) {
218
 
               $self->_pushback($_);
219
 
               $self->end_element({ 'Name' => 'FastaOutput'});
220
 
               return $self->end_document();
221
 
           }
222
 
           $self->{'_reporttype'} = $1;
223
 
           $self->start_element({ 'Name' => 'FastaOutput' } );
224
 
           $self->{'_result_count'}++;
225
 
           $seentop = 1;
226
 
           
227
 
           $self->element({ 'Name' => 'FastaOutput_program',
228
 
                            'Data' => $self->{'_reporttype'}});
229
 
           $_ = $self->_readline();
230
 
           my ($version) = (/version\s+(\S+)/);
231
 
           $version = '' unless defined $version;
232
 
           $self->{'_version'} = $version;
233
 
           $self->element({ 'Name' => 'FastaOutput_version',
234
 
                            'Data' => $version});
235
 
 
236
 
           my ($last, $leadin, $type, $querylen, $querytype, $querydef);
237
 
 
238
 
           while( defined($_ = $self->_readline()) ) {
239
 
               if( /^ (
 
196
sub next_result {
 
197
    my ($self) = @_;
 
198
    local $/ = "\n";
 
199
    local $_;
 
200
 
 
201
    my $data    = '';
 
202
    my $seentop = 0;
 
203
    my $current_hsp;
 
204
    $self->start_document();
 
205
    my @hit_signifs;
 
206
    while ( defined( $_ = $self->_readline ) ) {
 
207
        next if ( !$self->in_element('hsp')
 
208
            && /^\s+$/ );    # skip empty lines
 
209
        if (
 
210
               m/(\S+)\s+searches\s+a\s+(protein\s+or\s+DNA\s+)?sequence/oxi
 
211
            || /(\S+)\s+compares\s+a/
 
212
            || (   m/^\#\s+/
 
213
                && ( $_ = $self->_readline )
 
214
                && /(\S+)\s+searches\s+a\s+(protein\s+or\s+DNA\s+)?sequence/oxi
 
215
                || /(\S+)\s+compares\s+a/ )
 
216
          )
 
217
        {
 
218
            if ($seentop) {
 
219
                $self->_pushback($_);
 
220
                $self->end_element( { 'Name' => 'FastaOutput' } );
 
221
                return $self->end_document();
 
222
            }
 
223
            $self->{'_reporttype'} = $1;
 
224
            $self->start_element( { 'Name' => 'FastaOutput' } );
 
225
            $self->{'_result_count'}++;
 
226
            $seentop = 1;
 
227
            #$self->debug( "reporttype is " . $self->{'_reporttype'} . "\n" );
 
228
            $self->element(
 
229
                {
 
230
                    'Name' => 'FastaOutput_program',
 
231
                    'Data' => $self->{'_reporttype'}
 
232
                }
 
233
            );
 
234
            $_ = $self->_readline();
 
235
            my ($version) = (/version\s+(\S+)/);
 
236
            $version = '' unless defined $version;
 
237
            $self->{'_version'} = $version;
 
238
            $self->element(
 
239
                {
 
240
                    'Name' => 'FastaOutput_version',
 
241
                    'Data' => $version
 
242
                }
 
243
            );
 
244
 
 
245
            my ( $last, $leadin, $type, $querylen, $querytype, $querydef );
 
246
 
 
247
            while ( defined( $_ = $self->_readline() ) ) {
 
248
                if (
 
249
                    /^ (
240
250
                       (?:\s+>) |             # fa33 lead-in
241
251
                       (?:\s*\d+\s*>>>)       # fa34 mlib lead-in
242
252
                      )
243
253
                      (.*)
244
254
                   /x
245
 
                 ) {
246
 
                   ($leadin, $querydef) = ($1, $2);
247
 
                   if ($leadin =~ m/>>>/) {
248
 
                       if($querydef =~ /^(.*?)\s+(?:\-\s+)?(\d+)\s+(aa|nt)\s*$/o ) {
249
 
                           ($querydef, $querylen, $querytype) = ($1, $2, $3);
250
 
                           last;
251
 
                       }
252
 
                   } else {
253
 
                       if( $last =~ /(\S+)[:,]\s*(\d+)\s+(aa|nt)/ ) {
254
 
                           ($querylen, $querytype) = ($2, $3);
255
 
                           $querydef ||= $1;
256
 
                           last;
257
 
                       }
258
 
                   }
259
 
               } elsif ( m/^\s*vs\s+\S+/o ) {
260
 
                   if ( $last =~ /(\S+)[,:]\s+(\d+)\s+(aa|nt)/o) {
261
 
                       ($querydef, $querylen, $querytype) = ($1, $2, $3);
262
 
                       last;
263
 
                   }
264
 
               } 
265
 
               $last = $_;
266
 
           }
267
 
           
268
 
           if( $self->{'_reporttype'} &&
269
 
               $self->{'_reporttype'} eq 'FASTA'
270
 
               ) {
271
 
               if( $querytype eq 'nt') {
272
 
                   $self->{'_reporttype'} = 'FASTN' ;
273
 
               } elsif( $querytype eq 'aa' ) {
274
 
                   $self->{'_reporttype'} = 'FASTP' ;
275
 
               }
276
 
           }
277
 
           my ($name, $descr) = $querydef =~ m/^(\S+)\s*(.*?)\s*$/o;
278
 
           $self->element({'Name' => 'FastaOutput_query-def',
279
 
                           'Data' => $name});
280
 
           $self->element({'Name' => 'FastaOutput_querydesc',
281
 
                           'Data' => $descr});
282
 
           if ($querylen) {
283
 
               $self->element({'Name' => 'FastaOutput_query-len',
284
 
                               'Data' => $querylen});
285
 
           } else {
286
 
               $self->warn("unable to find and set query length");
287
 
           }
288
 
           if( $last =~ /^\s*vs\s+(\S+)/ || 
289
 
               ( $last =~ /^searching\s+(\S+)\s+library/ ) ||
290
 
               (defined $_ && /^\s*vs\s+(\S+)/) ||
291
 
               (defined ($_ = $self->_readline()) && /^\s*vs\s+(\S+)/)
292
 
             ) {
293
 
               $self->element({'Name' => 'FastaOutput_db',
294
 
                           'Data' => $1});
295
 
           } elsif (m/^\s+opt(?:\s+E\(\))?$/o) {
296
 
               # histogram ... read over it more rapidly than the larger outer loop:
297
 
               while (defined($_ = $self->_readline)) {
298
 
                   last if m/^>\d+/;
299
 
               }
300
 
           }
301
 
 
302
 
       } elsif( /(\d+) residues in\s+(\d+)\s+sequences/ ) {
303
 
           $self->element({'Name' => 'FastaOutput_db-let',
304
 
                           'Data' => $1});
305
 
           $self->element({'Name' => 'FastaOutput_db-len',
306
 
                           'Data' => $2});
307
 
           $self->element({'Name' => 'Statistics_db-len',
308
 
                           'Data' => $1});
309
 
           $self->element({'Name' => 'Statistics_db-num',
310
 
                           'Data' => $2});         
311
 
       } elsif( /Lambda=\s*(\S+)/ ) {
312
 
           $self->element({'Name' => 'Statistics_lambda',
313
 
                           'Data' => $1});        
314
 
       } elsif (/K=\s*(\S+)/) {
315
 
           $self->element({'Name' => 'Statistics_kappa',
316
 
                           'Data' => $1});
317
 
       } elsif( /^\s*(Smith-Waterman).+(\S+)\s*matrix [^\]]*?(xS)?\]/ ) {          
318
 
           $self->element({'Name' => 'Parameters_matrix',
319
 
                           'Data' => $2});
320
 
           $self->element({'Name' => 'Parameters_filter',
321
 
                           'Data' => defined $3 ? 1 : 0,
322
 
                          });
323
 
           $self->{'_reporttype'} = $1;
324
 
 
325
 
           $self->element({ 'Name' => 'FastaOutput_program',
326
 
                            'Data' => $self->{'_reporttype'}});
327
 
           
328
 
       } elsif( /The best( related| unrelated)? scores are:/ ) {
329
 
           my $rel = $1;
330
 
           my @labels = split;
331
 
           @labels = map {
332
 
               if ($_ =~ m/^E\((\d+)\)$/o) {
333
 
                   $self->element({'Name' => 'Statistics_eff-space', 'Data' => $1});
334
 
                   "evalue";
335
 
               } else {
336
 
                   $_;
337
 
               }
338
 
           } @labels[$rel ? 5 : 4 .. $#labels];
339
 
 
340
 
           while( defined ($_ = $self->_readline() ) && 
341
 
                  ! /^\s+$/ ) {
342
 
               my @line = split;
343
 
 
344
 
               if ($line[-1] =~ m/\=/o && $labels[-1] eq 'fs') {
345
 
                   # unlabelled alignment hit;
346
 
                   push @labels, "aln_code";
347
 
               }
348
 
 
349
 
               my %data;
350
 
               @data{@labels} = splice(@line, @line - @labels);
351
 
               if ($line[-1] =~ m/\[([1-6rf])\]/o) {
352
 
                   my $fr = $1;
353
 
                   $data{lframe} = ($fr =~ /\d/o ?
354
 
                                    ($fr <= 3   ? "+$fr" : "-@{[$fr-3]}") :
355
 
                                    ($fr eq 'f' ? '+1'  : '-1')
356
 
                                    );
357
 
                   pop @line;
358
 
               } else {
359
 
                   $data{lframe} = '0';
360
 
               }
361
 
 
362
 
               if ($line[-1] =~ m/^\(?(\d+)\)$/) {
363
 
                   $data{hit_len} = $1;
364
 
                   pop @line;
365
 
                   if ($line[-1] =~ m/^\($/) {
366
 
                       pop @line;
367
 
                   }
368
 
               } else {
369
 
                   $data{hit_len} = 0;
370
 
               }
371
 
 
372
 
               # rebuild the first part of the line, preserving spaces:
373
 
               ($_) = m/^(\S+(?:\s+\S+){$#line})/;
374
 
 
375
 
               my ($id, $desc) = split(/\s+/,$_,2);
376
 
               my @pieces = split(/\|/,$id);
377
 
               my $acc = pop @pieces;
378
 
               $acc =~ s/\.\d+$//;
379
 
 
380
 
               @data{qw(id desc acc)} = ($id, $desc, $acc);
381
 
 
382
 
               push @hit_signifs, \%data;
383
 
           }
384
 
       } elsif( /^\s*([T]?FAST[XYAF]).+,\s*(\S+)\s*matrix[^\]]+?(xS)?\]\s*ktup:\s*(\d+)/ ) {
385
 
           $self->element({'Name' => 'Parameters_matrix',
386
 
                           'Data' => $2});
387
 
           $self->element({'Name' => 'Parameters_filter',
388
 
                           'Data' => defined $3 ? 1 : 0,
389
 
                          });
390
 
           $self->element({'Name' => 'Parameters_ktup',
391
 
                           'Data' => $4});
392
 
           $self->{'_reporttype'} = $1 if( $self->{'_reporttype'} !~ /FAST[PN]/i ) ;
393
 
 
394
 
           $self->element({ 'Name' => 'FastaOutput_program',
395
 
                            'Data' => $self->{'_reporttype'}});
396
 
           
397
 
       } elsif( /(?:gap\-pen|open\/ext):\s+([\-\+]?\d+)\s*\/\s*([\-\+]?\d+).+width:\s+(\d+)/ ) {
398
 
           $self->element({'Name' => 'Parameters_gap-open',
399
 
                           'Data' => $1});
400
 
           $self->element({'Name' => 'Parameters_gap-ext',
401
 
                           'Data' => $2});
402
 
           $self->element({'Name' => 'Parameters_word-size',
403
 
                           'Data' => $3});
404
 
       } elsif( /^>>(.+?)\s+\((\d+)\s*(aa|nt)\)$/ ) {
405
 
           if( $self->in_element('hsp') ) {
406
 
               $self->end_element({ 'Name' => 'Hsp'});
407
 
           }
408
 
           if( $self->in_element('hit') ) {
409
 
               $self->end_element({ 'Name' => 'Hit'});
410
 
           }
411
 
           
412
 
           $self->start_element({'Name' => 'Hit'});
413
 
           $self->element({ 'Name' => 'Hit_len',
414
 
                            'Data' => $2});  
415
 
           my ($id,$desc) = split(/\s+/,$1,2);
416
 
           $self->element({ 'Name' => 'Hit_id',
417
 
                            'Data' => $id});       
418
 
           my @pieces = split(/\|/,$id);
419
 
           my $acc = pop @pieces;
420
 
           $acc =~ s/\.\d+$//;
421
 
           $self->element({ 'Name' =>  'Hit_accession',
422
 
                            'Data'  => $acc});  
423
 
           $self->element({ 'Name' => 'Hit_def',
424
 
                            'Data' => $desc});     
425
 
 
426
 
           $_ = $self->_readline();
427
 
           my ($score,$bits,$e) = /Z-score: \s* (\S+) \s*
 
255
                  )
 
256
                {
 
257
                    ( $leadin, $querydef ) = ( $1, $2 );
 
258
                    if ( $leadin =~ m/>>>/ ) {
 
259
                        if ( $querydef =~
 
260
                            /^(.*?)\s+(?:\-\s+)?(\d+)\s+(aa|nt).*$/o )
 
261
                        {
 
262
                            ( $querydef, $querylen, $querytype ) =
 
263
                              ( $1, $2, $3 );
 
264
                            last;
 
265
                        }
 
266
                    }
 
267
                    else {
 
268
                        if ( $last =~ /(\S+)[:,]\s*(\d+)\s+(aa|nt)/ ) {
 
269
                            ( $querylen, $querytype ) = ( $2, $3 );
 
270
                            $querydef ||= $1;
 
271
                            last;
 
272
                        }
 
273
                    }
 
274
                }
 
275
                elsif (m/^\s*vs\s+\S+/o) {
 
276
                    if ( $last =~ /(\S+)[,:]\s+(\d+)\s+(aa|nt)/o ) {
 
277
                        ( $querydef, $querylen, $querytype ) = ( $1, $2, $3 );
 
278
                        last;
 
279
                    }
 
280
                }
 
281
                $last = $_;
 
282
            }
 
283
            if (   $self->{'_reporttype'}
 
284
                && $self->{'_reporttype'} eq 'FASTA' )
 
285
            {
 
286
                if ( $querytype eq 'nt' ) {
 
287
                    $self->{'_reporttype'} = 'FASTN';
 
288
                }
 
289
                elsif ( $querytype eq 'aa' ) {
 
290
                    $self->{'_reporttype'} = 'FASTP';
 
291
                }
 
292
            }
 
293
            my ( $name, $descr ) = $querydef =~ m/^(\S+)\s*(.*?)\s*$/o;
 
294
            $self->element(
 
295
                {
 
296
                    'Name' => 'FastaOutput_query-def',
 
297
                    'Data' => $name
 
298
                }
 
299
            );
 
300
            $self->element(
 
301
                {
 
302
                    'Name' => 'FastaOutput_querydesc',
 
303
                    'Data' => $descr
 
304
                }
 
305
            );
 
306
            if ($querylen) {
 
307
                $self->element(
 
308
                    {
 
309
                        'Name' => 'FastaOutput_query-len',
 
310
                        'Data' => $querylen
 
311
                    }
 
312
                );
 
313
            }
 
314
            else {
 
315
                $self->warn("unable to find and set query length");
 
316
            }
 
317
            if (
 
318
                   $last =~ /^\s*vs\s+(\S+)/
 
319
                || ( $last =~ /^searching\s+(\S+)\s+library/ )
 
320
                || ( $last =~ /^Library:\s+(\S+)\s+/ )
 
321
                || (
 
322
                    defined $_
 
323
                    && (   /^\s*vs\s+(\S+)/
 
324
                        || /^Library:\s+(\S+)\s+/ )
 
325
                )
 
326
                || ( defined( $_ = $self->_readline() )
 
327
                    && ( /^\s*vs\s+(\S+)/ || /^Library:\s+(\S+)/ ) )
 
328
              )
 
329
            {
 
330
                $self->element(
 
331
                    {
 
332
                        'Name' => 'FastaOutput_db',
 
333
                        'Data' => $1
 
334
                    }
 
335
                );
 
336
            }
 
337
            elsif (m/^\s+opt(?:\s+E\(\))?$/o) {
 
338
 
 
339
           # histogram ... read over it more rapidly than the larger outer loop:
 
340
                while ( defined( $_ = $self->_readline ) ) {
 
341
                    last if m/^>\d+/;
 
342
                }
 
343
            }
 
344
        }
 
345
        elsif (/(\d+)\s+residues\s+in\s+(\d+)\s+(?:library\s+)?sequences/) {
 
346
            $self->element(
 
347
                {
 
348
                    'Name' => 'FastaOutput_db-let',
 
349
                    'Data' => $1
 
350
                }
 
351
            );
 
352
            $self->element(
 
353
                {
 
354
                    'Name' => 'FastaOutput_db-len',
 
355
                    'Data' => $2
 
356
                }
 
357
            );
 
358
            $self->element(
 
359
                {
 
360
                    'Name' => 'Statistics_db-len',
 
361
                    'Data' => $1
 
362
                }
 
363
            );
 
364
            $self->element(
 
365
                {
 
366
                    'Name' => 'Statistics_db-num',
 
367
                    'Data' => $2
 
368
                }
 
369
            );
 
370
        }
 
371
        elsif (/Lambda=\s*(\S+)/) {
 
372
            $self->element(
 
373
                {
 
374
                    'Name' => 'Statistics_lambda',
 
375
                    'Data' => $1
 
376
                }
 
377
            );
 
378
        }
 
379
        elsif (/K=\s*(\S+)/) {
 
380
            $self->element(
 
381
                {
 
382
                    'Name' => 'Statistics_kappa',
 
383
                    'Data' => $1
 
384
                }
 
385
            );
 
386
        }
 
387
        elsif (/^\s*(Smith-Waterman).+(\S+)\s*matrix [^\]]*?(xS)?\]/) {
 
388
            $self->element(
 
389
                {
 
390
                    'Name' => 'Parameters_matrix',
 
391
                    'Data' => $2
 
392
                }
 
393
            );
 
394
            $self->element(
 
395
                {
 
396
                    'Name' => 'Parameters_filter',
 
397
                    'Data' => defined $3 ? 1 : 0,
 
398
                }
 
399
            );
 
400
            $self->{'_reporttype'} = $1;
 
401
 
 
402
            $self->element(
 
403
                {
 
404
                    'Name' => 'FastaOutput_program',
 
405
                    'Data' => $self->{'_reporttype'}
 
406
                }
 
407
            );
 
408
        }
 
409
        elsif (/The best( related| unrelated)? scores are:/) {
 
410
            my $rel    = $1;
 
411
            my @labels = split;
 
412
            @labels = map {
 
413
                if ( $_ =~ m/^E\((\d+)\)$/o )
 
414
                {
 
415
                    $self->element(
 
416
                        { 'Name' => 'Statistics_eff-space', 'Data' => $1 } );
 
417
                    "evalue";
 
418
                }
 
419
                else {
 
420
                    $_;
 
421
                }
 
422
            } @labels[ $rel ? 5 : 4 .. $#labels ];
 
423
 
 
424
            while ( defined( $_ = $self->_readline() )
 
425
                && !/^\s+$/ )
 
426
            {
 
427
                my @line = split;
 
428
 
 
429
                if ( $line[-1] =~ m/\=/o && $labels[-1] eq 'fs' ) {
 
430
 
 
431
                    # unlabelled alignment hit;
 
432
                    push @labels, "aln_code";
 
433
                }
 
434
 
 
435
                my %data;
 
436
                @data{@labels} = splice( @line, @line - @labels );
 
437
                if ( $line[-1] =~ m/\[([1-6rf])\]/o ) {
 
438
                    my $fr = $1;
 
439
                    $data{lframe} = (
 
440
                        $fr =~ /\d/o
 
441
                        ? ( $fr <= 3 ? "+$fr" : "-@{[$fr-3]}" )
 
442
                        : ( $fr eq 'f' ? '+1' : '-1' )
 
443
                    );
 
444
                    pop @line;
 
445
                }
 
446
                else {
 
447
                    $data{lframe} = '0';
 
448
                }
 
449
 
 
450
                if ( $line[-1] =~ m/^\(?(\d+)\)$/ ) {
 
451
                    $data{hit_len} = $1;
 
452
                    pop @line;
 
453
                    if ( $line[-1] =~ m/^\($/ ) {
 
454
                        pop @line;
 
455
                    }
 
456
                }
 
457
                else {
 
458
                    $data{hit_len} = 0;
 
459
                }
 
460
 
 
461
                # rebuild the first part of the line, preserving spaces:
 
462
                ($_) = m/^(\S+(?:\s+\S+){$#line})/;
 
463
 
 
464
                my ( $id, $desc ) = split( /\s+/, $_, 2 );
 
465
                my @pieces = split( /\|/, $id );
 
466
                my $acc = pop @pieces;
 
467
                $acc =~ s/\.\d+$//;
 
468
 
 
469
                @data{qw(id desc acc)} = ( $id, $desc, $acc );
 
470
 
 
471
                push @hit_signifs, \%data;
 
472
            }
 
473
        }
 
474
        elsif (
 
475
/^\s*([T]?FAST[XYAF]).+,\s*(\S+)\s*matrix[^\]]+?(xS)?\]\s*ktup:\s*(\d+)/
 
476
          )
 
477
        {
 
478
 
 
479
            $self->element(
 
480
                {
 
481
                    'Name' => 'Parameters_matrix',
 
482
                    'Data' => $2
 
483
                }
 
484
            );
 
485
            $self->element(
 
486
                {
 
487
                    'Name' => 'Parameters_filter',
 
488
                    'Data' => defined $3 ? 1 : 0,
 
489
                }
 
490
            );
 
491
            $self->element(
 
492
                {
 
493
                    'Name' => 'Parameters_ktup',
 
494
                    'Data' => $4
 
495
                }
 
496
            );
 
497
            $self->{'_reporttype'} = $1
 
498
              if ( $self->{'_reporttype'} !~ /FAST[PN]/i );
 
499
 
 
500
            $self->element(
 
501
                {
 
502
                    'Name' => 'FastaOutput_program',
 
503
                    'Data' => $self->{'_reporttype'}
 
504
                }
 
505
            );
 
506
        }
 
507
        elsif (/^Algorithm:\s+(\S+)\s+\(([^)]+)\)\s+(\S+)/) {
 
508
            $self->{'_reporttype'} = $1
 
509
              if ( $self->{'_reporttype'} !~ /FAST[PN]/i );
 
510
        }
 
511
        elsif (
 
512
            /^Parameters:\s+(\S+)\s*matrix\s*(?:\(([^(]+?)\))?\s*ktup:\s*(\d+)/)
 
513
        {    # FASTA 35.04
 
514
            $self->element(
 
515
                {
 
516
                    'Name' => 'Parameters_matrix',
 
517
                    'Data' => $1
 
518
                }
 
519
            );
 
520
            $self->element(
 
521
                {
 
522
                    'Name' => 'Parameters_filter',
 
523
                    'Data' => defined $2 ? $2 : 0,
 
524
                }
 
525
            );
 
526
            $self->element(
 
527
                {
 
528
                    'Name' => 'Parameters_ktup',
 
529
                    'Data' => $3
 
530
                }
 
531
            );
 
532
        }
 
533
        elsif (
 
534
/(?:gap\-pen|open\/ext):\s+([\-\+]?\d+)\s*\/\s*([\-\+]?\d+).+width:\s+(\d+)/
 
535
          )
 
536
        {
 
537
            $self->element(
 
538
                {
 
539
                    'Name' => 'Parameters_gap-open',
 
540
                    'Data' => $1
 
541
                }
 
542
            );
 
543
            $self->element(
 
544
                {
 
545
                    'Name' => 'Parameters_gap-ext',
 
546
                    'Data' => $2
 
547
                }
 
548
            );
 
549
            $self->element(
 
550
                {
 
551
                    'Name' => 'Parameters_word-size',
 
552
                    'Data' => $3
 
553
                }
 
554
            );
 
555
        }
 
556
        elsif (/^>>(.+?)\s+\((\d+)\s*(aa|nt)\)$/) {
 
557
            if ( $self->in_element('hsp') ) {
 
558
                $self->end_element( { 'Name' => 'Hsp' } );
 
559
            }
 
560
            if ( $self->in_element('hit') ) {
 
561
                $self->end_element( { 'Name' => 'Hit' } );
 
562
            }
 
563
 
 
564
            $self->start_element( { 'Name' => 'Hit' } );
 
565
            $self->element(
 
566
                {
 
567
                    'Name' => 'Hit_len',
 
568
                    'Data' => $2
 
569
                }
 
570
            );
 
571
            my ( $id, $desc ) = split( /\s+/, $1, 2 );
 
572
            $self->element(
 
573
                {
 
574
                    'Name' => 'Hit_id',
 
575
                    'Data' => $id
 
576
                }
 
577
            );
 
578
 
 
579
            #$self->debug("Hit ID is $id\n") if $self->verbose > 0;
 
580
            my @pieces = split( /\|/, $id );
 
581
            my $acc = pop @pieces;
 
582
            $acc =~ s/\.\d+$//;
 
583
            $self->element(
 
584
                {
 
585
                    'Name' => 'Hit_accession',
 
586
                    'Data' => $acc
 
587
                }
 
588
            );
 
589
            $self->element(
 
590
                {
 
591
                    'Name' => 'Hit_def',
 
592
                    'Data' => $desc
 
593
                }
 
594
            );
 
595
 
 
596
            $_ = $self->_readline();
 
597
            my ( $score, $bits, $e ) = /Z-score: \s* (\S+) \s*
428
598
                               (?: bits: \s* (\S+) \s+ )?
429
599
                               (?: E|expect ) \s* \(\) :? \s*(\S+)/ox;
430
 
           $bits = $score unless defined $bits;
431
 
 
432
 
           my $v = shift @hit_signifs;
433
 
           if( defined $v ) {
434
 
               @{$v}{qw(evalue bits z-sc)} = ($e, $bits, $score);
435
 
           }
436
 
           $self->element({'Name' => 'Hit_signif',
437
 
                           'Data' => $v ? $v->{evalue} : $e });
438
 
           $self->element({'Name' => 'Hit_score',
439
 
                           'Data' => $v ? $v->{bits} : $bits });
440
 
           $self->start_element({'Name' => 'Hsp'});
441
 
 
442
 
           $self->element({'Name' => 'Hsp_score',
443
 
                           'Data' => $v ? $v->{'z-sc'} : $score });
444
 
           $self->element({'Name' => 'Hsp_evalue',
445
 
                           'Data' => $v ? $v->{evalue} : $e });
446
 
           $self->element({'Name' => 'Hsp_bit-score',
447
 
                           'Data' => $v ? $v->{bits} : $bits });
448
 
           $_ = $self->_readline();
449
 
           if( s/Smith-Waterman score:\s*(\d+)\;?// ) {
450
 
               $self->element({'Name' => 'Hsp_sw-score',
451
 
                               'Data' => $1});
452
 
           }
453
 
           if( / (\d*\.?\d+)\% \s* identity
 
600
            $bits = $score unless defined $bits;
 
601
 
 
602
            my $v = shift @hit_signifs;
 
603
            if ( defined $v ) {
 
604
                @{$v}{qw(evalue bits z-sc)} = ( $e, $bits, $score );
 
605
            }
 
606
            $self->element(
 
607
                {
 
608
                    'Name' => 'Hit_signif',
 
609
                    'Data' => $v ? $v->{evalue} : $e
 
610
                }
 
611
            );
 
612
            $self->element(
 
613
                {
 
614
                    'Name' => 'Hit_score',
 
615
                    'Data' => $v ? $v->{bits} : $bits
 
616
                }
 
617
            );
 
618
            $self->start_element( { 'Name' => 'Hsp' } );
 
619
 
 
620
            $self->element(
 
621
                {
 
622
                    'Name' => 'Hsp_score',
 
623
                    'Data' => $v ? $v->{'z-sc'} : $score
 
624
                }
 
625
            );
 
626
            $self->element(
 
627
                {
 
628
                    'Name' => 'Hsp_evalue',
 
629
                    'Data' => $v ? $v->{evalue} : $e
 
630
                }
 
631
            );
 
632
            $self->element(
 
633
                {
 
634
                    'Name' => 'Hsp_bit-score',
 
635
                    'Data' => $v ? $v->{bits} : $bits
 
636
                }
 
637
            );
 
638
            $_ = $self->_readline();
 
639
 
 
640
            if (s/Smith-Waterman score:\s*(\d+)\;?//) {
 
641
                $self->element(
 
642
                    {
 
643
                        'Name' => 'Hsp_sw-score',
 
644
                        'Data' => $1
 
645
                    }
 
646
                );
 
647
            }
 
648
            if (
 
649
                / (\d*\.?\d+)\% \s* identity
454
650
                 (?:\s* \(\s*(\S+)\% \s* (?:ungapped|similar) \) )?
455
651
                 \s* in \s* (\d+) \s+ (?:aa|nt) \s+ overlap \s*
456
652
                 \( (\d+) \- (\d+) : (\d+) \- (\d+) \)
457
 
               /x ) {
458
 
               my ($identper,$gapper,$len,$querystart,
459
 
                   $queryend,$hitstart,$hitend) = ($1,$2,$3,$4,$5,$6,$7);
460
 
               my $ident = POSIX::ceil(($identper/100) * $len);
461
 
               my $gaps = ( defined $gapper ) ? POSIX::ceil ( ($gapper/100) * $len) : undef;
462
 
               
463
 
               $self->element({'Name' => 'Hsp_gaps',
464
 
                               'Data' => $gaps});
465
 
               $self->element({'Name' => 'Hsp_identity',
466
 
                               'Data' => $ident});
467
 
               $self->element({'Name' => 'Hsp_positive',
468
 
                               'Data' => $ident});
469
 
               $self->element({'Name' => 'Hsp_align-len',
470
 
                               'Data' => $len});
471
 
               
472
 
               $self->debug( "query_start = $querystart, query_end = $queryend\n");
473
 
               $self->element({'Name' => 'Hsp_query-from',
474
 
                               'Data' => $querystart});
475
 
               $self->element({'Name' => 'Hsp_query-to',
476
 
                               'Data' => $queryend});
477
 
               $self->element({'Name' => 'Hsp_hit-from',
478
 
                               'Data' => $hitstart});
479
 
               $self->element({'Name' => 'Hsp_hit-to',
480
 
                               'Data' => $hitend});
481
 
               
482
 
               }
483
 
 
484
 
           if ($v) {
485
 
               $self->element({'Name' => 'Hsp_querygaps', 'Data' => $v->{qgaps} }) if exists $v->{qgaps};
486
 
               $self->element({'Name' => 'Hsp_hitgaps', 'Data' => $v->{lgaps} }) if exists $v->{lgaps};
487
 
 
488
 
               if ($self->{'_reporttype'} =~ m/^FAST[NXY]$/o) {
489
 
                   if( 8 == scalar grep { exists $v->{$_} } qw(an0 ax0 pn0 px0 an1 ax1 pn1 px1) ) {
490
 
                       if ($v->{ax0} < $v->{an0}) {
491
 
                           $self->element({'Name' => 'Hsp_query-frame', 'Data' => "-@{[(($v->{px0} - $v->{ax0}) % 3) + 1]}" });
492
 
                       } else {
493
 
                           $self->element({'Name' => 'Hsp_query-frame', 'Data' => "+@{[(($v->{an0} - $v->{pn0}) % 3) + 1]}" });
494
 
                       }
495
 
                       if ($v->{ax1} < $v->{an1}) {
496
 
                           $self->element({'Name' => 'Hsp_hit-frame', 'Data' => "-@{[(($v->{px1} - $v->{ax1}) % 3) + 1]}" });
497
 
                       } else {
498
 
                           $self->element({'Name' => 'Hsp_hit-frame', 'Data' => "+@{[(($v->{an1} - $v->{pn1}) % 3) + 1]}" });
499
 
                       }
500
 
                   } else {
501
 
                       $self->element({'Name' => 'Hsp_query-frame', 'Data' => $v->{lframe} });
502
 
                       $self->element({'Name' => 'Hsp_hit-frame', 'Data' => 0 });
503
 
                   }
504
 
               } else {
505
 
                   $self->element({'Name' => 'Hsp_query-frame', 'Data' => 0 });
506
 
                   $self->element({'Name' => 'Hsp_hit-frame', 'Data' => $v->{lframe} });
507
 
               }
508
 
 
509
 
           } else {
510
 
               $self->warn( "unable to parse FASTA score line: $_");
511
 
           }
512
 
       } elsif( /\d+\s*residues\s*in\s*\d+\s*query\s*sequences/ ) {
513
 
           if( $self->in_element('hsp') ) {
514
 
               $self->end_element({'Name' => 'Hsp'});
515
 
           } 
516
 
           if( $self->in_element('hit') ) {
517
 
               $self->end_element({'Name' => 'Hit'});
518
 
           }
519
 
           
520
 
#          $_ = $self->_readline();
521
 
#          my ( $liblen,$libsize) = /(\d+)\s+residues\s*in(\d+)\s*library/;
522
 
           # fast forward to the end of the file as there is 
523
 
           # nothing else left to do with this file and want to be sure and
524
 
           # reset it
525
 
           while(defined($_ = $self->_readline() ) ) { 
526
 
               last if( /^Function used was/);
527
 
               if( /(\S+)\s+searches\s+a\s+(protein\s+or\s+DNA\s+)?
528
 
                   sequence/oxi ||
529
 
                   /(\S+)\s+compares\s+a/oi ) {
530
 
                   $self->_pushback($_);
531
 
               }
532
 
           }
533
 
 
534
 
           if (@hit_signifs) {
535
 
               # process remaining best hits
536
 
               for my $h (@hit_signifs) {
537
 
                   # Hsp_score Hsp_evalue Hsp_bit-score
538
 
                   # Hsp_sw-score Hsp_gaps Hsp_identity Hsp_positive
539
 
                   # Hsp_align-len Hsp_query-from Hsp_query-to
540
 
                   # Hsp_hit-from Hsp_hit-to Hsp_qseq Hsp_midline
541
 
 
542
 
                   $self->start_element({'Name' => 'Hit'});
543
 
                   $self->element({ 'Name' => 'Hit_len',
544
 
                                    'Data' => $h->{hit_len}
545
 
                                  }) if exists $h->{hit_len};
546
 
                   $self->element({ 'Name' => 'Hit_id',
547
 
                                    'Data' => $h->{id}
548
 
                                  }) if exists $h->{id};
549
 
                   $self->element({ 'Name' =>  'Hit_accession',
550
 
                                    'Data'  => $h->{acc}
551
 
                                  }) if exists $h->{acc};
552
 
                   $self->element({ 'Name' => 'Hit_def',
553
 
                                    'Data' => $h->{desc}
554
 
                                  }) if exists $h->{desc};
555
 
                   $self->element({'Name' => 'Hit_signif',
556
 
                                   'Data' => $h->{evalue}
557
 
                                  }) if exists $h->{evalue};
558
 
                   $self->element({'Name' => 'Hit_score',
559
 
                                   'Data' => $h->{bits}
560
 
                                  }) if exists $h->{bits};
561
 
 
562
 
                   $self->start_element({'Name' => 'Hsp'});
563
 
                   $self->element({'Name' => 'Hsp_score', 'Data' => $h->{'z-sc'} }) if exists $h->{'z-sc'};
564
 
                   $self->element({'Name' => 'Hsp_evalue', 'Data' => $h->{evalue} }) if exists $h->{evalue};
565
 
                   $self->element({'Name' => 'Hsp_bit-score', 'Data' => $h->{bits} }) if exists $h->{bits};
566
 
                   $self->element({'Name' => 'Hsp_sw-score', 'Data' => $h->{sw} }) if exists $h->{sw};
567
 
                   $self->element({'Name' => 'Hsp_gaps', 'Data' => $h->{'%_gid'} }) if exists $h->{'%_gid'};
568
 
                   $self->element({'Name' => 'Hsp_identity', 'Data' => POSIX::ceil($h->{'%_id'} * $h->{alen}) })
569
 
                       if (exists $h->{'%_id'} && exists $h->{alen});
570
 
                   if( exists $h->{'%_gid'} ) { 
571
 
                       $self->element({'Name' => 'Hsp_positive', 'Data' => POSIX::ceil($h->{'%_gid'} * $h->{alen})}) if exists $h->{'%_gid'} && exists $h->{alen};
572
 
                   } else { 
573
 
                       $self->element({'Name' => 'Hsp_positive', 'Data' => POSIX::ceil($h->{'%_id'} * $h->{alen}) })
574
 
                           if (exists $h->{'%_id'} && exists $h->{alen});
575
 
                   }
576
 
                   $self->element({'Name' => 'Hsp_align-len', 'Data' => $h->{alen} }) if exists $h->{alen};
577
 
                   $self->element({'Name' => 'Hsp_query-from', 'Data' => $h->{an0} }) if exists $h->{an0};
578
 
                   $self->element({'Name' => 'Hsp_query-to', 'Data' => $h->{ax0} }) if exists $h->{ax0};
579
 
                   $self->element({'Name' => 'Hsp_hit-from', 'Data' => $h->{an1} }) if exists $h->{an1};
580
 
                   $self->element({'Name' => 'Hsp_hit-to', 'Data' => $h->{ax1} }) if exists $h->{ax1};
581
 
 
582
 
                   $self->element({'Name' => 'Hsp_querygaps', 'Data' => $h->{qgaps} }) if exists $h->{qgaps};
583
 
                   $self->element({'Name' => 'Hsp_hitgaps', 'Data' => $h->{lgaps} }) if exists $h->{lgaps};
584
 
 
585
 
                   if ($self->{'_reporttype'} =~ m/^FAST[NXY]$/o) {
586
 
                       if( 8 == scalar grep { exists $h->{$_} } qw(an0 ax0 pn0 px0 an1 ax1 pn1 px1) ) {
587
 
                           if ($h->{ax0} < $h->{an0}) {
588
 
                               $self->element({'Name' => 'Hsp_query-frame', 'Data' => "-@{[(($h->{px0} - $h->{ax0}) % 3) + 1]}" });
589
 
                           } else {
590
 
                               $self->element({'Name' => 'Hsp_query-frame', 'Data' => "+@{[(($h->{an0} - $h->{pn0}) % 3) + 1]}" });
591
 
                           }
592
 
                           if ($h->{ax1} < $h->{an1}) {
593
 
                               $self->element({'Name' => 'Hsp_hit-frame', 'Data' => "-@{[(($h->{px1} - $h->{ax1}) % 3) + 1]}" });
594
 
                           } else {
595
 
                               $self->element({'Name' => 'Hsp_hit-frame', 'Data' => "+@{[(($h->{an1} - $h->{pn1}) % 3) + 1]}" });
596
 
                           }
597
 
                       } else {
598
 
                           $self->element({'Name' => 'Hsp_query-frame', 'Data' => $h->{lframe} });
599
 
                           $self->element({'Name' => 'Hsp_hit-frame', 'Data' => 0 });
600
 
                       }
601
 
                   } else {
602
 
                       $self->element({'Name' => 'Hsp_query-frame', 'Data' => 0 });
603
 
                       $self->element({'Name' => 'Hsp_hit-frame', 'Data' => $h->{lframe} });
604
 
                   }
605
 
 
606
 
                   $self->end_element({'Name' => 'Hsp'});
607
 
                   $self->end_element({'Name' => 'Hit'});
608
 
               }
609
 
           }
610
 
 
611
 
           $self->end_element({ 'Name' => 'FastaOutput'});
612
 
           return $self->end_document();
613
 
       } elsif( /^\s*\d+\s*>>>/) {
614
 
           if ($self->within_element('FastaOutput')) {
615
 
               if( $self->in_element('hsp') ) {
616
 
                   $self->end_element({'Name' => 'Hsp'});
617
 
               } 
618
 
               if( $self->in_element('hit') ) {
619
 
                   $self->end_element({'Name' => 'Hit'});
620
 
               }
621
 
 
622
 
               if (@hit_signifs) {
623
 
                   # process remaining best hits
624
 
                   for my $h (@hit_signifs) {
625
 
                       $self->start_element({'Name' => 'Hit'});
626
 
                       $self->element({ 'Name' => 'Hit_len',
627
 
                                        'Data' => $h->{hit_len}
628
 
                                      }) if exists $h->{hit_len};
629
 
                       $self->element({ 'Name' => 'Hit_id',
630
 
                                        'Data' => $h->{id}
631
 
                                      }) if exists $h->{id};
632
 
                       $self->element({ 'Name' =>  'Hit_accession',
633
 
                                        'Data'  => $h->{acc}
634
 
                                      }) if exists $h->{acc};
635
 
                       $self->element({ 'Name' => 'Hit_def',
636
 
                                        'Data' => $h->{desc}
637
 
                                      }) if exists $h->{desc};
638
 
                       $self->element({'Name' => 'Hit_signif',
639
 
                                       'Data' => $h->{evalue}
640
 
                                      }) if exists $h->{evalue};
641
 
                       $self->element({'Name' => 'Hit_score',
642
 
                                       'Data' => $h->{bits}
643
 
                                      }) if exists $h->{bits};
644
 
 
645
 
                       $self->start_element({'Name' => 'Hsp'});
646
 
                       $self->element({'Name' => 'Hsp_score', 'Data' => $h->{'z-sc'} }) if exists $h->{'z-sc'};
647
 
                       $self->element({'Name' => 'Hsp_evalue', 'Data' => $h->{evalue} }) if exists $h->{evalue};
648
 
                       $self->element({'Name' => 'Hsp_bit-score', 'Data' => $h->{bits} }) if exists $h->{bits};
649
 
                       $self->element({'Name' => 'Hsp_sw-score', 'Data' => $h->{sw} }) if exists $h->{sw};
650
 
                       $self->element({'Name' => 'Hsp_gaps', 'Data' => $h->{'%_gid'} }) if exists $h->{'%_gid'};
651
 
                       $self->element({'Name' => 'Hsp_identity', 'Data' => POSIX::ceil($h->{'%_id'} * $h->{alen}) })
652
 
                           if (exists $h->{'%_id'} && exists $h->{alen});
653
 
                       if( exists $h->{'%_gid'} ) { 
654
 
                           $self->element({'Name' => 'Hsp_positive', 'Data' => POSIX::ceil($h->{'%_gid'} * $h->{alen})}) if exists $h->{'%_gid'} && exists $h->{alen};
655
 
                       } else { 
656
 
                           $self->element({'Name' => 'Hsp_positive', 'Data' => POSIX::ceil($h->{'%_id'} * $h->{alen}) })
657
 
                           if (exists $h->{'%_id'} && exists $h->{alen});
658
 
                       }
659
 
                       $self->element({'Name' => 'Hsp_align-len', 'Data' => $h->{alen} }) if exists $h->{alen};
660
 
                       $self->element({'Name' => 'Hsp_query-from', 'Data' => $h->{an0} }) if exists $h->{an0};
661
 
                       $self->element({'Name' => 'Hsp_query-to', 'Data' => $h->{ax0} }) if exists $h->{ax0};
662
 
                       $self->element({'Name' => 'Hsp_hit-from', 'Data' => $h->{an1} }) if exists $h->{an1};
663
 
                       $self->element({'Name' => 'Hsp_hit-to', 'Data' => $h->{ax1} }) if exists $h->{ax1};
664
 
 
665
 
                       $self->element({'Name' => 'Hsp_querygaps', 'Data' => $h->{qgaps} }) if exists $h->{qgaps};
666
 
                       $self->element({'Name' => 'Hsp_hitgaps', 'Data' => $h->{lgaps} }) if exists $h->{lgaps};
667
 
                       
668
 
                       if ($self->{'_reporttype'} =~ m/^FAST[NXY]$/o) {
669
 
                           if( 8 == scalar grep { exists $h->{$_} } qw(an0 ax0 pn0 px0 an1 ax1 pn1 px1) ) {
670
 
                               if ($h->{ax0} < $h->{an0}) {
671
 
                                   $self->element({'Name' => 'Hsp_query-frame', 'Data' => "-@{[(($h->{px0} - $h->{ax0}) % 3) + 1]}" });
672
 
                               } else {
673
 
                                   $self->element({'Name' => 'Hsp_query-frame', 'Data' => "+@{[(($h->{an0} - $h->{pn0}) % 3) + 1]}" });
674
 
                               }
675
 
                               if ($h->{ax1} < $h->{an1}) {
676
 
                                   $self->element({'Name' => 'Hsp_hit-frame', 'Data' => "-@{[(($h->{px1} - $h->{ax1}) % 3) + 1]}" });
677
 
                               } else {
678
 
                                   $self->element({'Name' => 'Hsp_hit-frame', 'Data' => "+@{[(($h->{an1} - $h->{pn1}) % 3) + 1]}" });
679
 
                               }
680
 
                           } else {
681
 
                               $self->element({'Name' => 'Hsp_query-frame', 'Data' => $h->{lframe} });
682
 
                               $self->element({'Name' => 'Hsp_hit-frame', 'Data' => 0 });
683
 
                           }
684
 
                       } else {
685
 
                           $self->element({'Name' => 'Hsp_query-frame', 'Data' => 0 });
686
 
                           $self->element({'Name' => 'Hsp_hit-frame', 'Data' => $h->{lframe} });
687
 
                       }
688
 
 
689
 
                       $self->end_element({'Name' => 'Hsp'});
690
 
                       $self->end_element({'Name' => 'Hit'});
691
 
                   }
692
 
               }
693
 
               $self->end_element({ 'Name' => 'FastaOutput' });
694
 
               $self->_pushback($_);
695
 
               return $self->end_document();
696
 
           } else {
697
 
               $self->start_element({ 'Name' => 'FastaOutput' });
698
 
               $self->{'_result_count'}++;
699
 
               $seentop = 1;
700
 
               $self->element({ 'Name' => 'FastaOutput_program',
701
 
                                'Data' => $self->{'_reporttype'} });
702
 
               $self->element({ 'Name' => 'FastaOutput_version',
703
 
                                'Data' => $self->{'_version'} });
704
 
 
705
 
               my ($type, $querylen, $querytype, $querydef);
706
 
 
707
 
               if( /^\s*\d+\s*>>>(.*)/ ) {
708
 
                   $querydef = $1;
709
 
                   if($querydef =~ /^(.*?)\s+(?:\-\s+)?(\d+)\s+(aa|nt)\s*$/o ) {
710
 
                       ($querydef, $querylen, $querytype) = ($1, $2, $3);
711
 
                   }
712
 
               }
713
 
 
714
 
               if( $self->{'_reporttype'} &&
715
 
                   $self->{'_reporttype'} eq 'FASTA' 
716
 
                   ) {
717
 
                   if( $querytype eq 'nt') {
718
 
                       $self->{'_reporttype'} = 'FASTN' ;
719
 
                   } elsif( $querytype eq 'aa' ) {
720
 
                       $self->{'_reporttype'} = 'FASTP' ;
721
 
                   }
722
 
               }
723
 
               my ($name,$descr) = ($querydef =~ m/^(\S+)(?:\s+(.*))?\s*$/o);
724
 
               $self->element({'Name' => 'FastaOutput_query-def',
725
 
                               'Data' => $name});
726
 
               $self->element({'Name' => 'FastaOutput_querydesc',
727
 
                               'Data' => $descr});
728
 
               if ($querylen) {
729
 
                   $self->element({'Name' => 'FastaOutput_query-len',
730
 
                                   'Data' => $querylen});
731
 
               } else {
732
 
                   $self->warn("unable to find and set query length");
733
 
               }
734
 
 
735
 
 
736
 
               if( defined ($_ = $self->_readline()) && /^\s*vs\s+(\S+)/ ) {
737
 
                   $self->element({'Name' => 'FastaOutput_db',
738
 
                                   'Data' => $1});
739
 
               }
740
 
           }
741
 
       } elsif( $self->in_element('hsp' ) ) {
742
 
           
743
 
           my @data = ( [],[],[]);
744
 
           my $count = 0;
745
 
           my $len = $self->idlength + 1;
746
 
           my ($seq1_id);
747
 
           while( defined($_ ) ) {
748
 
               chomp;
749
 
               $self->debug( "$count $_\n");
750
 
               
751
 
               if( /residues in \d+\s+query\s+sequences/o) {
752
 
                   $self->_pushback($_);
753
 
                   last;
754
 
               } elsif (/^>>>\*\*\*/o) {
755
 
                   $self->end_element({Name => "Hsp"});
756
 
                   last;
757
 
               } elsif (/^>>/o) {
758
 
                   $self->_pushback($_);
759
 
                   last;
760
 
               } elsif (/^\s*\d+\s*>>>/o) {
761
 
                   $self->_pushback($_);
762
 
                   last;
763
 
               }
764
 
               if( $count == 0 ) { 
765
 
                   if( /^(\S+)\s+/ ) {
766
 
                       $self->_pushback($_);
767
 
                       $count = 2;
768
 
                   } elsif( /^\s+\d+/ || /^\s+$/ ) { 
769
 
                       # do nothing, this is really a 0 line
770
 
                   } elsif( length($_) == 0 ) { 
771
 
                       $count = -1;
772
 
                   } else { 
773
 
                       $self->_pushback($_);
774
 
                       $count = 0;
775
 
                   }
776
 
               } elsif( $count == 1 || $count == 3 ) {
777
 
                   if( /^(\S+)\s+/ ) {
778
 
                       $len = CORE::length($1) if $len < CORE::length($1);
779
 
                       s/\s+$//; # trim trailing spaces,we don't want them 
780
 
                       push @{$data[$count-1]},substr($_,$len);
781
 
                   } elsif( /^\s+(\d+)/ ) {
782
 
                       $count = -1;
783
 
                       $self->_pushback($_);
784
 
                   } elsif( /^\s+$/ || length($_) == 0) {
785
 
                       $count = 5;  
786
 
                       # going to skip these
787
 
                   } else {
788
 
                       $self->warn("Unrecognized alignment line ($count) '$_'");
789
 
                   }
790
 
               } elsif( $count == 2 ) {
791
 
                   if( /^\s+\d+\s+/ ) {
792
 
                       $self->warn("$_\n") if $self->verbose > 0;
793
 
                       # we are on a Subject part of the alignment
794
 
                       # but we THOUGHT we were on the Query
795
 
                       # move that last line to the proper place
796
 
                       push @{$data[2]}, pop @{$data[0]};
797
 
                       $count = 4;
798
 
                   } else {
799
 
                       # toss the first IDLENGTH characters of the line
800
 
                       if( length($_) >= $len ) {
801
 
                           push @{$data[$count-1]}, substr($_,$len);
802
 
                       }
803
 
                   }
804
 
               } 
805
 
               last if( $count++ >= 5);
806
 
               $_ = $self->_readline();        
807
 
           }
808
 
           if( @{$data[0]} || @{$data[2]}) {
809
 
               $self->characters({'Name' => 'Hsp_qseq',
810
 
                                  'Data' => join('',@{$data[0]}) });
811
 
               $self->characters({'Name' => 'Hsp_midline',
812
 
                                  'Data' => join('',@{$data[1]}) });
813
 
               $self->characters({'Name' => 'Hsp_hseq',
814
 
                                  'Data' => join('',@{$data[2]}) });
815
 
           }
816
 
       } else {
817
 
           if( ! $seentop ) {
818
 
               $self->debug($_);
819
 
               $self->warn("unrecognized FASTA Family report file!");
820
 
               return;
821
 
           }
822
 
       }
823
 
   }
 
653
               /x
 
654
              )
 
655
            {
 
656
                my ( $identper, $gapper, $len, $querystart, $queryend,
 
657
                    $hitstart, $hitend )
 
658
                  = ( $1, $2, $3, $4, $5, $6, $7 );
 
659
                my $ident = sprintf( "%.0f", ( $identper / 100 ) * $len );
 
660
                my $positive = sprintf( "%.0f", ( $gapper / 100 ) * $len );
 
661
 
 
662
                $self->element(
 
663
                    {
 
664
                        'Name' => 'Hsp_identity',
 
665
                        'Data' => $ident
 
666
                    }
 
667
                );
 
668
                $self->element(
 
669
                    {
 
670
                        'Name' => 'Hsp_positive',
 
671
                        'Data' => $positive
 
672
                    }
 
673
                );
 
674
                $self->element(
 
675
                    {
 
676
                        'Name' => 'Hsp_align-len',
 
677
                        'Data' => $len
 
678
                    }
 
679
                );
 
680
 
 
681
                $self->element(
 
682
                    {
 
683
                        'Name' => 'Hsp_query-from',
 
684
                        'Data' => $querystart
 
685
                    }
 
686
                );
 
687
                $self->element(
 
688
                    {
 
689
                        'Name' => 'Hsp_query-to',
 
690
                        'Data' => $queryend
 
691
                    }
 
692
                );
 
693
                $self->element(
 
694
                    {
 
695
                        'Name' => 'Hsp_hit-from',
 
696
                        'Data' => $hitstart
 
697
                    }
 
698
                );
 
699
                $self->element(
 
700
                    {
 
701
                        'Name' => 'Hsp_hit-to',
 
702
                        'Data' => $hitend
 
703
                    }
 
704
                );
 
705
 
 
706
            }
 
707
 
 
708
            if ($v) {
 
709
                $self->element(
 
710
                    { 'Name' => 'Hsp_querygaps', 'Data' => $v->{qgaps} } )
 
711
                  if exists $v->{qgaps};
 
712
                $self->element(
 
713
                    { 'Name' => 'Hsp_hitgaps', 'Data' => $v->{lgaps} } )
 
714
                  if exists $v->{lgaps};
 
715
 
 
716
                if ( $self->{'_reporttype'} =~ m/^FAST[NXY]$/o ) {
 
717
                    if ( 8 == scalar grep { exists $v->{$_} }
 
718
                        qw(an0 ax0 pn0 px0 an1 ax1 pn1 px1) )
 
719
                    {
 
720
                        if ( $v->{ax0} < $v->{an0} ) {
 
721
                            $self->element(
 
722
                                {
 
723
                                    'Name' => 'Hsp_query-frame',
 
724
                                    'Data' =>
 
725
                                      "-@{[(($v->{px0} - $v->{ax0}) % 3) + 1]}"
 
726
                                }
 
727
                            );
 
728
                        }
 
729
                        else {
 
730
                            $self->element(
 
731
                                {
 
732
                                    'Name' => 'Hsp_query-frame',
 
733
                                    'Data' =>
 
734
                                      "+@{[(($v->{an0} - $v->{pn0}) % 3) + 1]}"
 
735
                                }
 
736
                            );
 
737
                        }
 
738
                        if ( $v->{ax1} < $v->{an1} ) {
 
739
                            $self->element(
 
740
                                {
 
741
                                    'Name' => 'Hsp_hit-frame',
 
742
                                    'Data' =>
 
743
                                      "-@{[(($v->{px1} - $v->{ax1}) % 3) + 1]}"
 
744
                                }
 
745
                            );
 
746
                        }
 
747
                        else {
 
748
                            $self->element(
 
749
                                {
 
750
                                    'Name' => 'Hsp_hit-frame',
 
751
                                    'Data' =>
 
752
                                      "+@{[(($v->{an1} - $v->{pn1}) % 3) + 1]}"
 
753
                                }
 
754
                            );
 
755
                        }
 
756
                    }
 
757
                    else {
 
758
                        $self->element(
 
759
                            {
 
760
                                'Name' => 'Hsp_query-frame',
 
761
                                'Data' => $v->{lframe}
 
762
                            }
 
763
                        );
 
764
                        $self->element(
 
765
                            { 'Name' => 'Hsp_hit-frame', 'Data' => 0 } );
 
766
                    }
 
767
                }
 
768
                else {
 
769
                    $self->element(
 
770
                        { 'Name' => 'Hsp_query-frame', 'Data' => 0 } );
 
771
                    $self->element(
 
772
                        { 'Name' => 'Hsp_hit-frame', 'Data' => $v->{lframe} } );
 
773
                }
 
774
 
 
775
            }
 
776
            else {
 
777
                $self->warn("unable to parse FASTA score line: $_");
 
778
            }
 
779
        }
 
780
        elsif (/\d+\s*residues\s*in\s*\d+\s*query\s*sequences/) {
 
781
            if ( $self->in_element('hsp') ) {
 
782
                $self->end_element( { 'Name' => 'Hsp' } );
 
783
            }
 
784
            if ( $self->in_element('hit') ) {
 
785
                $self->end_element( { 'Name' => 'Hit' } );
 
786
            }
 
787
 
 
788
           #       $_ = $self->_readline();
 
789
           #       my ( $liblen,$libsize) = /(\d+)\s+residues\s*in(\d+)\s*library/;
 
790
           # fast forward to the end of the file as there is
 
791
           # nothing else left to do with this file and want to be sure and
 
792
           # reset it
 
793
            while ( defined( $_ = $self->_readline() ) ) {
 
794
                last if (/^Function used was/);
 
795
                if (
 
796
                    /(\S+)\s+searches\s+a\s+(protein\s+or\s+DNA\s+)?
 
797
           sequence/oxi || /(\S+)\s+compares\s+a/oi
 
798
                  )
 
799
                {
 
800
                    $self->_pushback($_);
 
801
                }
 
802
            }
 
803
 
 
804
            if (@hit_signifs) {
 
805
 
 
806
                # process remaining best hits
 
807
                for my $h (@hit_signifs) {
 
808
 
 
809
                    # Hsp_score Hsp_evalue Hsp_bit-score
 
810
                    # Hsp_sw-score Hsp_gaps Hsp_identity Hsp_positive
 
811
                    # Hsp_align-len Hsp_query-from Hsp_query-to
 
812
                    # Hsp_hit-from Hsp_hit-to Hsp_qseq Hsp_midline
 
813
 
 
814
                    $self->start_element( { 'Name' => 'Hit' } );
 
815
                    $self->element(
 
816
                        {
 
817
                            'Name' => 'Hit_len',
 
818
                            'Data' => $h->{hit_len}
 
819
                        }
 
820
                    ) if exists $h->{hit_len};
 
821
                    $self->element(
 
822
                        {
 
823
                            'Name' => 'Hit_id',
 
824
                            'Data' => $h->{id}
 
825
                        }
 
826
                    ) if exists $h->{id};
 
827
                    $self->element(
 
828
                        {
 
829
                            'Name' => 'Hit_accession',
 
830
                            'Data' => $h->{acc}
 
831
                        }
 
832
                    ) if exists $h->{acc};
 
833
                    $self->element(
 
834
                        {
 
835
                            'Name' => 'Hit_def',
 
836
                            'Data' => $h->{desc}
 
837
                        }
 
838
                    ) if exists $h->{desc};
 
839
                    $self->element(
 
840
                        {
 
841
                            'Name' => 'Hit_signif',
 
842
                            'Data' => $h->{evalue}
 
843
                        }
 
844
                    ) if exists $h->{evalue};
 
845
                    $self->element(
 
846
                        {
 
847
                            'Name' => 'Hit_score',
 
848
                            'Data' => $h->{bits}
 
849
                        }
 
850
                    ) if exists $h->{bits};
 
851
 
 
852
                    $self->start_element( { 'Name' => 'Hsp' } );
 
853
                    $self->element(
 
854
                        { 'Name' => 'Hsp_score', 'Data' => $h->{'z-sc'} } )
 
855
                      if exists $h->{'z-sc'};
 
856
                    $self->element(
 
857
                        { 'Name' => 'Hsp_evalue', 'Data' => $h->{evalue} } )
 
858
                      if exists $h->{evalue};
 
859
                    $self->element(
 
860
                        { 'Name' => 'Hsp_bit-score', 'Data' => $h->{bits} } )
 
861
                      if exists $h->{bits};
 
862
                    $self->element(
 
863
                        { 'Name' => 'Hsp_sw-score', 'Data' => $h->{sw} } )
 
864
                      if exists $h->{sw};
 
865
                    $self->element(
 
866
                        { 'Name' => 'Hsp_gaps', 'Data' => $h->{'%_gid'} } )
 
867
                      if exists $h->{'%_gid'};
 
868
                    $self->element(
 
869
                        {
 
870
                            'Name' => 'Hsp_identity',
 
871
                            'Data' =>
 
872
                              sprintf( "%.0f", $h->{'%_id'} * $h->{alen} )
 
873
                        }
 
874
                    ) if ( exists $h->{'%_id'} && exists $h->{alen} );
 
875
 
 
876
                    if ( exists $h->{'%_gid'} ) {
 
877
                        $self->element(
 
878
                            {
 
879
                                'Name' => 'Hsp_positive',
 
880
                                'Data' =>
 
881
                                  sprintf( "%.0f", $h->{'%_gid'} * $h->{alen} )
 
882
                            }
 
883
                        ) if exists $h->{'%_gid'} && exists $h->{alen};
 
884
                    }
 
885
                    else {
 
886
                        $self->element(
 
887
                            {
 
888
                                'Name' => 'Hsp_positive',
 
889
                                'Data' =>
 
890
                                  sprintf( "%.0f", $h->{'%_id'} * $h->{alen} )
 
891
                            }
 
892
                        ) if ( exists $h->{'%_id'} && exists $h->{alen} );
 
893
                    }
 
894
                    $self->element(
 
895
                        { 'Name' => 'Hsp_align-len', 'Data' => $h->{alen} } )
 
896
                      if exists $h->{alen};
 
897
                    $self->element(
 
898
                        { 'Name' => 'Hsp_query-from', 'Data' => $h->{an0} } )
 
899
                      if exists $h->{an0};
 
900
                    $self->element(
 
901
                        { 'Name' => 'Hsp_query-to', 'Data' => $h->{ax0} } )
 
902
                      if exists $h->{ax0};
 
903
                    $self->element(
 
904
                        { 'Name' => 'Hsp_hit-from', 'Data' => $h->{an1} } )
 
905
                      if exists $h->{an1};
 
906
                    $self->element(
 
907
                        { 'Name' => 'Hsp_hit-to', 'Data' => $h->{ax1} } )
 
908
                      if exists $h->{ax1};
 
909
 
 
910
                    $self->element(
 
911
                        { 'Name' => 'Hsp_querygaps', 'Data' => $h->{qgaps} } )
 
912
                      if exists $h->{qgaps};
 
913
                    $self->element(
 
914
                        { 'Name' => 'Hsp_hitgaps', 'Data' => $h->{lgaps} } )
 
915
                      if exists $h->{lgaps};
 
916
 
 
917
                    if ( $self->{'_reporttype'} =~ m/^FAST[NXY]$/o ) {
 
918
                        if ( 8 == scalar grep { exists $h->{$_} }
 
919
                            qw(an0 ax0 pn0 px0 an1 ax1 pn1 px1) )
 
920
                        {
 
921
                            if ( $h->{ax0} < $h->{an0} ) {
 
922
                                $self->element(
 
923
                                    {
 
924
                                        'Name' => 'Hsp_query-frame',
 
925
                                        'Data' =>
 
926
"-@{[(($h->{px0} - $h->{ax0}) % 3) + 1]}"
 
927
                                    }
 
928
                                );
 
929
                            }
 
930
                            else {
 
931
                                $self->element(
 
932
                                    {
 
933
                                        'Name' => 'Hsp_query-frame',
 
934
                                        'Data' =>
 
935
"+@{[(($h->{an0} - $h->{pn0}) % 3) + 1]}"
 
936
                                    }
 
937
                                );
 
938
                            }
 
939
                            if ( $h->{ax1} < $h->{an1} ) {
 
940
                                $self->element(
 
941
                                    {
 
942
                                        'Name' => 'Hsp_hit-frame',
 
943
                                        'Data' =>
 
944
"-@{[(($h->{px1} - $h->{ax1}) % 3) + 1]}"
 
945
                                    }
 
946
                                );
 
947
                            }
 
948
                            else {
 
949
                                $self->element(
 
950
                                    {
 
951
                                        'Name' => 'Hsp_hit-frame',
 
952
                                        'Data' =>
 
953
"+@{[(($h->{an1} - $h->{pn1}) % 3) + 1]}"
 
954
                                    }
 
955
                                );
 
956
                            }
 
957
                        }
 
958
                        else {
 
959
                            $self->element(
 
960
                                {
 
961
                                    'Name' => 'Hsp_query-frame',
 
962
                                    'Data' => $h->{lframe}
 
963
                                }
 
964
                            );
 
965
                            $self->element(
 
966
                                { 'Name' => 'Hsp_hit-frame', 'Data' => 0 } );
 
967
                        }
 
968
                    }
 
969
                    else {
 
970
                        $self->element(
 
971
                            { 'Name' => 'Hsp_query-frame', 'Data' => 0 } );
 
972
                        $self->element(
 
973
                            {
 
974
                                'Name' => 'Hsp_hit-frame',
 
975
                                'Data' => $h->{lframe}
 
976
                            }
 
977
                        );
 
978
                    }
 
979
 
 
980
                    $self->end_element( { 'Name' => 'Hsp' } );
 
981
                    $self->end_element( { 'Name' => 'Hit' } );
 
982
                }
 
983
            }
 
984
            $self->end_element( { 'Name' => 'FastaOutput' } );
 
985
            return $self->end_document();
 
986
        }
 
987
        elsif (/^\s*\d+\s*>>>/) {
 
988
            if ( $self->within_element('FastaOutput') ) {
 
989
                if ( $self->in_element('hsp') ) {
 
990
                    $self->end_element( { 'Name' => 'Hsp' } );
 
991
                }
 
992
                if ( $self->in_element('hit') ) {
 
993
                    $self->end_element( { 'Name' => 'Hit' } );
 
994
                }
 
995
 
 
996
                if (@hit_signifs) {
 
997
 
 
998
                    # process remaining best hits
 
999
                    for my $h (@hit_signifs) {
 
1000
                        $self->start_element( { 'Name' => 'Hit' } );
 
1001
                        $self->element(
 
1002
                            {
 
1003
                                'Name' => 'Hit_len',
 
1004
                                'Data' => $h->{hit_len}
 
1005
                            }
 
1006
                        ) if exists $h->{hit_len};
 
1007
                        $self->element(
 
1008
                            {
 
1009
                                'Name' => 'Hit_id',
 
1010
                                'Data' => $h->{id}
 
1011
                            }
 
1012
                        ) if exists $h->{id};
 
1013
                        $self->element(
 
1014
                            {
 
1015
                                'Name' => 'Hit_accession',
 
1016
                                'Data' => $h->{acc}
 
1017
                            }
 
1018
                        ) if exists $h->{acc};
 
1019
                        $self->element(
 
1020
                            {
 
1021
                                'Name' => 'Hit_def',
 
1022
                                'Data' => $h->{desc}
 
1023
                            }
 
1024
                        ) if exists $h->{desc};
 
1025
                        $self->element(
 
1026
                            {
 
1027
                                'Name' => 'Hit_signif',
 
1028
                                'Data' => $h->{evalue}
 
1029
                            }
 
1030
                        ) if exists $h->{evalue};
 
1031
                        $self->element(
 
1032
                            {
 
1033
                                'Name' => 'Hit_score',
 
1034
                                'Data' => $h->{bits}
 
1035
                            }
 
1036
                        ) if exists $h->{bits};
 
1037
 
 
1038
                        $self->start_element( { 'Name' => 'Hsp' } );
 
1039
                        $self->element(
 
1040
                            { 'Name' => 'Hsp_score', 'Data' => $h->{'z-sc'} } )
 
1041
                          if exists $h->{'z-sc'};
 
1042
                        $self->element(
 
1043
                            { 'Name' => 'Hsp_evalue', 'Data' => $h->{evalue} } )
 
1044
                          if exists $h->{evalue};
 
1045
                        $self->element(
 
1046
                            { 'Name' => 'Hsp_bit-score', 'Data' => $h->{bits} }
 
1047
                        ) if exists $h->{bits};
 
1048
                        $self->element(
 
1049
                            { 'Name' => 'Hsp_sw-score', 'Data' => $h->{sw} } )
 
1050
                          if exists $h->{sw};
 
1051
                        $self->element(
 
1052
                            { 'Name' => 'Hsp_gaps', 'Data' => $h->{'%_gid'} } )
 
1053
                          if exists $h->{'%_gid'};
 
1054
                        $self->element(
 
1055
                            {
 
1056
                                'Name' => 'Hsp_identity',
 
1057
                                'Data' =>
 
1058
                                  sprintf( "%.0f", $h->{'%_id'} * $h->{alen} )
 
1059
                            }
 
1060
                        ) if ( exists $h->{'%_id'} && exists $h->{alen} );
 
1061
 
 
1062
                        if ( exists $h->{'%_gid'} ) {
 
1063
                            $self->element(
 
1064
                                {
 
1065
                                    'Name' => 'Hsp_positive',
 
1066
                                    'Data' => sprintf( "%.0f",
 
1067
                                        $h->{'%_gid'} * $h->{alen} )
 
1068
                                }
 
1069
                            ) if exists $h->{'%_gid'} && exists $h->{alen};
 
1070
                        }
 
1071
                        else {
 
1072
                            $self->element(
 
1073
                                {
 
1074
                                    'Name' => 'Hsp_positive',
 
1075
                                    'Data' => sprintf( "%.0f",
 
1076
                                        $h->{'%_id'} * $h->{alen} )
 
1077
                                }
 
1078
                            ) if ( exists $h->{'%_id'} && exists $h->{alen} );
 
1079
                        }
 
1080
                        $self->element(
 
1081
                            { 'Name' => 'Hsp_align-len', 'Data' => $h->{alen} }
 
1082
                        ) if exists $h->{alen};
 
1083
                        $self->element(
 
1084
                            { 'Name' => 'Hsp_query-from', 'Data' => $h->{an0} }
 
1085
                        ) if exists $h->{an0};
 
1086
                        $self->element(
 
1087
                            { 'Name' => 'Hsp_query-to', 'Data' => $h->{ax0} } )
 
1088
                          if exists $h->{ax0};
 
1089
                        $self->element(
 
1090
                            { 'Name' => 'Hsp_hit-from', 'Data' => $h->{an1} } )
 
1091
                          if exists $h->{an1};
 
1092
                        $self->element(
 
1093
                            { 'Name' => 'Hsp_hit-to', 'Data' => $h->{ax1} } )
 
1094
                          if exists $h->{ax1};
 
1095
 
 
1096
                        $self->element(
 
1097
                            {
 
1098
                                'Name' => 'Hsp_querygaps',
 
1099
                                'Data' => $h->{qgaps}
 
1100
                            }
 
1101
                        ) if exists $h->{qgaps};
 
1102
                        $self->element(
 
1103
                            { 'Name' => 'Hsp_hitgaps', 'Data' => $h->{lgaps} } )
 
1104
                          if exists $h->{lgaps};
 
1105
 
 
1106
                        if ( $self->{'_reporttype'} =~ m/^FAST[NXY]$/o ) {
 
1107
                            if ( 8 == scalar grep { exists $h->{$_} }
 
1108
                                qw(an0 ax0 pn0 px0 an1 ax1 pn1 px1) )
 
1109
                            {
 
1110
                                if ( $h->{ax0} < $h->{an0} ) {
 
1111
                                    $self->element(
 
1112
                                        {
 
1113
                                            'Name' => 'Hsp_query-frame',
 
1114
                                            'Data' => "-@{[(($h->{px0} - $h->{ax0}) % 3) + 1]}"
 
1115
                                        }
 
1116
                                    );
 
1117
                                }
 
1118
                                else {
 
1119
                                    $self->element(
 
1120
                                        {
 
1121
                                            'Name' => 'Hsp_query-frame',
 
1122
                                            'Data' => "+@{[(($h->{an0} - $h->{pn0}) % 3) + 1]}"
 
1123
                                        }
 
1124
                                    );
 
1125
                                }
 
1126
                                if ( $h->{ax1} < $h->{an1} ) {
 
1127
                                    $self->element(
 
1128
                                        {
 
1129
                                            'Name' => 'Hsp_hit-frame',
 
1130
                                            'Data' => "-@{[(($h->{px1} - $h->{ax1}) % 3) + 1]}"
 
1131
                                        }
 
1132
                                    );
 
1133
                                }
 
1134
                                else {
 
1135
                                    $self->element(
 
1136
                                        {
 
1137
                                            'Name' => 'Hsp_hit-frame',
 
1138
                                            'Data' => "+@{[(($h->{an1} - $h->{pn1}) % 3) + 1]}"
 
1139
                                        }
 
1140
                                    );
 
1141
                                }
 
1142
                            }
 
1143
                            else {
 
1144
                                $self->element(
 
1145
                                    {
 
1146
                                        'Name' => 'Hsp_query-frame',
 
1147
                                        'Data' => $h->{lframe}
 
1148
                                    }
 
1149
                                );
 
1150
                                $self->element(
 
1151
                                    { 'Name' => 'Hsp_hit-frame', 'Data' => 0 }
 
1152
                                );
 
1153
                            }
 
1154
                        }
 
1155
                        else {
 
1156
                            $self->element(
 
1157
                                { 'Name' => 'Hsp_query-frame', 'Data' => 0 } );
 
1158
                            $self->element(
 
1159
                                {
 
1160
                                    'Name' => 'Hsp_hit-frame',
 
1161
                                    'Data' => $h->{lframe}
 
1162
                                }
 
1163
                            );
 
1164
                        }
 
1165
 
 
1166
                        $self->end_element( { 'Name' => 'Hsp' } );
 
1167
                        $self->end_element( { 'Name' => 'Hit' } );
 
1168
                    }
 
1169
                }
 
1170
                $self->end_element( { 'Name' => 'FastaOutput' } );
 
1171
                $self->_pushback($_);
 
1172
                return $self->end_document();
 
1173
            }
 
1174
            else {
 
1175
                $self->start_element( { 'Name' => 'FastaOutput' } );
 
1176
                $self->{'_result_count'}++;
 
1177
                $seentop = 1;
 
1178
                $self->element(
 
1179
                    {
 
1180
                        'Name' => 'FastaOutput_program',
 
1181
                        'Data' => $self->{'_reporttype'}
 
1182
                    }
 
1183
                );
 
1184
                $self->element(
 
1185
                    {
 
1186
                        'Name' => 'FastaOutput_version',
 
1187
                        'Data' => $self->{'_version'}
 
1188
                    }
 
1189
                );
 
1190
 
 
1191
                my ( $type, $querylen, $querytype, $querydef );
 
1192
 
 
1193
                if (/^\s*\d+\s*>>>(.*)/) {
 
1194
                    $querydef = $1;
 
1195
                    if ( $querydef =~ /^(.*?)\s+(?:\-\s+)?(\d+)\s+(aa|nt).*$/o )
 
1196
                    {
 
1197
                        ( $querydef, $querylen, $querytype ) = ( $1, $2, $3 );
 
1198
                    }
 
1199
                }
 
1200
 
 
1201
                if (   $self->{'_reporttype'}
 
1202
                    && $self->{'_reporttype'} eq 'FASTA' )
 
1203
                {
 
1204
                    if ( $querytype eq 'nt' ) {
 
1205
                        $self->{'_reporttype'} = 'FASTN';
 
1206
                    }
 
1207
                    elsif ( $querytype eq 'aa' ) {
 
1208
                        $self->{'_reporttype'} = 'FASTP';
 
1209
                    }
 
1210
                }
 
1211
                my ( $name, $descr ) =
 
1212
                  ( $querydef =~ m/^(\S+)(?:\s+(.*))?\s*$/o );
 
1213
                $self->element(
 
1214
                    {
 
1215
                        'Name' => 'FastaOutput_query-def',
 
1216
                        'Data' => $name
 
1217
                    }
 
1218
                );
 
1219
                $self->element(
 
1220
                    {
 
1221
                        'Name' => 'FastaOutput_querydesc',
 
1222
                        'Data' => $descr
 
1223
                    }
 
1224
                );
 
1225
                if ($querylen) {
 
1226
                    $self->element(
 
1227
                        {
 
1228
                            'Name' => 'FastaOutput_query-len',
 
1229
                            'Data' => $querylen
 
1230
                        }
 
1231
                    );
 
1232
                }
 
1233
                else {
 
1234
                    $self->warn("unable to find and set query length");
 
1235
                }
 
1236
                if ( defined( $_ = $self->_readline() )
 
1237
                    && ( /^\s*vs\s+(\S+)/ || /^Library:\s+(\S+)/ ) )
 
1238
                {
 
1239
                    $self->element(
 
1240
                        {
 
1241
                            'Name' => 'FastaOutput_db',
 
1242
                            'Data' => $1
 
1243
                        }
 
1244
                    );
 
1245
                }
 
1246
 
 
1247
            }
 
1248
        }
 
1249
        elsif ( $self->in_element('hsp') ) {
 
1250
            my @data  = ( [], [], [] );
 
1251
            my $count = 0;
 
1252
            my $len   = $self->idlength + 1;
 
1253
            my ($seq1_id);
 
1254
            while ( defined($_) ) {
 
1255
                chomp;
 
1256
                #$self->debug("$count $_\n");
 
1257
                if (/residues in \d+\s+query\s+sequences/o) {
 
1258
                    $self->_pushback($_);
 
1259
                    last;
 
1260
                }
 
1261
                elsif (/^>>>\*\*\*/o) {
 
1262
                    $self->end_element( { Name => "Hsp" } );
 
1263
                    last;
 
1264
                }
 
1265
                elsif (/^>>/o) {
 
1266
                    $self->_pushback($_);
 
1267
                    last;
 
1268
                }
 
1269
                elsif (/^\s*\d+\s*>>>/o) {
 
1270
                    $self->_pushback($_);
 
1271
                    last;
 
1272
                }
 
1273
                if ( $count == 0 ) {
 
1274
                    if (/^(\S+)\s+/) {
 
1275
                        $self->_pushback($_);
 
1276
                        $count = 2;
 
1277
                    }
 
1278
                    elsif ( /^\s+\d+/ || /^\s+$/ ) {
 
1279
 
 
1280
                        # do nothing, this is really a 0 line
 
1281
                    }
 
1282
                    elsif ( length($_) == 0 ) {
 
1283
                        $count = -1;
 
1284
                    }
 
1285
                    else {
 
1286
                        $self->_pushback($_);
 
1287
                        $count = 0;
 
1288
                    }
 
1289
                }
 
1290
                elsif ( $count == 1 || $count == 3 ) {
 
1291
                    if (/^(\S+)\s+/) {
 
1292
                        $len = CORE::length($1) if $len < CORE::length($1);
 
1293
                        s/\s+$//;    # trim trailing spaces,we don't want them
 
1294
                        push @{ $data[ $count - 1 ] }, substr( $_, $len );
 
1295
                    }
 
1296
                    elsif (/^\s+(\d+)/) {
 
1297
                        $count = -1;
 
1298
                        $self->_pushback($_);
 
1299
                    }
 
1300
                    elsif ( /^\s+$/ || length($_) == 0 ) {
 
1301
                        $count = 5;
 
1302
 
 
1303
                        # going to skip these
 
1304
                    }
 
1305
                    else {
 
1306
                        $self->warn(
 
1307
                            "Unrecognized alignment line ($count) '$_'");
 
1308
                    }
 
1309
                }
 
1310
                elsif ( $count == 2 ) {
 
1311
                    if (/^\s+\d+\s+/) {
 
1312
                        $self->warn("$_\n") if $self->verbose > 0;
 
1313
 
 
1314
                        # we are on a Subject part of the alignment
 
1315
                        # but we THOUGHT we were on the Query
 
1316
                        # move that last line to the proper place
 
1317
                        push @{ $data[2] }, pop @{ $data[0] };
 
1318
                        $count = 4;
 
1319
                    }
 
1320
                    else {
 
1321
 
 
1322
                        # toss the first IDLENGTH characters of the line
 
1323
                        if ( length($_) >= $len ) {
 
1324
                            push @{ $data[ $count - 1 ] }, substr( $_, $len );
 
1325
                        }
 
1326
                    }
 
1327
                }
 
1328
                last if ( $count++ >= 5 );
 
1329
                $_ = $self->_readline();
 
1330
            }
 
1331
            if ( @{ $data[0] } || @{ $data[2] } ) {
 
1332
                $self->characters(
 
1333
                    {
 
1334
                        'Name' => 'Hsp_qseq',
 
1335
                        'Data' => join( '', @{ $data[0] } )
 
1336
                    }
 
1337
                );
 
1338
                $self->characters(
 
1339
                    {
 
1340
                        'Name' => 'Hsp_midline',
 
1341
                        'Data' => join( '', @{ $data[1] } )
 
1342
                    }
 
1343
                );
 
1344
                $self->characters(
 
1345
                    {
 
1346
                        'Name' => 'Hsp_hseq',
 
1347
                        'Data' => join( '', @{ $data[2] } )
 
1348
                    }
 
1349
                );
 
1350
            }
 
1351
        }
 
1352
        else {
 
1353
            if ( !$seentop ) {
 
1354
                $self->debug($_);
 
1355
                #$self->warn("unrecognized FASTA Family report file!");
 
1356
                #return;
 
1357
            }
 
1358
        }
 
1359
    }
 
1360
    if ( $self->in_element('result') ) {
 
1361
        if ( $self->in_element('hsp') ) {
 
1362
            $self->end_element( { 'Name' => 'Hsp' } );
 
1363
        }
 
1364
        if ( $self->in_element('hit') ) {
 
1365
            $self->end_element( { 'Name' => 'Hit' } );
 
1366
        }
 
1367
        $self->end_element( { 'Name' => 'FastaOutput' } );
 
1368
    }
 
1369
    return $self->end_document();
824
1370
}
825
1371
 
826
 
 
827
1372
=head2 start_element
828
1373
 
829
1374
 Title   : start_element
835
1380
 
836
1381
=cut
837
1382
 
838
 
sub start_element{
839
 
   my ($self,$data) = @_;
 
1383
sub start_element {
 
1384
    my ( $self, $data ) = @_;
 
1385
 
840
1386
    # we currently don't care about attributes
841
 
    my $nm = $data->{'Name'};    
842
 
    if( my $type = $MODEMAP{$nm} ) {
843
 
        $self->_mode($type);
844
 
        if( $self->_eventHandler->will_handle($type) ) {
845
 
            my $func = sprintf("start_%s",lc $type);
846
 
            $self->_eventHandler->$func($data->{'Attributes'});
847
 
        }                                                
848
 
        unshift @{$self->{'_elements'}}, $type;
 
1387
    my $nm = $data->{'Name'};
 
1388
    if ( my $type = $MODEMAP{$nm} ) {
 
1389
        $self->_mode($type);
 
1390
        if ( my $handler = $self->_will_handle($type) ) {
 
1391
            my $func = sprintf( "start_%s", lc $type );
 
1392
            $handler->$func( $data->{'Attributes'} );
 
1393
        }
 
1394
        unshift @{ $self->{'_elements'} }, $type;
849
1395
    }
850
 
    if($nm eq 'FastaOutput') {
851
 
        $self->{'_values'} = {};
852
 
        $self->{'_result'}= undef;
853
 
        $self->{'_mode'} = '';
 
1396
    if ( $nm eq 'FastaOutput' ) {
 
1397
        $self->{'_values'} = {};
 
1398
        $self->{'_result'} = undef;
 
1399
        $self->{'_mode'}   = '';
854
1400
    }
855
1401
 
856
1402
}
867
1413
=cut
868
1414
 
869
1415
sub end_element {
870
 
    my ($self,$data) = @_;
 
1416
    my ( $self, $data ) = @_;
871
1417
    my $nm = $data->{'Name'};
872
1418
    my $rc;
 
1419
 
873
1420
    # Hsp are sort of weird, in that they end when another
874
1421
    # object begins so have to detect this in end_element for now
875
 
    if( $nm eq 'Hsp' ) {
876
 
        foreach ( qw(Hsp_qseq Hsp_midline Hsp_hseq) ) {
877
 
            $self->element({'Name' => $_,
878
 
                            'Data' => $self->{'_last_hspdata'}->{$_}});
879
 
        }
880
 
        $self->{'_last_hspdata'} = {}
881
 
    }
882
 
 
883
 
    if( my $type = $MODEMAP{$nm} ) {
884
 
        if( $self->_eventHandler->will_handle($type) ) {
885
 
            my $func = sprintf("end_%s",lc $type);
886
 
            $rc = $self->_eventHandler->$func($self->{'_reporttype'},
887
 
                                              $self->{'_values'});          
888
 
        }
889
 
        shift @{$self->{'_elements'}};
890
 
 
891
 
    } elsif( $MAPPING{$nm} ) {  
892
 
        if ( ref($MAPPING{$nm}) =~ /hash/i ) {
893
 
            my $key = (keys %{$MAPPING{$nm}})[0];           
894
 
            $self->{'_values'}->{$key}->{$MAPPING{$nm}->{$key}} = $self->{'_last_data'};
895
 
        } else {
896
 
            $self->{'_values'}->{$MAPPING{$nm}} = $self->{'_last_data'};
897
 
        }
898
 
    } else { 
899
 
        $self->warn( "unknown nm $nm, ignoring\n");
900
 
    }
901
 
    $self->{'_last_data'} = ''; # remove read data if we are at 
902
 
                                # end of an element
903
 
    $self->{'_result'} = $rc if( $nm eq 'FastaOutput' );
 
1422
    if ( $nm eq 'Hsp' ) {
 
1423
        foreach (qw(Hsp_qseq Hsp_midline Hsp_hseq)) {
 
1424
            $self->element(
 
1425
                {
 
1426
                    'Name' => $_,
 
1427
                    'Data' => $self->{'_last_hspdata'}->{$_}
 
1428
                }
 
1429
            );
 
1430
        }
 
1431
        $self->{'_last_hspdata'} = {};
 
1432
    }
 
1433
 
 
1434
    if ( my $type = $MODEMAP{$nm} ) {
 
1435
        if ( my $handler = $self->_will_handle($type) ) {
 
1436
            my $func = sprintf( "end_%s", lc $type );
 
1437
            $rc = $handler->$func( $self->{'_reporttype'}, $self->{'_values'} );
 
1438
        }
 
1439
        shift @{ $self->{'_elements'} };
 
1440
 
 
1441
    }
 
1442
    elsif ( $MAPPING{$nm} ) {
 
1443
        if ( ref( $MAPPING{$nm} ) =~ /hash/i ) {
 
1444
            my $key = ( keys %{ $MAPPING{$nm} } )[0];
 
1445
            $self->{'_values'}->{$key}->{ $MAPPING{$nm}->{$key} } =
 
1446
              $self->{'_last_data'};
 
1447
        }
 
1448
        else {
 
1449
            $self->{'_values'}->{ $MAPPING{$nm} } = $self->{'_last_data'};
 
1450
        }
 
1451
    }
 
1452
    else {
 
1453
        $self->warn("unknown nm $nm, ignoring\n");
 
1454
    }
 
1455
    $self->{'_last_data'} = '';    # remove read data if we are at
 
1456
                                   # end of an element
 
1457
    $self->{'_result'} = $rc if ( $nm eq 'FastaOutput' );
904
1458
    return $rc;
905
1459
 
906
1460
}
916
1470
 
917
1471
=cut
918
1472
 
919
 
sub element{
920
 
   my ($self,$data) = @_;
921
 
   $self->start_element($data);
922
 
   $self->characters($data);
923
 
   $self->end_element($data);
 
1473
sub element {
 
1474
    my ( $self, $data ) = @_;
 
1475
    $self->start_element($data);
 
1476
    $self->characters($data);
 
1477
    $self->end_element($data);
924
1478
}
925
1479
 
926
 
 
927
1480
=head2 characters
928
1481
 
929
1482
 Title   : characters
935
1488
 
936
1489
=cut
937
1490
 
938
 
sub characters{
939
 
   my ($self,$data) = @_;   
940
 
 
941
 
   return unless ( defined $data->{'Data'} );
942
 
   if( $data->{'Data'} =~ /^\s+$/ ) {
943
 
       return unless $data->{'Name'} =~ /Hsp\_(midline|qseq|hseq)/;
944
 
   }
945
 
 
946
 
   if( $self->in_element('hsp') && 
947
 
       $data->{'Name'} =~ /Hsp\_(qseq|hseq|midline)/ ) {
948
 
       
949
 
       $self->{'_last_hspdata'}->{$data->{'Name'}} .= $data->{'Data'};
950
 
   }  
951
 
   
952
 
   $self->{'_last_data'} = $data->{'Data'}; 
 
1491
sub characters {
 
1492
    my ( $self, $data ) = @_;
 
1493
 
 
1494
    return unless ( defined $data->{'Data'} );
 
1495
    if ( $data->{'Data'} =~ /^\s+$/ ) {
 
1496
        return unless $data->{'Name'} =~ /Hsp\_(midline|qseq|hseq)/;
 
1497
    }
 
1498
 
 
1499
    if (   $self->in_element('hsp')
 
1500
        && $data->{'Name'} =~ /Hsp\_(qseq|hseq|midline)/ )
 
1501
    {
 
1502
 
 
1503
        $self->{'_last_hspdata'}->{ $data->{'Name'} } .= $data->{'Data'};
 
1504
    }
 
1505
 
 
1506
    $self->{'_last_data'} = $data->{'Data'};
953
1507
}
954
1508
 
955
1509
=head2 _mode
964
1518
 
965
1519
=cut
966
1520
 
967
 
sub _mode{
968
 
    my ($self,$value) = @_;
969
 
    if( defined $value) {
970
 
        $self->{'_mode'} = $value;
 
1521
sub _mode {
 
1522
    my ( $self, $value ) = @_;
 
1523
    if ( defined $value ) {
 
1524
        $self->{'_mode'} = $value;
971
1525
    }
972
1526
    return $self->{'_mode'};
973
1527
}
985
1539
 
986
1540
=cut
987
1541
 
988
 
sub within_element{
989
 
   my ($self,$name) = @_;  
990
 
   return 0 if ( ! defined $name &&
991
 
                 ! defined  $self->{'_elements'} ||
992
 
                 scalar @{$self->{'_elements'}} == 0) ;
993
 
   foreach (  @{$self->{'_elements'}} ) {
994
 
       if( $_ eq $name || $_ eq $MODEMAP{$name} ) {
995
 
           return 1;
996
 
       } 
997
 
   }
998
 
   return 0;
 
1542
sub within_element {
 
1543
    my ( $self, $name ) = @_;
 
1544
    return 0
 
1545
      if (!defined $name && !defined $self->{'_elements'}
 
1546
        || scalar @{ $self->{'_elements'} } == 0 );
 
1547
    foreach ( @{ $self->{'_elements'} } ) {
 
1548
        if ( $_ eq $name || $_ eq $MODEMAP{$name} ) {
 
1549
            return 1;
 
1550
        }
 
1551
    }
 
1552
    return 0;
999
1553
}
1000
1554
 
1001
1555
=head2 in_element
1011
1565
 
1012
1566
=cut
1013
1567
 
1014
 
sub in_element{
1015
 
   my ($self,$name) = @_;  
1016
 
   return 0 if ! defined $self->{'_elements'}->[0];
1017
 
   return ( $self->{'_elements'}->[0] eq $name ||
1018
 
            (exists $MODEMAP{$name} && $self->{'_elements'}->[0] eq $MODEMAP{$name})
1019
 
          );
 
1568
sub in_element {
 
1569
    my ( $self, $name ) = @_;
 
1570
    return 0 if !defined $self->{'_elements'}->[0];
 
1571
    return (
 
1572
        $self->{'_elements'}->[0] eq $name
 
1573
          || ( exists $MODEMAP{$name}
 
1574
            && $self->{'_elements'}->[0] eq $MODEMAP{$name} )
 
1575
    );
1020
1576
}
1021
1577
 
1022
 
 
1023
1578
=head2 start_document
1024
1579
 
1025
1580
 Title   : start_document
1031
1586
 
1032
1587
=cut
1033
1588
 
1034
 
sub start_document{
 
1589
sub start_document {
1035
1590
    my ($self) = @_;
1036
1591
    $self->{'_lasttype'} = '';
1037
 
    $self->{'_values'} = {};
1038
 
    $self->{'_result'}= undef;
1039
 
    $self->{'_mode'} = '';
 
1592
    $self->{'_values'}   = {};
 
1593
    $self->{'_result'}   = undef;
 
1594
    $self->{'_mode'}     = '';
1040
1595
    $self->{'_elements'} = [];
1041
1596
}
1042
1597
 
1043
 
 
1044
1598
=head2 end_document
1045
1599
 
1046
1600
 Title   : end_document
1052
1606
 
1053
1607
=cut
1054
1608
 
1055
 
sub end_document{
1056
 
   my ($self,@args) = @_;
1057
 
   return $self->{'_result'};
 
1609
sub end_document {
 
1610
    my ( $self, @args ) = @_;
 
1611
    return $self->{'_result'};
1058
1612
}
1059
1613
 
1060
1614
=head2 idlength
1070
1624
 
1071
1625
=cut
1072
1626
 
1073
 
sub idlength{
1074
 
   my ($self,$value) = @_;
1075
 
   if( defined $value) {
1076
 
      $self->{'_idlength'} = $value;
 
1627
sub idlength {
 
1628
    my ( $self, $value ) = @_;
 
1629
    if ( defined $value ) {
 
1630
        $self->{'_idlength'} = $value;
1077
1631
    }
1078
1632
    return $self->{'_idlength'} || $IDLENGTH;
1079
1633
}
1080
1634
 
1081
 
 
1082
1635
=head2 result_count
1083
1636
 
1084
1637
 Title   : result_count
1087
1640
 Returns : integer
1088
1641
 Args    : none
1089
1642
 
1090
 
 
1091
1643
=cut
1092
1644
 
1093
1645
sub result_count {
1095
1647
    return $self->{'_result_count'};
1096
1648
}
1097
1649
 
 
1650
sub attach_EventHandler {
 
1651
    my ( $self, $handler ) = @_;
 
1652
 
 
1653
    $self->SUPER::attach_EventHandler($handler);
 
1654
 
 
1655
    # Optimization: caching the EventHandler since it is used a lot
 
1656
    # during the parse.
 
1657
 
 
1658
    $self->{'_handler_cache'} = $handler;
 
1659
    return;
 
1660
}
 
1661
 
 
1662
=head2 _will_handle
 
1663
 
 
1664
 Title   : _will_handle
 
1665
 Usage   : Private method. For internal use only.
 
1666
              if( $self->_will_handle($type) ) { ... }
 
1667
 Function: Provides an optimized way to check whether or not an element of a 
 
1668
           given type is to be handled.
 
1669
 Returns : Reference to EventHandler object if the element type is to be handled.
 
1670
           undef if the element type is not to be handled.
 
1671
 Args    : string containing type of element.
 
1672
 
 
1673
Optimizations:
 
1674
 
 
1675
=over 2
 
1676
 
 
1677
=item 1
 
1678
 
 
1679
Using the cached pointer to the EventHandler to minimize repeated
 
1680
lookups.
 
1681
 
 
1682
=item 2
 
1683
 
 
1684
Caching the will_handle status for each type that is encountered so
 
1685
that it only need be checked by calling
 
1686
handler-E<gt>will_handle($type) once.
 
1687
 
 
1688
=back
 
1689
 
 
1690
This does not lead to a major savings by itself (only 5-10%).  In
 
1691
combination with other optimizations, or for large parse jobs, the
 
1692
savings good be significant.
 
1693
 
 
1694
To test against the unoptimized version, remove the parentheses from
 
1695
around the third term in the ternary " ? : " operator and add two
 
1696
calls to $self-E<gt>_eventHandler().
 
1697
 
 
1698
=cut
 
1699
 
 
1700
sub _will_handle {
 
1701
    my ( $self, $type ) = @_;
 
1702
    my $handler = $self->{'_handler_cache'};
 
1703
    my $will_handle =
 
1704
      defined( $self->{'_will_handle_cache'}->{$type} )
 
1705
      ? $self->{'_will_handle_cache'}->{$type}
 
1706
      : ( $self->{'_will_handle_cache'}->{$type} =
 
1707
          $handler->will_handle($type) );
 
1708
 
 
1709
    return $will_handle ? $handler : undef;
 
1710
}
 
1711
 
1098
1712
1;
1099
1713