~ubuntu-branches/ubuntu/oneiric/bioperl/oneiric

« back to all changes in this revision

Viewing changes to Bio/SeqIO/kegg.pm

  • Committer: Bazaar Package Importer
  • Author(s): Charles Plessy
  • Date: 2007-09-21 22:52:22 UTC
  • mfrom: (1.2.2 upstream)
  • Revision ID: james.westby@ubuntu.com-20070921225222-tt20m2yy6ycuy2d8
Tags: 1.5.2.102-1
* Developer release.
* Upgraded source package to debhelper 5 and standards-version 3.7.2.
* Added libmodule-build-perl and libtest-harness-perl to
  build-depends-indep.
* Disabled automatic CRAN download.
* Using quilt instead of .diff.gz to manage modifications.
* Updated Recommends list for the binary package.
* Moved the "production-quality" scripts to /usr/bin/.
* New maintainer: Debian-Med packaging team mailing list.
* New uploaders: Charles Plessy and Steffen Moeller.
* Updated Depends, Recommends and Suggests.
* Imported in Debian-Med's SVN repository on Alioth.
* Executing the regression tests during package building.
* Moved the Homepage: field out from the package's description.
* Updated watch file.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# $Id: kegg.pm,v 1.5 2003/12/15 11:50:38 heikki Exp $
 
1
# $Id: kegg.pm,v 1.18.4.1 2006/10/02 23:10:29 sendu Exp $
2
2
#
3
3
# BioPerl module for Bio::SeqIO::kegg
4
4
#
16
16
 
17
17
=head1 SYNOPSIS
18
18
 
19
 
  #It is probably best not to use this object directly, but
20
 
  #rather go through the SeqIO handler system. Go:
 
19
  # It is probably best not to use this object directly, but
 
20
  # rather go through the SeqIO handler system. Go:
 
21
 
 
22
  use Bio::SeqIO;
21
23
 
22
24
  $stream = Bio::SeqIO->new(-file => $filename, -format => 'KEGG');
23
25
 
42
44
 
43
45
=item 'ENTRY'
44
46
 
45
 
$seq-E<gt>primary_id
 
47
 $seq->primary_id
46
48
 
47
49
=item 'NAME'
48
50
 
49
 
$seq-E<gt>display_id
 
51
 $seq->display_id
50
52
 
51
53
=item 'DEFINITION'
52
54
 
53
 
$seq-E<gt>annotation-E<gt>get_Annotations('description');
 
55
 $seq->annotation->get_Annotations('description');
54
56
 
55
57
=item 'ORTHOLOG'
56
58
 
57
 
grep {$_-E<gt>database eq 'KO'} $seq-E<gt>annotation-E<gt>get_Annotations('dblink')
 
59
 grep {$_->database eq 'KO'} $seq->annotation->get_Annotations('dblink')
58
60
 
59
61
=item 'CLASS'
60
62
 
61
 
grep {$_-E<gt>database eq 'PATH'} $seq-E<gt>annotation-E<gt>get_Annotations('dblink')
 
63
 grep {$_->database eq 'PATH'}
 
64
          $seq->annotation->get_Annotations('dblink')
62
65
 
63
66
=item 'POSITION'
64
67
 
65
68
FIXME, NOT IMPLEMENTED
66
69
 
 
70
=item 'PATHWAY'
 
71
 
 
72
 for my $pathway ( $seq->annotation->get_Annotations('pathway') ) {
 
73
    #
 
74
 }
 
75
 
67
76
=item 'DBLINKS'
68
77
 
69
 
$seq-E<gt>annotation-E<gt>get_Annotations('dblink')
 
78
 $seq->annotation->get_Annotations('dblink')
70
79
 
71
80
=item 'CODON_USAGE'
72
81
 
74
83
 
75
84
=item 'AASEQ'
76
85
 
77
 
$seq-E<gt>translate-E<gt>seq
 
86
 $seq->translate->seq
78
87
 
79
88
=item 'NTSEQ'
80
89
 
81
 
$seq-E<gt>seq
 
90
 $seq-E<gt>seq
82
91
 
83
92
=back
84
93
 
86
95
 
87
96
=head2 Mailing Lists
88
97
 
89
 
User feedback is an integral part of the evolution of this
90
 
and other Bioperl modules. Send your comments and suggestions preferably
91
 
 to one of the Bioperl mailing lists.
92
 
Your participation is much appreciated.
 
98
User feedback is an integral part of the evolution of this and other
 
99
Bioperl modules. Send your comments and suggestions preferably to one
 
100
of the Bioperl mailing lists.  Your participation is much appreciated.
93
101
 
94
102
  bioperl-l@bioperl.org                  - General discussion
95
 
  http://www.bioperl.org/MailList.shtml  - About the mailing lists
 
103
  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
96
104
 
97
105
=head2 Reporting Bugs
98
106
 
99
107
Report bugs to the Bioperl bug tracking system to help us keep track
100
 
 the bugs and their resolution.
101
 
 Bug reports can be submitted via email or the web:
 
108
the bugs and their resolution. Bug reports can be submitted via the web:
102
109
 
103
 
  bioperl-bugs@bio.perl.org
104
 
  http://bugzilla.bioperl.org/
 
110
  http://bugzilla.open-bio.org/
105
111
 
106
112
=head1 AUTHOR - Allen Day
107
113
 
117
123
# Let the code begin...
118
124
 
119
125
package Bio::SeqIO::kegg;
120
 
use vars qw(@ISA);
121
126
use strict;
122
127
 
123
 
use Bio::SeqIO;
124
128
use Bio::SeqFeature::Generic;
125
129
use Bio::Species;
126
130
use Bio::Seq::SeqFactory;
128
132
use Bio::Annotation::Comment;
129
133
use Bio::Annotation::DBLink;
130
134
 
131
 
@ISA = qw(Bio::SeqIO);
132
 
 
 
135
use base qw(Bio::SeqIO);
 
136
 
133
137
sub _initialize {
134
 
    my($self,@args) = @_;
135
 
    
136
 
    $self->SUPER::_initialize(@args); 
137
 
    # hash for functions for decoding keys.
138
 
    $self->{'_func_ftunit_hash'} = {}; 
139
 
    if( ! defined $self->sequence_factory ) {
140
 
        $self->sequence_factory(new Bio::Seq::SeqFactory
141
 
                                (-verbose => $self->verbose(), 
142
 
                                 -type => 'Bio::Seq::RichSeq'));
143
 
    }
 
138
        my($self,@args) = @_;
 
139
 
 
140
        $self->SUPER::_initialize(@args);
 
141
        # hash for functions for decoding keys.
 
142
        $self->{'_func_ftunit_hash'} = {};
 
143
        if( ! defined $self->sequence_factory ) {
 
144
                $self->sequence_factory(new Bio::Seq::SeqFactory
 
145
                                                                                (-verbose => $self->verbose(),
 
146
                                                                                 -type => 'Bio::Seq::RichSeq'));
 
147
        }
144
148
}
145
149
 
146
150
=head2 next_seq
154
158
=cut
155
159
 
156
160
sub next_seq {
157
 
  my ($self,@args) = @_;
158
 
  my $builder = $self->sequence_builder();
159
 
  my $seq;
160
 
  my %params;
161
 
 
162
 
  my $buffer;
163
 
  my (@acc, @features);
164
 
  my ($display_id, $annotation);
165
 
  my $species;
166
 
 
167
 
  # initialize; we may come here because of starting over
168
 
  @features = ();
169
 
  $annotation = undef;
170
 
  @acc = ();
171
 
  $species = undef;
172
 
  %params = (-verbose => $self->verbose); # reset hash
173
 
  local($/) = "///\n";
174
 
 
175
 
  $buffer = $self->_readline();
176
 
 
177
 
  return undef if( !defined $buffer ); # end of file
178
 
  $buffer =~ /^ENTRY/ ||
179
 
        $self->throw("KEGG stream with bad ENTRY line. Not KEGG in my book. Got '$buffer'");
180
 
 
181
 
  my %FIELDS;
182
 
  my @chunks = split /\n(?=\S)/, $buffer;
183
 
 
184
 
  foreach my $chunk (@chunks){
185
 
        my($key) = $chunk =~ /^(\S+)/;
186
 
        $FIELDS{$key} = $chunk;
187
 
  }
188
 
 
189
 
  my($entry_id,$entry_seqtype,$entry_species) = $FIELDS{ENTRY} =~ /^ENTRY\s+(\d+)\s+(\S+)\s+(\S+)\s*$/;
190
 
 
191
 
  my($name) = $FIELDS{NAME} =~ /^NAME\s+(.+)$/;
192
 
 
193
 
  my($definition) = $FIELDS{DEFINITION} =~ /^DEFINITION\s+(.+)$/s;
194
 
  $definition =~ s/\s+/ /gs;
195
 
 
196
 
  my($aa_length,$aa_seq) = $FIELDS{AASEQ} =~ /^AASEQ\s+(\d+)\n(.+)$/s;
197
 
  $aa_seq =~ s/\s+//g;
198
 
  my($nt_length,$nt_seq) = $FIELDS{NTSEQ} =~ /^NTSEQ\s+(\d+)\n(.+)$/s;
199
 
  $nt_seq =~ s/\s+//g;
200
 
 
201
 
  $annotation = Bio::Annotation::Collection->new();
202
 
  $annotation->add_Annotation('description',Bio::Annotation::Comment->new(-text => $definition));
203
 
 
204
 
  my($ortholog_db,$ortholog_id,$ortholog_desc) = $FIELDS{ORTHOLOG} =~ /^ORTHOLOG\s+(\S+):\s+(\S+)\s+(\S*)\s*$/;
205
 
  $annotation->add_Annotation('dblink',Bio::Annotation::DBLink->new(-database => $ortholog_db,
206
 
                                                                                                                                        -primary_id => $ortholog_id,
207
 
                                                                                                                                        -comment => $ortholog_desc
208
 
                                                                                                                                   )
209
 
                                                         );
210
 
 
211
 
  $FIELDS{CLASS} =~ s/^CLASS\s+//;
212
 
  while($FIELDS{CLASS} =~ /.+?\[(\S+):(\S+)\]/gs){
213
 
        $annotation->add_Annotation('dblink',Bio::Annotation::DBLink->new(-database => $1, -primary_id => $2));
214
 
  }
215
 
 
216
 
  $FIELDS{DBLINKS} =~ s/^DBLINKS/       /;
217
 
  while($FIELDS{DBLINKS} =~ /\s+(\S+):\s+(\S+)\n/gs){
218
 
        $annotation->add_Annotation('dblink',Bio::Annotation::DBLink->new(-database => $1, -primary_id => $2));
 
161
        my ($self,@args) = @_;
 
162
        my $builder = $self->sequence_builder();
 
163
        my $seq;
 
164
        my %params;
 
165
 
 
166
        my $buffer;
 
167
        my (@acc, @features);
 
168
        my ($display_id, $annotation);
 
169
        my $species;
 
170
 
 
171
        # initialize; we may come here because of starting over
 
172
        @features = ();
 
173
        $annotation = undef;
 
174
        @acc = ();
 
175
        $species = undef;
 
176
        %params = (-verbose => $self->verbose); # reset hash
 
177
        local($/) = "///\n";
 
178
 
 
179
        $buffer = $self->_readline();
 
180
 
 
181
        return if( !defined $buffer ); # end of file
 
182
        $buffer =~ /^ENTRY/ ||
 
183
          $self->throw("KEGG stream with bad ENTRY line. Not KEGG in my book. Got $buffer'");
 
184
 
 
185
        my %FIELDS;
 
186
        my @chunks = split /\n(?=\S)/, $buffer;
 
187
 
 
188
        foreach my $chunk (@chunks){
 
189
                my($key) = $chunk =~ /^(\S+)/;
 
190
                $FIELDS{$key} = $chunk;
 
191
        }
 
192
 
 
193
        # changing to split method to get entry_ids that include
 
194
        # sequence version like Whatever.1
 
195
        my(undef,$entry_id,$entry_seqtype,$entry_species) =
 
196
          split(' ',$FIELDS{ENTRY});
 
197
 
 
198
        my($name);
 
199
        if ($FIELDS{NAME}) {
 
200
          ($name) = $FIELDS{NAME} =~ /^NAME\s+(.+)$/;
 
201
        }
 
202
 
 
203
        my( $definition, $aa_length, $aa_seq, $nt_length, $nt_seq );
 
204
 
 
205
        if(( exists $FIELDS{DEFINITION} ) and ( $FIELDS{DEFINITION} =~ /^DEFINITION/ )) {
 
206
          ($definition) = $FIELDS{DEFINITION} =~ /^DEFINITION\s+(.+)$/s;
 
207
          $definition =~ s/\s+/ /gs;
 
208
        }
 
209
        if(( exists $FIELDS{AASEQ} ) and ( $FIELDS{AASEQ} =~ /^AASEQ/ )) {
 
210
          ($aa_length,$aa_seq) = $FIELDS{AASEQ} =~ /^AASEQ\s+(\d+)\n(.+)$/s;
 
211
          $aa_seq =~ s/\s+//g;
 
212
        }
 
213
        if(( exists  $FIELDS{NTSEQ} ) and ( $FIELDS{NTSEQ} =~ /^NTSEQ/ )) {
 
214
          ($nt_length,$nt_seq) = $FIELDS{NTSEQ} =~ /^NTSEQ\s+(\d+)\n(.+)$/s;
 
215
          $nt_seq =~ s/\s+//g;
 
216
        }
 
217
 
 
218
        $annotation = Bio::Annotation::Collection->new();
 
219
 
 
220
        $annotation->add_Annotation('description',
 
221
                                                Bio::Annotation::Comment->new(-text => $definition));
 
222
 
 
223
        $annotation->add_Annotation('aa_seq',
 
224
                                                Bio::Annotation::Comment->new(-text => $aa_seq));
 
225
 
 
226
        my($ortholog_db,$ortholog_id,$ortholog_desc);
 
227
        if ($FIELDS{ORTHOLOG}) {
 
228
                ($ortholog_db,$ortholog_id,$ortholog_desc) = $FIELDS{ORTHOLOG}
 
229
                  =~ /^ORTHOLOG\s+(\S+):\s+(\S+)\s+(.*?)$/;
 
230
 
 
231
        $annotation->add_Annotation('dblink',Bio::Annotation::DBLink->new(
 
232
                     -database => $ortholog_db,
 
233
                     -primary_id => $ortholog_id,
 
234
                     -comment => $ortholog_desc) );
 
235
  }
 
236
 
 
237
  if($FIELDS{MOTIF}){
 
238
     $FIELDS{MOTIF} =~ s/^MOTIF\s+//;
 
239
     while($FIELDS{MOTIF} =~/\s*?(\S+):\s+(.+?)$/mg){
 
240
         my $db = $1;
 
241
         my $ids = $2;
 
242
         foreach my $id (split(/\s+/, $ids)){
 
243
 
 
244
     $annotation->add_Annotation('dblink',Bio::Annotation::DBLink->new(
 
245
              -database =>$db,
 
246
              -primary_id => $id,
 
247
              -comment => "")   );
 
248
        }
 
249
     }
 
250
  }
 
251
 
 
252
  if($FIELDS{PATHWAY}) {
 
253
     $FIELDS{PATHWAY} =~ s/^PATHWAY\s+//;
 
254
     while($FIELDS{PATHWAY} =~ /\s*PATH:\s+(.+)$/mg){
 
255
        $annotation->add_Annotation('pathway',
 
256
           Bio::Annotation::Comment->new(-text => "$1"));
 
257
     }
 
258
  }
 
259
 
 
260
  if ($FIELDS{CLASS}) {
 
261
      $FIELDS{CLASS} =~ s/^CLASS\s+//;
 
262
      $FIELDS{'CLASS'} =~ s/\n//g;
 
263
      while($FIELDS{CLASS} =~ /(.*?)\[(\S+):(\S+)\]/g){
 
264
          my ($pathway,$db,$id) = ($1,$2,$3);
 
265
          $pathway =~ s/\s+/ /g;
 
266
          $pathway =~ s/\s$//g;
 
267
          $pathway =~ s/^\s+//;
 
268
          $annotation->add_Annotation('pathway',
 
269
                  Bio::Annotation::Comment->new(-text => $pathway));
 
270
 
 
271
          $annotation->add_Annotation('dblink',Bio::Annotation::DBLink->new(
 
272
                      -database => $db, -primary_id => $id));
 
273
      }
 
274
  }
 
275
 
 
276
  if($FIELDS{DBLINKS}) {
 
277
      $FIELDS{DBLINKS} =~ s/^DBLINKS/       /;
 
278
      while($FIELDS{DBLINKS} =~ /\s+(\S+):\s+(\S+)\n?/gs){ ### modified
 
279
           $annotation->add_Annotation('dblink',Bio::Annotation::DBLink->new(
 
280
                    -database => $1, -primary_id => $2)) if $1;
 
281
      }
219
282
  }
220
283
 
221
284
  $params{'-alphabet'}         = 'dna';
222
285
  $params{'-seq'}              = $nt_seq;
223
286
  $params{'-display_id'}       = $name;
224
287
  $params{'-accession_number'} = $entry_id;
225
 
  $params{'-species'}          = Bio::Species->new(-common_name => $entry_species);
 
288
  $params{'-species'}          = Bio::Species->new(
 
289
                                                                                          -common_name => $entry_species);
226
290
  $params{'-annotation'}       = $annotation;
227
291
 
228
292
  $builder->add_slot_value(%params);