1
# $Id: MZEF.pm,v 1.7 2001/05/16 14:57:44 heikki Exp $
3
# BioPerl module for Bio::Tools::MZEF
5
# Cared for by Hilmar Lapp <hlapp@gmx.net>
7
# Copyright Hilmar Lapp
9
# You may distribute this module under the same terms as perl itself
11
# POD documentation - main docs before the code
15
Bio::Tools::MZEF - Results of one MZEF run
19
$mzef = Bio::Tools::MZEF->new(-file => 'result.mzef');
21
$mzef = Bio::Tools::MZEF->new( -fh => \*INPUT );
22
# to indicate that the sequence was reversed prior to feeding it to MZEF
23
# and that you want to have this reflected in the strand() attribute of
24
# the exons, as well as have the coordinates translated to the non-reversed
26
$mzef = Bio::Tools::MZEF->new( -file => 'result.mzef',
30
# note: this class is-a Bio::Tools::AnalysisResult which implements
31
# Bio::SeqAnalysisParserI, i.e., $mzef->next_feature() is the same
32
while($gene = $mzef->next_prediction()) {
33
# $gene is an instance of Bio::Tools::Prediction::Gene
35
# $gene->exons() returns an array of
36
# Bio::Tools::Prediction::Exon objects
38
@exon_arr = $gene->exons();
41
@intrl_exons = $gene->exons('Internal');
42
# note that presently MZEF predicts only internal exons!
45
# essential if you gave a filename at initialization (otherwise the file
51
The MZEF module provides a parser for MZEF gene structure prediction
54
This module inherits from L<Bio::Tools::AnalysisResult> and therefore
55
implements L<Bio::SeqAnalysisParserI>.
61
User feedback is an integral part of the evolution of this and other
62
Bioperl modules. Send your comments and suggestions preferably to one
63
of the Bioperl mailing lists. Your participation is much appreciated.
65
bioperl-l@bioperl.org - General discussion
66
http://bio.perl.org/MailList.html - About the mailing lists
70
Report bugs to the Bioperl bug tracking system to help us keep track
71
of the bugs and their resolution. Bug reports can be submitted via email
74
bioperl-bugs@bio.perl.org
75
http://bio.perl.org/bioperl-bugs/
77
=head1 AUTHOR - Hilmar Lapp
79
Email hlapp@gmx.net (or hilmar.lapp@pharma.novartis.com)
81
Describe contact details here
85
The rest of the documentation details each of the object methods. Internal methods are usually preceded by an underscore (_).
90
# Let the code begin...
93
package Bio::Tools::MZEF;
97
use Bio::Tools::AnalysisResult;
98
use Bio::Tools::Prediction::Gene;
99
use Bio::Tools::Prediction::Exon;
101
@ISA = qw(Bio::Tools::AnalysisResult);
103
sub _initialize_state {
104
my($self,@args) = @_;
106
# first call the inherited method!
107
my $make = $self->SUPER::_initialize_state(@args);
109
# handle our own parameters
110
my ($strand, $params) =
111
$self->_rearrange([qw(STRAND
115
# our private state variables
116
$strand = 1 unless defined($strand);
117
$self->{'_strand'} = $strand;
118
$self->{'_preds_parsed'} = 0;
119
$self->{'_has_cds'} = 0;
120
# array of pre-parsed predictions
121
$self->{'_preds'} = [];
124
=head2 analysis_method
126
Usage : $mzef->analysis_method();
127
Purpose : Inherited method. Overridden to ensure that the name matches
135
sub analysis_method {
137
my ($self, $method) = @_;
138
if($method && ($method !~ /mzef/i)) {
139
$self->throw("method $method not supported in " . ref($self));
141
return $self->SUPER::analysis_method($method);
147
Usage : while($gene = $mzef->next_feature()) {
150
Function: Returns the next gene structure prediction of the MZEF result
151
file. Call this method repeatedly until FALSE is returned.
153
The returned object is actually a SeqFeatureI implementing object.
154
This method is required for classes implementing the
155
SeqAnalysisParserI interface, and is merely an alias for
156
next_prediction() at present.
158
Note that with the present version of MZEF there will only be one
159
object returned, because MZEF does not predict individual genes
160
but just potential internal exons.
162
Returns : A Bio::Tools::Prediction::Gene object.
168
my ($self,@args) = @_;
169
# even though next_prediction doesn't expect any args (and this method
170
# doesn't either), we pass on args in order to be prepared if this changes
172
return $self->next_prediction(@args);
175
=head2 next_prediction
177
Title : next_prediction
178
Usage : while($gene = $mzef->next_prediction()) {
181
Function: Returns the next gene structure prediction of the MZEF result
182
file. Call this method repeatedly until FALSE is returned.
184
Note that with the present version of MZEF there will only be one
185
object returned, because MZEF does not predict individual genes
186
but just potential internal exons.
188
Returns : A Bio::Tools::Prediction::Gene object.
193
sub next_prediction {
197
# if the prediction section hasn't been parsed yet, we do this now
198
$self->_parse_predictions() unless $self->_predictions_parsed();
200
# return the next gene structure (transcript)
201
return $self->_prediction();
204
=head2 _parse_predictions
206
Title : _parse_predictions()
207
Usage : $obj->_parse_predictions()
208
Function: Parses the prediction section. Automatically called by
209
next_prediction() if not yet done.
215
sub _parse_predictions {
217
my ($method); # set but not used presently
218
my $exon_tag = "InternalExon";
220
# my $seqname; # name given in output is poorly formatted
224
while(defined($_ = $self->_readline())) {
225
if(/^\s*(\d+)\s*-\s*(\d+)\s+/) {
227
if(! defined($gene)) {
228
$gene = Bio::Tools::Prediction::Gene->new(
229
'-primary' => "GenePrediction$prednr",
230
'-source' => 'MZEF');
232
# we handle start-end first because they may not be space-delimited
234
my ($start,$end) = ($1,$2);
235
s/^\s*(\d+)\s*-\s*(\d+)\s+//;
236
# split the rest into fields
238
# format: Coordinates P Fr1 Fr2 Fr3 Orf 3ss Cds 5ss
239
# index:              0 1   2   3   4   5   6   7
240
my @flds = split(' ', $_);
241
# create the feature object depending on the type of signal --
242
# which is always an (internal) exon for MZEF
243
my $predobj = Bio::Tools::Prediction::Exon->new();
245
$predobj->source_tag('MZEF');
246
$predobj->significance($flds[0]);
247
$predobj->score($flds[0]); # what shall we set as overall score?
248
$predobj->strand($self->{'_strand'}); # MZEF searches only one
249
if($predobj->strand() == 1) {
250
$predobj->start($start);
253
$predobj->start($seqlen-$end+1);
254
$predobj->end($seqlen-$start+1);
257
$predobj->start_signal_score($flds[5]);
258
$predobj->end_signal_score($flds[7]);
259
$predobj->coding_signal_score($flds[6]);
260
# frame -- we simply extract the one with highest score from the
261
# orf field, and store the individual scores for now
262
my $frm = index($flds[4], "1");
263
$predobj->frame(($frm < 0) ? undef : $frm);
264
$predobj->primary_tag($exon_tag);
265
$predobj->is_coding(1);
266
# add to gene structure (should be done only when start and end
267
# are set, in order to allow for proper expansion of the range)
268
$gene->add_exon($predobj);
271
if(/^\s*Internal .*(MZEF)/) {
272
$self->analysis_method($1);
275
if(/^\s*File_Name:\s+(\S+)\s+Sequence_length:\s+(\d+)/) {
276
# $seqname = $1; # this is too poor currently (file name truncated
277
# to 10 chars) in order to be sensible enough
282
# $gene->seqname($seqname);
283
$self->_add_prediction($gene) if defined($gene);
284
$self->_predictions_parsed(1);
289
Title : _prediction()
290
Usage : $gene = $obj->_prediction()
300
return undef unless(exists($self->{'_preds'}) && @{$self->{'_preds'}});
301
return shift(@{$self->{'_preds'}});
304
=head2 _add_prediction
306
Title : _add_prediction()
307
Usage : $obj->_add_prediction($gene)
314
sub _add_prediction {
315
my ($self, $gene) = @_;
317
if(! exists($self->{'_preds'})) {
318
$self->{'_preds'} = [];
320
push(@{$self->{'_preds'}}, $gene);
323
=head2 _predictions_parsed
325
Title : _predictions_parsed
326
Usage : $obj->_predictions_parsed
329
Returns : TRUE or FALSE
333
sub _predictions_parsed {
334
my ($self, $val) = @_;
336
$self->{'_preds_parsed'} = $val if $val;
337
if(! exists($self->{'_preds_parsed'})) {
338
$self->{'_preds_parsed'} = 0;
340
return $self->{'_preds_parsed'};