1
# $Id: Array.pm,v 1.5 2003/09/08 12:17:14 heikki Exp $
3
# BioPerl module for Bio::Seq::Meta::Array
5
# Cared for by Heikki Lehvaslaiho
7
# Copyright Heikki Lehvaslaiho
9
# You may distribute this module under the same terms as perl itself
11
# POD documentation - main docs before the code
15
Bio::Seq::Meta::Array - array-based generic implementation of a sequence class with residue-based meta information
19
use Bio::LocatableSeq;
20
use Bio::Seq::Meta::Array;
22
my $seq = Bio::LocatableSeq->new(-id=>'test',
27
-varbose=>1, # to see warnings
29
bless $seq, Bio::Seq::Meta::Array;
30
# the existing sequence object can be a Bio::PrimarySeq, too
32
# to test this is a meta seq object
33
$seq->isa("Bio::Seq::Meta::Array")
34
|| $seq->throw("$seq is not a Bio::Seq::Meta::Array");
36
$seq->meta('1 2 3 4 5 6 7 8 9 10');
39
$arrayref = $seq->meta();
40
$string = $seq->meta_text();
41
$substring = $seq->submeta_text(2,5);
42
$unique_key = $seq->accession_number();
46
This class implements generic methods for sequences with residue-based
47
meta information. Meta sequences with meta data are Bio::LocatableSeq
48
objects with additional methods to store that meta information. See
49
L<Bio::LocatableSeq> and L<Bio::Seq::MetaI>.
51
The meta information in this class can be a string of variable length
52
and can be a complex structure. Blank values are undef or zero.
54
Application specific implementations should inherit from this class to
55
override and add to these methods.
67
User feedback is an integral part of the evolution of this and other
68
Bioperl modules. Send your comments and suggestions preferably to one
69
of the Bioperl mailing lists. Your participation is much appreciated.
71
bioperl-l@bioperl.org - General discussion
72
http://bio.perl.org/MailList.html - About the mailing lists
76
Report bugs to the Bioperl bug tracking system to help us keep track
77
the bugs and their resolution. Bug reports can be submitted via email
80
bioperl-bugs@bio.perl.org
81
http://bugzilla.bioperl.org/
83
=head1 AUTHOR - Heikki Lehvaslaiho
85
Email heikki@ebi.ac.uk
89
Chad Matsalla, bioinformatics@dieselwurks.com
90
Aaron Mackey, amackey@virginia.edu
94
The rest of the documentation details each of the object methods.
95
Internal methods are usually preceded with a _
100
# Let the code begin...
103
package Bio::Seq::Meta::Array;
104
use vars qw(@ISA $DEFAULT_NAME $GAP $META_GAP);
106
use Bio::LocatableSeq;
111
#use overload '""' => \&to_string;
113
@ISA = qw( Bio::LocatableSeq Bio::Seq Bio::Seq::MetaI );
117
$DEFAULT_NAME = 'DEFAULT';
125
Usage : $metaseq = Bio::Seq::Meta->new
126
( -meta => 'aaaaaaaabbbbbbbb',
127
-seq => 'TKLMILVSHIVILSRM'
129
-accession_number => 'S000012',
131
Function: Constructor for Bio::Seq::Meta class, meta data being in a
132
string. Note that you can provide an empty quality string.
133
Returns : a new Bio::Seq::Meta object
138
my ($class, %args) = @_;
139
#defined inheritance according to stated baseclass,
140
#if undefined then will be PrimarySeq
141
if (defined($args{'-baseclass'})) {
142
@ISA = ($args{'-baseclass'},"Bio::Seq::MetaI");
145
@ISA = qw( Bio::LocatableSeq Bio::Seq Bio::Seq::MetaI );
148
my $self = $class->SUPER::new(%args);
151
$self->_rearrange([qw(META
155
$self->{'_meta'}->{$DEFAULT_NAME} = undef;
157
$meta && $self->meta($meta);
166
Usage : $meta_values = $obj->meta($values_string);
169
Get and set method for the meta data starting from residue
170
position one. Since it is dependent on the length of the
171
sequence, it needs to be manipulated after the sequence.
173
The length of the returned value always matches the length
176
Returns : reference to an array of meta data
177
Args : new value, string or array ref, optional
182
shift->named_meta($DEFAULT_NAME, shift);
188
Usage : $meta_values = $obj->meta_text($values_arrayref);
189
Function: Variant of meta() guarantied to return a textual
190
representation of meta data. For details, see L<meta>.
192
Args : new value, string or array ref, optional
197
return join ' ', @{shift->meta(shift)};
203
Usage : $meta_values = $obj->named_meta($name, $values_arrayref);
204
Function: A more general version of meta(). Each meta data set needs
205
to be named. See also L<meta_names>.
206
Returns : reference to an array of meta data
207
Args : scalar, name of the meta data set
208
new value, string or array ref, optional
213
my ($self, $name, $value) = @_;
215
$name ||= $DEFAULT_NAME;
217
if (defined $value) {
220
if (ref $value eq 'ARRAY' ) { # array ref
223
elsif (not ref($value)) { # scalar
224
$arrayref = [split /\s+/, $value];
226
$self->throw("I need a scalar or array ref, not [". ref($value). "]");
230
my $diff = $self->length - @{$arrayref};
232
foreach (1..$diff) { push @{$arrayref}, 0;}
235
$self->{'_meta'}->{$name} = $arrayref;
237
#$self->_test_gap_positions($name) if $self->verbose > 0;
240
if (defined $self->{'_meta'}->{$name} and
241
scalar @{$self->{'_meta'}->{$name}} > $self->length ) {
242
return [@{$self->{'_meta'}->{$name}}[0..($self->length-1)]];
244
return $self->{'_meta'}->{$name} || (" " x $self->length);
248
=head2 _test_gap_positions
250
Title : _test_gap_positions
251
Usage : $meta_values = $obj->_test_gap_positions($name);
252
Function: Internal test for correct position of gap characters.
253
Gap being only '-' this time.
255
This method is called from named_meta() when setting meta
256
data but only if verbose is positive as this can be an
257
expensive process on very long sequences. Set verbose(1) to
258
see warnings when gaps do not align in sequence and meta
259
data and turn them into errors by setting verbose(2).
261
Returns : true on success, prints warnings
266
sub _test_gap_positions {
271
$self->seq || return $success;
272
my $len = CORE::length($self->seq);
273
for (my $i=0; $i < $len; $i++) {
274
my $s = substr $self->{seq}, $i, 1;
275
my $m = substr $self->{_meta}->{$name}, $i, 1;
276
$self->warn("Gap mismatch in column [". ($i+1). "] of [$name] meta data in seq [". $self->id. "]")
278
#if ($s eq '-' || $m eq '-') && $s ne $m;
279
if ($m eq '-') && $s ne $m;
284
=head2 named_meta_text
286
Title : named_meta_text()
287
Usage : $meta_values = $obj->named_meta_text($name, $values_arrayref);
288
Function: Variant of named_meta() guarantied to return a textual
289
representation of the named meta data.
290
For details, see L<meta>.
292
Args : scalar, name of the meta data set
293
new value, string or array ref, optional
297
sub named_meta_text {
298
return join ' ', @{shift->named_meta(@_)};
305
Usage : $subset_of_meta_values = $obj->submeta(10, 20, $value_string);
306
$subset_of_meta_values = $obj->submeta(10, undef, $value_string);
309
Get and set method for meta data for subsequences.
311
Numbering starts from 1 and the number is inclusive, ie 1-2
312
are the first two residue of the sequence. Start cannot be
313
larger than end but can be equal.
315
If the second argument is missing the returned values
316
should extend to the end of the sequence.
318
The return value may be a string or an array reference,
319
depending on the implentation. If in doubt, use
320
submeta_text() which is a variant guarantied to return a
321
string. See L<submeta_text>.
323
Returns : A reference to an array or a string
324
Args : integer, start position
325
integer, end position, optional when a third argument present
326
new value, string or array ref, optional
331
shift->named_submeta($DEFAULT_NAME, @_);
337
Usage : $meta_values = $obj->submeta_text(20, $value_string);
338
Function: Variant of submeta() guarantied to return a textual
339
representation of meta data. For details, see L<meta>.
341
Args : new value, string or array ref, optional
347
return join ' ', @{shift->submeta(@_)};
352
Title : named_submeta
353
Usage : $subset_of_meta_values = $obj->named_submeta($name, 10, 20, $value_string);
354
$subset_of_meta_values = $obj->named_submeta($name, 10);
355
Function: Variant of submeta() guarantied to return a textual
356
representation of meta data. For details, see L<meta>.
357
Returns : A reference to an array or a string
358
Args : scalar, name of the meta data set
359
integer, start position
360
integer, end position, optional when a third argument present
361
new value, string or array ref, optional
367
my ($self, $name, $start, $end, $value) = @_;
369
$name ||= $DEFAULT_NAME;
371
$start =~ /^[+]?\d+$/ and $start > 0 or
372
$self->throw("Need at least a positive integer start value");
375
my $metaref = $self->{_meta}->{$name};
377
if (defined $value) {
380
$self->warn("You are setting meta values beyond the length of the sequence\n".
381
"[$start > ". length($self->seq)."] in sequence ". $self->id)
382
if $start > $self->length;
384
if (ref $value eq 'ARRAY' ) { # array ref
387
elsif (not ref($value)) { # scalar
388
$arrayref = [split /\s+/, $value];
390
$self->throw("I need a scalar or array ref, not [". ref($value). "]");
395
$end or $end = @{$arrayref} + $start;
396
#$end = length @{$arrayref} + $start if $end > (length (@{$arrayref}) + $start);
399
# test for length; pad if needed
400
my $diff = $end - $start - scalar @{$arrayref};
402
foreach (1..$diff) { push @{$arrayref}, $META_GAP}
405
@{$metaref}[$start..$end] = @{$arrayref};
410
$end or $end = $self->length;
411
$end = $self->length if $end > $self->length;
413
return [@{$metaref}[$start..$end]];
419
=head2 named_submeta_text
421
Title : named_submeta_text
422
Usage : $meta_values = $obj->named_submeta_text($name, 20, $value_string);
423
Function: Variant of submeta() guarantied to return a textual
424
representation of meta data. For details, see L<meta>.
426
Args : scalar, name of the meta data
427
Args : integer, start position, optional
428
integer, end position, optional
429
new value, string or array ref, optional
433
sub named_submeta_text {
434
return join ' ', @{shift->named_submeta(@_)};
440
Usage : @meta_names = $obj->meta_names()
441
Function: Retrives an array of meta data set names. The default
442
(unnamed) set name is guarantied to be the first name if it
444
Returns : an array of names
453
foreach ( sort keys %{$self->{'_meta'}} ) {
454
push (@r, $_) unless $_ eq $DEFAULT_NAME;
456
unshift @r, $DEFAULT_NAME if $self->{'_meta'}->{$DEFAULT_NAME};
461
=head1 Bio::PrimarySeqI methods
466
Usage : $newseq = $seq->revcom();
467
Function: Produces a new Bio::Seq::MetaI implementing object where
468
the order of residues and their meta information is reversed.
469
Returns : A new (fresh) Bio::Seq::Meta object
477
my $new = $self->SUPER::revcom;
478
my $end = $self->length - 1;
480
$new->{_meta}->{$_} = [ reverse @{$self->{_meta}->{$_}}[0..$end]]
481
} keys %{$self->{_meta}};
489
Usage : $subseq = $seq->trunc(10,100);
490
Function: Provides a truncation of a sequence together with meta data
491
Returns : a fresh Bio::Seq::Meta implementing object
492
Args : Two integers denoting first and last residue of the sub-sequence.
497
my ($self, $start, $end) = @_;
500
$start =~ /^[+]?\d+$/ and $start > 0 or
501
$self->throw("Need at least a positive integer start value as start");
502
$end =~ /^[+]?\d+$/ and $end > 0 or
503
$self->throw("Need at least a positive integer start value as end");
505
$self->throw("End position has to be larger or equal to start");
506
$end <= $self->length or
507
$self->throw("End position can not be larger than sequence length");
510
my $new = $self->SUPER::trunc($start, $end);
513
$new->{_meta}->{$_} = [@{$self->{_meta}->{$_}}[$start..$end]]
514
} keys %{$self->{_meta}};