22
22
# you can use either a space-delimited string for quality
24
24
my $string_quals = "10 20 30 40 50 40 30 20 10";
25
my $qualobj = Bio::Seq::PrimaryQual->new
26
( '-qual' => $string_quals,
27
'-id' => 'QualityFragment-12',
28
'-accession_number' => 'X78121',
25
my $qualobj = Bio::Seq::PrimaryQual->new(
26
-qual => $string_quals,
27
-id => 'QualityFragment-12',
28
-accession_number => 'X78121',
31
31
# _or_ you can use an array of quality values
33
33
my @q2 = split/ /,$string_quals;
34
$qualobj = Bio::Seq::PrimaryQual->new( '-qual' => \@q2,
35
'-primary_id' => 'chads primary_id',
36
'-desc' => 'chads desc',
37
'-accession_number' => 'chads accession_number',
34
$qualobj = Bio::Seq::PrimaryQual->new(
36
-primary_id => 'chads primary_id',
37
-desc => 'chads desc',
38
-accession_number => 'chads accession_number',
41
42
# to get the quality values out:
99
# Let the code begin...
101
100
package Bio::Seq::PrimaryQual;
102
use vars qw(%valid_type);
106
104
use base qw(Bio::Root::Root Bio::Seq::QualI);
106
our $MATCHPATTERN = '0-9eE\.\s+-';
112
Usage : $qual = Bio::Seq::PrimaryQual->new
113
( -qual => '10 20 30 40 50 50 20 10',
115
-accession_number => 'AL000012',
112
Usage : $qual = Bio::Seq::PrimaryQual->new(
113
-qual => '10 20 30 40 50 50 20 10',
115
-accession_number => 'AL000012',
118
118
Function: Returns a new Bio::Seq::PrimaryQual object from basic
119
constructors, being a string _or_ a reference to an array for the
120
sequence and strings for id and accession_number. Note that you
121
can provide an empty quality string.
119
constructors, being a string _or_ a reference to an array for the
120
sequence and strings for id and accession_number. Note that you
121
can provide an empty quality string.
122
122
Returns : a new Bio::Seq::PrimaryQual object
130
127
my ($class, @args) = @_;
131
128
my $self = $class->SUPER::new(@args);
203
join ' ', @{ shift->qual };
200
return join ' ', @{ shift->qual };
207
204
=head2 validate_qual($qualstring)
209
Title : validate_qual($qualstring)
210
Usage : print("Valid.") if { &validate_qual($self,$qualities); }
211
Function: Make sure that the quality, if it has length > 0, contains at
212
least one digit. Note that quality strings are parsed into arrays
213
using split/\d+/,$quality_string, so make sure that your quality
214
scalar looks like this if you want it to be parsed properly.
215
Returns : 1 for a valid sequence (WHY? Shouldn\'t it return 0? <boggle>)
216
Args : a scalar (any scalar, why PrimarySeq author?) and a scalar
217
containing the string to validate.
206
Title : validate_qual($qualstring)
207
Usage : print("Valid.") if { &validate_qual($self, $quality_string); }
208
Function: Test that the given quality string is valid. It is expected to
209
contain space-delimited numbers that can be parsed using split /\d+/.
210
However, this validation takes shortcuts and only tests that the
211
string contains characters valid in numbers: 0-9 . eE +-
212
Note that empty quality strings are valid too.
213
Returns : 1 for a valid sequence, 0 otherwise
214
Args : - Scalar containing the quality string to validate.
215
- Boolean to optionally throw an error if validation failed
221
219
sub validate_qual {
222
# how do I validate quality values?
223
# \d+\s+\d+..., I suppose
224
my ($self,$qualstr) = @_;
225
# why the CORE?? -- (Because Bio::PrimarySeqI namespace has a
226
# length method, you have to qualify
227
# which length to use)
228
return 0 if (!defined $qualstr || CORE::length($qualstr) <= 0);
229
return 1 if( $qualstr =~ /\d/);
220
my ($self, $qualstr, $throw) = @_;
221
if ( (defined $qualstr ) &&
222
($qualstr !~ /^[$MATCHPATTERN]*$/) ) {
224
$self->throw("Failed validation of quality score from '".
225
(defined($self->id)||'[unidentified sequence]')."'. No numeric ".
234
234
=head2 subqual($start,$end)
236
236
Title : subqual($start,$end)
237
237
Usage : @subset_of_quality_values = @{$obj->subqual(10,40)};
238
238
Function: returns the quality values from $start to $end, where the
239
first value is 1 and the number is inclusive, ie 1-2 are the
240
first two bases of the sequence. Start cannot be larger than
241
end but can be equal.
239
first value is 1 and the number is inclusive, ie 1-2 are the
240
first two bases of the sequence. Start cannot be larger than
241
end but can be equal.
242
242
Returns : A reference to an array.
243
243
Args : a start position and an end position
249
my ($self,$start,$end) = @_;
252
$self->throw("in subqual, start [$start] has to be greater than end [$end]");
255
if( $start <= 0 || $end > $self->length ) {
256
$self->throw("You have to have start positive and length less than the total length of sequence [$start:$end] Total ".$self->length."");
259
# remove one from start, and then length is end-start
263
my @sub_qual_array = @{$self->{qual}}[$start..$end];
265
# return substr $self->seq(), $start, ($end-$start);
266
return \@sub_qual_array;
248
my ($self,$start,$end) = @_;
251
$self->throw("in subqual, start [$start] has to be greater than end [$end]");
254
if( $start <= 0 || $end > $self->length ) {
255
$self->throw("You have to have start positive and length less than the total length of sequence [$start:$end] Total ".$self->length."");
258
# remove one from start, and then length is end-start
262
my @sub_qual_array = @{$self->{qual}}[$start..$end];
264
# return substr $self->seq(), $start, ($end-$start);
265
return \@sub_qual_array;
270
270
=head2 display_id()
272
272
Title : display_id()
273
273
Usage : $id_string = $obj->display_id();
274
274
Function: returns the display id, aka the common name of the Quality
276
The semantics of this is that it is the most likely string to be
277
used as an identifier of the quality sequence, and likely to have
278
"human" readability. The id is equivalent to the ID field of the
279
GenBank/EMBL databanks and the id field of the Swissprot/sptrembl
280
database. In fasta format, the >(\S+) is presumed to be the id,
281
though some people overload the id to embed other information.
282
Bioperl does not use any embedded information in the ID field,
283
and people are encouraged to use other mechanisms (accession
284
field for example, or extending the sequence object) to solve
285
this. Notice that $seq->id() maps to this function, mainly for
286
legacy/convience issues
276
The semantics of this is that it is the most likely string to be
277
used as an identifier of the quality sequence, and likely to have
278
"human" readability. The id is equivalent to the ID field of the
279
GenBank/EMBL databanks and the id field of the Swissprot/sptrembl
280
database. In fasta format, the >(\S+) is presumed to be the id,
281
though some people overload the id to embed other information.
282
Bioperl does not use any embedded information in the ID field,
283
and people are encouraged to use other mechanisms (accession
284
field for example, or extending the sequence object) to solve
285
this. Notice that $seq->id() maps to this function, mainly for
286
legacy/convience issues
287
287
Returns : A string
385
my ($obj,$value) = @_;
386
if( defined $value) {
387
$obj->{'desc'} = $value;
387
my ($obj,$value) = @_;
388
if( defined $value) {
389
$obj->{'desc'} = $value;
389
391
return $obj->{'desc'};
396
398
Usage : $id = $qual->id();
397
399
Function: Return the ID of the quality. This should normally be (and
398
actually is in the implementation provided here) just a synonym
400
actually is in the implementation provided here) just a synonym
400
402
Returns : A string.
406
my ($self,$value) = @_;
407
if( defined $value ) {
408
my ($self,$value) = @_;
409
if( defined $value ) {
408
410
return $self->display_id($value);
410
return $self->display_id();
412
return $self->display_id();
416
Usage : $length = $qual->length();
419
Usage : $length = $qual->length();
417
420
Function: Return the length of the array holding the quality values.
418
Under most circumstances, this should match the number of quality
419
values but no validation is done when the PrimaryQual object is
420
constructed and non-digits could be put into this array. Is this
421
a bug? Just enough rope...
421
Under most circumstances, this should match the number of quality
422
values but no validation is done when the PrimaryQual object is
423
constructed and non-digits could be put into this array. Is this
424
a bug? Just enough rope...
422
425
Returns : A scalar (the number of elements in the quality array).
489
492
sub to_string_automatic {
490
493
my ($self,$sub_result,$out) = shift;
491
494
foreach (sort keys %$self) {
492
print("Working on $_\n");
493
eval { $self->$_(); };
494
if ($@) { $sub_result = ref($_); }
495
elsif (!($sub_result = $self->$_())) {
496
$sub_result = "<unset>";
498
if (ref($sub_result) eq "ARRAY") {
499
print("This thing ($_) is an array!\n");
500
$sub_result = join(',',@$sub_result);
502
$out .= "$_: ".$sub_result."\n";
495
print("Working on $_\n");
496
eval { $self->$_(); };
497
if ($@) { $sub_result = ref($_); }
498
elsif (!($sub_result = $self->$_())) {
499
$sub_result = "<unset>";
501
if (ref($sub_result) eq "ARRAY") {
502
print("This thing ($_) is an array!\n");
503
$sub_result = join(',',@$sub_result);
505
$out .= "$_: ".$sub_result."\n";