1
# $Id: Contig.pm 16123 2009-09-17 12:57:27Z cjfields $
3
2
# BioPerl module for Bio::Assembly::Contig
4
3
# Mostly based on Bio::SimpleAlign by Ewan Birney
6
5
# Please direct questions and support issues to <bioperl-l@bioperl.org>
8
# Cared for by Robson francisco de Souza <rfsouza@citri.iq.usp.br>
7
# Cared for by Robson Francisco de Souza <rfsouza@citri.iq.usp.br>
10
9
# Copyright Robson Francisco de Souza
26
25
# Assembly loading methods
27
26
$aio = Bio::Assembly::IO->new(-file=>"test.ace.1",
30
29
$assembly = $aio->next_assembly;
31
30
foreach $contig ($assembly->all_contigs) {
223
use Bio::SeqFeature::Collection;
224
use Bio::Seq::PrimaryQual;
222
use Bio::DB::SeqFeature::Store; # isa Bio::SeqFeature::CollectionI
223
use Bio::Seq::PrimaryQual; # isa Bio::Seq::QualI
226
225
use Scalar::Util qw(weaken);
235
234
Usage : my $contig = Bio::Assembly::Contig->new();
236
235
Function : Creates a new contig object
237
236
Returns : Bio::Assembly::Contig
238
Args : -id => contig unique ID
239
-source => string for the sequence assembly program used
240
-collection => Bio::SeqFeature::Collection instance
237
Args : -id => unique contig ID
238
-source => string for the sequence assembly program used
239
-collection => Bio::SeqFeature::CollectionI instance
273
272
$self->throw("Collection must implement Bio::SeqFeature::CollectionI") unless $collection->isa('Bio::SeqFeature::CollectionI');
274
273
$self->{'_sfc'} = $collection;
276
$self->{'_sfc'} = Bio::SeqFeature::Collection->new()
275
$self->{'_sfc'} = Bio::DB::SeqFeature::Store->new(
276
-adaptor => 'memory',
277
-index_subfeatures => 1,
279
281
# Assembly specifics
280
$self->{'_assembly'} = undef; # Reference to a Bio::Assembly::Scaffold object, if contig belongs to one.
282
$self->{'_assembly'} = undef; # Bio::Assembly::Scaffold the contig belongs to
281
283
$self->{'_strand'} = 0; # Reverse (-1) or forward (1), if contig is in a scaffold. 0 otherwise
282
$self->{'_neighbor_start'} = undef; # Will hold a reference to another contig
283
$self->{'_neighbor_end'} = undef; # Will hold a reference to another contig
284
$self->{'_neighbor_start'} = undef; # Neighbor Bio::Assembly::Contig
285
$self->{'_neighbor_end'} = undef; # Neighbor Bio::Assembly::Contig
285
287
return $self; # success - we hope!
474
476
Title : remove_features
475
477
Usage : $contig->remove_features(@feat)
476
478
Function : Remove an array of contig features
477
Returns : number of features removed.
478
Argument : An array of Bio::SeqFeatureI
479
Returns : true if successful
480
Argument : An array of Bio::SeqFeature::Generic (Bio::SeqFeatureI)
483
485
my ($self, @args) = @_;
485
487
# Removing shortcuts for aligned sequence features
486
foreach my $feat (@args) {
488
for my $feat (@args) {
487
489
if (my $seq = $feat->entire_seq()) {
488
my $seqID = $seq->id() || $seq->display_id || $seq->primary_id;
490
my $seqID = $seq->id || $seq->display_id || $seq->primary_id;
489
491
my $tag = $feat->primary_tag;
490
492
$tag =~ s/:$seqID$/$1/g;
491
493
delete( $self->{'_elem'}{$seqID}{'_feat'}{$tag} )
493
495
$self->{'_elem'}{$seqID}{'_feat'}{$tag} eq $feat);
497
# Removing Bio::SeqFeature::Collection features
498
return $self->{'_sfc'}->remove_features(\@args);
499
# Removing Bio::SeqFeature objects
500
return $self->get_features_collection->delete(@args);
501
503
=head2 get_features_collection
503
505
Title : get_features_collection
504
506
Usage : $contig->get_features_collection()
505
Function : Get the collection of all contig features
506
Returns : Bio::SeqFeature::Collection
507
Function : Get the collection of all contig features and seqfeatures
508
Returns : Bio::DB::SeqFeature::Store (Bio::SeqFeature::CollectionI)
723
725
Function : Get "gapped consensus" location for aligned sequence
724
726
Returns : Bio::SeqFeature::Generic for coordinates or undef.
725
727
A warning is printed if sequence coordinates were not set.
726
Argument : Bio::LocatabaleSeq object
728
Argument : Bio::LocatableSeq object
789
791
unless (defined $feat->start);
791
793
my $seqID = $seq->id() || $seq->display_id || $seq->primary_id;
792
if (exists( $self->{'_elem'}{$seqID} ) &&
793
exists( $self->{'_elem'}{$seqID}{'_seq'} ) &&
794
defined( $self->{'_elem'}{$seqID}{'_seq'} ) &&
795
($seq ne $self->{'_elem'}{$seqID}{'_seq'}) ) {
794
if ( exists( $self->{'_elem'}{$seqID} ) &&
795
exists( $self->{'_elem'}{$seqID}{'_seq'} ) &&
796
defined( $self->{'_elem'}{$seqID}{'_seq'} ) &&
797
($seq ne $self->{'_elem'}{$seqID}{'_seq'}) ) {
796
798
$self->warn("Replacing sequence $seqID\n");
797
799
$self->remove_seq($self->{'_elem'}{$seqID}{'_seq'});
800
$self->remove_features($feat);
803
# Add new sequence and Bio::Generic::SeqFeature
799
804
$self->add_seq($seq);
801
# Remove previous coordinates, if any
802
$self->remove_features($feat);
804
# Add new Bio::Generic::SeqFeature
805
$feat->add_tag_value('contig',$self->id)
806
unless ( $feat->has_tag('contig') );
807
$feat->primary_tag("_aligned_coord:$seqID");
806
$feat->add_tag_value('contig',$self->id) unless ( $feat->has_tag('contig') );
807
$feat->primary_tag("_aligned_coord");
808
$feat->source_tag($seqID);
808
809
$feat->attach_seq($seq);
809
811
$self->{'_elem'}{$seqID}{'_feat'}{"_aligned_coord:$seqID"} = $feat;
810
812
$self->add_features([ $feat ]);
854
856
sub set_consensus_quality {
857
my ($self, $qual) = @_;
858
$self->throw("Consensus quality must be a Bio::Seq::Quality object!")
859
unless ( $qual->isa("Bio::Seq::Quality") );
859
$self->throw("Consensus quality must be a Bio::Seq::QualI object!")
860
unless ( $qual->isa("Bio::Seq::QualI") );
861
862
$self->throw("Consensus quality can't be added before you set the consensus sequence!")
862
863
unless (defined $self->{'_consensus_sequence'});
951
952
my $previous = 0;
953
954
my $i = 0; my $j = 0;
954
while ($i<=$#{$tmp}) {
955
while ($i <= $#{$tmp}) {
955
956
# IF base is a gap, quality is the average for neighbouring sites
956
if (substr($sequence,$j,1) eq '-') {
957
if ($j > $i && substr($sequence,$j,1) eq '-') {
957
958
$previous = $tmp->[$i-1] unless ($i == 0);
958
959
if ($i < $#{$tmp}) {
959
960
$next = $tmp->[$i+1];
975
976
=head2 get_seq_ids
977
978
Title : get_seq_ids
978
Usage : $contig->get_seq_ids(-start=>$start,
980
-type=>"gapped A0QR67B08.b");
979
Usage : $contig->get_seq_ids( -start => $start,
981
-type => "gapped A0QR67B08.b" );
981
982
Function : Get list of sequence IDs overlapping interval [$start, $end]
982
983
The default interval is [1,$contig->length]
983
984
Default coordinate system is "gapped contig"
986
987
-start : consensus subsequence start
987
988
-end : consensus subsequence end
988
989
-type : the coordinate system type for $start and $end arguments
989
Coordinate system avaliable are:
990
Coordinate system available are:
990
991
"gapped consensus" : consensus coordinates with gaps
991
992
"ungapped consensus" : consensus coordinates without gaps
992
993
"aligned $ReadID" : read $ReadID coordinates with gaps
998
999
sub get_seq_ids {
999
1000
my ($self, @args) = @_;
1001
my ($type,$start,$end) =
1002
$self->_rearrange([qw(TYPE START END)], @args);
1002
my ($type, $start, $end) = $self->_rearrange([qw(TYPE START END)], @args);
1004
1005
if (defined($start) && defined($end)) {
1005
1006
if (defined($type) && ($type ne 'gapped consensus')) {
1006
1007
$start = $self->change_coord($type,'gapped consensus',$start);
1007
1008
$end = $self->change_coord($type,'gapped consensus',$end);
1010
my @list = grep { $_->isa("Bio::SeqFeature::Generic") &&
1011
($_->primary_tag =~ /^_aligned_coord:/) }
1012
$self->{'_sfc'}->features_in_range( -start=>$start,
1015
-strandmatch=>'ignore' );
1010
@list = $self->get_features_collection->features(
1011
-type => '_aligned_coord', # primary tag
1015
#-strandmatch => 'ignore',
1016
1017
@list = map { $_->entire_seq->id } @list;
1019
# Entire aligned sequences list
1020
@list = map { $self->{'_order'}{$_} } sort { $a cmp $b } keys %{ $self->{'_order'} };
1020
# Entire aligned sequences list
1021
return map { $self->{'_order'}{$_} } sort { $a cmp $b } keys %{ $self->{'_order'} };
1024
1026
=head2 get_seq_feat_by_tag
1026
1028
Title : get_seq_feat_by_tag
1027
1029
Usage : $seq = $contig->get_seq_feat_by_tag($seq,"_aligned_coord:$seqID")
1030
Get a sequence feature based on its primary_tag.
1030
Function : Get a sequence feature based on its primary_tag.
1033
1031
Returns : a Bio::SeqFeature object
1034
1032
Argument : a Bio::LocatableSeq and a string (feature primary tag)
1041
1039
if( !ref $seq || ! $seq->isa('Bio::LocatableSeq') ) {
1042
1040
$self->throw("Unable to process non locatable sequences [".ref($seq)."]");
1044
my $seqID = $seq->id() || $seq->display_id || $seq->primary_id;
1042
my $seqID = $seq->id || $seq->display_id || $seq->primary_id;
1046
1044
return $self->{'_elem'}{$seqID}{'_feat'}{$tag};
1139
1137
# Our locatable sequences are always considered to be complete sequences
1140
1138
$seq->start(1);
1141
1139
$seq->end($seq->_ungapped_len);
1141
my $alphabet = $seq->alphabet;
1143
$alphabet = lc($alphabet) if defined $alphabet;
1143
1145
$self->warn("Adding non-nucleotidic sequence ".$seqID)
1144
if (lc($seq->alphabet) ne 'dna' && lc($seq->alphabet) ne 'rna');
1146
if (!$alphabet || ($alphabet ne 'dna' && $alphabet ne 'rna'));
1146
1148
# build the symbol list for this sequence,
1147
1149
# will prune out the gap and missing/match chars
1408
1410
Creates a new alignment from a subset of
1409
1411
sequences. Numbering starts from 1. Sequence positions
1410
larger than num_sequences() will thow an error.
1412
larger than num_sequences() will throw an error.
1412
1414
Returns : a Bio::Assembly::Contig object
1413
1415
Args : array of integers for the sequences
1649
1651
Usage : $str = $contig->consensus_string($threshold_percent)
1650
1652
Function : Makes a strict consensus
1652
Argument : Optional treshold ranging from 0 to 100.
1654
Argument : Optional threshold ranging from 0 to 100.
1653
1655
The consensus residue has to appear at least threshold %
1654
1656
of the sequences at a given location, otherwise a '?'
1655
1657
character will be placed at that location.
2174
2176
sub no_residues {
2176
$self->deprecated(-warn_version => 1.0069,
2177
-throw_version => 1.0075);
2178
$self->deprecated(-warn_version => 1.0069,
2179
-throw_version => 1.0075);
2178
2180
$self->num_residues(@_);
2192
2194
sub no_sequences {
2194
$self->deprecated(-warn_version => 1.0069,
2195
-throw_version => 1.0075);
2196
$self->deprecated(-warn_version => 1.0069,
2197
-throw_version => 1.0075);
2196
2198
$self->num_sequences(@_);