36
35
Your participation is much appreciated.
38
37
bioperl-l@bioperl.org - General discussion
39
http://bioperl.org/MailList.shtml - About the mailing lists
38
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
41
40
=head2 Reporting Bugs
43
42
Report bugs to the Bioperl bug tracking system to help us keep track
44
of the bugs and their resolution.
46
_Bug reports can be submitted via email or the web:
48
bioperl-bugs@bioperl.org
49
http://bugzilla.bioperl.org/
43
of the bugs and their resolution. Bug reports can be submitted via the
46
http://bugzilla.open-bio.org/
51
48
=head1 AUTHOR - Sheldon McKay
53
Email smckay@bcgsc.bc.ca
140
137
my ($self, $gene, $gname, $id) = @_;
139
# use name preferentially over id. We can't edit IDs in Apollo
140
# AFAIK, and this will create an orphan CDS for newly created
141
# transcipts -- I think this needs more work
142
#$id = $gname if $id && $gname;
142
144
unless ( $gene ) {
143
145
if ( defined $self->{curr_gene} ) {
144
146
return $self->{curr_gene};
246
243
if ( $type eq 'gene' ) {
247
244
$feat = $self->has_gene;
248
$feat->add_tag_value( standard_name => $id );
249
$feat->add_tag_value( gene => ($self->{curr_gname} || $id) );
245
$feat->add_tag_value( gene => ($self->{curr_gname} || $id) )
246
unless $feat->has_tag('gene');
253
249
$feat = Bio::SeqFeature::Generic->new;
254
250
$feat->primary_tag($type);
255
251
my $gene = $self->has_gene;
256
$gene->add_tag_value( standard_name => $id );
257
$gene->add_tag_value( gene => ($self->{curr_gname} || $id) );
252
$gene->add_tag_value( gene => ($self->{curr_gname} || $id) )
253
unless $gene->has_tag('gene');
254
$feat->add_tag_value( gene => ($self->{curr_gname} || $id) )
255
unless $feat->has_tag('gene');;
259
257
for ( keys %{$tags} ) {
260
258
# or else add simple tag/value pairs
261
259
if ( $_ eq 'name' && $tags->{type}->[0] eq 'gene' ) {
262
unless ( $feat->has_tag( 'gene' ) ) {
263
$feat->add_tag_value( gene => $tags->{name}->[0] );
260
$feat->add_tag_value( gene => $tags->{name}->[0] )
261
unless $feat->has_tag( 'gene' );
265
262
delete $tags->{name};
385
386
$self->_build_feature_set($set, 1);
386
387
my ($feat) = @{$self->{curr_subfeats}};
387
388
$feat->primary_tag('transcript') if $feat->primary_tag eq 'exon';
389
if ( $feat->primary_tag eq 'transcript' ) {
390
$feat->add_tag_value( gene => ($gname || $id) )
391
unless $feat->has_tag('gene');
389
395
for my $tag ( keys %{$tags} ) {
390
396
for my $val ( @{$tags->{$tag}} ) {
391
$feat->add_tag_value( $tag => $val ) if $val;
397
$feat->add_tag_value( $tag => $val )
398
if $val && ++$seen_tag{$tag.$val} < 2;
394
$feat->add_tag_value( gene => ($gname || $id) );
395
401
@feats = ($feat);
413
419
my $feat = $self->{curr_feat};
414
420
$self->_build_feature_set($set);
416
if ( $self->{curr_ltag} || $gname ) {
417
$self->{curr_feat}->add_tag_value( gene => ($gname || $self->{curr_ltag}) );
422
my $gene = $gname || $self->{curr_ltag};
424
$feat->add_tag_value( gene => $gene )
425
unless $feat->has_tag('gene');
420
427
# if there is an annotated protein product
421
428
my $cds = $self->_has_CDS( $feat );
423
my $gene = $gname || $self->{curr_ltag};
424
$feat->add_tag_value( gene => $gene );
427
my $gene = $gname || $self->{curr_ltag};
431
$feat->primary_tag('mRNA');
428
433
# we really just want one value here
429
434
$cds->remove_tag('standard_name') if $cds->has_tag('standard_name');
430
435
$cds->add_tag_value( standard_name => $sname );
431
436
$cds->remove_tag('gene') if $cds->has_tag('gene');
432
437
$cds->add_tag_value( gene => $gene );
434
# catch missing/empty protein ids
435
if ( $cds->has_tag('protein_id' )) {
436
if ( !$cds->get_tag_values('protein_id') ) {
437
$cds->remove_tag('protein_id');
438
if ( $cds->has_tag('product') ) {
439
$cds->add_tag_value($cds->get_tag_values('product'));
439
# catch empty protein ids
440
if ( $cds->has_tag('protein_id' ) && !$cds->get_tag_values('protein_id') ) {
441
my $pid = $self->protein_id($cds, $sname);
442
$cds->remove_tag('protein_id');
443
$cds->add_tag_value( protein_id => $pid );
444
446
# make sure other subfeats are tied to the transcript
445
447
# via a 'standard_name' qualifier and the gene via a 'gene' qualifier
446
448
my @subfeats = @{$self->{curr_subfeats}};
447
449
for my $sf ( @ subfeats ) {
448
$sf->add_tag_value( standard_name => $sname );
449
$sf->add_tag_value( gene => $gene );
450
$sf->add_tag_value( standard_name => $sname )
451
unless $sf->has_tag('standard_name');
452
$sf->add_tag_value( gene => $gene )
453
unless $sf->has_tag('gene');
452
$feat->add_tag_value( standard_name => $sname );
454
@feats = sort { $a->start <=> $b->start } ($cds, @subfeats);
455
unshift @feats, $feat;
456
$feat->add_tag_value( standard_name => $sname )
457
unless $feat->has_tag('standard_name');
458
$feat->add_tag_value( gene => $gene )
459
unless $feat->has_tag('gene');
461
# if the mRNA and CDS are the same length, the mRNA is redundant
462
# lose the mRNA, steal its tags and give them to the CDS
464
if ( $feat->length == $cds->length ) {
465
for my $t ( $feat->all_tags ) {
466
next if $t =~ /gene|standard_name/;
467
$cds->add_tag_value( $t => $feat->get_tag_values($t) );
472
@feats = sort { $a->start <=> $b->start } ($cds, @subfeats);
473
unshift @feats, $feat if $feat;
458
476
if ( @{$self->{curr_loc}} > 1 ) {
471
489
$feat->location( $self->{curr_loc}->[0] );
492
for ( keys %$tags ) {
493
# expunge duplicate gene attributes
494
next if /gene/ && $feat->has_tag('gene');
495
for my $v ( @{$tags->{$_}} ) {
496
$feat->add_tag_value( $_ => $v );
474
500
# make sure other subfeats are tied to the transcript
475
501
my @subfeats = @{$self->{curr_subfeats}};
476
502
for my $sf ( @ subfeats ) {
477
$sf->add_tag_value( standard_name => $sname );
478
$sf->add_tag_value( gene => $gene );
503
$sf->add_tag_value( standard_name => $sname )
504
unless $sf->has_tag('standard_name');
505
$sf->add_tag_value( gene => $gene )
506
unless $sf->has_tag('gene');
481
509
@feats = ( $feat, @subfeats );
609
637
-primary => $type );
612
# identify the translation product
613
if ( $el->{Attributes}->{produces_seq} ) {
614
my $subseq = $self->{seq_h}->{$el->{Attributes}->{produces_seq}};
640
# identify the translation product
641
my $tscript = $el->{Attributes}->{produces_seq};
642
if ( $tscript && $tscript ne 'null') {
643
my $subseq = $self->{seq_h}->{$el->{Attributes}->{produces_seq}};
615
644
$self->{curr_tags}->{product} = [$el->{Attributes}->{produces_seq}];
616
$self->{curr_tags}->{translation} = [$subseq->seq];
645
$self->{curr_tags}->{translation} = [$subseq->seq] if $subseq;
619
648
$self->flush( $el );
657
684
my @exons = $single ? $loc : $loc->sub_Location(1);
659
686
$feat->location($loc);
661
687
# try to find a peptide
662
my $seq = $self->{seq_h}->{ $tags->{product}->[0] } ||
663
$self->{seq_h}->{ $tags->{protein_id}->[0] } ||
664
$self->{seq_h}->{ $tags->{gene}->[0] } ||
665
$self->{seq_h}->{ $tags->{standard_name}->[0] };
668
$self->warn("I did not find a protein sequence for " .
669
$feat->display_name . ". I can't figure out the CDS boundaries");
670
$feat->location($loc);
673
my $peptide = $seq->display_id;
688
my $seq = $self->{seq_h}->{ $tags->{protein_id}->[0] };
689
$seq ||= $self->{seq_h}->{ $tags->{product}->[0] } ||
690
$self->{seq_h}->{ $tags->{gene}->[0] } ||
691
$self->{seq_h}->{ $tags->{standard_name}->[0] };
675
694
# Can we count on the description format being consistent?
676
# Why is critical CDS info saved as unconstrained text not
677
# specified in the DTD?
678
# Anyone have a better idea?
679
my $desc = $seq->description;
681
$desc =~ s/\]\)/\]\) /g;
682
my ($start, $stop) = ();
684
if ( $desc =~ /cds_boundaries:.+?(\d+)\.\.(\d+)/ ) {
685
($start, $stop) = ($1 - $self->{offset}, $2 - $self->{offset});
695
# Why is CDS coordinate info saved as description text not
696
# specified in the DTD? Anyone have a better idea? Aww,
697
# who am I kidding, I'm the only one who will ever read this!
698
my ($start, $stop, $peptide) = ();
700
$peptide = $seq->display_id;
701
my $desc = $seq->description || '';
703
$desc =~ s/\)(\w)/\) $1/g;
705
if ( $desc =~ /cds_boundaries:.+?(\d+)\.\.(\d+)/ ) {
706
($start, $stop) = ($1 - $self->{offset}, $2 - $self->{offset});
709
# OK, I guess the transcript must be the CDS then
710
$start = $loc->start;
688
$self->warn("I could not find the CDS boundaries");
689
$feat->location($loc);
715
$self->warn("I did not find a protein sequence for " . $feat->display_name);
693
718
delete $tags->{transcript};
695
720
# now chop off the UTRs to create a CDS
696
721
my @exons_to_add = ();
722
#warn scalar(@exons), " exons, $start, $stop\n";
698
724
my $exon = Bio::Location::Simple->new;
700
726
if ( $_->end < $start || $_->start > $stop ) {
727
#warn "exon out of range\n";
703
730
if ( $_->start < $start && $_->end > $start ) {
731
#warn "chopping off left UTR\n";
704
732
$exon->start( $start );
706
734
if ( $_->end > $stop && $_->start < $stop ) {
735
#warn "chopping off right UTR\n";
707
736
$exon->end( $stop );
727
756
my $parent = $self->{curr_gname} || $self->{curr_ltag};
728
my $cds_tags = $tags;
731
#delete $feat->{_gsf_tag_hash};
732
for ( $feat->all_tags ) {
733
$feat->remove_tag($_);
758
# try not to steal too many mRNA attributes for the CDS
760
for my $k ( keys %$tags ) {
761
if ( $k =~ /product|protein|translation|codon_start/ ) {
762
$cds_tags->{$k} = $tags->{$k};
767
for ( keys %$tags ) {
768
for my $v ( @{$tags->{$_}} ) {
769
$feat->add_tag_value( $_ => $v )
770
unless $feat->has_tag($_);
736
$cds_tags->{product_desc} = [$desc];
738
774
if ( $self->{curr_gname} ) {
739
775
$cds_tags->{gene} = [$self->{curr_gname}];