~ubuntu-branches/ubuntu/vivid/bioperl/vivid

« back to all changes in this revision

Viewing changes to Bio/SeqFeature/Tools/Unflattener.pm

  • Committer: Bazaar Package Importer
  • Author(s): Charles Plessy
  • Date: 2011-06-17 13:51:18 UTC
  • mfrom: (3.1.6 sid)
  • Revision ID: james.westby@ubuntu.com-20110617135118-hncy38e0134j8oi5
Tags: 1.6.901-1
* New upstream release.
* Point debian/watch to search.cpan.org.
* Build using dh and overrides:
  - Use Debhelper 8 (debian/rules, debian/control).
  - Simplified debian/rules.
* Split into libbio-perl-perl, as discussed with the Debian Perl team.
  (debian/control, debian/bioperl.install, debian libbio-perl-perl.install)
* debian/control:
  - Incremented Standards-Version to reflect conformance with Policy 3.9.2.
    No other changes needed.
  - Vcs-Browser URL made redirectable to viewvc.
  - Removed useless ‘svn’ in the Vcs-Svn URL.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# $Id: Unflattener.pm 16123 2009-09-17 12:57:27Z cjfields $
2
1
#
3
2
# bioperl module for Bio::SeqFeature::Tools::Unflattener
4
3
#
605
604
the bugs and their resolution.  Bug reports can be submitted via the
606
605
web:
607
606
 
608
 
  http://bugzilla.open-bio.org/
 
607
  https://redmine.open-bio.org/projects/bioperl/
609
608
 
610
609
=head1 AUTHOR - Chris Mungall
611
610
 
1109
1108
   my ($self,@args) = @_;
1110
1109
 
1111
1110
    my($seq, $resolver_method, $group_tag, $partonomy, 
1112
 
       $structure_type, $resolver_tag, $use_magic) =
 
1111
       $structure_type, $resolver_tag, $use_magic, $noinfer) =
1113
1112
        $self->_rearrange([qw(SEQ
1114
1113
                              RESOLVER_METHOD
1115
1114
                              GROUP_TAG
1117
1116
                              STRUCTURE_TYPE
1118
1117
                              RESOLVER_TAG
1119
1118
                              USE_MAGIC
 
1119
                              NOINFER
1120
1120
                             )],
1121
1121
                          @args);
1122
1122
 
1123
1123
   # seq we want to unflatten
1124
1124
   $seq = $seq || $self->seq;
 
1125
   if (!$self->seq) {
 
1126
       $self->seq($seq);
 
1127
   }
1125
1128
 
1126
1129
 
1127
1130
   # prevent bad argument combinations
1403
1406
           }
1404
1407
       }
1405
1408
 
 
1409
       $need_to_infer_exons = 0 if $noinfer; #NML
 
1410
 
1406
1411
       if ($need_to_infer_exons) {
1407
1412
           # remove exons and introns from group -
1408
1413
           # we will infer exons later, and we
1526
1531
       if ($self->verbose > 0) {
1527
1532
           printf STDERR "** INFERRING mRNA from CDS\n";
1528
1533
       }
1529
 
       $self->infer_mRNA_from_CDS(-seq=>$seq);
 
1534
       $self->infer_mRNA_from_CDS(-seq=>$seq, -noinfer=>$noinfer);
1530
1535
   }
1531
1536
 
1532
1537
   # INFERRING exons
2393
2398
               $inside = 0;
2394
2399
           }
2395
2400
       }
 
2401
 
 
2402
       # SPECIAL CASE FOR /ribosomal_slippage
 
2403
       # See: http://www.ncbi.nlm.nih.gov/collab/FT/
 
2404
       if (!$inside && $sf->has_tag('ribosomal_slippage')) {
 
2405
           if ($self->verbose > 0) {
 
2406
               printf STDERR "    Checking for ribosomal_slippage\n";
 
2407
           }
 
2408
           my @transcript_splice_sites = @container_coords;
 
2409
           my @cds_splice_sites = @coords;
 
2410
           # find the the first splice site within the CDS
 
2411
           while (scalar(@transcript_splice_sites) &&
 
2412
                  $transcript_splice_sites[0] < $cds_splice_sites[0]) {
 
2413
               shift @transcript_splice_sites;
 
2414
           }
 
2415
 
 
2416
           if ($transcript_splice_sites[0] == $cds_splice_sites[0]) {
 
2417
               my @slips = ();
 
2418
               my $in_exon = 1;
 
2419
               $inside = 1;   # innocent until proven guilty..
 
2420
               while (@cds_splice_sites) {
 
2421
                   if (!@transcript_splice_sites) {
 
2422
                       $inside = 0; # guilty!
 
2423
                       last;
 
2424
                   }
 
2425
                   if ($cds_splice_sites[0] == $transcript_splice_sites[0]) {
 
2426
                       shift @cds_splice_sites;
 
2427
                       shift @transcript_splice_sites;
 
2428
                   }
 
2429
                   else {
 
2430
                       # mismatch
 
2431
                       if ($cds_splice_sites[0] < $transcript_splice_sites[0]) {
 
2432
                           # potential slippage
 
2433
                           #             v
 
2434
                           # ---TTTTTTTTTT----
 
2435
                           # ---CCCC--CCCC----
 
2436
                           #       ^
 
2437
                           my $p1 = shift @cds_splice_sites;
 
2438
                           my $p2 = shift @cds_splice_sites;
 
2439
                           if ($self->verbose > 0) {
 
2440
                               printf STDERR "    Found the ribosomal_slippage: $p1..$p2\n";
 
2441
                           }
 
2442
                           push(@slips, ($p2-$p1)-1);
 
2443
                       }
 
2444
                       else {
 
2445
                           # not a potential ribosomal slippage
 
2446
                           $inside = 0; # guilty!
 
2447
                           last;
 
2448
                       }
 
2449
                   }
 
2450
               }
 
2451
               if ($inside) {
 
2452
                   # TODO: this is currently completely arbitrary. How many ribosomal slippages do we allow?
 
2453
                   # perhaps we need some mini-statistical model here....?
 
2454
                   if (@slips > 1) {
 
2455
                       $inside = 0;
 
2456
                   }
 
2457
                   # TODO: this is currently completely arbitrary. What is the maximum size of a ribosomal slippage?
 
2458
                   # perhaps we need some mini-statistical model here....?
 
2459
                   if (grep {$_ > 2} @slips) {
 
2460
                       $inside = 0;
 
2461
                   }
 
2462
               }
 
2463
           }
 
2464
           else {
 
2465
               # not a ribosomal_slippage, sorry
 
2466
           }
 
2467
       }
2396
2468
       if ($self->verbose > 0) {
2397
2469
           printf STDERR "    Checking containment:[$inside] (@container_coords) IN ($splice_uniq_str)\n";
2398
2470
       }
2404
2476
                $_=>$score);
2405
2477
       }
2406
2478
   }
2407
 
   # return array ( $sf1=>$score1, $sf2=>$score2, ...)
2408
2479
   return @sf_score_pairs;
2409
2480
}
2410
2481
 
2605
2676
sub infer_mRNA_from_CDS{
2606
2677
   my ($self,@args) = @_;
2607
2678
 
2608
 
   my($sf, $seq) =
 
2679
   my($sf, $seq, $noinfer) =
2609
2680
     $self->_rearrange([qw(FEATURE
2610
2681
                           SEQ
 
2682
                           NOINFER
2611
2683
                          )],
2612
2684
                          @args);
2613
2685
   my @sfs = ($sf);
2633
2705
       if (@cdsl) {
2634
2706
           my @children = grep {$_->primary_tag ne 'CDS'} $sf->get_SeqFeatures;
2635
2707
           my @mrnas = ();
 
2708
 
 
2709
 
2636
2710
           foreach my $cds (@cdsl) {
2637
2711
               
2638
2712
               if ($self->verbose > 0) {
2648
2722
                                                -strand=>$cdsexonloc->strand);
2649
2723
                   $loc->add_sub_Location($subloc);
2650
2724
               }
2651
 
               # share the same location
2652
 
               my $mrna =
2653
 
                 Bio::SeqFeature::Generic->new(-location=>$loc,
2654
 
                                               -primary_tag=>'mRNA');
2655
 
               
2656
 
               ## Provide seq_id to new feature:
2657
 
               $mrna->seq_id($cds->seq_id) if $cds->seq_id;
2658
 
               $mrna->source_tag($cds->source_tag) if $cds->source_tag;
2659
 
 
2660
 
               $self->_check_order_is_consistent($mrna->location->strand,$mrna->location->each_Location);
2661
 
 
2662
 
               # make the mRNA hold the CDS; no EXPAND option,
2663
 
               # the CDS cannot be wider than the mRNA
2664
 
               $mrna->add_SeqFeature($cds);
2665
 
 
2666
 
               # mRNA steals children of CDS
2667
 
               foreach my $subsf ($cds->get_SeqFeatures) {
2668
 
                   $mrna->add_SeqFeature($subsf);
2669
 
               }
2670
 
               $cds->remove_SeqFeatures;
2671
 
               push(@mrnas, $mrna);
 
2725
                if ($noinfer) {
 
2726
                    push(@mrnas, $cds);
 
2727
                }
 
2728
                else {
 
2729
#                   share the same location
 
2730
                    my $mrna =
 
2731
                        Bio::SeqFeature::Generic->new(-location=>$loc,
 
2732
                                -primary_tag=>'mRNA');
 
2733
 
 
2734
##                  Provide seq_id to new feature:
 
2735
                    $mrna->seq_id($cds->seq_id) if $cds->seq_id;
 
2736
                    $mrna->source_tag($cds->source_tag) if $cds->source_tag;
 
2737
 
 
2738
                    $self->_check_order_is_consistent($mrna->location->strand,$mrna->location->each_Location);
 
2739
 
 
2740
#                   make the mRNA hold the CDS; no EXPAND option,
 
2741
#                   the CDS cannot be wider than the mRNA
 
2742
                    $mrna->add_SeqFeature($cds);
 
2743
 
 
2744
#                   mRNA steals children of CDS
 
2745
                    foreach my $subsf ($cds->get_SeqFeatures) {
 
2746
                        $mrna->add_SeqFeature($subsf);
 
2747
                    }
 
2748
                    $cds->remove_SeqFeatures;
 
2749
                    push(@mrnas, $mrna);
 
2750
                }
2672
2751
           }
2673
 
           # change gene/CDS to gene/mRNA
 
2752
#          change gene/CDS to gene/mRNA
2674
2753
           $sf->remove_SeqFeatures;
2675
2754
           $sf->add_SeqFeature($_) foreach (@mrnas, @children);
2676
2755
       }
2760
2839
        my $rangeP = $ranges[$i-1];
2761
2840
        my $range = $ranges[$i];
2762
2841
            if ($rangeP->start > $range->end) {
2763
 
                # failed - but still get one more chance..
2764
 
                $pass = 0;
2765
 
                $self->problem(2,"Ranges not in correct order. Strange ensembl genbank entry? Range: $rangestr");
2766
 
                last;
 
2842
                if ($self->seq->is_circular) {
 
2843
                    # see for example NC_006578.gbk
 
2844
                    # we make exceptions for circular genomes here.
 
2845
                    # see Re: [Gmod-ajax] flatfile-to-json.pl error with GFF
 
2846
                    # 2010-07-26
 
2847
                }
 
2848
                else {
 
2849
                    # failed - but still get one more chance..
 
2850
                    $pass = 0;
 
2851
                    $self->problem(2,"Ranges not in correct order. Strange ensembl genbank entry? Range: $rangestr");
 
2852
                    last;
 
2853
                }
2767
2854
            }
2768
2855
    }
2769
2856