~ubuntu-branches/ubuntu/trusty/bioperl/trusty

« back to all changes in this revision

Viewing changes to Bio/SeqFeature/Tools/Unflattener.pm

  • Committer: Package Import Robot
  • Author(s): Charles Plessy
  • Date: 2013-09-22 13:39:48 UTC
  • mfrom: (3.1.11 sid)
  • Revision ID: package-import@ubuntu.com-20130922133948-c6z62zegjyp7ztou
Tags: 1.6.922-1
* New upstream release.
* Replaces and Breaks grinder (<< 0.5.3-3~) because of overlaping contents.
  Closes: #722910
* Stop Replacing and Breaking bioperl ( << 1.6.9 ): not needed anymore. 

Show diffs side-by-side

added added

removed removed

Lines of Context:
1518
1518
   # modify the original Seq object - the top seqfeatures are now
1519
1519
   # the top features from each group
1520
1520
   $seq->remove_SeqFeatures;
1521
 
   $seq->add_SeqFeature(@top_sfs);
 
1521
   $seq->add_SeqFeature($_) foreach @top_sfs;
1522
1522
 
1523
1523
   # --------- FINISHED UNFLATTENING -------------
1524
1524
 
2399
2399
           }
2400
2400
       }
2401
2401
 
 
2402
 
2402
2403
       # SPECIAL CASE FOR /ribosomal_slippage
2403
2404
       # See: http://www.ncbi.nlm.nih.gov/collab/FT/
2404
2405
       if (!$inside && $sf->has_tag('ribosomal_slippage')) {
2405
2406
           if ($self->verbose > 0) {
2406
2407
               printf STDERR "    Checking for ribosomal_slippage\n";
2407
2408
           }
 
2409
 
 
2410
           # TODO: rewrite this to match introns;
 
2411
           #  each slippage will be a "fake" small CDS exon
2408
2412
           my @transcript_splice_sites = @container_coords;
2409
2413
           my @cds_splice_sites = @coords;
 
2414
           ##printf STDERR "xxTR SSs: @transcript_splice_sites :: %s\n", $_->get_tag_values('product');
 
2415
           ##printf STDERR "xxCD SSs: @cds_splice_sites :: %s\n\n", $sf->get_tag_values('product');
 
2416
 
2410
2417
           # find the the first splice site within the CDS
2411
2418
           while (scalar(@transcript_splice_sites) &&
2412
2419
                  $transcript_splice_sites[0] < $cds_splice_sites[0]) {
2413
2420
               shift @transcript_splice_sites;
2414
2421
           }
2415
2422
 
2416
 
           if ($transcript_splice_sites[0] == $cds_splice_sites[0]) {
 
2423
           ##print STDERR "TR SSs: @transcript_splice_sites\n";
 
2424
           ##print STDERR "CD SSs: @cds_splice_sites\n\n";
 
2425
 
 
2426
           if (!(scalar(@transcript_splice_sites)) ||
 
2427
                 $transcript_splice_sites[0] == $cds_splice_sites[0]) {
 
2428
 
 
2429
               # we will now try and align all splice remaining sites in the transcript and CDS;
 
2430
               # any splice site that can't be aligned is assumed to be a ribosomal slippage
 
2431
 
2417
2432
               my @slips = ();
2418
2433
               my $in_exon = 1;
2419
2434
               $inside = 1;   # innocent until proven guilty..
2420
2435
               while (@cds_splice_sites) {
2421
2436
                   if (!@transcript_splice_sites) {
2422
 
                       $inside = 0; # guilty!
2423
 
                       last;
 
2437
 
 
2438
                       # ribosomal slippage is after the last transcript splice site
 
2439
                       # Example: (NC_00007, isoform 3 of PEG10)
 
2440
                       #     mRNA            join(85682..85903,92646..99007)
 
2441
                       #     mRNA            join(85682..85903,92646..99007)
 
2442
                       #     CDS             join(85899..85903,92646..93825,93825..94994)
 
2443
 
 
2444
                       # OR: None of the splice sites align;
 
2445
                       #  may be a single CDS exon with one slippage inside it.
 
2446
                       # Example: (NC_00007, isoform 4 of PEG10)
 
2447
                       #     mRNA            join(85637..85892,92646..99007)
 
2448
                       #     CDS             join(92767..93825,93825..94994)
 
2449
                       
 
2450
                       # Yes, this code is repeated below...
 
2451
                       my $p1 = shift @cds_splice_sites;
 
2452
                       my $p2 = shift @cds_splice_sites;
 
2453
                       if ($self->verbose > 0) {
 
2454
                           printf STDERR "    Found the ribosomal_slippage: $p1..$p2\n";
 
2455
                       }
 
2456
                       push(@slips, ($p2-$p1)-1);
2424
2457
                   }
2425
 
                   if ($cds_splice_sites[0] == $transcript_splice_sites[0]) {
 
2458
                   elsif ($cds_splice_sites[0] == $transcript_splice_sites[0]) {
 
2459
                       # splice sites align: this is not the slippage
2426
2460
                       shift @cds_splice_sites;
2427
2461
                       shift @transcript_splice_sites;
 
2462
                       ##print STDERR "MATCH\n";
2428
2463
                   }
2429
2464
                   else {
2430
2465
                       # mismatch
2434
2469
                           # ---TTTTTTTTTT----
2435
2470
                           # ---CCCC--CCCC----
2436
2471
                           #       ^
 
2472
 
2437
2473
                           my $p1 = shift @cds_splice_sites;
2438
2474
                           my $p2 = shift @cds_splice_sites;
2439
2475
                           if ($self->verbose > 0) {
2444
2480
                       else {
2445
2481
                           # not a potential ribosomal slippage
2446
2482
                           $inside = 0; # guilty!
 
2483
                           ##print STDERR "FAIL\n";
2447
2484
                           last;
2448
2485
                       }
2449
2486
                   }