~ubuntu-branches/ubuntu/precise/gmap/precise

« back to all changes in this revision

Viewing changes to src/gmap.c

  • Committer: Package Import Robot
  • Author(s): Shaun Jackman
  • Date: 2011-12-07 10:46:12 UTC
  • mfrom: (1.1.9)
  • Revision ID: package-import@ubuntu.com-20111207104612-4jdx1gj67oi1tx0w
Tags: 2011-11-30-1
New upstream release.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
static char rcsid[] = "$Id: gmap.c 49877 2011-10-17 03:26:57Z twu $";
 
1
static char rcsid[] = "$Id: gmap.c 53583 2011-12-02 18:23:41Z twu $";
2
2
#ifdef HAVE_CONFIG_H
3
3
#include <config.h>
4
4
#endif
132
132
                                   Also should exceed length of
133
133
                                   repeated nucleotides (e.g., a
134
134
                                   string of consecutive T's) */
135
 
static int maxpeelback_distalmedial = 24;
136
135
#else
137
136
/* Making minindexsize too small can lead to spurious exons in stage 2 */
138
137
/* FOOBAR */
145
144
                                   Also should exceed length of
146
145
                                   repeated nucleotides (e.g., a
147
146
                                   string of consecutive T's) */
148
 
static int maxpeelback_distalmedial = 24; /* Needs to be longer to fix bad end exons */
149
147
#endif
 
148
static int maxpeelback_distalmedial = 100; /* Needs to be longer to fix bad end exons */
150
149
 
151
150
/* static int stuttercycles = 2; */
152
151
static int stutterhits = 3;
203
202
#endif
204
203
 
205
204
static int min_intronlength = 9;
 
205
static int max_deletionlength = 50;
206
206
static int maxtotallen_bound = 2400000;
207
207
static int maxintronlen_bound = 1000000;
208
208
static int chimera_margin = 40; /* Useful for finding readthroughs */
234
234
static bool use_shifted_canonical_p = false; /* Use this for cross-species */
235
235
static char *user_chrsubsetname = NULL;
236
236
static int close_indels_mode = +1;
237
 
static double microexon_spliceprob = 0.90;
 
237
static double microexon_spliceprob = 0.95;
 
238
static int suboptimal_score_start = -1; /* Determined by simulations to have minimal effect */
 
239
static int suboptimal_score_end = 3; /* Determined by simulations to have diminishing returns above 3 */
 
240
 
 
241
static int trim_mismatch_score = -3;
 
242
static int trim_indel_score = -4;
238
243
 
239
244
 
240
245
/* Output options */
249
254
static bool checkp = false;
250
255
static int maxpaths = 5;        /* 0 means 1 if nonchimeric, 2 if chimeric */
251
256
static bool quiet_if_excessive_p = false;
 
257
static int suboptimal_score = 1000000;
252
258
 
253
259
 
254
260
/* SAM */
256
262
static bool sam_paired_p = false;
257
263
static bool user_quality_shift = false;
258
264
static int quality_shift = 0;
259
 
static bool cigar_noncanonical_splices_p = true;
260
265
static bool sam_headers_p = true;
261
266
static char *sam_read_group_id = NULL;
262
267
static char *sam_read_group_name = NULL;
384
389
#endif
385
390
  {"allow-close-indels", required_argument, 0, 0}, /* close_indels_mode, extraband_single */
386
391
  {"microexon-spliceprob", required_argument, 0, 0}, /* microexon_spliceprob */
 
392
  {"stage2-start", required_argument, 0, 0},         /* suboptimal_score_start */
 
393
  {"stage2-end", required_argument, 0, 0},           /* suboptimal_score_end */
387
394
 
388
395
  /* Output options */
389
396
  {"output-buffer-size", required_argument, 0, 0}, /* output_buffer_size */
402
409
  {"nofails", no_argument, 0, 0}, /* nofailsp */
403
410
  {"fails-as-input", no_argument, 0, 0}, /* fails_as_input_p */
404
411
  {"split-output", required_argument, 0, 0}, /* sevenway_root */
 
412
  {"suboptimal-score", required_argument, 0, 0}, /* suboptimal_score */
405
413
 
406
414
#ifndef PMAP
407
415
  {"quality-protocol", required_argument, 0, 0}, /* quality_shift */
408
416
  {"quality-print-shift", required_argument, 0, 'j'}, /* quality_shift */
409
417
  {"no-sam-headers", no_argument, 0, 0},        /* sam_headers_p */
410
 
  {"noncanonical-splices", required_argument, 0, 0}, /* cigar_noncanonical_splices_p */
411
418
  {"read-group-id", required_argument, 0, 0},   /* sam_read_group_id */
412
419
  {"read-group-name", required_argument, 0, 0}, /* sam_read_group_name */
413
420
  {"read-group-library", required_argument, 0, 0}, /* sam_read_group_library */
513
520
 
514
521
 
515
522
static Stage3_T *
516
 
stage3array_from_list (int *npaths, List_T stage3list, bool chimerap, bool remove_overlaps_p) {
 
523
stage3array_from_list (int *npaths, int *second_absmq, List_T stage3list, bool chimerap, bool remove_overlaps_p) {
517
524
  Stage3_T *array1, *array0, x, y;
518
525
  bool *eliminate;
519
526
  int norig, i, j;
 
527
  int threshold_score;
 
528
 
520
529
 
521
530
  Stage3_recompute_goodness(stage3list);
522
531
 
523
532
  if ((norig = List_length(stage3list)) == 0) {
 
533
    *second_absmq = 0;
524
534
    return (Stage3_T *) NULL;
525
535
 
526
536
  } else if (chimerap == true) {
527
537
    array0 = (Stage3_T *) List_to_array(stage3list,NULL);
528
538
    List_free(&stage3list);
529
 
    if (norig > 2) {
 
539
    if (norig <= 2) {
 
540
      *second_absmq = 0;
 
541
    } else {
530
542
      qsort(&(array0[2]),norig-2,sizeof(Stage3_T),Stage3_cmp);
 
543
      *second_absmq = Stage3_absmq_score(array0[2]);
531
544
    }
532
545
    *npaths = norig;
533
546
    return array0;
536
549
    array0 = (Stage3_T *) List_to_array(stage3list,NULL);
537
550
    List_free(&stage3list);
538
551
    qsort(array0,norig,sizeof(Stage3_T),Stage3_cmp);
539
 
    *npaths = norig;
 
552
 
 
553
    threshold_score = Stage3_goodness(array0[0]) - suboptimal_score;
 
554
    i = 1;
 
555
    while (i < norig && Stage3_goodness(array0[i]) >= threshold_score) {
 
556
      i++;
 
557
    }
 
558
    *npaths = i;
 
559
 
 
560
    if (*npaths < 2) {
 
561
      *second_absmq = 0;
 
562
    } else {
 
563
      *second_absmq = Stage3_absmq_score(array0[1]);
 
564
    }
 
565
 
540
566
    return array0;
541
567
 
542
568
  } else {
576
602
    FREE(array0);
577
603
    FREE(eliminate);
578
604
 
 
605
    threshold_score = Stage3_goodness(array1[0]) - suboptimal_score;
 
606
    i = 1;
 
607
    while (i < *npaths && Stage3_goodness(array1[i]) >= threshold_score) {
 
608
      i++;
 
609
    }
 
610
    *npaths = i;
 
611
 
 
612
    if (*npaths < 2) {
 
613
      *second_absmq = 0;
 
614
    } else {
 
615
      *second_absmq = Stage3_absmq_score(array1[1]);
 
616
    }
579
617
    return array1;
580
618
  }
581
619
}
599
637
 
600
638
  Sequence_T genomicuc;
601
639
  Genomicpos_T genomicend;
602
 
  List_T path;
 
640
  List_T all_paths, path, p;
603
641
  Stage3_T stage3;
604
642
 
605
643
  struct Pair_T *pairarray;
606
644
  List_T pairs;
607
645
  int npairs, cdna_direction, matches, unknowns, mismatches, qopens, qindels, topens, tindels,
608
646
    ncanonical, nsemicanonical, nnoncanonical;
609
 
  int nmatches_pretrim;
 
647
  int nmatches_pretrim, nmatches_posttrim;
610
648
  int sensedir;
611
649
  int ambig_end_length_5, ambig_end_length_3;
612
650
  Splicetype_T ambig_splicetype_5, ambig_splicetype_3;
625
663
  } else {
626
664
    genomicend = genomicstart + Sequence_fulllength(genomicseg);
627
665
  }
628
 
  path = Stage2_compute(&stage2_source,&stage2_indexsize,
629
 
                        Sequence_trimpointer(queryseq),Sequence_trimpointer(queryuc),
630
 
                        Sequence_trimlength(queryseq),/*query_offset*/0,
631
 
 
632
 
                        Sequence_fullpointer(genomicseg),Sequence_fullpointer(genomicuc),
633
 
                        genomicstart,genomicend,/*mappingstart*/genomicstart,/*mappingend*/genomicend,
634
 
                        /*plusp*/watsonp,/*genomiclength*/Sequence_fulllength(genomicseg),
635
 
                        /*genomic_offset*/0,
636
 
 
637
 
                        oligoindices_major,noligoindices_major,/*proceed_pctcoverage*/0.5,
638
 
                        pairpool,diagpool,sufflookback,nsufflookback,maxintronlen_bound,
639
 
                        /*localp*/true,/*skip_repetitive_p*/true,use_shifted_canonical_p,
640
 
                        /*favor_right_p*/false,debug_graphic_p,diagnosticp,
641
 
                        worker_stopwatch,diag_debug);
642
 
  if (diag_debug == true) {
643
 
    stage3list = path;          /* really diagonals */
644
 
 
645
 
  } else if (path != NULL) {
646
 
    debug(printf("Beginning Stage3_compute\n"));
647
 
 
648
 
    if (canonical_mode == 0) {
649
 
      do_final_p = false;
650
 
    } else if (canonical_mode == 1) {
651
 
      do_final_p = true;
652
 
    } else if (lowidentityp == false) {
653
 
      do_final_p = false;
654
 
    } else {
655
 
      do_final_p = true;
656
 
    }
657
 
 
658
 
    Stopwatch_start(worker_stopwatch);
659
 
    pairarray = Stage3_compute(&pairs,&npairs,&cdna_direction,&sensedir,&matches,
660
 
                               &nmatches_pretrim,&ambig_end_length_5,&ambig_end_length_3,
661
 
                               &ambig_splicetype_5,&ambig_splicetype_3,
662
 
                               &unknowns,&mismatches,&qopens,&qindels,&topens,&tindels,
663
 
                               &ncanonical,&nsemicanonical,&nnoncanonical,&defect_rate,
664
 
                               path,genomiclength,
 
666
 
 
667
  all_paths = Stage2_compute(&stage2_source,&stage2_indexsize,
 
668
                             Sequence_trimpointer(queryseq),Sequence_trimpointer(queryuc),
 
669
                             Sequence_trimlength(queryseq),/*query_offset*/0,
 
670
 
 
671
                             Sequence_fullpointer(genomicseg),Sequence_fullpointer(genomicuc),
 
672
                             genomicstart,genomicend,/*mappingstart*/genomicstart,/*mappingend*/genomicend,
 
673
                             /*plusp*/watsonp,/*genomiclength*/Sequence_fulllength(genomicseg),
 
674
                             /*genomic_offset*/0,
 
675
 
 
676
                             oligoindices_major,noligoindices_major,/*proceed_pctcoverage*/0.5,
 
677
                             pairpool,diagpool,sufflookback,nsufflookback,maxintronlen_bound,
 
678
                             /*localp*/true,/*skip_repetitive_p*/true,use_shifted_canonical_p,
 
679
                             /*favor_right_p*/false,/*just_one_p*/false,debug_graphic_p,
 
680
                             diagnosticp,worker_stopwatch,diag_debug);
 
681
 
 
682
  for (p = all_paths; p != NULL; p = List_next(p)) {
 
683
    path = (List_T) List_head(p);
 
684
    if (diag_debug == true) {
 
685
      stage3list = path;                /* really diagonals */
 
686
 
 
687
    } else if (path != NULL) {
 
688
      debug(printf("Beginning Stage3_compute\n"));
 
689
 
 
690
      if (canonical_mode == 0) {
 
691
        do_final_p = false;
 
692
      } else if (canonical_mode == 1) {
 
693
        do_final_p = true;
 
694
      } else if (lowidentityp == false) {
 
695
        do_final_p = false;
 
696
      } else {
 
697
        do_final_p = true;
 
698
      }
 
699
 
 
700
      Stopwatch_start(worker_stopwatch);
 
701
      pairarray = Stage3_compute(&pairs,&npairs,&cdna_direction,&sensedir,&matches,
 
702
                                 &nmatches_pretrim,&nmatches_posttrim,
 
703
                                 &ambig_end_length_5,&ambig_end_length_3,
 
704
                                 &ambig_splicetype_5,&ambig_splicetype_3,
 
705
                                 &unknowns,&mismatches,&qopens,&qindels,&topens,&tindels,
 
706
                                 &ncanonical,&nsemicanonical,&nnoncanonical,&defect_rate,
 
707
                                 path,genomiclength,
665
708
#ifdef PMAP
666
 
                               /*queryaaseq_ptr*/Sequence_fullpointer(queryseq),
667
 
                               /*queryseq_ptr*/Sequence_fullpointer(queryntseq),
668
 
                               /*queryuc_ptr*/Sequence_fullpointer(queryntseq),
669
 
                               /*querylength*/Sequence_fulllength(queryntseq),
670
 
                               /*skiplength*/Sequence_skiplength(queryntseq),
671
 
                               /*query_subseq_offset*/Sequence_subseq_offset(queryntseq),
 
709
                                 /*queryaaseq_ptr*/Sequence_fullpointer(queryseq),
 
710
                                 /*queryseq_ptr*/Sequence_fullpointer(queryntseq),
 
711
                                 /*queryuc_ptr*/Sequence_fullpointer(queryntseq),
 
712
                                 /*querylength*/Sequence_fulllength(queryntseq),
 
713
                                 /*skiplength*/Sequence_skiplength(queryntseq),
 
714
                                 /*query_subseq_offset*/Sequence_subseq_offset(queryntseq),
672
715
#else
673
 
                               /*queryseq_ptr*/Sequence_fullpointer(queryseq),
674
 
                               /*queryuc_ptr*/Sequence_fullpointer(queryuc),
675
 
                               /*querylength*/Sequence_fulllength(queryseq),
676
 
                               /*skiplength*/Sequence_skiplength(queryseq),
677
 
                               /*query_subseq_offset*/Sequence_subseq_offset(queryseq),
 
716
                                 /*queryseq_ptr*/Sequence_fullpointer(queryseq),
 
717
                                 /*queryuc_ptr*/Sequence_fullpointer(queryuc),
 
718
                                 /*querylength*/Sequence_fulllength(queryseq),
 
719
                                 /*skiplength*/Sequence_skiplength(queryseq),
 
720
                                 /*query_subseq_offset*/Sequence_subseq_offset(queryseq),
678
721
#endif
679
 
                               /*genomicseg_ptr*/Sequence_fullpointer(genomicseg),
680
 
                               /*genomicuc_ptr*/Sequence_fullpointer(genomicuc),
681
 
                               chrnum,chroffset,chrpos,
682
 
                               /*knownsplice_limit_low*/0U,/*knownsplice_limit_high*/-1U,
683
 
                               genome,/*usersegment_p*/usersegment ? true : false,
684
 
                               watsonp,/*jump_late_p*/watsonp ? false : true,
685
 
                               maxpeelback,maxpeelback_distalmedial,nullgap,
686
 
                               extramaterial_end,extramaterial_paired,
687
 
                               extraband_single,extraband_end,extraband_paired,
688
 
                               minendexon,pairpool,dynprogL,dynprogM,dynprogR,ngap,
689
 
                               stage3debug,diagnosticp,checkp,do_final_p,sense_try,sense_filter,
690
 
                               oligoindices_minor,noligoindices_minor,diagpool,
691
 
                               sufflookback,nsufflookback,maxintronlen_bound,close_indels_mode);
692
 
    stage3_runtime = Stopwatch_stop(worker_stopwatch);
693
 
    if (pairarray == NULL) {
694
 
      /* Skip */
695
 
    } else if (matches < min_matches) {
696
 
      FREE_OUT(pairarray);
697
 
    } else if ((stage3 = Stage3_new(pairarray,pairs,npairs,cdna_direction,genomicstart,genomiclength,
698
 
                                    stage2_source,stage2_indexsize,matches,unknowns,mismatches,
699
 
                                    qopens,qindels,topens,tindels,ncanonical,nsemicanonical,nnoncanonical,
700
 
                                    defect_rate,chrnum,chroffset,chrpos,watsonp,
701
 
                                    /*skiplength*/Sequence_skiplength(queryseq),
702
 
                                    /*trimlength*/Sequence_trimlength(queryseq),
703
 
                                    stage3_runtime,straintype,strain,altstrain_iit)) != NULL) {
704
 
      stage3list = List_push(stage3list,stage3);
 
722
                                 /*genomicseg_ptr*/Sequence_fullpointer(genomicseg),
 
723
                                 /*genomicuc_ptr*/Sequence_fullpointer(genomicuc),
 
724
                                 chrnum,chroffset,chrpos,
 
725
                                 /*knownsplice_limit_low*/0U,/*knownsplice_limit_high*/-1U,
 
726
                                 genome,/*usersegment_p*/usersegment ? true : false,
 
727
                                 watsonp,/*jump_late_p*/watsonp ? false : true,
 
728
                                 maxpeelback,maxpeelback_distalmedial,nullgap,
 
729
                                 extramaterial_end,extramaterial_paired,
 
730
                                 extraband_single,extraband_end,extraband_paired,
 
731
                                 minendexon,pairpool,dynprogL,dynprogM,dynprogR,ngap,
 
732
                                 stage3debug,diagnosticp,checkp,do_final_p,sense_try,sense_filter,
 
733
                                 oligoindices_minor,noligoindices_minor,diagpool,
 
734
                                 sufflookback,nsufflookback,maxintronlen_bound,close_indels_mode,
 
735
                                 /*paired_favor_mode*/0,/*zero_offset*/0);
 
736
      stage3_runtime = Stopwatch_stop(worker_stopwatch);
 
737
      if (pairarray == NULL) {
 
738
        /* Skip */
 
739
      } else if (matches < min_matches) {
 
740
        FREE_OUT(pairarray);
 
741
      } else if ((stage3 = Stage3_new(pairarray,pairs,npairs,cdna_direction,genomicstart,genomiclength,
 
742
                                      stage2_source,stage2_indexsize,matches,unknowns,mismatches,
 
743
                                      qopens,qindels,topens,tindels,ncanonical,nsemicanonical,nnoncanonical,
 
744
                                      defect_rate,chrnum,chroffset,chrpos,watsonp,
 
745
                                      /*skiplength*/Sequence_skiplength(queryseq),
 
746
                                      /*trimlength*/Sequence_trimlength(queryseq),
 
747
                                      stage3_runtime,straintype,strain,altstrain_iit)) != NULL) {
 
748
        stage3list = List_push(stage3list,(void *) stage3);
 
749
      }
705
750
    }
706
751
  }
707
752
 
 
753
  List_free(&all_paths);
 
754
 
708
755
  Sequence_free(&genomicuc);
709
756
 
710
757
  return stage3list;
772
819
 
773
820
 
774
821
static Stage3_T *
775
 
stage3_from_usersegment (int *npaths, bool lowidentityp, Sequence_T queryseq,
 
822
stage3_from_usersegment (int *npaths, int *second_absmq, bool lowidentityp, Sequence_T queryseq,
776
823
                         Sequence_T queryuc, Sequence_T usersegment,
777
824
                         Oligoindex_T *oligoindices_major, int noligoindices_major,
778
825
                         Oligoindex_T *oligoindices_minor, int noligoindices_minor,
824
871
    *npaths = 0;
825
872
    return NULL;
826
873
  } else {
827
 
    return stage3array_from_list(&(*npaths),stage3list,/*chimerap*/false,/*remove_overlaps_p*/true);
 
874
    return stage3array_from_list(&(*npaths),&(*second_absmq),stage3list,/*chimerap*/false,/*remove_overlaps_p*/true);
828
875
  }
829
876
}
830
877
 
837
884
 
838
885
  if ((n = List_length(stage3list)) == 0) {
839
886
    return (List_T) NULL;
 
887
  } else if (n == 1) {
 
888
    return stage3list;
840
889
  } else {
841
890
    array = (Stage3_T *) List_to_array(stage3list,NULL);
842
891
    List_free(&stage3list);
844
893
    for (i = n-1; i >= 0; i--) {
845
894
      sorted = List_push(sorted,(void *) array[i]);
846
895
    }
 
896
    FREE(array);
 
897
 
847
898
    return sorted;
848
899
  }
849
900
}
1520
1571
    /* Check to see if we can merge chimeric parts */
1521
1572
    if (Stage3_mergeable(&comp,&genomegap,stage3array_sub1[bestfrom],stage3array_sub2[bestto],
1522
1573
                         exonexonpos,queryntlength,cdna_direction,donor_prob,acceptor_prob) == true) {
1523
 
      debug2(printf("Mergeable! -- Merging left and right readthrough\n"));
 
1574
      debug2(printf("Mergeable! -- Merging left and right as a readthrough\n"));
1524
1575
      List_free(&stage3list);
1525
1576
      stage3list = merge_left_and_right_readthrough(stage3array_sub1,npaths_sub1,bestfrom,
1526
1577
                                                    stage3array_sub2,npaths_sub2,bestto,
1540
1591
               Stage3_test_bounds(stage3array_sub2[bestto],chimerapos+1-chimera_overlap,queryntlength) == true) {
1541
1592
      *chimera = Chimera_new(chimerapos,chimeraequivpos,exonexonpos,cdna_direction,
1542
1593
                             donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob);
1543
 
      debug2(printf("Merging left and right transloc\n"));
 
1594
      debug2(printf("Not mergeable -- Merging left and right as a transloc\n"));
1544
1595
      List_free(&stage3list);
1545
1596
      stage3list = merge_left_and_right_transloc(stage3array_sub1,npaths_sub1,bestfrom,
1546
1597
                                                 stage3array_sub2,npaths_sub2,bestto,
1570
1621
  Stage3_T nonchimericbest;
1571
1622
  bool testchimerap = false;
1572
1623
  int effective_start, effective_end;
 
1624
 
1573
1625
  
1574
1626
  *chimera = NULL;
1575
1627
 
1649
1701
 
1650
1702
  List_T gregions, stage3list;
1651
1703
  Stage3_T *stage3array;
1652
 
  int npaths;
 
1704
  int npaths, second_absmq;
1653
1705
 
1654
1706
  jobid = Request_id(request);
1655
1707
  queryseq = Request_queryseq(request);
1658
1710
  Diagpool_reset(diagpool);
1659
1711
 
1660
1712
  if (Sequence_fulllength_given(queryseq) <= 0) {
1661
 
    result = Result_new(jobid,(Chimera_T) NULL,(Stage3_T *) NULL,0,/*diagnostic*/NULL,
1662
 
                        EMPTY_SEQUENCE);
 
1713
    result = Result_new(jobid,(Chimera_T) NULL,(Stage3_T *) NULL,/*npaths*/0,/*second_absmq*/0,
 
1714
                        /*diagnostic*/NULL,EMPTY_SEQUENCE);
1663
1715
      
1664
1716
  } else if (Sequence_fulllength_given(queryseq) < 
1665
1717
#ifdef PMAP
1668
1720
             index1part
1669
1721
#endif
1670
1722
             ) {
1671
 
    result = Result_new(jobid,(Chimera_T) NULL,(Stage3_T *) NULL,0,/*diagnostic*/NULL,
1672
 
                        SHORT_SEQUENCE);
 
1723
    result = Result_new(jobid,(Chimera_T) NULL,(Stage3_T *) NULL,/*npaths*/0,/*second_absmq*/0,
 
1724
                        /*diagnostic*/NULL,SHORT_SEQUENCE);
1673
1725
 
1674
1726
  } else {                      /* Sequence_fulllength_given(queryseq) > 0 */
1675
1727
    queryuc = Sequence_uppercase(queryseq);
1710
1762
 
1711
1763
#ifndef PMAP
1712
1764
    if (poorp == true && prune_poor_p == true) {
1713
 
      result = Result_new(jobid,(Chimera_T) NULL,(Stage3_T *) NULL,0,diagnostic,POOR_SEQUENCE);
 
1765
      result = Result_new(jobid,(Chimera_T) NULL,(Stage3_T *) NULL,/*npaths*/0,/*second_absmq*/0,
 
1766
                          diagnostic,POOR_SEQUENCE);
1714
1767
 
1715
1768
    } else if (repetitivep == true && prune_repetitive_p == true) {
1716
 
      result = Result_new(jobid,(Chimera_T) NULL,(Stage3_T *) NULL,0,diagnostic,REPETITIVE);
 
1769
      result = Result_new(jobid,(Chimera_T) NULL,(Stage3_T *) NULL,/*npaths*/0,/*second_absmq*/0,
 
1770
                          diagnostic,REPETITIVE);
1717
1771
 
1718
1772
    }
1719
1773
#endif
1726
1780
      Sequence_trim(queryuc,diagnostic->query_trim_start,diagnostic->query_trim_end);
1727
1781
#endif
1728
1782
#endif
1729
 
      stage3array = stage3_from_usersegment(&npaths,/*lowidentityp*/false,queryseq,queryuc,usersegment,
 
1783
      stage3array = stage3_from_usersegment(&npaths,&second_absmq,/*lowidentityp*/false,queryseq,queryuc,usersegment,
1730
1784
                                            oligoindices_major,noligoindices_major,oligoindices_minor,noligoindices_minor,
1731
1785
                                            pairpool,diagpool,dynprogL,dynprogM,dynprogR,worker_stopwatch);
1732
 
      result = Result_new(jobid,(Chimera_T) NULL,stage3array,npaths,diagnostic,NO_FAILURE);
 
1786
      result = Result_new(jobid,(Chimera_T) NULL,stage3array,npaths,second_absmq,diagnostic,NO_FAILURE);
1733
1787
 
1734
1788
    } else {            /* Not user segment and not maponly */
1735
1789
#ifndef PMAP
1758
1812
        if (diag_debug == true) {
1759
1813
          result = Result_new_diag_debug(jobid,/*diagonals*/stage3list,diagnostic,NO_FAILURE);
1760
1814
        } else if (stage3list == NULL) {
1761
 
          npaths = 0;
1762
 
          stage3array = (Stage3_T *) NULL;
1763
 
          result = Result_new(jobid,chimera,stage3array,npaths,diagnostic,NO_FAILURE);
 
1815
          result = Result_new(jobid,chimera,/*stage3array*/NULL,/*npaths*/0,/*second_absmq*/0,diagnostic,NO_FAILURE);
1764
1816
        } else if (chimera == NULL) {
1765
 
          stage3array = stage3array_from_list(&npaths,stage3list,/*chimerap*/false,/*remove_overlaps_p*/true);
1766
 
          result = Result_new(jobid,chimera,stage3array,npaths,diagnostic,NO_FAILURE);
 
1817
          stage3array = stage3array_from_list(&npaths,&second_absmq,stage3list,/*chimerap*/false,/*remove_overlaps_p*/true);
 
1818
          result = Result_new(jobid,/*chimera*/NULL,stage3array,npaths,second_absmq,diagnostic,NO_FAILURE);
1767
1819
        } else {
1768
 
          stage3array = stage3array_from_list(&npaths,stage3list,/*chimerap*/true,/*remove_overlaps_p*/false);
1769
 
          result = Result_new(jobid,chimera,stage3array,npaths,diagnostic,NO_FAILURE);
 
1820
          stage3array = stage3array_from_list(&npaths,&second_absmq,stage3list,/*chimerap*/true,/*remove_overlaps_p*/false);
 
1821
          result = Result_new(jobid,chimera,stage3array,npaths,second_absmq,diagnostic,NO_FAILURE);
1770
1822
        }
1771
1823
      }
1772
1824
 
2407
2459
      } else if (!strcmp(long_name,"cmdline")) {
2408
2460
        user_cmdline = optarg; break;
2409
2461
 
 
2462
      } else if (!strcmp(long_name,"suboptimal-score")) {
 
2463
        suboptimal_score = atoi(check_valid_int(optarg));
 
2464
 
2410
2465
      } else if (!strcmp(long_name,"splicingdir")) {
2411
2466
        user_splicingdir = optarg;
2412
2467
 
2435
2490
        }
2436
2491
      } else if (!strcmp(long_name,"microexon-spliceprob")) {
2437
2492
        microexon_spliceprob = atof(check_valid_float(optarg));
 
2493
      } else if (!strcmp(long_name,"stage2-start")) {
 
2494
        suboptimal_score_start = atoi(check_valid_int(optarg));
 
2495
      } else if (!strcmp(long_name,"stage2-end")) {
 
2496
        suboptimal_score_end = atoi(check_valid_int(optarg));
2438
2497
 
2439
2498
      } else if (!strcmp(long_name,"canonical-mode")) {
2440
2499
        if (!strcmp(optarg,"0")) {
2480
2539
#ifndef PMAP
2481
2540
      } else if (!strcmp(long_name,"no-sam-headers")) {
2482
2541
        sam_headers_p = false;
2483
 
      } else if (!strcmp(long_name,"noncanonical-splices")) {
2484
 
        if (optarg[0] == 'N') {
2485
 
          cigar_noncanonical_splices_p = true;
2486
 
        } else if (optarg[0] == 'D') {
2487
 
          cigar_noncanonical_splices_p = false;
2488
 
        } else {
2489
 
          fprintf(stderr,"Argument %s to --noncanonical-splices not recognized.  Allowed values: N, D.\n",optarg);
2490
 
          exit(9);
2491
 
        }
2492
2542
      } else if (!strcmp(long_name,"quality-protocol")) {
2493
2543
        if (user_quality_shift == true) {
2494
2544
          fprintf(stderr,"Cannot specify both -j (--quality-print-shift) and --quality-protocol\n");
3245
3295
  Indexdb_setup(index1part);
3246
3296
  Stage1_setup(index1part);
3247
3297
#endif
3248
 
  Stage2_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true);
 
3298
  Stage2_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,
 
3299
               suboptimal_score_start,suboptimal_score_end);
3249
3300
  Dynprog_setup(splicing_iit,splicing_divint_crosstable,donor_typeint,acceptor_typeint,
3250
3301
                splicesites,splicetypes,splicedists,nsplicesites,
3251
 
                trieoffsets_obs,triecontents_obs,trieoffsets_max,triecontents_max,
3252
 
                microexon_spliceprob);
 
3302
                trieoffsets_obs,triecontents_obs,trieoffsets_max,triecontents_max);
 
3303
  Pair_setup(trim_mismatch_score,trim_indel_score);
3253
3304
  Stage3_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,
3254
3305
               splicing_iit,splicing_divint_crosstable,donor_typeint,acceptor_typeint,
3255
 
               splicesites,min_intronlength);
 
3306
               splicesites,min_intronlength,max_deletionlength,
 
3307
               /*expected_pairlength*/0,/*pairlength_deviation*/0);
3256
3308
  Splicetrie_setup(splicesites,splicefrags_ref,splicefrags_alt,
3257
3309
                   trieoffsets_obs,triecontents_obs,trieoffsets_max,triecontents_max,
3258
 
                   /*snpp*/false,amb_closest_p);
 
3310
                   /*snpp*/false,amb_closest_p,/*amb_clip_p*/true,/*min_shortend*/2);
3259
3311
 
3260
3312
  /* Setup outbuffer */
3261
3313
#ifndef PMAP
3274
3326
                            chrsubset,contig_iit,altstrain_iit,map_iit,
3275
3327
                            map_divint_crosstable,printtype,checksump,chimera_margin,
3276
3328
#ifndef PMAP
3277
 
                            sam_headers_p,quality_shift,sam_paired_p,cigar_noncanonical_splices_p,
 
3329
                            sam_headers_p,quality_shift,sam_paired_p,
3278
3330
                            sam_read_group_id,sam_read_group_name,
3279
3331
                            sam_read_group_library,sam_read_group_platform,
3280
3332
#endif
3506
3558
  -x, --chimera-margin=INT       Amount of unaligned sequence that triggers\n\
3507
3559
                                   search for the remaining sequence (default 40).\n\
3508
3560
                                   Enables alignment of chimeric reads, and may help\n\
3509
 
                                   with some non-chimeric reads.  To turn off, set to 0.\n\
 
3561
                                   with some non-chimeric reads.  To turn off, set to\n\
 
3562
                                   a large value (greater than the query length).\n\
3510
3563
");
3511
3564
 
3512
3565
#if 0
3614
3667
                                 prints two paths if chimera detected, else one.\n\
3615
3668
  --quiet-if-excessive           If more than maximum number of paths are found,\n\
3616
3669
                                   then nothing is printed.\n\
 
3670
  --suboptimal-score=INT         Report only paths whose score is within this value of the\n\
 
3671
                                   best path.  By default, if this option is not provided,\n\
 
3672
                                   the program prints all paths found.\n\
3617
3673
  -O, --ordered                  Print output in same order as input (relevant\n\
3618
3674
                                   only if there is more than one worker thread)\n\
3619
3675
  -5, --md5                      Print MD5 checksum for each query sequence\n\
3655
3711
  fprintf(stdout,"Options for SAM output\n");
3656
3712
  fprintf(stdout,"\
3657
3713
  --no-sam-headers               Do not print headers beginning with '@'\n\
3658
 
  --noncanonical-splices=STRING  Print non-canonical genomic gaps greater than 20 nt\n\
3659
 
                                   in CIGAR string as STRING.  Allowed values: N (default), D.\n\
3660
3714
  --read-group-id=STRING         Value to put into read-group id (RG-ID) field\n\
3661
3715
  --read-group-name=STRING       Value to put into read-group name (RG-SM) field\n\
3662
3716
  --read-group-library=STRING    Value to put into read-group library (RG-LB) field\n\