2
* ===========================================================================
5
* National Center for Biotechnology Information (NCBI)
7
* This software/database is a "United States Government Work" under the
8
* terms of the United States Copyright Act. It was written as part of
9
* the author's official duties as a United States Government employee and
10
* thus cannot be copyrighted. This software/database is freely available
11
* to the public for use. The National Library of Medicine and the U.S.
12
* Government do not place any restriction on its use or reproduction.
13
* We would, however, appreciate having the NCBI and the author cited in
14
* any work or product based on this material
16
* Although all reasonable efforts have been taken to ensure the accuracy
17
* and reliability of the software and data, the NLM and the U.S.
18
* Government do not and cannot warrant the performance or results that
19
* may be obtained by using this software or data. The NLM and the U.S.
20
* Government disclaim all warranties, express or implied, including
21
* warranties of performance, merchantability or fitness for any particular
24
* ===========================================================================
28
* Author: Colombe Chappey
30
* Version Creation Date: 8/22/99
35
* --------------------------------------------------------------------------
36
* Date Name Description of modification
37
* ------- ---------- -----------------------------------------------------
40
* ==========================================================================
61
#define BAND_LIMIT 100
63
typedef struct nodehit
69
Nlm_FloatHi bit_score;
71
struct nodehit PNTR child;
72
struct nodehit PNTR next;
73
} NodeHit, PNTR NodeHitPtr;
75
static ValNodePtr traverseGraph (NodeHitPtr head, NodeHitPtr node, Int2 total, Int2 level, Int2Ptr tab, ValNodePtr vnp, Boolean * change);
77
NLM_EXTERN MashPtr MashNew (Boolean is_prot)
81
if((msp = (MashPtr)MemNew(sizeof(Mash)))==NULL) {
82
ErrPostEx(SEV_WARNING, 0, 0, "MemNew returned NULL");
85
msp->is_prot = is_prot;
87
msp->multidim = FALSE;
94
msp->matrixname="BLOSUM62";
96
msp->gap_x_dropoff = 15;
97
msp->gap_x_dropoff_final = 500; /****25****/
98
msp->filter = FILTER_SEG;
105
msp->matrixname=NULL;
107
msp->gap_x_dropoff = 50;
108
msp->gap_x_dropoff_final = 50;
109
msp->filter = FILTER_DUST;
111
msp->translate_prot = FALSE;
112
msp->transalp = NULL;
113
msp->use_gapped_blast = TRUE;
122
msp->blast_threshold = 50;
123
msp->choice_blastfilter = 2; /* 1 */
124
msp->splicing = TRUE; /*FALSE;*/
125
msp->map_align = FALSE;
126
msp->align_ends = TRUE;
130
static Int4Ptr PNTR LIBCALL BlastStyleMatDestruct(Int4Ptr PNTR matrix){
136
static GlobalBandStructPtr LIBCALL DestructBandStruct(GlobalBandStructPtr gbsp){
137
gbsp->matrix = BlastStyleMatDestruct(gbsp->matrix);
138
if(gbsp->seq2!=NULL) MemFree(gbsp->seq2);
139
if(gbsp->seq1!=NULL) MemFree(gbsp->seq1);
142
gbsp=GlobalBandStructDelete(gbsp);
147
static GlobalBandStructPtr CC_CreatBandStruct(SeqLocPtr slp1, SeqLocPtr slp2, MashPtr msp)
149
GlobalBandStructPtr gbsp;
152
is_prot = (Boolean)(msp->is_prot || msp->translate_prot);
153
gbsp = CreatBandStruct(slp1, slp2, NULL,(Boolean) is_prot, (Int2) msp->band_method);
154
if (( ChangeGlobalBandMatrix(gbsp, is_prot, msp->matrixname,(Int4) msp->penalty, (Int4)msp->reward)) != 0) {
155
gbsp = GlobalBandStructDelete (gbsp);
158
SetGlobaltOptions(gbsp,msp->lg1_ext,msp->rg1_ext,
159
msp->lg2_ext, msp->rg2_ext,
160
msp->lg1_open, msp->lg2_open,
161
msp->rg1_open, msp->rg2_open,
162
(Int2)msp->gap_open, (Int2) msp->gap_extend);
165
/********************************************************/
166
static SeqAlignPtr BandAlignTwoSeqLocsFunc (SeqLocPtr slp1, SeqLocPtr slp2, MashPtr msp)
168
GlobalBandStructPtr gbsp;
169
SeqAlignPtr newalign = NULL;
173
if (slp1==NULL || slp2==NULL)
175
gbsp = CC_CreatBandStruct(slp1, slp2, msp);
178
is_prot = (Boolean)(msp->is_prot || msp->translate_prot);
179
gbsp->options->low = -(Int4) SeqLocLen(slp1);
180
gbsp->options->up =(Int4) SeqLocLen(slp2);
181
gbsp->options->start_diag = gbsp->options->low;
182
gbsp->options->width = gbsp->options->up-gbsp->options->low + 1;
184
if (gbsp->options->width>2*BAND_LIMIT) {
185
SetLowUpFromBlast(gbsp->options, is_prot, (Int2)0, (Int2)30,slp1, slp2);
189
gbsp->search_type = (Uint1) G_BAND_QGAP;
192
newalign = GlobalBandToSeqAlign(gbsp);
193
if (newalign != NULL)
195
if (SeqLocStrand(slp1) == Seq_strand_minus) {
196
sit=(SeqIntPtr)slp1->data.ptrvalue;
197
sit->strand = Seq_strand_plus;
199
if (SeqLocStrand(slp2) == Seq_strand_minus) {
200
sit=(SeqIntPtr)slp2->data.ptrvalue;
201
sit->strand = Seq_strand_plus;
203
AdjustOffSetsInSeqAlign (newalign, slp1, slp2);
205
gbsp = DestructBandStruct(gbsp);
208
ErrPostEx(SEV_WARNING, 0, 0, "Could not Create GlobalBandStruct");
213
static SeqAlignPtr BandAlignTwoSeqLocs (SeqLocPtr slp1, SeqLocPtr slp2, MashPtr msp)
215
SeqAlignPtr newalign = NULL;
218
newalign = BandAlignTwoSeqLocsFunc (slp1, slp2, msp);
219
if (newalign == NULL) {
220
if ((msp->is_prot || msp->translate_prot)) {
223
if (msp->penalty < -1)
229
if (msp->penalty < -1)
232
newalign = BandAlignTwoSeqLocsFunc (slp1, slp2, msp);
234
if (newalign == NULL) {
236
ValNodeAddPointer (&vnp, 0, slp1);
237
ValNodeAddPointer (&vnp, 0, slp2);
238
newalign = SeqLocToFastaSeqAlign (vnp);
244
/********************************************************/
245
static Int2 number_hits (SeqAlignPtr salp)
252
for (salptmp=salp; salptmp!=NULL; salptmp=salptmp->next)
257
/*****************************************************
259
*** try_insert_seqalign
260
*** salplst = list of seqalign
261
*** salp = tobe inserted
262
*** choice = 0 everything is kept
264
*** 2 overlap in seq 1
265
*** 3 overlap in seq 2
266
*** 4 overlaps in both sequences of same strand
269
*******************************************************/
270
static Boolean precede (Int4 pos1, Int4 pos2, Boolean is_plus)
273
return (Boolean) (pos1 < pos2);
274
return (Boolean) (pos1 > pos2);
277
static SeqAlignPtr try_insert_seqalign (SeqAlignPtr salplst, SeqAlignPtr salp, Uint1 choice)
279
SeqAlignPtr tmp, tmp2;
280
Int4 start, stop, start2, stop2,
284
startnext2, stopnext2;
290
if (salplst == NULL) {
291
salplst = SeqAlignDup (salp);
294
starti= SeqAlignStart (salp, 0);
295
stopi = SeqAlignStop (salp, 0);
296
starti2= SeqAlignStart (salp, 1);
297
stopi2 = SeqAlignStop (salp, 1);
298
st1 = (Boolean) !(SeqAlignStrand(salp,0) == Seq_strand_minus);
299
st2 = (Boolean) !(SeqAlignStrand(salp,1) == Seq_strand_minus);
302
start = SeqAlignStart (tmp, 0);
303
stop = SeqAlignStop (tmp, 0);
304
start2= SeqAlignStart (tmp, 1);
305
stop2 = SeqAlignStop (tmp, 1);
307
goOn =(Boolean) (precede(stopi, start, st1) && precede(stopi2, start2, st2));
308
else if (choice == 2)
309
goOn = (Boolean) (precede(stopi, start, st1) && precede(starti2, start2, st2) && precede(stopi2, stop2, st2));
310
else if (choice == 3)
311
goOn = (Boolean) (precede(stopi2, start2, st2) && precede(starti, start, st1) && precede(stopi, stop, st1));
312
else if (choice == 4)
313
goOn = (Boolean) (precede(starti, start, st1) && precede(stopi, stop, st1) && precede(starti2, start2, st2) && precede(stopi2, stop2, st2));
314
else if (choice == 5)
315
goOn = (Boolean) (precede(starti, start, st1));
320
tmp2 = SeqAlignDup (salp);
321
tmp2->next = salplst;
328
goOn = (Boolean) (precede(stop, starti, st1) && precede(stop2, starti2, st2));
329
else if (choice == 2)
330
goOn = (Boolean) (precede(stop, starti, st1) && precede(start2, starti2, st2) && precede(stop2, stopi2, st2));
331
else if (choice == 3)
332
goOn = (Boolean) (precede(stop2, starti2, st2) && precede(start, starti, st1) && precede(stop, stopi, st1));
333
else if (choice == 4)
334
goOn = (Boolean) (precede(start, starti, st1) && precede(stop, stopi, st1) && precede(start2, starti2, st2) && precede(stop2, stopi2, st2));
336
goOn=(Boolean)(precede(start, starti, st1));
339
if (tmp->next == NULL) {
340
tmp2 = SeqAlignDup (salp);
345
startnext = SeqAlignStart (tmp->next, 0);
346
startnext2= SeqAlignStart (tmp->next, 1);
347
stopnext2 = SeqAlignStop (tmp->next, 1);
349
goOn=(Boolean)(precede(starti, startnext, st1));
352
goOn=(Boolean)(precede(stopi, startnext, st1) && precede(starti2, startnext2, st2) && precede(stopi2, stopnext2, st2)) ;
356
tmp2 = SeqAlignDup (salp);
357
tmp2->next = tmp->next;
365
start = SeqAlignStart (tmp, 0);
366
stop = SeqAlignStop (tmp, 0);
367
start2= SeqAlignStart (tmp, 1);
368
stop2 = SeqAlignStop (tmp, 1);
375
static SeqAlignPtr SortBlastHits (SeqAlignPtr salp, Int4 threshold, Uint1 choice)
377
SeqAnnotPtr sap, sap2;
382
tmp = NULL, pre=NULL, preselect=NULL;
383
Nlm_FloatHi bit_score;
385
FloatLo gap_length_ratio,
386
gap_length_ratio1=1.00;
389
totalvalmax = INT4_MAX,
396
if (salp == NULL || choice < 0)
398
sap = SeqAnnotForSeqAlign (salp);
399
sap2 = (SeqAnnotPtr) AsnIoMemCopy ((Pointer) sap, (AsnReadFunc) SeqAnnotAsnRead, (AsnWriteFunc) SeqAnnotAsnWrite);
401
salpdup = (SeqAlignPtr)sap2->data;
408
pre = select = preselect = NULL;
409
for (salptmp=salpdup; salptmp!=NULL; salptmp=salptmp->next)
411
GetScoreAndEvalue (salptmp, &score, &bit_score, &evalue, &number);
412
gap_count = SeqAlignGapCount (salptmp);
413
gap_length_ratio = (FloatLo)gap_count/SeqAlignLength(salptmp);
414
if (score > valmax || (gap_count1>0 && gap_count == 0 && gap_length_ratio1 > 0.05 && score >= threshold))
420
gap_count1 = gap_count;
421
gap_length_ratio1 = gap_length_ratio;
426
if (valmax < threshold && head == NULL)
430
if (valmax >= threshold && select != NULL)
433
ok=(Boolean)((SeqAlignStrand(head,0) == SeqAlignStrand(select,0))
434
&& (SeqAlignStrand(head,1) == SeqAlignStrand(select,1)));
437
head = try_insert_seqalign (head, select, choice);
440
if (preselect==NULL) {
441
salpdup = select->next;
443
preselect->next = select->next;
446
SeqAlignSetFree (tmp);
447
totalvalmax = valmax;
453
SeqAlignSetFree (salpdup);
457
static void check_strand_inSeqalign (SeqAlignPtr salp, Uint1 strand, Int2 index)
464
dsp = (DenseSegPtr) salp->segs;
465
if (dsp->strands != NULL) {
466
strandp = dsp->strands;
468
for (j=0; j<dsp->numseg; j++) {
469
if (*strandp != strand)
477
static SeqAlignPtr SeqAlignList2PosStrand (SeqAlignPtr salp)
488
for (salptmp=salp; salptmp!=NULL; salptmp=salptmp->next)
490
if ((SeqAlignStrand(salptmp,0) == Seq_strand_minus)
491
&& (SeqAlignStrand(salptmp,1) != Seq_strand_minus))
493
if (salptmp->segtype == 2)
496
dsp = (DenseSegPtr) salptmp->segs;
497
strandp = dsp->strands;
498
numseg = dsp->numseg;
501
startp = dsp->starts;
502
for (j=0; j < numseg*dim && strandp!=NULL; j++, strandp++)
504
if (*strandp == Seq_strand_minus)
505
*strandp = Seq_strand_plus;
506
else if (*strandp != Seq_strand_minus)
507
*strandp = Seq_strand_minus;
509
for (j=0, k=numseg-1; j<numseg/2; j++, k--) {
514
for (j=0, k=(dim*numseg-dim); j<(dim*numseg-1)/2; j+=dim, k-=dim) {
515
for (n=0; n<dim; n++) {
517
startp[j+n]=startp[k+n];
527
static SeqAlignPtr BlastTwoSeqLocs (SeqLocPtr slp1, SeqLocPtr slp2, Boolean is_prot, MashPtr msp)
529
BLAST_OptionsBlkPtr options;
530
SeqAlignPtr seqalign;
532
Uint1 strand1, strand2;
533
Boolean delete_mash = FALSE;
535
if (slp1 == NULL || slp2 == NULL)
538
msp = MashNew (is_prot);
543
options = BLASTOptionNew((is_prot == FALSE) ? "blastn":"blastp",(Boolean) msp->use_gapped_blast);
545
options->gap_open= msp->gap_open;
546
options->gap_extend= msp->gap_extend;
548
if(msp->matrixname!=NULL)
549
options->matrix=StringSave(msp->matrixname);
552
options->reward = msp->reward;
553
options->penalty= msp->penalty;
554
options->wordsize = msp->wordsize;
557
options->filter = 0; *//** msp->filter; **/
558
options->gap_x_dropoff= msp->gap_x_dropoff;
559
options->gap_x_dropoff_final= msp->gap_x_dropoff_final;
562
options = BLASTOptionValidate (options, "blastp");
564
options = BLASTOptionValidate (options, "blastn");
568
strand1=SeqLocStrand(slp1);
569
strand2=SeqLocStrand(slp2);
570
sit=(SeqIntPtr)slp1->data.ptrvalue;
571
sit->strand = Seq_strand_both;
572
sit=(SeqIntPtr)slp2->data.ptrvalue;
573
sit->strand = Seq_strand_both;
575
seqalign = BlastTwoSequencesByLoc (slp1, slp2, NULL, options);
576
if (options->wordsize==0) {
577
while (seqalign==NULL && (is_prot==FALSE && options->wordsize>8)) {
579
seqalign = BlastTwoSequencesByLoc (slp1, slp2, NULL, options);
582
options=BLASTOptionDelete(options);
586
sit=(SeqIntPtr)slp1->data.ptrvalue;
587
sit->strand = strand1;
588
sit=(SeqIntPtr)slp2->data.ptrvalue;
589
sit->strand = strand2;
591
seqalign = SeqAlignList2PosStrand (seqalign);
596
static Uint1 find_bestframe (SeqLocPtr slp, Int2 genCode)
606
/* Only works for genCode == ncbieaa */
607
for (frame = 1; frame <= 3; frame ++)
609
bs = cds_to_pept (slp, frame, (Int2) Seq_code_ncbieaa, TRUE);
611
str = (CharPtr) BSMerge (bs, NULL);
614
for (len = 0; len<bslen; len++)
627
static SeqLocPtr TranslateSeqLoc (SeqLocPtr slp, Int2 genCode, Uint1 *frame)
629
BioseqPtr bsp = NULL;
634
slp = seqloc2fuzzloc (slp, TRUE, TRUE);
635
*frame = find_bestframe (slp, genCode);
636
bs = cds_to_pept (slp, *frame, (Int2) Seq_code_ncbieaa, TRUE);
638
if(genCode != (Int2) Seq_code_ncbieaa ) {
640
bs=BSConvertSeq(bs, (Uint1) genCode,(Uint1)Seq_code_ncbieaa,(Int4) bslen);
644
bsp->repr = Seq_repr_raw;
645
bsp->mol = Seq_mol_aa;
646
bsp->seq_data_type = (Uint1)genCode;
648
bsp->length = BSLen (bs);
649
bsp->id = MakeNewProteinSeqId (slp, NULL);
651
length = (Int4)((SeqLocLen(slp)-1) / (Int4)3);
653
length = (Int4)((SeqLocLen(slp)-2) / (Int4)3);
655
length = (Int4)(SeqLocLen(slp) / (Int4)3);
656
slpp = SeqLocIntNew (0, length-1, Seq_strand_plus, bsp->id);
664
/*************************************************************
665
*** GlobalAlignTwoSeqLocs
666
*** if short sequences (length < 150)
667
*** aligns using BandAlign only
669
*** Blast the 2 seqlocs
671
*** aligns using BandAlign
672
*** else if finds 1 hit (NOW: the longest)
673
*** if alignment reaches ends (3' or 5')
674
*** extend seqalign with the missing ends
676
*** aligns unaligned seqlocs using BandAlign
679
*** !!!SelectBestHit : select 1 hit
680
*** should select several if any
682
**************************************************************/
683
static SeqAlignPtr AlignExtreme5 (SeqAlignPtr salp, MashPtr msp, Int4 slpstart1, Int4 start1, Int4 slpstart2, Int4 slpstop2, Int4 start2, SeqIdPtr sip1, SeqIdPtr sip2, Uint1 strand1, Uint1 strand2)
686
SeqLocPtr newslp1, newslp2;
689
if (strand2 == Seq_strand_minus)
691
touch_end5 = (Boolean)((slpstart1==start1) || (slpstop2==start2));
694
salp = SeqAlignEndExtend (salp, slpstart1, slpstop2, -1, -1, start1, start2, -1, -1, strand1, strand2);
700
newslp1=SeqLocIntNew (0, start1, strand1, sip1);
701
newslp2=SeqLocIntNew (start2, slpstop2, strand2, sip2);
702
msp->rg1_ext=(Int4)msp->gap_extend;
704
msp->rg2_ext=(Int4)msp->gap_extend;
706
msp->rg1_open=(Int4)msp->gap_open;
708
msp->rg2_open=(Int4)msp->gap_open;
710
salp2 = BandAlignTwoSeqLocs (newslp1, newslp2, msp);
711
salp = SeqAlignMerge (salp, salp2, FALSE);
716
touch_end5 = (Boolean)((ABS(slpstart1-start1)<3) || (ABS(slpstart2-start2)<2));
719
salp = SeqAlignEndExtend (salp, slpstart1, slpstart2, -1, -1, start1, start2, -1, -1, strand1, strand2);
725
newslp1=SeqLocIntNew (0, start1, strand1, sip1);
726
newslp2=SeqLocIntNew (0, start2, strand2, sip2);
727
msp->rg1_ext=(Int4)msp->gap_extend;
729
msp->rg2_ext=(Int4)msp->gap_extend;
731
msp->rg1_open=(Int4)msp->gap_open;
733
msp->rg2_open=(Int4)msp->gap_open;
735
salp2 = BandAlignTwoSeqLocs (newslp1, newslp2, msp);
736
salp = SeqAlignMerge (salp, salp2, FALSE);
742
static SeqAlignPtr AlignExtreme3 (SeqAlignPtr salp, MashPtr msp, Int4 slpstop1, Int4 stop1, Int4 slpstart2, Int4 slpstop2, Int4 stop2, SeqIdPtr sip1, SeqIdPtr sip2, Uint1 strand1, Uint1 strand2)
745
SeqLocPtr newslp1, newslp2;
748
if (strand2 == Seq_strand_minus)
750
touch_end3 = (Boolean)((slpstop1==stop1) || (slpstart2==stop2));
753
salp = SeqAlignEndExtend (salp, -1, -1, slpstop1, slpstart2, -1, -1, stop1, stop2, strand1, strand2);
759
newslp1=SeqLocIntNew(stop1,slpstop1, strand1, sip1);
760
newslp2=SeqLocIntNew(0,stop2, strand2, sip2);
761
msp->lg1_ext= (Int4)msp->gap_extend;
763
msp->lg2_ext= (Int4) msp->gap_extend;
765
msp->lg1_open= (Int4)msp->gap_open;
767
msp->lg2_open= (Int4)msp->gap_open;
769
salp2= BandAlignTwoSeqLocs (newslp1,newslp2, msp);
770
salp = SeqAlignMerge (salp, salp2, TRUE);
775
touch_end3 = (Boolean)((ABS(slpstop1-stop1)<3) || (ABS(slpstop2-stop2)<3));
778
salp = SeqAlignEndExtend (salp, -1, -1, slpstop1, slpstop2, -1, -1, stop1, stop2, strand1, strand2);
784
newslp1=SeqLocIntNew(stop1,slpstop1, strand1, sip1);
785
newslp2=SeqLocIntNew(stop2,slpstop2, strand2, sip2);
786
msp->lg1_ext=(Int4)msp->gap_extend;
788
msp->lg2_ext=(Int4) msp->gap_extend;
790
msp->lg1_open=(Int4)msp->gap_open;
792
msp->lg2_open=(Int4)msp->gap_open;
794
salp2 = BandAlignTwoSeqLocs (newslp1, newslp2, msp);
795
salp = SeqAlignMerge (salp, salp2, TRUE);
801
static SeqAlignPtr align_extrem (SeqAlignPtr salp, SeqLocPtr slp1, SeqLocPtr slp2, MashPtr msp)
807
Int4 slpstart1, slpstart2,
809
Uint1 strand1, strand2;
811
sip1 = SeqLocId(slp1);
812
sip2 = SeqLocId(slp2);
813
strand1 = SeqAlignStrand (salp, 0);
814
strand2 = SeqAlignStrand (salp, 1);
815
slpstart1= SeqLocStart(slp1);
816
slpstop1 = SeqLocStop(slp1);
817
slpstart2= SeqLocStart(slp2);
818
slpstop2 = SeqLocStop(slp2);
819
start1 = SeqAlignStart (salp, 0);
820
start2 = SeqAlignStart (salp, 1);
821
salp = AlignExtreme5 (salp, msp, slpstart1, start1, slpstart2, slpstop2, start2, sip1, sip2, strand1, strand2);
822
check_strand_inSeqalign (salp, strand1, 0);
823
check_strand_inSeqalign (salp, strand2, 1);
828
while (tmp->next!=NULL)
830
stop1 = SeqAlignStop (tmp, 0);
831
stop2 = SeqAlignStop (tmp, 1);
832
tmp = AlignExtreme3 (tmp, msp, slpstop1, stop1, slpstart2, slpstop2, stop2, sip1, sip2, strand1, strand2);
833
check_strand_inSeqalign (tmp, strand1, 0);
834
check_strand_inSeqalign (tmp, strand2, 1);
839
/******************************************************
840
static Boolean is_intron (CharPtr str, Int4 len)
842
if (str[0]=='G' && str[1]=='T' && str[len-2]=='A' && str[len-1]=='G')
846
*********************************************************/
847
static FloatHi is_donor (CharPtr str, Int4 len)
849
FloatHi one[4]={0.35, 0.35, 0.19, 0.11};
850
FloatHi two[4]={0.59, 0.13, 0.14, 0.14};
851
FloatHi three[4]={0.08, 0.02, 0.82, 0.08};
852
FloatHi four[4]={0.01, 0.01, 1.00, 0.01};
853
FloatHi five[4]={0.01, 0.01, 0.01, 1.00};
854
FloatHi six[4]={0.51, 0.03, 0.43, 0.03};
855
FloatHi seven[4]={0.71, 0.08, 0.12, 0.09};
856
FloatHi eight[4]={0.06, 0.05, 0.84, 0.05};
857
FloatHi nine[4]={0.15, 0.16, 0.17, 0.52};
858
FloatHi score =1.000;
862
if ((num = (Int4Ptr)MemNew(len*sizeof(Int4)))==NULL) {
866
for (i = 0; i <= len; i++){
876
score *= one[num[0]];
877
score *= two[num[1]];
878
score *= three[num[2]];
879
score *= four[num[3]];
880
score *= five[num[4]];
881
score *= six[num[5]];
882
score *= seven[num[6]];
883
score *= eight[num[7]];
884
score *= nine[num[8]];
892
static Int4 getSplicePos (CharPtr str)
897
FloatHi topscore = -FLT_MAX,
904
length = MIN(StringLen(str)/2-10, 10);
905
while (xcursor <= length)
907
for (c = 0; c < 9; c++)
909
tmpstr[c] = str[xcursor+c];
911
if ((score=is_donor(tmpstr, 8)) > topscore)
918
if (topscore > 0.000010 && offset >=0)
925
static SeqAlignPtr align_btwhits (SeqAlignPtr salp, SeqIdPtr sip1, SeqIdPtr sip2, MashPtr msp)
930
SeqLocPtr slp1, slp2;
936
Boolean search_intron = FALSE;
938
if (salp == NULL) return NULL;
940
search_intron = msp->splicing;
942
newsalphead = SeqAlignDup (salp);
943
newtmp = newsalphead;
947
x1 = SeqAlignStop (newtmp, 0);
948
y1 = SeqAlignStart (tmp, 0);
949
x2 = SeqAlignStop (newtmp, 1);
950
y2 = SeqAlignStart (tmp, 1);
951
st1= SeqAlignStrand (newtmp, 0);
952
st2= SeqAlignStrand (newtmp, 1);
957
newtmp = SeqAlignMerge (newtmp, tmp, TRUE);
961
SeqAlignTrunc2 (newtmp, 0, -(abs(x2-y2+1)));
964
if(st1!=Seq_strand_minus) y1 -= 1; else y1+=1;
965
if(st2!=Seq_strand_minus) y2 -= 1; else y2+=1;
966
newtmp = SeqAlignEndExtend (newtmp, -1, -1, y1, y2, -1, -1, x1, x2, st1, st2);
967
newtmp = SeqAlignMerge (newtmp, tmp, TRUE);
972
SeqAlignTrunc2 (newtmp, +(abs(x1-y1+1)), 0);
976
if ((st2!=Seq_strand_minus && y2 == x2 + 1) || (st2==Seq_strand_minus && x2 == y2+1))
980
str =load_seq_data(sip1, x1-5, y1+1, FALSE, &len);
981
offset = getSplicePos (str);
982
if (offset>= -1 && offset<len-1)
985
if ((offset>=-6 && offset<0) || (offset>0))
987
SeqAlignTrunc2 (newtmp, 0, abs(offset));
988
SeqAlignTrunc2 (tmp, abs(offset), 0);
990
x1 = SeqAlignStop (newtmp, 0);
991
y1 = SeqAlignStart (tmp, 0);
992
x2 = SeqAlignStop (newtmp, 1);
993
y2 = SeqAlignStart (tmp, 1);
994
if(st1!=Seq_strand_minus) y1 -= 1; else y1+=1;
995
if(st2!=Seq_strand_minus) y2 -= 1; else y2+=1;
996
newtmp=SeqAlignEndExtend (newtmp,-1,-1,y1, y2, -1, -1, x1, x2, st1, st2);
997
newtmp = SeqAlignMerge (newtmp, tmp, TRUE);
1003
if(st1!=Seq_strand_minus) y1 -= 1; else y1+=1;
1004
if(st2!=Seq_strand_minus) y2 -= 1; else y2+=1;
1005
newtmp=SeqAlignEndExtend (newtmp, -1, -1, y1, y2, -1, -1, x1, x2, st1, st2);
1006
newtmp = SeqAlignMerge (newtmp, tmp, TRUE);
1010
else if (st2!=Seq_strand_minus && x2 >= y2) {
1011
SeqAlignTrunc2 (newtmp, 0, -(abs(x2-y2+1)));
1013
else if (st2==Seq_strand_minus && y2 >= x2) {
1014
SeqAlignTrunc2 (tmp, abs(y2-x2+1), 0);
1017
slp1 = SeqLocIntNew (x1+1, y1-1, st1, sip1);
1018
if (st2!=Seq_strand_minus)
1019
slp2 = SeqLocIntNew (x2+1, y2-1, st2, sip2);
1021
slp2 = SeqLocIntNew (y2+1, x2-1, st2, sip2);
1022
newsalp = BandAlignTwoSeqLocs (slp1, slp2, msp);
1023
if (newsalp != NULL)
1025
newtmp = SeqAlignMerge (newtmp, newsalp, TRUE);
1026
newtmp = SeqAlignMerge (newtmp, tmp, TRUE);
1035
/*************************************/
1036
static Boolean possible_child (SeqAlignPtr salp1, SeqAlignPtr salp2, Uint1 choice)
1038
Int4 start, stop, start2, stop2,
1044
starti= SeqAlignStart (salp1, 0);
1045
stopi = SeqAlignStop (salp1, 0);
1046
starti2= SeqAlignStart (salp1, 1);
1047
stopi2 = SeqAlignStop (salp1, 1);
1048
st1 = (Boolean) !(SeqAlignStrand(salp1,0) == Seq_strand_minus);
1049
st2 = (Boolean) !(SeqAlignStrand(salp1,1) == Seq_strand_minus);
1051
start = SeqAlignStart (salp2, 0);
1052
stop = SeqAlignStop (salp2, 0);
1053
start2= SeqAlignStart (salp2, 1);
1054
stop2 = SeqAlignStop (salp2, 1);
1056
goOn = (Boolean) (precede(stopi, start, st1) && precede(stopi2, start2, st2));
1057
else if (choice == 2)
1058
goOn = (Boolean) (precede(stopi, start, st1) && precede(starti2, start2, st2) && precede(stopi2, stop2, st2));
1059
else if (choice == 3)
1060
goOn = (Boolean) (precede(stopi2, start2, st2) && precede(starti, start, st1) && precede(stopi, stop, st1));
1061
else if (choice == 4)
1062
goOn = (Boolean) (precede(starti, start, st1) && precede(stopi, stop, st1) && precede(starti2, start2, st2) && precede(stopi2, stop2, st2));
1063
else if (choice == 5)
1064
goOn = (Boolean) (precede(starti, start, st1));
1070
static NodeHitPtr CreateGraph (SeqAlignPtr salp, Uint1 choice)
1072
NodeHitPtr headnode,
1076
SeqAlignPtr tmp1, tmp2;
1077
Nlm_FloatHi bit_score;
1082
headnode = (NodeHitPtr) MemNew (sizeof(NodeHit));
1083
headnode->index = 1;
1084
headnode->salp = salp;
1085
headnode->child = NULL;
1086
headnode->next = NULL;
1088
for (tmp1=salp->next, j=2; tmp1!=NULL; tmp1=tmp1->next, j++)
1090
newnode = (NodeHitPtr) MemNew (sizeof(NodeHit));
1092
newnode->open = TRUE;
1093
newnode->salp = tmp1;
1094
newnode->child = NULL;
1095
newnode->next = NULL;
1096
GetScoreAndEvalue (tmp1, &score, &bit_score, &evalue, &number);
1097
newnode->evalue = evalue;
1098
newnode->score = score;
1099
newnode->bit_score = bit_score;
1100
node1->next = newnode;
1101
node1 = node1->next;
1104
for (tmp1=salp; tmp1!=NULL; tmp1=tmp1->next)
1108
for (tmp2=salp; tmp2!=NULL; tmp2=tmp2->next)
1110
if (possible_child (tmp1, tmp2, choice)) {
1111
newnode = (NodeHitPtr) MemNew (sizeof(NodeHit));
1112
newnode->index = -1;
1113
newnode->open = TRUE;
1114
newnode->salp = NULL;
1115
newnode->child = node2;
1116
newnode->next = NULL;
1117
if (node1->child == NULL) {
1118
node1->child = newnode;
1119
child = node1->child;
1121
child->next = newnode;
1122
child = child->next;
1125
node2 = node2->next;
1127
node1 = node1->next;
1132
static NodeHitPtr SimplifyGraph (NodeHitPtr headnode)
1134
NodeHitPtr node1, node2, node3, node4, node5;
1137
for (node1 = headnode; node1!=NULL; node1 = node1->next)
1139
for (node2=node1->child; node2!=NULL; node2 = node2->next)
1143
for (node4=node3->child; node4!=NULL; node4=node4->next)
1145
gdchild = node4->child->index;
1146
for (node5=node1->child; node5!=NULL; node5 = node5->next)
1148
if (node5->child->index == gdchild)
1150
node5->open = FALSE;
1162
static SeqAlignPtr link_solution (ValNodePtr vnp, NodeHitPtr head, Int2 total)
1165
SeqAlignPtr headsalp = NULL,
1170
Boolean first = TRUE;
1174
tab = (Int2Ptr) vnp->data.ptrvalue;
1177
for (j=0; j<total; j++)
1181
for(nodetmp=head; nodetmp!=NULL; nodetmp=nodetmp->next) {
1182
if (nodetmp->index == tab[j])
1185
if (nodetmp!=NULL && nodetmp->index == tab[j]) {
1188
newsalp = SeqAlignDup (salp);
1189
if (headsalp==NULL) {
1193
tmpsalp->next = newsalp;
1204
static ValNodePtr find_maxsolution (ValNodePtr vnp, NodeHitPtr head, Int2 total, float *maxscore, float *minintron, Int4 *alignlens)
1211
Int4 start, start1, stop;
1214
float bestscore = -100.00;
1216
float bestintron = FLT_MAX;
1220
for (tmp=vnp; tmp!=NULL; tmp=tmp->next)
1222
tab = (Int2Ptr) tmp->data.ptrvalue;
1224
intronlg = (float)0.00;
1226
for (j=0; j<total; j++)
1230
for(nodetmp=head; nodetmp!=NULL; nodetmp=nodetmp->next) {
1231
if (nodetmp->index == tab[j])
1234
if (nodetmp!=NULL && nodetmp->index == tab[j]) {
1237
sum += nodetmp->score;
1238
start = SeqAlignStart(salp,0);
1243
stop = SeqAlignStop(salp,0);
1244
alignlen += abs(stop - start);
1246
WriteLog ("%ld..%ld %ld %ld ", start, stop, alignlen, (long)sum);
1252
intronlg = (float)(abs(stop - start1) - alignlen)/(float)(j-1);
1253
if (sum > bestscore) {
1256
if (intronlg < bestintron) {
1257
bestintron = intronlg;
1259
if (alignlen > bestlen) {
1263
WriteLog ("= %ld > %ld, %f > %f, %ld > %ld\n",(long)sum, (long)bestscore, intronlg, bestintron, alignlen, bestlen);
1269
*maxscore = bestscore;
1270
*minintron = bestintron;
1271
*alignlens = bestlen;
1275
static ValNodePtr get_solutions (ValNodePtr vnp, NodeHitPtr head, Int2 total, Int4 totalens)
1283
Int4 start, start1, stop;
1285
Int4 maxalignlens, alignlens;
1290
float bestratio=0.00;
1295
find_maxsolution (vnp, head, total, &maxscore, &minintron, &maxalignlens);
1298
for (tmp=vnp; tmp!=NULL; tmp=tmp->next)
1302
tab = (Int2Ptr) tmp->data.ptrvalue;
1304
intronlg = (float)0.00;
1307
for (j=0; j<total; j++)
1311
for(nodetmp=head; nodetmp!=NULL; nodetmp=nodetmp->next) {
1312
if (nodetmp->index == tab[j])
1315
if (nodetmp!=NULL && nodetmp->index == tab[j]) {
1318
sum += nodetmp->score;
1319
start = SeqAlignStart(salp,0);
1324
stop = SeqAlignStop(salp,0);
1325
alignlens += abs(stop - start);
1330
intronlg = (float)(abs(stop - start1) - alignlens)/(float)(j-1);
1331
x = (float)sum / (float)maxscore;
1332
y = (float)intronlg / (float)minintron;
1333
z = (float)alignlens / (float)maxalignlens;
1334
if ((Int4)(1000.0*x*z) > bestratio1 && (float)(x*z/y) > (float)bestratio) {
1336
WriteLog ("FFF %ld %ld =%f / %f %d = %f/ %d %d %f %f %f %ld ++ %d %d %d %d %f %f\n", (long)sum, (long)maxscore, x, intronlg, j-1, y, alignlens, maxalignlens, z, (float)((x*z)/y), (float)bestratio, (long)bestratio1, abs(stop - start1), alignlens, start1, stop, (float)(abs(stop - start1) - alignlens), (float)minintron );
1338
bestratio1 = (Int4) 1000*(Int4)(x*z);
1339
bestratio = (float)(x*z/y);
1349
bestvnp->choice = 5;
1353
static ValNodePtr traverseGraph (NodeHitPtr head, NodeHitPtr node, Int2 total, Int2 level, Int2Ptr tab, ValNodePtr vnp, Boolean * change)
1360
child = node->child;
1361
tab[level] = node->index;
1366
nhtmp = child->child;
1367
if (child->open && nhtmp!=NULL) {
1368
vnp = traverseGraph(head, nhtmp, total, level+1, tab, vnp, change);
1370
child = child->next;
1372
if (level > 0 && *change)
1374
tab2 = (Int2Ptr)MemNew ((size_t)((total+2)*sizeof(Int2)));
1375
for (j=0; j<=level; j++) {
1378
for (; j<total; j++)
1380
ValNodeAddPointer (&vnp, 0, (Pointer)tab2);
1387
static SeqAlignPtr seqalign_reverse_sorting (SeqAlignPtr salp)
1389
SeqAlignPtr salptmp, tmp2,next,
1394
while (salptmp->next)
1398
while (next->next) {
1402
if (tmp2->next!=NULL) {
1403
if (salpnew==NULL) {
1404
salpnew = tmp2->next;
1407
tail->next = tmp2->next;
1415
tail->next = salptmp;
1421
/*************************************/
1422
static SeqAlignPtr BlastBandAlignTwoSeqLocs (SeqLocPtr slp1, SeqLocPtr slp2, SeqAlignPtr salp, Boolean is_prot, MashPtr msp)
1424
SeqAlignPtr newsalp = NULL;
1426
Boolean delete_mash = FALSE;
1429
NodeHitPtr headnode, node;
1437
msp = MashNew (FALSE);
1442
if (number_hits (salp) > 1)
1444
newsalp = SortBlastHits (salp, msp->blast_threshold, msp->choice_blastfilter);
1446
else newsalp = salp;
1448
if (number_hits (newsalp) > 1)
1450
if (newsalp != NULL && (msp->splicing))
1452
headnode = CreateGraph (newsalp, msp->choice_blastfilter);
1453
headnode = SimplifyGraph (headnode);
1454
for(total=1, node=headnode; node!=NULL; node=node->next)
1456
tab = (Int2Ptr)MemNew ((size_t)((total+2)*sizeof(Int2)));
1457
for(j=0; j<total; j++)
1460
for (node=headnode; node!=NULL; node=node->next) {
1461
vnp = traverseGraph (node, node, total, 0, tab, vnp, &change);
1463
bestsol = get_solutions (vnp, headnode, total, SeqLocLen(slp2));
1464
newsalp = link_solution (bestsol, headnode, total);
1466
if (newsalp != NULL)
1468
if ((strand=SeqAlignStrand(newsalp, 0)) == Seq_strand_minus)
1470
newsalp = SeqAlignListReverseStrand (newsalp);
1471
newsalp = seqalign_reverse_sorting (newsalp);
1473
newsalp = align_btwhits (newsalp, SeqLocId(slp1), SeqLocId(slp2), msp);
1476
if (newsalp != NULL && msp->align_ends) {
1477
newsalp = align_extrem (newsalp, slp1, slp2, msp);
1486
static SeqAlignPtr AlignAnyway (SeqLocPtr slp1, SeqLocPtr slp2, Boolean is_prot, MashPtr msp, Boolean message)
1491
Char st1[50], st2[50];
1493
SeqIdWrite (SeqLocId(slp1), st1, PRINTID_FASTA_SHORT, 50);
1494
SeqIdWrite (SeqLocId(slp2), st2, PRINTID_FASTA_SHORT, 50);
1495
salpblast = BlastTwoSeqLocs (slp1, slp2, is_prot, msp);
1496
if (salpblast!=NULL) {
1497
salp = BlastBandAlignTwoSeqLocs (slp1, slp2, salpblast, is_prot, msp);
1500
Message (MSG_OK, "%s - %s : Global alignment based on BLAST local similarity", st1, st2);
1503
SeqAlignSetFree (salpblast);
1505
salp = BandAlignTwoSeqLocs (slp1, slp2, msp);
1508
Message (MSG_OK, "%s - %s : Global alignment using dynamic programing algorithm", st1, st2);
1512
ValNodeAddPointer (&vnp, 0, slp1);
1513
ValNodeAddPointer (&vnp, 0, slp2);
1514
salp = SeqLocToFastaSeqAlign (vnp);
1515
ValNodeFreeType (&vnp, TypeEmpty);
1518
Message (MSG_OK, "Import sequence %s without alignment", st2);
1522
ErrPostEx (SEV_WARNING, 0, 0, "No alignment");
1527
static SeqAlignPtr AlignmRNA2genomicToSeqAlign (SeqLocPtr slp1, SeqLocPtr slp2, SeqAlignPtr salpblast, MashPtr msp)
1529
SeqAlignPtr salp=NULL;
1530
Boolean delete_mash = FALSE;
1532
if (salpblast != NULL)
1535
msp = MashNew (FALSE);
1539
msp->splicing = TRUE;
1540
msp->choice_blastfilter = 2;
1541
salp = BlastBandAlignTwoSeqLocs (slp1, slp2, salpblast, FALSE, msp);
1549
/*********************************************
1550
*** SeqLocListToSeqAlign
1551
*** aligns the sequences from the SeqLocs list (sqloc_list)
1552
*** returns a SeqAlign
1553
*** Alignment methods:
1554
*** 1: FASTA (assume that input is FASTA aligned )
1555
*** 5: BandAlign (GlobalAlignTwoSeqLocs)
1556
**********************************************/
1558
NLM_EXTERN SeqAlignPtr SeqLocListToSeqAlign (ValNodePtr sqloc_list, Int2 choice, Pointer param)
1561
SeqLocPtr master_slp,
1565
SeqAlignPtr salphead= NULL,
1574
ValNodePtr framep = NULL;
1575
Int2 seq_number = 0;
1577
Boolean delete_mash = FALSE;
1578
Boolean is_prot = FALSE;
1582
if (sqloc_list == NULL) {
1583
ErrPostEx(SEV_WARNING, 0, 0, "NULL SeqLocList");
1586
if (choice==2 || choice==3 || choice==4 || choice==5)
1589
return (SeqAlignPtr)SeqLocToFastaSeqAlign (sqloc_list);
1591
if((master_slp = (SeqLocPtr) sqloc_list->data.ptrvalue)==NULL) {
1592
ErrPostEx(SEV_WARNING, 0, 0, "Can not read master sequence");
1596
msp = (MashPtr)param;
1598
bsp = BioseqLockById (SeqLocId(master_slp));
1600
is_prot = ISA_aa(bsp->mol);
1605
msp = MashNew (is_prot);
1611
is_prot = (Boolean)(msp->is_prot || msp->translate_prot);
1612
if (msp->translate_prot) {
1613
slptmp1 = master_slp;
1614
master_slp = TranslateSeqLoc(slptmp1, (Int2) Seq_code_ncbistdaa, &frame);
1615
ValNodeAddInt (&framep, 1, (Int4)frame);
1618
if(msp->is_prot && msp->matrixname==NULL) msp->matrixname="BLOSUM62";
1620
for (vnp = sqloc_list->next; vnp!=NULL; vnp = vnp->next)
1623
slp = (SeqLocPtr) vnp->data.ptrvalue;
1626
if (msp->translate_prot) {
1628
slp = TranslateSeqLoc(slptmp2, (Int2) Seq_code_ncbistdaa, &frame);
1629
ValNodeAddInt (&framep, 1, (Int4)frame);
1634
COMMENT salpnew = (SeqAlignPtr) SIM2ALN (master_slp, slp, 1); */
1637
else if (choice == 3) {
1640
COMMENT salpnew=(SeqAlignPtr)SIM3ALN_choice (master_slp, slp, (Int4) 100, TRUE, TRUE); */
1643
else if (choice == 4) {
1645
COMMENT salpnew = (SeqAlignPtr) CSIM (master_slp, slp, 1, 0.00, NULL); */
1647
else if (choice == 5) {
1649
COMMENT salpnew = SIM4ALN_choice (master_slp, slp, 1000, 8); */
1651
else if (choice == 6) {
1652
salpblast = BlastTwoSeqLocs (master_slp, slp, is_prot, msp);
1653
if (salpblast!=NULL) {
1654
salpnew = BandAlignTwoSeqLocs (master_slp, slp, msp);
1656
SeqAlignSetFree (salpblast);
1658
else if (choice == 7) {
1659
salpnew = BlastTwoSeqLocs (master_slp, slp, is_prot, msp);
1660
msp->choice_blastfilter = 0;
1661
salpnew = SortBlastHits (salpnew, msp->blast_threshold, msp->choice_blastfilter);
1663
else if (choice == 10) {
1664
salpblast = BlastTwoSeqLocs (master_slp, slp, is_prot, msp);
1665
if (salpblast!=NULL)
1667
salpnew = BlastBandAlignTwoSeqLocs (master_slp, slp, salpblast, is_prot, msp);
1669
SeqAlignSetFree (salpblast);
1672
else if (choice == 9) {
1673
salpblast = BlastTwoSeqLocs (master_slp, slp, is_prot, msp);
1674
if (salpblast!=NULL) {
1675
salpnew = AlignmRNA2genomicToSeqAlign (master_slp, slp, salpblast, msp);
1677
SeqAlignSetFree (salpblast);
1680
else if (choice == 8) {
1681
salpblast = BlastTwoSeqLocs (master_slp, slp, is_prot, msp);
1682
if (salpblast!=NULL) {
1683
salpnew = AlignmRNA2genomicToSeqAlign (master_slp, slp, salpblast, msp);
1684
if (salpnew == NULL)
1686
salpnew = BlastBandAlignTwoSeqLocs (master_slp, slp, salpblast, is_prot, msp);
1689
SeqAlignSetFree (salpblast);
1692
else if (choice == 0)
1695
salpnew = AlignAnyway (master_slp, slp, is_prot, msp, TRUE);
1697
salpnew = AlignAnyway (master_slp, slp, is_prot, msp, FALSE);
1699
if (salpnew != NULL)
1701
if (msp->translate_prot) {
1702
master_slp = slptmp1;
1703
sapna =SeqAnnotForSeqAlign (salpna);
1704
sapna = aaSeqAnnot_to_dnaSeqAnnotFunc (&sapna, salpnew, sqloc_list, framep);
1705
salpna = (SeqAlignPtr)sapna->data;
1707
SeqAnnotFree(sapna);
1709
if (salphead == NULL) {
1714
salptmp->next = salpnew;
1715
salptmp = salptmp->next;
1721
ErrPostEx(SEV_WARNING, 0, 0, "Last SeqLoc was NULL");
1725
if (salphead != NULL) {
1727
salphead = SeqAlignMapOnFirstSeq (salphead);
1728
if (seq_number > 2 && msp->multidim) {
1729
salphead2 = PairSeqAlign2MultiSeqAlign (salphead);
1730
if (salphead2!=NULL)
1732
salphead = SeqAlignSetFree (salphead);
1733
salphead = salphead2;
1737
if (salpna != NULL) {
1738
if (seq_number > 2 && msp->multidim) {
1739
salpna2 = PairSeqAlign2MultiSeqAlign (salpna);
1742
salpna = SeqAlignSetFree (salpna);
1747
msp->transalp = salpna;
1755
NLM_EXTERN SeqLocPtr AlignmRNA2genomic (BioseqPtr bsp1, BioseqPtr bsp2)
1757
ValNodePtr vnp = NULL;
1758
SeqLocPtr slp = NULL,
1766
if (bsp1==NULL || bsp2==NULL)
1768
if ((Boolean)ISA_aa(bsp1->mol) || (Boolean)ISA_aa(bsp2->mol))
1770
sip = SeqIdFindBest (bsp1->id, 0);
1773
slp1 = SeqLocIntNew (0, bsp1->length - 1, Seq_strand_plus, sip);
1774
ValNodeAddPointer(&vnp, 0, (Pointer)slp1);
1775
sip = SeqIdFindBest (bsp2->id, 0);
1778
slp2 = SeqLocIntNew (0, bsp2->length - 1, Seq_strand_plus, sip);
1779
ValNodeAddPointer(&vnp, 0, (Pointer)slp2);
1780
salpblast = BlastTwoSeqLocs (slp1, slp2, FALSE, msp);
1781
if (salpblast!=NULL)
1783
salp = AlignmRNA2genomicToSeqAlign (slp1, slp2, salpblast, msp);
1784
SeqAlignSetFree (salpblast);
1785
slp = SeqLocMixFromSeqAlign (salp, NULL);
1790
NLM_EXTERN SeqAnnotPtr BlastBandAlignFromBlastSeqAlign (SeqAlignPtr salpblast, Boolean align_ends)
1793
SeqAnnotPtr sap, sap2;
1794
SeqAlignPtr salptmp,
1799
ValNodePtr vnp_sameid=NULL,
1801
SeqLocPtr master_slp,
1805
Boolean found=FALSE,
1813
sap = SeqAnnotForSeqAlign (salpblast);
1815
sap2 = (SeqAnnotPtr) AsnIoMemCopy((Pointer)sap,
1816
(AsnReadFunc)SeqAnnotAsnRead, (AsnWriteFunc)SeqAnnotAsnWrite);
1818
salp_cp = (SeqAlignPtr)sap2->data;
1822
SeqAnnotFree (sap2);
1824
sip = SeqIdPtrFromSeqAlign (salp_cp);
1825
bsp = BioseqLockById (sip);
1827
is_prot = ISA_aa(bsp->mol);
1828
master_slp = SeqLocIntNew (0, bsp->length-1, Seq_strand_plus, sip);
1833
ValNodeAddPointer (&vnp_sameid, 0, (Pointer)salp_cp);
1834
salp_next = salp_cp->next;
1835
salp_cp->next = NULL;
1836
salp_cp = salp_next;
1838
sip = SeqIdPtrFromSeqAlign(salp_cp);
1841
while (vnp && !found)
1843
salptmp = (SeqAlignPtr)vnp->data.ptrvalue;
1844
siptmp = SeqIdPtrFromSeqAlign (salptmp);
1845
if ((val=SeqIdComp(sip->next, siptmp->next)) == SIC_YES)
1847
while (salptmp->next!=NULL)
1848
salptmp=salptmp->next;
1849
salptmp->next = salp_cp;
1850
salp_next = salp_cp->next;
1851
salp_cp->next = NULL;
1852
salp_cp = salp_next;
1858
ValNodeAddPointer (&vnp_sameid, 0, (Pointer)salp_cp);
1859
salp_next = salp_cp->next;
1860
salp_cp->next = NULL;
1861
salp_cp = salp_next;
1865
msp = MashNew (is_prot);
1866
msp->align_ends = align_ends;
1867
for (vnp=vnp_sameid; vnp!=NULL; vnp=vnp->next) {
1868
salptmp = (SeqAlignPtr)vnp->data.ptrvalue;
1869
sip = SeqIdPtrFromSeqAlign (salptmp);
1871
bsp = BioseqLockById (sip);
1874
slp = SeqLocIntNew (0, bsp->length-1, Seq_strand_plus, sip);
1878
salpnew = BlastBandAlignTwoSeqLocs (master_slp, slp, salptmp, is_prot, msp);
1880
if(salp_head == NULL)
1881
salp_head = salpnew;
1883
for(salptmp=salp_head; salptmp->next!=NULL; salptmp=salptmp->next)
1885
salptmp->next=salpnew;
1892
if (salp_head==NULL)
1894
return SeqAnnotForSeqAlign (salp_head);
1898
/*******************************************************
1899
*** SeqEntryAlignToMasterFunc
1900
*** aligns the bioseqs that are present in a SeqEntry (sep)
1901
*** returns a SeqAlign
1902
*** SeqEntryToSeqAlign
1903
*** calls SeqEntryAlignToMaster
1904
*** returns a SeqAnnot
1905
******************************************************/
1907
static SeqAlignPtr SeqEntryAlignToMasterFunc (SeqEntryPtr sep, SeqLocPtr master, Uint1 bsp_mol, Int2
1910
ValNodePtr vnp = NULL,
1912
SeqAlignPtr salp = NULL;
1915
vnp = SeqEntryToSeqLoc (sep, &nb, bsp_mol);
1919
ValNodeAddPointer (&vnp2, 0, master);
1923
salp = SeqLocListToSeqAlign (vnp, method, NULL);
1928
NLM_EXTERN SeqAnnotPtr LIBCALL SeqEntryToSeqAlign (SeqEntryPtr sep, Uint1 bsp_mol)
1933
salp = SeqEntryAlignToMasterFunc (sep, NULL, bsp_mol, PRG_ANYALIGN);
1934
sap = SeqAnnotForSeqAlign (salp);
1935
/* this doesn't seem to produce a valid alignment and nobody knows what
1936
it's for anyway, so we'll comment it out and see who complains -- 8/8/01 SW
1937
if (ISA_aa(bsp_mol)) {
1938
sap = aaSeqAnnot_to_dnaSeqAnnotFunc (&sap, salp, NULL, NULL);