29
29
* Version Creation Date: 10/01
33
33
* File Description: SeqAlign indexing, access, and manipulation functions
36
36
* --------------------------------------------------------------------------
37
37
* $Log: alignmgr2.c,v $
38
* Revision 6.56 2004/09/15 14:59:19 bollin
39
* make sure we do not read outside the alignment index arrays
41
* Revision 6.55 2004/05/20 19:46:25 bollin
42
* removed unused variables
44
* Revision 6.54 2004/05/11 13:19:49 bollin
45
* update the dimension of the shared alignment after adding a sequence.
47
* Revision 6.53 2004/04/13 14:43:07 kskatz
48
* Final resolution of revisions 6.51 and 6.52: reverted 6.52; then cleaned up readability of AlnMgr2SeqPortRead() and ensured that it will never call SeqPortRead for a length > AM_SEQPORTSIZE
50
* Revision 6.52 2004/04/12 19:52:15 kskatz
51
* Revision 6.51 was right neighborhood,wrong off-by-one: It was in AlnMgr2ComputeFreqMatrix() call to AlnMgr2SeqPortRead() when using l+AM_SEQPORTSIZE instead of l+AM_SEQPORTSIZE-1
53
* Revision 6.51 2004/04/12 17:00:44 kskatz
54
* Fixed off-by-one error in AlnMgr2SeqPortRead() length passed to SeqPortRead(); stop-start+1 changed to stop-start
56
* Revision 6.50 2004/03/11 14:15:41 bollin
57
* added extra check in AlnMgr2GetNthSeqIdPtr to avoid core dump if there are
58
* fewer than N SeqIDs in the alignment.
60
* Revision 6.49 2003/10/20 17:54:34 kans
61
* AlnMgr2ComputeFreqMatrix protect against dereferencing NULL bsp
63
* Revision 6.48 2003/10/09 13:46:52 rsmith
64
* Add AlnMgr2GetFirstNForSipList.
66
* Revision 6.47 2003/05/15 18:53:10 rsmith
67
* in AlnMgr2GetSeqRangeForSipInStdSeg always return start & stop in coordinate order. Do not assume what minus strand will do or not.
69
* Revision 6.46 2003/04/24 20:28:48 rsmith
70
* made AlnMgr2GetNthStdSeg use 1 based numbering like the other Nth functions.
72
* Revision 6.45 2003/04/23 20:36:13 rsmith
73
* Added four functions in Section 11 to get information about Std-Seg alignments.
75
* Revision 6.44 2003/03/31 20:17:11 todorov
76
* Added AlnMgr2IndexSeqAlignEx
78
* Revision 6.43 2003/02/03 12:36:22 kans
79
* AlnMgr2ComputeScoreForSeqAlign checks return value of AlnMgr2ComputeFreqMatrix, returns -1 if NULL to avoid dereference crash
81
* Revision 6.42 2002/10/23 16:32:19 todorov
82
* CondenseColumns fixed: needed to move the lens too.
84
* Revision 6.40 2002/10/16 15:54:28 todorov
85
* use the default dim value if not set
87
* Revision 6.39 2002/08/07 21:57:33 kans
88
* added AlignMgr2GetFirstNForStdSeg
90
* Revision 6.38 2002/07/11 14:35:51 kans
91
* fixed Mac complaints about prototypes
93
* Revision 6.37 2002/07/11 12:55:38 wheelan
94
* added support for std-seg alignments
96
* Revision 6.36 2002/06/04 17:43:07 todorov
97
* 1) Substituted AddInNewSA with a new and optimized AddInNewPairwiseSA function.
98
* 2) Fixed a few bugs in other functions.
100
* Revision 6.35 2002/05/17 15:04:42 wheelan
101
* bug fix in ExtendToCoords
103
* Revision 6.34 2002/05/17 11:02:36 wheelan
104
* bug fixes in Merge func
106
* Revision 6.32 2002/03/04 17:19:18 wheelan
107
* added AlnMgr2FuseSet, changed behavior of RemoveInconsistent, fixed GetNextAlnBitBugs
109
* Revision 6.31 2002/01/31 17:41:47 wheelan
110
* various bug fixes -- no more 0 len segments, better handling of rows that are one big insert, etc.
112
* Revision 6.30 2002/01/30 19:12:53 wheelan
113
* added RemoveInconsistentAlnsFromSet, ExtractPairwiseSeqAlign, changed behavior of GetSubAlign, changed structures and behavior of GetNextAlnBit, added GetInterruptInfo, added AlnMgr2IndexAsRows, bug fixes in indexing routines
115
* Revision 6.29 2002/01/02 15:05:07 wheelan
116
* changes to force more efficient ordering in CompareAsp callbacks, plus more stringent checks in AlnMgr2AddInNewSA
118
* Revision 6.28 2001/12/28 22:53:20 wheelan
119
* bug fixes; added AlnMgr2DupAlnAndIndexes, changed some New and Free funcs
121
* Revision 6.27 2001/12/27 16:07:22 wheelan
122
* bug fix in ExtendToEnd
124
* Revision 6.26 2001/12/20 19:43:20 wheelan
125
* bug fix in GetNextAlnBit -- no more incorrect inserts
38
127
* Revision 6.25 2001/12/18 16:36:57 wheelan
39
128
* scattered fixes to unaligned region code
974
1281
MemFree(vertexarray);
1284
static int LIBCALLBACK AlnMgr2CompareByAnchor(VoidPtr ptr1, VoidPtr ptr2)
1299
sap1 = *((SeqAlignPtr PNTR)ptr1);
1300
sap2 = *((SeqAlignPtr PNTR)ptr2);
1301
saip1 = (SAIndex2Ptr)(sap1->saip);
1302
saip2 = (SAIndex2Ptr)(sap2->saip);
1303
dsp = (DenseSegPtr)(sap1->segs);
1304
if (saip1->tmp == 1)
1305
sip1 = dsp->ids->next;
1308
dsp = (DenseSegPtr)(sap2->segs);
1309
if (saip2->tmp == 1)
1310
sip2 = dsp->ids->next;
1313
ret = AlnMgr2OrderSeqIds(sip1, sip2);
1316
/* these share both ids -- put best first */
1317
if (saip1->score == 0)
1318
saip1->score = AlnMgr2ComputeScoreForSeqAlign(sap1);
1319
if (saip2->score == 0)
1320
saip2->score = AlnMgr2ComputeScoreForSeqAlign(sap2);
1321
if (saip1->score > saip2->score)
1323
else if (saip1->score < saip2->score)
1325
AlnMgr2GetNthSeqRangeInSA(sap1, saip1->tmp, &start1, &stop1);
1326
AlnMgr2GetNthSeqRangeInSA(sap2, saip2->tmp, &start2, &stop2);
1327
if (start1 < start2)
1329
else if (start1 > start2)
1331
else if (stop1 > stop2)
1333
else if (stop1 < stop2)
1339
NLM_EXTERN Boolean AlnMgr2IndexAsRows(SeqAlignPtr sap, Uint1 strand, Boolean truncate)
1341
AMAlignIndex2Ptr amaip;
1343
DenseSegPtr dsp_tmp;
1350
SeqAlignPtr sap_head;
1351
SeqAlignPtr sap_prev;
1352
SeqAlignPtr sap_tmp;
1353
SeqAlignPtr PNTR saparray;
1354
SeqAlignPtr set_head;
1355
SeqAlignPtr set_prev;
1364
if (sap->saip != NULL)
1365
AMAlignIndexFreeEitherIndex(sap);
1366
AlnMgr2IndexLite(sap);
1367
AlnMgr2DecomposeToPairwise(sap);
1368
/* need to figure out which row is shared by all saps */
1369
sap_tmp = (SeqAlignPtr)(sap->segs);
1370
dsp = (DenseSegPtr)(sap_tmp->segs);
1373
while (!found && sip != NULL)
1375
sap_tmp = (SeqAlignPtr)(sap->segs);
1376
sip_next = sip->next;
1379
while (!impossible && sap_tmp != NULL)
1381
dsp_tmp = (DenseSegPtr)(sap_tmp->segs);
1382
if (AlnMgr2SeqIdListsOverlap(sip, dsp_tmp->ids) == NULL)
1384
sap_tmp = sap_tmp->next;
1386
sip->next = sip_next;
1387
if (!impossible) /* found one that matched a row in every alignment */
1392
if (!found) /* didn't find a seqid that was contained in all alignments */
1394
/* mark the shared row to make things easier */
1395
sharedsip = SeqIdDup(sip);
1396
sap_tmp = (SeqAlignPtr)(sap->segs);
1398
while (sap_tmp != NULL)
1400
saip = (SAIndex2Ptr)(sap_tmp->saip);
1401
dsp_tmp = (DenseSegPtr)(sap_tmp->segs);
1402
if (SeqIdComp(sharedsip, dsp_tmp->ids) == SIC_YES)
1406
sap_tmp = sap_tmp->next;
1409
saparray = (SeqAlignPtr PNTR)MemNew(i*sizeof(SeqAlignPtr));
1410
sap_tmp = (SeqAlignPtr)(sap->segs);
1412
while (sap_tmp != NULL)
1414
saparray[i] = sap_tmp;
1416
sap_tmp = sap_tmp->next;
1419
HeapSort(saparray, i, sizeof(SeqAlignPtr), AlnMgr2CompareByAnchor);
1420
/* now each clump of alignments is a row -- need to eliminate overlaps next */
1423
sap_head = sap_prev = NULL;
1426
saparray[i]->next = NULL;
1427
set_head = set_prev = saparray[i];
1428
saip = (SAIndex2Ptr)(saparray[i]->saip);
1429
sip = AlnMgr2GetNthSeqIdPtr(saparray[i], 3-saip->tmp); /* get other seqid */
1432
sip_tmp = AlnMgr2GetNthSeqIdPtr(saparray[i], 3-saip->tmp);
1433
while (i<numsaps && SeqIdComp(sip, sip_tmp) == SIC_YES)
1435
set_prev->next = saparray[i];
1436
set_prev = saparray[i];
1437
saparray[i]->next = NULL;
1441
sip_tmp = AlnMgr2GetNthSeqIdPtr(saparray[i], 3-saip->tmp);
1443
AlnMgr2IndexLite(set_head);
1445
AlnMgr2RemoveInconsistentAlnsFromSet(set_head, 0);
1447
AlnMgr2RemoveInconsistentAlnsFromSet(set_head, -1);
1448
sap_tmp = (SeqAlignPtr)(set_head->segs);
1449
while (sap_tmp != NULL)
1451
saip = (SAIndex2Ptr)(sap_tmp->saip);
1452
dsp_tmp = (DenseSegPtr)(sap_tmp->segs);
1453
if (SeqIdComp(sharedsip, dsp_tmp->ids) == SIC_YES)
1457
sap_tmp = sap_tmp->next;
1459
if (sap_head != NULL)
1460
sap_prev->next = set_head;
1462
sap_head = sap_prev = set_head;
1463
while (sap_prev->next != NULL)
1465
sap_prev = sap_prev->next;
1467
sap_prev->next = NULL;
1469
/* now we have lots of freed pointers sitting in the array */
1472
/* sap_head is the head of a chain of LITE-indexed alignments, each of which is one row */
1473
/* first make sure that the shared row is on the requested strand */
1475
if (strand == Seq_strand_both || strand == Seq_strand_unknown || strand == 0)
1476
strand = Seq_strand_plus;
1477
while (sap_tmp != NULL)
1479
salp = (SeqAlignPtr)(sap_tmp->segs);
1480
saip = (SAIndex2Ptr)(salp->saip);
1481
/* strand is same for all children */
1482
if (AlnMgr2GetNthStrand(salp, saip->tmp) != strand)
1484
SeqAlignListReverseStrand(salp);
1485
while (salp != NULL)
1487
saip = (SAIndex2Ptr)salp->saip;
1489
SAIndex2Free2(salp->saip);
1491
AlnMgr2IndexSingleChildSeqAlign(salp);
1492
saip = (SAIndex2Ptr)salp->saip;
1497
sap_tmp = sap_tmp->next;
1501
AMAlignIndex2Free2(sap->saip);
1502
sap->saip = (SeqAlignIndexPtr)AMAlignIndex2New();
1503
amaip = (AMAlignIndex2Ptr)(sap->saip);
1504
amaip->alnstyle = AM2_FULLINDEX;
1505
set_head = set_prev = NULL;
1506
while (sap_tmp != NULL)
1508
salp = (SeqAlignPtr)(sap_tmp->segs);
1509
while (salp != NULL)
1511
AlnMgr2AddInNewPairwiseSA(sap, salp);
1512
if (set_head != NULL)
1514
set_prev->next = salp;
1517
set_head = set_prev = salp;
1520
sap_tmp->segs = NULL;
1521
sap_tmp = sap_tmp->next;
1523
AlnMgr2CondenseColumns((DenseSegPtr)(amaip->sharedaln->segs));
1524
AlnMgr2IndexSingleChildSeqAlign(amaip->sharedaln);
1525
set_prev->next = NULL;
1526
sap->segs = (Pointer)(set_head);
1527
SeqAlignListFree(sap_head);
1528
SeqIdFree(sharedsip);
977
1532
/* SECTION 2c */
978
1533
/***************************************************************************
2060
2619
/* SECTION 2c */
2621
static Boolean AlnMgr2GetFirstRowForSeqId(
2626
SeqIdPtr PNTR sip_curr)
2628
Boolean found = FALSE;
2632
if (SeqIdComp(sip, *sip_curr) == SIC_YES &&
2633
strand == dsp->strands[*row_curr]) {
2636
*sip_curr = (*sip_curr)->next;
2637
if (found) return TRUE;
2643
static AMSeqPieceSetPtr AlnMgr2CreateSeqPieceSet(DenseSegPtr dsp, Int4 row)
2645
AMSeqPieceSetPtr s_set = (AMSeqPieceSetPtr)MemNew(sizeof(AMSeqPieceSet));
2646
AMSeqPiecePtr s = (AMSeqPiecePtr)MemNew(sizeof(AMSeqPiece));
2650
s->pos = row - dsp->dim;
2668
s_set->alt_row = -1;
2669
s_set->alt_row2 = -1;
2672
s_set->max_pos = dsp->dim * dsp->numseg;
2673
s_set->strand = dsp->strands[row];
2674
s_set->plus = s_set->strand != Seq_strand_minus;
2680
static AMSeqPiecePtr AlnMgr2GetNextSeqPiece(AMSeqPiecePtr s)
2684
AMSeqPiecePtr s_new;
2687
max_pos = s->set->max_pos;
2689
if (s->pos < max_pos) {
2690
s_new = (AMSeqPiecePtr)MemNew(sizeof(AMSeqPiece));
2691
s_new->pos = s->pos + dsp->dim;
2692
s_new->seg = s->seg + 1;
2693
s_new->set = s->set;
2695
s = s->next = s_new;
2700
/* initialize the following */
2710
/* find the beg and end */
2711
while (s->pos < max_pos) {
2712
if (dsp->starts[s->pos] != -1) {
2713
s->beg = s->end = dsp->starts[s->pos];
2715
s->end += dsp->lens[s->seg] - 1;
2717
s->beg += dsp->lens[s->seg] - 1;
2732
static AMSeqPiecePtr AlnMgr2GetNextLimitedSeqPiece(
2734
AMSeqPiecePtr right)
2737
Int4 new_pos, new_seg, max_pos, max_seg;
2738
AMSeqPiecePtr s_new;
2740
AMSeqPiecePtr left = right->prev;
2743
max_pos = s->set->max_pos;
2744
max_seg = right->seg;
2745
new_pos = s->pos + dsp->dim;
2746
new_seg = s->seg + 1;
2748
while (new_pos < max_pos && new_seg <= max_seg) {
2749
if (dsp->starts[new_pos] != -1) {
2750
s_new = (AMSeqPiecePtr)MemNew(sizeof(AMSeqPiece));
2751
s_new->pos = new_pos;
2752
s_new->seg = new_seg;
2753
s_new->set = s->set;
2756
s = s->next = s_new;
2758
s->beg = s->end = dsp->starts[s->pos];
2760
s->end += dsp->lens[s->seg] - 1;
2762
s->beg += dsp->lens[s->seg] - 1;
2764
/* aligned to a sequence in anchor or not */
2765
if (s->seg == right->seg) {
2767
s->left = right->beg;
2768
s->right = right->end;
2771
s->left = left->end;
2772
s->right = right->beg;
2774
/* these are not yet used */
2782
new_pos += dsp->dim;
2788
static Boolean AlnMgr2AddSeqPiece(
2789
AMSeqPieceSetPtr set,
2793
DenseSegPtr dsp = set->dsp;
2794
DenseSegPtr alt_dsp = what->set->dsp;
2796
s = (AMSeqPiecePtr)MemNew(sizeof(AMSeqPiece));
2800
if (alt_dsp == dsp) {
2809
s->alt_dsp = alt_dsp;
2810
s->alt_seg = what->seg;
2811
s->alt_pos = what->pos;
2813
s->left = what->left;
2814
s->right = what->right;
2815
s->orig_left = what->orig_left;
2816
s->orig_right = what->orig_right;
2817
s->aligned = what->aligned;
2820
if (s->prev = set->tail) {
2826
static Boolean AlnMgr2InsertSeqPiece(
2827
AMSeqPiecePtr where,
2832
DenseSegPtr dsp = where->set->dsp;
2833
DenseSegPtr alt_dsp = what->set->dsp;
2836
s = (AMSeqPiecePtr)MemNew(sizeof(AMSeqPiece));
2840
if (where->beg == what->beg) {
2841
s->seg = where->seg;
2842
s->pos = where->pos;
2843
where->beg = end + (where->set->plus? 1 : -1);
2844
if (alt_dsp == dsp) {
2849
s->alt_dsp = alt_dsp;
2850
s->alt_seg = what->seg;
2851
s->alt_pos = what->pos;
2854
if (alt_dsp == dsp) {
2863
s->alt_dsp = alt_dsp;
2864
s->alt_seg = what->seg;
2865
s->alt_pos = what->pos;
2868
s->left = what->left;
2869
s->right = what->right;
2870
s->orig_left = what->orig_left;
2871
s->orig_right = what->orig_right;
2872
s->aligned = what->aligned;
2873
s->set = where->set;
2875
if (s->prev = where->prev) {
2879
if (s->set->head == where) {
2887
static void AlnMgr2CopySeg(
2894
AMSeqPiecePtr PNTR s_ptr)
2896
Int4 i, rdelta, ldelta, POS, Pos, max_Pos, pos2, alt_pos2, SEG, Seg,
2901
POS = *POS_ptr; Pos = *Pos_ptr;
2902
SEG = *SEG_ptr; Seg = *Seg_ptr;
2905
if (s->set->row != s->set->row2) { /* if not a B */
2908
return; /* skip the last A */
2912
max_Pos = POS+Dsp->dim;
2914
DSP->lens[SEG] = ABS(s->end - s->beg) + 1;
2916
if (s->set->dsp != Dsp) { /* the extra row for the non-anchor seq */
2917
for (i = 0; POS < max_Pos; POS++, i++) {
2918
DSP->starts[POS] = -1;
2919
DSP->strands[POS] = Dsp->strands[i];
2921
DSP->starts[POS] = MIN(s->beg, s->end);
2922
DSP->strands[POS] = s->set->strand;
2925
} else { /* not dealing with the extra row itself */
2927
if (s->pos >= 0 && s->set->row != s->set->row2) { /* Dsp involved */
2928
beg = end = s->set->dsp->starts[s->pos];
2930
end += s->set->dsp->lens[s->seg]-1;
2932
beg += s->set->dsp->lens[s->seg]-1;
2934
if (ldelta = ABS(s->beg - beg)) {
2935
/* need to "continue" from the orig seg */
2936
Pos = s->pos - s->set->row;
2939
rdelta = ABS(end - s->end);
2941
for (; POS < max_Pos; POS++, Pos++) {
2942
DSP->strands[POS] = Dsp->strands[Pos];
2943
plus = DSP->strands[POS] != Seq_strand_minus;
2944
if (Dsp->starts[Pos] != -1) {
2945
DSP->starts[POS] = Dsp->starts[Pos] + (plus ? ldelta : rdelta);
2947
DSP->starts[POS] = -1;
2958
if (s->alt_dsp) { /* dsp involved too */
2960
s->alt_pos + s->set->alt_row2 - s->set->alt_row;
2961
beg = end = s->alt_dsp->starts[s->alt_pos];
2962
if (s->alt_dsp->strands[s->alt_pos] == Seq_strand_minus) {
2963
beg += s->alt_dsp->lens[s->alt_seg]-1;
2965
end += s->alt_dsp->lens[s->alt_seg]-1;
2967
ldelta = ABS(s->beg - beg);
2968
rdelta = ABS(end - s->end);
2970
if (s->set->row2 != -1) { /* 2nd row merged*/
2971
pos2 = POS - DSP->dim + s->set->row2;
2972
} else { /* extra row */
2976
DSP->strands[pos2] = s->alt_dsp->strands[alt_pos2];
2977
plus = DSP->strands[pos2] != Seq_strand_minus;
2978
if (s->alt_dsp->starts[alt_pos2] != -1) {
2979
DSP->starts[pos2] = s->alt_dsp->starts[alt_pos2] +
2980
(plus ? ldelta : rdelta);
2982
DSP->starts[pos2] = -1;
2984
} else { /* dsp not involved */
2985
if (s->set->row2 == -1) { /* 2nd row not merged */
2986
DSP->starts[POS] = -1;
2988
s->set->alt_dsp->strands[s->set->alt_row2];
2992
} else { /* Dsp not involved */
2993
for (i = 0; POS < max_Pos; POS++, i++) {
2994
DSP->starts[POS] = -1;
2995
DSP->strands[POS] = Dsp->strands[i];
2997
if (s->set->row == s->set->row2) { /* if a B */
2998
if (!(s->alt_dsp)) {
2999
Pos += s->set->dsp->dim; /* move to next seg */
3002
} else { /* not a B */
3004
s->alt_pos + s->set->alt_row2 - s->set->alt_row;
3006
beg = end = s->alt_dsp->starts[s->alt_pos];
3007
if (s->alt_dsp->strands[s->alt_pos] == Seq_strand_minus) {
3008
beg += s->alt_dsp->lens[s->alt_seg]-1;
3010
end += s->alt_dsp->lens[s->alt_seg]-1;
3012
ldelta = ABS(s->beg - beg);
3013
rdelta = ABS(end - s->end);
3015
if (s->set->row2 != -1) { /* merged row2 */
3016
pos2 = POS - DSP->dim + s->set->row2;
3021
DSP->strands[pos2] = s->alt_dsp->strands[alt_pos2];
3022
plus = DSP->strands[pos2] != Seq_strand_minus;
3023
if (s->alt_dsp->starts[alt_pos2] != -1) {
3024
DSP->starts[pos2] = s->alt_dsp->starts[alt_pos2] +
3025
(plus ? ldelta : rdelta);
3027
DSP->starts[pos2] = -1;
3030
DSP->starts[POS + s->set->row - DSP->dim] = MIN(s->beg, s->end);
3035
*s_ptr = (*s_ptr)->next;
3040
NLM_EXTERN void AlnMgr2AddInNewPairwiseSA(SeqAlignPtr parent, SeqAlignPtr sap)
3042
AMAlignIndex2Ptr amaip;
3043
DenseSegPtr dsp, Dsp, DSP;
3045
Int4 Pos, POS, max_POS;
3047
Int4 anchor, Anchor;
3049
SeqIdPtr sip, extra_sip;
3050
AMSeqPieceSetPtr a_set, A_set, b_set, B_set_head, B_set;
3051
AMSeqPiecePtr a, A, b, B;
3053
Boolean a_plus, b_plus;
3057
dsp = (DenseSegPtr)(sap->segs);
3058
if (dsp->dim != 2) {
3059
if (dsp->dim == 0) {
3060
dsp->dim = 2; /* set to default */
3062
ErrPostEx(SEV_ERROR, 0,0,
3063
"AlnMgr2AddInNewPairwiseSA: dsp->dim (=%d) should be 2.",
3068
if (dsp->numseg < 1) {
3069
ErrPostEx(SEV_ERROR, 0,0,
3070
"AlnMgr2AddInNewPairwiseSA: dsp->numseg (=%d) should be > 0.",
3075
amaip = (AMAlignIndex2Ptr)(parent->saip);
3076
if (amaip->sharedaln == NULL) {/* first alignment to be added */
3080
salp = SeqAlignDup(sap);
3081
AlnMgr2IndexSingleChildSeqAlign(salp);
3082
amaip->sharedaln = salp;
3083
amaip->numrows = dsp->dim;
3085
amaip->ids = (SeqIdPtr PNTR)MemNew((dsp->dim)*sizeof(SeqIdPtr));
3087
while (sip != NULL) {
3088
amaip->ids[i] = SeqIdDup(sip);
3092
MemFree(amaip->saps);
3093
amaip->saps = (SeqAlignPtr PNTR)MemNew(sizeof(SeqAlignPtr));
3094
amaip->saps[0] = sap;
3096
MemFree(amaip->aligned);
3097
amaip->aligned = (Boolean PNTR) MemNew(sizeof(Boolean));
3098
amaip->aligned[0] = TRUE;
3103
/* add the new sap */
3105
amaip->saps = (SeqAlignPtr PNTR) MemMore
3106
(amaip->saps, amaip->numsaps*sizeof(SeqAlignPtr));
3107
amaip->saps[amaip->numsaps-1] = sap;
3108
amaip->aligned = (Boolean PNTR) MemMore
3109
(amaip->aligned, (amaip->numsaps)*sizeof(Boolean));
3110
amaip->aligned[amaip->numsaps-1] = TRUE;
3112
Dsp = (DenseSegPtr)(amaip->sharedaln->segs);
3114
AlnMgr2GetFirstSharedRow(amaip->sharedaln, sap, &Anchor, &anchor);
3116
{{ /* make sure the shared rows are on the same strand */
3117
Uint1 Strand, strand;
3119
Strand = AlnMgr2GetNthStrand(amaip->sharedaln, Anchor);
3120
if (Strand == Seq_strand_unknown)
3121
Strand = Seq_strand_plus;
3122
strand = AlnMgr2GetNthStrand(sap, anchor);
3123
if (strand == Seq_strand_unknown)
3124
strand = Seq_strand_plus;
3125
if (Strand != strand) {
3126
SeqAlignListReverseStrand(sap);
3127
SAIndex2Free2(sap->saip);
3129
AlnMgr2IndexSingleChildSeqAlign(sap);
3130
dsp = (DenseSegPtr)(sap->segs);
3131
strand = AlnMgr2GetNthStrand(sap, anchor);
3132
if (strand == Seq_strand_unknown)
3133
strand = Seq_strand_plus;
3135
a_plus = strand != Seq_strand_minus;
3137
anchor--; Anchor--; /* make them 0-based */
3139
/* create new dsp */
3140
DSP = DenseSegNew();
3141
DSP->numseg = Dsp->numseg;
3142
DSP->dim = Dsp->dim;
3143
/* DSP->ids = SeqIdDupList(Dsp->ids); */
3145
/* collect other shared seqids */
3146
b_set = B_set = B_set_head = NULL;
3147
row = -1; sip = Dsp->ids;
3148
extra_sip = dsp->ids;
3150
extra_sip = extra_sip->next;
3152
while (AlnMgr2GetFirstRowForSeqId
3153
(Dsp, extra_sip, dsp->strands[1-anchor], &row, &sip)) {
3155
B_set->next = AlnMgr2CreateSeqPieceSet(Dsp, row);
3156
B_set = B_set->next;
3158
B_set = B_set_head = AlnMgr2CreateSeqPieceSet(Dsp, row);
3161
b_plus = dsp->strands[1-anchor] != Seq_strand_minus;
3164
DSP->ids = Dsp->ids;
3168
a_set = AlnMgr2CreateSeqPieceSet(dsp, anchor);
3170
b_set = AlnMgr2CreateSeqPieceSet(dsp, 1-anchor);
3171
while (a = AlnMgr2GetNextSeqPiece(a)) {
3173
while (b = AlnMgr2GetNextLimitedSeqPiece(b, a)) {
3181
A_set = AlnMgr2CreateSeqPieceSet(Dsp, Anchor);
3183
while (A = AlnMgr2GetNextSeqPiece(A)) {
3187
while (B = AlnMgr2GetNextLimitedSeqPiece(B, A)) {};
3193
A_set->alt_row = a_set->row;
3194
a = a_set->head->next;
3195
A = A_set->head->next;
3196
while (a && A && a->next && A->next) {
3197
if (a_plus ? a->beg < A->beg : a->beg > A->beg) {
3198
AlnMgr2InsertSeqPiece
3199
(A, a, a_plus ? MIN(a->end, A->beg-1) : MAX(a->end, A->beg+1));
3201
if (a_plus ? a->end < A->beg : a->end > A->beg) {
3206
} else if (a_plus ? A->beg < a->beg : A->beg > a->beg) {
3207
if (a_plus ? A->end < a->beg : A->end > a->beg) {
3210
AlnMgr2InsertSeqPiece(A, A, a_plus ? a->beg - 1 : a->beg + 1);
3213
} else { /* a->beg == A->beg */
3214
if (a_plus ? a->end < A->end : a->end > A->end) {
3215
AlnMgr2InsertSeqPiece(A, a, a->end);
3218
} else if (a_plus ? a->end > A->end : a->end < A->end) {
3219
a->beg = A->end + (a_plus ? 1 : -1);
3220
A->alt_dsp = a->set->dsp;
3221
A->alt_seg = a->seg;
3222
A->alt_pos = a->pos;
3224
} else { /* a->end == A->end */
3225
A->alt_dsp = a->set->dsp;
3226
A->alt_seg = a->seg;
3227
A->alt_pos = a->pos;
3233
while (a && a->next) {
3234
AlnMgr2InsertSeqPiece(A, a, a->end);
3239
/* set the upper limits */
3243
A_set->tail->end = A_set->tail->beg = A_set->tail->prev->end + 1;
3246
while (b && b->right == -1) {
3247
b->right = upper_limit;
3254
while (B && B->right == -1) {
3255
B->right = upper_limit;
3258
B_set = B_set->next;
3263
A_set->head->beg = A_set->head->end = A_set->head->next->beg + 1;
3266
while (b && b->left == -1) {
3267
b->left = upper_limit;
3274
while (B && B->left == -1) {
3275
B->left = upper_limit;
3278
B_set = B_set->next;
3284
/* try to resolve b, B */
3286
b = b_set->head->next;
3289
B = B_set->head->next;
3293
if (b_plus ? b->beg < B->beg : b->beg > B->beg) {
3294
if (b_plus ? b->end < B->beg : b->end > B->beg) {
3295
/* trim the limits */
3296
if (a_plus ? B->left <= b->left : B->left >= b->left) {
3297
if (a_plus ? B->right < b->left : B->right > b->left) {
3298
conflict = TRUE; break;
3301
conflict = TRUE; break; /* no trimming allowed */
3306
if (a_plus ? b->right > B->right : b->right < B->right) {
3308
conflict = TRUE; break; /* no trimming allowed */
3310
b->orig_right = b->right; /* for recovering */
3311
b->right = B->right;
3315
AlnMgr2InsertSeqPiece(B, b, b->end);
3316
if (!(b->aligned)) extra_segs++;
3319
conflict = TRUE; break;
3322
} else if (b_plus ? B->beg < b->beg : B->beg > b->beg) {
3323
if (b_plus ? B->end < b->beg : B->end > b->beg) {
3324
/* trim the limits */
3325
if (a_plus ? b->left < B->left : b->left > B->left) {
3326
if (a_plus ? b->right < B->left : b->right > B->left) {
3327
conflict = TRUE; break;
3330
conflict = TRUE; break; /* no trimming allowed */
3332
b->orig_left = b->left; /* for recovering */
3336
if (a_plus ? B->right > b->right : B->right < b->right) {
3338
conflict = TRUE; break; /* no trimming allowed */
3340
B->right = b->right;
3348
conflict = TRUE; break;
3350
} else { /* B->beg == b->beg */
3351
conflict = TRUE; break;
3356
AlnMgr2AddSeqPiece(B_set, b);
3357
if (!(b->aligned)) extra_segs++;
3360
/* DSP->numseg += extra_segs; */
3363
/* conflict, roll back b, recovering limits, try next B */
3368
if (b->orig_left != -2) {
3369
b->left = b->orig_left;
3371
if (b->orig_right != -2) {
3372
b->right = b->orig_right;
3376
b = b_set->head->next;
3377
B_set = B_set->next;
3380
if (B_set) { /* B_set has no conflict with b_set */
3381
B = B_set->head->next;
3382
B_set->row2 = B_set->row; /* mark the set */
3383
A_set->row2 = B_set->row;
3384
A_set->alt_row2 = b_set->row;
3385
} else { /* this mean extra row */
3387
A_set->alt_row2 = b_set->row;
3388
A_set->alt_dsp = b_set->dsp;
3394
AddSeqId(&sip, extra_sip);
3396
/* fix the index too */
3397
amaip->numrows = DSP->dim;
3398
amaip->ids = (SeqIdPtr PNTR)MemMore
3399
(amaip->ids,amaip->numrows*sizeof(SeqIdPtr));
3400
amaip->ids[amaip->numrows-1] = SeqIdDup(extra_sip);
3402
b_set->row2 = b_set->row; /* mark the set */
3403
B = b_set->head->next;
3404
B_beg = -1; /* nothing to comp Bs to */
3407
/* allocate memory for the new sharedaln matrix */
3408
DSP->starts = (Int4Ptr)MemNew(DSP->numseg * DSP->dim * sizeof(Int4));
3409
DSP->strands = (Uint1Ptr)MemNew(DSP->numseg * DSP->dim * sizeof(Uint1));
3410
DSP->lens = (Int4Ptr)MemNew(DSP->numseg * sizeof(Int4));
3412
/* loop through segments */
3413
POS = 0; Pos = 0; Seg = 0; SEG = 0;
3414
A = A_set->head->next;
3415
while (Seg < Dsp->numseg) {
3417
A_end = Dsp->starts[Pos+A_set->row];
3418
if (a_plus && A_end >= 0) {
3419
A_end += Dsp->lens[Seg] - 1;
3422
B_beg = Dsp->starts[Pos+B_set->row];
3426
while (A && (a_plus ? A->end <= A_end : A->end >= A_end)) {
3427
while (B && (a_plus ? B->left < A->beg : B->left > A->beg)) {
3430
break; /* the aligned piece should be last */
3432
AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &B);
3435
if (B && B->aligned && B->left == A->beg) {
3438
AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &A);
3440
} else if (B && B_beg >= 0) {
3441
while (B && (b_plus ? B->beg <= B_beg : B->beg >= B_beg)) {
3442
while (A && (a_plus ? A->beg <= B->left : A->beg >= B->left)) {
3443
AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &A);
3448
AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &B);
3452
/* just copy the Dsp segment */
3453
DSP->lens[SEG] = Dsp->lens[Seg];
3454
max_POS = POS + Dsp->dim;
3455
for (; POS < max_POS; POS++, Pos++) {
3456
DSP->starts[POS] = Dsp->starts[Pos];
3457
DSP->strands[POS] = Dsp->strands[Pos];
3459
if (DSP->dim > Dsp->dim) {
3460
DSP->starts[POS] = -1;
3461
DSP->strands[POS] = dsp->strands[1-anchor];
3469
while (B && (a_plus ? B->right <= A->beg : B->right >= A->beg)) {
3473
AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &B);
3476
AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &A);
3482
AlnMgr2CopySeg(DSP, &SEG, &POS, Dsp, &Seg, &Pos, &B);
3487
AMSeqPieceSetFree(A_set);
3488
AMSeqPieceSetFree(a_set);
3489
AMSeqPieceSetFree(B_set_head);
3490
AMSeqPieceSetFree(b_set);
3492
amaip->sharedaln->segs = DSP;
3493
/* update the dim for the shared_aln to match the new DensegPtr */
3494
amaip->sharedaln->dim = DSP->dim;
2061
3499
/***************************************************************************
2063
3501
* AlnMgr2AddInNewSA adds a seqalign to an existing seqalign. The new
2814
4459
***************************************************************************/
2815
4460
static Boolean AlnMgr2DoCondense(DenseSegPtr dsp, Int4 rownum1, Int4 rownum2)
4463
SeqAlignPtr fake_sap;
4484
AM_Small2Ptr window;
4485
AM_Small2Ptr window_head;
4486
AM_Small2Ptr window_prev;
4488
/* always merge up to rownum1 (better rows are first) */
4489
if (rownum1 > rownum2)
4495
strand1 = dsp->strands[rownum1-1];
4496
strand2 = dsp->strands[rownum2-1];
4497
if (strand1 != strand2)
4500
window_head = window_prev = NULL;
4501
while (i < dsp->numseg)
4504
someseq1 = someseq2 = FALSE;
4505
if (dsp->starts[dsp->dim*j+rownum1-1] >= 0)
4508
while (j<dsp->numseg && dsp->starts[dsp->dim*j+rownum2-1] < 0)
4512
} else if (dsp->starts[dsp->dim*j+rownum2-1] >= 0)
4515
while (j<dsp->numseg && dsp->starts[dsp->dim*j+rownum1-1] < 0)
4523
if (strand1 == Seq_strand_minus)
4525
if (someseq1 == FALSE)
4528
for (k=j; min1 == -1 && k<dsp->numseg; k++)
4530
if (dsp->starts[dsp->dim*k+rownum1-1] > -1)
4531
min1 = dsp->starts[dsp->dim*k+rownum1-1]+dsp->lens[k]-1;
4534
for (k=(i-1); max1 == -1 && k>=0; k--)
4536
max1 = dsp->starts[dsp->dim*k+rownum1-1];
4541
for (k=j-1; min1 == -1 && k>=i; k--)
4543
min1 = dsp->starts[dsp->dim*(k)+rownum1-1];
4546
for (k=i; min1 == -1 && k<j; k++)
4548
if (dsp->starts[dsp->dim*k+rownum1-1] >= 0)
4549
max1 = dsp->starts[dsp->dim*k+rownum1-1] + dsp->lens[k] -1;
4554
if (someseq1 == FALSE)
4557
for (k=i-1; min1 == -1 && k >= 0; k--)
4559
if (dsp->starts[dsp->dim*k+rownum1-1] > -1)
4560
min1 = dsp->starts[dsp->dim*k+rownum1-1]+dsp->lens[k]-1;
4563
for (k=j; max1 == -1 && k<dsp->numseg; k++)
4565
max1 = dsp->starts[dsp->dim*k+rownum1-1];
4570
for (k=i; min1 == -1 && k<j; k++)
4572
min1 = dsp->starts[dsp->dim*k+rownum1-1];
4575
for (k=j-1; max1 == -1 && k>i; k--)
4577
if (dsp->starts[dsp->dim*k+rownum1-1] >= 0)
4578
max1 = dsp->starts[dsp->dim*(k)+rownum1-1] + dsp->lens[k] - 1;
4582
if (strand2 == Seq_strand_minus)
4584
if (someseq2 == FALSE)
4587
for (k=j; min2 == -1 && k<dsp->numseg; k++)
4589
if (dsp->starts[dsp->dim*k+rownum2-1] > -1)
4590
min2 = dsp->starts[dsp->dim*k+rownum2-1]+dsp->lens[k]-1;
4593
for (k=(i-1); max2 == -1 && k>=0; k--)
4595
max2 = dsp->starts[dsp->dim*k+rownum2-1];
4600
for (k=j-1; min2 == -1 && k>=i; k--)
4602
min2 = dsp->starts[dsp->dim*(k)+rownum2-1];
4605
for (k=i; max2 == -1 && k<j; k++)
4607
if (dsp->starts[dsp->dim*k+rownum2-1] >= 0)
4608
max2 = dsp->starts[dsp->dim*k+rownum2-1] + dsp->lens[k]-1;
4613
if (someseq2 == FALSE)
4616
for (k=i-1; min2 == -1 && k >= 0; k--)
4618
if (dsp->starts[dsp->dim*k+rownum2-1] > -1)
4619
min2 = dsp->starts[dsp->dim*k+rownum2-1]+dsp->lens[k]-1;
4622
for (k=j; max2 == -1 && k<dsp->numseg; k++)
4624
max2 = dsp->starts[dsp->dim*k+rownum2-1];
4629
for (k=i; min2 == -1 && k<j; k++)
4631
min2 = dsp->starts[dsp->dim*k+rownum2-1];
4634
for (k=j-1; max2 == -1 && k>=i; k--)
4636
if (dsp->starts[dsp->dim*(k)+rownum2-1] >= 0)
4637
max2 = dsp->starts[dsp->dim*(k)+rownum2-1] + dsp->lens[k] - 1;
4641
if (someseq1 == FALSE)
4643
if ((min1 < min2 || min2 == -1) && (max1 > max2 || max1 == -1))
4647
if ((min2 < min1 || min1 == -1) && (max2 > max1 || max2 == -1))
4650
window = (AM_Small2Ptr)MemNew(sizeof(AM_Small2));
4655
if (window_head != NULL)
4657
window_prev->next = window;
4658
window_prev = window;
4660
window_head = window_prev = window;
4667
if (window_head == NULL)
4669
fake_sap = SeqAlignNew();
4670
fake_sap->segtype = SAS_DENSEG;
4671
fake_sap->segs = (Pointer)dsp;
4672
AlnMgr2IndexSingleChildSeqAlign(fake_sap);
4673
aln = AlnMgr2GetNumAlnBlocks(fake_sap);
4674
if (aln == 1) /* only merge if there is a single fitted window flanked by gaps */
4675
/*or if there are several contiguous fitted windows flanked by gaps */
4677
if (window_head->next != NULL && window_head->n4 == 0)
4679
window = window_head->next;
4680
while (window_head->n2+1 < dsp->numseg && dsp->starts[dsp->dim*(window_head->n2+1)+rownum1-1] == -1 && dsp->starts[dsp->dim*(window_head->n2+1)+rownum2-1] == -1)
4684
while (window != NULL && window->n4 == 0 && window->n1 == window_head->n2+1)
4686
window_head->n2 = window->n2;
4687
window = window->next;
4688
while (window_head->n2+1 < dsp->numseg && dsp->starts[dsp->dim*(window_head->n2+1)+rownum1-1] == -1 && dsp->starts[dsp->dim*(window_head->n2+1)+rownum2-1] == -1)
4695
while (window_head != NULL)
4697
window = window_head->next;
4698
MemFree(window_head);
4699
window_head = window;
4701
fake_sap->segs = NULL;
4702
SeqAlignFree(fake_sap);
4706
if (window_head->n4 == -1)
4708
while (window_head != NULL)
4710
window = window_head->next;
4711
MemFree(window_head);
4712
window_head = window;
4714
fake_sap->segs = NULL;
4715
SeqAlignFree(fake_sap);
4719
for (i=0; !found && i<window_head->n1; i++)
4721
if (dsp->starts[dsp->dim*i+rownum1-1] != -1 && dsp->starts[dsp->dim*i+rownum2-1] != -1)
4724
for (i=window_head->n2+1; !found && i<dsp->numseg; i++)
4726
if (dsp->starts[dsp->dim*i+rownum1-1] != -1 && dsp->starts[dsp->dim*i+rownum2-1] != -1)
4731
while (window_head != NULL)
4733
window = window_head->next;
4734
MemFree(window_head);
4735
window_head = window;
4737
fake_sap->segs = NULL;
4738
SeqAlignFree(fake_sap);
4741
/* merge whole row up to rownum1 */
4742
for (i=0; i<dsp->numseg; i++)
4744
dsp->starts[dsp->dim*i+rownum1-1] = MAX(dsp->starts[dsp->dim*i+rownum1-1], dsp->starts[dsp->dim*i+rownum2-1]);
4746
starts = (Int4Ptr)MemNew((dsp->dim-1)*(dsp->numseg)*sizeof(Int4));
4747
strands = (Uint1Ptr)MemNew((dsp->dim-1)*(dsp->numseg)*sizeof(Uint1));
4749
for (i=0; i<dsp->dim; i++)
4753
for (j=0; j<dsp->numseg; j++)
4755
starts[(dsp->dim-1)*j+k] = dsp->starts[dsp->dim*j+i];
4756
strands[(dsp->dim-1)*j+k] = dsp->strands[dsp->dim*j+i];
4761
MemFree(dsp->starts);
4762
MemFree(dsp->strands);
4763
dsp->starts = starts;
4764
dsp->strands = strands;
4766
id_head = id_prev = NULL;
4773
if (id_head != NULL)
4775
id_prev->next = SeqIdDup(id);
4776
id_prev = id_prev->next;
4778
id_head = id_prev = SeqIdDup(id);
4783
SeqIdSetFree(dsp->ids);
4785
while (window_head != NULL)
4787
window = window_head->next;
4788
MemFree(window_head);
4789
window_head = window;
4791
fake_sap->segs = NULL;
4792
SeqAlignFree(fake_sap);
4795
/* now go through and find the largest piece of every window that can be merged */
4796
/* (can't split up an aligned region with the merge, though) */
4797
window = window_head;
4798
saip = (SAIndex2Ptr)(fake_sap->saip);
4799
while (window != NULL)
4803
for (i=0; !found && i<window->n1; i++)
4805
if (dsp->starts[dsp->dim*i+rownum1-1] != -1 && dsp->starts[dsp->dim*i+rownum2-1] != -1)
4811
for (i=window->n2+1; !found && i<dsp->numseg; i++)
4813
if (dsp->starts[dsp->dim*i+rownum1-1] != -1 && dsp->starts[dsp->dim*i+rownum2-1] != -1)
4821
for (i = window->n1-1; !found && i<window->n2; i++)
4823
j = binary_search_on_uint4_list(saip->unaln, i, saip->numunaln);
4833
for (i = window->n2; !found && i>=window->n1; i++)
4835
k = binary_search_on_uint4_list(saip->unaln, i, saip->numunaln);
4842
if (j > -1 && k > -1 && k > j)
4848
window = window->next;
4850
window = window_head;
4851
while (window != NULL)
4853
if (window->n4 == -1 && i >= 0) /* see if it fits now */
4857
if (strand1 == Seq_strand_minus)
4859
if (dsp->starts[dsp->dim*(j-1)+rownum1-1] == -1)
4862
for (k=j; min1 == -1 && k<dsp->numseg; k++)
4864
min1 = dsp->starts[dsp->dim*k+rownum1-1];
4867
for (k=(i-1); max1 == -1 && k>=0; k--)
4869
max1 = dsp->starts[dsp->dim*k+rownum1-1];
4873
min1 = dsp->starts[dsp->dim*(j-1)+rownum1-1];
4874
max1 = dsp->starts[dsp->dim*i+rownum1-1] + dsp->lens[i];
4878
if (dsp->starts[dsp->dim*(j-1)+rownum1-1] == -1)
4881
for (k=i-1; min1 == -1 && k >= 0; k--)
4883
min1 = dsp->starts[dsp->dim*k+rownum1-1];
4886
for (k=j; max1 == -1 && k<dsp->numseg; k++)
4888
max1 = dsp->starts[dsp->dim*k+rownum1-1];
4892
min1 = dsp->starts[dsp->dim*i+rownum1-1];
4893
max1 = dsp->starts[dsp->dim*(j-1)+rownum1-1] + dsp->lens[j-1];
4896
if (strand2 == Seq_strand_minus)
4898
if (dsp->starts[dsp->dim*(j-1)+rownum2-1] == -1)
4901
for (k=j; min2 == -1 && k<dsp->numseg; k++)
4903
min2 = dsp->starts[dsp->dim*k+rownum2-1];
4906
for (k=(i-1); max2 == -1 && k>=0; k--)
4908
max2 = dsp->starts[dsp->dim*k+rownum2-1];
4912
min2 = dsp->starts[dsp->dim*(j-1)+rownum2-1];
4913
max2 = dsp->starts[dsp->dim*i+rownum2-1] + dsp->lens[i];
4917
if (dsp->starts[dsp->dim*(j-1)+rownum2-1] == -1)
4920
for (k=i-1; min2 == -1 && k >= 0; k--)
4922
min2 = dsp->starts[dsp->dim*k+rownum2-1];
4925
for (k=j; max2 == -1 && k<dsp->numseg; k++)
4927
max2 = dsp->starts[dsp->dim*k+rownum2-1];
4931
min2 = dsp->starts[dsp->dim*i+rownum2-1];
4932
max2 = dsp->starts[dsp->dim*(j-1)+rownum2-1] + dsp->lens[j-1];
4935
if (dsp->starts[dsp->dim*j+rownum1-1] == -1)
4937
if (min1 < min2 && (max1 > max2 || max1 == -1))
4941
if (min2 < min1 && (max2 > max1 || max2 == -1))
4945
if (window->n1 >= 0 && window->n4 >= 0)
4947
for (i=window->n1; i<=window->n2; i++)
4949
dsp->starts[dsp->dim*i+rownum1-1] = MAX(dsp->starts[dsp->dim*i+rownum1-1], dsp->starts[dsp->dim+i+rownum2-1]);
4952
window = window->next;
4955
/* check to see if rownum2 is all gaps now */
4956
for (i=0; !found && i<dsp->numseg; i++)
4958
if (dsp->starts[dsp->dim*i+rownum2-1] != -1)
2838
4961
merged = FALSE;
2839
while (i < dsp->numseg && !merged)
2841
if (dsp->starts[dsp->dim*i+rownum1-1] == -1 && dsp->starts[dsp->dim*i+rownum2-1] >= 0)
2845
for (j=i-1; j>=0 && left == -1; j--)
2847
if (dsp->starts[dsp->dim*j+rownum1-1] >= 0 || dsp->starts[dsp->dim*j+rownum2-1] == -1)
2852
for (j=i+1; j<dsp->numseg && right == -1; j++)
2854
if (dsp->starts[dsp->dim*j+rownum1-1] >= 0 || dsp->starts[dsp->dim*j+rownum2-1] == -1)
2858
right = dsp->numseg-1;
2859
if (dsp->strands[rownum2-1] == Seq_strand_minus)
2861
left2 = dsp->starts[dsp->dim*left+rownum2-1] + dsp->lens[left]-1;
2862
right2 = dsp->starts[dsp->dim*right+rownum2-1];
2865
left2 = dsp->starts[dsp->dim*left+rownum2-1];
2866
right2 = dsp->starts[dsp->dim*right+rownum2-1] + dsp->lens[right]-1;
2868
left1 = right1 = -1;
2869
for (j=i-1; j>=0 && left1 == -1; j--)
2871
if (dsp->starts[dsp->dim*j+rownum1-1] >= 0)
2873
if (dsp->strands[rownum1-1] == Seq_strand_minus)
2874
left1 = dsp->starts[dsp->dim*j+rownum1-1];
2876
left1 = dsp->starts[dsp->dim*j+rownum1-1]+dsp->lens[j] - 1;
2879
for (j=i+1; j<dsp->numseg && right1 == -1; j++)
2881
if (dsp->starts[dsp->dim*j+rownum1-1] >= 0)
2883
if (dsp->strands[rownum1-1] == Seq_strand_minus)
2884
right1 = dsp->starts[dsp->dim*j+rownum1-1]+dsp->lens[j] - 1;
2886
right1 = dsp->starts[dsp->dim*j+rownum1-1];
2890
if (dsp->strands[rownum1-1] == Seq_strand_minus)
2892
if (right2 > right1 && (left1 > left2 || left1 == -1))
2896
if (left2 > left1 && (right2 < right1 || right1 == -1))
2899
if (OK == TRUE && (left1 == -1 || right1 == -1 || abs(left1-right1) > 1))
2905
if (dsp->starts[dsp->dim*j+rownum2-1] == -1)
2909
while (j>=0 && !done)
2911
if (dsp->starts[dsp->dim*j+rownum1-1] >= 0)
2914
if (dsp->strands[rownum2-1] == Seq_strand_minus)
2916
if (dsp->starts[dsp->dim*i+rownum2-1]+dsp->lens[i] >= dsp->starts[dsp->dim*j+rownum2-1])
2921
if (dsp->starts[dsp->dim*j+rownum2-1]+dsp->lens[j] >= dsp->starts[dsp->dim*i+rownum2-1])
2929
if (dsp->strands[rownum2-1] == Seq_strand_minus)
2931
if (dsp->starts[dsp->dim*i+rownum2-1]+dsp->lens[i] >= dsp->starts[dsp->dim*j+rownum2-1])
2935
if (dsp->starts[dsp->dim*j+rownum2-1]+dsp->lens[j] >= dsp->starts[dsp->dim*i+rownum2-1])
2942
for (j=left; j<=right; j++)
2944
dsp->starts[dsp->dim*j+rownum1-1] = dsp->starts[dsp->dim*j+rownum2-1];
2945
dsp->starts[dsp->dim*j+rownum2-1] = -1;
2947
/* now see if rownum2 has anything left on it; if not, delete it */
2949
for (j=0; j<dsp->numseg && !done; j++)
2951
if (dsp->starts[dsp->dim*j+rownum2-1] != -1)
2954
if (!done) /* nothing but gaps */
2956
starts = (Int4Ptr)MemNew((dsp->dim-1)*dsp->numseg*sizeof(Int4));
2957
strands = (Uint1Ptr)MemNew((dsp->dim-1)*dsp->numseg*sizeof(Uint1));
2959
for (j=0; j<dsp->dim; j++)
2963
for (k=0; k<dsp->numseg; k++)
2965
starts[(dsp->dim-1)*k+row] = dsp->starts[(dsp->dim)*k+j];
2966
strands[(dsp->dim-1)*k+row] = dsp->strands[(dsp->dim)*k+j];
2971
MemFree(dsp->starts);
2972
MemFree(dsp->strands);
2973
dsp->starts = starts;
2974
dsp->strands = strands;
2976
id_head = id_prev = NULL;
2983
if (id_head != NULL)
2985
id_prev->next = SeqIdDup(id);
2986
id_prev = id_prev->next;
2988
id_head = id_prev = SeqIdDup(id);
2993
SeqIdSetFree(dsp->ids);
3000
} else if (dsp->starts[dsp->dim*i+rownum1-1] >=0 && dsp->starts[dsp->dim*i+rownum2-1] == -1)
3004
for (j=i-1; j>=0 && left == -1; j--)
3006
if (dsp->starts[dsp->dim*j+rownum2-1] >= 0 || dsp->starts[dsp->dim*j+rownum1-1] == -1)
3011
for (j=i+1; j<dsp->numseg && right == -1; j++)
3013
if (dsp->starts[dsp->dim*j+rownum2-1] >= 0 || dsp->starts[dsp->dim*j+rownum1-1] == -1)
3017
right = dsp->numseg-1;
3018
if (dsp->strands[rownum1-1] == Seq_strand_minus)
3020
left2 = dsp->starts[dsp->dim*left+rownum1-1] + dsp->lens[left]-1;
3021
right2 = dsp->starts[dsp->dim*right+rownum1-1];
3024
left2 = dsp->starts[dsp->dim*left+rownum1-1];
3025
right2 = dsp->starts[dsp->dim*right+rownum1-1] + dsp->lens[right]-1;
3027
left1 = right1 = -1;
3028
for (j=i-1; j>=0 && left1 == -1; j--)
3030
if (dsp->starts[dsp->dim*j+rownum2-1] >= 0)
3032
if (dsp->strands[rownum2-1] == Seq_strand_minus)
3033
left1 = dsp->starts[dsp->dim*j+rownum2-1];
3035
left1 = dsp->starts[dsp->dim*j+rownum2-1]+dsp->lens[j] - 1;
3038
for (j=i+1; j<dsp->numseg && right1 == -1; j++)
3040
if (dsp->starts[dsp->dim*j+rownum2-1] >= 0)
3042
if (dsp->strands[rownum2-1] == Seq_strand_minus)
3043
right1 = dsp->starts[dsp->dim*j+rownum2-1]+dsp->lens[j] - 1;
3045
right1 = dsp->starts[dsp->dim*j+rownum2-1];
3049
if (dsp->strands[rownum2-1] == Seq_strand_minus)
3051
if (right2 > right1 && (left1 > left2 || left1 == -1))
3055
if (left2 > left1 && (right2 < right1 || right1 == -1))
3058
if (OK == TRUE && (left1 == -1 || right1 == -1 || abs(left1-right1) > 1))
3064
if (dsp->starts[dsp->dim*j+rownum1-1] == -1)
3068
while (j>=0 && !done)
3070
if (dsp->starts[dsp->dim*j+rownum2-1] >= 0)
3073
if (dsp->strands[rownum1-1] == Seq_strand_minus)
3075
if (dsp->starts[dsp->dim*i+rownum1-1]+dsp->lens[i] >= dsp->starts[dsp->dim*j+rownum1-1])
3080
if (dsp->starts[dsp->dim*j+rownum1-1]+dsp->lens[j] >= dsp->starts[dsp->dim*i+rownum1-1])
3088
if (dsp->strands[rownum1-1] == Seq_strand_minus)
3090
if (dsp->starts[dsp->dim*i+rownum1-1]+dsp->lens[i] >= dsp->starts[dsp->dim*j+rownum1-1])
3094
if (dsp->starts[dsp->dim*j+rownum1-1]+dsp->lens[j] >= dsp->starts[dsp->dim*i+rownum1-1])
3101
for (j=left; j<=right; j++)
3103
dsp->starts[dsp->dim*j+rownum2-1] = dsp->starts[dsp->dim*j+rownum1-1];
3104
dsp->starts[dsp->dim*j+rownum1-1] = -1;
3106
/* now see if rownum1 has anything left on it; if not, delete it */
3108
for (j=0; j<dsp->numseg && !done; j++)
3110
if (dsp->starts[dsp->dim*j+rownum1-1] != -1)
3113
if (!done) /* nothing but gaps */
3115
starts = (Int4Ptr)MemNew((dsp->dim-1)*dsp->numseg*sizeof(Int4));
3116
strands = (Uint1Ptr)MemNew((dsp->dim-1)*dsp->numseg*sizeof(Uint1));
3118
for (j=0; j<dsp->dim; j++)
3122
for (k=0; k<dsp->numseg; k++)
3124
starts[(dsp->dim-1)*k+row] = dsp->starts[(dsp->dim)*k+j];
3125
strands[(dsp->dim-1)*k+row] = dsp->strands[(dsp->dim)*k+j];
3130
MemFree(dsp->starts);
3131
MemFree(dsp->strands);
3132
dsp->starts = starts;
3133
dsp->strands = strands;
3135
id_head = id_prev = NULL;
3142
if (id_head != NULL)
3144
id_prev->next = SeqIdDup(id);
3145
id_prev = id_prev->next;
3147
id_head = id_prev = SeqIdDup(id);
3152
SeqIdSetFree(dsp->ids);
4962
if (!found) /* just gaps */
4964
/* merge whole row up to rownum1 */
4965
for (i=0; i<dsp->numseg; i++)
4967
dsp->starts[dsp->dim*i+rownum1-1] = MAX(dsp->starts[dsp->dim*i+rownum1-1], dsp->starts[dsp->dim*i+rownum2-1]);
4969
starts = (Int4Ptr)MemNew((dsp->dim-1)*(dsp->numseg)*sizeof(Int4));
4970
strands = (Uint1Ptr)MemNew((dsp->dim-1)*(dsp->numseg)*sizeof(Uint1));
4972
for (i=0; i<dsp->dim; i++)
4976
for (j=0; j<dsp->numseg; j++)
4978
starts[dsp->dim*j+k] = dsp->starts[dsp->dim*j+i];
4979
strands[dsp->dim*j+k] = dsp->strands[dsp->dim*j+i];
4984
MemFree(dsp->starts);
4985
MemFree(dsp->strands);
4986
dsp->starts = starts;
4987
dsp->strands = strands;
4989
id_head = id_prev = NULL;
4996
if (id_head != NULL)
4998
id_prev->next = SeqIdDup(id);
4999
id_prev = id_prev->next;
5001
id_head = id_prev = SeqIdDup(id);
5006
SeqIdSetFree(dsp->ids);
5010
while (window_head != NULL)
5012
window = window_head->next;
5013
MemFree(window_head);
5014
window_head = window;
5016
fake_sap->segs = NULL;
5017
SeqAlignFree(fake_sap);
3844
5773
amp->from_row = dsp->starts[trans[start_sect]*dsp->dim+amp->row_num-1] + offset;
3845
5774
amp->to_row = amp->from_row + len - 1;
3846
5775
amp->real_from += amp->to_row - amp->from_row + 1;
5776
if (saip->anchor <= 0)
3849
5780
/* look for limits of aligned/gapped region */
3851
if (index+1 < arraylen && array[index+1] > trans[stop_sect])
3854
stop_sect = binary_search_on_uint2_list(trans, array[index], translen);
3856
for (i=index+1; i<arraylen && !found && array[i-1]<=trans[stop_sect]; i++)
3858
if (array[i] != array[i-1]+1)
3861
stop_sect = binary_search_on_uint2_list(trans, array[i-1], translen);
3864
if (i > 0 && array[i-1] > trans[stop_sect]) /* all gap/aligned from start to stop */
3866
if (!found) /* make sure the last segments aren't different (switch to gap, etc) */
3868
if (array[arraylen-1] < dsp->numseg-1) /* this gap/aligned region doesn't go to the end */
3869
stop_sect = binary_search_on_uint2_list(trans, array[arraylen-1], translen);
3871
/* look for interrupting unaligned or inserted regions */
3874
for (i=0; i<saip->srdp[amp->row_num-1]->numunaln && !found; i++)
3876
if (saip->srdp[amp->row_num-1]->unaligned[i] >= trans[start_sect] && saip->srdp[amp->row_num-1]->unaligned[i] <= trans[stop_sect])
3878
disc = saip->srdp[amp->row_num-1]->unaligned[i];
3879
stop_sect = binary_search_on_uint2_list(trans, disc, translen);
3883
for (j=trans[stop_sect]+1; srdp->numinsect > 0 && stop_sect < translen-1 && j<trans[stop_sect+1] && trans[stop_sect] < srdp->insect[srdp->numinsect-1]; j++)
3885
if ((insert = binary_search_on_uint2_list(srdp->insect, j, srdp->numinsect)) != -1)
3887
amp->right_insert = (AMAdjacPtr)MemNew(sizeof(AMAdjac));
3888
ilen = dsp->lens[srdp->insect[insert]];
3889
for (i = insert; i<srdp->numinsect-1 && srdp->insect[i] == srdp->insect[i+1]-1; i++)
3891
ilen += dsp->lens[srdp->insect[i+1]];
3893
if (amp->strand == Seq_strand_minus)
3895
amp->right_insert->from = dsp->starts[srdp->insect[i]*dsp->dim+amp->row_num-1];
3896
amp->right_insert->to = amp->right_insert->from + ilen - 1;
3899
amp->right_insert->from = dsp->starts[srdp->insect[insert]*dsp->dim+amp->row_num-1];
3900
amp->right_insert->to = amp->right_insert->from + ilen - 1;
3904
if (disc != -1) /* found an unaligned region */
3906
amp->right_unaligned = (AMAdjacPtr)MemNew(sizeof(AMAdjac));
3907
AlnMgr2GetUnalignedInfo(sap, disc, amp->row_num, &->right_unaligned->from, &->right_unaligned->to);
3908
if (amp->right_unaligned->from == amp->right_unaligned->to && amp->right_unaligned->to == -1)
3910
MemFree(amp->right_unaligned);
3911
amp->right_unaligned = NULL;
3914
/* now look for inserts/unaligned regions to the left */
3917
for (i=0; offset == 0 && i<saip->srdp[amp->row_num-1]->numunaln && !found && saip->srdp[amp->row_num-1]->unaligned[i] < trans[start_sect]; i++)
3919
if (saip->srdp[amp->row_num-1]->unaligned[i] == trans[start_sect]-1)
3921
disc = saip->srdp[amp->row_num-1]->unaligned[i];
3927
amp->left_unaligned = (AMAdjacPtr)MemNew(sizeof(AMAdjac));
3928
AlnMgr2GetUnalignedInfo(sap, disc, amp->row_num, &->left_unaligned->from, &->left_unaligned->to);
3929
if (amp->left_unaligned->from == amp->left_unaligned->to && amp->left_unaligned->to == -1)
3931
MemFree(amp->left_unaligned);
3932
amp->left_unaligned = NULL;
3935
for (j=trans[start_sect]-1; srdp->numinsect > 0 && start_sect > 0 && trans[start_sect] > srdp->insect[0] && j>trans[start_sect-1]; j--)
3937
if ((insert = binary_search_on_uint2_list(srdp->insect, j, srdp->numinsect)) != -1)
3939
amp->left_insert = (AMAdjacPtr)MemNew(sizeof(AMAdjac));
3940
ilen = dsp->lens[srdp->insect[insert]];
3941
for (i=insert; i>0 && srdp->insect[i-1] == srdp->insect[i]-1; i--)
3944
ilen += dsp->lens[srdp->insect[i-1]];
3946
if (amp->strand == Seq_strand_minus)
3948
amp->left_insert->from = dsp->starts[srdp->insect[insert]*dsp->dim+amp->row_num-1];
3949
amp->left_insert->to = amp->left_insert->from + ilen - 1;
3953
amp->left_insert->from = dsp->starts[srdp->insect[i]*dsp->dim+amp->row_num-1];
3955
amp->left_insert->from = dsp->starts[srdp->insect[insert]*dsp->dim+amp->row_num-1];
3956
amp->left_insert->to = amp->left_insert->from + ilen - 1;
5785
while (i+1<arraylen && disc == -1 && array[i] <= trans[stop_sect] && array[i+1]-1 == array[i])
5787
disc = binary_search_on_uint2_list(srdp->unaligned, array[i], srdp->numunaln);
5791
disc = binary_search_on_uint2_list(srdp->unaligned, array[i], srdp->numunaln);
5792
j = binary_search_on_uint2_list(trans, array[i], translen);
5793
if (amp->type == AM_SEQ && j <= stop_sect) /* there is an interrupting region, either seq/gap, insert, or unaligned, plus just check last piece */
5795
i = binary_search_on_uint2_list(srdp->insect, trans[j]+1, srdp->numinsect);
5796
if (i != -1) /* there's an insert */
5798
amp->right_interrupt = (AMInterruptPtr)MemNew(sizeof(AMInterrupt));
5799
amp->right_interrupt->row = amp->row_num;
5800
amp->right_interrupt->segnum = trans[j];
5801
amp->right_interrupt->insertlen = dsp->lens[srdp->insect[i]];
5802
amp->right_interrupt->which_side = AM2_RIGHT;
5803
/* look for unaligned regions off insert */
5806
disc1 = binary_search_on_uint2_list(srdp->unaligned, trans[j]+1, srdp->numunaln);
5809
AlnMgr2GetUnalignedInfo(sap, srdp->unaligned[disc1], amp->row_num, &intfrom, &intto);
5810
amp->right_interrupt->unalnlen = intto - intfrom + 1;
5814
while (i<srdp->numinsect && srdp->insect[i] == srdp->insect[i-1]+1)
5816
amp->right_interrupt->insertlen += dsp->lens[srdp->insect[i]];
5817
/* look for unaligned regions off insert */
5820
disc1 = binary_search_on_uint2_list(srdp->unaligned, trans[j]+1+ctr, srdp->numunaln);
5824
AlnMgr2GetUnalignedInfo(sap, srdp->unaligned[disc1], amp->row_num, &intfrom, &intto);
5825
amp->right_interrupt->unalnlen += intto - intfrom + 1;
5831
if (disc != -1) /* there's an unaligned region */
5833
if (amp->right_interrupt == NULL)
5834
amp->right_interrupt = (AMInterruptPtr)MemNew(sizeof(AMInterrupt));
5835
amp->right_interrupt->row = amp->row_num;
5836
amp->right_interrupt->segnum = trans[j];
5837
amp->right_interrupt->which_side = AM2_RIGHT;
5838
AlnMgr2GetUnalignedInfo(sap, srdp->unaligned[disc], amp->row_num, &intfrom, &intto);
5839
amp->right_interrupt->unalnlen += intto - intfrom + 1;
5843
/* now look for left-side unaligned or inserted regions if offset == 0 */
5844
if (amp->type == AM_SEQ && offset == 0)
5849
if ((Int2)trans[start_sect]-j > 0)
5850
i = binary_search_on_uint2_list(srdp->sect, trans[start_sect]-j, srdp->numsect);
5851
while (i == -1 && (Int2)(trans[start_sect])-j-1 >= 0)
5853
i = binary_search_on_uint2_list(srdp->sect, trans[start_sect]-j-1, srdp->numsect);
5856
disc = binary_search_on_uint2_list(srdp->unaligned, trans[start_sect]-j, srdp->numunaln);;
5859
AlnMgr2GetUnalignedInfo(sap, trans[start_sect]-j, amp->row_num, &intfrom, &intto);
5860
amp->left_interrupt = (AMInterruptPtr)MemNew(sizeof(AMInterrupt));
5861
amp->left_interrupt->row = amp->row_num;
5862
amp->left_interrupt->segnum = trans[start_sect];
5863
amp->left_interrupt->which_side = AM2_LEFT;
5864
amp->left_interrupt->unalnlen = intto - intfrom + 1;
5866
i = binary_search_on_uint2_list(srdp->insect, trans[start_sect]-j, srdp->numinsect);
5867
if (i != -1) /* there's an insert */
5869
if (amp->left_interrupt == NULL)
5870
amp->left_interrupt = (AMInterruptPtr)MemNew(sizeof(AMInterrupt));
5871
amp->left_interrupt->row = amp->row_num;
5872
amp->left_interrupt->segnum = trans[start_sect];
5873
amp->left_interrupt->which_side = AM2_LEFT;
5874
amp->left_interrupt->insertlen = dsp->lens[srdp->insect[i]];
5875
/* look for unaligned regions off insert */
5876
j = trans[start_sect]-j;
5877
disc1 = binary_search_on_uint2_list(srdp->unaligned, j, srdp->numunaln);
5880
AlnMgr2GetUnalignedInfo(sap, srdp->unaligned[disc1], amp->row_num, &intfrom, &intto);
5881
amp->left_interrupt->unalnlen += intto - intfrom + 1;
5885
while (i-1>=0 && srdp->insect[i] == srdp->insect[i+1]-1)
5887
amp->left_interrupt->insertlen += dsp->lens[srdp->insect[i]];
5888
disc1 = binary_search_on_uint2_list(srdp->unaligned, j, srdp->numunaln);
5891
AlnMgr2GetUnalignedInfo(sap, srdp->unaligned[disc1], amp->row_num, &intfrom, &intto);
5892
amp->left_interrupt->unalnlen += intto - intfrom + 1;
5897
if (i>=0) /* look one more over for unaligned */
5899
disc1 = binary_search_on_uint2_list(srdp->unaligned, j, srdp->numunaln);
5902
AlnMgr2GetUnalignedInfo(sap, srdp->unaligned[disc1], amp->row_num, &intfrom, &intto);
5903
amp->left_interrupt->unalnlen += intto - intfrom + 1;
3960
5908
endoffset = dsp->lens[trans[stop_sect]] - (amp->to_aln - saip->aligncoords[stop_sect]) - 1;
3961
5909
if (endoffset < 0)
3963
if (amp->right_unaligned != NULL && endoffset > 0)
5911
if (amp->right_interrupt != NULL && endoffset > 0)
3965
MemFree(amp->right_unaligned);
3966
amp->right_unaligned = NULL;
5913
MemFree(amp->right_interrupt);
5914
amp->right_interrupt = NULL;
3969
5917
for (i=start_sect; i<=stop_sect; i++)
3996
5944
/* SECTION 4a */
3997
static Boolean AlnMgr2GetNextAnchoredChildBit(SeqAlignPtr sap, AlnMsg2Ptr amp)
3999
AMAlignIndex2Ptr amaip;
4022
if (sap->saip->indextype == INDEX_CHILD)
4024
dsp = (DenseSegPtr)(sap->segs);
4025
saip = (SAIndex2Ptr)(sap->saip);
4026
} else if (sap->saip->indextype == INDEX_PARENT)
4028
amaip = (AMAlignIndex2Ptr)(sap->saip);
4029
saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
4030
dsp = (DenseSegPtr)(amaip->sharedaln->segs);
4032
if (saip->anchor > 0)
4033
trans = saip->srdp[saip->anchor-1]->sect;
4036
trans = (Uint2Ptr)MemNew((dsp->numseg)*sizeof(Uint2));
4037
for (i=0; i<dsp->numseg; i++)
4043
start_sect = binary_search_on_uint4_list(saip->aligncoords, amp->real_from, saip->numseg);
4045
for (i=0; i<saip->srdp[amp->row_num-1]->numunaln && !found && saip->srdp[amp->row_num-1]->unaligned[i] < start_sect; i++)
4047
if (saip->srdp[amp->row_num-1]->unaligned[i] == start_sect-1)
4053
left = (AMAdjacPtr)MemNew(sizeof(AMAdjac));
4054
AlnMgr2GetUnalignedInfo(sap, saip->srdp[amp->row_num-1]->unaligned[i], amp->row_num, &left->from, &left->to);
4055
if (left->from == left->to == -1)
4058
amp->left_unaligned = left;
4061
for (i=0; i<saip->srdp[amp->row_num-1]->numunaln && !found; i++)
4063
if (saip->srdp[amp->row_num-1]->unaligned[i] >= start_sect)
4067
stop_sect = binary_search_on_uint4_list(saip->aligncoords, amp->to_aln, saip->numseg);
4068
if (found && i>=0 && saip->srdp[amp->row_num-1]->unaligned[i] < stop_sect)
4070
right = (AMAdjacPtr)MemNew(sizeof(AMAdjac));
4071
AlnMgr2GetUnalignedInfo(sap, saip->srdp[amp->row_num-1]->unaligned[i], amp->row_num, &right->from, &right->to);
4072
if (right->from == right->to == -1)
4076
amp->right_unaligned = right;
4077
stop_sect = saip->srdp[amp->row_num-1]->unaligned[i];
4081
offset = amp->real_from - saip->aligncoords[start_sect];
4082
srdp = saip->srdp[amp->row_num - 1];
4083
if (saip->anchor == amp->row_num)
4086
if (start_sect == stop_sect)
4087
len = dsp->lens[srdp->sect[start_sect]] - offset - endoffset;
4089
len = dsp->lens[srdp->sect[start_sect]] - offset;
4090
for (i=start_sect+1; i<stop_sect; i++)
4092
len += dsp->lens[srdp->sect[i]];
4094
if (start_sect+1 < stop_sect)
4096
if (amp->strand != Seq_strand_minus)
4098
amp->from_row = dsp->starts[(trans[start_sect])*(dsp->dim) + amp->row_num -1] + offset;
4099
amp->to_row = amp->from_row + len - 1;
4102
amp->from_row = dsp->starts[(trans[stop_sect])*(dsp->dim) + amp->row_num - 1];
4103
amp->to_row = amp->from_row + len - offset - 1;
4105
amp->real_from += amp->to_row - amp->from_row + 1;
4108
if ((start_row = binary_search_on_uint2_list(srdp->sect, trans[start_sect], srdp->numsect)) != -1)
4113
arraylen = srdp->numsect;
4114
} else if ((start_row = binary_search_on_uint2_list(srdp->unsect, trans[start_sect], srdp->numunsect)) != -1)
4117
array = srdp->unsect;
4119
arraylen = srdp->numunsect;
4122
offset = amp->real_from - saip->aligncoords[start_sect];
4125
if (array[index] == trans[stop_sect])
4126
len += dsp->lens[trans[index]];
4127
for (i=index; !space && i<arraylen-1 && array[i] < trans[stop_sect]; i++)
4129
len += dsp->lens[trans[i]];
4130
if (i>0 && i<arraylen-1)
4132
if (array[i+1] == array[i]+1)
4140
if (array[index] == trans[stop_sect])
4142
end = trans[stop_sect];
4143
endoffset = dsp->lens[trans[stop_sect]] - (amp->to_aln - saip->aligncoords[trans[stop_sect]]);
4146
} else if (array[i] == trans[stop_sect] && array[i] == array[i-1]+1)
4148
endoffset = dsp->lens[trans[stop_sect]] - (amp->to_aln - saip->aligncoords[trans[stop_sect]]);
4156
beg = trans[start_sect];
4157
if (amp->type == AM_GAP)
4159
amp->from_row = amp->real_from;
4160
amp->to_row = amp->from_row + len + endoffset - 1;
4163
if (amp->strand != Seq_strand_minus)
4165
amp->from_row = dsp->starts[trans[start_sect]*(dsp->dim)+amp->row_num-1] + offset;
4166
amp->to_row = amp->from_row + len + endoffset - 1;
4169
amp->from_row = dsp->starts[trans[i-1]*(dsp->dim)+amp->row_num-1] + endoffset;
4170
amp->to_row = amp->from_row + len - 1;
4173
amp->real_from += (amp->to_row - amp->from_row) + 1;
4174
/* now look for adjacent inserts */
4175
if ((start_row = binary_search_on_uint2_list(srdp->insect, beg-1, srdp->numinsect)) != -1)
4177
array = srdp->insect;
4178
amp->left_insert = (AMAdjacPtr)MemNew(sizeof(AMAdjac));
4180
for (i=start_row-1; array[i] == array[i+1]-1 && i >= 0; i--)
4182
len += dsp->lens[array[i]];
4184
if (amp->strand != Seq_strand_minus)
4186
amp->left_insert->from = dsp->starts[array[start_row]*(dsp->dim) + amp->row_num-1] - len;
4187
amp->left_insert->to = amp->from_row - 1;
4190
amp->left_insert->from = amp->to_row + 1;
4191
amp->left_insert->to = dsp->starts[(array[start_row]-1)*(dsp->dim) + amp->row_num-1] + len;
4194
if ((stop_row = binary_search_on_uint2_list(srdp->insect, end+1, srdp->numinsect)) != -1)
4196
array = srdp->insect;
4197
amp->right_insert = (AMAdjacPtr)MemNew(sizeof(AMAdjac));
4199
for (i=stop_row+1; array[i] == array[i-1] + 1 && i < srdp->numinsect; i++)
4201
len += dsp->lens[array[i]];
4203
if (amp->strand != Seq_strand_minus)
4205
amp->right_insert->from = amp->to_row + 1;
4206
amp->right_insert->to = dsp->starts[(array[stop_row])*(dsp->dim) + amp->row_num - 1] + dsp->lens[array[stop_row]] + len-1;
4209
amp->right_insert->from = dsp->starts[array[stop_row]*(dsp->dim) + amp->row_num - 1] - len;
4210
amp->right_insert->to = amp->from_row - 1;
4214
if (saip->anchor < 1)
4220
5945
static Int4 binary_search_on_uint4_list(Uint4Ptr list, Uint4 pos, Uint4 listlen)
4314
6040
if (*from > *to)
6049
/***************************************************************************
6051
* AlnMgr2GetInterruptInfo returns a structure describing the inserts and
6052
* unaligned regions in an interrupt. The structure is allocated by this
6053
* function and must be freed with AlnMgr2FreeInterruptInfo.
6055
***************************************************************************/
6056
NLM_EXTERN AMInterrInfoPtr AlnMgr2GetInterruptInfo(SeqAlignPtr sap, AMInterruptPtr interrupt)
6058
AMAlignIndex2Ptr amaip;
6063
AMInterrInfoPtr iip;
6077
if (interrupt == NULL || sap == NULL || sap->saip == NULL)
6079
if (sap->saip->indextype == INDEX_CHILD)
6081
dsp = (DenseSegPtr)(sap->segs);
6082
saip = (SAIndex2Ptr)(sap->saip);
6083
} else if (sap->saip->indextype == INDEX_PARENT)
6085
amaip = (AMAlignIndex2Ptr)(sap->saip);
6086
if (amaip->alnstyle == AM2_LITE)
6088
dsp = (DenseSegPtr)(amaip->sharedaln->segs);
6089
saip = (SAIndex2Ptr)(amaip->sharedaln->saip);
6091
if (dsp->numseg < interrupt->segnum)
6093
if (saip->anchor > 0)
6095
trans = saip->srdp[saip->anchor-1]->sect;
6096
translen = saip->srdp[saip->anchor-1]->numsect;
6099
trans = (Uint2Ptr)MemNew(dsp->numseg*sizeof(Uint2));
6100
for (i=0; i<dsp->numseg; i++)
6104
translen = dsp->numseg;
6106
strand = AlnMgr2GetNthStrand(sap, interrupt->row-1);
6107
srdp = saip->srdp[interrupt->row-1];
6108
/* now look for inserts and unaligned regions on the side indicated */
6109
if (interrupt->which_side == AM2_RIGHT)
6111
/* check if this is unaligned */
6112
disc = binary_search_on_uint2_list(srdp->unaligned, interrupt->segnum, srdp->numunaln);
6113
/* then look for inserts */
6115
iip = (AMInterrInfoPtr)MemNew(sizeof(AMInterrInfo));
6119
for (i=interrupt->segnum+1; !done; i++)
6121
n = binary_search_on_uint2_list(srdp->insect, i, srdp->numinsect);
6123
n = binary_search_on_uint2_list(srdp->unsect, i, srdp->numunsect);
6129
inserts++; /* only increment if region gets interrupted */
6130
disc = binary_search_on_uint2_list(srdp->unaligned, i, srdp->numunaln);
6131
if (disc != -1) /* this insert has an unaligned region */
6133
iip->num += inserts;
6141
iip->starts = (Int4Ptr)MemNew(iip->num*sizeof(Int4));
6142
iip->lens = (Int4Ptr)MemNew(iip->num*sizeof(Int4));
6143
iip->types = (Int4Ptr)MemNew(iip->num*sizeof(Int4));
6145
disc = binary_search_on_uint2_list(srdp->unaligned, interrupt->segnum, srdp->numunaln);
6146
if (disc != -1) /* starts with unaligned */
6148
AlnMgr2GetUnalignedInfo(sap, interrupt->segnum, interrupt->row, &intfrom, &intto);
6149
iip->starts[k] = intfrom;
6150
iip->lens[k] = intto - intfrom + 1;
6151
iip->types[k] = AM_UNALIGNED;
6156
for (i=interrupt->segnum+1; !done; i++)
6158
n = binary_search_on_uint2_list(srdp->insect, i, srdp->numinsect);
6159
u = binary_search_on_uint2_list(srdp->unsect, i, srdp->numinsect);
6160
if (n == -1 && u == -1)
6167
if (disc != -1 || strand == Seq_strand_minus) /* only record new start if region gets interrupted or if on minus strand */
6168
iip->starts[k] = dsp->starts[dsp->dim*i + interrupt->row-1];
6169
iip->lens[k] += dsp->lens[i];
6170
iip->types[k] = AM_INSERT;
6171
disc = binary_search_on_uint2_list(srdp->unaligned, i, srdp->numunaln);
6172
if (disc != -1) /* this insert has an unaligned region */
6175
AlnMgr2GetUnalignedInfo(sap, i, interrupt->row, &intfrom, &intto);
6176
iip->starts[k] = intfrom;
6177
iip->lens[k] = intto - intfrom + 1;
6178
iip->types[k] = AM_UNALIGNED;
6184
} else if (interrupt->which_side == AM2_LEFT)
6186
/* check if the next non-gap segment to the left has unaligned */
6189
while (n != -1 && interrupt->segnum-j >= 0)
6191
n = binary_search_on_uint2_list(srdp->unsect, interrupt->segnum-j, srdp->numunsect);
6193
n = binary_search_on_uint2_list(srdp->insect, interrupt->segnum-j, srdp->numinsect);
6197
disc = binary_search_on_uint2_list(srdp->unaligned, interrupt->segnum-j, srdp->numunaln);
6198
/* then look for inserts */
6200
iip = (AMInterrInfoPtr)MemNew(sizeof(AMInterrInfo));
6204
for (i=interrupt->segnum-1; !done; i--)
6206
n = binary_search_on_uint2_list(srdp->insect, i, srdp->numinsect);
6208
n = binary_search_on_uint2_list(srdp->unsect, i, srdp->numunsect);
6214
inserts++; /* only increment if region gets interrupted */
6215
disc = binary_search_on_uint2_list(srdp->unaligned, i, srdp->numunaln);
6216
if (disc != -1) /* this insert has an unaligned region */
6218
iip->num += inserts;
6225
iip->starts = (Int4Ptr)MemNew(iip->num*sizeof(Int4));
6226
iip->lens = (Int4Ptr)MemNew(iip->num*sizeof(Int4));
6227
iip->types = (Int4Ptr)MemNew(iip->num*sizeof(Int4));
6230
/* check first non-inserted segment for unaligned */
6233
disc = binary_search_on_uint2_list(srdp->unaligned, i, srdp->numunaln);
6234
if (disc != -1) /* there's an unaligned region */
6236
AlnMgr2GetUnalignedInfo(sap, i, interrupt->row, &intfrom, &intto);
6237
iip->starts[k] = intfrom;
6238
iip->lens[k] = intto - intfrom + 1;
6239
iip->types[k] = AM_UNALIGNED;
6243
i++; /* start from leftmost end of inserts/unaligned */
6244
for (i; i<interrupt->segnum; i++)
6246
u = binary_search_on_uint2_list(srdp->unsect, i, srdp->numunsect);
6249
if (disc != -1 || strand == Seq_strand_minus) /* only record new start if region gets interrupted or if on minus strand */
6250
iip->starts[k] = dsp->starts[dsp->dim*i + interrupt->row-1];
6251
iip->lens[k] += dsp->lens[i];
6252
iip->types[k] = AM_INSERT;
6253
disc = binary_search_on_uint2_list(srdp->unaligned, i, srdp->numunaln);
6254
if (disc != -1) /* this insert has an unaligned region */
6257
AlnMgr2GetUnalignedInfo(sap, binary_search_on_uint2_list(trans, i, translen), interrupt->row, &intfrom, &intto);
6258
iip->starts[k] = intfrom;
6259
iip->lens[k] = intto - intfrom + 1;
6260
iip->types[k] = AM_UNALIGNED;
6266
iip->strand = strand;
4318
6270
/* SECTION 4b */
5973
7993
rows = (AMRowInfoPtr PNTR)MemNew(n*sizeof(AMRowInfoPtr));
5974
numseg = AlnMgr2GetNumSegsInRange(sap, from_aln, to_aln, &start_seg);
5977
7994
amp = AlnMsgNew2();
5981
for (j=start_seg; j<numseg+start_seg; j++)
5984
AlnMgr2GetNthSegmentRange(sap, j+1, &->from_aln, &->to_aln);
5985
amp->from_aln = MAX(amp->from_aln, from_aln);
5986
amp->to_aln = MIN(amp->to_aln, to_aln);
5988
while ((more = AlnMgr2GetNextAlnBit(salp, amp)) == TRUE)
5990
row = (AMRowInfoPtr)MemNew(sizeof(AMRowInfo));
5991
if (amp->type == AM_GAP)
5994
row->from = amp->from_row;
5995
row->len = amp->to_row - amp->from_row + 1;
5996
if (row_head != NULL)
5998
row_prev->next = row;
6001
row_head = row_prev = row;
6007
rowheads = (AMRowInfoPtr PNTR)MemNew(n*sizeof(AMRowInfoPtr));
6010
rowheads[i] = rows[i];
6012
while (rows[0] != NULL)
6017
if (rows[i]->len < minlen || minlen == -1)
6018
minlen = rows[i]->len;
6022
if (rows[i]->len > minlen)
6024
row = (AMRowInfoPtr)MemNew(sizeof(AMRowInfo));
6025
row->next = rows[i]->next;
6026
rows[i]->next = row;
6027
if (rows[i]->from == -1)
6029
else if (AlnMgr2GetNthStrand(salp, i) == Seq_strand_minus)
6031
row->from = rows[i]->from;
6032
rows[i]->from = rows[i]->from + rows[i]->len - 1 - minlen;
6034
row->from = rows[i]->from + minlen;
6035
row->len = rows[i]->len - minlen;
6036
rows[i]->len = minlen;
6038
rows[i] = rows[i]->next;
6043
rows[i] = rowheads[i];
6046
dsp = DenseSegNew();
6054
dsp->lens = (Int4Ptr)MemNew((dsp->numseg)*sizeof(Int4));
6055
dsp->starts = (Int4Ptr)MemNew((dsp->numseg)*(dsp->dim)*sizeof(Int4));
6056
dsp->strands = (Uint1Ptr)MemNew((dsp->numseg)*(dsp->dim)*sizeof(Int4));
6061
dsp->lens[j] = row->len;
6065
id = AlnMgr2GetNthSeqIdPtr(salp, 0);
6071
id->next = AlnMgr2GetNthSeqIdPtr(salp, i+1);
7995
seg = lengthbit = 0;
7998
salp_head = salp_prev = NULL;
7999
while (AlnMgr2GetNextLengthBit(sap, &lengthbit, &seg))
8001
if (currlen <= to_aln && seg >= 0 && currlen+lengthbit-1 >= from_aln)
8003
numseg = AlnMgr2GetNumSegsInRange(sap, currlen, currlen+lengthbit-1, &start_seg);
8008
for (j=start_seg; j<numseg+start_seg; j++)
8011
AlnMgr2GetNthSegmentRange(sap, j+1, &->from_aln, &->to_aln);
8012
amp->from_aln = MAX(amp->from_aln, from_aln);
8013
amp->to_aln = MIN(amp->to_aln, to_aln);
8015
while ((more = AlnMgr2GetNextAlnBit(salp, amp)) == TRUE)
8017
if (amp->right_interrupt != NULL && amp->right_interrupt->unalnlen > 0)
8019
row = (AMRowInfoPtr)MemNew(sizeof(AMRowInfo));
8020
if (amp->type == AM_GAP)
8023
row->from = amp->from_row;
8024
row->len = amp->to_row - amp->from_row + 1;
8025
if (row_head != NULL)
8027
row_prev->next = row;
8030
row_head = row_prev = row;
8036
rowheads = (AMRowInfoPtr PNTR)MemNew(n*sizeof(AMRowInfoPtr));
8039
rowheads[i] = rows[i];
8041
while (rows[0] != NULL)
8046
if (rows[i]->len < minlen || minlen == -1)
8047
minlen = rows[i]->len;
8051
if (rows[i]->len > minlen)
8053
row = (AMRowInfoPtr)MemNew(sizeof(AMRowInfo));
8054
row->next = rows[i]->next;
8055
rows[i]->next = row;
8056
if (rows[i]->from == -1)
8058
else if (AlnMgr2GetNthStrand(salp, i) == Seq_strand_minus)
8060
row->from = rows[i]->from;
8061
rows[i]->from = rows[i]->from + rows[i]->len - 1 - minlen;
8063
row->from = rows[i]->from + minlen;
8064
row->len = rows[i]->len - minlen;
8065
rows[i]->len = minlen;
8067
rows[i] = rows[i]->next;
8072
rows[i] = rowheads[i];
8075
dsp = DenseSegNew();
8083
dsp->numseg += numunaln;
8085
dsp->lens = (Int4Ptr)MemNew((dsp->numseg)*sizeof(Int4));
8086
dsp->starts = (Int4Ptr)MemNew((dsp->numseg)*(dsp->dim)*sizeof(Int4));
8087
dsp->strands = (Uint1Ptr)MemNew((dsp->numseg)*(dsp->dim)*sizeof(Int4));
6076
strand = AlnMgr2GetNthStrand(salp, i+1);
6077
8090
while (row != NULL)
6079
dsp->starts[n*j + i] = row->from;
6080
dsp->strands[n*j + i] = strand;
8092
dsp->lens[j] = row->len;
6082
8094
row = row->next;
6085
subsalp = SeqAlignNew();
6086
subsalp->type = SAT_PARTIAL;
6087
subsalp->segtype = SAS_DENSEG;
6089
subsalp->segs = (Pointer)(dsp);
6095
row_prev = row->next;
8096
id = AlnMgr2GetNthSeqIdPtr(salp, 0);
8102
id->next = AlnMgr2GetNthSeqIdPtr(salp, i+1);
8107
strand = AlnMgr2GetNthStrand(salp, i+1);
8110
dsp->starts[n*j + i] = row->from;
8111
dsp->strands[n*j + i] = strand;
8120
AlnMgr2GetNthUnalignedForNthRow(sap, seg+1, i+1, &ustart, &ustop);
8121
if (ustart >= 0 && ustop >= ustart)
8125
dsp->starts[n*j + k] = -1;
8126
dsp->strands[n*j + k] = dsp->strands[i];
8128
dsp->starts[n*j + i] = ustart;
8133
subsalp = SeqAlignNew();
8134
subsalp->type = SAT_PARTIAL;
8135
subsalp->segtype = SAS_DENSEG;
8137
subsalp->segs = (Pointer)(dsp);
8143
row_prev = row->next;
8150
currlen += lengthbit;
8152
if (salp_head != NULL)
8154
salp_prev->next = subsalp;
8155
salp_prev = subsalp;
8157
salp_head = salp_prev = subsalp;
8161
if (fill_in && salp_head->next != NULL) /* stick subsalps together into a big aln */
8164
subsalp = salp_head;
8165
while (subsalp != NULL)
8167
dsp = (DenseSegPtr)(subsalp->segs);
8169
subsalp = subsalp->next;
8171
dsp_new = DenseSegNew();
8173
dsp_new->numseg = j;
8174
dsp_new->lens = (Int4Ptr)MemNew((dsp->numseg)*sizeof(Int4));
8175
dsp_new->starts = (Int4Ptr)MemNew((dsp->numseg)*(dsp->dim)*sizeof(Int4));
8176
dsp_new->strands = (Uint1Ptr)MemNew((dsp->numseg)*(dsp->dim)*sizeof(Int4));
8177
subsalp = salp_head;
8179
while (subsalp != NULL)
8181
dsp = (DenseSegPtr)(subsalp->segs);
8182
for (j=0; j<dsp->numseg; j++)
8184
dsp_new->lens[k] = dsp->lens[j];
8187
dsp_new->starts[k*n+i] = dsp->starts[j*n+i];
8188
dsp_new->strands[k*n+i] = dsp->strands[j*n+i];
8192
subsalp = subsalp->next;
8194
subsalp = SeqAlignNew();
8195
subsalp->type = SAT_PARTIAL;
8196
subsalp->segtype = SAS_DENSEG;
8198
subsalp->segs = (Pointer)(dsp_new);
8199
SeqAlignSetFree(salp_head);
8200
} else if (!fill_in && salp_head->next != NULL)
8202
subsalp = SeqAlignNew();
8203
subsalp->segtype = SAS_DISC;
8204
subsalp->type = SAT_PARTIAL;
8205
subsalp->segs = (SeqAlignPtr)(salp_head);
8206
salp_prev = salp_head;
8207
while (salp_prev != NULL)
8209
AMAlignIndexFreeEitherIndex(salp_prev);
8210
salp_prev = salp_prev->next;
8212
} else /* if !salp_head->next */
8214
subsalp = salp_head;
8215
subsalp->dim = AlnMgr2GetNumRows(subsalp);
8216
subsalp->type = SAT_PARTIAL;
8217
AMAlignIndexFreeEitherIndex(subsalp);
6102
8220
SeqAlignFree(salp);
6103
8221
return subsalp;
6999
9163
MemFree(lenarray);
7000
9164
return sap_new;
9168
/***************************************************************************
9170
* AlnMgr2ExtractPairwiseSeqAlign takes an indexed alignment (parent or
9171
* child, but must be fully indexed, not lite) and extracts a pairwise
9172
* subalignment containing the two requested rows. The subalignment is
9173
* unindexed and may have internal unaligned regions.
9175
***************************************************************************/
9176
NLM_EXTERN SeqAlignPtr AlnMgr2ExtractPairwiseSeqAlign(SeqAlignPtr sap, Int4 n1, Int4 n2)
9178
AMAlignIndex2Ptr amaip;
9180
DenseSegPtr dsp_new;
9184
SeqAlignPtr sap_new;
9186
if (sap == NULL || sap->saip == NULL || n1 == n2 || n1 <= 0 || n2 <= 0)
9188
if (sap->saip->indextype == INDEX_CHILD)
9189
dsp = (DenseSegPtr)(sap->segs);
9192
amaip = (AMAlignIndex2Ptr)(sap->saip);
9193
dsp = (DenseSegPtr)(amaip->sharedaln->segs);
9195
if (n1 > dsp->dim || n2 > dsp->dim)
9198
for (i=0; i<dsp->numseg; i++)
9200
if (dsp->starts[dsp->dim*i+n1-1] == -1 && dsp->starts[dsp->dim*i+n2-1] == -1)
9203
if (n == dsp->numseg) /* no overlap at all */
9205
dsp_new = DenseSegNew();
9206
dsp_new->numseg = dsp->numseg - n;
9207
dsp_new->starts = (Int4Ptr)MemNew(2*dsp_new->numseg*sizeof(Int4));
9208
dsp_new->strands = (Uint1Ptr)MemNew(2*dsp_new->numseg*sizeof(Uint1));
9209
dsp_new->lens = (Int4Ptr)MemNew(dsp_new->numseg*sizeof(Int4));
9211
dsp_new->ids = AlnMgr2GetNthSeqIdPtr(sap, n1);
9212
dsp_new->ids->next = AlnMgr2GetNthSeqIdPtr(sap, n2);
9214
for (i=0; i<dsp->numseg; i++)
9216
if (dsp->starts[dsp->dim*i+n1-1] > -1 || dsp->starts[dsp->dim*i+n2-1] > -1)
9218
dsp_new->starts[2*j] = dsp->starts[dsp->dim*i+n1-1];
9219
dsp_new->starts[2*j+1] = dsp->starts[dsp->dim*i+n2-1];
9220
dsp_new->strands[2*j] = dsp->strands[n1-1];
9221
dsp_new->strands[2*j+1] = dsp->strands[n2-1];
9222
dsp_new->lens[j] = dsp->lens[i];
9226
sap_new = SeqAlignNew();
9228
sap_new->type = SAT_PARTIAL;
9229
sap_new->segtype = SAS_DENSEG;
9230
sap_new->segs = (Pointer)dsp_new;
9235
static void amconssetfree(AMConsSetPtr acp)
9237
AMConsSetPtr acp_next;
9241
acp_next = acp->next;
9242
MemFree(acp->starts);
9243
MemFree(acp->stops);
9244
MemFree(acp->strands);
9250
static int LIBCALLBACK AlnMgr2SortForConsistent(VoidPtr ptr1, VoidPtr ptr2)
9257
acp1 = *((AMConsSetPtr PNTR)ptr1);
9258
acp2 = *((AMConsSetPtr PNTR)ptr2);
9259
saip1 = (SAIndex2Ptr)(acp1->sap->saip);
9260
saip2 = (SAIndex2Ptr)(acp2->sap->saip);
9261
if (saip1->score == 0)
9262
saip1->score = AlnMgr2ComputeScoreForSeqAlign(acp1->sap);
9263
if (saip2->score == 0)
9264
saip2->score = AlnMgr2ComputeScoreForSeqAlign(acp2->sap);
9265
if (saip1->score > saip2->score)
9267
else if (saip1->score < saip2->score)
9274
/***************************************************************************
9276
* AlnMgr2RemoveInconsistentAlnsFromSet takes an alignment that is
9277
* indexed at least at the AM2_LITE level, and prunes the child
9278
* alignments so that the remaining alignments form a consistent,
9279
* nonoverlapping set. All alignments must have the same number of rows,
9280
* and they must be the same rows (although not necessarily in the same
9281
* order). The function uses a simple greedy algorithm to construct the
9282
* nonoverlapping set, starting with the highest-scoring alignment.
9283
* If fuzz is negative, the function creates the best nonoverlapping set
9284
* by actually truncating alignments.
9286
***************************************************************************/
9287
NLM_EXTERN void AlnMgr2RemoveInconsistentAlnsFromSet(SeqAlignPtr sap_head, Int4 fuzz)
9290
AMConsSetPtr acp_head;
9291
AMConsSetPtr acp_prev;
9292
AMConsSetPtr PNTR acparray;
9304
SeqAlignPtr salp_head;
9305
SeqAlignPtr salp_prev;
9307
SeqAlignPtr sapnext;
9316
sap = (SeqAlignPtr)(sap_head->segs);
9317
if (sap->next == NULL)
9319
dsp = (DenseSegPtr)(sap->segs);
9320
sip_head = dsp->ids;
9321
numrows = AlnMgr2GetNumRows(sap);
9323
strand = AlnMgr2GetNthStrand(sap, 1);
9327
if (AlnMgr2GetNumRows(sap) != numrows)
9329
amconssetfree(acp_head);
9333
acp = (AMConsSetPtr)MemNew(sizeof(AMConsSet));
9334
acp->starts = (Int4Ptr)MemNew(numrows*sizeof(Int4));
9335
acp->stops = (Int4Ptr)MemNew(numrows*sizeof(Int4));
9336
acp->strands = (Uint1Ptr)MemNew(numrows*sizeof(Uint1));
9337
acp->which = (Int4Ptr)MemNew(numrows*sizeof(Int4));
9339
if (acp_head != NULL)
9341
acp_prev->next = acp;
9344
acp_head = acp_prev = acp;
9346
row = AlnMgr2GetFirstNForSip(sap, sip);
9349
amconssetfree(acp_head);
9352
if (acp->strands[row] != strand)
9354
sapnext = acp->sap->next;
9355
acp->sap->next = NULL;
9356
score = ((SAIndex2Ptr)(acp->sap->saip))->score;
9357
SeqAlignListReverseStrand(acp->sap);
9358
AMAlignIndexFreeEitherIndex(acp->sap);
9359
AlnMgr2IndexSingleChildSeqAlign(acp->sap);
9360
saip = (SAIndex2Ptr)(acp->sap->saip);
9361
saip->score = score;
9362
acp->strands[row] = strand;
9363
acp->sap->next = sapnext;
9365
for (i=0; i<numrows; i++)
9367
acp->which[i] = row;
9368
AlnMgr2GetNthSeqRangeInSA(sap, i+1, &acp->starts[i], &acp->stops[i]);
9369
acp->strands[i] = AlnMgr2GetNthStrand(sap, i+1);
9373
acparray = (AMConsSetPtr PNTR)MemNew(numsaps*sizeof(AMConsSetPtr));
9382
HeapSort(acparray, numsaps, sizeof(AMConsSetPtr), AlnMgr2SortForConsistent);
9383
/* orientation -1 means that ith is before jth in ALL rows, 1 means ith is after jth in ALL rows */
9384
for (i=0; i<numsaps; i++)
9386
if (acparray[i]->used != -1)
9388
for (j=i+1; j<numsaps; j++)
9391
for (k=0; acparray[j]->used != -1 && k<numrows; k++)
9393
if (acparray[i]->strands[k] != acparray[j]->strands[k])
9394
acparray[j]->used = -1;
9395
if (acparray[i]->starts[k] - fuzz < acparray[j]->starts[k])
9397
if (acparray[i]->stops[k] - fuzz < acparray[j]->starts[k])
9399
if ((acparray[i]->strands[k] == Seq_strand_plus && orientation == 1) || (acparray[i]->strands[k] == Seq_strand_minus && orientation == -1))
9400
acparray[j]->used = -1;
9401
else if (orientation == 0)
9403
if (acparray[i]->strands[k] == Seq_strand_minus)
9410
if (lfuzz >= 0) /* just mark it for deletion */
9411
acparray[j]->used = -1;
9412
else /* truncate it */
9414
if (acparray[j]->stops[k] > acparray[i]->stops[k])
9416
newsap = AlnMgr2GetSubAlign(acparray[j]->sap, acparray[i]->stops[k]+1, acparray[j]->stops[k], k+1, TRUE);
9417
SeqAlignFree(acparray[j]->sap);
9418
acparray[j]->sap = newsap;
9419
acparray[j]->starts[k] = acparray[i]->stops[k]+1;
9421
acparray[j]->used = -1;
9424
} else if (acparray[i]->starts[k] - fuzz > acparray[j]->starts[k])
9426
if (acparray[i]->starts[k] + fuzz > acparray[j]->stops[k])
9428
if ((acparray[i]->strands[k] == Seq_strand_plus && orientation == -1) || (acparray[i]->strands[k] == Seq_strand_minus && orientation == 1))
9429
acparray[j]->used = -1;
9430
else if (orientation == 0)
9432
if (acparray[i]->strands[k] == Seq_strand_minus)
9439
if (lfuzz >= 0) /* mark for deletion */
9440
acparray[j]->used = -1;
9443
if (acparray[j]->starts[k] < acparray[i]->starts[k])
9445
newsap = AlnMgr2GetSubAlign(acparray[j]->sap, acparray[j]->starts[k], acparray[i]->starts[k]-1, k+1, TRUE);
9446
SeqAlignFree(acparray[j]->sap);
9447
acparray[j]->sap = newsap;
9448
AlnMgr2IndexSingleChildSeqAlign(newsap);
9449
acparray[j]->starts[k] = acparray[i]->stops[k]+1;
9451
acparray[j]->used = -1;
9455
acparray[j]->used = -1;
9460
/* now free all the unused ones, stick the rest back together, reindex, and return */
9461
salp_head = salp_prev = NULL;
9462
for (i=0; i<numsaps; i++)
9464
if (acparray[i]->used == -1)
9466
SeqAlignFree(acparray[i]->sap);
9467
acparray[i]->sap = NULL;
9470
if (salp_head != NULL)
9472
salp_prev->next = acparray[i]->sap;
9473
salp_prev = acparray[i]->sap;
9474
salp_prev->next = NULL;
9477
salp_head = salp_prev = acparray[i]->sap;
9478
salp_prev->next = NULL;
9482
amconssetfree(acp_head);
9484
sap_head->segs = (Pointer)(salp_head);
9485
AMAlignIndex2Free2(sap_head->saip);
9486
AlnMgr2IndexLite(sap_head);
9489
static int LIBCALLBACK AlnMgr2CompareByScore(VoidPtr ptr1, VoidPtr ptr2)
9496
if (ptr1 == NULL || ptr2 == NULL)
9498
sap1 = *((SeqAlignPtr PNTR) ptr1);
9499
sap2 = *((SeqAlignPtr PNTR) ptr2);
9500
saip1 = (SAIndex2Ptr)(sap1->saip);
9501
saip2 = (SAIndex2Ptr)(sap2->saip);
9502
if (saip1->score == 0)
9503
saip1->score = AlnMgr2ComputeScoreForSeqAlign(sap1);
9504
if (saip2->score == 0)
9505
saip2->score = AlnMgr2ComputeScoreForSeqAlign(sap2);
9506
if (saip1->score > saip2->score)
9508
if (saip1->score < saip2->score)
9513
/***************************************************************************
9515
* AlnMgr2FuseSet takes a set of alignments sharing all their rows and orders
9516
* the alignments, then fuses together any adjacent alignments. If returnall
9517
* is TRUE, all pieces are returned; if not, then only the largest piece is
9518
* returned. This function will work best when called after
9519
* AlnMgr2RemoveInconsistentAlnsFromSet(sap_head, -1).
9521
***************************************************************************/
9522
NLM_EXTERN SeqAlignPtr AlnMgr2FuseSet(SeqAlignPtr sap_head, Boolean returnall)
9524
AMAlignIndex2Ptr amaip;
9525
DenseSegPtr dsp_new;
9533
SeqAlignPtr sap_keep;
9534
SeqAlignPtr sap_keep_head;
9535
SeqAlignPtr sap_keep_prev;
9537
SeqAlignPtr PNTR saparray;
9544
if (sap_head == NULL || sap_head->saip == NULL)
9546
AlnMgr2SortAlnSetByNthRowPos(sap_head, 1);
9547
amaip = (AMAlignIndex2Ptr)(sap_head->saip);
9548
sap_keep = amaip->saps[0];
9549
sap_keep_head = sap_keep_prev = NULL;
9550
numrows = AlnMgr2GetNumRows(sap_keep);
9551
for (i=1; i<amaip->numsaps; i++)
9553
/* check for consistency with sap_keep; fuse if possible */
9555
for (n=0; !found && n<numrows; n++)
9557
strand = AlnMgr2GetNthStrand(sap_keep, n+1);
9558
AlnMgr2GetNthSeqRangeInSA(sap_keep, n+1, &start1, &stop1);
9559
AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], n+1, &start2, &stop2);
9560
if (strand == Seq_strand_minus)
9562
if (stop2+1 != start1)
9566
if (start2 != stop1+1)
9570
if (!found) /* fuse together */
9572
dsp1 = (DenseSegPtr)(sap_keep->segs);
9573
dsp2 = (DenseSegPtr)(amaip->saps[i]->segs);
9574
dsp_new = DenseSegNew();
9575
dsp_new->dim = dsp1->dim;
9576
dsp_new->numseg = dsp1->numseg+dsp2->numseg;
9577
dsp_new->starts = (Int4Ptr)MemNew(dsp_new->numseg*dsp_new->dim*sizeof(Int4));
9578
dsp_new->lens = (Int4Ptr)MemNew(dsp_new->numseg*sizeof(Int4));
9579
dsp_new->strands = (Uint1Ptr)MemNew(dsp_new->numseg*dsp_new->dim*sizeof(Int4));
9580
for (n=0; n<dsp_new->numseg; n++)
9582
for (r=0; r<dsp_new->dim; r++)
9584
if (n >= dsp1->numseg)
9585
dsp_new->starts[r*n*r] = dsp2->starts[r*(n-dsp1->numseg)+r];
9587
dsp_new->starts[r*n+r] = dsp1->starts[r*n+r];
9588
dsp_new->strands[r*n*r] = dsp1->strands[r];
9590
if (n >= dsp1->numseg)
9591
dsp_new->lens[n] = dsp2->lens[n-dsp1->numseg];
9593
dsp_new->lens[n] = dsp1->lens[n];
9595
SeqAlignFree(amaip->saps[i]);
9596
amaip->saps[i] = NULL;
9597
} else /* add next alignment to keepers pile */
9599
if (sap_keep_head == NULL)
9601
if (sap_keep != NULL)
9603
sap_keep_head = sap_keep;
9604
sap_keep->next = amaip->saps[i];
9605
sap_keep_prev = amaip->saps[i];
9607
sap_keep_head = sap_keep_prev = amaip->saps[i];
9610
sap_keep_prev->next = amaip->saps[i];
9611
sap_keep_prev = amaip->saps[i];
9615
if (sap_keep_head == NULL || sap_keep_head->next == NULL) /* everything was fused */
9616
sap_keep_head = sap_keep;
9619
sap_head->segs = (Pointer)(sap_keep_head);
9620
return sap_keep_head;
9623
sap_keep = sap_keep_head;
9624
while (sap_keep != NULL)
9626
sap_keep = sap_keep->next;
9629
saparray = (SeqAlignPtr PNTR)MemNew(i*sizeof(SeqAlignPtr));
9631
sap_keep = sap_keep_head;
9632
while (sap_keep != NULL)
9634
saip = (SAIndex2Ptr)(sap_keep->saip);
9636
saparray[i] = sap_keep;
9638
sap_keep = sap_keep->next;
9640
HeapSort(saparray, i, sizeof(SeqAlignPtr), AlnMgr2CompareByScore);
9641
sap_keep = saparray[0];
9644
SeqAlignFree(saparray[n]);
9650
NLM_EXTERN void AlnMgr2FillInUnaligned(SeqAlignPtr sap)
9654
DenseSegPtr dsp_new;
9666
if (sap == NULL || (sap->saip != NULL && sap->saip->indextype != INDEX_CHILD))
9669
dsp = (DenseSegPtr)(sap->segs);
9670
for (i=0; i<dsp->dim; i++)
9673
AlnMgr2GetNthSeqRangeInSA(sap, i, &start, &stop);
9674
strand = dsp->strands[i];
9676
while (j<dsp->numseg-1)
9678
if (strand == Seq_strand_minus)
9683
while (j<dsp->numseg && !found)
9685
if (dsp->starts[j*dsp->dim+i] != -1)
9687
if (dsp->starts[j*dsp->dim+i]+dsp->lens[j] != last)
9695
last = dsp->starts[j*dsp->dim+i];
9701
while (j<dsp->numseg && !found)
9703
if (dsp->starts[j*dsp->dim+i] != -1)
9705
if (dsp->starts[j*dsp->dim+i]+dsp->lens[j] != last)
9714
last = dsp->starts[j*dsp->dim+i];
9716
last += dsp->lens[j];
9721
if (n == 0) /* no unaligned regions */
9723
dsp_new = DenseSegNew();
9724
dsp_new->numseg = dsp->numseg + n;
9725
dsp_new->dim = dsp->dim;
9726
dsp_new->starts = (Int4Ptr)MemNew(dsp_new->dim*dsp_new->numseg*sizeof(Int4));
9727
dsp_new->strands = (Uint1Ptr)MemNew(dsp_new->dim*dsp_new->numseg*sizeof(Uint1));
9728
for (i=0; i<dsp_new->numseg; i++)
9730
for (j=0; j<dsp_new->dim; j++)
9732
dsp_new->strands[i*dsp_new->dim+j] = dsp->strands[j];
9735
dsp_new->ids = SeqIdDupList(dsp->ids);
9736
dsp_new->lens = (Int4Ptr)MemNew(dsp_new->numseg*sizeof(Int4));
9738
for (j=0; j<dsp->numseg; j++)
9740
for (i=0; i<dsp->dim; i++)
9743
strand = dsp->strands[i];
9744
if (dsp->starts[j*dsp->dim+i] == -1)
9745
dsp_new->starts[curr*dsp_new->dim+i] = -1;
9750
while (k < dsp->numseg)
9752
if (dsp->starts[k*dsp->dim+i] != -1)
9755
if (strand == Seq_strand_minus)
9757
if (dsp->starts[k*dsp->dim+i] + dsp->lens[k] != dsp->starts[j*dsp->dim+i])
9759
dsp_new->lens[curr+offset] = dsp->starts[j*dsp->dim+i] - dsp->starts[k*dsp->dim+i] - dsp->lens[k];
9760
dsp_new->starts[(curr+offset)*dsp->dim+i] = dsp->starts[k*dsp->dim+i] + dsp->lens[k];
9765
if (dsp->starts[j*dsp->dim+i] + dsp->lens[j] != dsp->starts[k*dsp->dim+i])
9767
dsp_new->lens[curr+offset] = dsp->starts[k*dsp->dim+i] - dsp->starts[j*dsp->dim+i] - dsp->lens[j];
9768
dsp_new->starts[(curr+offset)*dsp->dim+i] = dsp->starts[j*dsp->dim+i] + dsp->lens[j];
9776
curr = curr + 1 + offset;
9779
sap->segs = (Pointer)(dsp_new);
9780
AMAlignIndexFreeEitherIndex(sap);
9783
/* SECTION 11 -- functions for std-segs */
9784
NLM_EXTERN SeqIdPtr AlnMgr2GetNthSeqIdPtrStdSeg(SeqAlignPtr sap, Int4 n)
9789
if (sap == NULL || sap->segtype != SAS_STD)
9791
ssp = (StdSegPtr)(sap->segs);
9801
return (SeqIdDup(SeqLocId(slp)));
9804
NLM_EXTERN Int4 AlignMgr2GetFirstNForStdSeg(SeqAlignPtr sap, SeqIdPtr sip)
9810
if (sap == NULL || sap->segtype != SAS_STD)
9812
ssp = (StdSegPtr)(sap->segs);
9815
while (sip_tmp != NULL)
9817
if (SeqIdComp(sip, sip_tmp) == SIC_YES)
9819
sip_tmp = sip_tmp->next;
9825
NLM_EXTERN void AlnMgr2GetNthSeqRangeInSAStdSeg(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop)
9834
if (sap == NULL || sap->segtype != SAS_STD)
9836
ssp = (StdSegPtr)(sap->segs);
9849
*start = SeqLocStart(slp);
9851
*stop = SeqLocStop(slp);
9855
/***************************************************************************
9857
* AlnMgr2GetSeqRangeForSipInSAStdSeg returns the smallest and largest sequence
9858
* coordinates in in a Std-Seg seqalign for a given Sequence Id. Also return the
9859
* strand type. Either start, stop or strand can be NULL to only retrieve some of them.
9860
* If start and stop are -1, there is an error (not a std-seg), the SeqID does not participate in this
9861
* alignment or the alignment is one big insert on that id. Returns true if the sip was found
9862
* in the alignment with real coordinates, i.e. *start would not be -1. RANGE
9864
***************************************************************************/
9865
NLM_EXTERN Boolean AlnMgr2GetSeqRangeForSipInSAStdSeg(SeqAlignPtr sap, SeqIdPtr sip, Int4Ptr start, Int4Ptr stop, Uint1Ptr strand)
9867
Int4 c_start, c_stop;
9870
Boolean range_found = FALSE;
9871
Boolean strands_inconsistent = FALSE;
9873
if (start) *start = -1;
9874
if (stop) *stop = -1;
9875
if (strand) *strand = Seq_strand_unknown;
9877
if (sap->segtype != SAS_STD)
9880
ssp = (StdSegPtr)(sap->segs);
9882
if (AlnMgr2GetSeqRangeForSipInStdSeg(ssp, sip, &c_start, &c_stop, &c_strand, NULL) &&
9883
c_start != -1) /* skip inserts on our bioseq */
9891
*start = MIN(*start, c_start);
9895
*stop = MAX(*stop, c_stop);
9897
if (strand && ! strands_inconsistent) {
9898
/* if strands are different each time, ignore them. */
9899
if (*strand != Seq_strand_unknown && *strand != c_strand) {
9900
*strand = Seq_strand_unknown;
9901
strands_inconsistent = TRUE;
9913
/***************************************************************************
9915
* AlnMgr2GetSeqRangeForSipInStdSeg returns the start and stop sequence
9916
* coordinates in a Std-Segment for a given Sequence Id. Also return the
9917
* strand type. Either start, stop or strand can be NULL to only retrieve some of them.
9918
* If start and stop are -1, the SeqID was not found in this segment.
9919
* Returns true if the sip was found, even if it is a gap (start, stop = -1). RANGE
9921
***************************************************************************/
9922
NLM_EXTERN Boolean AlnMgr2GetSeqRangeForSipInStdSeg(
9928
Uint1Ptr segType) /* AM_SEQ, AM_GAP, AM_INSERT */
9932
Int4 m_start, m_stop, m_swap;
9933
Boolean s_present = FALSE;
9934
Boolean m_present = FALSE;
9935
Boolean found_id = FALSE;
9937
for ( loc = ssp->loc;
9940
/* One SeqLoc for each Sequence aligned by this segment. */
9941
/* find the one that matches the sip parameter. */
9942
if (SeqIdForSameBioseq(sip, SeqLocId(loc))) {
9943
m_strand = SeqLocStrand(loc);
9944
m_start = SeqLocStart(loc);
9945
m_stop = SeqLocStop(loc);
9946
/* Might have to reverse the order of start and stop on
9947
minus strands so that start is less than stop. */
9948
if (m_start > m_stop) {
9953
if (start) *start = m_start;
9954
if (stop) *stop = m_stop;
9955
if (strand) *strand = m_strand;
9959
/* found our sequence in this segment. */
9961
} else { /* a different sequence */
9962
if (SeqLocStart(loc) != -1)
9968
if (m_present && s_present)
9970
else if (!m_present && s_present)
9971
*segType = AM_INSERT;
9972
else if (m_present && !s_present)
9975
*segType = AM_GAP; /* start will be -1 */
9981
/***************************************************************************
9983
* AlnMgr2GetNthStdSeg returns the a pointer to the Nth segment of
9984
* a standard segment alignment. Numbering starts with 1.
9985
* returns NULL if not n segments or is not a std-seg aligment.
9986
* Useful to pass its return value to AlnMgr2GetSeqRangeForSipInStdSeg()
9988
***************************************************************************/
9989
NLM_EXTERN StdSegPtr AlnMgr2GetNthStdSeg(SeqAlignPtr sap, Int2 n)
9994
if (sap == NULL || sap->segtype != SAS_STD || n < 1)
9998
ssp = (StdSegPtr)(sap->segs);
10010
/***************************************************************************
10012
* AlnMgr2GetNumStdSegs returns the number of segments in a standar-seg alignment.
10013
* returns -1 if sap is null or not a standard-seg alignment.
10015
***************************************************************************/
10016
NLM_EXTERN Int4 AlnMgr2GetNumStdSegs(SeqAlignPtr sap)
10018
Int4 seg_count = 0;
10021
if (sap == NULL || sap->segtype != SAS_STD)
10024
ssp = (StdSegPtr)(sap->segs);
10033
static SeqLocPtr AlnMgr2GetLongestSeqLoc(SeqAlignPtr sap)
10038
SeqLocPtr slp_longest;
10041
if (sap == NULL || sap->segtype != SAS_STD)
10044
ssp = (StdSegPtr)(sap->segs);
10046
while (slp != NULL)
10048
n = SeqLocLen(slp);
10056
return slp_longest;
10059
/***************************************************************************
10061
* The two mapping functions act a little differently for std-segs. The
10062
* alignment coordinates are 1:1 linearly correlated with the longest
10063
* seqloc in the set; the others may be significantly shorter.
10064
* The mapping functions deal with % lengths, and map those instead of
10065
* coordinates (which may not be linear);
10067
***************************************************************************/
10068
NLM_EXTERN Int4 AlnMgr2MapBioseqToSeqAlignStdSeg(SeqAlignPtr sap, Int4 n, Int4 pos)
10071
SeqLocPtr slp_longest;
10078
if (sap == NULL || sap->segtype != SAS_STD)
10080
slp_longest = AlnMgr2GetLongestSeqLoc(sap);
10081
start1 = SeqLocStart(slp_longest);
10082
stop1 = SeqLocStop(slp_longest);
10083
ssp = (StdSegPtr)(sap->segs);
10095
start2 = SeqLocStart(slp);
10096
stop2 = SeqLocStop(slp);
10097
if (start2 == -1) /* NULL */
10099
return (((stop1-start1)*(pos - start2))/(stop2-start2));
10102
NLM_EXTERN Int4 AlnMgr2MapSeqAlignToBioseqStdSeg(SeqAlignPtr sap, Int4 n, Int4 pos)
10105
SeqLocPtr slp_longest;
10112
if (sap == NULL || sap->segtype != SAS_STD)
10114
slp_longest = AlnMgr2GetLongestSeqLoc(sap);
10115
start1 = SeqLocStart(slp_longest);
10116
stop1 = SeqLocStop(slp_longest);
10117
ssp = (StdSegPtr)(sap->segs);
10129
start2 = SeqLocStart(slp);
10130
stop2 = SeqLocStop(slp);
10131
if (start2 == -1) /* NULL */
10133
return (start2 + ((stop2-start2)*(pos-start1))/(stop1-start1));
10136
NLM_EXTERN Int4 AlnMgr2GetAlnLengthStdSeg(SeqAlignPtr sap)
10138
SeqLocPtr slp_longest;
10140
if (sap == NULL || sap->segtype != SAS_STD)
10142
slp_longest = AlnMgr2GetLongestSeqLoc(sap);
10143
return (SeqLocLen(slp_longest));