6280
6335
lip = FreeLog (lip);
6283
static void CKA_FindNuc(SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
6286
BioseqPtr PNTR bspptr;
6288
bspptr = (BioseqPtr PNTR)data;
6291
bsp = (BioseqPtr)sep->data.ptrvalue;
6292
if (ISA_na(bsp->mol))
6299
static int LIBCALLBACK CKA_CompareAlns(VoidPtr ptr1, VoidPtr ptr2)
6306
sap1 = *((SeqAlignPtr PNTR) ptr1);
6307
sap2 = *((SeqAlignPtr PNTR) ptr2);
6308
if (sap1 == NULL || sap2 == NULL)
6310
len1 = AlnMgr2GetAlnLength(sap1, FALSE);
6311
len2 = AlnMgr2GetAlnLength(sap2, FALSE);
6339
typedef struct seqalignrow {
6343
} SeqAlignRowData, PNTR SeqAlignRowPtr;
6345
static SeqAlignRowPtr SeqAlignRowNew (Int4 start, Int4 stop, Uint1 strand)
6350
r = (SeqAlignRowPtr) MemNew (sizeof (SeqAlignRowData));
6354
if (r->start > r->stop) {
6364
static SeqAlignRowPtr SeqAlignRowCopy (SeqAlignRowPtr orig)
6366
SeqAlignRowPtr r = NULL;
6369
r = SeqAlignRowNew (orig->start, orig->stop, orig->strand);
6375
static SeqAlignRowPtr SeqAlignRowFree (SeqAlignRowPtr r)
6382
static Int4 RowDiff (SeqAlignRowPtr r1, SeqAlignRowPtr r2)
6386
if (r1 == NULL || r2 == NULL) {
6390
diff = ABS(r1->start - r2->start) + ABS (r1->stop - r2->stop);
6395
static Int4 SeqAlignRowLen (SeqAlignRowPtr r)
6400
len = r->stop - r->start + 1;
6406
typedef struct seqalignsort {
6407
SeqAlignRowPtr row1;
6408
SeqAlignRowPtr row2;
6410
} SeqAlignSortData, PNTR SeqAlignSortPtr;
6413
static SeqAlignSortPtr SeqAlignSortNew (SeqAlignPtr salp)
6421
s = (SeqAlignSortPtr) MemNew (sizeof (SeqAlignSortData));
6424
AlnMgr2IndexSingleChildSeqAlign(salp);
6426
s->row1 = SeqAlignRowNew (SeqAlignStart (salp, 0), SeqAlignStop (salp, 0), SeqAlignStrand (salp, 0));
6427
s->row2 = SeqAlignRowNew (SeqAlignStart (salp, 1), SeqAlignStop (salp, 1), SeqAlignStrand (salp, 1));
6433
static SeqAlignSortPtr SeqAlignSortFree (SeqAlignSortPtr s)
6436
s->row1 = SeqAlignRowFree (s->row1);
6437
s->row2 = SeqAlignRowFree (s->row2);
6444
static ValNodePtr SeqAlignSortListNew (SeqAlignPtr salp)
6446
ValNodePtr list = NULL;
6447
SeqAlignPtr salp_next;
6449
while (salp != NULL) {
6450
salp_next = salp->next;
6452
ValNodeAddPointer (&list, 0, SeqAlignSortNew (salp));
6459
static ValNodePtr SeqAlignSortListFree (ValNodePtr vnp)
6461
ValNodePtr vnp_next;
6463
while (vnp != NULL) {
6464
vnp_next = vnp->next;
6466
vnp->data.ptrvalue = SeqAlignSortFree (vnp->data.ptrvalue);
6467
vnp = ValNodeFree (vnp);
6474
static SeqAlignRowPtr SeqAlignRowFromSeqAlignSort (SeqAlignSortPtr s, Int4 row)
6478
} else if (row == 1) {
6486
static Uint1 SeqAlignSortRowStrand (SeqAlignSortPtr s, Int4 row)
6488
Uint1 strand = Seq_strand_plus;
6491
r = SeqAlignRowFromSeqAlignSort (s, row);
6499
static Uint1 SeqAlignSortListFindBestStrand (ValNodePtr vnp, Int4 row)
6501
Int4 num_plus = 0, num_minus = 0;
6504
while (vnp != NULL) {
6505
s = (SeqAlignSortPtr) vnp->data.ptrvalue;
6507
if (SeqAlignSortRowStrand(s, row) == Seq_strand_minus) {
6516
if (num_minus > num_plus) {
6517
return Seq_strand_minus;
6519
return Seq_strand_plus;
6524
static void SeqAlignSortListMarkStrand (ValNodePtr vnp, Int4 row, Uint1 strand)
6526
while (vnp != NULL) {
6527
if (SeqAlignSortRowStrand (vnp->data.ptrvalue, row) == strand) {
6535
static ValNodePtr SeqAlignSortListRemoveAll (ValNodePtr list)
6540
for (vnp = list; vnp != NULL; vnp = vnp->next) {
6541
s = vnp->data.ptrvalue;
6542
if (s != NULL && s->salp != NULL) {
6543
s->salp->next = NULL;
6544
s->salp = SeqAlignFree (s->salp);
6548
list = SeqAlignSortListFree (list);
6553
static void SeqAlignSortListRemoveMarked (ValNodePtr PNTR list)
6555
ValNodePtr remove_list;
6561
remove_list = ValNodeExtractList (list, 1);
6562
remove_list = SeqAlignSortListRemoveAll (remove_list);
6566
static Uint1 SeqAlignSortListRemoveConflictingStrands (ValNodePtr PNTR list, Int4 row)
6571
return Seq_strand_plus;
6574
strand = SeqAlignSortListFindBestStrand (*list, row);
6575
if (strand == Seq_strand_plus) {
6576
SeqAlignSortListMarkStrand (*list, row, Seq_strand_minus);
6578
SeqAlignSortListMarkStrand (*list, row, Seq_strand_plus);
6581
SeqAlignSortListRemoveMarked (list);
6586
static SeqAlignPtr SeqAlignFromSeqAlignSortList (ValNodePtr vnp)
6588
SeqAlignPtr salp_list = NULL, salp_prev = NULL;
6591
while (vnp != NULL) {
6592
s = (SeqAlignSortPtr) vnp->data.ptrvalue;
6593
if (s != NULL && s->salp != NULL) {
6594
s->salp->next = NULL;
6595
if (salp_prev == NULL) {
6596
salp_list = s->salp;
6598
salp_prev->next = s->salp;
6600
salp_prev = s->salp;
6601
s->salp->next = NULL;
6609
static int CompareSeqAlignRow (SeqAlignRowPtr r1, SeqAlignRowPtr r2)
6613
if (r1 == NULL && r2 == NULL) {
6615
} else if (r1 == NULL) {
6617
} else if (r2 == NULL) {
6619
} else if (r1->start < r2->start) {
6621
} else if (r1->start > r2->start) {
6623
} else if (r1->stop < r2->stop) {
6625
} else if (r1->stop > r2->stop) {
6627
} else if (r1->strand < r2->strand) {
6629
} else if (r1->strand > r2->strand) {
6636
static int CompareSeqAlignSortPreferRow1 (SeqAlignSortPtr s1, SeqAlignSortPtr s2)
6639
if (s1 == NULL && s2 == NULL) {
6641
} else if (s1 == NULL) {
6643
} else if (s2 == NULL) {
6645
} else if ((rval = CompareSeqAlignRow (s1->row1,s2->row1)) == 0) {
6646
rval = CompareSeqAlignRow (s1->row2, s2->row2);
6652
static int CompareSeqAlignSortPreferRow2 (SeqAlignSortPtr s1, SeqAlignSortPtr s2)
6655
if (s1 == NULL && s2 == NULL) {
6657
} else if (s1 == NULL) {
6659
} else if (s2 == NULL) {
6661
} else if ((rval = CompareSeqAlignRow (s1->row2,s2->row2)) == 0) {
6662
rval = CompareSeqAlignRow (s1->row1, s2->row1);
6669
static int LIBCALLBACK SortVnpBySeqAlignSortRow1 (VoidPtr ptr1, VoidPtr ptr2)
6675
if (ptr1 != NULL && ptr2 != NULL) {
6676
vnp1 = *((ValNodePtr PNTR) ptr1);
6677
vnp2 = *((ValNodePtr PNTR) ptr2);
6678
if (vnp1 != NULL && vnp2 != NULL) {
6679
return CompareSeqAlignSortPreferRow1 (vnp1->data.ptrvalue, vnp2->data.ptrvalue);
6686
static int LIBCALLBACK SortVnpBySeqAlignSortRow2 (VoidPtr ptr1, VoidPtr ptr2)
6692
if (ptr1 != NULL && ptr2 != NULL) {
6693
vnp1 = *((ValNodePtr PNTR) ptr1);
6694
vnp2 = *((ValNodePtr PNTR) ptr2);
6695
if (vnp1 != NULL && vnp2 != NULL) {
6696
return CompareSeqAlignSortPreferRow2 (vnp1->data.ptrvalue, vnp2->data.ptrvalue);
6703
static ValNodePtr SeqAlignSortListExtractRepeats (ValNodePtr PNTR list, Int4 row, Int4 fuzz)
6705
ValNodePtr repeat_start, repeat_prev = NULL, vnp_prev = NULL, vnp;
6706
ValNodePtr repeat_list = NULL;
6707
SeqAlignSortPtr s1, s2;
6708
SeqAlignRowPtr interval, r2;
6712
if (list == NULL || *list == NULL || (*list)->next == NULL) {
6717
*list = ValNodeSort (*list, SortVnpBySeqAlignSortRow1);
6719
*list = ValNodeSort (*list, SortVnpBySeqAlignSortRow2);
6722
repeat_start = *list;
6723
s1 = repeat_start->data.ptrvalue;
6724
interval = SeqAlignRowCopy (SeqAlignRowFromSeqAlignSort(s1, row));
6726
for (vnp = (*list)->next; vnp != NULL; vnp = vnp->next) {
6727
s2 = vnp->data.ptrvalue;
6729
r2 = SeqAlignRowFromSeqAlignSort (s2, row);
6731
if (interval->start <= r2->start && interval->stop >= r2->stop) {
6734
} else if (r2->start <= interval->start && r2->stop >= interval->stop) {
6737
} else if ((diff = RowDiff (interval, r2)) > -1 && diff < fuzz) {
6742
if (interval->start > r2->start) {
6743
interval->start = r2->start;
6745
if (interval->stop < r2->stop) {
6746
interval->stop = r2->stop;
6749
if (repeat_start->next == vnp) {
6750
repeat_prev = vnp_prev;
6753
if (repeat_prev == NULL) {
6756
repeat_prev->next = vnp;
6758
if (vnp_prev != NULL) {
6759
vnp_prev->next = NULL;
6761
ValNodeAddPointer (&repeat_list, 0, repeat_start);
6765
s1 = vnp->data.ptrvalue;
6766
interval = SeqAlignRowFree (interval);
6767
interval = SeqAlignRowCopy (SeqAlignRowFromSeqAlignSort(s1, row));
6771
if (repeat_start->next != NULL) {
6772
if (repeat_prev == NULL) {
6775
repeat_prev->next = NULL;
6777
ValNodeAddPointer (&repeat_list, 0, repeat_start);
6780
interval = SeqAlignRowFree (interval);
6785
static int SeqAlignRowFuzzyCompare (SeqAlignRowPtr r1, SeqAlignRowPtr r2, Int4 fuzz)
6787
if (r1 == NULL && r2 == NULL) {
6789
} else if (r1 == NULL) {
6791
} else if (r2 == NULL) {
6795
if (r1->stop < r2->start || r1->stop - r2->start < fuzz) {
6797
} else if (r2->stop < r1->start || r2->stop - r1->start < fuzz) {
6805
static int SeqAlignSortFuzzyCompare (SeqAlignSortPtr s1, SeqAlignSortPtr s2, Int4 row, Int4 fuzz)
6807
if (s1 == NULL && s2 == NULL) {
6809
} else if (s1 == NULL) {
6811
} else if (s2 == NULL) {
6813
} else if (row == 1) {
6814
return SeqAlignRowFuzzyCompare (s1->row1, s2->row1, fuzz);
6816
return SeqAlignRowFuzzyCompare (s1->row2, s2->row2, fuzz);
6821
static void SeqAlignSortListRemoveIntervalsOutOfOrder (ValNodePtr PNTR list, Int4 row, Int4 fuzz)
6823
ValNodePtr vnp, vnp_prev = NULL;
6829
for (vnp = *list; vnp != NULL; vnp = vnp->next) {
6830
if (vnp_prev != NULL && SeqAlignSortFuzzyCompare (vnp_prev->data.ptrvalue, vnp->data.ptrvalue, row, fuzz) != -1) {
6832
} else if (vnp->next != NULL && SeqAlignSortFuzzyCompare (vnp->data.ptrvalue, vnp->next->data.ptrvalue, row, fuzz) != -1) {
6833
if (vnp->next->next != NULL
6834
&& SeqAlignSortFuzzyCompare (vnp->data.ptrvalue, vnp->next->next->data.ptrvalue, row, fuzz) == -1
6835
&& SeqAlignSortFuzzyCompare (vnp->next->data.ptrvalue, vnp->next->next->data.ptrvalue, row, fuzz) != -1) {
6836
/* ok to keep this one, we'll toss the next one */
6841
if (vnp->choice == 0) {
6846
SeqAlignSortListRemoveMarked (list);
6850
static Int4 GetRepeatIntervalFuzz (SeqAlignSortPtr s_repeat, SeqAlignSortPtr s_before, SeqAlignSortPtr s_after, Int4 row)
6852
Int4 start_fuzz = 0, end_fuzz = 0;
6854
if (s_repeat == NULL) {
6858
if (s_before != NULL) {
6860
start_fuzz = ABS (s_repeat->row1->start - s_before->row1->stop);
6862
start_fuzz = ABS (s_repeat->row2->start - s_before->row2->stop);
6865
if (s_after != NULL) {
6867
end_fuzz = ABS (s_after->row1->start - s_repeat->row1->stop);
6869
end_fuzz = ABS (s_after->row2->start - s_repeat->row2->stop);
6873
return start_fuzz + end_fuzz;
6877
static int StrandedSeqAlignSortRowCompare (SeqAlignSortPtr s1, SeqAlignSortPtr s2, Int4 row, Int4 fuzz)
6879
SeqAlignRowPtr r1 = NULL, r2 = NULL;
6881
Uint1 strand = Seq_strand_plus;
6883
if (s1 == NULL || s2 == NULL) {
6887
r1 = SeqAlignRowFromSeqAlignSort (s1, row);
6888
r2 = SeqAlignRowFromSeqAlignSort (s2, row);
6889
strand = r1->strand;
6891
if (strand == Seq_strand_minus) {
6892
if (r1->start < r2->start - fuzz) {
6894
} else if (r1->start >r2->start + fuzz) {
6898
if (r1->start > r2->start + fuzz) {
6900
} else if (r1->stop < r2->stop - fuzz) {
6909
static Boolean FindSeqAlignSortWithPoint (ValNodePtr list, Int4 point, Int4 row)
6914
Boolean found = FALSE;
6916
for (vnp = list; vnp != NULL; vnp = vnp->next) {
6917
s = vnp->data.ptrvalue;
6918
r = SeqAlignRowFromSeqAlignSort (s, row);
6919
if (point >= r->start && point <= r->stop) {
6927
static ValNodePtr FindBestRepeat (ValNodePtr PNTR repeat_list, SeqAlignSortPtr s_before, SeqAlignSortPtr s_after, Int4 row, Int4 fuzz)
6929
Int4 best_diff = -1, diff;
6930
ValNodePtr vnp, vnp_best = NULL, vnp_best_prev = NULL, vnp_prev = NULL;
6931
SeqAlignSortPtr s_this;
6933
if (repeat_list == NULL || *repeat_list == NULL) {
6937
for (vnp = *repeat_list; vnp != NULL; vnp = vnp->next) {
6938
s_this = vnp->data.ptrvalue;
6940
if (StrandedSeqAlignSortRowCompare (s_this, s_before, row, fuzz) < 0
6941
|| StrandedSeqAlignSortRowCompare (s_this, s_after, row, fuzz) > 0) {
6942
/* skip - already out of order */
6944
diff = GetRepeatIntervalFuzz (vnp->data.ptrvalue, s_before, s_after, row);
6945
if (diff > -1 && (best_diff < 0 || best_diff > diff)) {
6947
vnp_best_prev = vnp_prev;
6954
if (vnp_best != NULL) {
6955
if (vnp_best_prev == NULL) {
6956
*repeat_list = vnp_best->next;
6958
vnp_best_prev->next = vnp_best->next;
6960
vnp_best->next = NULL;
6967
static void RemoveRepeatsCoincidingWithBest (ValNodePtr PNTR repeat_list, ValNodePtr best, Int4 row, Int4 fuzz)
6970
SeqAlignSortPtr s_best, s;
6971
SeqAlignRowPtr r_best, r2;
6975
if (repeat_list == NULL || *repeat_list == NULL || best == NULL) {
6979
s_best = best->data.ptrvalue;
6980
r_best = SeqAlignRowFromSeqAlignSort (s_best, row);
6982
for (vnp = *repeat_list; vnp != NULL; vnp = vnp->next) {
6983
s = vnp->data.ptrvalue;
6984
r2 = SeqAlignRowFromSeqAlignSort (s, row);
6987
if (r_best->start <= r2->start && r_best->stop >= r2->stop) {
6990
} else if (r2->start <= r_best->start && r2->stop >= r_best->stop) {
6993
} else if (r2->stop < r_best->stop || r2->stop - r_best->stop < fuzz) {
6995
} else if ((diff = RowDiff (r_best, r2)) > -1 && diff < fuzz) {
7003
SeqAlignSortListRemoveMarked (repeat_list);
7007
static ValNodePtr ExtractLongestSeqAlignRow (ValNodePtr PNTR list, Int4 row)
7009
ValNodePtr vnp, vnp_prev = NULL, rval = NULL;
7010
Int4 longest = 0, len;
7011
Boolean found = FALSE;
7013
if (list == NULL || *list == NULL) {
7017
for (vnp = *list; vnp != NULL; vnp = vnp->next) {
7018
len = SeqAlignRowLen (SeqAlignRowFromSeqAlignSort (vnp->data.ptrvalue, row));
7019
if (longest < len) {
7024
for (vnp = *list; vnp != NULL && !found; vnp = vnp->next) {
7025
len = SeqAlignRowLen (SeqAlignRowFromSeqAlignSort (vnp->data.ptrvalue, row));
7026
if (len == longest) {
7027
if (vnp_prev == NULL) {
7030
vnp_prev->next = vnp->next;
7042
static void InsertBestRepeat (ValNodePtr repeat_list, ValNodePtr PNTR sorted_list, Int4 row, Int4 fuzz)
7044
ValNodePtr vnp, vnp_prev = NULL, vnp_new;
7045
SeqAlignSortPtr s_repeat, s, s_before = NULL;
7046
Boolean found = TRUE;
7047
SeqAlignRowPtr r1, r2;
7050
if (repeat_list == NULL || sorted_list == NULL) {
7059
if (sorted_list == NULL || *sorted_list == NULL) {
7060
/* keep longest, mark others for removal */
7061
vnp = ExtractLongestSeqAlignRow (&repeat_list, row);
7062
ValNodeLink (sorted_list,vnp);
7063
repeat_list = SeqAlignSortListRemoveAll (repeat_list);
7065
s_repeat = repeat_list->data.ptrvalue;
7069
/* find first entry that is after this repeat, and insert before that */
7070
while (vnp != NULL && !found) {
7071
s = vnp->data.ptrvalue;
7072
r1 = SeqAlignRowFromSeqAlignSort (s, row);
7073
r2 = SeqAlignRowFromSeqAlignSort (s_repeat, row);
7075
if (r1->start > r2->start || r2->start - r1->start < fuzz) {
7076
while (repeat_list != NULL) {
7077
/* extract best repeat */
7078
vnp_new = FindBestRepeat (&repeat_list, s_before, vnp->data.ptrvalue, other_row, fuzz);
7079
if (vnp_new == NULL) {
7080
repeat_list = SeqAlignSortListRemoveAll (repeat_list);
7082
RemoveRepeatsCoincidingWithBest (&repeat_list, vnp_new, row, fuzz);
7083
vnp_new->next = vnp;
7084
if (vnp_prev == NULL) {
7085
*sorted_list = vnp_new;
7087
vnp_prev->next = vnp_new;
7090
s_before = vnp_new->data.ptrvalue;
7096
s_before = vnp->data.ptrvalue;
7102
while (repeat_list != NULL) {
7103
/* extract best repeat */
7104
vnp_new = FindBestRepeat (&repeat_list, s_before, NULL, other_row, fuzz);
7105
if (vnp_new == NULL) {
7106
repeat_list = SeqAlignSortListRemoveAll (repeat_list);
7108
RemoveRepeatsCoincidingWithBest (&repeat_list, vnp_new, row, fuzz);
7109
vnp_new->next = NULL;
7110
if (vnp_prev == NULL) {
7111
*sorted_list = vnp_new;
7113
vnp_prev->next = vnp_new;
7116
s_before = vnp_new->data.ptrvalue;
7122
SeqAlignSortListRemoveMarked (&repeat_list);
7123
repeat_list = ValNodeFree (repeat_list);
7127
static void SelectBestRepeatsFromList (SeqAlignPtr PNTR salp)
7129
ValNodePtr list, vnp;
7130
ValNodePtr row1_repeats, row2_repeats;
7131
Uint1 strand1, strand2;
7132
SeqAlignPtr tmp_salp;
7137
if (salp == NULL || *salp == NULL || (*salp)->next == NULL) {
7141
list = SeqAlignSortListNew (*salp);
7143
FindSeqAlignSortWithPoint (list, missing, 1);
7145
/* remove conflicting strands for row 1 */
7146
strand1 = SeqAlignSortListRemoveConflictingStrands (&list, 1);
7148
/* remove conflicting strands for row 1 */
7149
strand2 = SeqAlignSortListRemoveConflictingStrands (&list, 2);
7151
FindSeqAlignSortWithPoint (list, missing, 1);
7153
if (list != NULL && list->next != NULL) {
7154
row1_repeats = SeqAlignSortListExtractRepeats (&list, 1, fuzz);
7155
row2_repeats = SeqAlignSortListExtractRepeats (&list, 2, fuzz);
7157
FindSeqAlignSortWithPoint (list, missing, 1);
7159
/* remove scaffold intervals that are out of order */
7160
list = ValNodeSort (list, SortVnpBySeqAlignSortRow1);
7161
SeqAlignSortListRemoveIntervalsOutOfOrder (&list, 1, fuzz);
7162
list = ValNodeSort (list, SortVnpBySeqAlignSortRow2);
7163
SeqAlignSortListRemoveIntervalsOutOfOrder (&list, 2, fuzz);
7165
FindSeqAlignSortWithPoint (list, missing, 1);
7167
/* Remove overlaps.*/
7168
list = ValNodeSort (list, SortVnpBySeqAlignSortRow1);
7169
tmp_salp = SeqAlignFromSeqAlignSortList (list);
7170
list = SeqAlignSortListFree (list);
7171
ACT_RemoveInconsistentAlnsFromSet (tmp_salp, 1, 1);
7172
list = SeqAlignSortListNew (tmp_salp);
7174
FindSeqAlignSortWithPoint (list, missing, 1);
7176
/* for each repeat on row 1, we want to pick the most consistent interval for row 2 */
7177
list = ValNodeSort (list, SortVnpBySeqAlignSortRow1);
7178
for (vnp = row1_repeats; vnp != NULL; vnp = vnp->next) {
7179
InsertBestRepeat (vnp->data.ptrvalue, &list, 1, fuzz);
7181
row1_repeats = ValNodeFree (row1_repeats);
7183
/* for each repeat on row 2, we want to pick the most consistent interval for row 1 */
7184
list = ValNodeSort (list, SortVnpBySeqAlignSortRow2);
7185
for (vnp = row2_repeats; vnp != NULL; vnp = vnp->next) {
7186
InsertBestRepeat (vnp->data.ptrvalue, &list, 2, fuzz);
7188
row2_repeats = ValNodeFree (row2_repeats);
7191
list = ValNodeSort (list, SortVnpBySeqAlignSortRow1);
7192
*salp = SeqAlignFromSeqAlignSortList (list);
7194
list = SeqAlignSortListFree (list);
6319
7198
static void amconssetfree(AMConsSetPtr acp)
8631
/* advanced editor for Seq-hist assembly alignment */
8632
typedef struct assemblyalignmentdlg {
8633
DIALOG_MESSAGE_BLOCK
8634
DialoG intervals_dialog;
8637
} AssemblyAlignmentDlgData, PNTR AssemblyAlignmentDlgPtr;
8639
Uint2 assmbly_aln_types [] = {
8640
TAGLIST_TEXT, TAGLIST_TEXT, TAGLIST_TEXT, TAGLIST_TEXT, TAGLIST_PROMPT, TAGLIST_POPUP
8643
Uint2 assmbly_aln_widths [] = {
8644
16, 8, 8, 8, 8, 8, 0
8647
ENUM_ALIST(assmbly_aln_strand_alist)
8654
static EnumFieldAssocPtr assmbly_aln_alists[] = {
8655
NULL, NULL, NULL, NULL, NULL, assmbly_aln_strand_alist
8659
typedef struct assemblyalignmentinterval {
8660
CharPtr prim_accession;
8665
Uint2 percent_identity;
8666
} AssemblyAlignmentIntervalData, PNTR AssemblyAlignmentIntervalPtr;
8669
static AssemblyAlignmentIntervalPtr AssemblyAlignmentIntervalFree (AssemblyAlignmentIntervalPtr interval)
8671
if (interval != NULL) {
8672
interval->prim_accession = MemFree (interval->prim_accession);
8673
interval = MemFree (interval);
8679
static AssemblyAlignmentIntervalPtr AssemblyAlignmentIntervalFromTagListString (CharPtr str)
8681
AssemblyAlignmentIntervalPtr interval;
8685
if (StringHasNoText (str)) {
8689
interval = (AssemblyAlignmentIntervalPtr) MemNew (sizeof (AssemblyAlignmentIntervalData));
8690
MemSet (interval, 0, sizeof (AssemblyAlignmentIntervalData));
8692
cp = StringChr (str, '\t');
8694
interval->prim_accession = StringSave (str);
8697
interval->prim_accession = (CharPtr) MemNew (sizeof (Char) * len);
8698
StringNCpy (interval->prim_accession, str, len - 1);
8699
interval->prim_accession[len - 1] = 0;
8701
cp = StringChr (str, '\t');
8702
interval->tpa_from = atoi (str);
8705
cp = StringChr (str, '\t');
8706
interval->tpa_to = atoi (str);
8709
cp = StringChr (str, '\t');
8710
interval->prim_from = atoi (str);
8712
cp = StringChr (cp + 1, '\t');
8714
val = atoi (cp + 1);
8716
interval->prim_strand = Seq_strand_minus;
8718
interval->prim_strand = Seq_strand_plus;
8729
static CharPtr TagListStringFromAssemblyAlignmentInterval (AssemblyAlignmentIntervalPtr interval)
8731
CharPtr str, str_fmt = "%s\t%d\t%d\t%d\t%d (%d)\t%d\n";
8733
if (interval == NULL) {
8737
str = (CharPtr) MemNew (sizeof (Char) * (StringLen (str_fmt) + StringLen (interval->prim_accession) + 61));
8738
sprintf (str, str_fmt, interval->prim_accession == NULL ? "" : interval->prim_accession,
8741
interval->prim_from,
8742
interval->prim_from + interval->tpa_to - interval->tpa_from,
8743
interval->percent_identity,
8744
interval->prim_strand == Seq_strand_minus ? 1 : 0);
8749
static AssemblyAlignmentIntervalPtr AssemblyAlignmentIntervalFromSeqAlign (SeqAlignPtr salp)
8751
AssemblyAlignmentIntervalPtr interval;
8755
if (salp == NULL || salp->dim != 2 || salp->segtype != SAS_DENSEG) {
8759
dsp = (DenseSegPtr) salp->segs;
8760
if (dsp == NULL || dsp->numseg != 1) {
8764
interval = (AssemblyAlignmentIntervalPtr) MemNew (sizeof (AssemblyAlignmentIntervalData));
8765
MemSet (interval, 0, sizeof (AssemblyAlignmentIntervalData));
8767
/* first row is TPA, second row is primary */
8768
SeqIdWrite (dsp->ids->next, id_txt, PRINTID_REPORT, sizeof (id_txt) - 1);
8769
interval->prim_accession = StringSave (id_txt);
8771
interval->tpa_from = dsp->starts[0] + 1;
8772
interval->tpa_to = dsp->starts[0] + dsp->lens[0];
8773
interval->prim_from = dsp->starts[1] + 1;
8774
if (dsp->strands == NULL) {
8775
interval->prim_strand = Seq_strand_plus;
8777
interval->prim_strand = dsp->strands[1];
8780
interval->percent_identity = AlignmentPercentIdentity (salp, FALSE);
8786
static SeqAlignPtr SeqAlignFromAssemblyAlignmentInterval (AssemblyAlignmentIntervalPtr interval)
8791
if (interval == NULL) {
8794
dsp = DenseSegNew ();
8797
dsp->starts = (Int4Ptr) MemNew (sizeof (Int4) * dsp->dim * dsp->numseg);
8798
dsp->lens = (Int4Ptr) MemNew (sizeof (Int4) * dsp->numseg);
8799
dsp->strands = (Uint1Ptr) MemNew (sizeof (Uint1) * dsp->dim * dsp->numseg);
8801
dsp->ids = ValNodeNew (NULL);
8802
dsp->ids->next = SeqIdFromAccessionDotVersion(interval->prim_accession);
8804
dsp->starts[0] = interval->tpa_from - 1;
8805
dsp->starts[1] = interval->prim_from - 1;
8806
dsp->lens[0] = interval->tpa_to - interval->tpa_from + 1;
8807
dsp->strands[0] = Seq_strand_plus;
8808
dsp->strands[1] = interval->prim_strand;
8810
salp = SeqAlignNew ();
8812
salp->segtype = SAS_DENSEG;
8814
salp->type = SAT_PARTIAL;
8819
static void SeqAlignToAssemblyAlignmentDialog (DialoG d, Pointer data)
8821
AssemblyAlignmentDlgPtr dlg;
8823
dlg = (AssemblyAlignmentDlgPtr) GetObjectExtra (d);
8828
PointerToDialog (dlg->intervals_dialog, data);
8832
static Pointer AssemblyAlignmentDialogToSeqAlign (DialoG d)
8834
AssemblyAlignmentDlgPtr dlg;
8835
SeqAlignPtr salp = NULL;
8837
dlg = (AssemblyAlignmentDlgPtr) GetObjectExtra (d);
8842
salp = DialogToPointer (dlg->intervals_dialog);
8848
static void SeqAlignToTagDlg (DialoG d, Pointer data)
8852
AssemblyAlignmentIntervalPtr interval;
8854
tlp =(TagListPtr) GetObjectExtra (d);
8859
tlp->vnp = ValNodeFreeData (tlp->vnp);
8860
SendMessageToDialog (tlp->dialog, VIB_MSG_RESET);
8861
salp = (SeqAlignPtr) data;
8863
while (salp != NULL) {
8864
interval = AssemblyAlignmentIntervalFromSeqAlign (salp);
8865
if (interval != NULL) {
8866
ValNodeAddPointer (&(tlp->vnp), 0, TagListStringFromAssemblyAlignmentInterval (interval));
8867
interval = AssemblyAlignmentIntervalFree (interval);
8871
SendMessageToDialog (tlp->dialog, VIB_MSG_REDRAW);
8875
static Pointer SeqAlignFromTagDlg (DialoG d)
8878
SeqAlignPtr salp = NULL, salp_last = NULL, salp_tmp;
8880
AssemblyAlignmentIntervalPtr interval;
8882
tlp =(TagListPtr) GetObjectExtra (d);
8887
for (vnp = tlp->vnp; vnp != NULL; vnp = vnp->next) {
8888
interval = AssemblyAlignmentIntervalFromTagListString (vnp->data.ptrvalue);
8889
if (interval != NULL) {
8890
salp_tmp = SeqAlignFromAssemblyAlignmentInterval (interval);
8891
if (salp_tmp != NULL) {
8892
if (salp_last == NULL) {
8895
salp_last->next = salp_tmp;
8897
salp_last = salp_tmp;
8899
interval = AssemblyAlignmentIntervalFree (interval);
8906
static DialoG CreateAssemblyAlignmentDialog (GrouP g)
8909
AssemblyAlignmentDlgPtr dlg;
8914
p = HiddenGroup (g, -1, 0, NULL);
8915
SetGroupSpacing (p, 10, 10);
8917
dlg = (AssemblyAlignmentDlgPtr) MemNew (sizeof (AssemblyAlignmentDlgData));
8918
if (dlg == NULL) return NULL;
8920
SetObjectExtra (p, dlg, NULL);
8921
dlg->dialog = (DialoG) p;
8922
dlg->todialog = SeqAlignToAssemblyAlignmentDialog;
8923
dlg->fromdialog = AssemblyAlignmentDialogToSeqAlign;
8925
x = HiddenGroup (p, 0, 2, NULL);
8926
y = HiddenGroup (x, 6, 0, NULL);
8927
StaticPrompt (y, "", 16 * stdCharWidth, 0, programFont, 'c');
8928
StaticPrompt (y, "", 8 * stdCharWidth, 0, programFont, 'c');
8929
StaticPrompt (y, "", 8 * stdCharWidth, 0, programFont, 'c');
8930
StaticPrompt (y, "", 8 * stdCharWidth, 0, programFont, 'c');
8931
StaticPrompt (y, "Primary To", 8 * stdCharWidth, 0, programFont, 'c');
8932
StaticPrompt (y, "", 8 * stdCharWidth, 0, programFont, 'c');
8933
StaticPrompt (y, "Accessions", 16 * stdCharWidth, 0, programFont, 'c');
8934
StaticPrompt (y, "TPA From", 8 * stdCharWidth, 0, programFont, 'c');
8935
StaticPrompt (y, "TPA To", 8 * stdCharWidth, 0, programFont, 'c');
8936
StaticPrompt (y, "Primary From", 8 * stdCharWidth, 0, programFont, 'c');
8937
StaticPrompt (y, "(Percent Identity)", 8 * stdCharWidth, 0, programFont, 'c');
8938
StaticPrompt (y, "Primary Strand", 8 * stdCharWidth, 0, programFont, 'c');
8939
dlg->intervals_dialog = CreateTagListDialogExEx (x, 6, 6, -1, assmbly_aln_types, assmbly_aln_widths, assmbly_aln_alists,
8940
TRUE, FALSE, SeqAlignToTagDlg, SeqAlignFromTagDlg,
8948
static void AddTPAIdToAlignment (SeqAlignPtr salp, BioseqPtr bsp)
8953
if (salp == NULL || salp->dim != 2 || salp->segtype != SAS_DENSEG || bsp == NULL) {
8962
sip = SeqIdFindWorst (bsp->id);
8963
sip = SeqIdDup (sip);
8965
sip->next = dsp->ids->next;
8966
dsp->ids->next = NULL;
8967
dsp->ids = SeqIdFree (dsp->ids);
8972
static void AddTPAIdToAlignmentList (SeqAlignPtr salp, BioseqPtr bsp)
8974
while (salp != NULL) {
8975
AddTPAIdToAlignment (salp, bsp);
8980
typedef struct assemblyalignmentform {
8984
} AssemblyAlignmentFormData, PNTR AssemblyAlignmentFormPtr;
8987
static void CheckCoverageWithAddedIntervals (LogInfoPtr lip, BioseqPtr bsp, SeqAlignPtr salp)
8989
ValNodePtr err_list = NULL;
8990
SeqAlignPtr salp_orig, salp_prev = NULL;
8996
if (bsp->hist == NULL) {
8997
bsp->hist = SeqHistNew ();
9000
salp_orig = bsp->hist->assembly;
9001
while (salp_orig != NULL) {
9002
salp_prev = salp_orig;
9003
salp_orig = salp_orig->next;
9006
if (salp_prev == NULL) {
9007
bsp->hist->assembly = salp;
9009
salp_prev->next = salp;
9012
ValidateTPAHistAlign (bsp, &err_list);
9013
if (err_list != NULL) {
9014
fprintf (lip->fp, "Projected Coverage Problems\n");
9015
PrintTPAHistErrors (lip, err_list);
9016
lip->data_in_log = TRUE;
9017
err_list = ValNodeFreeData (err_list);
9020
if (salp_prev == NULL) {
9021
bsp->hist->assembly = NULL;
9023
salp_prev->next = NULL;
9028
static Boolean ReportAssemblyIntervalProblems (BioseqPtr bsp, SeqAlignPtr salp)
9031
AssemblyAlignmentIntervalPtr interval;
9032
Boolean has_errors = FALSE;
9033
SeqAlignPtr salp_tmp;
9035
lip = OpenLog ("Assembly Alignment Interval Problems");
9036
fprintf (lip->fp, "Primary Accession\tTPA From\tTPA To\tPrimary From\tPrimary To\tStrand\tPercent Identity\n");
9037
for (salp_tmp = salp; salp_tmp != NULL; salp_tmp = salp_tmp->next) {
9038
interval = AssemblyAlignmentIntervalFromSeqAlign (salp_tmp);
9039
fprintf (lip->fp, "%s\t%d\t%d\t%d\t%d\t%s\t%d%s\n",
9040
interval->prim_accession,
9043
interval->prim_from,
9044
interval->prim_from + interval->tpa_to - interval->tpa_from,
9045
interval->prim_strand == Seq_strand_minus ? "c" : "",
9046
interval->percent_identity,
9047
interval->percent_identity < 75 ? "(Suspiciously low percent identity!)" : "");
9048
if (interval->percent_identity < 75) {
9049
lip->data_in_log = TRUE;
9051
interval = AssemblyAlignmentIntervalFree (interval);
9053
fprintf (lip->fp, "\n");
9054
CheckCoverageWithAddedIntervals (lip, bsp, salp);
9056
has_errors = lip->data_in_log;
9061
static void AcceptAssemblyAlignment (ButtoN b)
9063
AssemblyAlignmentFormPtr frm;
9064
SeqAlignPtr salp, salp_next;
9066
MsgAnswer ans = ANS_OK;
9068
frm = (AssemblyAlignmentFormPtr) GetObjectExtra (b);
9072
salp = DialogToPointer (frm->dlg);
9074
Message (MSG_ERROR, "No intervals specified");
9077
AddTPAIdToAlignmentList (salp, frm->bsp);
9078
if (ReportAssemblyIntervalProblems (frm->bsp, salp)) {
9079
ans = Message (MSG_OKC, "Continue with errors?");
9082
if (ans == ANS_OK) {
9083
if (frm->bsp->hist == NULL) {
9084
frm->bsp->hist = SeqHistNew ();
9087
/* something is wrong here, alignment is not being sorted */
9089
while (salp_next->next != NULL) {
9090
salp_next = salp_next->next;
9092
salp_next->next = frm->bsp->hist->assembly;
9094
list = SeqAlignSortListNew (salp);
9095
list = ValNodeSort (list, SortVnpBySeqAlignSortRow1);
9096
salp = SeqAlignFromSeqAlignSortList (list);
9097
list = SeqAlignSortListFree (list);
9098
frm->bsp->hist->assembly = salp;
9099
ObjMgrSetDirtyFlag (frm->bsp->idx.entityID, TRUE);
9100
ObjMgrSendMsg (OM_MSG_UPDATE, frm->bsp->idx.entityID, 0, 0);
9103
while (salp != NULL) {
9104
salp_next = salp->next;
9106
salp = SeqAlignFree (salp);
9113
static void CheckAssemblyAlignment (ButtoN b)
9115
AssemblyAlignmentFormPtr frm;
9116
SeqAlignPtr salp, salp_next;
9118
frm = (AssemblyAlignmentFormPtr) GetObjectExtra (b);
9123
salp = DialogToPointer (frm->dlg);
9125
Message (MSG_ERROR, "No intervals specified");
9128
AddTPAIdToAlignmentList (salp, frm->bsp);
9129
PointerToDialog (frm->dlg, salp);
9130
ReportAssemblyIntervalProblems (frm->bsp, salp);
9132
while (salp != NULL) {
9133
salp_next = salp->next;
9135
salp = SeqAlignFree (salp);
9141
extern void AdvancedAssemblyAlignmentEditor (IteM i)
9147
AssemblyAlignmentFormPtr frm;
9151
bfp = currentFormDataPtr;
9153
bfp = GetObjectExtra (i);
9155
if (bfp == NULL) return;
9157
bsp = GetBioseqGivenIDs (bfp->input_entityID, bfp->input_itemID, bfp->input_itemtype);
9159
Message (MSG_ERROR, "Must select single Bioseq!");
9163
frm = (AssemblyAlignmentFormPtr) MemNew (sizeof (AssemblyAlignmentFormData));
9166
w = FixedWindow (-50, -33, -10, -10, "Add Intervals to Assembly Alignment", StdCloseWindowProc);
9167
SetObjectExtra (w, frm, StdCleanupExtraProc);
9168
frm->form = (ForM) w;
9170
h = HiddenGroup (w, -1, 0, NULL);
9171
SetGroupSpacing (h, 10, 10);
9173
frm->dlg = CreateAssemblyAlignmentDialog(h);
9175
c = HiddenGroup (h, 3, 0, NULL);
9176
b = PushButton (c, "Accept", AcceptAssemblyAlignment);
9177
SetObjectExtra (b, frm, NULL);
9178
b = PushButton (c, "Check", CheckAssemblyAlignment);
9179
SetObjectExtra (b, frm, NULL);
9180
b = PushButton (c, "Cancel", StdCancelButtonProc);
9182
AlignObjects (ALIGN_CENTER, (HANDLE) frm->dlg, (HANDLE) c, NULL);
9189
eAssemblyIntervalInfo_NoAction = 0,
9190
eAssemblyIntervalInfo_Remove,
9191
eAssemblyIntervalInfo_Truncate_Left,
9192
eAssemblyIntervalInfo_Truncate_Right,
9193
eAssemblyIntervalInfo_Truncate_Both
9194
} EAssemblyIntervalInfoAction;
9197
typedef struct assemblyintervalinfo {
9205
ValNodePtr conflict_list;
9206
EAssemblyIntervalInfoAction action;
9207
} AssemblyIntervalInfoData, PNTR AssemblyIntervalInfoPtr;
9210
eIntervalConflict_none,
9211
eIntervalConflict_a_contains_b,
9212
eIntervalConflict_a_contained_in_b,
9213
eIntervalConflict_a_overlaps_b_on_5,
9214
eIntervalConflict_a_overlaps_b_on_3
9215
} EIntervalConflict;
9218
static Int4 FindIntervalConflict (Int4 left1, Int4 right1, Int4 left2, Int4 right2, Int4 overlap)
9220
if (right1 < left2 + overlap || left1 > right2 - overlap) {
9221
return eIntervalConflict_none;
9222
} else if (left1 <= left2 && right1 < right2 && right1 - left2 + 1> overlap) {
9223
return eIntervalConflict_a_overlaps_b_on_5;
9224
} else if (left1 > left2 && right1 >= right2 && right2 - left1 + 1> overlap) {
9225
return eIntervalConflict_a_overlaps_b_on_3;
9226
} else if (left1 <= left2 && right1 >= right2) {
9227
return eIntervalConflict_a_contains_b;
9228
} else if (left2 <= left1 && right2 >= right2) {
9229
return eIntervalConflict_a_contained_in_b;
9231
Message (MSG_ERROR, "Conflict calculation failed");
9232
return eIntervalConflict_none;
9238
typedef struct intervalconflictinfo {
9239
AssemblyIntervalInfoPtr conflict_interval;
9240
EIntervalConflict prim_conflict;
9241
EIntervalConflict tpa_conflict;
9242
} IntervalConflictInfoData, PNTR IntervalConflictInfoPtr;
9245
static IntervalConflictInfoPtr IntervalConflictInfoNew (AssemblyIntervalInfoPtr conflict_interval, EIntervalConflict prim_conflict, EIntervalConflict tpa_conflict)
9247
IntervalConflictInfoPtr ip;
9249
ip = (IntervalConflictInfoPtr) MemNew (sizeof (IntervalConflictInfoData));
9250
ip->conflict_interval = conflict_interval;
9251
ip->prim_conflict = prim_conflict;
9252
ip->tpa_conflict = tpa_conflict;
9257
static IntervalConflictInfoPtr IntervalConflictInfoFree (IntervalConflictInfoPtr ip)
9266
static ValNodePtr IntervalConflictInfoListFree (ValNodePtr vnp)
9268
ValNodePtr vnp_next;
9270
while (vnp != NULL) {
9271
vnp_next = vnp->next;
9272
vnp->data.ptrvalue = IntervalConflictInfoFree (vnp->data.ptrvalue);
9274
vnp = ValNodeFree (vnp);
9281
static AssemblyIntervalInfoPtr AssemblyIntervalInfoFree (AssemblyIntervalInfoPtr ip)
9284
ip->prim_id = MemFree (ip->prim_id);
9285
ip->conflict_list = IntervalConflictInfoListFree (ip->conflict_list);
9292
static ValNodePtr AssemblyIntervalInfoListFree (ValNodePtr vnp)
9294
ValNodePtr vnp_next;
9296
while (vnp != NULL) {
9297
vnp_next = vnp->next;
9298
vnp->data.ptrvalue = AssemblyIntervalInfoFree (vnp->data.ptrvalue);
9300
vnp = ValNodeFree (vnp);
9307
static void TruncateForConflictsOnLeft (AssemblyIntervalInfoPtr ai)
9309
IntervalConflictInfoPtr ip;
9311
Int4 prim_change, tpa_change;
9313
if (ai == NULL || ai->conflict_list == NULL) {
9317
for (vnp = ai->conflict_list; vnp != NULL; vnp = vnp->next) {
9318
if (vnp->choice == 1) {
9319
ip = (IntervalConflictInfoPtr) vnp->data.ptrvalue;
9322
if (ip->tpa_conflict == eIntervalConflict_a_overlaps_b_on_5) {
9323
tpa_change = ip->conflict_interval->tpa_right - ai->tpa_left;
9324
if (tpa_change > 0) {
9325
ai->tpa_left += tpa_change;
9326
if (ai->prim_strand == Seq_strand_minus) {
9327
ai->prim_right -= tpa_change;
9329
ai->prim_left += tpa_change;
9333
if (ip->prim_conflict == eIntervalConflict_a_overlaps_b_on_5) {
9334
prim_change = ip->conflict_interval->prim_right - ai->prim_left;
9335
if (prim_change > 0) {
9336
ai->prim_left += prim_change;
9337
if (ai->prim_strand == Seq_strand_minus) {
9338
ai->tpa_right -= prim_change;
9340
ai->tpa_left += prim_change;
9350
static void TruncateForConflictsOnRight (AssemblyIntervalInfoPtr ai)
9352
IntervalConflictInfoPtr ip;
9354
Int4 prim_change, tpa_change;
9356
if (ai == NULL || ai->conflict_list == NULL) {
9360
for (vnp = ai->conflict_list; vnp != NULL; vnp = vnp->next) {
9361
if (vnp->choice == 1) {
9362
ip = (IntervalConflictInfoPtr) vnp->data.ptrvalue;
9365
if (ip->tpa_conflict == eIntervalConflict_a_overlaps_b_on_3) {
9366
tpa_change = ai->tpa_right - ip->conflict_interval->tpa_left;
9367
if (tpa_change > 0) {
9368
ai->tpa_right -= tpa_change;
9369
if (ai->prim_strand == Seq_strand_minus) {
9370
ai->prim_left += tpa_change;
9372
ai->prim_right -= tpa_change;
9376
if (ip->prim_conflict == eIntervalConflict_a_overlaps_b_on_3) {
9377
prim_change = ai->prim_right - ip->conflict_interval->prim_left;
9378
if (prim_change > 0) {
9379
ai->prim_right -= prim_change;
9380
if (ai->prim_strand == Seq_strand_minus) {
9381
ai->tpa_left += prim_change;
9383
ai->tpa_right -= prim_change;
9393
static void ReevaluateConflicts (AssemblyIntervalInfoPtr ip, Int4 overlap)
9396
IntervalConflictInfoPtr cp;
9402
for (vnp = ip->conflict_list; vnp != NULL; vnp = vnp->next) {
9403
cp = vnp->data.ptrvalue;
9404
if (cp->conflict_interval->action == eAssemblyIntervalInfo_Remove) {
9406
} else if (FindIntervalConflict (ip->tpa_left, ip->tpa_right,
9407
cp->conflict_interval->tpa_left, cp->conflict_interval->tpa_right, overlap) == eIntervalConflict_none
9408
&& FindIntervalConflict (ip->prim_left, ip->prim_right,
9409
cp->conflict_interval->tpa_left, cp->conflict_interval->tpa_right, overlap) == eIntervalConflict_none) {
9418
static void RecalculateAssemblyIntervalInfoEndpoints (AssemblyIntervalInfoPtr ip, Int4 overlap)
9420
IntervalConflictInfoPtr cp;
9427
/* calculate endpoints */
9428
AlnMgr2IndexSingleChildSeqAlign (ip->salp);
9429
ip->prim_strand = SeqAlignStrand (ip->salp, 2);
9430
AlnMgr2GetNthSeqRangeInSA (ip->salp, 1, &(ip->tpa_left), &(ip->tpa_right));
9431
AlnMgr2GetNthSeqRangeInSA (ip->salp, 2, &(ip->prim_left), &(ip->prim_right));
9433
/* apply changes for conflicts and actions */
9434
if (ip->action == eAssemblyIntervalInfo_Truncate_Left) {
9435
TruncateForConflictsOnLeft (ip);
9436
} else if (ip->action == eAssemblyIntervalInfo_Truncate_Right) {
9437
TruncateForConflictsOnRight (ip);
9438
} else if (ip->action == eAssemblyIntervalInfo_Truncate_Both) {
9439
TruncateForConflictsOnLeft (ip);
9440
TruncateForConflictsOnRight (ip);
9443
ReevaluateConflicts (ip, overlap);
9444
for (vnp = ip->conflict_list; vnp != NULL; vnp = vnp->next) {
9445
cp = vnp->data.ptrvalue;
9446
ReevaluateConflicts (cp->conflict_interval, overlap);
9451
static AssemblyIntervalInfoPtr AssemblyIntervalInfoNew (SeqAlignPtr salp, Int4 overlap)
9453
AssemblyIntervalInfoPtr ip;
9462
ip = (AssemblyIntervalInfoPtr) MemNew (sizeof (AssemblyIntervalInfoData));
9464
AlnMgr2IndexSingleChildSeqAlign (ip->salp);
9466
sip = AlnMgr2GetNthSeqIdPtr (salp, 2);
9467
bsp = BioseqLockById (sip);
9469
sip = SeqIdFree (sip);
9470
sip = SeqIdDup (SeqIdFindBest (bsp->id, SEQID_GENBANK));
9474
SeqIdWrite (sip, id_txt, PRINTID_REPORT, sizeof (id_txt) - 1);
9475
sip = SeqIdFree (sip);
9477
ip->prim_id = StringSave (id_txt);
9478
RecalculateAssemblyIntervalInfoEndpoints (ip, overlap);
9483
static CharPtr SummarizeAssemblyInterval (AssemblyIntervalInfoPtr ip)
9485
CharPtr summary = NULL;
9486
CharPtr fmt = "%d-%d %s %d-%d%s";
9493
len = StringLen (fmt) + StringLen (ip->prim_id) + 60;
9494
if (ip->prim_strand == Seq_strand_minus) {
9497
summary = (CharPtr) MemNew (sizeof (Char) + len);
9498
sprintf (summary, fmt, ip->tpa_left + 1, ip->tpa_right + 1,
9500
ip->prim_left + 1, ip->prim_right + 1,
9501
ip->prim_strand == Seq_strand_minus ? "(c)" : "");
9506
static CharPtr SummarizeConflictList (ValNodePtr list)
9508
CharPtr summary = NULL, str, conflict = "Conflict with ";
9509
ValNodePtr vnp, strings = NULL;
9510
IntervalConflictInfoPtr ip;
9513
for (vnp = list; vnp != NULL; vnp = vnp->next) {
9514
if (vnp->choice == 1 && vnp->data.ptrvalue != NULL) {
9515
ip = (IntervalConflictInfoPtr) vnp->data.ptrvalue;
9516
str = SummarizeAssemblyInterval (ip->conflict_interval);
9517
ValNodeAddPointer (&strings, 0, str);
9518
len += StringLen (str) + 3;
9521
if (strings == NULL) {
9522
summary = StringSave ("No conflicts");
9524
summary = (CharPtr) MemNew (sizeof (Char) * (StringLen (conflict) + len));
9525
StringCpy (summary, conflict);
9526
for (vnp = strings; vnp != NULL; vnp = vnp->next) {
9527
StringCat (summary, vnp->data.ptrvalue);
9528
if (vnp->next != NULL) {
9529
StringCat (summary, ", ");
9532
strings = ValNodeFreeData (strings);
9538
static void RemoveConflictLists (ValNodePtr list)
9540
AssemblyIntervalInfoPtr ip;
9542
while (list != NULL) {
9543
ip = (AssemblyIntervalInfoPtr) list->data.ptrvalue;
9544
ip->conflict_list = IntervalConflictInfoListFree (ip->conflict_list);
9550
static void BuildConflictLists (ValNodePtr list, Int4 overlap)
9552
ValNodePtr vnp1, vnp2;
9553
AssemblyIntervalInfoPtr ip1, ip2;
9554
EIntervalConflict prim_conflict, tpa_conflict;
9555
IntervalConflictInfoPtr conflict;
9557
if (list == NULL || list->next == NULL) {
9561
RemoveConflictLists (list);
9563
for (vnp1 = list; vnp1->next != NULL; vnp1 = vnp1->next) {
9564
ip1 = (AssemblyIntervalInfoPtr) vnp1->data.ptrvalue;
9565
for (vnp2 = vnp1->next; vnp2 != NULL; vnp2 = vnp2->next) {
9566
ip2 = (AssemblyIntervalInfoPtr) vnp2->data.ptrvalue;
9567
if (StringCmp (ip1->prim_id, ip2->prim_id) == 0) {
9568
tpa_conflict = FindIntervalConflict (ip1->tpa_left, ip1->tpa_right, ip2->tpa_left, ip2->tpa_right, overlap);
9569
prim_conflict = FindIntervalConflict (ip1->prim_left, ip1->prim_right, ip2->prim_left, ip2->prim_right, overlap);
9570
if (tpa_conflict != eIntervalConflict_none || prim_conflict != prim_conflict) {
9571
conflict = IntervalConflictInfoNew (ip2, prim_conflict, tpa_conflict);
9572
ValNodeAddPointer (&(ip1->conflict_list), 1, conflict);
9574
tpa_conflict = FindIntervalConflict (ip2->tpa_left, ip2->tpa_right, ip1->tpa_left, ip1->tpa_right, overlap);
9575
prim_conflict = FindIntervalConflict (ip2->prim_left, ip2->prim_right, ip1->prim_left, ip1->prim_right, overlap);
9576
if (tpa_conflict != eIntervalConflict_none || prim_conflict != prim_conflict) {
9577
conflict = IntervalConflictInfoNew (ip1, prim_conflict, tpa_conflict);
9578
ValNodeAddPointer (&(ip2->conflict_list), 1, conflict);
9586
static ValNodePtr AssemblyIntervalInfoListFromSeqAlign (SeqAlignPtr salp, Int4 overlap)
9588
ValNodePtr list = NULL;
9590
while (salp != NULL) {
9591
ValNodeAddPointer (&list, 0, AssemblyIntervalInfoNew (salp, overlap));
9595
BuildConflictLists (list, overlap);
9600
typedef struct assemblyalignmentintervalresolutiondlg {
9601
DIALOG_MESSAGE_BLOCK
9602
DialoG intervals_dialog;
9605
} AssemblyAlignmentIntervalResolutionDlgData, PNTR AssemblyAlignmentIntervalResolutionDlgPtr;
9607
CharPtr assmbly_aln_int_res_labels [] = {
9612
Uint2 assmbly_aln_int_res_types [] = {
9613
TAGLIST_POPUP, TAGLIST_PROMPT, TAGLIST_PROMPT
9616
Uint2 assmbly_aln_int_res_widths [] = {
9620
ENUM_ALIST(assmbly_aln_int_res_action_alist)
9621
{"No change", eAssemblyIntervalInfo_NoAction},
9622
{"Remove", eAssemblyIntervalInfo_Remove},
9623
{"Truncate Left", eAssemblyIntervalInfo_Truncate_Left},
9624
{"Truncate Right", eAssemblyIntervalInfo_Truncate_Right},
9625
{"Truncate Both", eAssemblyIntervalInfo_Truncate_Both},
9629
static EnumFieldAssocPtr assmbly_aln_int_res_alists[] = {
9630
assmbly_aln_int_res_action_alist, NULL, NULL
9634
static CharPtr SummarizeOneIntervalResolutionRow (AssemblyIntervalInfoPtr ip)
9636
CharPtr str, int_str, conf_str, fmt = "%d\t%s\t%s\n";
9642
int_str = SummarizeAssemblyInterval (ip);
9643
conf_str = SummarizeConflictList (ip->conflict_list);
9644
str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + 15 + StringLen (int_str) + StringLen (conf_str)));
9645
sprintf (str, fmt, ip->action, int_str, conf_str);
9646
int_str = MemFree (int_str);
9647
conf_str = MemFree (conf_str);
9652
static void UpdateConflicts (Pointer userdata)
9654
AssemblyAlignmentIntervalResolutionDlgPtr dlg;
9655
AssemblyIntervalInfoPtr ip;
9657
ValNodePtr vnp, vnp_t;
9659
Boolean any_change = FALSE;
9663
dlg = (AssemblyAlignmentIntervalResolutionDlgPtr) userdata;
9669
tlp = (TagListPtr) GetObjectExtra (dlg->intervals_dialog);
9674
if (!TextHasNoText (dlg->overlap)) {
9675
str = SaveStringFromText (dlg->overlap);
9676
overlap = atoi (str);
9677
str = MemFree (str);
9683
/* now update conflict lists */
9684
for (vnp = dlg->list, vnp_t = tlp->vnp; vnp != NULL && tlp->vnp != NULL; vnp = vnp->next, vnp_t = vnp_t->next) {
9685
ip = (AssemblyIntervalInfoPtr) vnp->data.ptrvalue;
9686
str = ExtractTagListColumn ((CharPtr) vnp_t->data.ptrvalue, 0);
9687
action = atoi (str);
9688
str = MemFree (str);
9689
if (action != ip->action) {
9690
ip->action = action;
9691
RecalculateAssemblyIntervalInfoEndpoints (ip, overlap);
9697
for (vnp = dlg->list, vnp_t = tlp->vnp; vnp != NULL && tlp->vnp != NULL; vnp = vnp->next, vnp_t = vnp_t->next) {
9698
ip = (AssemblyIntervalInfoPtr) vnp->data.ptrvalue;
9699
vnp_t->data.ptrvalue = MemFree (vnp_t->data.ptrvalue);
9700
vnp_t->data.ptrvalue = SummarizeOneIntervalResolutionRow (ip);
9704
SendMessageToDialog (tlp->dialog, VIB_MSG_REDRAW);
9705
SendMessageToDialog (tlp->dialog, VIB_MSG_ENTER);
9709
static TaglistCallback assmbly_int_res_callback_list[3] =
9710
{ UpdateConflicts, UpdateConflicts, UpdateConflicts };
9712
static void SeqAlignToAssemblyAlignmentIntervalResolutionDialog (DialoG d, Pointer data)
9714
AssemblyAlignmentIntervalResolutionDlgPtr dlg;
9721
dlg = (AssemblyAlignmentIntervalResolutionDlgPtr) GetObjectExtra (d);
9722
salp = (SeqAlignPtr) data;
9726
tlp = (TagListPtr) GetObjectExtra (dlg->intervals_dialog);
9730
SendMessageToDialog (tlp->dialog, VIB_MSG_RESET);
9731
tlp->vnp = ValNodeFreeData (tlp->vnp);
9733
if (!TextHasNoText (dlg->overlap)) {
9734
str = SaveStringFromText (dlg->overlap);
9735
overlap = atoi (str);
9736
str = MemFree (str);
9742
dlg->list = AssemblyIntervalInfoListFree (dlg->list);
9743
dlg->list = AssemblyIntervalInfoListFromSeqAlign (salp, overlap);
9745
for (vnp = dlg->list; vnp != NULL; vnp = vnp->next) {
9746
str = SummarizeOneIntervalResolutionRow (vnp->data.ptrvalue);
9747
ValNodeAddPointer (&(tlp->vnp), 0, str);
9750
SendMessageToDialog (tlp->dialog, VIB_MSG_REDRAW);
9751
tlp->max = MAX ((Int2) 0, (Int2) (ValNodeLen (tlp->vnp) - tlp->rows));
9752
CorrectBarMax (tlp->bar, tlp->max);
9753
CorrectBarPage (tlp->bar, tlp->rows - 1, tlp->rows - 1);
9755
SafeShow (tlp->bar);
9757
SafeHide (tlp->bar);
9759
SendMessageToDialog (tlp->dialog, VIB_MSG_ENTER);
9764
static Pointer AssemblyAlignmentIntervalResolutionDialogToSeqAlign (DialoG d)
9766
AssemblyAlignmentIntervalResolutionDlgPtr dlg;
9767
AssemblyIntervalInfoPtr ai;
9768
SeqAlignPtr salp_list = NULL, salp_prev = NULL, salp;
9771
dlg = (AssemblyAlignmentIntervalResolutionDlgPtr) GetObjectExtra (d);
9776
for (vnp = dlg->list; vnp != NULL; vnp = vnp->next) {
9777
ai = vnp->data.ptrvalue;
9778
if (ai->action != eAssemblyIntervalInfo_Remove) {
9779
salp = (SeqAlignPtr) AsnIoMemCopy (ai->salp, (AsnReadFunc) SeqAlignAsnRead, (AsnWriteFunc) SeqAlignAsnWrite);
9780
/* TODO: truncate alignments */
9783
if (salp_prev == NULL) {
9786
salp_prev->next = salp;
9792
return (Pointer) salp_list;
9796
static void ChangeAssemblyAlignmentIntervalResolutionOverlap (TexT t)
9798
AssemblyAlignmentIntervalResolutionDlgPtr dlg;
9799
AssemblyIntervalInfoPtr ip;
9801
ValNodePtr vnp, vnp_t;
9805
dlg = (AssemblyAlignmentIntervalResolutionDlgPtr) GetObjectExtra (t);
9810
tlp = (TagListPtr) GetObjectExtra (dlg->intervals_dialog);
9815
if (!TextHasNoText (dlg->overlap)) {
9816
str = SaveStringFromText (dlg->overlap);
9817
overlap = atoi (str);
9818
str = MemFree (str);
9824
BuildConflictLists (dlg->list, overlap);
9827
for (vnp = dlg->list, vnp_t = tlp->vnp; vnp != NULL && tlp->vnp != NULL; vnp = vnp->next, vnp_t = vnp_t->next) {
9828
ip = (AssemblyIntervalInfoPtr) vnp->data.ptrvalue;
9829
RecalculateAssemblyIntervalInfoEndpoints (ip, overlap);
9830
vnp_t->data.ptrvalue = MemFree (vnp_t->data.ptrvalue);
9831
vnp_t->data.ptrvalue = SummarizeOneIntervalResolutionRow (ip);
9835
SendMessageToDialog (tlp->dialog, VIB_MSG_REDRAW);
9836
SendMessageToDialog (tlp->dialog, VIB_MSG_ENTER);
9841
static void CleanupAssemblyAlignmentIntervalResolutionDialog (GraphiC g, VoidPtr data)
9844
AssemblyAlignmentIntervalResolutionDlgPtr dlg;
9846
dlg = (AssemblyAlignmentIntervalResolutionDlgPtr) data;
9848
dlg->list = AssemblyIntervalInfoListFree (dlg->list);
9849
dlg = MemFree (dlg);
9854
static DialoG CreateAssemblyAlignmentIntervalResolutionDialog (GrouP g)
9857
AssemblyAlignmentIntervalResolutionDlgPtr dlg;
9861
Int4 num_columns = sizeof (assmbly_aln_int_res_labels) / sizeof (CharPtr);
9863
p = HiddenGroup (g, -1, 0, NULL);
9864
SetGroupSpacing (p, 10, 10);
9866
dlg = (AssemblyAlignmentIntervalResolutionDlgPtr) MemNew (sizeof (AssemblyAlignmentIntervalResolutionDlgData));
9867
if (dlg == NULL) return NULL;
9869
SetObjectExtra (p, dlg, CleanupAssemblyAlignmentIntervalResolutionDialog);
9870
dlg->dialog = (DialoG) p;
9872
dlg->todialog = SeqAlignToAssemblyAlignmentIntervalResolutionDialog;
9873
dlg->fromdialog = AssemblyAlignmentIntervalResolutionDialogToSeqAlign;
9875
z = HiddenGroup (p, 2, 0, NULL);
9876
StaticPrompt (z, "Allowable Overlap", 0, 0, programFont, 'r');
9877
dlg->overlap = DialogText (z, "5", 5, ChangeAssemblyAlignmentIntervalResolutionOverlap);
9878
SetObjectExtra (dlg->overlap, dlg, NULL);
9880
x = HiddenGroup (p, 0, 2, NULL);
9881
y = HiddenGroup (x, num_columns, 0, NULL);
9882
for (i = 0; i < num_columns; i++) {
9883
StaticPrompt (y, assmbly_aln_int_res_labels[i], assmbly_aln_int_res_widths[i] * stdCharWidth, 0, programFont, 'l');
9885
dlg->intervals_dialog = CreateTagListDialogExEx (x, 6, num_columns, -1, assmbly_aln_int_res_types,
9886
assmbly_aln_int_res_widths, assmbly_aln_int_res_alists,
9887
TRUE, TRUE, NULL, NULL,
9888
assmbly_int_res_callback_list, dlg, FALSE);
9890
AlignObjects (ALIGN_CENTER, (HANDLE) z, (HANDLE) x, NULL);
9895
typedef struct assemblyalignmentintervalresolutionfrm {
9900
} AssemblyAlignmentIntervalResolutionFrmData, PNTR AssemblyAlignmentIntervalResolutionFrmPtr;
9903
static void AcceptAssemblyAlignmentIntervalResolution (ButtoN b)
9905
AssemblyAlignmentIntervalResolutionFrmPtr frm;
9906
SeqAlignPtr new_assem, salp, salp_next;
9908
frm = (AssemblyAlignmentIntervalResolutionFrmPtr) GetObjectExtra (b);
9913
/* TODO: check for problems before accepting */
9915
new_assem = DialogToPointer (frm->dlg);
9916
if (new_assem == NULL) {
9917
Message (MSG_ERROR, "No intervals left in assembly!");
9921
/* remove old assembly */
9922
salp = frm->bsp->hist->assembly;
9923
frm->bsp->hist->assembly = NULL;
9924
while (salp != NULL) {
9925
salp_next = salp->next;
9927
salp = SeqAlignFree (salp);
9931
/* assign new assembly */
9932
frm->bsp->hist->assembly = new_assem;
9934
ObjMgrSetDirtyFlag (frm->bsp->idx.entityID, TRUE);
9935
ObjMgrSendMsg (OM_MSG_UPDATE, frm->bsp->idx.entityID, 0, 0);
9940
extern void AssemblyAlignmentIntervalResolution (IteM i)
9946
AssemblyAlignmentIntervalResolutionFrmPtr frm;
9950
bfp = currentFormDataPtr;
9952
bfp = GetObjectExtra (i);
9954
if (bfp == NULL) return;
9956
bsp = GetBioseqGivenIDs (bfp->input_entityID, bfp->input_itemID, bfp->input_itemtype);
9958
Message (MSG_ERROR, "Must select single Bioseq!");
9961
if (bsp->hist == NULL || bsp->hist->assembly == NULL) {
9962
Message (MSG_ERROR, "No assembly alignment!");
9965
frm = (AssemblyAlignmentIntervalResolutionFrmPtr) MemNew (sizeof (AssemblyAlignmentIntervalResolutionFrmData));
9968
w = FixedWindow (-50, -33, -10, -10, "Resolve Assembly Alignment Intervals", StdCloseWindowProc);
9969
SetObjectExtra (w, frm, StdCleanupExtraProc);
9970
frm->form = (ForM) w;
9972
h = HiddenGroup (w, -1, 0, NULL);
9973
SetGroupSpacing (h, 10, 10);
9975
frm->dlg = CreateAssemblyAlignmentIntervalResolutionDialog(h);
9976
PointerToDialog (frm->dlg, bsp->hist->assembly);
9978
c = HiddenGroup (h, 3, 0, NULL);
9979
b = PushButton (c, "Accept", AcceptAssemblyAlignmentIntervalResolution);
9980
SetObjectExtra (b, frm, NULL);
9981
/* b = PushButton (c, "Check", CheckAssemblyAlignmentIntervalResolution);
9982
SetObjectExtra (b, frm, NULL); */
9983
b = PushButton (c, "Cancel", StdCancelButtonProc);
9985
AlignObjects (ALIGN_CENTER, (HANDLE) frm->dlg, (HANDLE) c, NULL);
7702
9992
typedef struct historyformdata {
7703
9993
FEATURE_FORM_BLOCK
10402
typedef struct nw {
10404
Int4 traceback_pos;
10405
} NWData, PNTR NWPtr;
10408
static Int4 SegmentsNeededForAlignment (CharPtr buf1, CharPtr buf2)
10411
Boolean gap1, gap2, change = FALSE;
10428
while (*cp1 != 0 && *cp2 != 0) {
10458
FillInStartsAndLensForAlignment
10464
Boolean gap1, gap2, change = FALSE;
10466
Int4 pos1 = 0, pos2 = 0;
10471
dsp->starts[dsp->dim * num_segs] = -1;
10474
dsp->starts[dsp->dim * num_segs] = pos1;
10481
dsp->starts[dsp->dim * num_segs + 1] = -1;
10483
dsp->starts[dsp->dim * num_segs + 1] = pos2;
10488
dsp->lens[num_segs] = 1;
10490
while (*cp1 != 0 && *cp2 != 0) {
10517
dsp->starts[dsp->dim * num_segs] = -1;
10519
dsp->starts[dsp->dim * num_segs] = pos1;
10522
dsp->starts[dsp->dim * num_segs + 1] = -1;
10524
dsp->starts[dsp->dim * num_segs + 1] = pos2;
10526
dsp->lens[num_segs] = 1;
10528
dsp->lens[num_segs]++;
10542
static void AdjustStringAlingmentForOffsetAndStrand (DenseSegPtr dsp, Int4 start1, Int4 start2, Int4 stop2, Uint1 strand2)
10550
for (i = 0; i < dsp->numseg; i++) {
10551
if (dsp->starts[2 * i] != -1) {
10552
dsp->starts[2 * i] += start1;
10554
if (dsp->starts[2 * i + 1] != -1) {
10555
if (strand2 == Seq_strand_plus) {
10556
dsp->starts[2 * i + 1] += start2;
10558
dsp->starts[2 * i + 1] = stop2 - dsp->starts[2 * i + 1] - dsp->lens[i];
10565
/* assumption - first interval always on plus strand */
10566
static SeqAlignPtr NWAlignmentForInterval (SeqIdPtr sip1, SeqIdPtr sip2, Int4 start1, Int4 stop1, Int4 start2, Int4 stop2)
10568
BioseqPtr bsp1, bsp2;
10569
Int4 len1, len2, tmp, i, row, col, back_row, back_col;
10570
Uint1 strand2 = Seq_strand_plus;
10571
CharPtr buf1, buf2;
10572
CharPtr alnbuf1, alnbuf2, cp1, cp2;
10574
Int4 gap_penalty = -1;
10575
Int4 mismatch_penalty = -1;
10576
Int4 match_score = 1;
10577
Int4 left, up, diag;
10580
SeqAlignPtr salp = NULL;
10582
if (sip1 == NULL || sip2 == NULL) {
10586
bsp1 = BioseqLockById (sip1);
10587
bsp2 = BioseqLockById (sip2);
10589
if (bsp1 != NULL && bsp2 != NULL) {
10590
if (stop2 < start2) {
10591
strand2 = Seq_strand_minus;
10596
len1 = stop1 - start1 + 1;
10597
len2 = stop2 - start2 + 1;
10598
buf1 = MemNew (sizeof (Char) * (len1 + 1));
10599
buf2 = MemNew (sizeof (Char) * (len2 + 1));
10600
SeqPortStreamInt (bsp1, start1, stop1, Seq_strand_plus, STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL, (Pointer) buf1, NULL);
10601
SeqPortStreamInt (bsp2, start2, stop2, strand2, STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL, (Pointer) buf2, NULL);
10603
matrix = (NWPtr) MemNew (sizeof (NWData) * (len1 + 1) * (len2 + 1));
10604
/* initalize matrix */
10605
MemSet (matrix, 0, sizeof (NWData) * (len1 + 1) * (len2 + 1));
10606
matrix[0].score = 0;
10607
matrix[0].traceback_pos = 0;
10609
for (col = 1; col <= len1; col++) {
10610
matrix[(row * (len1 + 1)) + col].score = matrix[(row * (len1 + 1)) + col - 1].score + gap_penalty;
10611
matrix[(row * (len1 + 1)) + col].traceback_pos = (row * (len1 + 1)) + col - 1;
10614
for (row = 1; row <= len2; row++) {
10615
matrix[(row * (len1 + 1)) + col].score = matrix[((row - 1) * (len1 + 1)) + col].score + gap_penalty;
10616
matrix[(row * (len1 + 1)) + col].traceback_pos = ((row - 1) * (len1 + 1)) + col;
10619
/* fill in scores */
10620
for (row = 1; row <= len2; row++) {
10621
for (col = 1; col <= len1; col++) {
10623
diag = matrix[((row - 1) * (len1 + 1)) + col - 1].score;
10624
if (buf1[col - 1] == buf2[row - 1]) {
10625
diag += match_score;
10627
diag += mismatch_penalty;
10629
left = matrix[((row) * (len1 + 1)) + col - 1].score + gap_penalty;
10630
up = matrix[((row - 1) * (len1 + 1)) + col].score + gap_penalty;
10633
if (left > diag && left > up) {
10634
matrix[((row) * (len1 + 1)) + col].score = left + gap_penalty;
10635
matrix[((row) * (len1 + 1)) + col].traceback_pos = ((row) * (len1 + 1)) + col - 1;
10636
} else if (up > diag && up > left) {
10637
matrix[((row) * (len1 + 1)) + col].score = up + gap_penalty;
10638
matrix[((row) * (len1 + 1)) + col].traceback_pos = ((row - 1) * (len1 + 1)) + col;
10640
matrix[((row) * (len1 + 1)) + col].score = diag;
10641
matrix[((row) * (len1 + 1)) + col].traceback_pos = ((row - 1) * (len1 + 1)) + col - 1;
10647
/* trace back, create alignment strings */
10648
alnbuf1 = (CharPtr) MemNew (sizeof (Char) * (len1 + len2 + 1));
10649
alnbuf2 = (CharPtr) MemNew (sizeof (Char) * (len1 + len2 + 1));
10650
cp1 = alnbuf1 + len1 + len2;
10651
cp2 = alnbuf2 + len1 + len2;
10658
while (row > 0 || col > 0) {
10659
back_row = matrix[(row * (len1 + 1)) + col].traceback_pos / (len1 + 1);
10660
back_col = matrix[(row * (len1 + 1)) + col].traceback_pos % (len1 + 1);
10661
if (row == back_row) {
10662
*cp1 = buf1[col - 1];
10664
} else if (col == back_col) {
10666
*cp2 = buf2[row - 1];
10668
*cp1 = buf1[col - 1];
10669
*cp2 = buf2[row - 1];
10677
/* no longer need matrix or original sequence buffers */
10678
matrix = MemFree (matrix);
10679
buf1 = MemFree (buf1);
10680
buf2 = MemFree (buf2);
10682
/* count number of segments needed */
10683
num_segs = SegmentsNeededForAlignment (cp1 + 1, cp2 + 1);
10685
/* create DenseSeg */
10686
dsp = DenseSegNew ();
10688
dsp->ids = SeqIdDup (sip1);
10689
dsp->ids->next = SeqIdDup (sip2);
10690
dsp->numseg = num_segs;
10691
dsp->lens = (Int4Ptr)MemNew (sizeof (Int4) * dsp->numseg);
10692
dsp->starts = (Int4Ptr)MemNew (sizeof (Int4) * dsp->dim * dsp->numseg);
10693
dsp->strands = (Uint1Ptr)MemNew (sizeof (Uint1) * dsp->dim * dsp->numseg);
10695
/* fill in strands */
10696
for (i = 0; i < dsp->numseg; i++) {
10697
dsp->strands[2 * i] = Seq_strand_plus;
10698
dsp->strands[2 * i + 1] = strand2;
10700
/* fill in starts and lens */
10701
FillInStartsAndLensForAlignment (dsp, cp1 + 1, cp2 + 1);
10703
/* no longer need FASTA+GAP alignment strings */
10704
alnbuf1 = MemFree (alnbuf1);
10705
alnbuf2 = MemFree (alnbuf2);
10707
/* adjust for real sequence position and strand */
10708
AdjustStringAlingmentForOffsetAndStrand (dsp, start1, start2, stop2, strand2);
10710
salp = SeqAlignNew ();
10712
salp->segtype = SAS_DENSEG;
10715
BioseqUnlock (bsp1);
10716
BioseqUnlock (bsp2);
10717
AlnMgr2IndexSingleChildSeqAlign (salp);
10723
* first interval always on plus strand
10725
static SeqAlignPtr AlignmentForInterval (SeqIdPtr sip1, SeqIdPtr sip2, Int4 start1, Int4 stop1, Int4 start2, Int4 stop2)
10728
SeqAlignPtr salp = NULL;
10729
Int4 len1, len2, i;
10730
Uint1 strand = Seq_strand_plus;
10732
if (sip1 == NULL || sip2 == NULL) {
10736
salp = NWAlignmentForInterval(sip1, sip2, start1, stop1, start2, stop2);
10737
if (salp != NULL) {
10741
dsp = DenseSegNew ();
10743
dsp->ids = SeqIdDup (sip1);
10744
dsp->ids->next = SeqIdDup (sip2);
10745
len1 = stop1 - start1 + 1;
10747
if (stop2 > start2) {
10748
len2 = stop2 - start2 + 1;
10750
len2 = start2 - stop2 + 1;
10751
strand = Seq_strand_minus;
10754
if (len1 == len2) {
10760
dsp->starts = (Int4Ptr) MemNew (sizeof (Int4) * dsp->dim * dsp->numseg);
10761
dsp->strands = (Uint1Ptr) MemNew (sizeof (Uint1) * dsp->dim * dsp->numseg);
10762
dsp->lens = (Int4Ptr) MemNew (sizeof (Int4) * dsp->dim);
10764
if (len1 == len2) {
10765
dsp->lens[0] = len1;
10766
} else if (len1 > len2) {
10767
dsp->lens[0] = len2;
10768
dsp->lens[1] = len1 - len2;
10770
dsp->lens[0] = len1;
10771
dsp->lens[1] = len2 - len1;
10774
dsp->starts[0] = start1;
10775
if (strand == Seq_strand_minus) {
10776
dsp->starts[1] = stop2;
10778
dsp->starts[1] = start2;
10781
for (i = 0; i < dsp->numseg; i++) {
10782
dsp->strands[2 * i] = Seq_strand_plus;
10783
dsp->strands[2 * i + 1] = strand;
10786
if (dsp->numseg > 1) {
10788
dsp->starts[2] = dsp->starts[0] + dsp->lens[0];
10789
dsp->starts[3] = -1;
10791
dsp->starts[2] = -1;
10792
if (strand == Seq_strand_minus) {
10793
dsp->starts[3] = dsp->starts[1] + dsp->lens[0] + dsp->lens[1] - 1;
10795
dsp->starts[3] = dsp->starts[1] + dsp->lens[0];
10800
salp = SeqAlignNew ();
10801
salp->segtype = SAS_DENSEG;
10804
AlnMgr2IndexSingleChildSeqAlign (salp);
10809
static void ReportCreatedAlignment (LogInfoPtr lip, SeqAlignPtr salp)
10811
Int4 from_1, to_1, from_2, to_2;
10813
Char id1[255], id2[255];
10817
if (lip == NULL || lip->fp == NULL || salp == NULL) {
10821
sip = AlnMgr2GetNthSeqIdPtr (salp, 1);
10822
bsp = BioseqLockById (sip);
10824
sip = SeqIdFree (sip);
10825
sip = SeqIdDup (SeqIdFindBest (bsp->id, SEQID_GENBANK));
10826
BioseqUnlock (bsp);
10829
SeqIdWrite (sip, id1, PRINTID_REPORT, sizeof (id1) - 1);
10830
sip = SeqIdFree (sip);
10831
sip = AlnMgr2GetNthSeqIdPtr (salp, 2);
10832
bsp = BioseqLockById (sip);
10834
sip = SeqIdFree (sip);
10835
sip = SeqIdDup (SeqIdFindBest (bsp->id, SEQID_GENBANK));
10836
BioseqUnlock (bsp);
10838
SeqIdWrite (sip, id2, PRINTID_REPORT, sizeof (id2) - 1);
10839
sip = SeqIdFree (sip);
10841
strand = SeqAlignStrand (salp, 2);
10842
AlnMgr2GetNthSeqRangeInSA (salp, 1, &from_1, &to_1);
10843
AlnMgr2GetNthSeqRangeInSA (salp, 2, &from_2, &to_2);
10844
fprintf (lip->fp, "Created alignment to cover space between local alignments: %s:%d-%d, %s:%d-%d%s\n",
10845
id1, from_1, to_1, id2, from_2, to_2,
10846
strand == Seq_strand_minus ? "(c)" : "");
10848
WriteAlignmentInterleaveToFileEx (salp, lip->fp, 40, FALSE, TRUE);
10852
static void FillInAlignmentHoles (SeqAlignPtr salp_list, LogInfoPtr lip)
10854
SeqAlignPtr salp, salp_new;
10855
Int4 start1_this, start1_next;
10856
Int4 stop1_this, stop1_next;
10857
Int4 start2_this, start2_next;
10858
Int4 stop2_this, stop2_next;
10860
SeqIdPtr sip1, sip2;
10862
if (salp_list == NULL || salp_list->next == NULL) {
10866
sip1 = AlnMgr2GetNthSeqIdPtr (salp_list, 1);
10867
sip2 = AlnMgr2GetNthSeqIdPtr (salp_list, 2);
10869
/* note - unlike the other functions, SeqAlignStrand uses 0-based index */
10870
strand = SeqAlignStrand (salp_list, 1);
10873
AlnMgr2GetNthSeqRangeInSA (salp, 1, &start1_this, &stop1_this);
10874
AlnMgr2GetNthSeqRangeInSA (salp, 2, &start2_this, &stop2_this);
10876
while (salp->next != NULL) {
10877
AlnMgr2GetNthSeqRangeInSA (salp->next, 1, &start1_next, &stop1_next);
10878
AlnMgr2GetNthSeqRangeInSA (salp->next, 2, &start2_next, &stop2_next);
10879
if (start1_next > stop1_this + 1
10880
|| (strand == Seq_strand_minus && start2_next < stop2_this - 1)
10881
|| (strand != Seq_strand_minus && start2_next > stop2_this + 1)) {
10882
if (strand == Seq_strand_minus) {
10883
salp_new = AlignmentForInterval (sip1, sip2, stop1_this + 1, start1_next - 1, stop2_this - 1, start2_next + 1);
10885
salp_new = AlignmentForInterval (sip1, sip2, stop1_this + 1, start1_next - 1, stop2_this + 1, start2_next - 1);
10887
ReportCreatedAlignment (lip, salp_new);
10888
salp_new->next = salp->next;
10889
salp->next = salp_new;
10892
start1_this = start1_next;
10893
stop1_this = stop1_next;
10894
start2_this = start2_next;
10895
stop2_this = stop2_next;
10902
static SeqAlignPtr MergeAlignments (SeqAlignPtr salp_list)
10904
SeqAlignPtr salp_new = NULL, salp, salp_next;
10905
DenseSegPtr dsp, dsp_new;
10908
if (salp_list == NULL || salp_list->next == NULL) {
10912
dsp_new = DenseSegNew ();
10914
dsp_new->ids = AlnMgr2GetNthSeqIdPtr (salp_list, 1);
10915
dsp_new->ids->next = AlnMgr2GetNthSeqIdPtr (salp_list, 2);
10917
/* get total number of segments */
10918
for (salp = salp_list; salp != NULL; salp = salp->next) {
10919
dsp = (DenseSegPtr) salp->segs;
10920
dsp_new->numseg += dsp->numseg;
10923
dsp_new->starts = (Int4Ptr) MemNew (sizeof (Int4) * dsp_new->dim * dsp_new->numseg);
10924
dsp_new->strands = (Uint1Ptr) MemNew (sizeof (Uint1) * dsp_new->dim * dsp_new->numseg);
10925
dsp_new->lens = (Int4Ptr) MemNew (sizeof (Int4) * dsp_new->numseg);
10928
for (salp = salp_list; salp != NULL; salp = salp_next) {
10929
salp_next = salp->next;
10930
dsp = (DenseSegPtr) salp->segs;
10931
for (k = 0; k < dsp->numseg; k++) {
10932
dsp_new->lens[seg_num] = dsp->lens[k];
10933
dsp_new->starts[2 * seg_num] = dsp->starts[2 * k];
10934
dsp_new->starts[2 * seg_num + 1] = dsp->starts[2 * k + 1];
10935
dsp_new->strands[2 * seg_num] = dsp->strands[2 * k];
10936
dsp_new->strands[2 * seg_num + 1] = dsp->strands[2 * k + 1];
10940
salp = SeqAlignFree (salp);
10943
salp_new = SeqAlignNew ();
10944
salp_new->segtype = SAS_DENSEG;
10945
salp_new->segs = dsp_new;
10952
static Boolean MatchWithAmbiguity (Char ch1, Char ch2)
10954
Boolean rval = FALSE;
10956
ch1 = toupper (ch1);
10957
ch2 = toupper (ch2);
10962
if (ch1 == 'X' || ch2 == 'X') {
10967
if (ch2 == 'M' || ch2 == 'R' || ch2 == 'W' || ch2 == 'V' || ch2 == 'H' || ch2 == 'D') {
10972
if (ch2 == 'K' || ch2 == 'Y' || ch2 == 'W' || ch2 == 'B' || ch2 == 'H' || ch2 == 'D') {
10977
if (ch2 == 'K' || ch2 == 'R' || ch2 == 'S' || ch2 == 'B' || ch2 == 'V' || ch2 == 'D') {
10982
if (ch2 == 'M' || ch2 == 'Y' || ch2 == 'S' || ch2 == 'B' || ch2 == 'V' || ch2 == 'H') {
10987
if (ch2 == 'G' || ch2 == 'T' || ch2 == 'B' || ch2 == 'D') {
10992
if (ch2 == 'A' || ch2 == 'C' || ch2 == 'V' || ch2 == 'H') {
10997
if (ch2 == 'A' || ch2 == 'G' || ch2 == 'V' || ch2 == 'D') {
11002
if (ch2 == 'C' || ch2 == 'T' || ch2 == 'B' || ch2 == 'H') {
11007
if (ch2 == 'C' || ch2 == 'G' || ch2 == 'B' || ch2 == 'V') {
11012
if (ch2 == 'A' || ch2 == 'T' || ch2 == 'H' || ch2 == 'D') {
11017
if (ch2 == 'C' || ch2 == 'G' || ch2 == 'T' || ch2 == 'K' || ch2 == 'Y' || ch2 == 'S') {
11022
if (ch2 == 'A' || ch2 == 'C' || ch2 == 'G' || ch2 == 'M' || ch2 == 'R' || ch2 == 'S') {
11027
if (ch2 == 'A' || ch2 == 'C' || ch2 == 'T' || ch2 == 'M' || ch2 == 'Y' || ch2 == 'W') {
11032
if (ch2 == 'A' || ch2 == 'G' || ch2 == 'T' || ch2 == 'K' || ch2 == 'R' || ch2 == 'W') {
11041
/* expand for ambiguity characters and poly-A tail */
11042
static SeqAlignPtr ExtendAlignmentList (SeqAlignPtr salp_list)
11044
Int4 from_1, to_1, from_2, to_2, len1, len2, len_check, len_extend;
11045
SeqAlignPtr salp_tmp;
11046
BioseqPtr bsp1 = NULL;
11047
BioseqPtr bsp2 = NULL;
11049
CharPtr buf1, buf2;
11051
SeqIdPtr sip1, sip2;
11054
if (salp_list == NULL) {
11058
strand = SeqAlignStrand (salp_list, 1);
11059
AlnMgr2IndexSingleChildSeqAlign (salp_list);
11061
sip1 = AlnMgr2GetNthSeqIdPtr (salp_list, 1);
11062
bsp1 = BioseqLockById (sip1);
11063
sip2 = AlnMgr2GetNthSeqIdPtr (salp_list, 2);
11064
bsp2 = BioseqLockById (sip2);
11066
AlnMgr2GetNthSeqRangeInSA (salp_list, 1, &from_1, &to_1);
11067
AlnMgr2GetNthSeqRangeInSA (salp_list, 2, &from_2, &to_2);
11070
&& ((strand == Seq_strand_plus && from_2 > 0)
11071
|| (strand == Seq_strand_minus && to_2 < bsp2->length - 1))) {
11073
if (strand == Seq_strand_plus) {
11076
len2 = bsp2->length - to_2 - 1;
11083
buf1 = (CharPtr) MemNew (sizeof (Char) * (len_check + 1));
11084
buf2 = (CharPtr) MemNew (sizeof (Char) * (len_check + 1));
11085
ctr = SeqPortStreamInt (bsp1, from_1 - len_check, from_1 - 1, Seq_strand_plus, STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL, (Pointer) buf1, NULL);
11087
if (strand == Seq_strand_plus) {
11088
ctr = SeqPortStreamInt (bsp2, from_2 - len_check, from_2 - 1, Seq_strand_plus, STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL, (Pointer) buf2, NULL);
11090
ctr = SeqPortStreamInt (bsp2, to_2 + 1, to_2 + len_check, Seq_strand_minus, STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL, (Pointer) buf2, NULL);
11095
while (len_extend < len_check
11096
&& MatchWithAmbiguity (buf1[len_check - len_extend - 1], buf2[len_check - len_extend - 1])) {
11099
buf1 = MemFree (buf1);
11100
buf2 = MemFree (buf2);
11101
if (len_extend > 0) {
11102
dsp = (DenseSegPtr) salp_list->segs;
11103
dsp->lens[0] += len_extend;
11104
dsp->starts[0] -= len_extend;
11105
if (strand == Seq_strand_plus) {
11106
dsp->starts[1] -= len_extend;
11108
SeqAlignIndexFree(salp_list->saip);
11109
salp_list->saip = NULL;
11110
AlnMgr2IndexSingleChildSeqAlign (salp_list);
11114
/* extend at other end */
11115
salp_tmp = salp_list;
11116
while (salp_tmp->next != NULL) {
11117
salp_tmp = salp_tmp->next;
11120
AlnMgr2IndexSingleChildSeqAlign (salp_tmp);
11122
AlnMgr2GetNthSeqRangeInSA (salp_tmp, 1, &from_1, &to_1);
11123
AlnMgr2GetNthSeqRangeInSA (salp_tmp, 2, &from_2, &to_2);
11124
if (to_1 < bsp1->length - 1
11125
&& ((strand == Seq_strand_plus && to_2 < bsp2->length - 1)
11126
|| (strand == Seq_strand_minus && from_2 > 0))) {
11127
len1 = bsp1->length - to_1 - 1;
11128
if (strand == Seq_strand_plus) {
11129
len2 = bsp2->length - to_2 - 1;
11138
if (len_check > 0) {
11139
buf1 = (CharPtr) MemNew (sizeof (Char) * (len_check + 1));
11140
buf2 = (CharPtr) MemNew (sizeof (Char) * (len_check + 1));
11141
ctr = SeqPortStreamInt (bsp1, to_1 + 1, to_1 + len_check, Seq_strand_plus, STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL, (Pointer) buf1, NULL);
11143
if (strand == Seq_strand_plus) {
11144
ctr = SeqPortStreamInt (bsp2, to_2 + 1, to_2 + len_check, Seq_strand_plus, STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL, (Pointer) buf2, NULL);
11146
ctr = SeqPortStreamInt (bsp2, from_2 - len_check, from_2 - 1, Seq_strand_minus, STREAM_EXPAND_GAPS | STREAM_CORRECT_INVAL, (Pointer) buf2, NULL);
11151
while (len_extend < len_check
11152
&& MatchWithAmbiguity (buf1[len_extend], buf2[len_extend])) {
11155
buf1 = MemFree (buf1);
11156
buf2 = MemFree (buf2);
11157
if (len_extend > 0) {
11158
dsp = (DenseSegPtr) salp_tmp->segs;
11159
dsp->lens[dsp->numseg - 1] += len_extend;
11160
if (strand == Seq_strand_minus) {
11161
dsp->starts[(dsp->numseg - 1) * dsp->dim + 1] -= len_extend;
11163
SeqAlignIndexFree(salp_tmp->saip);
11164
salp_tmp->saip = NULL;
11165
AlnMgr2IndexSingleChildSeqAlign (salp_tmp);
11170
BioseqUnlock (bsp1);
11171
BioseqUnlock (bsp2);
11172
sip1 = SeqIdFree (sip1);
11173
sip2 = SeqIdFree (sip2);
11179
static void ReportInitialBlastResults (LogInfoPtr lip, SeqAlignPtr salp_list)
11181
Int4 from_1, to_1, from_2, to_2;
11183
Char id1[255], id2[255];
11187
if (lip == NULL || lip->fp == NULL || salp_list == NULL) {
11191
AlnMgr2IndexSingleChildSeqAlign (salp_list);
11192
sip = AlnMgr2GetNthSeqIdPtr (salp_list, 1);
11193
bsp = BioseqLockById (sip);
11195
sip = SeqIdFree (sip);
11196
sip = SeqIdDup (SeqIdFindBest (bsp->id, SEQID_GENBANK));
11197
BioseqUnlock (bsp);
11199
SeqIdWrite (sip, id1, PRINTID_REPORT, sizeof (id1) - 1);
11200
sip = SeqIdFree (sip);
11201
sip = AlnMgr2GetNthSeqIdPtr (salp_list, 2);
11202
bsp = BioseqLockById (sip);
11204
sip = SeqIdFree (sip);
11205
sip = SeqIdDup (SeqIdFindBest (bsp->id, SEQID_GENBANK));
11206
BioseqUnlock (bsp);
11208
SeqIdWrite (sip, id2, PRINTID_REPORT, sizeof (id2) - 1);
11209
sip = SeqIdFree (sip);
11210
fprintf (lip->fp, "Initial BLAST results\n");
11211
while (salp_list != NULL) {
11212
AlnMgr2IndexSingleChildSeqAlign (salp_list);
11213
strand = SeqAlignStrand (salp_list, 2);
11214
AlnMgr2GetNthSeqRangeInSA (salp_list, 1, &from_1, &to_1);
11215
AlnMgr2GetNthSeqRangeInSA (salp_list, 2, &from_2, &to_2);
11216
fprintf (lip->fp, "%s:%d-%d, %s:%d-%d%s\n", id1, from_1, to_1, id2, from_2, to_2,
11217
strand == Seq_strand_minus ? "(c)" : "");
11218
salp_list = salp_list->next;
11220
lip->data_in_log = TRUE;
11224
static void ReportForRemoval (LogInfoPtr lip, SeqAlignPtr salp, CharPtr reason)
11226
Int4 from_1, to_1, from_2, to_2;
11228
Char id1[255], id2[255];
11231
if (lip == NULL || lip->fp == NULL || salp == NULL) {
11235
sip = AlnMgr2GetNthSeqIdPtr (salp, 1);
11236
SeqIdWrite (sip, id1, PRINTID_REPORT, sizeof (id1) - 1);
11237
sip = SeqIdFree (sip);
11238
sip = AlnMgr2GetNthSeqIdPtr (salp, 2);
11239
SeqIdWrite (sip, id2, PRINTID_REPORT, sizeof (id2) - 1);
11240
sip = SeqIdFree (sip);
11241
AlnMgr2IndexSingleChildSeqAlign (salp);
11242
strand = SeqAlignStrand (salp, 2);
11243
AlnMgr2GetNthSeqRangeInSA (salp, 1, &from_1, &to_1);
11244
AlnMgr2GetNthSeqRangeInSA (salp, 2, &from_2, &to_2);
11245
fprintf (lip->fp, "Removed alignment %s:%d-%d, %s:%d-%d%s %s\n",
11246
id1, from_1, to_1, id2, from_2, to_2,
11247
strand == Seq_strand_minus ? "(c)" : "", reason);
11251
/* Assume earlier alignments are better.
11252
* Remove all alignments that are not the same strand as the first.
11253
* Remove all alignments that overlap on the first sequence.
11254
* Remove all alignments that overlap on the second sequence.
11255
* use Needleman-Wunsch to fill in holes between alignments.
11257
static SeqAlignPtr CombineTSAAlignments (SeqAlignPtr salp)
11259
SeqAlignPtr salp_tmp, salp_match, salp_prev, salp_next;
11261
Int4 from_1, to_1, from_2, to_2;
11262
LogInfoPtr lip = NULL;
11264
if (salp == NULL) {
11268
for (salp_tmp = salp; salp_tmp != NULL; salp_tmp = salp_tmp->next) {
11269
/* if alignment is to minus strand for first sequence, flip alignment */
11270
if (SeqAlignStrand (salp_tmp, 0) != Seq_strand_plus) {
11271
FlipAlignment (salp_tmp);
11275
if (salp->next != NULL) {
11276
lip = OpenLog ("TSA Alignment Adjustments");
11279
ReportInitialBlastResults (lip, salp);
11281
if (salp->next != NULL) {
11282
/* remove alignments that are not the same strand as the initial alignment */
11283
strand = SeqAlignStrand (salp, 1);
11285
AlnMgr2IndexSingleChildSeqAlign (salp);
11286
salp_tmp = salp->next;
11287
while (salp_tmp != NULL) {
11288
AlnMgr2IndexSingleChildSeqAlign (salp_tmp);
11289
salp_next = salp_tmp->next;
11290
if (SeqAlignStrand (salp_tmp, 1) != strand) {
11291
ReportForRemoval (lip, salp_tmp, "because strands do not match.");
11292
salp_prev->next = salp_tmp->next;
11293
salp_tmp->next = NULL;
11294
salp_tmp = SeqAlignFree (salp_tmp);
11296
salp_tmp = salp_next;
11300
/* remove alignments that overlap on the first sequence */
11302
while (salp_match != NULL) {
11303
salp_prev = salp_match;
11304
salp_tmp = salp_match->next;
11305
AlnMgr2GetNthSeqRangeInSA (salp_match, 1, &from_1, &to_1);
11306
while (salp_tmp != NULL) {
11307
salp_next = salp_tmp->next;
11308
AlnMgr2GetNthSeqRangeInSA (salp_tmp, 1, &from_2, &to_2);
11309
if ((from_2 >= from_1 && from_2 <= to_1) || (to_2 >= from_1 && to_2 <= to_1)) {
11310
ReportForRemoval (lip, salp_tmp, "because alignments overlap for first sequence.");
11311
salp_prev->next = salp_tmp->next;
11312
salp_tmp->next = NULL;
11313
salp_tmp = SeqAlignFree (salp_tmp);
11315
salp_tmp = salp_next;
11317
salp_match = salp_match->next;
11320
if (salp->next != NULL) {
11321
/* remove alignments that overlap on the second sequence */
11323
while (salp_match != NULL) {
11324
salp_prev = salp_match;
11325
salp_tmp = salp_match->next;
11326
AlnMgr2GetNthSeqRangeInSA (salp_match, 2, &from_1, &to_1);
11327
while (salp_tmp != NULL) {
11328
salp_next = salp_tmp->next;
11329
AlnMgr2GetNthSeqRangeInSA (salp_tmp, 2, &from_2, &to_2);
11330
if ((from_2 >= from_1 && from_2 <= to_1) || (to_2 >= from_1 && to_2 <= to_1)) {
11331
ReportForRemoval (lip, salp_tmp, "because alignments overlap for second sequence.");
11332
salp_prev->next = salp_tmp->next;
11333
salp_tmp->next = NULL;
11334
salp_tmp = SeqAlignFree (salp_tmp);
11336
salp_tmp = salp_next;
11338
salp_match = salp_match->next;
11342
/* sort remaining alignments by start position on the first sequence */
11343
salp = SortPairwiseAlignmentsByFirstSeqRange (salp);
11345
/* temporary hack. only interested in "unusual" errors. */
11347
lip->data_in_log = FALSE;
11350
if (salp->next != NULL) {
11351
/* remove alignments that are out of order on the second sequence */
11353
AlnMgr2GetNthSeqRangeInSA (salp_prev, 2, &from_1, &to_1);
11354
salp_tmp = salp->next;
11355
while (salp_tmp != NULL) {
11356
AlnMgr2GetNthSeqRangeInSA (salp_tmp, 2, &from_2, &to_2);
11357
if (from_2 < from_1) {
11358
ReportForRemoval (lip, salp_tmp, "because alignments are out of order for second sequence.");
11359
salp_prev->next = salp_tmp->next;
11360
salp_tmp->next = NULL;
11361
salp_tmp = SeqAlignFree (salp_tmp);
11363
salp_prev = salp_tmp;
11367
salp_tmp = salp_prev->next;
11371
/* fill in holes */
11372
FillInAlignmentHoles (salp, lip);
11374
/* extend for good matches */
11375
salp = ExtendAlignmentList (salp);
11377
/* make new alignment by stringing together local alignments */
11378
salp = MergeAlignments (salp);
11380
lip = FreeLog (lip);
11385
NLM_EXTERN SeqAlignPtr GetSeqAlignTSA (BioseqPtr bsp1, BioseqPtr bsp2)
11387
BLAST_SummaryOptions *options = NULL;
11388
SeqAlignPtr salp = NULL;
11390
if (bsp1 == NULL || bsp2 == NULL) return NULL;
11392
BLAST_SummaryOptionsInit(&options);
11393
options->filter_string = StringSave ("m L");
11394
options->word_size = 20;
11395
options->cutoff_evalue = act_get_eval (1);
11396
options->hint = eNone;
11397
if (ISA_na (bsp1->mol))
11399
options->program = eBlastn;
11403
options->program = eBlastp;
11406
BLAST_TwoSequencesSearch(options, bsp1, bsp2, &salp);
11408
/* if there were no alignments from the first search, try again
11409
* with low-complexity masking turned off.
11411
if (salp == NULL) {
11412
options->filter_string = MemFree (options->filter_string);
11413
options->filter_string = StringSave ("m F");
11414
BLAST_TwoSequencesSearch(options, bsp1, bsp2, &salp);
11417
BLAST_SummaryOptionsFree(options);
11419
if (salp != NULL) {
11420
salp = CombineTSAAlignments (salp);
11421
AlnMgr2IndexSeqAlign(salp);
11427
/* code for editing TSA assembly */
11428
typedef struct tsaassemblydialog {
11429
DIALOG_MESSAGE_BLOCK
11431
BioseqPtr consensus_bsp;
11432
} TSAAssemblyDialog, PNTR TSAAssemblyDialogPtr;
11435
Uint2 tsa_assembly_types [] = {
11436
TAGLIST_TEXT, TAGLIST_PROMPT, TAGLIST_PROMPT
11440
Uint2 tsa_assembly_widths [] = {
11445
static void SetTSAAssemblyDialogConsensusBioseq (DialoG d, BioseqPtr bsp)
11447
TSAAssemblyDialogPtr dlg;
11449
dlg = (TSAAssemblyDialogPtr) GetObjectExtra (d);
11451
dlg->consensus_bsp = bsp;
11456
static void SeqAlignsToTSAAssemblyDialog (DialoG d, Pointer data)
11458
TSAAssemblyDialogPtr dlg;
11460
SeqAlignPtr salp_list, salp, salp_tmp;
11461
ValNodePtr data_list = NULL;
11462
Int4 tsa_from, tsa_to, primary_from, primary_to;
11466
CharPtr fmt = "%s\t%d-%d%s\t%d-%d\n";
11470
dlg = (TSAAssemblyDialogPtr) GetObjectExtra (d);
11475
tlp = (TagListPtr) GetObjectExtra (dlg->intervals);
11480
salp_list = (SeqAlignPtr) data;
11483
while (salp != NULL) {
11484
salp_tmp = salp->next;
11486
AlnMgr2IndexSeqAlign (salp);
11487
AlnMgr2GetNthSeqRangeInSA(salp, 1, &tsa_from, &tsa_to);
11488
AlnMgr2GetNthSeqRangeInSA(salp, 2, &primary_from, &primary_to);
11489
sip = AlnMgr2GetNthSeqIdPtr (salp, 2);
11490
bsp = BioseqLockById (sip);
11492
sip = SeqIdFree (sip);
11493
sip = SeqIdDup (SeqIdFindBest (bsp->id, SEQID_GENBANK));
11494
BioseqUnlock (bsp);
11496
SeqIdWrite (sip, id_buf, PRINTID_FASTA_SHORT, sizeof (id_buf) - 1);
11497
sip = SeqIdFree (sip);
11498
strand = AlnMgr2GetNthStrand (salp, 2);
11499
str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (id_buf) + 70));
11500
sprintf (str, fmt, id_buf, primary_from + 1, primary_to + 1, strand == Seq_strand_minus ? "(c)" : "", tsa_from + 1, tsa_to + 1);
11501
ValNodeAddPointer (&data_list, 0, str);
11502
salp->next = salp_tmp;
11506
SendMessageToDialog (tlp->dialog, VIB_MSG_RESET);
11507
tlp->vnp = data_list;
11508
SendMessageToDialog (tlp->dialog, VIB_MSG_REDRAW);
11509
tlp->max = MAX ((Int2) 0, (Int2) (ValNodeLen (data_list) - tlp->rows));
11510
CorrectBarMax (tlp->bar, tlp->max);
11511
CorrectBarPage (tlp->bar, tlp->rows - 1, tlp->rows - 1);
11512
if (tlp->max > 0) {
11513
SafeShow (tlp->bar);
11515
SafeHide (tlp->bar);
11517
SendMessageToDialog (tlp->dialog, VIB_MSG_ENTER);
11521
static void TSATableCallback (Pointer data)
11523
ProcessExternalEvent ();
11527
static Pointer TSAAssemblyDialogToTranscriptomeIdsList (DialoG d)
11529
TSAAssemblyDialogPtr dlg;
11534
TranscriptomeIdsPtr t;
11535
ValNodePtr token_list = NULL;
11537
dlg = (TSAAssemblyDialogPtr) GetObjectExtra (d);
11542
tlp = (TagListPtr) GetObjectExtra (dlg->intervals);
11547
if (dlg->consensus_bsp == NULL) {
11551
for (vnp = tlp->vnp; vnp != NULL; vnp = vnp->next) {
11552
txt = ExtractTagListColumn (vnp->data.ptrvalue, 0);
11553
if (StringHasNoText (txt)) {
11554
txt = MemFree (txt);
11556
ValNodeAddPointer (&token_list, 0, txt);
11559
t = TranscriptomeIdsNew (dlg->consensus_bsp, token_list);
11560
vnp = ValNodeNew (NULL);
11562
vnp->data.ptrvalue = t;
11567
static DialoG CreateTSAAssemblyDialog (GrouP g)
11570
TSAAssemblyDialogPtr dlg;
11575
p = HiddenGroup (g, -1, 0, NULL);
11576
SetGroupSpacing (p, 10, 10);
11578
dlg = (TSAAssemblyDialogPtr) MemNew (sizeof (TSAAssemblyDialog));
11579
if (dlg == NULL) return NULL;
11581
SetObjectExtra (p, dlg, NULL);
11582
dlg->dialog = (DialoG) p;
11583
dlg->todialog = SeqAlignsToTSAAssemblyDialog;
11584
dlg->fromdialog = TSAAssemblyDialogToTranscriptomeIdsList;
11586
x = HiddenGroup (p, 0, 2, NULL);
11587
y = HiddenGroup (x, 3, 0, NULL);
11589
StaticPrompt (y, "PrimaryID", 16 * stdCharWidth, 0, programFont, 'c');
11590
StaticPrompt (y, "Primary Interval", 16 * stdCharWidth, 0, programFont, 'c');
11591
StaticPrompt (y, "TSA Interval", 16 * stdCharWidth, 0, programFont, 'c');
11592
dlg->intervals = CreateTagListDialogEx3 (x, 10, 3, 1, tsa_assembly_types, tsa_assembly_widths, NULL, TRUE, FALSE, NULL, NULL,
11593
NULL, NULL, FALSE, TRUE);
11599
static void ShowAssemblyAlignment (SeqAlignPtr salp)
11603
if (salp == NULL) {
11607
lip = OpenLog ("Assembly Alignments");
11608
while (salp != NULL) {
11609
WriteAlignmentInterleaveToFileEx (salp, lip->fp, 40, FALSE, TRUE);
11610
lip->data_in_log = TRUE;
11614
lip = FreeLog (lip);
11618
typedef struct tsaassemblyform {
11622
} TSAAssemblyFormData, PNTR TSAAssemblyFormPtr;
11625
static void AcceptTSAAssembly (ButtoN b)
11627
TSAAssemblyFormPtr frm;
11629
ValNodePtr err_list, coverage_report, vnp, ids_list, match_errs;
11630
SeqAlignPtr salp, salp_next;
11633
frm = (TSAAssemblyFormPtr) GetObjectExtra (b);
11637
bsp = GetBioseqGivenIDs (frm->input_entityID, frm->input_itemID, frm->input_itemtype);
11644
SetTSAAssemblyDialogConsensusBioseq (frm->intervals, bsp);
11646
ids_list = DialogToPointer (frm->intervals);
11648
/* remove existing assembly */
11649
if (bsp->hist != NULL) {
11650
salp = bsp->hist->assembly;
11651
bsp->hist->assembly = NULL;
11652
while (salp != NULL) {
11653
salp_next = salp->next;
11655
salp = SeqAlignFree (salp);
11660
if (ids_list == NULL) {
11661
Message (MSG_ERROR, "TSA assembly removed");
11663
err_list = MakeTranscriptomeAssemblySeqHist (ids_list->data.ptrvalue, GetSeqAlignTSA, TSATableCallback, NULL);
11664
coverage_report = ReportCoverageForTranscriptomeIdsListSeqHist (ids_list);
11665
match_errs = ReportConsensusMatchForBioseqSeqHist (bsp);
11666
ValNodeLink (&coverage_report, match_errs);
11667
ids_list = TranscriptomeIdsListFree (ids_list);
11669
ValNodeLink (&coverage_report, err_list);
11670
err_list = coverage_report;
11672
if (err_list != NULL) {
11673
lip = OpenLog ("TSA Table Problems");
11674
for (vnp = err_list; vnp != NULL; vnp = vnp->next) {
11675
fprintf (lip->fp, "%s\n", vnp->data.ptrvalue);
11677
lip->data_in_log = TRUE;
11679
lip = FreeLog (lip);
11680
err_list = ValNodeFreeData (err_list);
11684
ObjMgrSetDirtyFlag (frm->input_entityID, TRUE);
11685
ObjMgrSendMsg (OM_MSG_UPDATE, frm->input_entityID, 0, 0);
11686
Remove (frm->form);
11692
NLM_EXTERN void EditTSAAssembly (IteM i)
11696
TSAAssemblyFormPtr frm;
11702
bfp = currentFormDataPtr;
11704
bfp = GetObjectExtra (i);
11706
if (bfp == NULL) return;
11708
bsp = GetBioseqGivenIDs (bfp->input_entityID, bfp->input_itemID, bfp->input_itemtype);
11710
Message (MSG_ERROR, "Must select sequence for editing TSA Assembly");
11713
frm = (TSAAssemblyFormPtr) MemNew (sizeof (TSAAssemblyFormData));
11714
if (frm == NULL) return;
11715
frm->input_entityID = bfp->input_entityID;
11716
frm->input_itemID = bfp->input_itemID;
11717
frm->input_itemtype = bfp->input_itemtype;
11719
w = FixedWindow (-50, -33, -10, -10, "TSA Assembly", StdCloseWindowProc);
11720
SetObjectExtra (w, frm, StdCleanupFormProc);
11721
frm->form = (ForM) w;
11722
h = HiddenGroup (w, -1, 0, NULL);
11723
frm->intervals = CreateTSAAssemblyDialog (h);
11725
if (bsp->hist == NULL) {
11726
PointerToDialog (frm->intervals, NULL);
11728
PointerToDialog (frm->intervals, bsp->hist->assembly);
11732
c = HiddenGroup (h, 2, 0, NULL);
11733
b = PushButton (c, "Accept", AcceptTSAAssembly);
11734
SetObjectExtra (b, frm, NULL);
11735
PushButton (c, "Cancel", StdCancelButtonProc);
11737
AlignObjects (ALIGN_CENTER, (HANDLE) frm->intervals, (HANDLE) c, NULL);
8111
11745
/* automatic defline generator */
8113
11747
typedef struct deffeats {