5654
/* SeqAlignBestHit
 *
 * Reduces the alignment result salp to one alignment and appends a
 * description of the differences it finds to *message.
 *
 * As far as the lines visible in this view show:
 *  - a duplicate of salp is kept in sap_orig; the set is indexed, sorted by
 *    row 1 position and passed to SPI_RemoveInconsistentAlnsFromSet;
 *  - neighbouring alignments that overlap on row 1 are truncated and the
 *    pieces are merged pairwise with AlnMgr2MergeTwoAlignments;
 *  - the merged result is replaced by a duplicate of sap_orig when sap_orig
 *    is at least as long (AlnMgr2GetAlnLength);
 *  - the chosen alignment is passed to AlnMgr2ExtendToCoords and a frequency
 *    matrix is computed from it to count gaps, mismatches and N-mismatches;
 *  - text is appended to *message with StringCat and *nonly is set either to
 *    nctr+beg+end or to -(beg+end).
 *
 * NOTE(review): braces, else branches, the return statement and the
 * initialisation of several locals (for example sap_new) are not visible in
 * this view, so the control flow sketched above must be confirmed against the
 * full source.
 * NOTE(review): *message is appended to with no visible bounds check; the
 * callers visible in this file pass a 1000-character buffer.
 */
static SeqAlignPtr LIBCALL SeqAlignBestHit (SeqAlignPtr salp, BioseqPtr bsp1, BioseqPtr bsp2, Int4 threshold, CharPtr PNTR message, Int4Ptr nonly)
5658
AMAlignIndex2Ptr amaip;
5674
SeqAlignPtr sap_new;
5675
SeqAlignPtr sap_new2;
5676
SeqAlignPtr sap_orig;
5687
if (salp->next != NULL)
5689
sap_orig = SeqAlignDup(salp);
5690
AlnMgr2IndexLite(salp);
5691
AlnMgr2SortAlnSetByNthRowPos(salp, 1);
5692
SPI_RemoveInconsistentAlnsFromSet(salp, 10, 1, SPI_LEFT);
5693
amaip = (AMAlignIndex2Ptr)(salp->saip);
5696
/* compare each alignment with its successor and trim overlap on row 1 */
for (i=0; i<amaip->numsaps-1; i++)
5698
amaip->saps[i]->next = NULL;
5699
AlnMgr2GetNthSeqRangeInSA(amaip->saps[i], 1, &start1, &stop1);
5700
AlnMgr2GetNthSeqRangeInSA(amaip->saps[i+1], 1, &start2, &stop2);
5701
strand = AlnMgr2GetNthStrand(amaip->saps[i], 1);
5702
if (strand == Seq_strand_minus)
5704
if (start1 <= stop2)
5705
AlnMgr2TruncateSeqAlign(amaip->saps[i], stop2+1, start1, 1);
5706
SeqAlignFree(amaip->saps[i]->next);
5707
amaip->saps[i]->next = NULL;
5710
if (stop1 >= start2)
5711
AlnMgr2TruncateSeqAlign(amaip->saps[i], start1, start2-1, 1);
5712
SeqAlignFree(amaip->saps[i]->next);
5713
amaip->saps[i]->next = NULL;
5716
amaip->saps[amaip->numsaps-1]->next = NULL;
5717
/* merge the trimmed pieces pairwise into sap_new */
for (i=0; i<amaip->numsaps-1; i++)
5719
if (sap_new == NULL)
5720
sap_new = SeqAlignDup(amaip->saps[i]);
5721
sap_new2 = AlnMgr2MergeTwoAlignments(sap_new, amaip->saps[i+1]);
5722
if (sap_new2 == NULL)
5723
sap_new = amaip->saps[i];
5726
SeqAlignFree(sap_new);
5731
AlnMgr2IndexSingleChildSeqAlign(sap_new);
5732
/* keep the original when the merged alignment is not longer */
if (AlnMgr2GetAlnLength(sap_orig, FALSE) >= AlnMgr2GetAlnLength(sap_new, FALSE))
5734
SeqAlignFree(sap_new);
5735
sap_new = SeqAlignDup(sap_orig);
5740
AlnMgr2IndexSingleChildSeqAlign(sap);
5741
AlnMgr2ExtendToCoords(sap, 0, -1, 1);
5742
AlnMgr2GetNthSeqRangeInSA(sap, 1, &start1, &stop1);
5743
len = stop1 - start1 + 1; /* the actual length of sequence1 covered by aln */
5744
afp = AlnMgr2ComputeFreqMatrix(sap, 0, -1, 0);
5748
/* walk the alignment columns of the frequency matrix */
for (i=0; i<afp->len; i++)
5750
if (afp->freq[5][i] > 0)
5753
for (j=0; !found && j<afp->size; j++)
5755
if (afp->freq[0][i] > 0)
5759
} else if (afp->freq[j][i] == 1 && afp->freq[5][i] == 0)
5763
} else if (afp->freq[j][i] == 1 && afp->freq[5][i] == 1)
5770
if (alln > 0 && (gaps>0 || mismatches>0 || n > 0))
5772
spp = SeqPortNew(bsp1, 0, bsp1->length-1, Seq_strand_plus, Seq_code_ncbi4na);
5776
while ((res = SeqPortGetResidue(spp)) != SEQPORT_EOF)
5792
if (beg > 0 || end > 0)
5794
sprintf(newstr, "The local sequence has %d terminal N%s. ", beg+end, beg+end!=1?"s":"");
5795
StringCat(*message, newstr);
5799
sprintf(newstr, "The local sequence has %d internal N%s. ", nctr, nctr!=1?"s":"");
5800
StringCat(*message, newstr);
5804
if ((*message[0] != '\0') && len == bsp1->length && gaps == 0 && mismatches == 0)
5807
*nonly = nctr+beg+end;
5809
*nonly = -(beg+end);
5810
sprintf(newstr, "\nThere are no other differences between the local and database sequences.");
5811
StringCat(*message, newstr);
5815
/* the alignment covers less of bsp1 than its full length */
if (len < bsp1->length)
5817
sprintf(newstr, "%sThe alignment to the database sequence does not cover all of the local sequence. ", *message[0]=='\0'?"":"\n");
5818
StringCat(*message, newstr);
5819
spp = SeqPortNew(bsp1, len, bsp1->length-1, Seq_strand_plus, Seq_code_ncbi4na);
5821
while (alln && (res = SeqPortGetResidue(spp)) != SEQPORT_EOF)
5827
sprintf(newstr, "\nThe unaligned sequence consists only of Ns. ");
5829
sprintf(newstr, "\nThere are non-N residues in the unaligned sequence. ");
5830
StringCat(*message, newstr);
5833
if (gaps > 0 || mismatches > 0 || n > 0)
5835
sprintf(newstr, "%sThe alignment to the database sequence has %d gap%s, %d mismatch%s, and %d N-mismatch%s. ", afp->len<bsp1->length?"\n":"", gaps, (gaps!=1?"s":""), mismatches, (mismatches!=1?"es":""), n, (n!=1?"es":""));
5836
StringCat(*message, newstr);
5841
/* SeqAlignStartUpdate
 *
 * Walks the salp chain and, for dense-seg alignments (segtype == 2), adjusts
 * the row whose ID matches target_sip (SeqIdForSameBioseq).
 *
 * Visible behaviour:
 *  - nothing is attempted when salp is NULL or offset <= 0 (the guarded
 *    statement is elided in this view, presumably a return);
 *  - when strand is Seq_strand_minus, existing strand entries are flipped
 *    between plus and minus; when the strands array is missing a new one is
 *    allocated, filled with plus and the target row is set to minus; starts
 *    that are >= 0 are recomputed as len - start - lens[j] + 1;
 *  - offset is added to every start of the target row that is not -1.
 *
 * NOTE(review): how index is determined and where the branches begin and end
 * is not visible here.
 */
static void SeqAlignStartUpdate (SeqAlignPtr salp, SeqIdPtr target_sip, Int4 offset, Int4 len, Uint1 strand)
5843
SeqAlignPtr salptmp;
5845
SeqIdPtr pre, sip, next;
5853
if (salp==NULL || offset<=0)
5855
for (salptmp=salp; salptmp!=NULL; salptmp=salptmp->next)
5857
if (salptmp->segtype == 2)
5859
dsp = (DenseSegPtr) salptmp->segs;
5866
if (SeqIdForSameBioseq(target_sip, sip))
5868
if (strand == Seq_strand_minus)
5870
strandp=dsp->strands;
5871
if (strandp != NULL) {
5873
/* flip the existing strand entries */
for (j=0; j < dsp->numseg; j++)
5875
if (*strandp == Seq_strand_minus)
5876
*strandp = Seq_strand_plus;
5877
else if (*strandp == Seq_strand_plus)
5878
*strandp = Seq_strand_minus;
5883
/* build a strands array: everything plus, target row minus */
dsp->strands = (Uint1Ptr)MemNew(dsp->dim*dsp->numseg*sizeof(Uint1));
5884
for (j=0; j<dsp->dim*dsp->numseg; j++)
5885
dsp->strands[j] = Seq_strand_plus;
5886
for(j=0; j<dsp->numseg; j++)
5887
dsp->strands[j*dsp->dim + index] = Seq_strand_minus;
5891
start=startp[index];
5893
j=dsp->dim*dsp->numseg-dsp->dim;
5895
/* recompute non-gap starts of the target row relative to len */
for (j=0; j<dsp->numseg; j++) {
5896
if (startp[j*dsp->dim + index]>=0) {
5897
startp[j*dsp->dim + index]=len-startp[j*dsp->dim + index]-dsp->lens[j]+1;
5902
/* shift every non-gap start of the target row by offset */
for (j=0; j<dsp->numseg; j++) {
5903
if (dsp->starts[dsp->dim*j+index] != -1)
5904
dsp->starts[dsp->dim*j+index] += offset;
5917
/* SWSeqIdReplaceID
 *
 * Scans the ID list starting at sip_head for the first entry that compares
 * SIC_YES with sip1 and splices sip2 into the list at that position.
 * When the match is the head (sip_prev == NULL) sip2 takes over the head
 * link and the old head is passed to SeqIdFree; otherwise sip2 is linked
 * between sip_prev and sip->next.
 *
 * NOTE(review): the return statement and the handling of the replaced node
 * in the non-head case are not visible in this view.
 */
static SeqIdPtr SWSeqIdReplaceID(SeqIdPtr sip_head, SeqIdPtr sip1, SeqIdPtr sip2)
5926
while (sip != NULL && !found)
5928
if (SeqIdComp(sip, sip1) == SIC_YES)
5938
if (sip_prev == NULL)
5940
sip2->next = sip_head->next;
5941
SeqIdFree(sip_head);
5945
sip_prev->next = sip2;
5946
sip2->next = sip->next;
5952
/**********************************************************************
5954
* nrSeqIdIsInValNodeList (vnp, sip)
5956
* This function checks to see if SeqIdPtr sip points to the same Bioseq
5957
* as the SeqIdPtr values in ValNode list vnp's data.ptrvalues.
5960
* TRUE if a match was found
5961
* FALSE if no match was found
5962
**********************************************************************/
5963
static Boolean nrSeqIdIsInValNodeList (ValNodePtr vnp, SeqIdPtr sip)
5965
ValNodePtr vnptmp=NULL;
5967
Boolean rval = FALSE;
5974
/* rval is part of the loop condition, so the scan ends once it is set */
for (vnptmp=vnp; vnptmp!=NULL && !rval; vnptmp=vnptmp->next) {
5975
siptmp=(SeqIdPtr)vnptmp->data.ptrvalue;
5976
if (SeqIdForSameBioseq(sip, siptmp))
5986
/**********************************************************************
5988
* nrSeqIdAdd (vnp, sip)
5990
* This function checks to see if SeqIdPtr sip points to the same Bioseq
5991
* as the SeqIdPtr values in ValNode list vnp's data.ptrvalues.
5992
* If not, sip is added to the list.
5995
* A ValNodeList pointing to SeqIdPtr values
5996
**********************************************************************/
5997
static ValNodePtr nrSeqIdAdd (ValNodePtr vnp, SeqIdPtr sip)
6004
if (!nrSeqIdIsInValNodeList (vnp, sip))
6006
/* the sip pointer itself is handed to the list; no copy is made here */
ValNodeAddPointer(&vnp, 0, sip);
6013
/* CalculateAlignmentDisplayPosition
 *
 * Maps alignment coordinate aln_pos to a sequence coordinate for the given
 * row with AlnMgr2MapSeqAlignToBioseq.  While the result is ALNMGR_GAP or
 * ALNMGR_ROW_UNDEFINED and aln_pos > 1 the lookup is repeated; if the result
 * is still a gap or undefined afterwards, 1 is used instead.
 *
 * NOTE(review): the first lookup uses the row parameter, but the lookup
 * inside the loop passes the constant 1.  This function is called with row 2
 * elsewhere in this file, so confirm whether the constant is intended.
 * NOTE(review): the statement that changes aln_pos inside the loop and the
 * return statement are not visible in this view.
 */
static Int4 CalculateAlignmentDisplayPosition (SeqAlignPtr sap, Int4 aln_pos, Int4 row)
6017
/* calculate alignment position */
6022
seq_pos = AlnMgr2MapSeqAlignToBioseq(sap, aln_pos, row);
6023
while ((seq_pos == ALNMGR_GAP || seq_pos == ALNMGR_ROW_UNDEFINED) && aln_pos > 1) { /* step back while we are in a gap */
6025
seq_pos = AlnMgr2MapSeqAlignToBioseq(sap, aln_pos, 1);
6027
if (seq_pos == ALNMGR_GAP || seq_pos == ALNMGR_ROW_UNDEFINED)
6028
seq_pos = 1; /* gap at the beginning of the alignment */
6033
/* SWPrintFarpointerAln
 *
 * Writes the two rows of alignment sap to fp as text, linesize alignment
 * columns at a time.  Each output line holds the row ID text, a strand
 * marker (the greater-than sign for Seq_strand_plus, the less-than sign
 * otherwise), the display position plus one printed with width 5, and the
 * upper-cased sequence text for that interval.
 *
 * Does nothing useful when sap or fp is NULL (the guarded statement is
 * elided in this view, presumably a return).  The alignment is indexed
 * first when sap->saip is NULL.
 *
 * NOTE(review): only the release of seqbuf is visible at the end; confirm
 * that buf is released as well in the elided lines.
 */
static void SWPrintFarpointerAln(SeqAlignPtr sap, FILE *fp)
6035
Uint1Ptr buf, seqbuf;
6044
Uint1 strand1, strand2;
6049
if (sap == NULL || fp == NULL)
6051
if (sap->saip == NULL)
6052
AlnMgr2IndexSingleChildSeqAlign(sap);
6053
buf = (Uint1Ptr)MemNew(linesize * sizeof(Uint1));
6054
seqbuf = (Uint1Ptr)MemNew(linesize * sizeof(Uint1));
6056
/* pre-fill both ID text buffers with blanks */
for (i=0; i<16; i++)
6058
textid1[i] = textid2[i] = ' ';
6060
sip = AlnMgr2GetNthSeqIdPtr(sap, 1);
6061
SeqIdWrite(sip, textid1, PRINTID_TEXTID_ACC_VER, 15);
6062
sip2 = AlnMgr2GetNthSeqIdPtr(sap, 2);
6063
SeqIdWrite(sip2, textid2, PRINTID_TEXTID_ACC_VER, 15);
6064
/* look for terminators left by SeqIdWrite in the first 15 characters */
for (i=0; i<15; i++)
6066
if (textid1[i] == '\0')
6068
if (textid2[i] == '\0')
6074
len = AlnMgr2GetAlnLength(sap, FALSE);
6075
strand1 = AlnMgr2GetNthStrand(sap, 1);
6076
strand2 = AlnMgr2GetNthStrand(sap, 2);
6078
/* one pass per block of linesize alignment columns: row 1, then row 2 */
for (l = 0; l < len; l+= linesize)
6080
alnbuflen = linesize;
6081
AlignmentIntervalToString (sap, 1, l, l + linesize - 1, 1, FALSE, seqbuf, buf, &alnbuflen, TRUE);
6082
StringUpper ((char *)buf);
6083
seq_pos = CalculateAlignmentDisplayPosition (sap, l, 1);
6084
sprintf (dig, "%5d", seq_pos + 1);
6086
fprintf(fp, "%s%c%s %s\n", textid1, strand1 == Seq_strand_plus?'>':'<', dig, buf);
6087
alnbuflen = linesize;
6088
AlignmentIntervalToString (sap, 2, l, l + linesize - 1, 1, FALSE, seqbuf, buf, &alnbuflen, TRUE);
6089
StringUpper ((char *)buf);
6090
seq_pos = CalculateAlignmentDisplayPosition (sap, l, 2);
6091
sprintf (dig, "%5d", seq_pos + 1);
6092
fprintf(fp, "%s%c%s %s\n", textid2, strand2 == Seq_strand_plus?'>':'<', dig, buf);
6096
sip = SeqIdFree(sip);
6097
sip2 = SeqIdFree (sip2);
6099
seqbuf = MemFree (seqbuf);
6106
/* Values stored in the err_type field of a far pointer record.
 * Elsewhere in this file err_type starts as FARPOINTER_LOOKUP_NO_ERROR and
 * is set to FARPOINTER_LOOKUP_NONLY when an error message exists and nonly
 * is negative, or to FARPOINTER_LOOKUP_BAD_ALN.
 */
FARPOINTER_LOOKUP_NO_ERROR = 0,
6107
FARPOINTER_LOOKUP_NOT_FOUND,
6108
FARPOINTER_LOOKUP_NONLY,
6109
FARPOINTER_LOOKUP_BAD_ALN
6112
/* Per-sequence record used while resolving far pointers.
 * Only bsp_local and err_type are visible in this view; code elsewhere in
 * this file also accesses sip_local, sip_db, bsp_db, salp, err_msg, nonly
 * and revcomp on this structure.
 */
typedef struct farpointer {
6115
BioseqPtr bsp_local;
6121
EFarPointerError err_type;
6122
}FarPointerData, PNTR FarPointerPtr;
6124
/* State attached to the far pointer window.
 * Only far_pointer_list is visible in this view; code elsewhere in this file
 * also accesses form, doc, selected, num_sequences, lineheight, ParFmt and
 * ColFmt on this structure.
 */
typedef struct farpointerwin
6135
FarPointerPtr far_pointer_list;
6137
} FarPointerWinData, PNTR FarPointerWinPtr;
6140
/* FreeFarPointerData
 *
 * Releases the resources held by the first num entries of the array fpp and
 * then the array itself.  For each entry: both Seq-ids are freed, both
 * Bioseqs are unlocked and their pointers cleared, the alignment is freed and
 * the error message is freed.  A non-NULL local Bioseq is additionally
 * flagged with idx.deleteme = TRUE before its pointer is cleared.
 *
 * fpp is assigned the result of MemFree; the return statement itself is not
 * visible in this view.
 */
static FarPointerPtr FreeFarPointerData (FarPointerPtr fpp, Int4 num)
6145
for (i = 0; i < num; i++) {
6146
fpp[i].sip_local = SeqIdFree (fpp[i].sip_local);
6147
fpp[i].sip_db = SeqIdFree (fpp[i].sip_db);
6148
/* note: the unlock call comes before the NULL test on bsp_local */
BioseqUnlock (fpp[i].bsp_local);
6149
if (fpp[i].bsp_local != NULL) {
6150
fpp[i].bsp_local->idx.deleteme = TRUE;
6152
fpp[i].bsp_local = NULL;
6153
BioseqUnlock (fpp[i].bsp_db);
6154
fpp[i].bsp_db = NULL;
6155
fpp[i].salp = SeqAlignFree (fpp[i].salp);
6156
fpp[i].err_msg = MemFree (fpp[i].err_msg);
6158
fpp = MemFree (fpp);
6163
/* CleanupFarPointerWinProc
 *
 * Cleanup callback registered with SetObjectExtra for the far pointer
 * window.  data is the FarPointerWinPtr; its selected array is freed and
 * then the structure itself.  The far_pointer_list it refers to is not
 * freed in the lines visible here.
 */
static void CleanupFarPointerWinProc (GraphiC g, Pointer data)
6165
FarPointerWinPtr fpwp;
6169
fpwp = (FarPointerWinPtr) data;
6170
fpwp->selected = MemFree (fpwp->selected);
6171
fpwp = MemFree (fpwp);
6175
/* GetDisplayGroupNum
 *
 * Classifies one far pointer record into a display group.  Tests are made in
 * this order and the first that holds decides the result:
 *   fpp == NULL          -> -1
 *   sip_db == NULL       ->  4
 *   bsp_db == NULL       ->  3
 *   nonly < 0            ->  1
 *   err_msg != NULL      ->  0
 * The final fall-through return is not visible in this view; callers in this
 * file iterate over groups 0 to 4 and treat group 2 as selected by default,
 * so it is presumably 2 (confirm against the full source).
 */
static Int4 GetDisplayGroupNum (FarPointerPtr fpp)
6177
if (fpp == NULL) return -1;
6178
else if (fpp->sip_db == NULL) return 4;
6179
else if (fpp->bsp_db == NULL) return 3;
6180
else if (fpp->nonly < 0) return 1;
6181
else if (fpp->err_msg != NULL) return 0;
6185
/* GetSeqNumFromListPos
 *
 * Translates list_pos, a zero-based position in the displayed list, into an
 * index into the fpp array of num records.  The displayed order is walked
 * the same way the list is built: groups 0 to 4 in turn, and within a group
 * the records in array order; list_offset counts the records passed so far.
 *
 * NOTE(review): the return statements are not visible in this view.  The
 * callers in this file treat a result greater than -1 and below the number
 * of sequences as valid.
 */
static Int4 GetSeqNumFromListPos (Int4 list_pos, FarPointerPtr fpp, Int4 num)
6187
Int4 seq_num, list_offset = 0, group_num;
6189
if (fpp == NULL || num < list_pos)
6194
/* TO DO: sort items by how they are listed */
6196
for (group_num = 0; group_num < 5; group_num++) {
6197
for (seq_num = 0; seq_num < num; seq_num++) {
6198
if (GetDisplayGroupNum(fpp + seq_num) == group_num) {
6199
if (list_offset == list_pos) {
6209
/* ReleaseFarPointer
 *
 * Document release callback (installed with SetDocProcs).  Maps the point pt
 * to an item, row and column; when the point lies in row 1, column 3 and
 * inside a square of side lineheight at the left of the mapped rectangle,
 * the selection flag of the corresponding sequence is toggled.  Sequences in
 * display groups 3 and 4 are always forced to unselected.
 */
static void ReleaseFarPointer (DoC d, PoinT pt)
6213
FarPointerWinPtr fpwp;
6217
Int4 seq_num, group_num;
6219
fpwp = (FarPointerWinPtr) GetObjectExtra (d);
6220
if (fpwp != NULL && fpwp->selected != NULL) {
6221
MapDocPoint (d, pt, &item, &row, &col, &rct);
6223
/* shrink the hit rectangle to a lineheight-sized square */
rct.right = rct.left + fpwp->lineheight;
6224
rct.bottom = rct.top + (rct.right - rct.left);
6225
if (row == 1 && col == 3 && PtInRect (pt, &rct))
6227
/* item is one-based, list positions are zero-based */
seq_num = GetSeqNumFromListPos (item - 1, fpwp->far_pointer_list, fpwp->num_sequences);
6228
if (seq_num > -1 && seq_num < fpwp->num_sequences)
6230
group_num = GetDisplayGroupNum(fpwp->far_pointer_list + seq_num);
6231
if (group_num == 3 || group_num == 4) {
6232
/* can never replace non-far pointers */
6233
fpwp->selected[seq_num] = FALSE;
6234
} else if (fpwp->selected [seq_num]) {
6235
fpwp->selected [seq_num] = FALSE;
6237
fpwp->selected [seq_num] = TRUE;
6239
InsetRect (&rct, -1, -1);
6247
/* DrawFarPointer
 *
 * Document shade callback (installed with SetDocShade).  For the given item
 * it works out a square of side lineheight at the right edge of the item
 * rectangle and, for sequences outside display groups 3 and 4, draws two
 * diagonal lines across that square when the sequence is selected.
 *
 * NOTE(review): the drawing of the box outline and the early exits are in
 * lines not visible in this view.
 */
static void DrawFarPointer (DoC d, RectPtr r, Int2 item, Int2 firstLine)
6250
FarPointerWinPtr fpwp;
6253
Int4 seq_num, group_num;
6255
fpwp = (FarPointerWinPtr) GetObjectExtra (d);
6257
if (fpwp == NULL || r == NULL
6258
|| item < 1 || item > fpwp->num_sequences
6266
rct.left = rct.right - fpwp->lineheight;
6267
rct.bottom = rct.top + (rct.right - rct.left);
6269
/* make sure we don't draw a box where we aren't drawing text */
6270
ObjectRect (fpwp->doc, &doc_rect);
6271
InsetRect (&doc_rect, 4, 4);
6272
if (rct.bottom > doc_rect.bottom)
6277
seq_num = GetSeqNumFromListPos (item - 1, fpwp->far_pointer_list, fpwp->num_sequences);
6278
if (seq_num > -1 && seq_num < fpwp->num_sequences) {
6279
group_num = GetDisplayGroupNum(fpwp->far_pointer_list + seq_num);
6280
if (group_num != 3 && group_num != 4) {
6283
if (fpwp->selected != NULL && fpwp->selected [seq_num]) {
6284
/* two diagonals across the square mark the item as selected */
MoveTo (rct.left, rct.top);
6285
LineTo (rct.right - 1, rct.bottom - 1);
6286
MoveTo (rct.left, rct.bottom - 1);
6287
LineTo (rct.right - 1, rct.top);
6293
/* GetTextForOneFarPointerData
 *
 * Builds one tab-separated display line for record fpp in doc_line: the
 * local ID text (written by SeqIdWrite with a limit of 255), then a status
 * column, then an empty column and a newline.  The status column is a fixed
 * text when sip_db or bsp_db is NULL, the error message when one exists, and
 * empty otherwise.  Returns at once when fpp or doc_line is NULL.
 *
 * The newline and tab characters of fpp->err_msg are located in the message
 * buffer itself, so the clean-up described by the existing comments is
 * applied to the stored message (the assignments are elided in this view).
 *
 * NOTE(review): the error message is appended with StringCat without a
 * length check.  In this file err_msg is allocated with 1000 characters
 * while the visible caller supplies a 500-character doc_line; confirm that
 * this cannot overflow.
 */
static void GetTextForOneFarPointerData (FarPointerPtr fpp, CharPtr doc_line)
6297
if (fpp == NULL || doc_line == NULL) return;
6299
SeqIdWrite (fpp->sip_local, doc_line, PRINTID_TEXTID_ACC_ONLY, 255);
6300
if (fpp->sip_db == NULL) {
6301
StringCat (doc_line, "\tNot a far pointer\t\n");
6302
} else if (fpp->bsp_db == NULL) {
6303
StringCat (doc_line, "\tNot found in GenBank\t\n");
6304
} else if (fpp->err_msg != NULL) {
6305
/* replace carriage returns with spaces */
6306
cp = StringChr (fpp->err_msg, '\n');
6307
while (cp != NULL) {
6309
cp = StringChr (cp + 1, '\n');
6311
/* replace tabs with spaces */
6312
cp = StringChr (fpp->err_msg, '\t');
6313
while (cp != NULL) {
6315
cp = StringChr (cp + 1, '\t');
6318
StringCat (doc_line, "\t");
6319
StringCat (doc_line, fpp->err_msg);
6320
StringCat (doc_line, "\t\n");
6322
StringCat (doc_line, "\t\t\n");
6326
/* RedrawFarPointerWin
 *
 * Fills the document of the far pointer window.  Records are appended group
 * by group (display groups 0 to 4) and in array order within a group, one
 * text line per record.  While doing so the selection flag of every record
 * is reset: TRUE for group 2 and FALSE in the other visible assignment
 * (the else line is elided in this view).  The document is refreshed at the
 * end with UpdateDocument.
 */
static void RedrawFarPointerWin (FarPointerWinPtr fpwp)
6329
Char doc_line [500];
6338
for (group_num = 0; group_num < 5; group_num++) {
6339
for (i = 0; i < fpwp->num_sequences; i++) {
6340
if (GetDisplayGroupNum(fpwp->far_pointer_list + i) == group_num) {
6341
GetTextForOneFarPointerData (fpwp->far_pointer_list + i, doc_line);
6342
AppendText (fpwp->doc, doc_line, &(fpwp->ParFmt), fpwp->ColFmt, Nlm_programFont);
6343
if (group_num == 2) {
6344
fpwp->selected [i] = TRUE;
6346
fpwp->selected [i] = FALSE;
6353
UpdateDocument (fpwp->doc, 0, 0);
6356
/* ExportBadAlignments
 *
 * Button callback.  Asks for an output file name, opens the file for writing
 * and, for every record that has both an alignment and an error message,
 * writes the local ID, the error message and the alignment in interleaved
 * form.  Returns at once when the button carries no window data.
 */
static void ExportBadAlignments (ButtoN b)
6358
FarPointerWinPtr fpwp;
6359
Char path [PATH_MAX];
6364
fpwp = (FarPointerWinPtr) GetObjectExtra (b);
6365
if (fpwp == NULL) return;
6367
if (GetOutputFileName (path, sizeof (path), NULL)) {
6368
fp = FileOpen (path, "w");
6370
Message (MSG_ERROR, "Unable to open %s", path);
6373
for (i = 0; i < fpwp->num_sequences; i++) {
6374
if (fpwp->far_pointer_list[i].salp != NULL
6375
&& fpwp->far_pointer_list[i].err_msg != NULL) {
6376
SeqIdWrite (fpwp->far_pointer_list[i].sip_local, str, PRINTID_FASTA_LONG, sizeof (str) - 1);
6377
fprintf (fp, "%s\n", str);
6378
fprintf (fp, "%s\n", fpwp->far_pointer_list[i].err_msg);
6379
WriteAlignmentInterleaveToFileEx (fpwp->far_pointer_list[i].salp, fp,
6388
/* ExportFASTAForUnselectedUpdates
 *
 * Button callback.  Asks for an output file name, opens the file for writing
 * and writes the full length of the database Bioseq in FASTA form for every
 * record that has a database Bioseq and is not currently selected.  Returns
 * at once when the button carries no window data.
 */
static void ExportFASTAForUnselectedUpdates (ButtoN b)
6390
FarPointerWinPtr fpwp;
6391
Char path [PATH_MAX];
6395
fpwp = (FarPointerWinPtr) GetObjectExtra (b);
6396
if (fpwp == NULL) return;
6398
if (GetOutputFileName (path, sizeof (path), NULL)) {
6399
fp = FileOpen (path, "w");
6401
Message (MSG_ERROR, "Unable to open %s", path);
6404
for (i = 0; i < fpwp->num_sequences; i++) {
6405
if (fpwp->far_pointer_list[i].bsp_db != NULL
6406
&& !fpwp->selected[i]) {
6407
EditBioseqToFasta (fpwp->far_pointer_list[i].bsp_db, fp, 0, fpwp->far_pointer_list[i].bsp_db->length - 1);
6414
/* ExportFarPointerErrorMessages
 *
 * Button callback.  Opens the file named by path for writing, writes one
 * line per record that has both an error message and a local ID (the ID, a
 * tab, the message), and then shows the file with LaunchGeneralTextViewer.
 * Returns at once when the button carries no window data.
 *
 * NOTE(review): how path is filled in is not visible in this view.
 */
static void ExportFarPointerErrorMessages (ButtoN b)
6416
FarPointerWinPtr fpwp;
6417
Char path [PATH_MAX];
6422
fpwp = (FarPointerWinPtr) GetObjectExtra (b);
6423
if (fpwp == NULL) return;
6426
fp = FileOpen (path, "w");
6428
Message (MSG_ERROR, "Unable to open %s", path);
6431
for (i = 0; i < fpwp->num_sequences; i++) {
6432
if (fpwp->far_pointer_list[i].err_msg != NULL
6433
&& fpwp->far_pointer_list[i].sip_local != NULL) {
6434
SeqIdWrite (fpwp->far_pointer_list[i].sip_local, str, PRINTID_FASTA_LONG, sizeof (str) - 1);
6435
fprintf (fp, "%s\t%s\n", str, fpwp->far_pointer_list[i].err_msg);
6439
LaunchGeneralTextViewer (path, "FarPointer Errors");
6444
/* DisplayFarPointerData
 *
 * Shows a movable modal window listing the num records of fpp in a
 * three-column document (two text columns and a narrow column of width
 * lineheight), with export buttons and Replace / Cancel buttons, and runs an
 * event loop until the dialog is accepted or cancelled.
 *
 * After the loop, every record that is not selected has its alignment freed,
 * its database Bioseq unlocked and cleared, its database ID freed and its
 * local Bioseq unlocked and cleared.  The caller visible in this file skips
 * records whose bsp_db is NULL.
 *
 * Returns acd.accepted.
 *
 * NOTE(review): fpwp is attached to the window with CleanupFarPointerWinProc,
 * which frees fpwp and fpwp->selected.  The final loop reads fpwp->selected;
 * confirm in the elided lines that the window has not been removed before
 * that loop runs.
 */
static Boolean DisplayFarPointerData (FarPointerPtr fpp, Int4 num)
6447
ModalAcceptCancelData acd;
6448
FarPointerWinPtr fpwp;
6458
fpwp = (FarPointerWinPtr) MemNew (sizeof (FarPointerWinData));
6460
fpwp->far_pointer_list = fpp;
6461
fpwp->num_sequences = num;
6462
fpwp->selected = (BoolPtr) MemNew (sizeof(Boolean) * num);
6464
/* initialize document paragraph format */
6465
fpwp->ParFmt.openSpace = FALSE;
6466
fpwp->ParFmt.keepWithNext = FALSE;
6467
fpwp->ParFmt.keepTogether = FALSE;
6468
fpwp->ParFmt.newPage = FALSE;
6469
fpwp->ParFmt.tabStops = FALSE;
6470
fpwp->ParFmt.minLines = 0;
6471
fpwp->ParFmt.minHeight = 0;
6473
/* initialize document column format */
6474
fpwp->ColFmt[0].pixWidth = 0;
6475
fpwp->ColFmt[0].pixInset = 0;
6476
fpwp->ColFmt[0].charWidth = 80;
6477
fpwp->ColFmt[0].charInset = 0;
6478
fpwp->ColFmt[0].font = NULL;
6479
fpwp->ColFmt[0].just = 'l';
6480
fpwp->ColFmt[0].wrap = TRUE;
6481
fpwp->ColFmt[0].bar = FALSE;
6482
fpwp->ColFmt[0].underline = FALSE;
6483
fpwp->ColFmt[0].left = FALSE;
6484
fpwp->ColFmt[0].last = FALSE;
6485
fpwp->ColFmt[1].pixWidth = 0;
6486
fpwp->ColFmt[1].pixInset = 0;
6487
fpwp->ColFmt[1].charWidth = 80;
6488
fpwp->ColFmt[1].charInset = 0;
6489
fpwp->ColFmt[1].font = NULL;
6490
fpwp->ColFmt[1].just = 'l';
6491
fpwp->ColFmt[1].wrap = TRUE;
6492
fpwp->ColFmt[1].bar = FALSE;
6493
fpwp->ColFmt[1].underline = FALSE;
6494
fpwp->ColFmt[1].left = FALSE;
6495
fpwp->ColFmt[1].last = FALSE;
6496
fpwp->ColFmt[2].pixWidth = 0;
6497
fpwp->ColFmt[2].pixInset = 0;
6498
fpwp->ColFmt[2].charWidth = 80;
6499
fpwp->ColFmt[2].charInset = 0;
6500
fpwp->ColFmt[2].font = NULL;
6501
fpwp->ColFmt[2].just = 'l';
6502
fpwp->ColFmt[2].wrap = TRUE;
6503
fpwp->ColFmt[2].bar = FALSE;
6504
fpwp->ColFmt[2].underline = FALSE;
6505
fpwp->ColFmt[2].left = FALSE;
6506
/* the third column is the last one */
fpwp->ColFmt[2].last = TRUE;
6508
w = MovableModalWindow (50, 33, -10, -10, "Far Pointer Sequences", NULL);
6509
/* the window takes fpwp together with its cleanup callback */
SetObjectExtra (w, fpwp, CleanupFarPointerWinProc);
6510
fpwp->form = (ForM) w;
6512
h = HiddenGroup (w, -1, 0, NULL);
6513
SetGroupSpacing (h, 10, 10);
6515
fpwp->doc = DocumentPanel (h, stdCharWidth * 50, stdLineHeight * 20);
6516
SetObjectExtra (fpwp->doc, fpwp, NULL);
6517
SetDocAutoAdjust (fpwp->doc, TRUE);
6518
SetDocProcs (fpwp->doc, NULL, NULL, ReleaseFarPointer, NULL);
6519
SetDocShade (fpwp->doc, DrawFarPointer, NULL, NULL, NULL);
6521
SelectFont (Nlm_programFont);
6522
fpwp->lineheight = LineHeight ();
6524
/* split the inset document width: two equal text columns plus a
   lineheight-wide third column */
ObjectRect (fpwp->doc, &r);
6525
InsetRect (&r, 4, 4);
6526
fpwp->ColFmt[0].pixWidth = (r.right - r.left - fpwp->lineheight) / 2;
6527
fpwp->ColFmt[1].pixWidth = (r.right - r.left - fpwp->lineheight) / 2;
6528
fpwp->ColFmt[2].pixWidth = fpwp->lineheight;
6530
g = HiddenGroup (h, 4, 0, NULL);
6531
b = PushButton (g, "Export Bad Alignments", ExportBadAlignments);
6532
SetObjectExtra (b, fpwp, NULL);
6534
for (i = 0; i < fpwp->num_sequences; i++) {
6535
if (fpwp->far_pointer_list[i].salp != NULL && fpwp->far_pointer_list[i].err_msg != NULL) {
6540
b = PushButton (g, "Export FASTA for Unselected Sequences", ExportFASTAForUnselectedUpdates);
6541
SetObjectExtra (b, fpwp, NULL);
6543
b = PushButton (g, "Export FarPointer Error Messages", ExportFarPointerErrorMessages);
6544
SetObjectExtra (b, fpwp, NULL);
6546
c = HiddenGroup (h, 2, 0, NULL);
6547
b = PushButton (c, "Replace Selected Sequences", ModalAcceptButton);
6548
SetObjectExtra (b, &acd, NULL);
6549
b = PushButton (c, "Cancel", ModalCancelButton);
6550
SetObjectExtra (b, &acd, NULL);
6552
AlignObjects (ALIGN_CENTER, (HANDLE) fpwp->doc,
6556
RedrawFarPointerWin (fpwp);
6562
acd.accepted = FALSE;
6563
acd.cancelled = FALSE;
6564
/* run the event loop until one of the two modal buttons sets a flag */
while (!acd.accepted && ! acd.cancelled)
6566
ProcessExternalEvent ();
6574
/* release everything held for records that were left unselected */
for (i = 0; i < num; i++) {
6575
if (!fpwp->selected[i]) {
6576
fpp[i].salp = SeqAlignFree (fpp[i].salp);
6577
BioseqUnlock (fpp[i].bsp_db);
6578
fpp[i].bsp_db = NULL;
6579
fpp[i].sip_db = SeqIdFree (fpp[i].sip_db);
6580
/* make sure local sequence is not deleted */
6581
BioseqUnlock (fpp[i].bsp_local);
6582
fpp[i].bsp_local = NULL;
6590
return acd.accepted;
6593
/* Reviewer summary of CCNormalizeSeqAlignId, based on the lines visible in
 * this view:
 *  1. One far pointer record is created per alignment row; the local ID text
 *     is searched for the marker acc and the text after it is read as a GI
 *     (digits only) or as an accession with optional version.
 *  2. When a database ID was built, both Bioseqs are locked and, if both
 *     exist with positive length, they are compared with BlastTwoSequences
 *     and SeqAlignBestHit; an empty error message is freed, otherwise the
 *     record is given an error type.
 *  3. DisplayFarPointerData lets the user choose; on acceptance every record
 *     that still has a database Bioseq gets its starts updated and its ID
 *     replaced in dsp->ids, and the local ID is added to vnp.
 *  4. The record array is released with FreeFarPointerData.
 * NOTE(review): a second definition with the same name and signature follows
 * later in this file; presumably one of the two is excluded by a
 * preprocessor conditional that is not visible here.
 */
/* This function will replace a sequence in an alignment record with one
6594
* downloaded from GenBank. It will also adjust the alignment starts
6595
* for that sequence if the GenBank sequence is not identical to the
6596
* sequence in the alignment (salp).
6597
* vnp is a ValNodePtr to a list of sequence IDs for Bioseqs to be deleted
6600
static ValNodePtr CCNormalizeSeqAlignId (SeqAlignPtr salp, ValNodePtr vnp)
6603
CharPtr tmp, id_start, dot_pos;
6605
FarPointerPtr far_pointer_list = NULL;
6606
Int4 num_missing = 0, num_found = 0;
6607
CharPtr missing_cont_fmt = "The alignment contains %s that can not be found in GenBank.\nPlease check the accession number.\nContinue anyway?\n";
6608
CharPtr missing_fmt = "The alignment contains %s that can not be found in GenBank.\nPlease check the accession number.\n";
6611
BLAST_OptionsBlkPtr options;
6612
SeqAlignPtr tmp_salp;
6614
Int4 offset, len, start1, start2, stop1, stop2;
6615
SeqIdPtr sip, presip;
6618
if (salp == NULL || salp->segtype != 2) {
6622
dsp = (DenseSegPtr) salp->segs;
6624
AlnMgr2IndexSingleChildSeqAlign(salp);
6625
num_rows = AlnMgr2GetNumRows(salp);
6627
far_pointer_list = (FarPointerPtr) MemNew (num_rows * sizeof (FarPointerData));
6629
for (i = 0; i < num_rows; i++) {
6630
far_pointer_list[i].sip_local = AlnMgr2GetNthSeqIdPtr(salp, i + 1);
6631
far_pointer_list[i].sip_db = NULL;
6632
far_pointer_list[i].bsp_local = NULL;
6633
far_pointer_list[i].bsp_db = NULL;
6634
far_pointer_list[i].salp = NULL;
6635
far_pointer_list[i].revcomp = FALSE;
6636
far_pointer_list[i].nonly = 0;
6637
far_pointer_list[i].err_type = FARPOINTER_LOOKUP_NO_ERROR;
6639
/* is this a farpointer ID? */
6640
SeqIdWrite (far_pointer_list[i].sip_local, str, PRINTID_FASTA_LONG, sizeof (str) - 1);
6641
tmp = StringISearch (str, "acc");
6648
/* look for next pipe char, carriage return, or end of string */
6649
while (*tmp!='\0' && *tmp != '|' && *tmp!='\n')
6653
/* check for version */
6655
dot_pos = StringChr (id_start, '.');
6656
if (dot_pos != NULL) {
6658
version = atoi (dot_pos + 1);
6660
if (StringSpn (id_start, "0123456789") == StringLen (id_start)) {
6661
/* all numbers, is GI */
6662
gi = (Int4)atol(id_start);
6664
far_pointer_list[i].sip_db = ValNodeNew (NULL);
6665
if (far_pointer_list[i].sip_db) {
6666
far_pointer_list[i].sip_db->choice = SEQID_GI;
6667
far_pointer_list[i].sip_db->data.intvalue = (Int4)gi;
6670
} else if (IS_ntdb_accession(id_start) || IS_protdb_accession(id_start)) {
6671
far_pointer_list[i].sip_db = SeqIdFromAccession (id_start, version, NULL);
6673
if (far_pointer_list[i].sip_db != NULL) {
6674
far_pointer_list[i].bsp_local = BioseqLockById(far_pointer_list[i].sip_local);
6675
far_pointer_list[i].bsp_db = BioseqLockById(far_pointer_list[i].sip_db);
6676
if (far_pointer_list[i].bsp_local != NULL
6677
&& far_pointer_list[i].bsp_db != NULL
6678
&& far_pointer_list[i].bsp_local->length > 0
6679
&& far_pointer_list[i].bsp_db->length > 0) {
6680
options = BLASTOptionNew("blastn", TRUE);
6681
options->filter_string = StringSave("m L;R");
6682
tmp_salp = BlastTwoSequences (far_pointer_list[i].bsp_local, far_pointer_list[i].bsp_db, "blastn", options);
6683
options = BLASTOptionDelete(options);
6684
/* 1000-character buffer that SeqAlignBestHit appends its findings to */
far_pointer_list[i].err_msg = (CharPtr) MemNew (sizeof(Char) * 1000);
6685
far_pointer_list[i].salp = SeqAlignBestHit (tmp_salp,
6686
far_pointer_list[i].bsp_local,
6687
far_pointer_list[i].bsp_db,
6688
100, &(far_pointer_list[i].err_msg),
6689
&(far_pointer_list[i].nonly));
6690
/* an empty message means nothing was reported: drop the buffer */
if (far_pointer_list[i].err_msg[0] == '\0')
6691
far_pointer_list[i].err_msg = MemFree (far_pointer_list[i].err_msg);
6692
else if (far_pointer_list[i].nonly < 0)
6693
far_pointer_list[i].err_type = FARPOINTER_LOOKUP_NONLY;
6695
far_pointer_list[i].err_type = FARPOINTER_LOOKUP_BAD_ALN;
6705
if (DisplayFarPointerData (far_pointer_list, num_rows)) {
6706
/* for each entry that still has a bioseq, do the replacement */
6708
for (i = 0, sip = dsp->ids; i < num_rows && sip != NULL; i++, sip = sip->next) {
6709
if (far_pointer_list[i].bsp_db == NULL) {
6712
offset = SeqAlignStart(far_pointer_list[i].salp, 1)-SeqAlignStart(far_pointer_list[i].salp, 0);
6713
/* true when exactly one of the two rows is on the minus strand */
if ((SeqAlignStrand(far_pointer_list[i].salp, 0)==Seq_strand_minus && SeqAlignStrand(far_pointer_list[i].salp, 1) != Seq_strand_minus)
6714
|| (SeqAlignStrand(far_pointer_list[i].salp, 1)==Seq_strand_minus && SeqAlignStrand(far_pointer_list[i].salp, 0) != Seq_strand_minus))
6716
/* strand is reversed */
6717
strand=Seq_strand_minus;
6718
AlnMgr2IndexSingleChildSeqAlign(far_pointer_list[i].salp);
6719
AlnMgr2GetNthSeqRangeInSA(far_pointer_list[i].salp, 1, &start1, &stop1);
6720
AlnMgr2GetNthSeqRangeInSA(far_pointer_list[i].salp, 2, &start2, &stop2);
6721
len = stop2 + start1;
6724
offset = 0 - offset;
6727
strand=Seq_strand_plus;
6728
/* NOTE(review): this call updates the pairwise alignment stored in the
   record, whereas the other definition of this function in this file
   passes salp at the corresponding place; confirm which is intended. */
SeqAlignStartUpdate (far_pointer_list[i].salp, far_pointer_list[i].sip_local, abs(offset), len, strand);
6729
dsp->ids = SWSeqIdReplaceID(dsp->ids, far_pointer_list[i].sip_local, far_pointer_list[i].sip_db);
6730
/* set to NULL so that we don't free it later */
6731
far_pointer_list[i].sip_db = NULL;
6737
/* We add the ID of the sequence we are replacing to a list
6738
* of sequences that will be deleted later.
6739
* We can't delete the sequence now, in case it is present
6740
* in more than one alignment for this record.
6742
vnp = nrSeqIdAdd (vnp, far_pointer_list[i].sip_local);
6743
/* set to NULL so that we don't free it later */
6744
far_pointer_list[i].sip_local = NULL;
6747
for (i = 0, sip = dsp->ids; i < num_rows && sip != NULL; i++, sip = sip->next) {
6748
if (far_pointer_list[i].bsp_db != NULL) {
6749
BioseqUnlock (far_pointer_list[i].bsp_db);
6750
far_pointer_list[i].bsp_db->idx.deleteme = TRUE;
6752
if (far_pointer_list[i].bsp_local != NULL) {
6753
BioseqUnlock (far_pointer_list[i].bsp_local);
6754
far_pointer_list[i].bsp_local = NULL;
6759
far_pointer_list = FreeFarPointerData(far_pointer_list, num_rows);
6764
/* CCNormalizeSeqAlignId (second definition in this file)
 *
 * Variant that collects one BestHit record per row in a linked list, sorts
 * the records with HeapSort / OrderBestHits and then asks the user through
 * Message dialogs whether matching sequences should be replaced.
 *
 * NOTE(review): this has the same name and signature as an earlier
 * definition in this file; presumably one of the two is excluded by a
 * preprocessor conditional that is not visible here.
 * NOTE(review): the three replacement branches are not consistent with each
 * other.  The first passes abs(offset) to SeqAlignStartUpdate while the
 * other two pass offset unchanged (SeqAlignStartUpdate does nothing for
 * offset <= 0), and the branch for inexact matches treats the strand as
 * reversed when either row is minus, whereas the other two require exactly
 * one minus row.  Confirm which behaviour is intended.
 */
static ValNodePtr CCNormalizeSeqAlignId (SeqAlignPtr salp, ValNodePtr vnp)
6766
BLAST_OptionsBlkPtr options;
6775
SeqAlignPtr seqalign = NULL;
6776
SeqAlignPtr bestsalp = NULL;
6777
CharPtr TmpBuff, tmp;
6781
totlenlcl = 0, totlendb = 0;
6782
Int4 i, j, k, len = 0, n;
6790
BioseqPtr bsp1, bsp2;
6791
Int4 start1, start2, stop1, stop2;
6794
BestHitPtr hip, hip_prev, hip_head;
6795
BestHitPtr PNTR hiparray;
6796
Char messagestr[1500];
6801
hip_prev = hip_head = NULL;
6802
errstr = (CharPtr)MemNew(500*sizeof(Char));
6804
if (salp->segtype == 2) {
6805
dsp = (DenseSegPtr) salp->segs;
6813
lclsip = SeqIdDup (sip);
6814
SeqIdWrite (lclsip, str, PRINTID_FASTA_LONG, 50);
6815
tmp = StringStr (str, "acc");
6818
tmp = StringStr (str, "ACC");
6821
/* step over the three marker characters */
tmp++; tmp++; tmp++;
6825
while (*tmp!='\0' && *tmp != '|' && *tmp!='\n')
6828
/* check for version */
6831
while (*tmp != 0 && *tmp != '.')
6838
version = atoi (tmp + 1);
6842
j = StringLen (TmpBuff);
6843
for(k =0; k < j; k++) {
6844
if(!isdigit(TmpBuff[k])) {
6851
ok=(IS_ntdb_accession(TmpBuff) || IS_protdb_accession(TmpBuff));
6853
dbsip = SeqIdFromAccession (TmpBuff, version, NULL);
6857
gi = (Int4)atol(TmpBuff);
6859
dbsip = ValNodeNew (NULL);
6861
dbsip->choice = SEQID_GI;
6862
dbsip->data.intvalue = (Int4)gi;
6867
bsp1 = BioseqLockById(lclsip);
6868
bsp2 = BioseqLockById(dbsip);
6869
if ( bsp1 != NULL && bsp2 != NULL && bsp1->length > 0 && bsp2->length > 0) {
6870
options = BLASTOptionNew("blastn", TRUE);
6871
options->filter_string = StringSave("m L;R");
6872
seqalign = BlastTwoSequences (bsp1, bsp2, "blastn", options);
6875
/* NOTE(review): errstr was already allocated above; confirm in the elided
   lines that the earlier buffer is released before this assignment. */
errstr = (CharPtr)MemNew(1000*sizeof(Char));
6876
bestsalp = SeqAlignBestHit (seqalign, bsp1, bsp2, 100, &errstr, &nonly);
6877
hip = (BestHitPtr)MemNew(sizeof(BestHit));
6878
hip->sap = bestsalp;
6880
if (dbsip->choice != SEQID_GI && bsp2->id != NULL) {
6881
/* recreate sip to get correct version number */
6882
for (tmpsip = bsp2->id; tmpsip != NULL; tmpsip = tmpsip->next) {
6883
if (tmpsip->choice == dbsip->choice) break;
6885
if (tmpsip != NULL) {
6886
dbsip = SeqIdFree (dbsip);
6887
dbsip = SeqIdStripLocus (SeqIdDup (tmpsip));
6893
hip->errstr = StringSave(errstr);
6895
if (*hip->errstr == '\0')
6901
if (hip_head != NULL)
6903
hip_prev->next = hip;
6906
hip_head = hip_prev = hip;
6908
if (totlendb == 0) {
6909
SeqIdWrite(dbsip, strLog, PRINTID_TEXTID_ACCESSION, 50);
6910
sprintf(errstr, "This alignment contains \"%s\" that can not be found in GenBank.\nPlease check the accession number.\n", strLog);
6912
} else if (totlenlcl == 0) {
6913
SeqIdWrite (lclsip, strLog, PRINTID_TEXTID_ACCESSION, 50);
6914
sprintf(errstr, "This alignment contains \"%s\" that can not be found.\nPlease check the accession number.\n", strLog);
6917
hip = (BestHitPtr)MemNew(sizeof(BestHit));
6918
hip->sap = bestsalp;
6921
hip->errstr = StringSave(errstr);
6926
if (hip_head != NULL)
6928
hip_prev->next = hip;
6931
hip_head = hip_prev = hip;
6934
SeqIdWrite (sip, strLog, PRINTID_TEXTID_ACCESSION, 50);
6935
sprintf(errstr, "This alignment contains \"%s\" that can not be found in GenBank.\nPlease check the accession number.\n", strLog);
6937
hip = (BestHitPtr)MemNew(sizeof(BestHit));
6938
hip->sap = bestsalp;
6941
hip->errstr = StringSave(errstr);
6946
if (hip_head != NULL)
6948
hip_prev->next = hip;
6951
hip_head = hip_prev = hip;
6966
/* copy the linked records into an array so they can be sorted */
hiparray = (BestHitPtr PNTR)MemNew(numhips*sizeof(BestHitPtr));
6971
hiparray[numhips] = hip;
6975
HeapSort(hiparray, numhips, sizeof(BestHitPtr), OrderBestHits);
6977
/* records with an empty error string come first after sorting */
while (i<numhips && *hiparray[i]->errstr == '\0')
6983
/* If there are sequences that can be treated as Far Pointers
6984
* and are identical to the most recent version of the sequence
6985
* in GenBank, ask the user if these sequences should be replaced
6986
* by the GenBank sequence.
6988
messagestr[0] = '\0';
6993
/* if we are looking at an alignment of segmented sets,
6994
* and the far pointer points to something other than a
6995
* segmented set, the sequence ID will be the same for
6996
* the alignments for each segment. We only want to ask
6997
* about the same sequence once.
6999
if (! nrSeqIdIsInValNodeList (vnp, hiparray[j]->sip1))
7001
SeqIdWrite(hiparray[j]->sip2, strLog, PRINTID_TEXTID_ACCESSION, 50);
7002
StringCat(messagestr, strLog);
7003
StringCat(messagestr, ", ");
7008
if (ans == ANS_CANCEL)
7010
ans = Message(MSG_OKC,
7011
"This alignment contains %s that %s already in GenBank. \nDo you wish to replace %s?",
7012
messagestr, num_not_asked > 1 ? "are":"is",
7013
num_not_asked > 1 ? "them":"it");
7015
if (ans != ANS_CANCEL)
7019
offset = SeqAlignStart(hiparray[j]->sap, 1)-SeqAlignStart(hiparray[j]->sap, 0);
7020
if ((SeqAlignStrand(hiparray[j]->sap, 0)==Seq_strand_minus && SeqAlignStrand(hiparray[j]->sap, 1) != Seq_strand_minus) || (SeqAlignStrand(hiparray[j]->sap, 1)==Seq_strand_minus && SeqAlignStrand(hiparray[j]->sap, 0) != Seq_strand_minus))
7022
strand=Seq_strand_minus;
7023
AlnMgr2IndexSingleChildSeqAlign(hiparray[j]->sap);
7024
AlnMgr2GetNthSeqRangeInSA(hiparray[j]->sap, 1, &start1, &stop1);
7025
AlnMgr2GetNthSeqRangeInSA(hiparray[j]->sap, 2, &start2, &stop2);
7026
len = stop2 + start1;
7029
offset = 0 - offset;
7032
strand=Seq_strand_plus;
7033
SeqAlignStartUpdate (salp, hiparray[j]->sip1, abs(offset), len, strand);
7034
dsp->ids = SWSeqIdReplaceID(dsp->ids, hiparray[j]->sip1, hiparray[j]->sip2);
7041
/* We add the ID of the sequence we are replacing to a list
7042
* of sequences that will be deleted later.
7043
* We can't delete the sequence now, in case it is present
7044
* in more than one alignment for this record.
7046
vnp = nrSeqIdAdd (vnp, hiparray[j]->sip1);
7048
SeqAlignFree(hiparray[j]->sap);
7049
if (hiparray[j]->bsp1 != NULL) {
7050
BioseqUnlock (hiparray[j]->bsp1);
7051
hiparray[j]->bsp1->idx.deleteme = TRUE;
7052
hiparray[j]->bsp1 = NULL;
7054
if (hiparray[j]->bsp2 != NULL) {
7055
BioseqUnlock (hiparray[j]->bsp2);
7056
hiparray[j]->bsp2 = NULL;
7059
} else /* need to ask one by one */
7063
SeqIdWrite(hiparray[j]->sip2, strLog, PRINTID_TEXTID_ACCESSION, 50);
7064
ans = Message(MSG_OKC, "This alignment contains %s that is already in GenBank. \nDo you wish to replace it?", strLog);
7065
if (ans != ANS_CANCEL)
7067
offset = SeqAlignStart(hiparray[j]->sap, 1)-SeqAlignStart(hiparray[j]->sap, 0);
7068
if ((SeqAlignStrand(hiparray[j]->sap, 0)==Seq_strand_minus && SeqAlignStrand(hiparray[j]->sap, 1) != Seq_strand_minus) || (SeqAlignStrand(hiparray[j]->sap, 1)==Seq_strand_minus && SeqAlignStrand(hiparray[j]->sap, 0) != Seq_strand_minus))
7070
strand=Seq_strand_minus;
7071
AlnMgr2IndexSingleChildSeqAlign(hiparray[j]->sap);
7072
AlnMgr2GetNthSeqRangeInSA(hiparray[j]->sap, 1, &start1, &stop1);
7073
AlnMgr2GetNthSeqRangeInSA(hiparray[j]->sap, 2, &start2, &stop2);
7074
len = stop2 + start1;
7077
offset = 0 - offset;
7080
strand=Seq_strand_plus;
7081
SeqAlignStartUpdate (salp, hiparray[j]->sip1, offset, len, strand);
7082
dsp->ids = SWSeqIdReplaceID(dsp->ids, hiparray[j]->sip1, hiparray[j]->sip2);
7087
SeqAlignReplaceId (hiparray[j]->sip1, hiparray[j]->sip2, salp);
7088
vnp = nrSeqIdAdd (vnp, hiparray[j]->sip1);
7090
if (hiparray[j]->bsp1 != NULL) {
7091
hiparray[j]->bsp1->idx.deleteme = TRUE;
7094
SeqAlignFree(hiparray[j]->sap);
7095
if (hiparray[j]->bsp1 != NULL) {
7096
BioseqUnlock (hiparray[j]->bsp1);
7097
hiparray[j]->bsp1 = NULL;
7099
if (hiparray[j]->bsp2 != NULL) {
7100
BioseqUnlock (hiparray[j]->bsp2);
7101
hiparray[j]->bsp2 = NULL;
7108
/* Replacement of sequences that are not exact matches */
7109
for (j=n+i; j<numhips; j++)
7111
if (hiparray[j]->errtype <3)
7113
SeqIdWrite (hiparray[j]->sip2, strLog, PRINTID_TEXTID_ACCESSION, 50);
7114
ans = Message (MSG_OKC, "This alignment contains \"%s\" that is already in GenBank.\n However, the local version is not identical to the most recent database version.\n %s \nDo you wish to replace it anyway ?\n If you cancel, the alignment of the local and the database versions \nof \"%s\" will be saved in the error file \"error.log\"", strLog, hiparray[j]->errstr, strLog);
7115
if (ans != ANS_CANCEL)
7117
offset = SeqAlignStart(hiparray[j]->sap, 1)-SeqAlignStart(hiparray[j]->sap, 0);
7119
if (SeqAlignStrand(hiparray[j]->sap, 0)==Seq_strand_minus || SeqAlignStrand(hiparray[j]->sap, 1)==Seq_strand_minus)
7121
strand=Seq_strand_minus;
7122
AlnMgr2IndexSingleChildSeqAlign(hiparray[j]->sap);
7123
AlnMgr2GetNthSeqRangeInSA(hiparray[j]->sap, 1, &start1, &stop1);
7124
AlnMgr2GetNthSeqRangeInSA(hiparray[j]->sap, 2, &start2, &stop2);
7125
len = stop2 + start1;
7128
offset = 0 - offset;
7132
strand=Seq_strand_plus;
7133
SeqAlignStartUpdate (salp, hiparray[j]->sip1, offset, len, strand);
7134
dsp->ids = SWSeqIdReplaceID(dsp->ids, hiparray[j]->sip1, hiparray[j]->sip2);
7139
SeqAlignReplaceId (hiparray[j]->sip1, hiparray[j]->sip2, salp);
7140
vnp = nrSeqIdAdd (vnp, hiparray[j]->sip1);
7144
/* NOTE(review): the result of FileOpen is handed to fprintf on the next
   line with no NULL check in between. */
ofp = FileOpen("error.log", "a");
7145
fprintf(ofp, "This alignment contains %s that is already in GenBank; \nhowever, the local version is not identical to the most recent database version.\n %s\n", strLog, hiparray[j]->errstr);
7147
/* NOTE(review): SWPrintFarpointerAln is defined in this file with a FILE *
   second parameter, but a string literal is passed here. */
SWPrintFarpointerAln(hiparray[j]->sap, "error.log");
7150
Message(MSG_OK, "%s", hiparray[j]->errstr);
7158
/* File-level chain of validation messages. ValMessage stores each message
   it constructs here, and ValidateSeqAlignandACCEx prints the chain with
   BlastErrorPrint and then resets it with BlastErrorChainDestroy. */
static ValNodePtr errorp = NULL;
7160
/******************************************************************
7161
Output error message according to code defined in alignval.h.
7162
id refers to seqid of the sequence that causes the error
7163
and idcontext refers to other sequences in the same segment.
7164
Intvalue is used to indicate 1) the segment where the sequence
7165
with error is, or 2) the segtype in case of segtype error.
7166
Please note that not all errors report all three
7167
parameters (id, idcontext, Intvalue)
7168
******************************************************************/
7169
static void ValMessage (Int1 MessageCode, ErrSev errlevel, SeqIdPtr id, SeqIdPtr idcontext , Int4 Intvalue)
/* Builds a short category label in string1 and the full message text in
   string2 for the given message code, then records both together with
   errlevel on the file-level errorp chain.
   id        - SeqId of the sequence the message is about (written to buf)
   idcontext - SeqId(s) giving the context (written to buf3 where used)
   Intvalue  - segment number, or the segtype for the segtype message */
7179
/* id is rendered once into buf; the cases below reuse buf. */
SeqIdWrite(id, buf, PRINTID_FASTA_LONG, sizeof(buf)-1);
7183
sprintf(string1, "SeqId");
7184
sprintf(string2, "Invalid Seq_id: %s\n", buf);
7187
case Err_Strand_Rev:
7188
SeqIdWrite (idcontext, buf3, PRINTID_REPORT, sizeof (buf3));
7189
sprintf(string1, "Strand");
7190
sprintf(string2, "Alignment strand is reversed in segment %d for Seq ID: %s in the context of %s\n", Intvalue, buf, buf3);
7193
case Err_Denseg_Len_Start:
7194
SeqIdWrite (idcontext, buf3, PRINTID_REPORT, sizeof (buf3));
7195
sprintf(string1, "Start/Length");
7196
sprintf(string2, "Error in length and/or starts in segment %d for sequence ID: %s in the context of %s\n", Intvalue, buf, buf3);
7199
case Err_Start_Less_Than_Zero:
7200
SeqIdWrite (idcontext, buf3, PRINTID_REPORT, sizeof (buf3));
7201
sprintf(string1, "Start");
7202
sprintf(string2, "Start point is less than zero in segment %d for sequence ID: %s in the context of %s\n", Intvalue, buf, buf3);
7205
case Err_Start_More_Than_Biolen:
7206
SeqIdWrite (idcontext, buf3, PRINTID_REPORT, sizeof (buf3));
7207
sprintf(string1, "Start");
7208
sprintf(string2, "Start point is greater than total bioseq length in segment %d for sequence ID: %s in the context of %s\n", Intvalue, buf, buf3);
7211
case Err_End_Less_Than_Zero:
7212
SeqIdWrite (idcontext, buf3, PRINTID_REPORT, sizeof (buf3));
7213
sprintf(string1, "Length");
7214
sprintf(string2, "End point is less than zero in segment %d for sequence ID: %s in the context of %s\n", Intvalue, buf, buf3);
7217
case Err_End_More_Than_Biolen:
7218
SeqIdWrite (idcontext, buf3, PRINTID_REPORT, sizeof (buf3));
7219
sprintf(string1, "Length");
7220
sprintf(string2, "End point is greater than total bioseq length in segment %d for sequence ID: %s in the context of %s\n", Intvalue, buf, buf3);
7223
case Err_Len_Less_Than_Zero:
7224
SeqIdWrite (idcontext, buf3, PRINTID_REPORT, sizeof (buf3));
7225
sprintf(string1, "Length");
7226
sprintf(string2, "Segment length is less than zero in segment %d for sequence ID: %s in the context of %s\n", Intvalue, buf, buf3);
7229
case Err_Len_More_Than_Biolen:
7230
SeqIdWrite (idcontext, buf3, PRINTID_REPORT, sizeof (buf3));
7231
sprintf(string1, "Length");
7232
sprintf(string2, "Segment length is greater than total bioseq length in segment %d for sequence ID: %s in the context of %s\n", Intvalue, buf, buf3);
7235
case Err_Sum_Len_Start:
7236
SeqIdWrite (idcontext, buf3, PRINTID_REPORT, sizeof (buf3));
7237
sprintf(string1, "Start");
7238
sprintf(string2, "Sum of start point and segment is greater than total bioseq length in segment %d for sequence ID: %s in the context of %s\n", Intvalue, buf, buf3);
7241
case Err_SeqAlign_DimSeqId_Not_Match:
7242
SeqIdWrite (idcontext, buf3, PRINTID_REPORT, sizeof (buf3));
7243
sprintf(string1, "SeqId");
7244
sprintf(string2, "The number of SeqId does not match the dimensions for sequence ID's %s\n", buf3);
7247
case Err_Segs_DimSeqId_Not_Match:
7248
SeqIdWrite (idcontext, buf3, PRINTID_REPORT, sizeof (buf3));
7249
sprintf(string1, "SeqId");
7250
sprintf(string2, "The number of SeqId does not match the dimensions in segment %d for sequence ID's %s\n", Intvalue, buf3);
7254
SeqIdWrite (idcontext, buf3, PRINTID_REPORT, sizeof (buf3));
7255
sprintf(string1, "Fasta");
7256
sprintf(string2, "This may be a fasta-like alignment for SeqId: %s in the context of %s\n", buf, buf3);
7260
sprintf(string1, "Segs");
7261
sprintf(string2, "This alignment contains a null segs\n");
7264
case Err_Segment_Gap:
7265
SeqIdWrite (idcontext, buf3, PRINTID_REPORT, sizeof (buf3));
7266
sprintf(string1, "Segs");
7267
sprintf(string2, "Segment %d is a gap for all sequence with the following ID's: %s\n", Intvalue, buf3);
7270
case Err_Segs_Dim_One:
7271
SeqIdWrite (idcontext, buf3, PRINTID_REPORT, sizeof (buf3));
7272
sprintf(string1, "Segs");
7273
sprintf(string2, "There is only one dimension in segment %d for sequence ID's %s\n", Intvalue, buf3);
7276
case Err_SeqAlign_Dim_One:
7277
SeqIdWrite (idcontext, buf3, PRINTID_REPORT, sizeof (buf3));
7278
sprintf(string1, "Dim");
7279
sprintf(string2, "There is only one dimension for sequence ID's %s\n", buf3);
7283
sprintf(string1, "Segs");
7284
sprintf(string2, "This alignment has a undefined or unsupported Seqalign segtype %d\n", Intvalue);
7290
/* Record the message only when string1 is non-empty. */
if (StringLen(string1) > 0)
7291
errorp = BlastConstructErrorMessage (string1, string2, errlevel, &errorp);
7294
/******************************************************************
7295
validate each alignment sequentially.
7296
This function will subject the seqalign to all validation functions
7297
******************************************************************/
7298
/*********************************************************/
7299
/* For each SeqId held in the ids list, looks up the bioseq in the entity
   and removes its best top parent entry from the top SeqEntry of the
   entity. Afterwards every SeqId stored in ids is freed and its slot set
   to NULL; the list nodes themselves are not freed in the lines shown.
   ids      - ValNode list whose data.ptrvalue fields are SeqIdPtr
   entityID - entity whose top SeqEntry is edited */
static void delete_bioseqs (ValNodePtr ids, Uint2 entityID)
7301
SeqEntryPtr sep_top;
7302
SeqEntryPtr sep_del;
7307
ObjMgrDataPtr omdptop;
7314
sep_top = GetTopSeqEntryForEntityID (entityID);
7315
/* Save the object manager data and parent link of the top entry; they
   are put back after the removals (see the Link/Restore calls below). */
SaveSeqEntryObjMgrData (sep_top, &omdptop, &omdata);
7316
GetSeqEntryParent (sep_top, &parentptr, &parenttype);
7321
sip = (SeqIdPtr) vnp->data.ptrvalue;
7323
/* Wrap the id in a whole-sequence SeqLoc so the bioseq can be looked up. */
slp = (SeqLocPtr)ValNodeNew (NULL);
7324
slp->choice = SEQLOC_WHOLE;
7325
slp->data.ptrvalue = sip;
7326
bsp = GetBioseqGivenSeqLoc (slp, entityID);
7328
sep_del=GetBestTopParentForData (entityID, bsp);
7329
RemoveSeqEntryFromSeqEntry (sep_top, sep_del, FALSE);
7331
/* Detach the borrowed id from slp; presumably so that releasing slp
   (not shown) does not also free sip - TODO confirm. */
slp->data.ptrvalue = NULL;
7336
SeqMgrLinkSeqEntry (sep_top, parenttype, parentptr);
7337
RestoreSeqEntryObjMgrData (sep_top, omdptop, &omdata);
7338
RenormalizeNucProtSets (sep_top, TRUE);
7340
/* Release the ids now that the matching entries have been processed. */
for (vnp=ids; vnp!=NULL; vnp=vnp->next) {
7341
SeqIdFree ((SeqIdPtr) vnp->data.ptrvalue);
7342
vnp->data.ptrvalue = NULL;
7349
/* Scans the SeqIds of a dense-seg alignment (segtype 2), looking for an
   id whose FASTA text contains the marker acc and whose following text
   is recognised as a nucleotide or protein database accession.
   The scan stops at the first such id. The return statement is not
   visible here; presumably the found flag is returned - TODO confirm. */
static Boolean check_dbid_seqalign (SeqAlignPtr salp)
7354
CharPtr TmpBuff, tmp;
7356
Boolean found = FALSE;
7360
if (salp->segtype == 2)
7362
dsp = (DenseSegPtr) salp->segs;
7364
while (!found && sip != NULL)
7368
SeqIdWrite (sip, str, PRINTID_FASTA_LONG, 50);
7370
tmp = StringStr (str, "acc");
7372
/* Step over the three characters of the marker that was just found. */
tmp++; tmp++; tmp++;
7376
/* Walk forward until end of string, a bar, a newline or a dot. */
while (*tmp!='\0' && *tmp != '|' && *tmp!='\n' && *tmp != '.')
7380
j = StringLen (TmpBuff);
7381
for(k =0; k < j; k++) {
7382
if(!isdigit(TmpBuff[k])) {
7387
found=(IS_ntdb_accession(TmpBuff) || IS_protdb_accession(TmpBuff));
7398
/***************************************************************************************
7400
*** ValidateSeqAlignandACC
7401
*** calls ValidateSeqAlign (in api directory)
7402
*** and tests for occurrence of ACC string in sequence ID.
7403
*** ACC|ACC# will be compared with the corresponding sequence (ACC#)
7404
*** in the database and replaced by a far pointer if the sequences
7407
***************************************************************************************/
7408
/* Settings and state shared by the ValidateSeqAlignandACC routines in
   this file. The entry points copy their same-named Boolean parameters
   into these fields before validating. */
typedef struct saval {
7410
Boolean msg_success;      /* copied from the msg_success parameter */
7411
Boolean find_remote_bsp;  /* copied from the find_remote_bsp parameter */
7412
Boolean find_acc_bsp;     /* when set, check_dbid_seqalign is run */
7413
Boolean delete_salp;      /* copied from the delete_salp parameter */
7419
} SaVal, PNTR SaValPtr;
7423
/* Validates the alignment(s) reachable from salp.
   - segtype 5: recurses on the alignment stored in segs.
   - segtype outside 1..4: reports Err_Segtype through ValMessage.
   - the remaining path calls ValidateSeqAlign and, when find_acc_bsp is
     set, check_dbid_seqalign followed by CCNormalizeSeqAlignId.
   Collected messages are printed and the errorp chain is destroyed.
   On exit the dirty flag is written to *dirty and, when id_list is not
   NULL, the collected id list is handed back through *id_list.
   NOTE(review): callers in this file pass NULL for dirty; confirm that
   the write to *dirty is guarded by a line not shown here. */
ValidateSeqAlignandACCEx
7424
(SeqAlignPtr salp, Uint2 entityID, Boolean message,
7425
Boolean msg_success, Boolean find_remote_bsp,Boolean find_acc_bsp,
7426
Boolean delete_bsp, Boolean delete_salp, BoolPtr dirty,
7427
ValNodePtr PNTR id_list) /* added id_list so that we could defer deleting bioseqs */
7440
/* initialize SaVal structure */
7441
sv.message = message;
7442
sv.msg_success = msg_success;
7443
sv.find_remote_bsp = find_remote_bsp;
7444
sv.find_acc_bsp = find_acc_bsp;
7445
sv.delete_salp = delete_salp;
7446
sv.delete_bsp = delete_bsp;
7449
sv.entityID = entityID;
7458
/* NOTE(review): the segtype tests below read salp while the actions use
   salptmp; if salptmp walks a chain, the tests may need salptmp too. */
if(salp->segtype==5)
7460
ValidateSeqAlignandACCEx ((SeqAlignPtr) (salptmp->segs), entityID,
7461
message, msg_success, find_remote_bsp,
7462
find_acc_bsp, delete_bsp, delete_salp,
7463
&svp->dirty, id_list);
7465
else if (salp->segtype<1 || salp->segtype>4)
7467
ValMessage (Err_Segtype, SEV_ERROR, NULL, NULL, salptmp->segtype);
7471
ValidateSeqAlign (salptmp, svp->entityID, svp->message,
7472
svp->msg_success, svp->find_remote_bsp,
7473
svp->delete_bsp, svp->delete_salp, &svp->dirty);
7474
if (svp->find_acc_bsp)
7476
ok = check_dbid_seqalign (salptmp);
7479
/* Continue from the id list supplied by the caller, if any. */
if (id_list != NULL)
7481
svp->ids = *id_list;
7483
svp->ids = CCNormalizeSeqAlignId (salptmp, svp->ids);
7484
if (svp->ids!=NULL && svp->entityID > 0) {
7485
if (svp->delete_bsp)
7487
delete_bioseqs (svp->ids, svp->entityID);
7499
/* Flush the messages gathered by ValMessage and reset the chain. */
BlastErrorPrint (errorp);
7500
errorp = BlastErrorChainDestroy (errorp);
7502
if (svp->delete_salp)
7507
salptmp->next = NULL;
7508
SeqAlignFree (salptmp);
7513
/* Unlink salptmp from the chain before freeing it, then resume from
   the node that followed it. */
pre->next = salptmp->next;
7514
salptmp->next = NULL;
7515
SeqAlignFree (salptmp);
7516
salptmp = pre->next;
7521
salptmp = salptmp->next;
7528
salptmp = salptmp->next;
7531
if (err_count==0 && svp->msg_success)
7534
ans = Message (MSG_OK, "Validation test of %d alignments succeded", salp_count);
7536
ans = Message (MSG_OK, "Validation test of the alignment succeded");
7539
*dirty = svp->dirty;
7541
if (id_list != NULL)
7543
*id_list = svp->ids;
7548
/* Public wrapper: forwards every argument unchanged to
   ValidateSeqAlignandACCEx and passes NULL for id_list. */
NLM_EXTERN Boolean ValidateSeqAlignandACC (SeqAlignPtr salp, Uint2 entityID, Boolean message,
7549
Boolean msg_success, Boolean find_remote_bsp,Boolean find_acc_bsp,
7550
Boolean delete_bsp, Boolean delete_salp, BoolPtr dirty)
7552
return ValidateSeqAlignandACCEx (salp, entityID, message, msg_success,
7553
find_remote_bsp, find_acc_bsp, delete_bsp,
7554
delete_salp, dirty, NULL);
7557
/***************************************************************************
7559
* ValidateAllAlignmentsInAnnotList (sap, svp)
7561
* This function validates all of the alignments in the annotation list
7562
* (there may be multiple alignments, especially when there is an alignment
7563
* of segmented sequences), and then deletes the local versions of sequences
7564
* which have been replaced by farpointers.
7565
* We wait to remove the sequences in case the sequence is used in more than
7566
* one alignment, which may be the case if an alignment of segmented sets
7567
* contains a far pointer, and that far pointer points to a sequence that is
7568
* not actually a segmented set.
7570
***************************************************************************/
7571
static void ValidateAllAlignmentsInAnnotList (SeqAnnotPtr sap, SaValPtr svp)
7574
/* Ids gathered during validation; used for the deferred deletion below. */
ValNodePtr id_list = NULL;
7583
/* Only annotations of type 2 that carry data are treated as alignments. */
if (sap->type == 2 && sap->data != NULL)
7585
salp = (SeqAlignPtr) sap->data;
7586
/* delete_bsp is passed as FALSE here so that no bioseq is deleted during
   validation; the final argument (line not shown) is presumably the
   address of id_list - TODO confirm. */
ValidateSeqAlignandACCEx (salp, svp->entityID, svp->message,
7587
svp->msg_success, svp->find_remote_bsp,
7588
svp->find_acc_bsp, FALSE,
7589
svp->delete_salp, &svp->dirty,
7594
/* Deferred deletion, done only when the caller asked for it. */
if (svp->delete_bsp)
7596
delete_bioseqs (id_list, svp->entityID);
7602
/***************************************************************************
7604
* ValidateSeqAlignandACCCallback (sep, mydata, index, indent)
7606
* This function is a callback for SeqEntryExplore used by
7607
* ValidateSeqAlignandACCInSeqEntry. It will validate the alignments
7608
* found in the record.
7609
* This function used to only validate the first alignment found on a
7610
* SeqEntry. It was repaired to validate all alignments on the SeqEntry
7611
* on May 27, 2005 by Colleen Bollin.
7613
***************************************************************************/
7614
static void ValidateSeqAlignandACCCallback (SeqEntryPtr sep, Pointer mydata,
7615
Int4 index, Int2 indent)
7619
SaValPtr svp = NULL;
7620
SeqAnnotPtr sap = NULL;
7622
/* Nothing is done unless the entry, its payload and the user data are
   all present. */
if (sep != NULL && sep->data.ptrvalue && mydata != NULL) {
7623
/* mydata is the SaVal block supplied to SeqEntryExplore. */
svp = (SaValPtr)mydata;
7624
if (IS_Bioseq(sep)) {
7625
bsp = (BioseqPtr) sep->data.ptrvalue;
7630
else if(IS_Bioseq_set(sep)) {
7631
bssp = (BioseqSetPtr)sep->data.ptrvalue;
7637
/* sap is presumably taken from the bioseq or set in lines not shown. */
ValidateAllAlignmentsInAnnotList (sap, svp);
7641
/* Validates the alignments found under the top SeqEntry of the entity
   that owns sep. The Boolean parameters are copied into a SaVal block,
   which is handed to ValidateSeqAlignandACCCallback through
   SeqEntryExplore. success starts as TRUE and is replaced by sv.retdel
   after the exploration; the return statement itself is not shown. */
NLM_EXTERN Boolean ValidateSeqAlignandACCInSeqEntry (SeqEntryPtr sep, Boolean message,
7642
Boolean msg_success, Boolean find_remote_bsp, Boolean find_acc_bsp,
7643
Boolean delete_bsp, Boolean delete_salp)
7645
SeqEntryPtr sep_head;
7648
Boolean success=TRUE;
7650
entityID = ObjMgrGetEntityIDForChoice (sep);
7652
/* Always start from the top entry of the entity, not from sep itself. */
sep_head = GetTopSeqEntryForEntityID (entityID);
7653
if (sep_head != NULL) {
7654
sv.message = message;
7655
sv.msg_success = msg_success;
7656
sv.find_remote_bsp = find_remote_bsp;
7657
sv.find_acc_bsp = find_acc_bsp;
7658
sv.delete_salp = delete_salp;
7659
sv.delete_bsp = delete_bsp;
7662
sv.entityID = entityID;
7664
SeqEntryExplore (sep_head, (Pointer)&sv, ValidateSeqAlignandACCCallback);
7666
/* Mark the entity as modified and notify listeners; these calls may be
   conditional on a line not shown here. */
ObjMgrSetDirtyFlag (entityID, TRUE);
7667
ObjMgrSendMsg (OM_MSG_UPDATE, entityID, 0, 0);
7669
success = sv.retdel;
7676
/* we need to iterate through the actual SeqEntries, because theoretically the
7677
* same SeqID should appear in the SeqEntry with the new alignment and again
7678
* in the SeqEntry of the original record.
7680
static BioseqPtr FindBioseqInSep (SeqEntryPtr sep, SeqIdPtr sip)
7682
BioseqPtr bsp = NULL;
7684
SeqEntryPtr this_sep;
7686
/* A missing entry or id can never match. */
if (sep == NULL || sip == NULL) return NULL;
7688
if (IS_Bioseq (sep))
7690
/* Leaf case: test this bioseq against the id. */
bsp = (BioseqPtr) sep->data.ptrvalue;
7691
if (! BioseqMatch(bsp, sip))
7696
else if (IS_Bioseq_set (sep))
7698
bssp = (BioseqSetPtr) sep->data.ptrvalue;
7699
/* Recurse into each member of the set; the loop ends as soon as one
   member yields a non-NULL bioseq. */
for (this_sep = bssp->seq_set; this_sep != NULL && bsp == NULL; this_sep = this_sep->next)
7701
bsp = FindBioseqInSep (this_sep, sip);
7708
/* For each id of the dense-seg alignment found in sepnew, locates the
   bioseq carrying that id in both sepnew and sepold, BLASTs temporary
   copies of the two against each other, picks the best hit with
   SeqAlignBestHit and updates the start positions for that id in the
   alignment through SeqAlignStartUpdate.
   Does nothing visible when the alignment is not segtype 2 or has no
   segs.
   NOTE(review): in the lines shown, errstr is allocated on every
   iteration and bestsalp is used without a NULL test; confirm that the
   missing lines free the former and guard the latter. */
NLM_EXTERN void CalculateAlignmentOffsets (SeqEntryPtr sepnew, SeqEntryPtr sepold)
7710
SeqAlignPtr salpnew;
7712
SeqIdPtr sip_temp, sip_next;
7713
BioseqPtr bsp1, bsp2;
7714
BLAST_OptionsBlkPtr options;
7715
SeqAlignPtr seqalign = NULL;
7716
SeqAlignPtr bestsalp = NULL;
7717
Int4 start1, start2, stop1, stop2;
7718
CharPtr errstr = NULL;
7720
Int4 offset, len, nonly;
7721
BioseqPtr copybsp1, copybsp2;
7722
SeqIdPtr tmp_id_1, tmp_id_2;
7724
if (sepnew == NULL || sepold == NULL)
7728
/* this function needs to look at the bioseqs we have created while reading in the
7729
* alignment, align them with the existing bioseqs, and adjust the alignment start
7730
* positions if necessary.
7733
salpnew = (SeqAlignPtr) FindSeqAlignInSeqEntry (sepnew, OBJ_SEQALIGN);
7734
if (salpnew == NULL)
7739
if (salpnew->segtype != 2) return;
7740
dsp = (DenseSegPtr) salpnew->segs;
7741
if (dsp == NULL) return;
7743
/* create IDs to use when copying Bioseqs.
7744
* BioseqCopyEx makes a copy of these for the Bioseq it creates,
7745
* so we can reuse them and then free them at the end of the for-next loop.
7747
tmp_id_1 = MakeSeqID ("lcl|tmp_1_for_update");
7748
tmp_id_2 = MakeSeqID ("lcl|tmp_2_for_update");
7750
for (sip_temp = dsp->ids; sip_temp != NULL; sip_temp = sip_next)
7752
sip_next = sip_temp->next;
7753
sip_temp->next = NULL;
7755
/* find bsp1 in sepnew, bsp2 in sepold */
7756
bsp1 = FindBioseqInSep (sepnew, sip_temp);
7757
bsp2 = FindBioseqInSep (sepold, sip_temp);
7759
if (bsp1 != NULL && bsp2 != NULL)
7761
/* create alignment between old and new bioseqs */
7762
/* new bioseq will have same ID as old bioseq, so BLAST won't work
7763
* because it's looking for IDs using indexing.
7764
* Create a temporary copy of the two bioseqs with different IDS,
7765
* add them to the BioseqIndex, BLAST them, then remove them
7766
* from the index and delete them.
7768
copybsp1 = BioseqCopyEx (tmp_id_1, bsp1, 0, bsp1->length - 1, Seq_strand_plus, FALSE);
7769
copybsp2 = BioseqCopyEx (tmp_id_2, bsp2, 0, bsp2->length - 1 , Seq_strand_plus, FALSE);
7770
SeqMgrAddToBioseqIndex (copybsp1);
7771
SeqMgrAddToBioseqIndex (copybsp2);
7773
options = BLASTOptionNew("blastn", TRUE);
7774
options->filter_string = StringSave("m L;R");
7775
seqalign = BlastTwoSequences (copybsp1, copybsp2, "blastn", options);
7778
errstr = (CharPtr)MemNew(1000*sizeof(Char));
7779
bestsalp = SeqAlignBestHit (seqalign, copybsp1, copybsp2, 100, &errstr, &nonly);
7781
/* we don't need the copies after this, and we don't want them in the BioseqIndex
7782
* (or BLAST might get confused the next time through the loop).
7784
copybsp1->idx.deleteme = TRUE;
7785
copybsp2->idx.deleteme = TRUE;
7786
SeqMgrDeleteFromBioseqIndex (copybsp1);
7787
SeqMgrDeleteFromBioseqIndex (copybsp2);
7789
/* update start position in alignment */
7790
/* offset is the start of row 1 minus the start of row 0 in the best hit. */
offset = SeqAlignStart(bestsalp, 1)-SeqAlignStart(bestsalp, 0);
7791
/* True when exactly one of the two rows is on the minus strand. */
if ((SeqAlignStrand(bestsalp, 0)==Seq_strand_minus && SeqAlignStrand(bestsalp, 1) != Seq_strand_minus) || (SeqAlignStrand(bestsalp, 1)==Seq_strand_minus && SeqAlignStrand(bestsalp, 0) != Seq_strand_minus))
7793
strand=Seq_strand_minus;
7794
AlnMgr2IndexSingleChildSeqAlign(bestsalp);
7795
AlnMgr2GetNthSeqRangeInSA(bestsalp, 1, &start1, &stop1);
7796
AlnMgr2GetNthSeqRangeInSA(bestsalp, 2, &start2, &stop2);
7797
len = stop2 + start1;
7800
offset = 0 - offset;
7805
strand=Seq_strand_plus;
7809
/* Only the magnitude of offset is passed on. */
SeqAlignStartUpdate (salpnew, sip_temp, abs(offset), len, strand);
7811
/* Re-attach the id to the rest of the chain that was cut off at the top
   of the loop. */
sip_temp->next = sip_next;
7813
SeqIdFree (tmp_id_1);
7814
SeqIdFree (tmp_id_2);
7823
/* Checks, row by row, whether the bioseq found for each alignment row is
   at least as long as the largest coordinate that the row covers.
   Ids of rows that fail are collected, written one per line to the file
   named by path, shown in a text viewer, and the user is asked whether
   to continue. Returns FALSE at once for a NULL alignment; retval starts
   as TRUE and the remaining return paths are not visible here. */
NLM_EXTERN Boolean CheckAlignmentSequenceLengths (SeqAlignPtr salp)
7825
Int4 i, num_rows, num_bad = 0;
7829
ValNodePtr sip_list = NULL, vnp;
7830
Char path [PATH_MAX];
7833
Boolean retval = TRUE;
7835
if (salp == NULL) return FALSE;
7837
num_rows = AlnMgr2GetNumRows(salp);
7839
for (i = 0; i < num_rows; i++) {
7840
/* The AlnMgr2 calls take row numbers starting at 1, hence i + 1. */
AlnMgr2GetNthSeqRangeInSA(salp, i + 1, &from, &to);
7841
sip = AlnMgr2GetNthSeqIdPtr(salp, i + 1);
7842
bsp = BioseqFind (sip);
7844
/* Make to the larger of the two range ends. */
if (from > to) to = from;
7845
/* NOTE(review): if to is a zero-based position, a bioseq whose length
   equals to is also too short - confirm the intended comparison. */
if (bsp->length < to) {
7846
/* The list takes over this id; it is freed when the list is written. */
ValNodeAddPointer (&sip_list, 0, sip);
7849
sip = SeqIdFree (sip);
7853
if (sip_list != NULL) {
7855
fp = FileOpen (path, "w");
7857
Message (MSG_ERROR, "Unable to open %s", path);
7860
while (vnp != NULL) {
7861
SeqIdWrite (vnp->data.ptrvalue, str, PRINTID_FASTA_LONG, sizeof (str) - 1);
7862
fprintf (fp, "%s\n", str);
7863
vnp->data.ptrvalue = SeqIdFree (vnp->data.ptrvalue);
7867
LaunchGeneralTextViewer (path, "Short Sequences");
7870
if (Message(MSG_YN, "%d sequence%s too short for this alignment.  Do you wish to continue?",
7871
num_bad, num_bad > 1 ? "s are" : " is") == ANS_NO) {
7874
sip_list = ValNodeFree (sip_list);
7880
/******************************************************************
7881
call back function for REGISTER_ALIGNVALIDATION defined in sequin4.c.
7882
Starting point for seqalign validation if user clicked on
7883
SeqalignValidation under menu Filer/Alignment.
7884
Either individual alignment or alignment block
7885
should be highlighted for this validation to work
7886
******************************************************************/
7888
NLM_EXTERN Int2 LIBCALLBACK ValidateSeqAlignandACCFromData (Pointer data)
7891
OMProcControlPtr ompcp;
7892
SeqAlignPtr salp=NULL;
7893
SeqAnnotPtr sap=NULL;
7894
SeqEntryPtr sep=NULL;
7896
/* data is the object manager control block for this invocation. */
ompcp = (OMProcControlPtr) data;
7897
if (ompcp == NULL || ompcp->proc == NULL) return OM_MSG_RET_ERROR;
7899
if (ompcp->input_data == NULL) return OM_MSG_RET_ERROR;
7901
/* Decide what was selected; exactly one of sep, sap or salp is set by
   the visible assignments below. */
switch(ompcp->input_itemtype)
7904
sep = SeqMgrGetSeqEntryForData (ompcp->input_data);
7906
case OBJ_BIOSEQSET :
7907
sep = SeqMgrGetSeqEntryForData (ompcp->input_data);
7909
/*if clicked on alignment block*/
7911
sap=(SeqAnnotPtr) (ompcp->input_data);
7913
/*if clicked on individual alignment*/
7915
salp=(SeqAlignPtr) (ompcp->input_data);
7918
return OM_MSG_RET_ERROR;
7920
return OM_MSG_RET_ERROR;
7923
ErrSetMessageLevel(SEV_ERROR);
7926
/* Annotation selected: extract its alignment first, then validate. */
salp=is_salp_in_sap(sap, 2);
7927
ValidateSeqAlignandACC (salp, 0, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, NULL);
7930
/* Single alignment selected: validate it directly. */
ValidateSeqAlignandACC (salp, 0, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, NULL);
7933
/* Bioseq or set selected: validate every alignment in its SeqEntry. */
ValidateSeqAlignandACCInSeqEntry (sep, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE);
7935
return OM_MSG_RET_DONE;