2
* ===========================================================================
5
* National Center for Biotechnology Information
7
* This software/database is a "United States Government Work" under the
8
* terms of the United States Copyright Act. It was written as part of
9
* the author's official duties as a United States Government employee and
10
* thus cannot be copyrighted. This software/database is freely available
11
* to the public for use. The National Library of Medicine and the U.S.
12
* Government have not placed any restriction on its use or reproduction.
14
* Although all reasonable efforts have been taken to ensure the accuracy
15
* and reliability of the software and data, the NLM and the U.S.
16
* Government do not and cannot warrant the performance or results that
17
* may be obtained by using this software or data. The NLM and the U.S.
18
* Government disclaim all warranties, express or implied, including
19
* warranties of performance, merchantability or fitness for any particular
22
* Please cite the author in any work or product based on this material.
24
* ===========================================================================
28
* Author: James Ostell
30
* Version Creation Date: 2/4/94
34
* File Description: Sequence editing utilities
37
* --------------------------------------------------------------------------
38
* Date Name Description of modification
39
* ------- ---------- -----------------------------------------------------
42
* Revision 6.14 2001/06/01 18:07:20 kans
43
* changes to SeqLocAdd to allow one plus and one unknown strand to be accepted
45
* Revision 6.13 2001/02/23 21:30:09 shkeda
46
* Fixed SeqLocAdd: Int-fuzz pointers should be set to NULL after IntFuzzFree
48
* Revision 6.12 2001/02/23 01:26:07 ostell
49
* Added support to BioseqDelete() for delta seqs
51
* Revision 6.11 2000/10/31 17:11:06 kans
52
* SeqLocReplaceID was handling SEQLOC_PACKED_PNT incorrectly
54
* Revision 6.10 1999/12/20 20:47:12 kans
55
* oldscope test was wrong everywhere
57
* Revision 6.9 1999/12/15 20:52:16 kans
58
* added IndexedSeqFeatsCopy if SeqMgrFeaturesAreIndexed
60
* Revision 6.8 1999/12/07 20:32:13 kans
61
* for most editing functions, if BioseqFind failed, temporarily clear scope/try again/reset scope
63
* Revision 6.7 1999/11/19 19:54:19 kans
64
* SeqLocAdd checks for NULL slp before dereferencing
66
* Revision 6.6 1998/09/03 20:43:52 kans
67
* added delta bioseq support to BioseqCopy
69
* Revision 6.5 1998/06/22 20:00:46 kans
70
* DelFeat was a bit too aggressive when there were multiple feature tables
72
* Revision 6.4 1998/06/17 21:50:11 kans
73
* fixed unix compiler warnings, including 64-bit SGI
75
* Revision 6.3 1997/11/10 19:40:48 bazhin
76
* Fixed incorrect comment for ISAGappedSeqLoc() function.
78
* Revision 6.2 1997/10/24 19:16:17 bazhin
79
* Added three easy functions GapToSeqLoc(...), ISAGappedSeqLoc(...)
80
* and GappedSeqLocsToDeltaSeqs(...) for processing "gap(...)" tokens
83
* Revision 6.1 1997/10/10 20:18:02 ostell
84
* removed tab character from SeqLitTag for DeltaSeqsToSeqLoc
86
* Revision 6.0 1997/08/25 18:05:24 madden
87
* Revision changed to 6.0
89
* Revision 5.10 1997/07/25 20:34:51 kans
90
* added SegLocToPartsEx
92
* Revision 5.9 1997/06/19 18:37:30 vakatov
93
* [WIN32,MSVC++] Adopted for the "NCBIOBJ.LIB" DLL'ization
95
* Revision 5.8 1996/12/20 17:59:34 kans
96
* SeqLocCopyRegion already reversed order for Seq_strand_minus, so no need
97
* to reverse it again (JO + JK)
99
* Revision 5.7 1996/10/21 18:56:19 ostell
100
* made SegLocToParts accept a complicated Seq-loc argument
102
* Revision 5.6 1996/10/09 17:27:34 chappey
103
* *** empty log message ***
105
* Revision 5.5 1996/10/09 16:34:59 chappey
106
* added SeqLocReplaceID() that replaces the Seq-Id of a Seq-Loc
108
* Revision 5.4 1996/07/15 14:43:51 epstein
109
* change SeqLocAdd() so that it merges identical SEQLOC_PNTs
111
* Revision 5.3 1996/06/12 18:29:41 epstein
112
* move SeqLocIntNew() and SeqLocPntNew() from edutil to sequtil
114
* Revision 5.1 1996/06/10 15:07:17 epstein
115
* replace make_seq_loc() with SeqLocIntNew() and make_pnt_loc with SeqLocPntNew()
117
* Revision 5.0 1996/05/28 13:23:23 ostell
118
* Set to revision 5.0
120
* Revision 4.10 1996/03/19 19:45:24 kans
121
* fix of SegLocToParts (JO)
123
* Revision 4.9 1996/03/12 22:14:22 ostell
124
* added SeqLocToParts()
126
* Revision 4.7 1996/02/19 19:58:05 ostell
127
* added support for Code-break and tRNA.anticodon
129
* Revision 4.6 1996/01/30 16:24:04 ostell
130
* changed name of SeqLocPack() to SeqLocPackage()
132
* Revision 4.5 1996/01/29 22:03:52 ostell
136
* Revision 4.4 1996/01/10 22:25:25 ostell
137
* added SeqLocIntNew()
139
* Revision 4.3 1995/12/29 21:31:44 ostell
140
* added mapping functions between delta seq and seq loc, for editing utilities
142
* Revision 4.2 1995/12/21 02:35:50 ostell
143
* changed call for BSAdd
145
* Revision 4.1 1995/11/15 20:40:20 ostell
146
* fixed SeqLocCopyPart so it correctly handles SEQLOC_NULL in segmented
149
* Revision 4.0 1995/07/26 13:49:01 ostell
150
* force revision to 4.0
152
* Revision 1.22 1995/05/15 21:46:05 ostell
157
* ==========================================================================
165
/*****************************************************************************
167
* SeqLocPackage(head)
168
* head is a chain of 1 or more SeqLocs connected by slp->next
169
* Assumes the chain was built by SeqLocAdd to remove redundancy
170
* Frees the last element if it is a NULL.
171
* If more than one element is left, packages the chain into a SEQLOC_MIX,
172
* or SEQLOC_PACKED_INT as appropriate
173
* returns pointer to the head of the resulting single SeqLoc
175
* A NULL head is returned unchanged; NULL is returned when nothing is left.
*****************************************************************************/
176
NLM_EXTERN SeqLocPtr LIBCALL SeqLocPackage (SeqLocPtr head)
178
SeqLocPtr newhead = NULL, tmp, prev;
179
/* starts TRUE; the scan below tests every element against SEQLOC_INT
   (presumably clearing the flag on the first non-interval -- confirm) */
Boolean packed_int = TRUE;
182
/* an empty chain is handed straight back */
if (head == NULL) return head;
184
prev = NULL; /* remove trailing NULL */
185
/* advance tmp to the last element of the chain */
for (tmp = head; tmp->next != NULL; tmp = tmp->next)
188
if (tmp->choice == SEQLOC_NULL)
194
return NULL; /* nothing left */
197
/* second pass over the whole chain: look for anything that is not an interval */
for (tmp = head; tmp != NULL; tmp = tmp->next)
200
if (tmp->choice != SEQLOC_INT)
207
/* wrapper node; its data pointer is set to the chain below */
newhead = ValNodeNew(NULL);
209
newhead->choice = SEQLOC_PACKED_INT;
211
newhead->choice = SEQLOC_MIX;
212
newhead->data.ptrvalue = head;
217
/*****************************************************************************
219
* SeqLocAdd(head, slp, merge, do_copy)
220
* creates a linked list of SeqLocs; *head is the start of the chain.
221
* returns a pointer to the last SeqLoc in the chain (NULL if slp is NULL)
223
* if merge:
* deletes double NULLs or Nulls at start (application must delete at stop)
224
* merges adjacent intervals on the same strand; an unknown strand on
* either side is accepted together with any non-minus strand on the other
* merges identical points on the same strand and Bioseq
226
* Makes copies of incoming SeqLocs
227
* if incoming is merged, deletes the incoming SeqLoc
* (the incoming SeqLoc is freed here only when do_copy is FALSE)
229
*****************************************************************************/
230
NLM_EXTERN SeqLocPtr LIBCALL SeqLocAdd (SeqLocPtr PNTR head, SeqLocPtr slp, Boolean merge, Boolean do_copy)
232
SeqLocPtr tmp, last, retval = NULL;
233
Boolean merged = FALSE; /* intervals were merged */
235
if (slp == NULL) return NULL;
240
/* walk the existing chain (presumably to leave last on its final element) */
for (tmp = *head; tmp != NULL; tmp = tmp->next)
246
if ((slp->choice == SEQLOC_NULL) && (merge)) /* no null at start, or two in a row */
248
if (last == NULL) /* first one */
253
if (last->choice == SEQLOC_NULL) /* double NULL */
260
if ((last != NULL) && (merge)) /* check for merging intervals */
262
if ((last->choice == SEQLOC_INT) && (slp->choice == SEQLOC_INT))
264
SeqIntPtr sip1, sip2;
266
Uint1 strand = Seq_strand_unknown;
268
sip1 = (SeqIntPtr)(last->data.ptrvalue);
269
sip2 = (SeqIntPtr)(slp->data.ptrvalue);
271
/* Strands are compatible when they are equal, or when one side is unknown
   and the other is not minus.  The test is symmetric: the previous third
   clause repeated the second, so (plus, unknown) was never merged. */
if ((sip1->strand == sip2->strand) ||
272
(sip1->strand == Seq_strand_unknown && sip2->strand != Seq_strand_minus) ||
273
(sip2->strand == Seq_strand_unknown && sip1->strand != Seq_strand_minus)) {
275
/* strand of the merged interval: minus or plus if EITHER side carries it
   (previously sip1 was compared with itself, so sip2 was ignored) */
if (sip1->strand == Seq_strand_minus || sip2->strand == Seq_strand_minus) {
276
strand = Seq_strand_minus;
277
} else if (sip1->strand == Seq_strand_plus || sip2->strand == Seq_strand_plus) {
278
strand = Seq_strand_plus;
280
strand = Seq_strand_unknown;
283
if (samestrand && (SeqIdForSameBioseq(sip1->id, sip2->id)))
285
if (strand == Seq_strand_minus)
287
if (sip1->from == (sip2->to + 1)) /* they are adjacent */
289
sip1->from = sip2->from;
290
/* drop the old fuzz first; IntFuzzFree's result resets the pointer */
sip1->if_from = IntFuzzFree(sip1->if_from);
291
if (sip2->if_from != NULL) /* copy the fuzz */
294
sip1->if_from = (IntFuzzPtr)AsnIoMemCopy((Pointer)(sip2->if_from),
295
(AsnReadFunc)IntFuzzAsnRead, (AsnWriteFunc)IntFuzzAsnWrite);
298
/* alternative path: take over sip2's fuzz and detach it from sip2 */
sip1->if_from = sip2->if_from;
299
sip2->if_from = NULL;
301
sip1->strand = strand;
308
/* non-minus case: the new interval must start right after the last one */
if (sip1->to == (sip2->from - 1)) /* they are adjacent */
311
sip1->if_to = IntFuzzFree(sip1->if_to);
312
if (sip2->if_to != NULL) /* copy the fuzz */
315
sip1->if_to = (IntFuzzPtr)AsnIoMemCopy((Pointer)(sip2->if_to),
316
(AsnReadFunc)IntFuzzAsnRead, (AsnWriteFunc)IntFuzzAsnWrite);
319
sip1->if_to = sip2->if_to;
322
sip1->strand = strand;
328
} else if ((last->choice == SEQLOC_PNT) && (slp->choice == SEQLOC_PNT))
330
SeqPntPtr sip1, sip2;
332
sip1 = (SeqPntPtr)(last->data.ptrvalue);
333
sip2 = (SeqPntPtr)(slp->data.ptrvalue);
334
/* identical points (same strand, position and Bioseq) collapse into one */
if ((sip1->strand == sip2->strand) && sip1->point == sip2->point && (SeqIdForSameBioseq(sip1->id, sip2->id)))
336
sip1->fuzz = IntFuzzFree(sip1->fuzz);
337
if (sip2->fuzz != NULL) /* copy the fuzz */
340
sip1->fuzz = (IntFuzzPtr)AsnIoMemCopy((Pointer)(sip2->fuzz),
341
(AsnReadFunc)IntFuzzAsnRead, (AsnWriteFunc)IntFuzzAsnWrite);
344
sip1->fuzz = sip2->fuzz;
354
if (! merged) /* then have to add a new one */
357
tmp = (SeqLocPtr)AsnIoMemCopy((Pointer)slp, (AsnReadFunc)SeqLocAsnRead, (AsnWriteFunc)SeqLocAsnWrite);
372
if (! do_copy) /* got to free it here */
379
/*****************************************************************************
381
* SegLocToPartsEx(BioseqPtr seg, SeqLocPtr slp, Boolean nullsBetween)
382
* seg must be a segmented Bioseq
383
* slp must be a SeqLoc on it
384
* function maps slp to the components of seg
385
* returns a new SeqLocPtr (NULL when an argument is NULL, seg is not
* segmented, or slp is not on seg)
386
* does not delete slp
* if nullsBetween, a SEQLOC_NULL is added in front of a mapped interval
* whenever notFirst is set
388
*****************************************************************************/
389
NLM_EXTERN SeqLocPtr LIBCALL SegLocToPartsEx (BioseqPtr seg, SeqLocPtr slp, Boolean nullsBetween)
391
SeqLocPtr newloc = NULL, tmp, tmp2, tmp3, next, curr;
394
Int4 left_end, right_end, tlen, tstart;
396
Boolean split, notFirst = FALSE;
398
if ((seg == NULL) || (slp == NULL)) return newloc;
399
if (seg->repr != Seq_repr_seg) return newloc;
402
/* sip is assigned on a line not visible here (presumably the id of slp) */
if (sip == NULL) return newloc;
403
if (! SeqIdIn(sip, seg->id)) return newloc;
405
/* stack-allocated SEQLOC_MIX wrapper so the segment list in seq_ext can be
   walked with SeqLocFindNext */
MemSet(&thead, 0, sizeof(ValNode));
406
thead.choice = SEQLOC_MIX;
407
thead.data.ptrvalue = seg->seq_ext;
410
/* outer loop: each component location of slp */
while ((curr = SeqLocFindNext(slp, curr)) != NULL)
414
/* inner loop: each segment of seg; left_end..right_end is the span tested
   for this segment (left_end advances by the segment length below) */
while ((tmp = SeqLocFindNext(&thead, tmp)) != NULL)
416
tlen = SeqLocLen(tmp);
419
right_end = left_end + tlen - 1;
420
tsip = SeqLocId(tmp);
421
tstart = SeqLocStart(tmp);
422
tmp2 = SeqLocCopyRegion(tsip, curr, seg, left_end, right_end, SeqLocStrand(tmp),
428
if (tmp2->choice == SEQLOC_INT)
430
if (nullsBetween && notFirst) {
431
tmp3 = ValNodeNew (NULL);
433
tmp3->choice = SEQLOC_NULL;
434
/* do_copy is FALSE here */
SeqLocAdd (&newloc, tmp3, TRUE, FALSE);
438
sintp = (SeqIntPtr)(tmp2->data.ptrvalue);
439
/* shift by the segment's own start offset */
sintp->from += tstart;
441
SeqLocAdd(&newloc, tmp2, TRUE, FALSE);
445
left_end = right_end + 1;
451
/* collapse the accumulated chain into a single SeqLoc */
newloc = SeqLocPackage(newloc);
455
/* SegLocToParts(seg, slp): same as SegLocToPartsEx with nullsBetween FALSE. */
NLM_EXTERN SeqLocPtr LIBCALL SegLocToParts (BioseqPtr seg, SeqLocPtr slp)
458
return SegLocToPartsEx (seg, slp, FALSE);
461
/* Dbtag db name that ISADeltaSeqsToSeqLoc compares against to recognise a
   SeqLoc that stands in for a SeqLit. */
static CharPtr seqlitdbtag = "SeqLit";
462
/*****************************************************************************
464
* ISADeltaSeqsToSeqLoc(slp)
465
* returns Index (> 0) if this (one) SeqLoc was converted from a Delta Seq by
466
* DeltaSeqsToSeqLocs() by looking for the special Dbtag name
* (a SEQID_GENERAL id whose db equals seqlitdbtag); the index is the
* Dbtag's tag->id
468
*****************************************************************************/
469
NLM_EXTERN Int4 LIBCALL ISADeltaSeqsToSeqLoc (SeqLocPtr slp)
474
/* retval and sip are declared and initialised on lines not visible here;
   SeqLocsToDeltaSeqs treats a result of 0 as "just a SeqLoc" */
if (slp == NULL) return retval;
476
if (sip == NULL) return retval;
478
if (sip->choice != SEQID_GENERAL) return retval;
480
/* StringCmp yields 0 on equality, hence the negation */
if (! StringCmp(seqlitdbtag, ((DbtagPtr)(sip->data.ptrvalue))->db))
481
/* NOTE(review): tag is dereferenced with no NULL check visible here */
retval = (((DbtagPtr)(sip->data.ptrvalue))->tag->id);
486
/*****************************************************************************
488
* DeltaSeqsToSeqLocs(dsp)
489
* converts a chain of delta seqs to seqlocs
490
* each SeqLit is converted to SeqLoc of type Int with a SeqId of type
491
* Dbtag where db="SeqLit" (see seqlitdbtag) and objectId.id which is the
* index of the
492
* element in the delta seq chain where 1 is the first one.
493
* Returned SeqLoc is of type "mix" and must be freed by caller.
* NOTE(review): the result comes from SeqLocPackage, whose header says it
* may also produce a SEQLOC_PACKED_INT -- confirm the "mix" guarantee.
495
*****************************************************************************/
496
NLM_EXTERN SeqLocPtr LIBCALL DeltaSeqsToSeqLocs (DeltaSeqPtr dsp)
498
SeqLocPtr head = NULL, thead=NULL;
505
/* Stack templates, zeroed before use: vn is a SEQLOC_INT over si, vn2 a
   SEQID_GENERAL over db.  How si, db and oi are linked together is on
   lines not visible here. */
MemSet(&vn, 0, sizeof(ValNode));
506
MemSet(&vn2, 0, sizeof(ValNode));
507
MemSet(&si, 0, sizeof(SeqInt));
508
MemSet(&db, 0, sizeof(Dbtag));
509
MemSet(&oi, 0, sizeof(ObjectId));
510
vn.choice = SEQLOC_INT;
511
vn.data.ptrvalue = &si;
513
vn2.choice = SEQID_GENERAL;
514
vn2.data.ptrvalue = &db;
520
for (curr = dsp; curr != NULL; curr = curr->next)
522
if (curr->choice == 1) /* a SeqLoc */
523
SeqLocAdd(&thead, (SeqLocPtr)(curr->data.ptrvalue), TRUE, TRUE);
526
/* a SeqLit: the stand-in interval ends at length - 1 */
si.to = ((SeqLitPtr)(curr->data.ptrvalue))->length - 1;
527
/* do_copy TRUE: SeqLocAdd works from the stack template vn */
SeqLocAdd(&thead, &vn, TRUE, TRUE);
532
head = SeqLocPackage(thead);
536
/*****************************************************************************
538
* SeqLocsToDeltaSeqs(dsp, slp)
539
* converts a chain of seqlocs generated by DeltaSeqsToSeqLocs() back into
540
* delta seqs. dsp is the original chain of DeltaSeqs, which is required
541
* to convert the delta seqs back.
* A SeqLoc that ISADeltaSeqsToSeqLoc() does not recognise is copied as a
* SeqLoc; a recognised one is rebuilt from the SeqLit it indexes in dsp,
* either copied whole or re-read through a SeqPort when only part of it
* (or its minus strand) is wanted.
543
*****************************************************************************/
544
NLM_EXTERN DeltaSeqPtr LIBCALL SeqLocsToDeltaSeqs (DeltaSeqPtr dsp, SeqLocPtr slp)
546
DeltaSeqPtr dhead=NULL, dcurr=NULL, dtmp;
548
Int4 ctr, index, strt, stp;
550
Uint1 strand, newcode;
551
SeqLitPtr slitp, slitp_new;
557
if ((dsp == NULL) || (slp == NULL))
560
/* wrap the incoming chain in a stack SEQLOC_MIX so SeqLocFindNext can walk it */
vn.choice = SEQLOC_MIX;
562
vn.data.ptrvalue = slp;
564
while ((scurr = SeqLocFindNext(&vn, scurr)) != NULL)
566
/* appends a new node to the output chain */
dcurr = ValNodeNew(dhead);
570
index = ISADeltaSeqsToSeqLoc(scurr);
572
if (index == 0) /* just a SeqLoc */
575
dcurr->data.ptrvalue = NULL;
576
dcurr->data.ptrvalue = AsnIoMemCopy((Pointer)scurr, (AsnReadFunc)SeqLocAsnRead, (AsnWriteFunc)SeqLocAsnWrite);
579
else /* convert to a delta seq */
582
sip = SeqLocId(scurr);
584
/* step from the first element to the index-th one (index is 1-based) */
for (ctr = 1; ctr < index; ctr++)
587
if (dtmp->choice != 2) /* wups */
589
ErrPostEx(SEV_ERROR,0,0,"Wrong type in SeqLocsToDeltaSeqs");
590
dhead = DeltaSeqFree(dhead);
593
slitp = (SeqLitPtr)(dtmp->data.ptrvalue);
595
strt = SeqLocStart(scurr);
596
stp = SeqLocStop(scurr);
597
strand = SeqLocStrand(scurr);
599
if ((strt == 0) && (stp == (slitp->length - 1)) && (strand != Seq_strand_minus)) /* no change */
601
dcurr->data.ptrvalue = AsnIoMemCopy((Pointer)slitp, (AsnReadFunc)SeqLitAsnRead, (AsnWriteFunc)SeqLitAsnWrite);
603
else /* got to copy part of it */
605
/* pick the code the extracted residues are written in */
switch (slitp->seq_data_type)
607
case Seq_code_iupacna:
608
case Seq_code_iupacaa:
609
case Seq_code_ncbi8na:
610
case Seq_code_ncbi8aa:
611
case Seq_code_ncbieaa:
612
case Seq_code_ncbistdaa:
613
case Seq_code_iupacaa3:
614
newcode = slitp->seq_data_type; /* one byte codes.. fine */
616
/* amino-acid probabilities map to an amino-acid code.  The two
   probability labels were previously swapped, giving nucleotide
   probabilities a protein target code and vice versa. */
case Seq_code_ncbipaa:
617
ErrPostEx(SEV_ERROR,0,0,"Converting from P residue codes");
618
newcode = Seq_code_ncbieaa;
620
/* nucleotide probabilities share the nucleotide target below */
case Seq_code_ncbipna:
621
ErrPostEx(SEV_ERROR,0,0,"Converting from P residue codes");
/* fall through */
622
case Seq_code_ncbi2na:
623
case Seq_code_ncbi4na:
624
newcode = Seq_code_iupacna;
627
ErrPostEx(SEV_FATAL,0,0,"Unrecognized residue code [%d] in SeqLocsToDeltaSeqs",
628
(int)(slitp->seq_data_type));
629
return DeltaSeqFree(dhead);
631
/* hand-built SeqPort reading straight from the literal's byte store */
spps = MemNew(sizeof(SeqPort));
632
SeqPortSetUpFields (spps, strt, stp, strand, newcode);
633
SeqPortSetUpAlphabet(spps, slitp->seq_data_type, newcode);
634
spps->bp = slitp->seq_data;
635
slitp_new = SeqLitNew();
636
dcurr->data.ptrvalue = slitp_new;
637
slitp_new->seq_data_type = newcode;
638
slitp_new->length = (stp - strt + 1);
639
bsp = BSNew(slitp_new->length);
640
slitp_new->seq_data = bsp;
641
SeqPortSeek(spps, 0, SEEK_SET);
642
BSSeek(bsp, 0, SEEK_SET);
645
/* NOTE(review): the loop around these two lines and the release of spps
   are not visible here -- confirm spps is freed on every path */
residue = SeqPortGetResidue(spps);
646
BSPutByte(bsp, residue);
657
/*****************************************************************************
659
* BioseqDelete (target, from, to, do_feat, do_split)
660
* Deletes the region of sequence between from-to, inclusive, on the
661
* Bioseq whose SeqId is target.
662
* If do_feat, the feature table is updated to reflect the deletion
663
* using SeqEntryDelFeat()
664
* If do_split, the features across the deleted region are split into
665
* two intervals on either side. If not, the feature is just shortened.
* Returns FALSE when the Bioseq cannot be found or from/to fall outside
* 0..length-1 or from > to.
666
*****************************************************************************/
667
NLM_EXTERN Boolean LIBCALL BioseqDelete (SeqIdPtr target, Int4 from, Int4 to, Boolean do_feat, Boolean do_split)
669
Boolean retval = FALSE;
673
Int4 totlen, templen, tfrom, tto, diff1, diff2;
674
SeqLocPtr slp, tloc, newhead, prev;
677
SeqLocPtr PNTR newheadptr;
678
SeqFeatPtr sfpcurr, sfpnext, sfpprev;
680
SeqEntryPtr oldscope;
683
bsp = BioseqFind(target);
685
/* retry with the scope cleared, then restore the saved scope (per the
   rev 6.8 log entry this is done when the first lookup failed) */
oldscope = SeqEntrySetScope (NULL);
686
if (oldscope != NULL) {
687
bsp = BioseqFind(target);
688
SeqEntrySetScope (oldscope);
691
if (bsp == NULL) return retval;
693
/* both ends must lie inside the sequence and be in order */
if ((from < 0) || (from >= bsp->length) || (to < 0) ||
694
(to >= bsp->length) || (from > to)) return retval;
697
/* sep == NULL: SeqEntryDelFeat looks the entry up from target itself */
SeqEntryDelFeat(NULL, target, from, to, do_split);
700
/* if actual sequence present */
702
if ((bsp->repr == Seq_repr_raw) || (bsp->repr == Seq_repr_const))
704
if (ISA_na(bsp->mol))
706
if (bsp->seq_data_type != Seq_code_iupacna) /* need 1 byte/base */
707
BioseqRawConvert(bsp, Seq_code_iupacna);
711
if (bsp->seq_data_type != Seq_code_ncbieaa)
712
BioseqRawConvert(bsp, Seq_code_ncbieaa);
715
/* len is computed on a line not visible here (presumably to - from + 1) */
BSSeek(bsp->seq_data, from, SEEK_SET);
716
deleted = BSDelete(bsp->seq_data, len);
717
if (deleted != len) /* error */
718
/* NOTE(review): %ld expects a long; the declaration of len is not
   visible -- confirm the types match */
ErrPost(CTX_NCBIOBJ, 1, "Delete of %ld residues failed", len);
723
/* update segmented sequence */
724
if ((bsp->repr == Seq_repr_seg) || (bsp->repr == Seq_repr_delta))
726
head = ValNodeNew(NULL); /* allocate to facilitate SeqLocFree */
727
head->choice = SEQLOC_MIX; /* make a SeqLoc out of the extension */
728
if (bsp->repr == Seq_repr_seg)
729
head->data.ptrvalue = bsp->seq_ext;
732
/* delta: work on a SeqLoc rendering of the delta chain */
tdsp = (DeltaSeqPtr)(bsp->seq_ext);
733
head->data.ptrvalue = DeltaSeqsToSeqLocs(tdsp);
737
newheadptr = &newhead;
740
/* tloc/si: scratch SEQLOC_INT reused for each trimmed piece */
MemSet((Pointer)tloc, 0, sizeof(ValNode));
741
MemSet((Pointer)&si, 0, sizeof(SeqInt));
742
tloc->choice = SEQLOC_INT;
743
tloc->data.ptrvalue = (Pointer)(&si);
747
/* totlen is used as the offset of the current piece within the whole
   sequence; its update is on a line not visible here */
while ((slp = SeqLocFindNext(head, slp)) != NULL)
749
templen = SeqLocLen(slp);
750
tfrom = SeqLocStart(slp);
751
tto = SeqLocStop(slp);
753
if (((totlen + templen - 1) < from) || /* before cut */
754
(totlen > to)) /* after cut */
755
tmp = SeqLocAdd(newheadptr, slp, TRUE, TRUE); /* add whole SeqLoc */
758
retval = 1; /* will modify or drop interval */
759
diff1 = from - totlen; /* partial beginning? */
760
diff2 = (templen + totlen - 1) - to; /* partial end? */
761
si.id = SeqLocId(slp);
762
si.strand = SeqLocStrand(slp);
764
if (diff1 > 0) /* partial start */
766
/* keep the first diff1 residues of the piece; on the minus strand those
   sit at the high-coordinate end */
if (si.strand != Seq_strand_minus)
769
si.to = tfrom + diff1 - 1;
773
si.from = tto - diff1 + 1;
776
tmp = SeqLocAdd(newheadptr, tloc, TRUE, TRUE);
779
if (diff2 > 0) /* partial end */
781
/* keep the last diff2 residues of the piece */
if (si.strand != Seq_strand_minus)
783
si.from = tto - diff2 + 1;
789
si.to = tfrom + diff2 - 1;
791
tmp = SeqLocAdd(newheadptr, tloc, TRUE, TRUE);
799
/* look at the tail of the rebuilt chain for a trailing NULL */
for (tmp = newhead; tmp != NULL; tmp = tmp->next)
801
if (tmp->next == NULL) /* last one */
803
if (tmp->choice == SEQLOC_NULL)
807
else /* only a NULL left */
818
if (bsp->repr == Seq_repr_seg)
819
bsp->seq_ext = newhead;
822
/* delta: convert back, then free the old delta chain and the
   intermediate SeqLoc chain */
bsp->seq_ext = SeqLocsToDeltaSeqs(tdsp, newhead);
823
DeltaSeqSetFree(tdsp);
824
SeqLocSetFree(newhead);
830
if (bsp->repr == Seq_repr_map) /* map bioseq */
834
sfpcurr = (SeqFeatPtr)(bsp->seq_ext);
836
/* sfpnext is saved first because sfpcurr may be freed below */
for (; sfpcurr != NULL; sfpcurr = sfpnext)
838
sfpnext = sfpcurr->next;
839
dropped = SeqFeatDelete(sfpcurr, target, from, to, TRUE);
840
if (dropped == 2) /* completely gone */
842
SeqFeatFree(sfpcurr);
847
bsp->seq_ext = (Pointer)sfpcurr;
849
sfpprev->next = sfpcurr;
850
sfpcurr->next = NULL;
857
if (bsp->repr == Seq_repr_virtual)
858
retval = TRUE; /* nothing to do */
866
/*****************************************************************************
868
* BioseqOverwrite (target, pos, residue)
869
* Overwrites the residue at pos with residue in the
870
* Bioseq whose SeqId is target.
871
* residue is iupacna for DNA or ncbieaa for protein; it is upper-cased
* before being stored
872
* target MUST be a raw Bioseq right now
* Returns FALSE when the Bioseq is not found, pos is outside
* 0..length-1, or the Bioseq is not raw.
874
*****************************************************************************/
875
NLM_EXTERN Boolean LIBCALL BioseqOverwrite (SeqIdPtr target, Int4 pos, Uint1 residue)
878
Boolean retval = FALSE;
879
SeqEntryPtr oldscope;
882
bsp = BioseqFind(target);
884
/* retry with the scope cleared, then restore the saved scope (per the
   rev 6.8 log entry this is done when the first lookup failed) */
oldscope = SeqEntrySetScope (NULL);
885
if (oldscope != NULL) {
886
bsp = BioseqFind(target);
887
SeqEntrySetScope (oldscope);
890
if (bsp == NULL) return retval;
892
if ((pos < 0) || (pos >= bsp->length)) return retval;
893
if (bsp->repr != Seq_repr_raw) return retval;
895
/* make sure the data is in the one-byte code matching the molecule type,
   so that pos is a byte offset */
if (ISA_na(bsp->mol))
897
if (bsp->seq_data_type != Seq_code_iupacna) /* need 1 byte/base */
898
BioseqRawConvert(bsp, Seq_code_iupacna);
902
if (bsp->seq_data_type != Seq_code_ncbieaa)
903
BioseqRawConvert(bsp, Seq_code_ncbieaa);
906
BSSeek(bsp->seq_data, pos, SEEK_SET);
907
BSPutByte(bsp->seq_data, (Int2)(TO_UPPER(residue)));
914
/*****************************************************************************
916
* SeqInsertByLoc (target, offset, fragment)
* NOTE(review): only the signature is visible here.  By its name and
* parameters this presumably inserts the sequence described by fragment
* into the Bioseq identified by target at offset -- confirm against the body.
918
*****************************************************************************/
919
NLM_EXTERN Boolean LIBCALL SeqInsertByLoc (SeqIdPtr target, Int4 offset, SeqLocPtr fragment)
925
/*****************************************************************************
927
* SeqDeleteByLoc (slp, do_feat, do_split)
* Collects the component locations of slp whose Bioseq can be found,
* orders them, and calls BioseqDelete() on each with do_feat and do_split.
* Returns FALSE when slp is NULL or no usable location was collected.
929
*****************************************************************************/
930
NLM_EXTERN Boolean LIBCALL SeqDeleteByLoc (SeqLocPtr slp, Boolean do_feat, Boolean do_split)
933
Boolean retval = FALSE;
934
/* NOTE(review): the location counters are Int2 */
Int2 numloc, i = 0, ctr, pick, totloc;
935
SeqLocPtr PNTR locs, PNTR tlocs, PNTR theorder;
936
SeqIdPtr the_id = NULL;
940
if (slp == NULL) return retval;
947
/* pass 1: collect every component location into locs[] */
while ((tmp = SeqLocFindNext(slp, tmp)) != NULL)
953
if (BioseqFind(SeqLocId(tmp)) != NULL)
955
/* array full: allocate one 20 slots larger and copy the old contents over */
if (numloc == totloc)
958
locs = (SeqLocPtr PNTR)(MemNew((totloc+20) * sizeof(SeqLocPtr)));
959
MemCopy(locs, tlocs, (size_t)(totloc * sizeof(SeqLocPtr)));
968
Message(MSG_ERROR, "Unsupported Seqloc [%d] in SeqDeleteByLoc",
975
if (! numloc) return retval;
978
/***********************************************************
980
* first gather all the seqlocs, grouped by Bioseq, and
981
* ordered from end to beginning. They must be ordered
982
* before the underlying Bioseq is changed.
984
***********************************************************/
989
theorder = (SeqLocPtr PNTR)MemNew((sizeof(SeqLocPtr) * numloc));
990
/* one slot of theorder[] is filled per iteration */
for (ctr = 0; ctr < numloc; ctr++)
992
pick = -1; /* flag none found */
995
for (i = 0; i < numloc; i++)
999
if (SeqIdIn(SeqLocId(locs[i]), bsp->id))
1008
bsp = NULL; /* no more locs on this bioseq */
1011
if (bsp == NULL) /* have to find a new bioseq */
1013
/* locs[] entries are tested against NULL here (presumably entries already
   placed in theorder[] have been cleared -- confirm) */
for (i = 0; i < numloc; i++)
1015
if (locs[i] != NULL)
1017
bsp = BioseqFind(SeqLocId(locs[i]));
1027
if (SeqLocOrder(locs[pick], locs[i], bsp) == (-1)) /* it's after */
1032
theorder[ctr] = locs[pick];
1036
MemFree(locs); /* finished with original list */
1038
/*************************************************************
1040
* Now do the actual deletions
1042
*************************************************************/
1045
for (ctr = 0; ctr < numloc; ctr++)
1047
tstart = SeqLocStart(theorder[ctr]);
1048
tstop = SeqLocStop(theorder[ctr]);
1049
/* NOTE(review): BioseqDelete's Boolean result is not used on this line */
BioseqDelete(SeqLocId(theorder[ctr]), tstart, tstop, do_feat, do_split);
1058
/*****************************************************************************
* SeqFeatDelete (sfp, target, from, to, merge)
* Applies SeqLocDelete() to the feature's location and product, to every
* CdRegion code-break location, and to a tRNA anticodon.  Returns:
1061
* 0 = no changes made to location or product
1062
* 1 = changes made but feature still has some location
1063
* 2 = all of sfp->location in deleted interval
* when merge is TRUE:
1066
* 1) correct numbers > to by subtraction
1067
* 2) do not split intervals spanning the deletion
* when merge is FALSE:
1069
* 1) do not change numbers > to
1070
* 2) split intervals which span the deletions
1072
*****************************************************************************/
1073
NLM_EXTERN Int2 LIBCALL SeqFeatDelete (SeqFeatPtr sfp, SeqIdPtr target, Int4 from, Int4 to, Boolean merge)
1078
/* changed tracks location/product edits; tmpbool absorbs the flag for the
   code-break and anticodon sub-locations */
Boolean changed = FALSE, tmpbool = FALSE;
1080
CodeBreakPtr cbp, prevcbp, nextcbp;
1085
/* scratch SEQLOC_INT backed by the local SeqInt si */
MemSet((Pointer)tloc, 0, sizeof(ValNode));
1086
MemSet((Pointer)&si, 0, sizeof(SeqInt));
1087
tloc->choice = SEQLOC_INT;
1088
tloc->data.ptrvalue = (Pointer)(&si);
1093
sfp->location = SeqLocDelete(sfp->location, target, from, to, merge, &changed);
1095
sfp->product = SeqLocDelete(sfp->product, target, from, to, merge, &changed);
1097
/* SeqLocDelete returns NULL when nothing of the location is left */
if (sfp->location == NULL)
1100
switch (sfp->data.choice)
1102
case SEQFEAT_CDREGION: /* cdregion */
1103
crp = (CdRegionPtr)(sfp->data.value.ptrvalue);
1105
/* nextcbp is saved first so the current node can be unlinked */
for (cbp = crp->code_break; cbp != NULL; cbp = nextcbp)
1107
nextcbp = cbp->next;
1108
cbp->loc = SeqLocDelete(cbp->loc, target, from, to, merge, &tmpbool);
1109
/* a code break whose location vanished is unlinked from the list */
if (cbp->loc == NULL)
1111
if (prevcbp != NULL)
1112
prevcbp->next = nextcbp;
1114
crp->code_break = nextcbp;
1123
rrp = (RnaRefPtr)(sfp->data.value.ptrvalue);
1124
if (rrp->ext.choice == 2) /* tRNA */
1126
trp = (tRNAPtr)(rrp->ext.value.ptrvalue);
1127
if (trp->anticodon != NULL)
1129
trp->anticodon = SeqLocDelete(trp->anticodon, target, from, to, merge, &tmpbool);
1139
/* the condition guarding this line is not visible here */
sfp->partial = TRUE;
1146
/*****************************************************************************
* SeqLocDelete (head, target, from, to, merge, changed)
* Removes the range from-to on Bioseq target from the single SeqLoc head,
* recursing into the members of MIX, EQUIV and PACKED_INT locations.
1149
* returns altered head or NULL if nothing left.
1150
* sets changed=TRUE if all or part of loc is deleted
1151
* does NOT set changed if location coordinates are only moved
1152
* if (merge) then corrects coordinates upstream of to
* else
1154
* splits intervals covering from-to, does not correct upstream of to
1156
*****************************************************************************/
1157
NLM_EXTERN SeqLocPtr LIBCALL SeqLocDelete (SeqLocPtr head, SeqIdPtr target, Int4 from, Int4 to, Boolean merge, BoolPtr changed)
1159
SeqIntPtr sip, sip2;
1161
PackSeqPntPtr pspp, pspp2;
1164
SeqLocPtr slp, tmp, prev, next, thead;
1165
Int4 diff, numpnt, i, tpos;
1168
if ((head == NULL) || (target == NULL))
1171
head->next = NULL; /* caller maintains chains */
1172
/* number of residues in the deleted range */
diff = to - from + 1;
1174
switch (head->choice)
1176
case SEQLOC_BOND: /* bond -- 2 seqs */
1177
sbp = (SeqBondPtr)(head->data.ptrvalue);
1179
/* end "a" of the bond; spp is assigned on a line not visible here */
if (SeqIdForSameBioseq(spp->id, target))
1181
if (spp->point >= from)
1183
if (spp->point <= to) /* delete it */
1186
sbp->a = SeqPntFree(spp);
1195
/* end "b" of the bond, same treatment */
if (SeqIdForSameBioseq(spp->id, target))
1197
if (spp->point >= from)
1199
if (spp->point <= to) /* delete it */
1202
sbp->b = SeqPntFree(spp);
1211
if (sbp->b != NULL) /* only a required */
1218
head = SeqLocFree(head);
1222
case SEQLOC_FEAT: /* feat -- can't track yet */
1223
case SEQLOC_NULL: /* NULL */
1224
case SEQLOC_EMPTY: /* empty */
1226
case SEQLOC_WHOLE: /* whole */
1227
sidp = (SeqIdPtr)(head->data.ptrvalue);
1228
if (SeqIdForSameBioseq(sidp, target))
1230
/* the Bioseq length is needed to tell a complete delete from a partial one */
bsp = BioseqFind(target);
1231
if (bsp != NULL) /* split it */
1233
if ((from == 0) && (to >= (bsp->length - 1)))
1234
{ /* complete delete */
1235
head = SeqLocFree(head);
1240
if (! merge) /* split it up */
1243
/* the whole-location becomes a packed-int holding the surviving flank(s) */
head->choice = SEQLOC_PACKED_INT;
1244
head->data.ptrvalue = NULL;
1251
sip->id = SeqIdDup(target);
1252
slp = ValNodeNew(NULL);
1253
slp->choice = SEQLOC_INT;
1254
slp->data.ptrvalue = sip;
1255
head->data.ptrvalue = slp;
1258
/* something is left after the cut */
if (to < (bsp->length - 1))
1262
sip->to = bsp->length - 1;
1263
sip->id = SeqIdDup(target);
1264
tmp = ValNodeNew(NULL);
1265
tmp->choice = SEQLOC_INT;
1266
tmp->data.ptrvalue = sip;
1270
head->data.ptrvalue = tmp;
1278
case SEQLOC_MIX: /* mix -- more than one seq */
1279
case SEQLOC_EQUIV: /* equiv -- ditto */
1280
case SEQLOC_PACKED_INT: /* packed int */
1283
/* rebuild the member chain from the recursive results; thead is the new
   chain, and prev is compared with each new result below */
for (slp = (SeqLocPtr)(head->data.ptrvalue); slp != NULL; slp = next)
1286
tmp = SeqLocDelete(slp, target, from, to, merge, changed);
1291
if ((merge) && (prev->choice == SEQLOC_INT) && (tmp->choice == SEQLOC_INT))
1293
sip = (SeqIntPtr)(prev->data.ptrvalue);
1294
sip2 = (SeqIntPtr)(tmp->data.ptrvalue);
1296
if (SeqIdForSameBioseq(sip->id, sip2->id))
1298
/* merge intervals? */
1299
if ((sip->strand == Seq_strand_minus) &&
1300
(sip2->strand == Seq_strand_minus))
1302
if (sip->from == (sip2->to + 1))
1304
sip->from = sip2->from;
1305
/* NOTE(review): no IntFuzzFree of the existing sip->if_from is visible
   before this overwrite (SeqLocAdd frees first) -- confirm no leak */
sip->if_from = sip2->if_from;
1306
sip2->if_from = NULL;
1307
tmp = SeqLocFree(tmp);
1310
else if((sip->strand != Seq_strand_minus) &&
1311
(sip2->strand != Seq_strand_minus))
1313
if (sip->to == (sip2->from - 1))
1316
sip->if_to = sip2->if_to;
1318
tmp = SeqLocFree(tmp);
1323
/* never keep two NULLs in a row */
else if ((prev->choice == SEQLOC_NULL) && (tmp->choice == SEQLOC_NULL))
1325
tmp = SeqLocFree(tmp);
1329
else if (tmp->choice == SEQLOC_NULL)
1331
tmp = SeqLocFree(tmp);
1335
if (tmp != NULL) /* still have one? */
1349
if (prev->choice == SEQLOC_NULL) /* ends with NULL */
1352
for (slp = thead; slp->next != NULL; slp = slp->next)
1361
thead = SeqLocFree(thead);
1366
head->data.ptrvalue = thead;
1368
head = SeqLocFree(head);
1370
case SEQLOC_INT: /* int */
1371
sip = (SeqIntPtr)(head->data.ptrvalue);
1372
if (SeqIdForSameBioseq(sip->id, target))
1374
if (sip->to < from) /* completely before cut */
1377
/* completely contained in cut */
1378
if ((sip->from >= from) && (sip->to <= to))
1380
head = SeqLocFree(head);
1385
if (sip->from > to) /* completely past cut */
1401
else /* to inside cut, so partial delete */
1407
if (sip->from >= from) /* from inside cut, partial del */
1418
/* interval spans cut.. only in non-merge */
1421
if ((sip->from < from) && (sip->to > to))
1424
/* the interval becomes a packed-int of two pieces: the original SeqInt
   (wrapped in tmp) and a new one, sip2, starting just past the cut */
head->choice = SEQLOC_PACKED_INT;
1425
head->data.ptrvalue = NULL;
1426
tmp = ValNodeNew(NULL);
1427
tmp->choice = SEQLOC_INT;
1428
tmp->data.ptrvalue = sip;
1431
sip2->from = to + 1;
1433
sip2->strand = sip->strand;
1434
sip2->if_to = sip->if_to;
1435
sip2->id = SeqIdDup(target);
1436
slp = ValNodeNew(NULL);
1437
slp->choice = SEQLOC_INT;
1438
slp->data.ptrvalue = sip2;
1443
/* which piece heads the list depends on the strand */
if (sip->strand == Seq_strand_minus)
1445
head->data.ptrvalue = slp;
1450
head->data.ptrvalue = tmp;
1458
case SEQLOC_PNT: /* pnt */
1459
spp = (SeqPntPtr)(head->data.ptrvalue);
1460
if (SeqIdForSameBioseq(spp->id, target))
1462
/* a point inside the cut disappears */
if ((spp->point >= from) && (spp->point <= to))
1464
head = SeqLocFree(head);
1467
else if (spp->point > to)
1474
case SEQLOC_PACKED_PNT: /* packed pnt */
1475
pspp = (PackSeqPntPtr)(head->data.ptrvalue);
1476
if (SeqIdForSameBioseq(pspp->id, target))
1478
/* rebuild the point set in a fresh PackSeqPnt, pspp2 */
numpnt = PackSeqPntNum(pspp);
1479
pspp2 = PackSeqPntNew();
1480
head->data.ptrvalue = pspp2;
1481
for (i = 0; i < numpnt; i++)
1483
tpos = PackSeqPntGet(pspp, i);
1485
PackSeqPntPut(pspp2, tpos);
1492
PackSeqPntPut(pspp2, tpos);
1498
/* NOTE(review): id and fuzz are handed to pspp2 and pspp is then freed;
   the lines between are not visible -- confirm pspp->id and pspp->fuzz are
   cleared first */
pspp2->id = pspp->id;
1500
pspp2->fuzz = pspp->fuzz;
1502
pspp2->strand = pspp->strand;
1503
PackSeqPntFree(pspp);
1504
numpnt = PackSeqPntNum(pspp2);
1506
head = SeqLocFree(head);
1517
/* Argument bundle handed to DelFeat through its data pointer.  The field
   list is not visible here; DelFeat reads sip, from, to and merge. */
typedef struct delstruct {
1521
} DelStruct, PNTR DelStructPtr;
1523
/* DelFeat: callback passed to SeqEntryExplore by SeqEntryDelFeat.  data
   points at a DelStruct.  For each feature table in the entry's annot chain
   it runs SeqFeatDelete on every feature and unlinks features whose
   location is completely gone. */
NLM_EXTERN void DelFeat (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent);
1525
NLM_EXTERN void DelFeat (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
1530
SeqAnnotPtr sap, nextsap;
1531
SeqFeatPtr sfp, nextsfp;
1532
/* prevsap and prevsfp hold the address of the link that points at the
   current node, so a node can be unlinked by writing through them */
Pointer PNTR prevsap, PNTR prevsfp;
1534
dsp = (DelStructPtr)data;
1537
/* the entry is either a Bioseq ... */
bsp = (BioseqPtr)(sep->data.ptrvalue);
1539
prevsap = (Pointer PNTR) &(bsp->annot);
1543
/* ... or a BioseqSet; either way start from its annot chain */
bssp = (BioseqSetPtr)(sep->data.ptrvalue);
1545
prevsap = (Pointer PNTR) &(bssp->annot);
1550
nextsap = sap->next;
1551
if (sap->type == 1) /* feature table */
1553
sfp = (SeqFeatPtr) sap->data;
1554
prevsfp = (Pointer PNTR) &(sap->data);
1557
nextsfp = sfp->next;
1558
/* SeqFeatDelete returns 2 when nothing of the feature location is left */
if (SeqFeatDelete(sfp, dsp->sip, dsp->from, dsp->to, dsp->merge) == 2)
1560
/* location completely gone */
1561
*(prevsfp) = sfp->next;
1565
/* feature kept: its next field becomes the link to patch */
prevsfp = (Pointer PNTR) &(sfp->next);
1571
if (sap->data == NULL) /* all features deleted */
1573
*(prevsap) = sap->next;
1577
prevsap = (Pointer PNTR) &(sap->next);
1586
/*****************************************************************************
1588
* SeqEntryDelFeat(sep, id, from, to, do_split)
1589
* Deletes or truncates features on Bioseq (id) in the range
1590
* from-to, inclusive
1592
* Moves features located after "to" leftward to account for decrease in length
1593
* if do_split, breaks intervals across the deletion
1594
* else just reduces their size
1596
* If sep == NULL, then calls SeqEntryFind(id) to set scope to look
1599
*****************************************************************************/
1600
/* Entry point for deleting/truncating features in a range.
 *   sep      - entry to search; looked up with SeqEntryFind(sip) when the
 *              caller supplies none (see header comment)
 *   sip      - id of the Bioseq whose features are affected
 *   from, to - range being removed
 *   do_split - split vs. shrink behaviour described in the header comment
 * Returns FALSE when no entry can be found.  The work itself is done by
 * DelFeat(), invoked for each entry by SeqEntryExplore() with &ds as its
 * data argument. */
NLM_EXTERN Boolean LIBCALL SeqEntryDelFeat (SeqEntryPtr sep, SeqIdPtr sip, Int4 from, Int4 to, Boolean do_split)
1609
sep = SeqEntryFind(sip);
1611
if (sep == NULL) return FALSE;
1621
SeqEntryExplore(sep, (Pointer)(&ds), DelFeat);
1626
/*****************************************************************************
1628
* DescrToFeatures(sep)
1629
* Moves all Seqdescr to features in sep where possible
1631
*****************************************************************************/
1634
/*****************************************************************************
1636
* BioseqCopy(newid, sourceid, from, to, strand, do_feat)
1637
* Creates a new Bioseq from sourceid in the range from-to inclusive.
1638
* If strand==Seq_strand_minus, reverse complements the sequence in
1639
* the copy and (if do_feat) corrects the feature table appropriately.
1640
* Names new Bioseq as newid, if not NULL
1641
* else Creates seqid.local = "Clipboard" if newid is NULL
1642
* If do_feat == TRUE copies appropriate region of feature table from
1643
* sourceid to new copy using SeqFeatsCopy().
1645
*****************************************************************************/
1646
/* BioseqCopy: build a new Bioseq holding residues/structure from..to of the
 * Bioseq identified by sourceid.
 *   newid    - id for the copy (duplicated with SeqIdDup); a local id
 *              "Clipboard" is constructed on the alternative path
 *   sourceid - id of the Bioseq to copy from; must not be NULL
 *   from, to - inclusive range; from must be >= 0 and to - from + 1 > 0
 *   strand   - passed to SeqPortNew / SeqLocCopyPart / SeqLocCopyRegion
 *   do_feat  - per the header comment, requests SeqFeatsCopy()
 * The copy takes repr, mol and seq_ext_type from the source and length len.
 * Handling then depends on repr:
 *   virtual          - nothing further
 *   raw / const      - residues read through a SeqPort (ncbieaa for protein,
 *                      iupacna otherwise) and appended with BSPutByte
 *   seg / ref / delta- extension rebuilt with SeqLocCopyPart
 *   map              - features of the extension remapped with
 *                      SeqLocCopyRegion and copied with AsnIoMemCopy
 * Failures jump to erret; early argument failures return directly. */
NLM_EXTERN BioseqPtr LIBCALL BioseqCopy (SeqIdPtr newid, SeqIdPtr sourceid, Int4 from, Int4 to,
1647
Uint1 strand, Boolean do_feat)
1649
BioseqPtr newbsp=NULL, oldbsp, tmpbsp;
1650
SeqPortPtr spp=NULL;
1658
SeqLocPtr the_segs, head, curr;
1659
Boolean handled = FALSE, split;
1660
SeqFeatPtr sfp, newsfp, lastsfp;
1662
SeqEntryPtr oldscope;
1665
/* NOTE(review): this function returns BioseqPtr, and every other failure
   path below returns NULL; FALSE here is presumably meant as NULL */
if ((sourceid == NULL) || (from < 0)) return FALSE;
1667
oldbsp = BioseqFind(sourceid);
1668
/* not found in the current scope: retry with scope cleared, then put the
   previous scope back */
if (oldbsp == NULL) {
1669
oldscope = SeqEntrySetScope (NULL);
1670
if (oldscope != NULL) {
1671
oldbsp = BioseqFind(sourceid);
1672
SeqEntrySetScope (oldscope);
1675
if (oldbsp == NULL) return NULL;
1677
len = to - from + 1;
1678
if (len <= 0) return NULL;
1680
newbsp = BioseqNew();
1682
newbsp->id = SeqIdDup(newid);
1685
/* build the local id "Clipboard" and look for a Bioseq already using it,
   with the same scope-clearing retry as above */
tmp = ValNodeNew(NULL);
1686
tmp->choice = SEQID_LOCAL;
1687
oid = ObjectIdNew();
1688
tmp->data.ptrvalue = (Pointer)oid;
1689
oid->str = StringSave("Clipboard");
1690
tmpbsp = BioseqFind(tmp); /* old clipboard present? */
1691
if (tmpbsp == NULL) {
1692
oldscope = SeqEntrySetScope (NULL);
1693
if (oldscope != NULL) {
1694
tmpbsp = BioseqFind(tmp);
1695
SeqEntrySetScope (oldscope);
1703
newbsp->repr = oldbsp->repr;
1704
newbsp->mol = oldbsp->mol;
1705
newbsp->length = len;
1706
newbsp->seq_ext_type = oldbsp->seq_ext_type;
1708
if (newbsp->repr == Seq_repr_virtual)
1709
handled = TRUE; /* no more to do */
1711
if ((newbsp->repr == Seq_repr_raw) ||
1712
(newbsp->repr == Seq_repr_const))
1714
/* choose the residue alphabet used for the copy */
if (ISA_aa(newbsp->mol))
1716
seqtype = Seq_code_ncbieaa;
1720
seqtype = Seq_code_iupacna;
1722
newbsp->seq_data_type = seqtype;
1724
if (bsp == NULL) goto erret;
1726
newbsp->seq_data = bsp;
1727
spp = SeqPortNew(oldbsp, from, to, strand, seqtype);
1728
if (spp == NULL) goto erret;
1730
/* copy exactly len residues; anything that is not a residue aborts */
for (i = 0; i < len; i++)
1732
residue = SeqPortGetResidue(spp);
1733
if (! IS_residue(residue)) goto erret;
1734
BSPutByte(bsp, residue);
1741
if ((newbsp->repr == Seq_repr_seg) ||
1742
(newbsp->repr == Seq_repr_ref) ||
1743
(newbsp->repr == Seq_repr_delta))
1745
if (newbsp->repr == Seq_repr_seg) /* segmented */
1747
/* fake is a stack ValNode wrapping the segment chain so it can be passed
   as a single Seq-loc; it does not own the chain */
fake.choice = SEQLOC_MIX; /* make SEQUENCE OF Seq-loc, into one */
1748
fake.data.ptrvalue = oldbsp->seq_ext;
1750
the_segs = (SeqLocPtr)&fake;
1751
head = SeqLocCopyPart (the_segs, from, to, strand, FALSE, NULL, NULL);
1753
else if (newbsp->repr == Seq_repr_ref) /* reference: is a Seq-loc */
1755
head = SeqLocCopyPart ((SeqLocPtr)(oldbsp->seq_ext), from, to,
1756
strand, TRUE, NULL, NULL);
1758
else if (newbsp->repr == Seq_repr_delta)
1760
dsp = (DeltaSeqPtr)(oldbsp->seq_ext); /* real data is here */
1761
/* the_segs is a temporary Seq-loc rendering of the delta chain; it is
   freed as soon as the part has been copied out of it */
the_segs = DeltaSeqsToSeqLocs(dsp);
1762
head = SeqLocCopyPart (the_segs, from, to, strand, FALSE, NULL, NULL);
1763
SeqLocFree (the_segs);
1766
newbsp->seq_ext = (Pointer)head;
1770
if (newbsp->repr == Seq_repr_map)
1773
for (sfp = (SeqFeatPtr)(oldbsp->seq_ext); sfp != NULL; sfp = sfp->next)
1776
curr = SeqLocCopyRegion(newbsp->id, sfp->location, oldbsp, from, to, strand, &split);
1777
if (curr != NULL) /* got one */
1779
/* deep-copy the feature, then swap in the remapped location */
newsfp = (SeqFeatPtr)AsnIoMemCopy((Pointer)sfp, (AsnReadFunc)SeqFeatAsnRead, (AsnWriteFunc)SeqFeatAsnWrite);
1780
SeqLocFree(newsfp->location);
1781
newsfp->location = curr;
1783
newsfp->partial = TRUE;
1784
/* NOTE(review): lastsfp is declared without an initializer; confirm it is
   set to NULL before this loop */
if (lastsfp == NULL) /* first one */
1785
newbsp->seq_ext = (Pointer)newsfp;
1787
lastsfp->next = newsfp;
1795
if (! handled) goto erret;
1797
/* get descriptors */
1801
SeqFeatsCopy (newbsp, oldbsp, from, to, strand);
1811
/*****************************************************************************
1813
* SeqLocCopyPart (the_segs, from, to, strand, group, first_segp, last_segp)
1814
* cuts out from the_segs the part from offset from to offset to
1815
* reverse complements resulting seqloc if strand == Seq_strand_minus
1816
* if (group) puts resulting intervals into a new Seq-loc (of type
1817
* PACKED_INT if no SEQLOC_NULL, else SEQLOC_MIX).
1818
* Currently this always makes intervals or nulls. Is really for segmented and
1819
* reference sequence extensions
1820
* If first_segp and last_segp are not NULL, then they are filled in with the
1821
* ordinal number of the source segments that remain in the copy, based
1822
* on SeqLocFindNext, where 1 is the first one. Thus if the third and
1823
* fourth segments were copied, first is 3 and last is 4. If the
1824
* location was reverse complemented, first is 4 and last is 3.
1826
*****************************************************************************/
1827
/* SeqLocCopyPart: copy the portion of the_segs lying between offsets from
 * and to, where offsets count along the concatenated segments (oldpos is the
 * running offset of the current segment's first residue).
 *   the_segs  - source location(s); NULL yields NULL
 *   from, to  - offsets; a negative value yields NULL
 *   strand    - Seq_strand_minus reverses the result list and complements
 *               the strand of each interval
 *   group     - when set and something was copied, the list is wrapped in a
 *               single SEQLOC_MIX or SEQLOC_PACKED_INT node
 *   first_segp, last_segp - optional outputs receiving the first/last source
 *               segment ordinals (swapped on the minus strand)
 * New intervals carry a duplicate of the segment's id and its strand. */
NLM_EXTERN SeqLocPtr LIBCALL SeqLocCopyPart (SeqLocPtr the_segs, Int4 from, Int4 to, Uint1 strand,
1828
Boolean group, Int2Ptr first_segp, Int2Ptr last_segp)
1830
SeqLocPtr currseg, newhead, head, prev, curr, last;
1831
Int2 numloc, first_seg = 0, last_seg = 0, seg_ctr = 0;
1832
Int4 oldpos, tlen, tfrom, tto, tstart, tstop, xfrom, xto;
1836
Boolean done, started, wasa_null, hada_null;
1839
if (the_segs == NULL) return NULL;
1840
if ((from < 0) || (to < 0)) return NULL;
1843
oldpos = 0; /* position in old sequence */
1851
/* walk the segments in order until the running offset passes "to" */
while ((oldpos <= to) && ((currseg = SeqLocFindNext(the_segs, currseg)) != NULL))
1854
tlen = SeqLocLen(currseg);
1855
tid = SeqLocId(currseg);
1857
bsp = BioseqLockById (tid); /* only necessary for locations of type WHOLE */
1858
/* length is taken again once the Bioseq is locked */
tlen = SeqLocLen (currseg);
1861
tstrand = SeqLocStrand(currseg);
1862
tfrom = SeqLocStart(currseg);
1863
tto = SeqLocStop(currseg);
1868
/* the first non-NULL segment whose last residue reaches "from" is where
   copying starts; tstart is the offset of "from" within that segment */
if (((oldpos + tlen - 1) >= from) &&
1869
(currseg->choice != SEQLOC_NULL))
1871
tstart = from - oldpos;
1873
first_seg = seg_ctr;
1880
if (currseg->choice == SEQLOC_NULL)
1883
tstart = -1; /* skip it till later */
1889
if (tstart >= 0) /* have a start */
1891
if ((oldpos + tlen - 1) >= to)
1893
done = TRUE; /* hit the end */
1894
/* number of residues of this segment that lie beyond "to" */
tstop = ((oldpos + tlen - 1) - to);
1899
/* on a minus-strand segment the trimmed tail is at the low-coordinate end */
if (tstrand == Seq_strand_minus)
1901
xfrom = tfrom + tstop;
1906
xfrom = tfrom + tstart;
1911
sip->id = SeqIdDup(tid);
1912
sip->strand = tstrand;
1915
if (wasa_null) /* previous SEQLOC_NULL */
1917
curr = ValNodeAddInt(&head, SEQLOC_NULL, 0);
1922
curr = ValNodeAddPointer(&head, SEQLOC_INT, (Pointer)sip);
1930
if (strand == Seq_strand_minus) /* reverse order and complement */
1934
/* repeatedly take the last node of head and append it to newhead */
while (head != NULL)
1937
for (curr = head; curr->next != NULL; curr = curr->next)
1944
if (newhead == NULL)
1949
if (curr->choice == SEQLOC_INT)
1951
sip = (SeqIntPtr)(curr->data.ptrvalue);
1952
sip->strand = StrandCmp(sip->strand);
1958
/* segment ordinals are reported in the reversed order as well */
last_seg = first_seg;
1959
first_seg = seg_ctr;
1962
if ((numloc) && (group))
1964
curr = ValNodeNew(NULL);
1966
curr->choice = SEQLOC_MIX;
1968
curr->choice = SEQLOC_PACKED_INT;
1969
curr->data.ptrvalue = (Pointer)head;
1973
if (first_segp != NULL)
1974
*first_segp = first_seg;
1975
if (last_segp != NULL)
1976
*last_segp = last_seg;
1981
/*****************************************************************************
1983
* SeqFeatsCopy(new, old, from, to, strand)
1985
*****************************************************************************/
1986
/* IndexedSeqFeatsCopy: feature-copy worker used by SeqFeatsCopy() when the
 * entity owning oldbsp has indexed features.  Features are enumerated with
 * SeqMgrGetNextFeature(); enumeration stops as soon as a feature starts
 * beyond "to", and only features overlapping from..to are considered.
 * Each accepted feature is deep-copied with AsnIoMemCopy, given a location
 * remapped onto newbsp->id by SeqLocCopyRegion, and appended to a feature
 * table annotation that is created on newbsp for the first copied feature.
 * Code-break locations of coding regions and tRNA anticodon locations are
 * remapped the same way.  Returns ctr (presumably the number of features
 * copied - its updates are not visible here). */
static Int2 LIBCALL IndexedSeqFeatsCopy (BioseqPtr newbsp, BioseqPtr oldbsp, Int4 from, Int4 to, Uint1 strand)
1990
SeqFeatPtr head=NULL, sfp, last=NULL, newsfp;
1995
Boolean split = FALSE;
1996
SeqAnnotPtr sap = NULL, saptmp;
1998
CodeBreakPtr cbp, prevcbp, nextcbp;
2001
SeqMgrFeatContext fcontext;
2004
/* vn/si form a stack-allocated interval Seq-loc */
vn.choice = SEQLOC_INT;
2005
vn.data.ptrvalue = (Pointer)(&si);
2013
while ((sfp = SeqMgrGetNextFeature (oldbsp, sfp, 0, 0, &fcontext)) != NULL)
2015
/* can exit once past rightmost limit */
2016
if (fcontext.left > to) return ctr;
2018
if (fcontext.right >= from && fcontext.left <= to) {
2021
newloc = SeqLocCopyRegion(newbsp->id, sfp->location, oldbsp, from, to, strand, &split);
2022
if (newloc != NULL) /* got one */
2024
/* deep-copy the feature, then replace the copied location with the
   remapped one (the copied original is freed first) */
newsfp = (SeqFeatPtr)AsnIoMemCopy((Pointer)sfp, (AsnReadFunc)SeqFeatAsnRead, (AsnWriteFunc)SeqFeatAsnWrite);
2025
SeqLocFree(newsfp->location);
2026
newsfp->location = newloc;
2028
newsfp->partial = TRUE;
2029
if (last == NULL) /* first one */
2031
/* first copied feature: create the annotation and hang it at the end of
   newbsp's annot chain */
sap = SeqAnnotNew();
2032
if (newbsp->annot == NULL)
2033
newbsp->annot = sap;
2036
for (saptmp = newbsp->annot; saptmp->next != NULL; saptmp = saptmp->next)
2040
sap->type = 1; /* feature table */
2041
sap->data = (Pointer)newsfp;
2044
last->next = newsfp;
2047
switch (newsfp->data.choice)
2049
case SEQFEAT_CDREGION: /* cdregion */
2050
crp = (CdRegionPtr)(newsfp->data.value.ptrvalue);
2052
for (cbp = crp->code_break; cbp != NULL; cbp = nextcbp)
2054
nextcbp = cbp->next;
2055
/* NOTE(review): the previous cbp->loc is overwritten with no visible free,
   unlike newsfp->location above - possible leak, confirm */
cbp->loc = SeqLocCopyRegion(newbsp->id, cbp->loc, oldbsp, from, to, strand, &split);
2056
/* a code-break that falls outside the region is unlinked from the list */
if (cbp->loc == NULL)
2058
if (prevcbp != NULL)
2059
prevcbp->next = nextcbp;
2061
crp->code_break = nextcbp;
2070
rrp = (RnaRefPtr)(newsfp->data.value.ptrvalue);
2071
if (rrp->ext.choice == 2) /* tRNA */
2073
trp = (tRNAPtr)(rrp->ext.value.ptrvalue);
2074
if (trp->anticodon != NULL)
2076
/* NOTE(review): same overwrite-without-free pattern as cbp->loc - confirm */
trp->anticodon = SeqLocCopyRegion(newbsp->id, trp->anticodon, oldbsp, from, to, strand, &split);
2090
/* SeqFeatsCopy: copy onto newbsp the features of oldbsp that fall in
 * from..to, remapping their locations with SeqLocCopyRegion.
 *   newbsp   - destination; receives a new feature-table annotation
 *   oldbsp   - source; NULL returns ctr immediately
 *   from, to - region of oldbsp being copied
 *   strand   - forwarded to SeqLocCopyRegion
 * When the entity owning oldbsp has indexed features the work is delegated
 * to IndexedSeqFeatsCopy(); otherwise features are enumerated through a
 * BioseqContext, which is released before returning.  Returns ctr
 * (presumably the number of features copied - its updates are not visible
 * here). */
NLM_EXTERN Int2 LIBCALL SeqFeatsCopy (BioseqPtr newbsp, BioseqPtr oldbsp, Int4 from, Int4 to, Uint1 strand)
2093
BioseqContextPtr bcp = NULL;
2094
SeqFeatPtr head=NULL, sfp, last=NULL, newsfp;
2099
Boolean split = FALSE;
2100
SeqAnnotPtr sap = NULL, saptmp;
2102
CodeBreakPtr cbp, prevcbp, nextcbp;
2107
if (oldbsp == NULL) return ctr;
2109
entityID = ObjMgrGetEntityIDForPointer (oldbsp);
2110
if (entityID > 0 && SeqMgrFeaturesAreIndexed (entityID)) {
2111
/* indexed version should be much faster */
2112
return IndexedSeqFeatsCopy (newbsp, oldbsp, from, to, strand);
2115
bcp = BioseqContextNew(oldbsp);
2116
if (bcp == NULL) return ctr;
2119
/* vn/si form a stack-allocated interval Seq-loc */
vn.choice = SEQLOC_INT;
2120
vn.data.ptrvalue = (Pointer)(&si);
2128
while ((sfp = BioseqContextGetSeqFeat(bcp, 0, sfp, NULL, 0)) != NULL)
2131
newloc = SeqLocCopyRegion(newbsp->id, sfp->location, oldbsp, from, to, strand, &split);
2132
if (newloc != NULL) /* got one */
2134
/* deep-copy the feature, then replace the copied location with the
   remapped one (the copied original is freed first) */
newsfp = (SeqFeatPtr)AsnIoMemCopy((Pointer)sfp, (AsnReadFunc)SeqFeatAsnRead, (AsnWriteFunc)SeqFeatAsnWrite);
2135
SeqLocFree(newsfp->location);
2136
newsfp->location = newloc;
2138
newsfp->partial = TRUE;
2139
if (last == NULL) /* first one */
2141
/* first copied feature: create the annotation and hang it at the end of
   newbsp's annot chain */
sap = SeqAnnotNew();
2142
if (newbsp->annot == NULL)
2143
newbsp->annot = sap;
2146
for (saptmp = newbsp->annot; saptmp->next != NULL; saptmp = saptmp->next)
2150
sap->type = 1; /* feature table */
2151
sap->data = (Pointer)newsfp;
2154
last->next = newsfp;
2157
switch (newsfp->data.choice)
2159
case SEQFEAT_CDREGION: /* cdregion */
2160
crp = (CdRegionPtr)(newsfp->data.value.ptrvalue);
2162
for (cbp = crp->code_break; cbp != NULL; cbp = nextcbp)
2164
nextcbp = cbp->next;
2165
/* NOTE(review): the previous cbp->loc is overwritten with no visible free,
   unlike newsfp->location above - possible leak, confirm */
cbp->loc = SeqLocCopyRegion(newbsp->id, cbp->loc, oldbsp, from, to, strand, &split);
2166
/* a code-break that falls outside the region is unlinked from the list */
if (cbp->loc == NULL)
2168
if (prevcbp != NULL)
2169
prevcbp->next = nextcbp;
2171
crp->code_break = nextcbp;
2180
rrp = (RnaRefPtr)(newsfp->data.value.ptrvalue);
2181
if (rrp->ext.choice == 2) /* tRNA */
2183
trp = (tRNAPtr)(rrp->ext.value.ptrvalue);
2184
if (trp->anticodon != NULL)
2186
/* NOTE(review): same overwrite-without-free pattern as cbp->loc - confirm */
trp->anticodon = SeqLocCopyRegion(newbsp->id, trp->anticodon, oldbsp, from, to, strand, &split);
2196
BioseqContextFree (bcp);
2201
/* SeqLocCopyRegion: produce a new Seq-loc for the part of "head" that lies
 * inside from..to of oldbsp, expressed on newid in coordinates relative to
 * "from" (and mirrored within the window when strand is Seq_strand_minus).
 *   newid    - id placed (as a duplicate) on the resulting locations
 *   head     - location to remap; NULL yields NULL
 *   oldbsp   - Bioseq the region is taken from; NULL yields NULL.  Only
 *              locations whose id is among oldbsp->id are remapped.
 *   from, to - window on oldbsp; len = to - from + 1
 *   strand   - Seq_strand_minus requests reverse-complement coordinates
 *   split    - forwarded to recursive calls and to IntFuzzClip
 * The switch handles each Seq-loc kind: bonds are processed one point at a
 * time through a temporary SEQLOC_PNT node; WHOLE becomes an interval
 * 0..to-from; EQUIV/MIX/PACKED_INT recurse over their members, merging
 * adjacent intervals and collapsing repeated NULLs; INT, PNT and PACKED_PNT
 * are clipped and shifted directly. */
NLM_EXTERN SeqLocPtr LIBCALL SeqLocCopyRegion(SeqIdPtr newid, SeqLocPtr head, BioseqPtr oldbsp,
2202
Int4 from, Int4 to, Uint1 strand, BoolPtr split)
2204
SeqLocPtr newhead = NULL, last=NULL, tmp, slp, prev, next, thead;
2205
SeqIntPtr sip, sip2;
2206
SeqPntPtr spp, spp2;
2207
PackSeqPntPtr pspp, pspp2;
2208
SeqBondPtr sbp, sbp2;
2209
SeqIdPtr sidp, oldids;
2210
Int4 numpnt, i, tpos, len, intcnt, othercnt;
2212
Boolean dropped_one;
2216
if ((head == NULL) || (oldbsp == NULL)) return NULL;
2218
oldids = oldbsp->id;
2219
len = to - from + 1;
2220
switch (head->choice)
2222
case SEQLOC_BOND: /* bond -- 2 seqs */
2224
sbp = (SeqBondPtr)(head->data.ptrvalue);
2225
/* vn is a temporary point Seq-loc used to run each end of the bond
   through this same function */
vn.choice = SEQLOC_PNT;
2226
vn.data.ptrvalue = sbp->a;
2228
tmp = SeqLocCopyRegion(newid, (SeqLocPtr)(&vn), oldbsp, from, to, strand, split);
2231
sbp2 = SeqBondNew();
2232
sbp2->a = (SeqPntPtr)(tmp->data.ptrvalue);
2237
vn.data.ptrvalue = sbp->b;
2238
tmp = SeqLocCopyRegion(newid, (SeqLocPtr)(&vn), oldbsp, from, to, strand, split);
2243
/* a bond created at this point stores the surviving "b" point as its "a"
   (presumably the case where end "a" fell outside the region - confirm) */
sbp2 = SeqBondNew();
2244
sbp2->a = (SeqPntPtr)(tmp->data.ptrvalue);
2247
sbp2->b = (SeqPntPtr)(tmp->data.ptrvalue);
2253
newhead = ValNodeNew(NULL);
2254
newhead->choice = SEQLOC_BOND;
2255
newhead->data.ptrvalue = sbp2;
2256
if ((sbp->b != NULL) && (sbp2->b == NULL))
2260
case SEQLOC_FEAT: /* feat -- can't track yet */
2261
case SEQLOC_NULL: /* NULL */
2262
case SEQLOC_EMPTY: /* empty */
2264
case SEQLOC_WHOLE: /* whole */
2265
sidp = (SeqIdPtr)(head->data.ptrvalue);
2266
if (SeqIdIn(sidp, oldids))
2268
/* tests whether the window is something other than the entire Bioseq */
if ((from != 0) || (to != (oldbsp->length - 1)))
2272
newhead = ValNodeNew(NULL);
2274
sip2->id = SeqIdDup(newid);
2276
sip2->to = to - from;
2277
newhead->choice = SEQLOC_INT;
2278
newhead->data.ptrvalue = (Pointer)sip2;
2279
if (strand == Seq_strand_minus)
2280
sip2->strand = Seq_strand_minus;
2283
case SEQLOC_EQUIV: /* does it stay equiv? */
2284
case SEQLOC_MIX: /* mix -- more than one seq */
2285
case SEQLOC_PACKED_INT: /* packed int */
2288
dropped_one = FALSE;
2289
for (slp = (SeqLocPtr)(head->data.ptrvalue); slp != NULL; slp = next)
2292
tmp = SeqLocCopyRegion(newid, slp, oldbsp, from, to, strand, split);
2297
/* two consecutive intervals that abut on the same strand are merged into
   the earlier one and the later one is freed */
if ((prev->choice == SEQLOC_INT) && (tmp->choice == SEQLOC_INT))
2299
sip = (SeqIntPtr)(prev->data.ptrvalue);
2300
sip2 = (SeqIntPtr)(tmp->data.ptrvalue);
2302
if ((sip->strand == Seq_strand_minus) &&
2303
(sip2->strand == Seq_strand_minus))
2305
if (sip->from == (sip2->to + 1))
2307
sip->from = sip2->from;
2308
/* fuzz ownership moves to sip; clearing it on sip2 keeps SeqLocFree from
   releasing it */
sip->if_from = sip2->if_from;
2309
sip2->if_from = NULL;
2310
tmp = SeqLocFree(tmp);
2313
else if((sip->strand != Seq_strand_minus) &&
2314
(sip2->strand != Seq_strand_minus))
2316
if (sip->to == (sip2->from - 1))
2319
sip->if_to = sip2->if_to;
2321
tmp = SeqLocFree(tmp);
2325
/* never keep two NULLs in a row */
else if ((prev->choice == SEQLOC_NULL) && (tmp->choice == SEQLOC_NULL))
2327
tmp = SeqLocFree(tmp);
2331
else if (tmp->choice == SEQLOC_NULL)
2333
tmp = SeqLocFree(tmp);
2337
if (tmp != NULL) /* still have one? */
2353
if (prev->choice == SEQLOC_NULL) /* ends with NULL */
2356
for (slp = thead; slp->next != NULL; slp = slp->next)
2365
thead = SeqLocFree(thead);
2376
/* count interval vs. other members to choose the container type below */
for (slp = thead; slp != NULL; slp = slp->next)
2378
if (slp->choice == SEQLOC_INT)
2383
if ((intcnt + othercnt) > 1)
2385
newhead = ValNodeNew(NULL);
2386
if (head->choice == SEQLOC_EQUIV)
2387
newhead->choice = SEQLOC_EQUIV;
2391
newhead->choice = SEQLOC_PACKED_INT;
2393
newhead->choice = SEQLOC_MIX;
2396
newhead->data.ptrvalue = (Pointer)thead;
2398
else /* only one SeqLoc left */
2403
case SEQLOC_INT: /* int */
2404
sip = (SeqIntPtr)(head->data.ptrvalue);
2405
if (SeqIdIn(sip->id, oldids))
2407
if (sip->to < from) /* completely before cut */
2409
if (sip->from > to) /* completely after cut */
2413
sip2->id = SeqIdDup(newid);
2414
sip2->strand = sip->strand;
2421
ifp->choice = 4; /* lim */
2422
ifp->a = 1; /* greater than */
2428
/* the source fuzz is copied by value into a separately held IntFuzz */
if (sip->if_to != NULL)
2431
MemCopy((Pointer)ifp, (Pointer)(sip->if_to), sizeof(IntFuzz));
2436
/* interval starts left of the window: a "less than" limit fuzz is attached
   to the new start */
if (sip->from < from)
2441
ifp->choice = 4; /* lim */
2442
ifp->a = 2; /* less than */
2443
sip2->if_from = ifp;
2447
sip2->from = sip->from;
2448
if (sip->if_from != NULL)
2451
MemCopy((Pointer)ifp, (Pointer)(sip->if_from), sizeof(IntFuzz));
2452
sip2->if_from = ifp;
2455
/* set to region coordinates */
2458
IntFuzzClip(sip2->if_from, from, to, strand, split);
2459
IntFuzzClip(sip2->if_to, from, to, strand, split);
2461
if (strand == Seq_strand_minus) /* rev comp */
2463
sip2->strand = StrandCmp(sip2->strand);
2464
/* mirror both ends inside the window: position p maps to len - p - 1 */
tpos = len - sip2->from - 1;
2465
sip2->from = len - sip2->to - 1;
2467
/* IntFuzz already complemented by IntFuzzClip */
2468
/* just switch order */
2469
ifp = sip2->if_from;
2470
sip2->if_from = sip2->if_to;
2474
newhead = ValNodeNew(NULL);
2475
newhead->choice = SEQLOC_INT;
2476
newhead->data.ptrvalue = (Pointer)sip2;
2479
case SEQLOC_PNT: /* pnt */
2480
spp = (SeqPntPtr)(head->data.ptrvalue);
2481
if (SeqIdIn(spp->id, oldids))
2483
/* a point survives only if it lies inside the window */
if ((spp->point >= from) && (spp->point <= to))
2486
spp2->id = SeqIdDup(newid);
2487
spp2->point = spp->point - from;
2488
spp2->strand = spp->strand;
2489
if (spp->fuzz != NULL)
2493
MemCopy((Pointer)ifp, (Pointer)spp->fuzz, sizeof(IntFuzz));
2494
IntFuzzClip(ifp, from, to, strand, split);
2496
if (strand == Seq_strand_minus)
2498
spp2->point = len - spp2->point - 1;
2499
spp2->strand = StrandCmp(spp->strand);
2501
newhead = ValNodeNew(NULL);
2502
newhead->choice = SEQLOC_PNT;
2503
newhead->data.ptrvalue = (Pointer)spp2;
2507
case SEQLOC_PACKED_PNT: /* packed pnt */
2508
pspp = (PackSeqPntPtr)(head->data.ptrvalue);
2509
if (SeqIdIn(pspp->id, oldids))
2511
numpnt = PackSeqPntNum(pspp);
2512
pspp2 = PackSeqPntNew();
2513
pspp2->strand = pspp->strand;
2514
intcnt = 0; /* use for included points */
2515
othercnt = 0; /* use for excluded points */
2516
for (i = 0; i < numpnt; i++)
2518
tpos = PackSeqPntGet(pspp, i);
2519
if ((tpos < from) || (tpos > to))
2526
PackSeqPntPut(pspp2, tpos - from);
2529
if (! intcnt) /* no points in region */
2531
PackSeqPntFree(pspp2);
2536
if (pspp->fuzz != NULL)
2539
MemCopy((Pointer)ifp, (Pointer)(pspp->fuzz), sizeof(IntFuzz));
2544
if (strand == Seq_strand_minus) /* rev comp */
2546
IntFuzzClip(ifp, from, to, strand, split);
2548
/* a second packed point is built holding the mirrored points in reverse
   order */
pspp2 = PackSeqPntNew();
2549
pspp2->strand = StrandCmp(pspp->strand);
2550
numpnt = PackSeqPntNum(pspp);
2552
/* NOTE(review): the loop starts at index numpnt; verify numpnt has been
   reduced to the last valid index before this loop, otherwise the first
   PackSeqPntGet asks for one position past the end */
for (i = numpnt; i >= 0; i--) /* reverse order */
2554
tpos = PackSeqPntGet(pspp, i);
2555
PackSeqPntPut(pspp2, (len - tpos - 1));
2557
PackSeqPntFree(pspp);
2559
pspp2->id = SeqIdDup(newid);
2562
newhead = ValNodeNew(NULL);
2563
newhead->choice = SEQLOC_PACKED_PNT;
2564
newhead->data.ptrvalue = (Pointer)pspp2;
2575
/*****************************************************************************
2578
* returns TRUE if clipped range values
2579
* in all cases, adjusts and/or complements IntFuzz
2580
* Designed for IntFuzz on SeqLocs
2582
*****************************************************************************/
2583
/* IntFuzzClip: adjust an Int-fuzz in place for a copy of the window
 * from..to (len = to - from + 1).
 *   ifp      - fuzz to adjust; NULL is accepted and ignored
 *   from, to - window in source coordinates
 *   strand   - Seq_strand_minus additionally reverse-complements the fuzz
 *   split    - caller-supplied flag pointer
 * Dispatch is on ifp->choice: plus/minus (1) and percent (3) are left
 * alone; for the range form, a (max) and b (min) are checked against "to",
 * shifted by "from", and on the minus strand replaced by len - value with
 * the two ends exchanged; for the limit form the inner switch deals with the
 * limit codes (1 greater than, 2 less than, 3 to right of residue, 4 to
 * left of residue) on the minus strand.
 * NOTE(review): the function is declared void although the header comment
 * says it "returns TRUE if clipped range values"; presumably that result is
 * reported through *split - confirm. */
NLM_EXTERN void LIBCALL IntFuzzClip(IntFuzzPtr ifp, Int4 from, Int4 to, Uint1 strand, BoolPtr split)
2587
if (ifp == NULL) return;
2588
len = to - from + 1;
2589
switch (ifp->choice)
2591
case 1: /* plus/minus - no changes */
2592
case 3: /* percent - no changes */
2595
if (ifp->a > to) /* max */
2605
if (ifp->b > to) /* min */
2615
ifp->a -= from; /* adjust to window */
2617
if (strand == Seq_strand_minus)
2619
/* tmp holds the mirrored max while a receives the mirrored min */
tmp = len - ifp->a; /* reverse/complement */
2620
ifp->a = len - ifp->b;
2625
if (strand == Seq_strand_minus) /* reverse/complement */
2629
case 1: /* greater than */
2632
case 2: /* less than */
2635
case 3: /* to right of residue */
2638
case 4: /* to left of residue */
2650
/*****************************************************************************
2652
* BioseqInsert (from_id, from, to, strand, to_id, pos, from_feat, to_feat,
2654
* Inserts a copy of the region "from"-"to" on "strand" of the Bioseq
2655
* identified by "from_id" into the Bioseq identified by "to_id"
2657
* if from_feat = TRUE, copies the feature table from "from" and updates
2658
* the locations to point to the proper residues in "to_id"
2659
* If to_feat = TRUE, updates feature table on "to_id" as well.
2660
* if do_split == TRUE, then splits features in "to_id" (to_feat must
2661
* be TRUE as well). Otherwise expands features at insertion.
2663
* All operations are copies. "frombsp" is unchanged.
2664
* Insert will only occur between certain Bioseq.repr classes as below
2666
* From Bioseq.repr To Bioseq.repr
2668
* virtual raw segmented map
2669
* +---------------------------------------------------
2670
* virtual | length inst SeqLoc length
2671
* +---------------------------------------------------
2672
* raw | error copy SeqLoc error
2673
* +---------------------------------------------------
2674
* segmented | error inst SeqLoc* error
2675
* +---------------------------------------------------
2676
* map | error inst* SeqLoc copy
2677
* +---------------------------------------------------
2679
* length = changes length of "to" by length of "from"
2680
* error = insertion not allowed
2681
* inst = "from" instantiated as residues ("N" or "X" for virtual "from")
2682
* inst* = as above, but a restriction map can instantiate other bases
2683
* than "N" for known restriction recognition sites.
2684
* copy = copy of "from" inserted into "to"
2685
* SeqLoc = a SeqLoc added to "to" which points to "from". No copy of residues.
2686
* SeqLoc* = as above, but note that "to" points to "from" directly, not
2687
* what "from" itself may point to.
2689
*****************************************************************************/
2690
/* BioseqInsert: insert a copy of from..to (on strand) of the Bioseq from_id
 * into the Bioseq to_id before position pos.
 *   from_id, to_id - source and destination ids; either NULL returns FALSE
 *   from, to       - source range; "to" may be LAST_RESIDUE
 *   pos            - insertion point; LAST_RESIDUE and APPEND_RESIDUE are
 *                    accepted as symbolic values, otherwise 0..length
 *   from_feat      - also copy the source's features into the destination
 *   to_feat        - also shift/split the destination's existing features
 *   do_split       - passed to SeqLocInsert for the destination's features
 * Both Bioseqs are located with BioseqFind, retrying with scope cleared.
 * Source and destination must both be nucleic acid or both not.  The
 * insertion itself depends on tobsp->repr (virtual, raw/const, seg/ref,
 * map) as tabulated in the header comment; an unhandled combination returns
 * FALSE.  On success tobsp->length grows by the inserted length. */
NLM_EXTERN Boolean LIBCALL BioseqInsert (SeqIdPtr from_id, Int4 from, Int4 to, Uint1 strand, SeqIdPtr to_id, Int4 pos,
2691
Boolean from_feat, Boolean to_feat, Boolean do_split)
2693
BioseqPtr tobsp, frombsp;
2694
Int4 len, i, ctr, tlen;
2695
Boolean from_type, to_type;
2697
SeqAnnotPtr sap, newsap;
2698
SeqFeatPtr sfp, newsfp, prevsfp, sfphead = NULL;
2699
BioseqContextPtr bcp;
2700
Boolean handled = FALSE;
2703
Boolean split, added = FALSE;
2704
SeqLocPtr newloc, curr, head, tloc, xloc, yloc, fake;
2707
CodeBreakPtr cbp, prevcbp, nextcbp;
2710
SeqEntryPtr oldscope;
2713
if ((from_id == NULL) || (to_id == NULL)) return FALSE;
2715
tobsp = BioseqFind(to_id);
2716
/* not found in the current scope: retry with scope cleared, then restore */
if (tobsp == NULL) {
2717
oldscope = SeqEntrySetScope (NULL);
2718
if (oldscope != NULL) {
2719
tobsp = BioseqFind(to_id);
2720
SeqEntrySetScope (oldscope);
2723
if (tobsp == NULL) return FALSE;
2725
/* len is first the destination length, used to validate pos */
len = BioseqGetLen(tobsp);
2727
if (pos == LAST_RESIDUE)
2729
else if (pos == APPEND_RESIDUE)
2732
if ((pos < 0) || (pos > len)) return FALSE;
2734
frombsp = BioseqFind(from_id);
2735
if (frombsp == NULL) {
2736
oldscope = SeqEntrySetScope (NULL);
2737
if (oldscope != NULL) {
2738
frombsp = BioseqFind(from_id);
2739
SeqEntrySetScope (oldscope);
2742
if (frombsp == NULL) return FALSE;
2744
from_type = ISA_na(frombsp->mol);
2745
to_type = ISA_na(tobsp->mol);
2747
if (from_type != to_type) return FALSE;
2749
/* len is now the source length, used to validate from/to */
len = BioseqGetLen(frombsp);
2750
if (to == LAST_RESIDUE)
2753
if ((from < 0) || (to >= len)) return FALSE;
2755
/* and finally the number of residues being inserted */
len = to - from + 1;
2757
if (tobsp->repr == Seq_repr_virtual)
2759
if (frombsp->repr != Seq_repr_virtual)
2762
handled = TRUE; /* just length and features */
2765
if ((tobsp->repr == Seq_repr_raw) || (tobsp->repr == Seq_repr_const))
2767
if (ISA_na(tobsp->mol))
2769
seqtype = Seq_code_iupacna;
2773
seqtype = Seq_code_ncbieaa;
2776
/* destination data is converted to the working alphabet if necessary,
   then a gap of len bytes is opened at pos to receive the residues */
if (tobsp->seq_data_type != seqtype)
2777
BioseqRawConvert(tobsp, seqtype);
2778
BSSeek(tobsp->seq_data, pos, SEEK_SET);
2779
Nlm_BSAdd(tobsp->seq_data, len, FALSE);
2783
/* NOTE(review): no NULL check on spp is visible before it is read from,
   whereas BioseqCopy checks its SeqPortNew result - confirm */
spp = SeqPortNew(frombsp, from, to, strand, seqtype);
2784
while ((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF)
2786
if (! IS_residue(residue))
2788
ErrPost(CTX_NCBIOBJ, 1, "Non-residue in BioseqInsert [%d]",
2793
BSPutByte(tobsp->seq_data, residue);
2801
ErrPost(CTX_NCBIOBJ, 1, "Tried to insert %ld residues but %ld went in",
2809
if ((tobsp->repr == Seq_repr_seg) || (tobsp->repr == Seq_repr_ref))
2812
/* tloc is an interval on from_id that will be spliced into the
   destination's location list (no residues are copied on this path) */
sip->id = SeqIdDup(from_id);
2815
sip->strand = strand;
2816
tloc = ValNodeNew(NULL);
2817
tloc->choice = SEQLOC_INT;
2818
tloc->data.ptrvalue = (Pointer)sip;
2820
if (tobsp->repr == Seq_repr_seg)
2822
/* wrap the segment chain in a MIX node so SeqLocFindNext can walk it */
fake = ValNodeNew(NULL);
2823
fake->choice = SEQLOC_MIX;
2824
fake->data.ptrvalue = (Pointer)(tobsp->seq_ext);
2827
fake = (SeqLocPtr)(tobsp->seq_ext);
2830
/* rebuild the location list in head, adding tloc where the running
   offset ctr reaches pos */
while ((curr = SeqLocFindNext(fake, curr)) != NULL)
2832
if ((! added) && (ctr == pos))
2834
newloc = SeqLocAdd(&head, tloc, TRUE, TRUE);
2837
tlen = SeqLocLen(curr);
2838
if ((! added) && ((ctr + tlen) > pos)) /* split interval */
2841
/* copy the segment, split the copy at the insertion point with a
   zero-length SeqLocInsert, and add left part, tloc, right part */
xloc = SeqLocAdd(&yloc, curr, TRUE, TRUE);
2842
i = (pos - ctr) + SeqLocStart(curr);
2843
newloc = SeqLocInsert(xloc, SeqLocId(xloc), i, 0, TRUE, NULL);
2845
yloc = newloc->next;
2846
SeqLocAdd(&head, xloc, TRUE, TRUE);
2847
SeqLocAdd(&head, tloc, TRUE, TRUE);
2848
SeqLocAdd(&head, yloc, TRUE, TRUE);
2854
newloc = SeqLocAdd(&head, curr, TRUE, TRUE);
2857
/* insertion point is at the very end of the existing segments */
if ((! added) && (ctr == pos))
2859
newloc = SeqLocAdd(&head, tloc, TRUE, TRUE);
2864
if (tobsp->repr == Seq_repr_seg)
2866
tobsp->seq_ext = (Pointer)head;
2870
tobsp->seq_ext = SeqLocPackage(head);
2875
if (tobsp->repr == Seq_repr_map)
2877
if (! ((frombsp->repr == Seq_repr_map) || (frombsp->repr == Seq_repr_virtual)))
2881
/* shift the destination map's existing features to make room */
for (sfp = (SeqFeatPtr)(tobsp->seq_ext); sfp != NULL; sfp = sfp->next)
2883
sfp->location = SeqLocInsert(sfp->location, to_id, pos, len, TRUE, NULL);
2887
if (frombsp->repr == Seq_repr_map)
2889
for (sfp = (SeqFeatPtr)(frombsp->seq_ext); sfp != NULL; sfp = sfp->next)
2892
newloc = SeqLocCopyRegion(to_id, sfp->location, frombsp, from, to, strand, &split);
2893
if (newloc != NULL) /* got one */
2895
newsfp = (SeqFeatPtr)AsnIoMemCopy((Pointer)sfp, (AsnReadFunc)SeqFeatAsnRead, (AsnWriteFunc)SeqFeatAsnWrite);
2896
SeqLocFree(newsfp->location);
2897
newsfp->location = newloc;
2899
newsfp->partial = TRUE;
2901
if (prevsfp == NULL)
2902
tobsp->seq_ext = (Pointer)newsfp;
2904
prevsfp->next = newsfp;
2907
newsfp->location = SeqLocInsert(newsfp->location, to_id, 0,
2915
if (! handled) return FALSE;
2917
tobsp->length += len;
2919
if (to_feat) /* fix up to_id Bioseq feature table(s) */
2921
bcp = BioseqContextNew(tobsp);
2923
/* adjust features pointing by location */
2924
while ((sfp = BioseqContextGetSeqFeat(bcp, 0, sfp, NULL, 0)) != NULL)
2926
sfp->location = SeqLocInsert(sfp->location, to_id,pos, len, do_split, NULL);
2927
switch (sfp->data.choice)
2929
case SEQFEAT_CDREGION: /* cdregion */
2930
crp = (CdRegionPtr)(sfp->data.value.ptrvalue);
2932
for (cbp = crp->code_break; cbp != NULL; cbp = nextcbp)
2934
nextcbp = cbp->next;
2935
cbp->loc = SeqLocInsert(cbp->loc, to_id,pos, len, do_split, NULL);
2936
/* a code-break with no location left is unlinked from the list */
if (cbp->loc == NULL)
2938
if (prevcbp != NULL)
2939
prevcbp->next = nextcbp;
2941
crp->code_break = nextcbp;
2950
rrp = (RnaRefPtr)(sfp->data.value.ptrvalue);
2951
if (rrp->ext.choice == 2) /* tRNA */
2953
trp = (tRNAPtr)(rrp->ext.value.ptrvalue);
2954
if (trp->anticodon != NULL)
2956
trp->anticodon = SeqLocInsert(trp->anticodon, to_id,pos, len, do_split, NULL);
2966
/* adjust features pointing by product */
2967
while ((sfp = BioseqContextGetSeqFeat(bcp, 0, sfp, NULL, 1)) != NULL)
2968
sfp->product = SeqLocInsert(sfp->product, to_id,pos, len, do_split, NULL);
2970
BioseqContextFree(bcp);
2973
if (from_feat) /* add source Bioseq features to to_id */
2975
bcp = BioseqContextNew(frombsp);
2976
sfp = NULL; /* NOTE: should make NEW feature table */
2978
/* is there an old feature table to use? */
2979
for (newsap = tobsp->annot; newsap != NULL; newsap = newsap->next)
2981
if (newsap->type == 1) /* feature table */
2985
{ /* create a new one if necessary */
2986
/* scan the existing table for its last feature */
for (prevsfp = (SeqFeatPtr)(newsap->data); prevsfp != NULL;
2987
prevsfp = prevsfp->next)
2989
if (prevsfp->next == NULL)
2993
/* get features by location */
2994
while ((sfp = BioseqContextGetSeqFeat(bcp, 0, sfp, NULL, 0)) != NULL)
2995
{ /* copy all old features */
2997
newloc = SeqLocCopyRegion(to_id, sfp->location, frombsp, from, to, strand, &split);
2998
if (newloc != NULL) /* got one */
3000
newsfp = (SeqFeatPtr)AsnIoMemCopy((Pointer)sfp, (AsnReadFunc)SeqFeatAsnRead, (AsnWriteFunc)SeqFeatAsnWrite);
3001
SeqLocFree(newsfp->location);
3002
newsfp->location = newloc;
3005
newsfp->partial = TRUE;
3007
if (prevsfp == NULL)
3010
prevsfp->next = newsfp;
3013
/* the copied location is relative to the copied region; a second pass
   through SeqLocInsert moves it to its place in the destination */
newsfp->location = SeqLocInsert(newsfp->location, to_id, 0,
3015
switch (newsfp->data.choice)
3017
case SEQFEAT_CDREGION: /* cdregion */
3018
crp = (CdRegionPtr)(newsfp->data.value.ptrvalue);
3020
for (cbp = crp->code_break; cbp != NULL; cbp = nextcbp)
3022
nextcbp = cbp->next;
3023
/* NOTE(review): the previous cbp->loc is overwritten with no visible free,
   unlike newsfp->location above - possible leak, confirm */
cbp->loc = SeqLocCopyRegion(to_id, cbp->loc, frombsp, from, to, strand, &split);
3024
if (cbp->loc == NULL)
3026
if (prevcbp != NULL)
3027
prevcbp->next = nextcbp;
3029
crp->code_break = nextcbp;
3035
cbp->loc = SeqLocInsert(cbp->loc, to_id, 0,
3042
rrp = (RnaRefPtr)(newsfp->data.value.ptrvalue);
3043
if (rrp->ext.choice == 2) /* tRNA */
3045
trp = (tRNAPtr)(rrp->ext.value.ptrvalue);
3046
if (trp->anticodon != NULL)
3048
trp->anticodon = SeqLocCopyRegion(to_id, trp->anticodon, frombsp, from, to, strand, &split);
3049
trp->anticodon = SeqLocInsert(trp->anticodon, to_id, 0,
3061
/* get features by product */
3062
while ((sfp = BioseqContextGetSeqFeat(bcp, 0, sfp, NULL, 1)) != NULL)
3063
{ /* copy all old features */
3065
newloc = SeqLocCopyRegion(to_id, sfp->product, frombsp, from, to, strand, &split);
3066
if (newloc != NULL) /* got one */
3068
newsfp = (SeqFeatPtr)AsnIoMemCopy((Pointer)sfp, (AsnReadFunc)SeqFeatAsnRead, (AsnWriteFunc)SeqFeatAsnWrite);
3069
SeqLocFree(newsfp->product);
3070
newsfp->product = newloc;
3072
newsfp->partial = TRUE;
3074
if (prevsfp == NULL)
3077
prevsfp->next = newsfp;
3080
newsfp->product = SeqLocInsert(newsfp->product, to_id, 0, pos,
3084
BioseqContextFree(bcp);
3087
if (sfphead != NULL) /* orphan chain of seqfeats to attach */
3091
/* find the end of the destination's annot chain, then attach a new
   annotation that takes the collected features */
for (sap = tobsp->annot; sap != NULL; sap = sap->next)
3093
if (sap->next == NULL)
3096
newsap = SeqAnnotNew();
3099
tobsp->annot = newsap;
3104
newsap->data = (Pointer)sfphead;
3111
/*****************************************************************************
3114
* alters "head" by inserting "len" residues before "pos" in any SeqLoc
3115
* on the Bioseq "target"
3116
* all SeqLocs not on "target" are unaltered
3117
* for SeqLocs on "target"
3118
* all SeqLocs before "pos" are unaltered
3119
* all SeqLocs >= "pos" are incremented by "len"
3120
* all SeqLocs spanning "pos"
3121
* if "split" == TRUE, are split into two SeqLocs, one to the
3122
* left of the insertion, the other to right
3123
* if "split" != TRUE, the SeqLoc is increased in length to cover
3125
* returns altered head or NULL if nothing left.
3126
* if ("newid" != NULL) replaces "target" with "newid" whether the
3127
* SeqLoc is altered or not.
3130
* 1) To update a feature location on "target" when 10 residues of
3131
* sequence have been inserted before position 5
3132
* SeqFeatPtr->location = SeqLocInsert ( SeqFeatPtr->location ,
3133
* "target", 5, 10, TRUE, NULL); [for some feature types
3134
* you may want "split" equal FALSE]
3135
* 2) To insert the complete feature table from "source" into a
3136
* different Bioseq "dest" before position 20 in "dest"
3137
* SFP->location = SeqLocInsert(SFP->location, "source", 0, 20,
3141
*****************************************************************************/
3142
/*
 * SeqLocInsert: adjusts the location "head" for an insertion of "len"
 * residues before "pos" on the Bioseq identified by "target", dispatching
 * on head->choice.  Container locations (mix, equiv, packed-int) are
 * handled by calling SeqLocInsert on each member; a bond is handled by
 * running each of its points through SeqLocInsert via a ValNode tagged as
 * a point.  In the interval, point and packed-point arms a non-NULL
 * "newid" replaces the Seq-id of locations that are on "target".
 *
 * NOTE(review): this excerpt is missing lines (braces, returns, some
 * declarations and statements); the comments below describe only what the
 * visible statements do, and hedge where a step cannot be seen.
 */
NLM_EXTERN SeqLocPtr LIBCALL SeqLocInsert (SeqLocPtr head, SeqIdPtr target, Int4 pos, Int4 len,
3143
Boolean split, SeqIdPtr newid)
3145
SeqIntPtr sip, sip2;
3147
PackSeqPntPtr pspp, pspp2;
3149
SeqLocPtr slp, tmp, prev, next, thead, tmp2;
3150
Int4 diff, numpnt, i, tpos;
3155
/* guard: both a location and a target id are required
   (presumably head is returned unchanged -- confirm) */
if ((head == NULL) || (target == NULL))
3158
head->next = NULL; /* caller maintains chains */
3162
switch (head->choice)
3164
case SEQLOC_BOND: /* bond -- 2 seqs */
3166
/* vn is a ValNode object (passed by address below) tagged as a point,
   so each end of the bond can be processed by the SEQLOC_PNT arm */
vn.choice = SEQLOC_PNT;
3168
sbp = (SeqBondPtr)(head->data.ptrvalue);
3169
/* first point of the bond: lend it to vn, recurse, take it back */
vn.data.ptrvalue = (Pointer)(sbp->a);
3170
SeqLocInsert(&vn, target, pos, len, split, newid);
3171
sbp->a = (SeqPntPtr)(vn.data.ptrvalue);
3174
/* second point, same treatment
   NOTE(review): any NULL check on sbp->b is not visible here -- confirm */
vn.data.ptrvalue = (Pointer)(sbp->b);
3175
SeqLocInsert(&vn, target, pos, len, split, newid);
3176
sbp->b = (SeqPntPtr)(vn.data.ptrvalue);
3179
case SEQLOC_FEAT: /* feat -- can't track yet */
3180
case SEQLOC_NULL: /* NULL */
3182
case SEQLOC_EMPTY: /* empty */
3183
case SEQLOC_WHOLE: /* whole */
3186
/* here data.ptrvalue is the Seq-id itself */
sidp = (SeqIdPtr)(head->data.ptrvalue);
3187
if (SeqIdForSameBioseq(sidp, target))
3190
/* store a copy of newid as the location's id
   NOTE(review): the newid NULL test and the release of the old id are
   not visible in this excerpt -- confirm */
sidp = SeqIdDup(newid);
3191
head->data.ptrvalue = (Pointer)sidp;
3195
case SEQLOC_MIX: /* mix -- more than one seq */
3196
case SEQLOC_EQUIV: /* equiv -- ditto */
3197
case SEQLOC_PACKED_INT: /* packed int */
3200
/* visit every member location; "next" supplies the successor
   (its assignment is not visible here) */
for (slp = (SeqLocPtr)(head->data.ptrvalue); slp != NULL; slp = next)
3203
/* remember the member's type so a type change after the recursive
   call can be detected */
oldchoice = slp->choice;
3204
tmp = SeqLocInsert(slp, target, pos, len, split, newid);
3207
if ((head->choice != SEQLOC_EQUIV) &&
3208
(oldchoice != tmp->choice)) /* split interval? */
3210
/* an interval that came back as a packed-int was split in two */
if ((oldchoice == SEQLOC_INT) &&
3211
(tmp->choice == SEQLOC_PACKED_INT))
3214
/* tmp now points at the first member held in tmp2's payload */
tmp = (SeqLocPtr)(tmp2->data.ptrvalue);
3216
/* walks along the member chain until its last node
   (loop body not visible -- presumably tmp = tmp->next) */
while (tmp->next != NULL)
3234
/* thead is presumably the head of the rebuilt member chain; it becomes
   the container's payload */
head->data.ptrvalue = thead;
3236
/* head takes SeqLocFree's return value
   NOTE(review): the condition guarding this free is not visible --
   presumably it applies when no members remain; confirm */
head = SeqLocFree(head);
3238
case SEQLOC_INT: /* int */
3239
sip = (SeqIntPtr)(head->data.ptrvalue);
3240
/* only intervals on the target Bioseq are touched */
if (SeqIdForSameBioseq(sip->id, target))
3242
if (newid != NULL) /* change id? */
3245
/* NOTE(review): a SeqIdFree of the old sip->id is not visible before
   this assignment -- confirm it is released */
sip->id = SeqIdDup(newid);
3248
if (sip->to < pos) /* completely before insertion */
3253
if ((! split) || (sip->from >= pos)) /* interval unbroken */
3255
/* interval starts at or after the insertion point */
if (sip->from >= pos)
3261
/* split interval */
3263
/* build a second SEQLOC_INT node (slp) around sip2 for the part of the
   interval to the right of the insertion; sip2's allocation is not
   visible here */
slp = ValNodeNew(NULL);
3264
slp->choice = SEQLOC_INT;
3265
slp->data.ptrvalue = (Pointer)sip2;
3266
sip2->strand = sip->strand;
3267
sip2->id = SeqIdDup(sip->id);
3269
/* right-hand piece spans pos+len .. old end shifted by len */
sip2->to = sip->to + len;
3270
sip2->from = pos + len;
3271
sip2->if_to = sip->if_to;
3276
/* on the minus strand the two SeqInt payloads trade places between
   head and slp */
if (sip->strand == Seq_strand_minus) /* reverse order */
3278
head->data.ptrvalue = (Pointer)sip2;
3279
slp->data.ptrvalue = (Pointer)sip;
3282
thead = head; /* make split interval into PACKED_INT */
3283
/* head becomes a fresh SEQLOC_PACKED_INT node whose payload is the
   old head node */
head = ValNodeNew(NULL);
3284
head->choice = SEQLOC_PACKED_INT;
3285
head->data.ptrvalue = thead;
3289
case SEQLOC_PNT: /* pnt */
3290
spp = (SeqPntPtr)(head->data.ptrvalue);
3291
if (SeqIdForSameBioseq(spp->id, target))
3293
if (newid != NULL) /* change id? */
3296
/* NOTE(review): a SeqIdFree of the old spp->id is not visible before
   this assignment -- confirm it is released */
spp->id = SeqIdDup(newid);
3299
/* points at or after pos are affected (the adjustment itself is not
   visible in this excerpt) */
if (spp->point >= pos)
3303
case SEQLOC_PACKED_PNT: /* packed pnt */
3304
pspp = (PackSeqPntPtr)(head->data.ptrvalue);
3305
if (SeqIdForSameBioseq(pspp->id, target))
3307
if (newid != NULL) /* change id? */
3309
SeqIdFree(pspp->id);
3310
pspp->id = SeqIdDup(newid);
3313
/* rebuild the point list in a fresh PackSeqPnt that replaces the old
   one as head's payload */
numpnt = PackSeqPntNum(pspp);
3314
pspp2 = PackSeqPntNew();
3315
head->data.ptrvalue = pspp2;
3316
for (i = 0; i < numpnt; i++)
3318
/* read point i from the old list and append it to the new one
   (presumably tpos is shifted in between -- not visible here) */
tpos = PackSeqPntGet(pspp, i);
3321
PackSeqPntPut(pspp2, tpos);
3323
/* carry id, fuzz and strand over to the replacement */
pspp2->id = pspp->id;
3325
pspp2->fuzz = pspp->fuzz;
3327
pspp2->strand = pspp->strand;
3328
/* NOTE(review): pspp2 now shares pspp's id and fuzz pointers; presumably
   pspp's fields are set to NULL on lines not visible here before this
   free -- confirm, otherwise pspp2 could be left with dangling pointers */
PackSeqPntFree(pspp);
3336
/* error report that a location was lost (the triggering condition is
   not visible in this excerpt) */
ErrPost(CTX_NCBIOBJ, 1, "SeqLocInsert: lost a SeqLoc");
3341
/*****************************************************************************
3343
* SeqLocSubtract (SeqLocPtr head, SeqLocPtr piece)
3344
* Deletes piece from head.
3345
* head may be changed.
3346
* returns the changed head.
3348
*****************************************************************************/
3349
/*
 * SeqLocSubtract: removes from "head" the range covered by each component
 * of "piece", by calling SeqLocDelete once per component.  "head" is
 * reassigned from SeqLocDelete's result on every pass.
 * NOTE(review): some declarations and the return statements are not
 * visible in this excerpt.
 */
NLM_EXTERN SeqLocPtr LIBCALL SeqLocSubtract (SeqLocPtr head, SeqLocPtr piece)
3351
/* iteration cursor for SeqLocFindNext; NULL starts at the beginning */
SeqLocPtr slp = NULL;
3354
/* receives SeqLocDelete's change flag; not read in the visible lines */
Boolean changed = FALSE;
3356
/* guard: nothing to subtract from, or nothing to subtract
   (presumably returns head as is -- confirm) */
if ((head == NULL) || (piece == NULL))
3359
/* walk every component location of piece */
while ((slp = SeqLocFindNext(piece, slp)) != NULL)
3361
/* id and extent of this component */
sip = SeqLocId(slp);
3362
from = SeqLocStart(slp);
3363
to = SeqLocStop(slp);
3364
/* delete that range from head; the FALSE argument is presumably the
   "merge" option of SeqLocDelete -- confirm against its prototype */
head = SeqLocDelete(head, sip, from, to, FALSE, &changed);
3370
/********************************************************************
3373
* replaces the Seq-Id in a Seq-Loc (slp) with a new Seq-Id (new_sip)
3375
**********************************************************************/
3376
/*
 * SeqLocReplaceID: replaces the Seq-id stored in "slp" with a duplicate of
 * "new_sip", dispatching on slp->choice.  Container locations are handled
 * by visiting each component with SeqLocFindNext and recursing.
 * NOTE(review): several case labels, break statements and the return are
 * not visible in this excerpt; no NULL check on slp is visible before it
 * is dereferenced.
 */
NLM_EXTERN SeqLocPtr SeqLocReplaceID (SeqLocPtr slp, SeqIdPtr new_sip)
3380
SeqIntPtr target_sit;
3383
switch (slp->choice) {
3384
case SEQLOC_PACKED_INT :
3388
/* visit each component and replace its id recursively; the result is
   assigned back to the cursor used by SeqLocFindNext */
while ((curr = SeqLocFindNext (slp, curr)) != NULL) {
3389
curr = SeqLocReplaceID (curr, new_sip);
3392
case SEQLOC_PACKED_PNT :
3393
pspp = (PackSeqPntPtr) slp->data.ptrvalue;
3395
/* release the old id, then store a copy of the new one */
SeqIdFree (pspp->id);
3396
pspp->id = SeqIdDup (new_sip);
3401
/* arm where data.ptrvalue is the Seq-id itself (case labels not
   visible here) */
SeqIdFree ((SeqIdPtr) slp->data.ptrvalue);
3402
slp->data.ptrvalue = (Pointer) SeqIdDup (new_sip);
3405
/* arm where the payload is a SeqInt (case label not visible here) */
target_sit = (SeqIntPtr) slp->data.ptrvalue;
3406
SeqIdFree (target_sit->id);
3407
target_sit->id = SeqIdDup (new_sip);
3410
/* arm where the payload is a SeqPnt (case label not visible here) */
spp = (SeqPntPtr)slp->data.ptrvalue;
3412
/* NOTE(review): unlike the arms above, no SeqIdFree of the old spp->id
   is visible before this assignment -- confirm the old id is released */
spp->id = SeqIdDup(new_sip);
3420
/**********************************************************
3422
* NLM_EXTERN SeqLocPtr LIBCALL GapToSeqLoc(range):
3424
* Gets the size of gap and constructs SeqLoc block with
3425
* $(seqlitdbtag) value as Dbtag.db and Dbtag.tag.id = 0.
3427
**********************************************************/
3428
/*
 * GapToSeqLoc: builds a SeqLoc describing a gap of size "range".  The
 * visible statements create a ValNode that starts out as SEQLOC_NULL and
 * is later turned into a SEQLOC_INT whose Seq-id is a SEQID_GENERAL
 * wrapping a Dbtag with db set to a copy of seqlitdbtag.
 * NOTE(review): declarations, the allocation of dp and sip, any test on
 * "range" and the return statements are not visible in this excerpt.
 */
NLM_EXTERN SeqLocPtr LIBCALL GapToSeqLoc(Int4 range)
3438
slp = ValNodeNew(NULL);
3441
/* start as a NULL location (presumably returned as is for some values
   of range -- the condition is not visible; confirm) */
slp->choice = SEQLOC_NULL;
3442
slp->data.ptrvalue = NULL;
3448
/* Dbtag marking this location as a gap: db is a saved copy of
   seqlitdbtag, tag is a new ObjectId with no string value */
dp->db = StringSave(seqlitdbtag);
3449
dp->tag = ObjectIdNew();
3451
dp->tag->str = NULL;
3453
/* wrap the Dbtag in a general Seq-id */
sidp = ValNodeNew(NULL);
3454
sidp->choice = SEQID_GENERAL;
3455
sidp->data.ptrvalue = dp;
3459
/* interval end; presumably the interval starts at 0 so that it covers
   exactly "range" positions -- confirm */
sip->to = range - 1;
3462
/* the location becomes an interval carrying sip */
slp->choice = SEQLOC_INT;
3463
slp->data.ptrvalue = sip;
3468
/**********************************************************
3470
* NLM_EXTERN Boolean LIBCALL ISAGappedSeqLoc(slp):
3472
* Looks at a single SeqLoc item. If it has the SeqId
3473
* of type GENERAL with Dbtag.db == $(seqlitdbtag) and
3474
* Dbtag.tag.id == 0, then returns TRUE, otherwise
3477
**********************************************************/
3478
/*
 * ISAGappedSeqLoc: tests whether the Seq-id of "slp" is a SEQID_GENERAL
 * whose Dbtag has db equal to seqlitdbtag and tag->id equal to 0.
 * NOTE(review): declarations and the return statements following each
 * test are not visible in this excerpt.
 */
NLM_EXTERN Boolean LIBCALL ISAGappedSeqLoc(SeqLocPtr slp)
3486
sip = SeqLocId(slp);
3487
/* reject locations with no id or with an id that is not "general" */
if(sip == NULL || sip->choice != SEQID_GENERAL)
3490
dp = (DbtagPtr) sip->data.ptrvalue;
3491
/* reject incomplete Dbtags so the comparison below is safe */
if(dp == NULL || dp->db == NULL || dp->tag == NULL)
3494
/* gap marker: db matches seqlitdbtag and the numeric tag id is 0 */
if(StringCmp(seqlitdbtag, dp->db) == 0 && dp->tag->id == 0)
3500
/**********************************************************
3502
* NLM_EXTERN DeltaSeqPtr LIBCALL GappedSeqLocsToDeltaSeqs(slp):
3504
* This function is used only in the case where ISAGappedSeqLoc()
3505
* has returned TRUE.
3506
* Converts SeqLoc set to the sequence of DeltaSeqs.
3507
* Gbtag'ed SeqLocs are turned into SeqLits with only the "length"
3508
* element. Regular SeqLocs are saved as they are. Returns
3509
* obtained DeltaSeq.
3511
**********************************************************/
3512
NLM_EXTERN DeltaSeqPtr LIBCALL GappedSeqLocsToDeltaSeqs(SeqLocPtr slp)
3519
dsp = ValNodeNew(NULL);
3523
for(; slp != NULL; slp = slp->next)
3525
if(ISAGappedSeqLoc(slp) != FALSE)
3527
dsp->next = ValNodeNew(NULL);
3529
sip = slp->data.ptrvalue;
3531
slip->length = sip->to - sip->from + 1;
3533
dsp->data.ptrvalue = slip;
3537
dsp->next = ValNodeNew(NULL);
3540
dsp->data.ptrvalue = AsnIoMemCopy((Pointer) slp,
3541
(AsnReadFunc) SeqLocAsnRead,
3542
(AsnWriteFunc) SeqLocAsnWrite);