39
39
* ------- ---------- -----------------------------------------------------
41
41
* $Log: seqport.c,v $
42
* Revision 6.172 2008/02/12 18:56:52 bollin
43
* Made ReverseSeqData and ComplementSeqData extern
45
* Revision 6.171 2007/12/28 16:27:58 kans
46
* in SeqPortStreamSetup, on failure, return -1 if count was 0 to ensure failure signal
48
* Revision 6.170 2007/12/04 19:00:21 kans
49
* SeqPortStreamSeqLoc uses scope to get local (segset) components while allowing multiple simultaneous StreamCache objects
51
* Revision 6.169 2007/05/30 18:10:06 kans
52
* added KNOWN_GAP_AS_PLUS to distinguish known-length from unknown-length gaps, use for validation
54
* Revision 6.168 2007/05/07 13:28:35 kans
55
* added casts for Seq-data.gap (SeqDataPtr, SeqGapPtr, ByteStorePtr)
57
* Revision 6.167 2006/12/26 17:40:27 kans
58
* improvements to SeqPortStreamGap so it properly distinguishes virtual as plus symbol if requested
60
* Revision 6.166 2006/12/20 20:08:24 kans
61
* added SUPPRESS_VIRT_SEQ and STREAM_VIRT_AS_PLUS, moved STREAM_CORRECT_INVAL
63
* Revision 6.165 2006/12/18 15:42:57 kans
64
* made MakeCodeBreakList public so validator can check for unnecessary transl excepts
66
* Revision 6.164 2006/12/14 14:57:24 kans
67
* improvements to gap loops in StreamCacheGetResidue
69
* Revision 6.163 2006/12/13 23:10:00 kans
70
* StreamCacheGetResidue now handles all gap expansion flag choices
72
* Revision 6.162 2006/11/15 18:02:59 kans
73
* ProteinFromCdRegionExEx and TransTableTranslateCdRegionEx take farProdFetchOK argument
75
* Revision 6.161 2006/11/13 20:35:55 kans
76
* STREAM_ALLOW_NEG_GIS also allows gi zero, used for internal purposes
78
* Revision 6.160 2006/11/06 17:16:38 kans
79
* added stream flag to allow negative gi numbers by NCBI ID group
42
81
* Revision 6.159 2006/09/20 17:54:19 kans
43
82
* SeqPortStreamWork checks stack depth overflow indicating recursive sequence definition
1590
1629
SeqPortSetUpAlphabet(spps,
1591
1630
slitp->seq_data_type, newcode);
1592
1631
if (slitp->seq_data != NULL)
1632
spps->bp = (ByteStorePtr)
1594
1633
slitp->seq_data;
1681
1720
curr_code = BioseqGetCode(bsp);
1683
1722
SeqPortSetUpAlphabet(spp, curr_code, newcode);
1684
spp->bp = bsp->seq_data;
1723
spp->bp = (ByteStorePtr) bsp->seq_data;
1686
1725
/* allocate fast lookup caches for 2na or 4na to iupacna or 4na conversion */
2634
2674
static Int4 SeqPortStreamGap (
2641
2683
Char buf [4004];
2643
Boolean expand_gaps, many_dashes, single_dash;
2684
Char ch, gapchar = '-';
2685
Boolean expand_gaps, many_dashes, many_pluses, single_dash;
2646
2688
if (sdp == NULL) return 0;
2690
many_pluses = FALSE;
2692
if ((sdp->flags & SUPPRESS_VIRT_SEQ) != 0) return 0;
2693
if ((sdp->flags & STREAM_VIRT_AS_PLUS) != 0) {
2697
} else if (is_known) {
2698
if ((sdp->flags & KNOWN_GAP_AS_PLUS) != 0) {
2648
2704
expand_gaps = (Boolean) ((sdp->flags & STREAM_GAP_MASK) == STREAM_EXPAND_GAPS);
2649
2705
single_dash = (Boolean) ((sdp->flags & STREAM_GAP_MASK) == GAP_TO_SINGLE_DASH);
2650
2706
many_dashes = (Boolean) ((sdp->flags & STREAM_GAP_MASK) == EXPAND_GAPS_TO_DASHES);
2877
2933
if (bsp == NULL || sdp == NULL) return 0;
2878
2934
if (bsp->repr != Seq_repr_raw && bsp->repr != Seq_repr_const) return 0;
2936
is_na = (Boolean) ISA_na (bsp->mol);
2938
if (bsp->seq_data_type == Seq_code_gap) {
2940
/* support for new Seq-data.gap */
2942
count += SeqPortStreamGap (stop - start + 1, is_na, FALSE, FALSE, sdp);
2947
/* otherwise Seq-data is a byte store */
2949
bs = (ByteStorePtr) bsp->seq_data;
2880
2950
if (bs == NULL) return 0;
2882
2952
alphabet = bsp->seq_data_type;
2884
is_na = (Boolean) ISA_na (bsp->mol);
2886
2954
if (strand == Seq_strand_minus && is_na) {
2887
2955
revcomp = TRUE;
2957
3026
if (slitp->length < 1) return 0;
2959
if (slitp->seq_data == NULL) {
2961
/* literal without sequence data is a virtual gap */
2963
count += SeqPortStreamGap (stop - start + 1, is_na, sdp);
3028
if (slitp->seq_data == NULL || slitp->seq_data_type == Seq_code_gap) {
3030
/* literal without sequence data is a virtual gap, also handle new gap type */
3032
if (slitp->fuzz != NULL) {
3036
count += SeqPortStreamGap (stop - start + 1, is_na, FALSE, is_known, sdp);
3018
3092
sip = SeqLocId (slp);
3019
3093
if (sip == NULL) return 0;
3021
if (sip->choice == SEQID_GI && sip->data.intvalue <= 0) {
3023
/* gi 0 or negative is always a data error, just report and bail */
3095
if (sip->choice == SEQID_GI && sip->data.intvalue <= 0 &&
3096
(Boolean) ((sdp->flags & STREAM_ALLOW_NEG_GIS) == 0)) {
3098
/* gi 0 is always a data error, just report and bail */
3099
/* negative gi sometimes used in-house, allow if flag set */
3025
3101
SeqIdWrite (sip, buf, PRINTID_FASTA_SHORT, sizeof (buf) - 1);
3026
3102
if (parentID != NULL) {
3480
3560
switch (bsp->repr) {
3482
3562
case Seq_repr_virtual :
3483
count += SeqPortStreamGap (stop - start + 1, ISA_na (bsp->mol), sdp);
3563
count += SeqPortStreamGap (stop - start + 1, ISA_na (bsp->mol), TRUE, FALSE, sdp);
3486
3566
case Seq_repr_raw :
3594
3675
if (bsp != NULL) {
3677
entityID = ObjMgrGetEntityIDForPointer (bsp);
3678
sd.scope = GetTopSeqEntryForEntityID (entityID);
3596
3680
count += SeqPortStreamWork (bsp, start, stop, strand, &sd);
3598
3682
} else if (loc != NULL) {
3684
sd.scope = SeqEntryGetScope ();
3600
3686
slp = SeqLocFindNext (loc, NULL);
3601
3687
while (slp != NULL) {
3694
NLM_EXTERN Uint1 StreamCacheGetResidue (
3781
static Boolean StreamCacheRefreshBuffer (
3695
3782
StreamCache PNTR scp
3701
Uint1 residue = '\0';
3787
StreamFlgType flags;
3706
if (scp == NULL) return residue;
3793
if (scp == NULL) return FALSE;
3708
3795
if (scp->ctr >= scp->total) {
3709
3796
scp->offset += (Int4) scp->total;
3711
3798
scp->total = 0;
3713
if (scp->offset < 0 || scp->offset >= scp->length) return residue;
3800
MemSet ((Pointer) &(scp->buf), 0, sizeof (scp->buf));
3802
if (scp->offset < 0 || scp->offset >= scp->length) return FALSE;
3715
3804
stop = MIN (scp->offset + 4000L, scp->length);
3807
if ((flags & STREAM_GAP_MASK) == GAP_TO_SINGLE_DASH || (flags & STREAM_GAP_MASK) == 0) {
3808
/* if expand_gaps_to_dashes not equal to gaps_to_single_dash + stream_gap_mask, need to clear other bits first */
3809
flags |= EXPAND_GAPS_TO_DASHES;
3811
if ((flags & SUPPRESS_VIRT_SEQ) != 0) {
3812
flags ^= SUPPRESS_VIRT_SEQ;
3813
flags |= STREAM_VIRT_AS_PLUS;
3717
3816
if (scp->bsp != NULL) {
3719
3818
SeqPortStreamInt (scp->bsp, scp->offset, stop - 1, Seq_strand_plus,
3720
scp->flags, (Pointer) &(scp->buf), NULL);
3819
flags, (Pointer) &(scp->buf), NULL);
3722
3821
} else if (scp->slp != NULL) {
3741
3840
sl.data.ptrvalue = (Pointer) slp->data.ptrvalue;
3742
3841
sl.next = NULL;
3744
3844
SeqPortStreamInt (&bsq, scp->offset, stop - 1, Seq_strand_plus,
3745
scp->flags, (Pointer) &(scp->buf), NULL);
3845
flags, (Pointer) &(scp->buf), NULL);
3748
3848
scp->total = StringLen (scp->buf);
3854
NLM_EXTERN Uint1 StreamCacheGetResidue (
3855
StreamCache PNTR scp
3859
Uint1 residue = '\0';
3861
if (scp == NULL) return '\0';
3863
if (scp->ctr >= scp->total) {
3864
if (! StreamCacheRefreshBuffer (scp)) return '\0';
3751
3867
if (scp->ctr < scp->total) {
3752
3868
residue = scp->buf [(int) scp->ctr];
3871
if (residue == '-') {
3873
if ((scp->flags & STREAM_GAP_MASK) == 0) {
3874
while (residue == '-') {
3875
if (scp->ctr >= scp->total) {
3876
if (! StreamCacheRefreshBuffer (scp)) return '\0';
3879
while (scp->ctr < scp->total && residue == '-') {
3880
residue = scp->buf [(int) scp->ctr];
3884
if (residue == '-') return '\0';
3886
} else if ((scp->flags & STREAM_GAP_MASK) == GAP_TO_SINGLE_DASH) {
3888
while (residue == '-') {
3889
if (scp->ctr >= scp->total) {
3890
if (! StreamCacheRefreshBuffer (scp)) return '-';
3893
while (scp->ctr < scp->total && residue == '-') {
3894
residue = scp->buf [(int) scp->ctr];
3895
if (residue != '-') return '-';
3901
} else if (residue == '+') {
3903
if ((scp->flags & SUPPRESS_VIRT_SEQ) != 0) {
3904
while (residue == '+') {
3905
if (scp->ctr >= scp->total) {
3906
if (! StreamCacheRefreshBuffer (scp)) return '\0';
3909
while (scp->ctr < scp->total && residue == '+') {
3910
residue = scp->buf [(int) scp->ctr];
3914
if (residue == '+') return '\0';
3756
3919
return residue;
3789
3952
********************************************************************************/
3791
NLM_EXTERN ByteStorePtr ProteinFromCdRegionExEx (SeqFeatPtr sfp, Boolean include_stop, Boolean remove_trailingX, BoolPtr altStartP)
3954
NLM_EXTERN ByteStorePtr ProteinFromCdRegionExEx (SeqFeatPtr sfp, Boolean include_stop, Boolean remove_trailingX, BoolPtr altStartP, Boolean farProdFetchOK)
3794
3957
ByteStorePtr bs;
3833
3996
tableExists = (Boolean) (tbl != NULL);
3835
3998
bs = TransTableTranslateCdRegionEx (&tbl, sfp, include_stop, remove_trailingX,
3999
FALSE, altStartP, farProdFetchOK);
3838
4001
/* save FSA in genetic code-specific app property name */
3847
4010
NLM_EXTERN ByteStorePtr ProteinFromCdRegionEx (SeqFeatPtr sfp, Boolean include_stop, Boolean remove_trailingX)
3850
return ProteinFromCdRegionExEx (sfp, include_stop, remove_trailingX, NULL);
4013
return ProteinFromCdRegionExEx (sfp, include_stop, remove_trailingX, NULL, TRUE);
3853
4016
NLM_EXTERN ByteStorePtr ProteinFromCdRegionExWithTrailingCodonHandling
5078
static Boolean ComplementSeqData (Uint1 seqtype, Int4 seqlen, ByteStorePtr bysp)
5241
NLM_EXTERN Boolean ComplementSeqData (Uint1 seqtype, Int4 seqlen, SeqDataPtr sdp)
5080
5243
SeqCodeTablePtr sctp;
5081
long readbyte, bslen;
5245
long readbyte, bslen;
5082
5246
Uint1 byte = 0, byte_to, newbyte = 0, residue;
5083
5247
Uint1 comp, bitctr, mask, lshift, rshift, bc;
5249
if (seqtype == Seq_code_gap) return FALSE;
5251
bysp = (ByteStorePtr) sdp;
5085
5252
if (bysp == NULL)
5087
5254
ErrPostEx(SEV_ERROR,0,0, "Error: no sequence data\n");
5248
5415
} /* BioseqComplement */
5251
static Boolean LIBCALL ReverseSeqData (Uint1 seqtype, Int4 seqlen, ByteStorePtr bysp1)
5418
NLM_EXTERN Boolean LIBCALL ReverseSeqData (Uint1 seqtype, Int4 seqlen, SeqDataPtr sdp)
5253
ByteStorePtr bysp2 = '\0';
5420
ByteStorePtr bysp1, bysp2 = '\0';
5254
5421
long readbyte, bslen = 0;
5255
5422
Int4 count = 0;
5256
5423
Uint1 byte = 0, byte2, byte_to = 0, byte_to2, newbyte = 0;
5257
5424
Uint1 newbyte2, finalbyte, residue, residue2, bitctr, bc2 = 0;
5258
5425
Uint1 bitctr2, mask, mask2, lshift, rshift, bc = 0, jagged;
5427
if (seqtype == Seq_code_gap) return FALSE;
5429
bysp1 = (ByteStorePtr) sdp;
5260
5431
if (bysp1 == NULL)
5262
5433
ErrPostEx(SEV_ERROR,0,0, "Error: No sequence data\n");
6531
static ValNodePtr MakeCodeBreakList (SeqLocPtr cdslocation, Int4 len, CodeBreakPtr cbp, Uint1 frame)
6702
NLM_EXTERN ValNodePtr MakeCodeBreakList (SeqLocPtr cdslocation, Int4 len, CodeBreakPtr cbp, Uint1 frame)
6534
6705
Int4 adjust = 0, pos, pos1, pos2;
6659
6831
no_start = FALSE;
6660
6832
part_loc = SeqLocPartialCheck (location);
6661
part_prod = SeqLocPartialCheck (product);
6833
part_prod = SeqLocPartialCheckEx (product, farProdFetchOK);
6662
6834
if ((part_loc & SLP_START) /* || (part_prod & SLP_START) */) {
6663
6835
no_start = TRUE;
6846
7018
return TransTableTranslateCommon (tblptr, location, NULL, FALSE, genCode,
6847
7019
frame, NULL, include_stop,
6848
remove_trailingX, FALSE, NULL);
7020
remove_trailingX, FALSE, NULL, TRUE);
6851
7023
NLM_EXTERN ByteStorePtr TransTableTranslateCdRegionEx (
6883
7056
return TransTableTranslateCommon (tblptr, cds->location, cds->product, partial3,
6884
7057
genCode, crp->frame, crp->code_break,
6885
7058
include_stop, remove_trailingX,
6886
no_stop_at_end_of_complete_cds, altStartP);
7059
no_stop_at_end_of_complete_cds, altStartP, farProdFetchOK);
6889
7062
NLM_EXTERN ByteStorePtr TransTableTranslateCdRegion (
6898
7071
return TransTableTranslateCdRegionEx (tblptr, cds, include_stop, remove_trailingX,
6899
no_stop_at_end_of_complete_cds, NULL);
7072
no_stop_at_end_of_complete_cds, NULL, TRUE);
6902
7075
/* allow reuse of translation tables by saving as AppProperty */
8855
9028
if (slp != NULL) {
8856
9029
slp->length = len;
8857
9030
ValNodeAddPointer (seq_ext, (Int2) 2, (Pointer) slp);
8858
slp->seq_data = BSNew (slp->length);
9031
slp->seq_data = (SeqDataPtr) BSNew (slp->length);
8859
9032
slp->seq_data_type = Seq_code_iupacna;
8860
AddBasesToByteStore (slp->seq_data, str);
9033
AddBasesToByteStore ((ByteStorePtr) slp->seq_data, str);