2
* ===========================================================================
5
* National Center for Biotechnology Information
7
* This software/database is a "United States Government Work" under the
8
* terms of the United States Copyright Act. It was written as part of
9
* the author's official duties as a United States Government employee and
10
* thus cannot be copyrighted. This software/database is freely available
11
* to the public for use. The National Library of Medicine and the U.S.
12
* Government have not placed any restriction on its use or reproduction.
14
* Although all reasonable efforts have been taken to ensure the accuracy
15
* and reliability of the software and data, the NLM and the U.S.
16
* Government do not and cannot warrant the performance or results that
17
* may be obtained by using this software or data. The NLM and the U.S.
18
* Government disclaim all warranties, express or implied, including
19
* warranties of performance, merchantability or fitness for any particular
22
* Please cite the author in any work or product based on this material.
24
* ===========================================================================
26
* File Name: gbparint.c
28
* Author: Karl Sirotkin
30
* $Log: gbparint.c,v $
31
* Revision 6.5 2001/06/07 17:00:54 tatiana
32
* added gi option in Nlm_gbparselex()
34
* Revision 6.4 2000/03/20 23:38:39 aleksey
35
* Finally submitted the changes which have been made by serge bazhin
36
* and been kept in my local directory.
38
* These changes allow to establish user callback functions
39
* in 'Asn2ffJobPtr' structure which are called within
40
* 'SeqEntryToFlatAjp' function call.
41
* The new members are:
42
* user_data - pointer to a user context for passing data
43
* ajp_count_index - user defined function
44
* ajp_print_data - user defined function
45
* ajp_print_index - user defined function
47
* Revision 6.3 1999/04/06 19:42:55 bazhin
48
* Changes, related to flat2asn's ACCESSION.VERSION parsing.
50
* Revision 6.2 1999/04/02 21:15:07 tatiana
51
* accession.version added
53
* Revision 6.1 1997/10/24 21:28:39 bazhin
54
* Is able to distinguish and process "gap(...)" tokens inside
55
* of location entries. Made for CONTIG line join contents.
57
* Revision 6.0 1997/08/25 18:06:05 madden
58
* Revision changed to 6.0
60
* Revision 5.3 1997/06/19 18:38:01 vakatov
61
* [WIN32,MSVC++] Adopted for the "NCBIOBJ.LIB" DLL'ization
63
* Revision 5.2 1997/02/06 00:16:14 tatiana
64
* dealing with 2+6 accession
66
* Revision 5.1 1997/01/27 19:16:17 tatiana
67
* accept two-letter prefix in accession number
69
* Revision 5.0 1996/05/28 13:23:23 ostell
72
* Revision 4.2 1996/05/21 21:12:05 tatiana
73
* bullet proof in gbparseint()
75
* Revision 4.1 1995/07/31 19:02:10 tatiana
78
* Revision 1.8 1995/05/15 21:46:05 ostell
94
/* Forward declaration; Nlm_gbgap() is defined further down in this file. */
void Nlm_gbgap PROTO((ValNodePtr PNTR currentPt, ValNodePtr PNTR retval));
96
/*--------- do_Nlm_gbparse_error () ---------------*/
99
/*
 * Default error reporter for location parsing.
 * Concatenates msg, a 4-character separator and details into a buffer
 * obtained from MemNew(), then posts the result with ErrPostStr() at
 * SEV_ERROR under ERR_FEATURE_LocationParsing.
 *   msg     - leading description of the problem
 *   details - text appended after the separator
 */
do_Nlm_gbparse_error (CharPtr msg, CharPtr details)
101
/* +7 leaves room for the separator appended below plus a terminator */
Int4 len = StringLen(msg) +7;
102
CharPtr errmsg, temp;
104
len += StringLen(details);
105
/* errmsg keeps the start of the buffer; temp is the moving write position */
temp = errmsg= MemNew((size_t)len);
106
temp = StringMove(temp, msg);
107
temp = StringMove(temp, " at ");
108
temp = StringMove(temp, details);
110
ErrPostStr(SEV_ERROR, ERR_FEATURE_LocationParsing, errmsg);
114
/*
 * MAKE_THREAD_SAFE is defined unconditionally here, so the #ifndef branch
 * directly below is not compiled as the file stands.
 */
#define MAKE_THREAD_SAFE
115
#ifndef MAKE_THREAD_SAFE
116
/* Non-thread-safe variant: callbacks and user data live in file-scope statics. */
static Nlm_gbparse_errfunc Err_func = do_Nlm_gbparse_error;
117
static Nlm_gbparse_rangefunc Range_func = NULL;
118
static Pointer Nlm_gbparse_range_data = NULL;
119
/* In this variant the macro expands to nothing. */
#define MACRO_THREAD_SAVE_STATIC
125
/* Thread-safe variant: one TLS slot per callback / data pointer. */
static TNlmTls Err_func_tls=NULL;
126
static TNlmTls Range_func_tls=NULL;
127
static TNlmTls Nlm_gbparse_range_data_tls=NULL;
129
/*
 * Declares local variables Err_func, Range_func and Nlm_gbparse_range_data
 * and fills each from its TLS slot when that slot is non-NULL.
 * Err_func falls back to do_Nlm_gbparse_error when nothing was loaded.
 * Because it declares variables, it is used at the top of a function body.
 */
#define MACRO_THREAD_SAVE_STATIC \
130
Nlm_gbparse_errfunc Err_func = NULL; \
131
Nlm_gbparse_rangefunc Range_func = NULL; \
132
Pointer Nlm_gbparse_range_data = NULL; \
133
if(Err_func_tls) NlmTlsGetValue(Err_func_tls,(VoidPtr PNTR)&Err_func); \
134
if(!Err_func) Err_func = do_Nlm_gbparse_error; \
135
if(Range_func_tls) NlmTlsGetValue(Range_func_tls,(VoidPtr PNTR)&Range_func); \
136
if(Nlm_gbparse_range_data_tls) NlmTlsGetValue(Nlm_gbparse_range_data_tls,(VoidPtr PNTR)&Nlm_gbparse_range_data);
139
/*------------------ Nlm_gbcheck_range()-------------*/
141
/*
 * Range check for a parsed coordinate.
 * When a range callback is installed it is called with the registered
 * data pointer and idp, and its result is stored in len. A num that is
 * negative or not below len is reported through Nlm_gbparse_error()
 * with the message text shown below.
 *   num          - coordinate to check
 *   idp          - sequence id passed to the range callback
 *   head/current - token list and position forwarded to the error report
 */
Nlm_gbcheck_range(Int4 num, SeqIdPtr idp, Boolean PNTR keep_rawPt, int PNTR num_errsPt, ValNodePtr head, ValNodePtr current)
144
/* thread-safe build: declares Range_func / Nlm_gbparse_range_data loaded from TLS */
MACRO_THREAD_SAVE_STATIC;
145
if (Range_func != NULL){
146
len = (*Range_func)(Nlm_gbparse_range_data, idp);
148
if (num <0 || num >= len){
149
Nlm_gbparse_error("range error", head, current);
156
/*----------- Nlm_install_gbparse_error_handler ()-------------*/
159
/*
 * Registers new_func as the error callback used by Nlm_gbparse_error().
 * In the thread-safe build the pointer is stored in the Err_func_tls slot.
 */
Nlm_install_gbparse_error_handler(Nlm_gbparse_errfunc new_func)
161
#ifdef MAKE_THREAD_SAFE
162
/* NOTE(review): a function pointer is cast to (VoidPtr PNTR) here, while the
   data pointer in Nlm_install_gbparse_range_func is passed uncast; confirm
   against the NlmTlsSetValue() prototype. */
NlmTlsSetValue(&Err_func_tls, (VoidPtr PNTR) new_func, NULL);
168
/*----------- Nlm_install_gbparse_range_func ()-------------*/
171
/*
 * Registers the range callback and the user data pointer that is handed
 * back to it as its first argument (see Nlm_gbcheck_range).
 *   data     - opaque pointer stored alongside the callback
 *   new_func - callback to install
 */
Nlm_install_gbparse_range_func(Pointer data, Nlm_gbparse_rangefunc new_func)
173
#ifdef MAKE_THREAD_SAFE
174
/* thread-safe build: both values go into their TLS slots */
NlmTlsSetValue(&Range_func_tls, (VoidPtr PNTR) new_func,NULL);
175
NlmTlsSetValue(&Nlm_gbparse_range_data_tls,data,NULL);
177
/* other build: plain assignment to the file-scope statics */
Range_func = new_func;
178
Nlm_gbparse_range_data = data;
183
/*--------- Nlm_gbparse_error()-----------*/
186
/*
 * Reports a parse error through the currently installed error callback.
 *   front   - leading message text, passed as the first callback argument
 *   head    - start of the token list
 *   current - token position forwarded to Nlm_gbparse_point()
 * The second callback argument is the string built by Nlm_gbparse_point().
 */
Nlm_gbparse_error(CharPtr front, ValNodePtr head, ValNodePtr current)
190
/* thread-safe build: declares Err_func, defaulting to do_Nlm_gbparse_error */
MACRO_THREAD_SAVE_STATIC;
192
details = Nlm_gbparse_point (head, current);
193
Err_func (front,details);
197
/*------ Nlm_gbparse_point ()----*/
200
/*
 * Renders the token list starting at head back into text, for use in
 * error messages. Works in two passes over the list: the first adds up
 * the space needed in len, the second writes each token into a buffer
 * obtained from MemNew(len+1). The buffer is held in retval.
 * NOTE(review): how the current argument influences the output is not
 * visible in the lines shown here.
 */
Nlm_gbparse_point (ValNodePtr head, ValNodePtr current)
202
CharPtr temp, retval = NULL;
206
/* pass 1: size the output */
for ( now = head; now ; now = now -> next){
207
switch ( now-> choice){
208
case GBPARSE_INT_JOIN :
211
case GBPARSE_INT_COMPL :
214
case GBPARSE_INT_LEFT :
215
case GBPARSE_INT_RIGHT :
216
case GBPARSE_INT_CARET :
217
case GBPARSE_INT_GT :
218
case GBPARSE_INT_LT :
219
case GBPARSE_INT_COMMA :
220
case GBPARSE_INT_SINGLE_DOT :
223
case GBPARSE_INT_DOT_DOT :
226
/* tokens that carry their own text in data.ptrvalue */
case GBPARSE_INT_ACCESION :
227
case GBPARSE_INT_NUMBER :
228
len += StringLen ( now -> data.ptrvalue);
230
case GBPARSE_INT_ORDER :
231
case GBPARSE_INT_GROUP :
234
case GBPARSE_INT_ONE_OF :
235
case GBPARSE_INT_ONE_OF_NUM:
238
case GBPARSE_INT_REPLACE :
241
case GBPARSE_INT_STRING:
242
len += StringLen(now ->data.ptrvalue) + 1;
244
case GBPARSE_INT_UNKNOWN :
248
len ++; /* for space */
257
/* pass 2: write the tokens; temp is the moving write position */
temp = retval = MemNew(len+1);
258
for ( now = head; now ; now = now -> next){
259
switch ( now-> choice){
260
case GBPARSE_INT_JOIN :
261
temp = StringMove(temp,"join");
263
case GBPARSE_INT_COMPL :
264
temp = StringMove(temp,"complement");
266
case GBPARSE_INT_LEFT :
267
temp = StringMove(temp,"(");
269
case GBPARSE_INT_RIGHT :
270
temp = StringMove(temp,")");
272
case GBPARSE_INT_CARET :
273
temp = StringMove(temp,"^");
275
case GBPARSE_INT_DOT_DOT :
276
temp = StringMove(temp,"..");
278
/* these three token kinds are copied verbatim from data.ptrvalue */
case GBPARSE_INT_ACCESION :
279
case GBPARSE_INT_NUMBER :
280
case GBPARSE_INT_STRING:
281
temp = StringMove(temp,now -> data.ptrvalue);
283
case GBPARSE_INT_GT :
284
temp = StringMove(temp,">");
286
case GBPARSE_INT_LT :
287
temp = StringMove(temp,"<");
289
case GBPARSE_INT_COMMA :
290
temp = StringMove(temp,",");
292
case GBPARSE_INT_ORDER :
293
temp = StringMove(temp,"order");
295
case GBPARSE_INT_SINGLE_DOT :
296
temp = StringMove(temp,".");
298
case GBPARSE_INT_GROUP :
299
temp = StringMove(temp,"group");
301
/* both one-of token kinds are written with the same keyword */
case GBPARSE_INT_ONE_OF :
302
case GBPARSE_INT_ONE_OF_NUM:
303
temp = StringMove(temp,"one-of");
305
case GBPARSE_INT_REPLACE :
306
temp = StringMove(temp,"replace");
308
case GBPARSE_INT_UNKNOWN :
312
temp = StringMove(temp," ");
321
/*--------- Nlm_find_one_of_num()------------*/
324
Consider these for locations:
325
misc_signal join(57..one-of(67,75),one-of(100,110)..200)
326
misc_signal join(57..one-of(67,75),one-of(100,110..120),200)
327
misc_signal join(57..one-of(67,75),one-of(100,110..115)..200)
329
misc_signal join(57..one-of(67,75),one-of(100,110),200)
331
In the first three, the one-of() is functioning as an alternative set
332
of numbers, in the last, as an alternative set of locations (even
333
though the locations are points).
334
[Yes, the one-of(100,110..115).. form is illegal.]
336
Here is one more case: one-of(18,30)..470, so if the location
337
starts with a one-of, it also needs to be checked.
339
To deal with this, the GBPARSE_INT_ONE_OF token type will be changed
340
by the following function to GBPARSE_INT_ONE_OF_NUM, in the three cases.
342
Note that this change is not necessary in this case:
343
join(100..200,300..one-of(400,500)), as after a ".." token,
344
it has to be a number.
349
/*
 * Retags one-of tokens that act as a set of alternative numbers.
 * A GBPARSE_INT_ONE_OF token is changed to GBPARSE_INT_ONE_OF_NUM when
 * a right parenthesis found while scanning forward from it is directly
 * followed by a dot-dot token. Two places are examined: the very first
 * token of the list, and any one-of that directly follows a comma or a
 * left parenthesis.
 *   head_token - start of the token list; choice fields are edited in place
 */
Nlm_find_one_of_num(ValNodePtr head_token)
351
ValNodePtr current, scanner;
353
/* case 1: the location itself starts with one-of */
current = head_token;
354
if (current -> choice == GBPARSE_INT_ONE_OF){
355
scanner= current -> next;
356
/*-------(is first token after ")" a ".."?----*/
357
for (;scanner!=NULL; scanner = scanner -> next){
358
if (scanner -> choice == GBPARSE_INT_RIGHT){
359
scanner = scanner -> next;
360
if (scanner != NULL){
361
if (scanner -> choice == GBPARSE_INT_DOT_DOT){
362
/*---- this is it ! ! */
363
current -> choice = GBPARSE_INT_ONE_OF_NUM;
370
/* case 2: one-of directly after a comma or a left parenthesis */
for (current = head_token; current != NULL; current = current -> next){
371
if ( current -> choice == GBPARSE_INT_COMMA ||
372
current -> choice == GBPARSE_INT_LEFT ){
373
scanner= current -> next;
374
if ( scanner != NULL){
375
if (scanner -> choice == GBPARSE_INT_ONE_OF){
376
/*-------(is first token after ")" a ".."?----*/
377
for (;scanner!=NULL; scanner = scanner -> next){
378
if (scanner -> choice == GBPARSE_INT_RIGHT){
379
scanner = scanner -> next;
380
if (scanner != NULL){
381
if (scanner -> choice == GBPARSE_INT_DOT_DOT){
382
/*---- this is it ! ! */
/* current still points at the comma or parenthesis, so the one-of is current->next */
383
current -> next -> choice
384
= GBPARSE_INT_ONE_OF_NUM;
397
/*---------- Nlm_gbparseint()-----*/
400
/*
 * Top-level entry point: turns a raw location string into a SeqLoc.
 * Steps visible here:
 *   1. *keep_rawPt is reset to FALSE.
 *   2. gbparselex() tokenises raw_intervals into head_token and its
 *      return value becomes the initial *num_errsPt.
 *   3. With no lexer errors, Nlm_find_one_of_num() retags one-of tokens
 *      and the leading token selects the parser:
 *        join / order / group / one-of / complement -> Nlm_gbloc
 *        accession, caret, gt, lt, number, left paren, one-of-num -> Nlm_gbint
 *        replace -> Nlm_gbreplace
 *      The Nlm_gbloc and Nlm_gbint results are passed through
 *      Nlm_gbparse_better_be_done().
 *   4. The token list is released with ValNodeFreeData().
 * Each error path sets *keep_rawPt to TRUE and increments *num_errsPt.
 *   raw_intervals - location text to parse
 *   keep_rawPt    - out: TRUE when an error path below was taken
 *   sitesPt       - forwarded to Nlm_gbloc / Nlm_gbreplace
 *   num_errsPt    - out: error count
 *   seq_id        - forwarded to the sub-parsers
 */
Nlm_gbparseint(CharPtr raw_intervals, Boolean PNTR keep_rawPt, Boolean PNTR sitesPt, int PNTR num_errsPt, SeqIdPtr seq_id)
402
SeqLocPtr retval = NULL;
403
ValNodePtr head_token, current_token;
407
* keep_rawPt = FALSE;
411
(*num_errsPt) = gbparselex(raw_intervals, & head_token);
413
if (head_token == NULL) {
417
if ( ! (*num_errsPt)){
418
current_token = head_token;
419
Nlm_find_one_of_num(head_token);
424
switch ( current_token -> choice){
425
/* compound location keywords */
case GBPARSE_INT_JOIN : case GBPARSE_INT_ORDER :
426
case GBPARSE_INT_GROUP : case GBPARSE_INT_ONE_OF :
427
case GBPARSE_INT_COMPL:
428
retval = Nlm_gbloc(keep_rawPt, & paren_count, sitesPt, & current_token,
429
head_token, (num_errsPt), seq_id);
430
/* need to check that out of tokens here */
431
retval = Nlm_gbparse_better_be_done(num_errsPt, current_token,
432
head_token, retval, keep_rawPt, paren_count);
434
case GBPARSE_INT_STRING:
435
Nlm_gbparse_error("string in loc",
436
head_token, current_token);
437
* keep_rawPt = TRUE; (* num_errsPt) ++;
438
/* no break on purpose */
439
/* tokens that cannot start a location */
case GBPARSE_INT_UNKNOWN :
441
case GBPARSE_INT_RIGHT :
442
case GBPARSE_INT_DOT_DOT :
443
case GBPARSE_INT_COMMA :
444
case GBPARSE_INT_SINGLE_DOT :
446
Nlm_gbparse_error("illegal initial token",
447
head_token, current_token);
448
* keep_rawPt = TRUE; (* num_errsPt) ++;
449
current_token = current_token -> next;
452
case GBPARSE_INT_ACCESION :
453
/*--- no warn, but strange ---*/
454
/*-- no break on purpose ---*/
456
/* tokens that start a plain interval or point */
case GBPARSE_INT_CARET : case GBPARSE_INT_GT :
457
case GBPARSE_INT_LT : case GBPARSE_INT_NUMBER :
458
case GBPARSE_INT_LEFT :
460
case GBPARSE_INT_ONE_OF_NUM:
462
retval = Nlm_gbint(keep_rawPt, & current_token,
463
head_token, (num_errsPt), seq_id);
464
/* need to check that out of tokens here */
465
retval = Nlm_gbparse_better_be_done(num_errsPt, current_token,
466
head_token, retval, keep_rawPt, paren_count);
469
case GBPARSE_INT_REPLACE :
470
retval = Nlm_gbreplace(keep_rawPt, & paren_count, sitesPt, & current_token,
471
head_token, (num_errsPt), seq_id);
473
/*---all errors handled within this function ---*/
475
case GBPARSE_INT_SITES :
478
current_token = current_token -> next;
481
}while (go_again && current_token);
487
/* the token list is released here */
ValNodeFreeData(head_token);
496
/*---------- Nlm_gbparseint_ver()-----*/
498
/*
 * Variant of Nlm_gbparseint() that carries an extra accver flag.
 * The visible flow matches Nlm_gbparseint(); the differences are that
 * tokenising is done by Nlm_gbparselex_ver() and parsing is delegated to
 * Nlm_gbloc_ver(), Nlm_gbint_ver() and Nlm_gbreplace_ver(), each of which
 * receives accver as its last argument.
 *   raw_intervals - location text to parse
 *   keep_rawPt    - out: reset to FALSE, set TRUE on the error paths below
 *   sitesPt       - forwarded to Nlm_gbloc_ver / Nlm_gbreplace_ver
 *   seq_id        - forwarded to the sub-parsers
 *   accver        - forwarded to the lexer and the sub-parsers
 */
NLM_EXTERN SeqLocPtr Nlm_gbparseint_ver(CharPtr raw_intervals,
499
Boolean PNTR keep_rawPt,
500
Boolean PNTR sitesPt,
502
SeqIdPtr seq_id, Boolean accver)
504
SeqLocPtr retval = NULL;
505
ValNodePtr head_token, current_token;
509
* keep_rawPt = FALSE;
513
/* the lexer return value becomes the initial error count */
(*num_errsPt) = Nlm_gbparselex_ver(raw_intervals, &head_token, accver);
515
if (head_token == NULL) {
519
if ( ! (*num_errsPt)){
520
current_token = head_token;
521
Nlm_find_one_of_num(head_token);
526
switch ( current_token -> choice){
527
/* compound location keywords */
case GBPARSE_INT_JOIN : case GBPARSE_INT_ORDER :
528
case GBPARSE_INT_GROUP : case GBPARSE_INT_ONE_OF :
529
case GBPARSE_INT_COMPL:
530
retval = Nlm_gbloc_ver(keep_rawPt, & paren_count, sitesPt, & current_token,
531
head_token, (num_errsPt), seq_id, accver);
532
/* need to check that out of tokens here */
533
retval = Nlm_gbparse_better_be_done(num_errsPt, current_token,
534
head_token, retval, keep_rawPt, paren_count);
536
case GBPARSE_INT_STRING:
537
Nlm_gbparse_error("string in loc",
538
head_token, current_token);
539
* keep_rawPt = TRUE; (* num_errsPt) ++;
540
/* no break on purpose */
541
/* tokens that cannot start a location */
case GBPARSE_INT_UNKNOWN :
543
case GBPARSE_INT_RIGHT :
544
case GBPARSE_INT_DOT_DOT :
545
case GBPARSE_INT_COMMA :
546
case GBPARSE_INT_SINGLE_DOT :
548
Nlm_gbparse_error("illegal initial token",
549
head_token, current_token);
550
* keep_rawPt = TRUE; (* num_errsPt) ++;
551
current_token = current_token -> next;
554
case GBPARSE_INT_ACCESION :
555
/*--- no warn, but strange ---*/
556
/*-- no break on purpose ---*/
558
/* tokens that start a plain interval or point */
case GBPARSE_INT_CARET : case GBPARSE_INT_GT :
559
case GBPARSE_INT_LT : case GBPARSE_INT_NUMBER :
560
case GBPARSE_INT_LEFT :
562
case GBPARSE_INT_ONE_OF_NUM:
564
retval = Nlm_gbint_ver(keep_rawPt, & current_token,
565
head_token, (num_errsPt), seq_id, accver);
566
/* need to check that out of tokens here */
567
retval = Nlm_gbparse_better_be_done(num_errsPt, current_token,
568
head_token, retval, keep_rawPt, paren_count);
571
case GBPARSE_INT_REPLACE :
572
retval = Nlm_gbreplace_ver(keep_rawPt, & paren_count, sitesPt, & current_token,
573
head_token, (num_errsPt), seq_id, accver);
575
/*---all errors handled within this function ---*/
577
case GBPARSE_INT_SITES :
580
current_token = current_token -> next;
583
}while (go_again && current_token);
589
/* the token list is released here */
ValNodeFreeData(head_token);
598
/*---------- Nlm_gbloc()-----*/
601
/*
 * Recursive parser for one location expression starting at *currentPt.
 * Visible behaviour, by leading token:
 *   complement - expects a left parenthesis, recurses into Nlm_gbloc(),
 *                reverse-complements the result with SeqLocRevCmp() and
 *                expects the closing parenthesis
 *   join       - new node with choice SEQLOC_MIX
 *   order      - new node with choice SEQLOC_MIX, add_nulls set
 *   group      - *keep_rawPt set TRUE, new node with choice SEQLOC_MIX
 *   one-of     - new node with choice SEQLOC_EQUIV
 *   gap        - handled by Nlm_gbgap()
 *   accession, caret, gt, lt, number, left paren, one-of-num
 *              - a single interval or point parsed by Nlm_gbint()
 *   replace    - reported as illegal at this level
 * For a compound node (choice neither SEQLOC_INT nor SEQLOC_PNT, and not
 * produced by complement) the parenthesised, comma-separated members are
 * then parsed by recursion and chained together through their next links.
 * *parenPt is incremented on each consumed left parenthesis and
 * decremented on each consumed right parenthesis. Every error path sets
 * *keep_rawPt to TRUE and increments *num_errPt.
 *   currentPt  - in/out: current token, advanced as tokens are consumed
 *   head_token - start of the token list, used for error reports
 *   seq_id     - forwarded to Nlm_gbint() and duplicated for SEQLOC_WHOLE
 */
Nlm_gbloc(Boolean PNTR keep_rawPt, int PNTR parenPt, Boolean PNTR sitesPt, ValNodePtr PNTR currentPt, ValNodePtr head_token, int PNTR num_errPt, SeqIdPtr seq_id)
603
SeqLocPtr retval =NULL;
604
Boolean add_nulls=FALSE;
605
ValNodePtr current_token = * currentPt;
606
Boolean did_complement= FALSE;
611
switch ( current_token -> choice){
612
case GBPARSE_INT_COMPL :
613
*currentPt = (* currentPt) -> next;
614
if ( (*currentPt) == NULL){
615
Nlm_gbparse_error("unexpected end of usable tokens",
616
head_token, *currentPt);
617
* keep_rawPt = TRUE; (* num_errPt) ++;
620
if ( (* currentPt) -> choice != GBPARSE_INT_LEFT){
621
Nlm_gbparse_error("Missing \'(\'", /* paran match ) */
622
head_token, * currentPt);
623
* keep_rawPt = TRUE; (* num_errPt) ++;
626
(*parenPt) ++; *currentPt = (* currentPt) -> next;
628
Nlm_gbparse_error("illegal null contents",
629
head_token, *currentPt);
630
* keep_rawPt = TRUE; (* num_errPt) ++;
633
if ( (* currentPt) -> choice == GBPARSE_INT_RIGHT){ /* paran match ( */
634
Nlm_gbparse_error("Premature \')\'",
635
head_token, *currentPt);
636
* keep_rawPt = TRUE; (* num_errPt) ++;
639
/* parse the contents of the complement, then flip the strand */
retval = Nlm_gbloc (keep_rawPt, parenPt, sitesPt, currentPt,
640
head_token, num_errPt,seq_id) ;
641
SeqLocRevCmp ( retval);
642
did_complement= TRUE;
644
if ( (* currentPt) -> choice != GBPARSE_INT_RIGHT){
645
Nlm_gbparse_error("Missing \')\'",
646
head_token, *currentPt);
647
* keep_rawPt = TRUE; (* num_errPt) ++;
650
(*parenPt) --; *currentPt = (* currentPt) -> next;
653
Nlm_gbparse_error("Missing \')\'",
654
head_token, *currentPt);
655
* keep_rawPt = TRUE; (* num_errPt) ++;
663
/* compound keywords: only the node and its choice are created here */
case GBPARSE_INT_JOIN : retval =ValNodeNew(NULL); retval -> choice = SEQLOC_MIX; break;
664
case GBPARSE_INT_ORDER : retval =ValNodeNew(NULL); retval -> choice = SEQLOC_MIX; add_nulls=TRUE;break;
665
case GBPARSE_INT_GROUP : * keep_rawPt = TRUE; retval =ValNodeNew(NULL); retval -> choice = SEQLOC_MIX;
667
case GBPARSE_INT_ONE_OF : retval =ValNodeNew(NULL); retval -> choice = SEQLOC_EQUIV; break;
670
case GBPARSE_INT_STRING:
671
Nlm_gbparse_error("string in loc",
672
head_token, current_token);
673
* keep_rawPt = TRUE; (* num_errPt) ++;
675
/*--- no break on purpose---*/
676
case GBPARSE_INT_UNKNOWN : default:
677
case GBPARSE_INT_RIGHT : case GBPARSE_INT_DOT_DOT:case GBPARSE_INT_COMMA :
678
case GBPARSE_INT_SINGLE_DOT :
679
Nlm_gbparse_error("illegal initial loc token",
680
head_token, *currentPt);
681
* keep_rawPt = TRUE; (* num_errPt) ++;
684
/* Interval, occurs on recursion */
685
case GBPARSE_INT_GAP:
686
Nlm_gbgap(currentPt, &retval);
688
case GBPARSE_INT_ACCESION :
689
case GBPARSE_INT_CARET : case GBPARSE_INT_GT :
690
case GBPARSE_INT_LT : case GBPARSE_INT_NUMBER :
691
case GBPARSE_INT_LEFT :
693
case GBPARSE_INT_ONE_OF_NUM:
695
retval = Nlm_gbint(keep_rawPt, currentPt,
696
head_token, num_errPt, seq_id);
699
case GBPARSE_INT_REPLACE :
700
/*-------illegal at this level --*/
701
Nlm_gbparse_error("illegal replace",
702
head_token, *currentPt);
703
* keep_rawPt = TRUE; (* num_errPt) ++;
705
case GBPARSE_INT_SITES :
708
(*currentPt) = (*currentPt) -> next;
711
} while (go_again && *currentPt);
713
if ( ! (* num_errPt)) if (retval && retval->choice != SEQLOC_NULL)
714
if ( retval -> choice != SEQLOC_INT && retval -> choice != SEQLOC_PNT
715
&& ! did_complement){
717
* ONLY THE CHOICE has been set. the "join", etc. only has been noted
719
*currentPt = (* currentPt) -> next;
721
Nlm_gbparse_error("unexpected end of interval tokens",
722
head_token, *currentPt);
723
* keep_rawPt = TRUE; (* num_errPt) ++;
726
if ( (* currentPt) -> choice != GBPARSE_INT_LEFT){
727
Nlm_gbparse_error("Missing \'(\'",
728
head_token, *currentPt); /* paran match ) */
729
* keep_rawPt = TRUE; (* num_errPt) ++;
732
(*parenPt) ++; *currentPt = (* currentPt) -> next;
734
Nlm_gbparse_error("illegal null contents",
735
head_token, *currentPt);
736
* keep_rawPt = TRUE; (* num_errPt) ++;
739
if ( (* currentPt) -> choice == GBPARSE_INT_RIGHT){ /* paran match ( */
740
Nlm_gbparse_error("Premature \')\'" ,
741
head_token, *currentPt);
742
* keep_rawPt = TRUE; (* num_errPt) ++;
746
ValNodePtr last= NULL, next_loc = NULL;
748
/* member loop: runs while no error has been counted and tokens remain */
while ( ! *num_errPt && * currentPt){
749
if ( (* currentPt) -> choice == GBPARSE_INT_RIGHT){
750
while ((* currentPt) -> choice == GBPARSE_INT_RIGHT){
752
*currentPt = (* currentPt) -> next;
761
next_loc = Nlm_gbloc(keep_rawPt, parenPt,sitesPt,
762
currentPt, head_token, num_errPt,
764
/* the first member hangs off the compound node, later ones off last */
if( retval -> data.ptrvalue == NULL)
765
retval -> data.ptrvalue = next_loc;
767
last -> next = next_loc;
775
if ((* currentPt) -> choice == GBPARSE_INT_RIGHT){
778
if ( (* currentPt) -> choice == GBPARSE_INT_COMMA){
779
*currentPt = (* currentPt) -> next;
781
/* a SEQLOC_NULL node is appended here; presumably only when add_nulls
   is set (the guarding line is not visible) - TODO confirm */
next_loc = ValNodeNew(last);
782
next_loc -> choice = SEQLOC_NULL;
783
last -> next = next_loc;
787
Nlm_gbparse_error("Illegal token after interval",
788
head_token, *currentPt);
789
* keep_rawPt = TRUE; (* num_errPt) ++;
795
if ( (*currentPt) == NULL){
796
Nlm_gbparse_error("unexpected end of usable tokens",
797
head_token, *currentPt);
798
* keep_rawPt = TRUE; (* num_errPt) ++;
801
if ( (* currentPt) -> choice != GBPARSE_INT_RIGHT){
802
Nlm_gbparse_error("Missing \')\'" /* paran match ) */,
803
head_token, *currentPt);
804
* keep_rawPt = TRUE; (* num_errPt) ++;
807
(*parenPt) --; *currentPt = (* currentPt) -> next;
818
/* a whole-sequence location on a copy of seq_id; NOTE(review): the
   condition guarding this is not visible here - confirm it is the
   error path, as in Nlm_gbloc_ver */
retval =ValNodeNew(NULL);
819
retval -> choice = SEQLOC_WHOLE;
820
retval -> data.ptrvalue = SeqIdDup(seq_id);
827
/*---------- Nlm_gbloc_ver()-----*/
829
/*
 * Variant of Nlm_gbloc() that carries an extra accver flag.
 * The visible flow matches Nlm_gbloc(): the leading token selects between
 * complement handling, creation of a compound node (SEQLOC_MIX or
 * SEQLOC_EQUIV), gap handling through Nlm_gbgap(), and single interval
 * parsing; compound members are then parsed by recursion. The
 * differences are that recursion goes through Nlm_gbloc_ver() and
 * intervals through Nlm_gbint_ver(), both receiving accver.
 * When *num_errPt is non-zero near the end, retval is set to a
 * SEQLOC_WHOLE node holding a copy of seq_id.
 *   currentPt  - in/out: current token, advanced as tokens are consumed
 *   parenPt    - in/out: incremented and decremented as parentheses are consumed
 *   head_token - start of the token list, used for error reports
 *   accver     - forwarded unchanged to the sub-parsers
 */
NLM_EXTERN SeqLocPtr Nlm_gbloc_ver(Boolean PNTR keep_rawPt, int PNTR parenPt,
830
Boolean PNTR sitesPt, ValNodePtr PNTR currentPt,
831
ValNodePtr head_token, int PNTR num_errPt,
832
SeqIdPtr seq_id, Boolean accver)
834
SeqLocPtr retval =NULL;
835
Boolean add_nulls=FALSE;
836
ValNodePtr current_token = * currentPt;
837
Boolean did_complement= FALSE;
842
switch ( current_token -> choice){
843
case GBPARSE_INT_COMPL :
844
*currentPt = (* currentPt) -> next;
845
if ( (*currentPt) == NULL){
846
Nlm_gbparse_error("unexpected end of usable tokens",
847
head_token, *currentPt);
848
* keep_rawPt = TRUE; (* num_errPt) ++;
851
if ( (* currentPt) -> choice != GBPARSE_INT_LEFT){
852
Nlm_gbparse_error("Missing \'(\'", /* paran match ) */
853
head_token, * currentPt);
854
* keep_rawPt = TRUE; (* num_errPt) ++;
857
(*parenPt) ++; *currentPt = (* currentPt) -> next;
859
Nlm_gbparse_error("illegal null contents",
860
head_token, *currentPt);
861
* keep_rawPt = TRUE; (* num_errPt) ++;
864
if ( (* currentPt) -> choice == GBPARSE_INT_RIGHT){ /* paran match ( */
865
Nlm_gbparse_error("Premature \')\'",
866
head_token, *currentPt);
867
* keep_rawPt = TRUE; (* num_errPt) ++;
870
/* parse the contents of the complement, then flip the strand */
retval = Nlm_gbloc_ver (keep_rawPt, parenPt, sitesPt, currentPt,
871
head_token, num_errPt,seq_id, accver) ;
872
SeqLocRevCmp ( retval);
873
did_complement= TRUE;
875
if ( (* currentPt) -> choice != GBPARSE_INT_RIGHT){
876
Nlm_gbparse_error("Missing \')\'",
877
head_token, *currentPt);
878
* keep_rawPt = TRUE; (* num_errPt) ++;
881
(*parenPt) --; *currentPt = (* currentPt) -> next;
884
Nlm_gbparse_error("Missing \')\'",
885
head_token, *currentPt);
886
* keep_rawPt = TRUE; (* num_errPt) ++;
894
/* compound keywords: only the node and its choice are created here */
case GBPARSE_INT_JOIN : retval =ValNodeNew(NULL); retval -> choice = SEQLOC_MIX; break;
895
case GBPARSE_INT_ORDER : retval =ValNodeNew(NULL); retval -> choice = SEQLOC_MIX; add_nulls=TRUE;break;
896
case GBPARSE_INT_GROUP : * keep_rawPt = TRUE; retval =ValNodeNew(NULL); retval -> choice = SEQLOC_MIX;
898
case GBPARSE_INT_ONE_OF : retval =ValNodeNew(NULL); retval -> choice = SEQLOC_EQUIV; break;
901
case GBPARSE_INT_STRING:
902
Nlm_gbparse_error("string in loc",
903
head_token, current_token);
904
* keep_rawPt = TRUE; (* num_errPt) ++;
906
/*--- no break on purpose---*/
907
case GBPARSE_INT_UNKNOWN : default:
908
case GBPARSE_INT_RIGHT : case GBPARSE_INT_DOT_DOT:case GBPARSE_INT_COMMA :
909
case GBPARSE_INT_SINGLE_DOT :
910
Nlm_gbparse_error("illegal initial loc token",
911
head_token, *currentPt);
912
* keep_rawPt = TRUE; (* num_errPt) ++;
915
/* Interval, occurs on recursion */
916
case GBPARSE_INT_GAP:
917
Nlm_gbgap(currentPt, &retval);
919
case GBPARSE_INT_ACCESION :
920
case GBPARSE_INT_CARET : case GBPARSE_INT_GT :
921
case GBPARSE_INT_LT : case GBPARSE_INT_NUMBER :
922
case GBPARSE_INT_LEFT :
924
case GBPARSE_INT_ONE_OF_NUM:
926
retval = Nlm_gbint_ver(keep_rawPt, currentPt,
927
head_token, num_errPt, seq_id, accver);
930
case GBPARSE_INT_REPLACE :
931
/*-------illegal at this level --*/
932
Nlm_gbparse_error("illegal replace",
933
head_token, *currentPt);
934
* keep_rawPt = TRUE; (* num_errPt) ++;
936
case GBPARSE_INT_SITES :
939
(*currentPt) = (*currentPt) -> next;
942
} while (go_again && *currentPt);
944
if ( ! (* num_errPt)) if (retval && retval->choice != SEQLOC_NULL)
945
if ( retval -> choice != SEQLOC_INT && retval -> choice != SEQLOC_PNT
946
&& ! did_complement){
948
* ONLY THE CHOICE has been set. the "join", etc. only has been noted
950
*currentPt = (* currentPt) -> next;
952
Nlm_gbparse_error("unexpected end of interval tokens",
953
head_token, *currentPt);
954
* keep_rawPt = TRUE; (* num_errPt) ++;
957
if ( (* currentPt) -> choice != GBPARSE_INT_LEFT){
958
Nlm_gbparse_error("Missing \'(\'",
959
head_token, *currentPt); /* paran match ) */
960
* keep_rawPt = TRUE; (* num_errPt) ++;
963
(*parenPt) ++; *currentPt = (* currentPt) -> next;
965
Nlm_gbparse_error("illegal null contents",
966
head_token, *currentPt);
967
* keep_rawPt = TRUE; (* num_errPt) ++;
970
if ( (* currentPt) -> choice == GBPARSE_INT_RIGHT){ /* paran match ( */
971
Nlm_gbparse_error("Premature \')\'" ,
972
head_token, *currentPt);
973
* keep_rawPt = TRUE; (* num_errPt) ++;
977
ValNodePtr last= NULL, next_loc = NULL;
979
/* member loop: runs while no error has been counted and tokens remain */
while ( ! *num_errPt && * currentPt){
980
if ( (* currentPt) -> choice == GBPARSE_INT_RIGHT){
981
while ((* currentPt) -> choice == GBPARSE_INT_RIGHT){
983
*currentPt = (* currentPt) -> next;
992
next_loc = Nlm_gbloc_ver(keep_rawPt, parenPt,sitesPt,
993
currentPt, head_token, num_errPt,
995
/* the first member hangs off the compound node, later ones off last */
if( retval -> data.ptrvalue == NULL)
996
retval -> data.ptrvalue = next_loc;
998
last -> next = next_loc;
1000
/* end-of-tokens checks before *currentPt is dereferenced below */
if ( ! * currentPt){
1003
if ( ! * currentPt){
1006
if ((* currentPt) -> choice == GBPARSE_INT_RIGHT){
1009
if ( (* currentPt) -> choice == GBPARSE_INT_COMMA){
1010
*currentPt = (* currentPt) -> next;
1012
/* a SEQLOC_NULL node is appended here; presumably only when add_nulls
   is set (the guarding line is not visible) - TODO confirm */
next_loc = ValNodeNew(last);
1013
next_loc -> choice = SEQLOC_NULL;
1014
last -> next = next_loc;
1018
Nlm_gbparse_error("Illegal token after interval",
1019
head_token, *currentPt);
1020
* keep_rawPt = TRUE; (* num_errPt) ++;
1026
if ( (*currentPt) == NULL){
1027
Nlm_gbparse_error("unexpected end of usable tokens",
1028
head_token, *currentPt);
1029
* keep_rawPt = TRUE; (* num_errPt) ++;
1032
if ( (* currentPt) -> choice != GBPARSE_INT_RIGHT){
1033
Nlm_gbparse_error("Missing \')\'" /* paran match ) */,
1034
head_token, *currentPt);
1035
* keep_rawPt = TRUE; (* num_errPt) ++;
1038
(*parenPt) --; *currentPt = (* currentPt) -> next;
1046
/* on error, fall back to a whole-sequence location on a copy of seq_id */
if ( (* num_errPt)){
1049
retval =ValNodeNew(NULL);
1050
retval -> choice = SEQLOC_WHOLE;
1051
retval -> data.ptrvalue = SeqIdDup(seq_id);
1058
/**********************************************************/
1059
/*
 * Handles a gap token at *currentPt.
 * Looks at the following tokens: a left parenthesis, then either a right
 * parenthesis directly or a number followed by a right parenthesis.
 *   - with no number, a new node with choice SEQLOC_NULL is created
 *     through ValNodeNew(*retval)
 *   - with a number, GapToSeqLoc() is called with the numeric value of
 *     the token text, and the result is linked after *retval, which is
 *     then advanced to it
 * *currentPt is stepped forward past the consumed tokens.
 *   currentPt - in/out: current token position
 *   retval    - in/out: location node being built
 */
void Nlm_gbgap(ValNodePtr PNTR currentPt, ValNodePtr PNTR retval)
1061
ValNodePtr vnp_first;
1062
ValNodePtr vnp_second;
1063
ValNodePtr vnp_third;
1066
/* token after the gap keyword is checked against the left parenthesis */
vnp_first = (*currentPt)->next;
1067
if(vnp_first == NULL || vnp_first->choice != GBPARSE_INT_LEFT)
1070
/* next is checked against a number or the closing parenthesis */
vnp_second = vnp_first->next;
1071
if(vnp_second == NULL || (vnp_second->choice != GBPARSE_INT_NUMBER &&
1072
vnp_second->choice != GBPARSE_INT_RIGHT))
1075
if(vnp_second->choice == GBPARSE_INT_RIGHT)
1077
(*retval) = ValNodeNew(*retval);
1078
(*retval)->choice = SEQLOC_NULL;
1082
vnp_third = vnp_second->next;
1083
if(vnp_third == NULL || vnp_third->choice != GBPARSE_INT_RIGHT)
1086
/* NOTE(review): atoi() gives no error indication; the token was checked
   above to be GBPARSE_INT_NUMBER or GBPARSE_INT_RIGHT */
vvv = GapToSeqLoc(atoi((CharPtr) vnp_second->data.ptrvalue));
1094
(*retval)->next = vvv;
1095
(*retval) = (*retval)->next;
1098
(*currentPt) = (*currentPt)->next;
1101
(*currentPt) = (*currentPt)->next;
1102
(*currentPt) = (*currentPt)->next;
1103
(*currentPt) = (*currentPt)->next;
1106
/*--------------- Nlm_gbint ()--------------------*/
1108
/*
 * Parses one interval (or point) starting at *currentPt.
 * Steps visible here:
 *   1. Allocates the result node (choice SEQLOC_INT) and a SeqInt.
 *   2. Optional accession token: a new SeqId is built around a TextSeqId
 *      whose accession is a copy of the token text. Its choice is taken
 *      from seq_id when that is GenBank, EMBL or DDBJ, and is otherwise
 *      SEQID_GENBANK. The visible alternative is idp = SeqIdDup(seq_id),
 *      presumably when there is no accession token (the guarding line is
 *      not visible).
 *   3. Optional lt token: an IntFuzz is allocated for the from side.
 *   4. First number loaded with Nlm_gbload_number(..., TAKE_FIRST) and
 *      checked with Nlm_gbcheck_range().
 *   5. The following token decides the shape:
 *        comma / right paren - converted to a point by Nlm_gbpintpnt()
 *        caret               - fuzz set on both ends, then a second number
 *        dot-dot             - second number loaded with TAKE_SECOND
 *      The second number is range checked as well.
 *   6. For the caret form the result is converted to a point; when the
 *      two numbers are adjacent the point is set to the second number,
 *      otherwise a range fuzz is recorded.
 * Every error path sets *keep_rawPt to TRUE and increments *num_errPt.
 *   currentPt  - in/out: current token, advanced as tokens are consumed
 *   head_token - start of the token list, used for error reports
 *   seq_id     - default sequence id for the interval
 */
NLM_EXTERN SeqLocPtr /* sometimes returns points */
1110
Nlm_gbint(Boolean PNTR keep_rawPt, ValNodePtr PNTR currentPt, ValNodePtr head_token, int PNTR num_errPt, SeqIdPtr seq_id)
1112
SeqLocPtr retnode = ValNodeNew(NULL);
1113
SeqIntPtr retint = SeqIntNew();
1115
IntFuzzPtr fuzz=NULL;
1116
SeqIdPtr idp = NULL;
1117
Boolean took_choice=FALSE;
1119
retnode -> choice = SEQLOC_INT;
1121
/* explicit accession in front of the coordinates */
if ( (* currentPt) -> choice == GBPARSE_INT_ACCESION){
1122
idp = ValNodeNew(NULL);
1125
seq_id -> choice == SEQID_GENBANK
1126
|| seq_id -> choice == SEQID_EMBL
1127
|| seq_id -> choice == SEQID_DDBJ
1129
idp -> choice = seq_id -> choice;
1134
idp -> choice = SEQID_GENBANK;
1136
tp = TextSeqIdNew();
1137
idp -> data.ptrvalue = tp;
1138
tp -> accession = StringSave ( (* currentPt) ->data.ptrvalue);
1139
*currentPt = (* currentPt) -> next;
1140
if ( ! *currentPt ){
1141
Nlm_gbparse_error("Nothing after accession",
1142
head_token, *currentPt);
1143
* keep_rawPt = TRUE; (* num_errPt) ++;
1151
idp = SeqIdDup (seq_id);
1153
/* leading lt token: fuzz for the from side */
if ( (* currentPt) -> choice == GBPARSE_INT_LT){
1154
fuzz = IntFuzzNew();
1157
*currentPt = (* currentPt) -> next;
1158
if ( ! *currentPt ){
1159
Nlm_gbparse_error("Nothing after \'<\'",
1160
head_token, *currentPt);
1161
* keep_rawPt = TRUE; (* num_errPt) ++;
1165
if ( ! (* num_errPt))
1166
switch ( (*currentPt ) -> choice){
1167
case GBPARSE_INT_ACCESION :
1169
Nlm_gbparse_error("duplicate accessions",
1170
head_token, *currentPt);
1171
* keep_rawPt = TRUE; (* num_errPt) ++;
1175
case GBPARSE_INT_CARET :
1176
Nlm_gbparse_error("caret (^) before number" ,
1177
head_token, *currentPt);
1178
* keep_rawPt = TRUE; (* num_errPt) ++;
1180
case GBPARSE_INT_LT :
1182
Nlm_gbparse_error("duplicate \'<\'",
1183
head_token, *currentPt);
1184
* keep_rawPt = TRUE; (* num_errPt) ++;
1188
/* tokens that can begin the first number */
case GBPARSE_INT_GT :
1189
case GBPARSE_INT_NUMBER :
1190
case GBPARSE_INT_LEFT :
1192
case GBPARSE_INT_ONE_OF_NUM:
1194
retint -> if_from = fuzz;
1196
retnode -> data.ptrvalue = retint;
1197
Nlm_gbload_number (& ( retint -> from), & (retint -> if_from),
1198
keep_rawPt, currentPt, head_token,
1199
num_errPt,TAKE_FIRST);
1200
Nlm_gbcheck_range(retint -> from, idp, keep_rawPt, num_errPt, head_token, *currentPt);
1201
if ( ! (* num_errPt) ){
1203
Boolean in_caret = FALSE;
1204
/* decide what follows the first number */
switch ( (*currentPt ) -> choice){
1207
default: case GBPARSE_INT_JOIN: case GBPARSE_INT_COMPL:
1208
case GBPARSE_INT_SINGLE_DOT:case GBPARSE_INT_ORDER: case GBPARSE_INT_GROUP:
1209
case GBPARSE_INT_ACCESION:
1210
Nlm_gbparse_error("problem with 2nd number",
1211
head_token, *currentPt);;
1212
* keep_rawPt = TRUE; (* num_errPt) ++;
1214
case GBPARSE_INT_COMMA: case GBPARSE_INT_RIGHT: /* valid thing to leave on*/
1215
/*--------------but have a point, not an interval----*/
1216
Nlm_gbpintpnt(retnode, & retint);
1218
case GBPARSE_INT_GT: case GBPARSE_INT_LT:
1219
Nlm_gbparse_error("Missing \'..\'",
1220
head_token, *currentPt);;
1221
* keep_rawPt = TRUE; (* num_errPt) ++;
1223
case GBPARSE_INT_CARET:
1224
if (retint -> if_from){
1225
Nlm_gbparse_error("\'<\' then \'^\'",
1226
head_token, *currentPt);
1227
* keep_rawPt = TRUE; (* num_errPt) ++;
1230
/* both ends get a fuzz with choice 4 and value 4;
   NOTE(review): meaning of these constants is defined by IntFuzz - confirm */
retint -> if_from = IntFuzzNew();
1231
retint -> if_from -> choice = 4;
1232
retint -> if_from ->a = 4;
1233
retint -> if_to = IntFuzzNew();
1234
retint -> if_to -> choice = 4;
1235
retint -> if_to ->a = 4;
1237
/*---no break on purpose ---*/
1238
case GBPARSE_INT_DOT_DOT:
1239
*currentPt = (* currentPt) -> next;
1240
if ( (*currentPt) == NULL){
1241
Nlm_gbparse_error("unexpected end of usable tokens",
1242
head_token, *currentPt);
1243
* keep_rawPt = TRUE; (* num_errPt) ++;
1246
/*--no break on purpose here ---*/
1247
case GBPARSE_INT_NUMBER:
1248
case GBPARSE_INT_LEFT:
1250
case GBPARSE_INT_ONE_OF_NUM: /* unlikely, but ok */
1252
if ( (* currentPt) -> choice == GBPARSE_INT_RIGHT){
1253
if (retint -> if_from){
1254
Nlm_gbparse_error("\'^\' then \'>\'",
1255
head_token, *currentPt);
1256
* keep_rawPt = TRUE; (* num_errPt) ++;
1260
/* second number of the interval */
Nlm_gbload_number (& ( retint -> to), & (retint -> if_to),
1261
keep_rawPt, currentPt, head_token,
1262
num_errPt, TAKE_SECOND);
1263
Nlm_gbcheck_range(retint -> to, idp, keep_rawPt, num_errPt, head_token, *currentPt);
1265
* The caret location implies a place (point) between two location.
1266
* This is not exactly captured by the ASN.1, but pretty close
1269
/* to is saved first because Nlm_gbpintpnt() is handed the address of retint */
Int4 to = retint -> to;
1271
point = Nlm_gbpintpnt(retnode, & retint);
1272
if ( point -> point +1 == to){
1273
point -> point = to; /* was essentially correct */
1275
point -> fuzz -> choice = 2; /* range */
1276
point -> fuzz -> a = to; /* max */
1277
point -> fuzz ->b = point -> point;
1281
if (retint -> from == retint -> to &&
1282
! retint -> if_from &&
1284
/*-------if interval really a point, make it so ----*/
1285
Nlm_gbpintpnt(retnode, & retint);
1289
Nlm_gbpintpnt(retnode, & retint);
1296
Nlm_gbparse_error("No number when expected",
1297
head_token, *currentPt);
1298
* keep_rawPt = TRUE; (* num_errPt) ++;
1308
/* cleanup when errors were counted and retint is still set */
if (retint && (* num_errPt)){
1312
ValNodeFree(retnode);
1317
/*--------------- Nlm_gbint_ver ()--------------------*/
/*
 * Parses one interval (or point) starting at *currentPt and returns it as
 * a SeqLoc.  retnode starts out as SEQLOC_INT wrapping a fresh SeqInt and
 * is converted to a point by Nlm_gbpintpnt() on several paths.
 *
 * Visible steps:
 *   - an accession token builds a TextSeqId-based id (idp); the text after
 *     a '.' is read as the version with atoi();
 *   - SeqIdDup(seq_id) supplies the id on another path (presumably when no
 *     accession token is present -- confirm);
 *   - a leading '<' allocates an IntFuzz for the "from" end;
 *   - "from" and "to" are read by Nlm_gbload_number() with TAKE_FIRST and
 *     TAKE_SECOND and validated by Nlm_gbcheck_range();
 *   - every error is reported through Nlm_gbparse_error(), sets
 *     *keep_rawPt to TRUE and increments *num_errPt.
 */
1319
NLM_EXTERN SeqLocPtr /* sometimes returns points */
1321
Nlm_gbint_ver(Boolean PNTR keep_rawPt, ValNodePtr PNTR currentPt,
1322
ValNodePtr head_token, int PNTR num_errPt, SeqIdPtr seq_id,
1325
SeqLocPtr retnode = ValNodeNew(NULL);
1326
SeqIntPtr retint = SeqIntNew();
1328
IntFuzzPtr fuzz=NULL;
1329
SeqIdPtr idp = NULL;
1330
Boolean took_choice=FALSE;
1333
retnode -> choice = SEQLOC_INT;
1335
/* Accession prefix: build a dedicated id for this interval. */
if ( (* currentPt) -> choice == GBPARSE_INT_ACCESION){
1336
idp = ValNodeNew(NULL);
1339
/* The id type is copied from seq_id when it is GenBank, EMBL or DDBJ;
   SEQID_GENBANK is assigned below (presumably as the fallback -- confirm). */
seq_id -> choice == SEQID_GENBANK
1340
|| seq_id -> choice == SEQID_EMBL
1341
|| seq_id -> choice == SEQID_DDBJ
1343
idp -> choice = seq_id -> choice;
1348
idp -> choice = SEQID_GENBANK;
1350
tp = TextSeqIdNew();
1351
idp -> data.ptrvalue = tp;
1354
tp->accession = StringSave((*currentPt)->data.ptrvalue);
1358
/* Locate the ".version" suffix inside the accession token text. */
p = StringChr((*currentPt)->data.ptrvalue, '.');
1361
tp->accession = StringSave((*currentPt)->data.ptrvalue);
1362
Nlm_gbparse_error("Missing accession's version",
1363
head_token, *currentPt);
1368
tp->accession = StringSave((*currentPt)->data.ptrvalue);
1369
tp->version = atoi(p + 1);
1373
*currentPt = (* currentPt) -> next;
1374
if ( ! *currentPt ){
1375
Nlm_gbparse_error("Nothing after accession",
1376
head_token, *currentPt);
1377
* keep_rawPt = TRUE; (* num_errPt) ++;
1385
idp = SeqIdDup (seq_id);
1387
/* Leading '<': allocate the fuzz that is later stored in retint->if_from. */
if ( (* currentPt) -> choice == GBPARSE_INT_LT){
1388
fuzz = IntFuzzNew();
1391
*currentPt = (* currentPt) -> next;
1392
if ( ! *currentPt ){
1393
Nlm_gbparse_error("Nothing after \'<\'",
1394
head_token, *currentPt);
1395
* keep_rawPt = TRUE; (* num_errPt) ++;
1399
/* Only continue with the first number when no error has been counted. */
if ( ! (* num_errPt))
1400
switch ( (*currentPt ) -> choice){
1401
case GBPARSE_INT_ACCESION :
1403
Nlm_gbparse_error("duplicate accessions",
1404
head_token, *currentPt);
1405
* keep_rawPt = TRUE; (* num_errPt) ++;
1409
case GBPARSE_INT_CARET :
1410
Nlm_gbparse_error("caret (^) before number" ,
1411
head_token, *currentPt);
1412
* keep_rawPt = TRUE; (* num_errPt) ++;
1414
case GBPARSE_INT_LT :
1416
Nlm_gbparse_error("duplicate \'<\'",
1417
head_token, *currentPt);
1418
* keep_rawPt = TRUE; (* num_errPt) ++;
1422
/* Tokens that can start the first coordinate. */
case GBPARSE_INT_GT :
1423
case GBPARSE_INT_NUMBER :
1424
case GBPARSE_INT_LEFT :
1426
case GBPARSE_INT_ONE_OF_NUM:
1428
retint -> if_from = fuzz;
1430
retnode -> data.ptrvalue = retint;
1431
Nlm_gbload_number (& ( retint -> from), & (retint -> if_from),
1432
keep_rawPt, currentPt, head_token,
1433
num_errPt,TAKE_FIRST);
1434
Nlm_gbcheck_range(retint -> from, idp, keep_rawPt, num_errPt, head_token, *currentPt);
1435
if ( ! (* num_errPt) ){
1437
Boolean in_caret = FALSE;
1438
/* Decide what follows the first number: end of interval, '..', '^', or an error. */
switch ( (*currentPt ) -> choice){
1441
default: case GBPARSE_INT_JOIN: case GBPARSE_INT_COMPL:
1442
case GBPARSE_INT_SINGLE_DOT:case GBPARSE_INT_ORDER: case GBPARSE_INT_GROUP:
1443
case GBPARSE_INT_ACCESION:
1444
Nlm_gbparse_error("problem with 2nd number",
1445
head_token, *currentPt);;
1446
* keep_rawPt = TRUE; (* num_errPt) ++;
1448
case GBPARSE_INT_COMMA: case GBPARSE_INT_RIGHT: /* valid thing to leave on*/
1449
/*--------------but have a point, not an interval----*/
1450
Nlm_gbpintpnt(retnode, & retint);
1452
case GBPARSE_INT_GT: case GBPARSE_INT_LT:
1453
Nlm_gbparse_error("Missing \'..\'",
1454
head_token, *currentPt);;
1455
* keep_rawPt = TRUE; (* num_errPt) ++;
1457
case GBPARSE_INT_CARET:
1458
if (retint -> if_from){
1459
Nlm_gbparse_error("\'<\' then \'^\'",
1460
head_token, *currentPt);
1461
* keep_rawPt = TRUE; (* num_errPt) ++;
1464
/* Caret form: both ends get an IntFuzz with choice 4 and a = 4
   (presumably the "lim" variant with a space-to-left value -- confirm
   against the Int-fuzz definition). */
retint -> if_from = IntFuzzNew();
1465
retint -> if_from -> choice = 4;
1466
retint -> if_from ->a = 4;
1467
retint -> if_to = IntFuzzNew();
1468
retint -> if_to -> choice = 4;
1469
retint -> if_to ->a = 4;
1471
/*---no break on purpose ---*/
1472
case GBPARSE_INT_DOT_DOT:
1473
*currentPt = (* currentPt) -> next;
1474
if ( (*currentPt) == NULL){
1475
Nlm_gbparse_error("unexpected end of usable tokens",
1476
head_token, *currentPt);
1477
* keep_rawPt = TRUE; (* num_errPt) ++;
1480
/*--no break on purpose here ---*/
1481
case GBPARSE_INT_NUMBER:
1482
case GBPARSE_INT_LEFT:
1484
case GBPARSE_INT_ONE_OF_NUM: /* unlikely, but ok */
1486
if ( (* currentPt) -> choice == GBPARSE_INT_RIGHT){
1487
if (retint -> if_from){
1488
Nlm_gbparse_error("\'^\' then \'>\'",
1489
head_token, *currentPt);
1490
* keep_rawPt = TRUE; (* num_errPt) ++;
1494
/* Second coordinate of the interval. */
Nlm_gbload_number (& ( retint -> to), & (retint -> if_to),
1495
keep_rawPt, currentPt, head_token,
1496
num_errPt, TAKE_SECOND);
1497
Nlm_gbcheck_range(retint -> to, idp, keep_rawPt, num_errPt, head_token, *currentPt);
1499
* The caret location implies a place (point) between two location.
1500
* This is not exactly captured by the ASN.1, but pretty close
1503
/* "to" is saved first because Nlm_gbpintpnt() frees the interval. */
Int4 to = retint -> to;
1505
point = Nlm_gbpintpnt(retnode, & retint);
1506
if ( point -> point +1 == to){
1507
point -> point = to; /* was essentially correct */
1509
/* Turn the fuzz into a range spanning the two positions. */
point -> fuzz -> choice = 2; /* range */
1510
point -> fuzz -> a = to; /* max */
1511
point -> fuzz ->b = point -> point;
1515
if (retint -> from == retint -> to &&
1516
! retint -> if_from &&
1518
/*-------if interval really a point, make it so ----*/
1519
Nlm_gbpintpnt(retnode, & retint);
1523
Nlm_gbpintpnt(retnode, & retint);
1530
Nlm_gbparse_error("No number when expected",
1531
head_token, *currentPt);
1532
* keep_rawPt = TRUE; (* num_errPt) ++;
1542
/* Error cleanup when the interval object still exists. */
if (retint && (* num_errPt)){
1546
ValNodeFree(retnode);
1551
/*------------------- Nlm_gbpintpnt()-----------*/
/*
 * Converts the interval held in *retintPt into a point.
 *
 * A new SeqPnt takes the interval's "from" position and takes over its id
 * and if_from fuzz; both fields are set to NULL on the interval so that
 * SeqIntFree() does not release them.  retnode is then relabelled
 * SEQLOC_PNT and made to hold the new point.
 *
 * NOTE(review): no statement visible here resets *retintPt after
 * SeqIntFree(), while callers test their interval pointer afterwards
 * ("if (retint && (* num_errPt))") -- confirm the caller's pointer is
 * cleared.
 */
1553
NLM_EXTERN SeqPntPtr
1554
Nlm_gbpintpnt(SeqLocPtr retnode, SeqIntPtr PNTR retintPt)
1557
point = SeqPntNew();
1558
point -> point = (*retintPt) -> from;
1559
/* Move ownership of the id from the interval to the point. */
point -> id = (*retintPt) -> id;
1560
(*retintPt) -> id = NULL;
1561
/* Move ownership of the "from" fuzz as well. */
point -> fuzz = (*retintPt) -> if_from;
1562
(*retintPt) -> if_from = NULL;
1563
SeqIntFree((*retintPt));
1565
retnode -> choice = SEQLOC_PNT;
1566
retnode -> data.ptrvalue = point;
1570
/*----- Nlm_gbload_number() -----*/
/*
 * Reads one coordinate from the token list into *numPt and, where the
 * syntax asks for it, allocates an IntFuzz into *fuzzPt.
 *
 * Forms handled by the visible code:
 *   - '>' or '<' : fuzz choice 4, a = 1 for '>', a = 2 assigned on the
 *                  other path (presumably for '<');
 *   - "(n.m)"    : fuzz choice 2 with b = n-1 and a = m-1; take_which
 *                  (TAKE_FIRST / TAKE_SECOND) picks which becomes *numPt;
 *   - plain number: *numPt = atoi(text) - 1, i.e. one less than the
 *                  written value;
 *   - one-of(...) : the first number is taken, the rest is skipped up to
 *                  the closing ')'.
 * Errors go through Nlm_gbparse_error() and update *keep_rawPt and
 * *num_errPt.
 *
 * NOTE(review): several places advance *currentPt to ->next and then read
 * ->choice with no NULL check visible in between -- confirm the token
 * list cannot end there.
 */
1573
Nlm_gbload_number (Int4 PNTR numPt, IntFuzzPtr PNTR fuzzPt, Boolean PNTR keep_rawPt, ValNodePtr PNTR currentPt, ValNodePtr head_token, int PNTR num_errPt, int take_which)
1577
Boolean strange_sin_dot = FALSE;
1579
if ((*currentPt ) -> choice == GBPARSE_INT_CARET){
1580
Nlm_gbparse_error("duplicate carets",
1581
head_token, *currentPt);
1582
(*keep_rawPt) = TRUE; (*num_errPt) ++;
1583
*currentPt = (* currentPt) -> next;
1585
}else if ((*currentPt ) -> choice == GBPARSE_INT_GT ||
1586
(*currentPt ) -> choice == GBPARSE_INT_LT){
1588
* fuzzPt = IntFuzzNew();
1590
(* fuzzPt) -> choice = 4;
1591
if ((*currentPt ) -> choice == GBPARSE_INT_GT ){
1592
(* fuzzPt) -> a = 1; /* 'a' serves as "lim" for choice 4 */
1594
(* fuzzPt) -> a = 2;
1596
*currentPt = (* currentPt) -> next;
1597
/* "(n.m)" form: a range of possible positions. */
}else if ((*currentPt ) -> choice == GBPARSE_INT_LEFT){
1598
strange_sin_dot = TRUE;
1599
*currentPt = (* currentPt) -> next;
1600
if ((*currentPt ) -> choice == GBPARSE_INT_NUMBER){
1602
* fuzzPt = IntFuzzNew();
1604
(* fuzzPt) -> b = atoi((*currentPt ) -> data.ptrvalue)-1;
1605
(* fuzzPt) -> choice = 2;
1606
if ( take_which == TAKE_FIRST ){
1607
* numPt = (* fuzzPt) -> b;
1609
*currentPt = (* currentPt) -> next;
1614
if ((*currentPt ) -> choice != GBPARSE_INT_SINGLE_DOT ){
1617
*currentPt = (* currentPt) -> next;
1618
if ((*currentPt ) -> choice == GBPARSE_INT_NUMBER){
1619
(* fuzzPt) -> a = atoi((*currentPt ) -> data.ptrvalue)-1;
1620
if ( take_which == TAKE_SECOND ){
1621
* numPt = (* fuzzPt) -> a;
1623
*currentPt = (* currentPt) -> next;
1627
if ((*currentPt ) -> choice == GBPARSE_INT_RIGHT){
1628
*currentPt = (* currentPt) -> next;
1634
}else if ((*currentPt ) -> choice != GBPARSE_INT_NUMBER) {
1635
/* this prevents endless cycling, unconditionally */
1636
if ((*currentPt ) -> choice != GBPARSE_INT_ONE_OF
1637
&& (*currentPt ) -> choice != GBPARSE_INT_ONE_OF_NUM)
1638
*currentPt = (* currentPt) -> next;
1642
/* Ordinary case: the number itself is the current token. */
if ( ! strange_sin_dot){
1643
if ( ! * currentPt){
1644
Nlm_gbparse_error("unexpected end of interval tokens",
1645
head_token, *currentPt);
1646
* keep_rawPt = TRUE; (* num_errPt) ++;
1648
if ((*currentPt ) -> choice == GBPARSE_INT_NUMBER){
1649
* numPt = atoi((*currentPt ) -> data.ptrvalue)-1;
1650
*currentPt = (* currentPt) -> next;
1657
Nlm_gbparse_error("Incorrect uncertainty",
1658
head_token, *currentPt);
1659
(*keep_rawPt) = TRUE; (*num_errPt) ++;
1661
if ( num_found != 1){
1662
(*keep_rawPt) = TRUE;
1665
* 10..one-of(13,15) type syntax here
1668
if ((*currentPt ) -> choice == GBPARSE_INT_ONE_OF
1669
|| (*currentPt ) -> choice == GBPARSE_INT_ONE_OF_NUM){
1670
Boolean one_of_ok = TRUE;
1671
Boolean at_end_one_of = FALSE;
1673
*currentPt = (* currentPt) -> next;
1674
if ((*currentPt ) -> choice != GBPARSE_INT_LEFT){
1677
*currentPt = (* currentPt) -> next;
1679
/* The first listed number becomes the coordinate. */
if (one_of_ok && (*currentPt ) -> choice == GBPARSE_INT_NUMBER){
1680
* numPt = atoi((*currentPt ) -> data.ptrvalue)-1;
1681
*currentPt = (* currentPt) -> next;
1685
/* Skip the remaining ", number" items up to the closing ')'. */
while (one_of_ok && ! at_end_one_of && *currentPt != NULL){
1686
switch ( (*currentPt ) -> choice){
1690
case GBPARSE_INT_COMMA:
1691
case GBPARSE_INT_NUMBER:
1692
*currentPt = (* currentPt) -> next;
1694
case GBPARSE_INT_RIGHT:
1695
*currentPt = (* currentPt) -> next;
1696
at_end_one_of = TRUE;
1700
/* Malformed one-of(): resynchronise on the next ')' before reporting. */
if ( ! one_of_ok && ! at_end_one_of){
1701
while (! at_end_one_of && *currentPt != NULL){
1702
if ((*currentPt ) -> choice == GBPARSE_INT_RIGHT){
1703
at_end_one_of = TRUE;
1705
*currentPt = (* currentPt) -> next;
1710
Nlm_gbparse_error("bad one-of() syntax as number",
1711
head_token, *currentPt);
1715
Nlm_gbparse_error("Number not found when expected",
1716
head_token, *currentPt);
1722
/*----------------- Nlm_gbparse_better_be_done()-------------*/
/*
 * Called when the location is expected to be complete.  It starts from
 * ret_so_far as the result, walks over any remaining right-parenthesis
 * tokens, and reports mismatched parentheses or trailing text through
 * Nlm_gbparse_error().
 *
 * NOTE(review): the declaration of par_msg is not visible here; sprintf()
 * writes "mismatched parentheses (%d)" into it without a length bound --
 * confirm the buffer is large enough for any int.
 */
1723
NLM_EXTERN SeqLocPtr
1724
Nlm_gbparse_better_be_done(int PNTR num_errsPt, ValNodePtr current_token, ValNodePtr head_token, SeqLocPtr ret_so_far, Boolean PNTR keep_rawPt, int paren_count)
1726
SeqLocPtr retval = ret_so_far;
1729
/* Consume trailing ')' tokens. */
while (current_token -> choice == GBPARSE_INT_RIGHT){
1731
current_token = current_token -> next;
1732
if ( ! current_token){
1735
sprintf(par_msg, "mismatched parentheses (%d)", paren_count);
1736
Nlm_gbparse_error(par_msg,
1737
head_token, current_token);
1745
Nlm_gbparse_error("text after last legal right parenthesis",
1746
head_token, current_token);
1752
Nlm_gbparse_error("text after end",
1753
head_token, current_token);
1760
/*-------- Nlm_gbreplace() --------*/
/*
 * Handles a "replace" construct.  *keep_rawPt is always set to TRUE, the
 * current token is skipped, a '(' is required, and the first location is
 * parsed with Nlm_gbloc().  After that the code checks that tokens remain
 * and that the next one is a comma; failures are reported through
 * Nlm_gbparse_error().
 *
 * NOTE(review): *currentPt is advanced and then dereferenced for the '('
 * test with no NULL check visible in between -- confirm a token always
 * follows "replace".
 */
1762
NLM_EXTERN SeqLocPtr
1763
Nlm_gbreplace (Boolean PNTR keep_rawPt, int PNTR parenPt, Boolean PNTR sitesPt, ValNodePtr PNTR currentPt, ValNodePtr head_token, int PNTR num_errPt, SeqIdPtr seq_id)
1765
SeqLocPtr retval = NULL;
1767
* keep_rawPt = TRUE;
1768
*currentPt = (* currentPt) -> next;
1770
if ((*currentPt ) -> choice == GBPARSE_INT_LEFT){
1771
*currentPt = (* currentPt) -> next;
1772
retval = Nlm_gbloc (keep_rawPt, parenPt, sitesPt, currentPt, head_token,
1774
if ( ! * currentPt){
1775
Nlm_gbparse_error("unexpected end of interval tokens",
1776
head_token, *currentPt);
1777
* keep_rawPt = TRUE; (* num_errPt) ++;
1780
if ((*currentPt ) -> choice != GBPARSE_INT_COMMA){
1781
Nlm_gbparse_error("Missing comma after first location in replace",
1782
head_token, *currentPt);
1787
Nlm_gbparse_error("Missing \'(\'" /* paren match ) */
1788
, head_token, *currentPt);
1796
/*-------- Nlm_gbreplace_ver() --------*/
/*
 * Variant of the "replace" handler that forwards the accver flag: the
 * first location is parsed with Nlm_gbloc_ver(..., seq_id, accver).
 * *keep_rawPt is always set to TRUE, the current token is skipped and a
 * '(' is required.  After the location the code checks that tokens remain
 * and that the next one is a comma; failures are reported through
 * Nlm_gbparse_error().
 *
 * NOTE(review): *currentPt is advanced and then dereferenced for the '('
 * test with no NULL check visible in between -- confirm a token always
 * follows "replace".
 */
1798
NLM_EXTERN SeqLocPtr Nlm_gbreplace_ver(Boolean PNTR keep_rawPt, int PNTR parenPt,
1799
Boolean PNTR sitesPt,
1800
ValNodePtr PNTR currentPt,
1801
ValNodePtr head_token, int PNTR num_errPt,
1802
SeqIdPtr seq_id, Boolean accver)
1804
SeqLocPtr retval = NULL;
1806
* keep_rawPt = TRUE;
1807
*currentPt = (* currentPt) -> next;
1809
if ((*currentPt ) -> choice == GBPARSE_INT_LEFT){
1810
*currentPt = (* currentPt) -> next;
1811
retval = Nlm_gbloc_ver (keep_rawPt, parenPt, sitesPt, currentPt, head_token,
1812
num_errPt,seq_id, accver);
1813
if ( ! * currentPt){
1814
Nlm_gbparse_error("unexpected end of interval tokens",
1815
head_token, *currentPt);
1816
* keep_rawPt = TRUE; (* num_errPt) ++;
1819
if ((*currentPt ) -> choice != GBPARSE_INT_COMMA){
1820
Nlm_gbparse_error("Missing comma after first location in replace",
1821
head_token, *currentPt);
1826
Nlm_gbparse_error("Missing \'(\'" /* paren match ) */
1827
, head_token, *currentPt);
1837
/*
 * Error-reporting helper for the lexers.  It expands in place and relies
 * on the locals Saved_ch, current_col and forerrmacro of the enclosing
 * function.  The character after current_col is saved and overwritten
 * with '\0' so that the text handed to Nlm_gbparse_error() ends at the
 * offending column; the character is put back afterwards.
 *
 * NOTE(review): this is a multi-statement macro; the visible lines are not
 * wrapped in do { ... } while (0), so it is only safe inside braces.
 */
#define Nlm_lex_error_MACRO(msg)\
1839
Saved_ch = *(current_col +1);\
1840
*(current_col +1) = '\0';\
1844
Nlm_gbparse_error(msg, & forerrmacro, & forerrmacro);\
1846
*(current_col +1) = Saved_ch;
1848
/*------------- gbparselex()-----------------------*/
/*
 * Lexer for location strings.  linein is copied into a private buffer
 * (line_use), trailing ' ', '\n', '\r' and '~' characters are trimmed,
 * and the remaining text is walked one character at a time.  Each
 * recognised item becomes a ValNode whose choice is one of the
 * GBPARSE_INT_* codes; numbers, accessions and the "gap" keyword also get
 * a text copy in data.ptrvalue.  The head of the token list is stored in
 * *lexed.
 *
 * skip_new_token marks a node that ended up unused (white space, ignored
 * words) so that the next loop pass reuses it instead of allocating.
 * forerrmacro is a stack ValNode used by Nlm_lex_error_MACRO as both head
 * and current token when reporting lexical errors.
 *
 * NOTE(review): isdigit() is called on plain char values; bytes with the
 * high bit set would be outside the domain <ctype.h> defines.
 */
1851
Nlm_gbparselex(CharPtr linein, ValNodePtr PNTR lexed)
1853
CharPtr current_col=0, points_at_term_null,spare, line_use = NULL;
1855
int retval = 0, len;
1856
ValNodePtr current_token = NULL, last_token = NULL;
1857
Boolean skip_new_token=FALSE;
1858
Boolean die_now=FALSE;
1859
ValNode forerrmacro;
1861
forerrmacro.choice =GBPARSE_INT_ACCESION ;
1864
/* Work on a private copy; the error macro temporarily writes into it. */
len = StringLen(linein);
1865
line_use = MemNew(len + 1);
1866
StringCpy(line_use, linein);
1868
Nlm_lex_error_MACRO( "Lex list not cleared on entry to Nlm_gbparselex")
1869
ValNodeFree( * lexed);
1872
current_col = line_use ;
1873
forerrmacro.data.ptrvalue = line_use;
1875
* Clear terminal white space
1877
points_at_term_null = line_use + len;
1878
spare = points_at_term_null - 1;
1879
/* NOTE(review): the loop condition has no lower bound on spare; for an
   empty or all-blank line it would read before line_use -- confirm. */
while (*spare == ' ' || *spare == '\n' || *spare == '\r' || *spare == '~') {
1881
points_at_term_null --;
1885
/* Main scan: one pass per input character or multi-character token. */
while (current_col < points_at_term_null && ! die_now) {
1886
if ( ! skip_new_token){
1887
last_token = current_token;
1888
current_token = ValNodeNew(current_token);
1890
* lexed = current_token;
1892
switch ( *current_col){
1895
/* Quoted string: scan forward to the closing double quote. */
skip_new_token = FALSE;
1896
current_token -> choice = GBPARSE_INT_STRING;
1897
for (spare = current_col +1; spare < points_at_term_null;
1899
if ( *spare == '\"'){
1903
if (spare >= points_at_term_null){
1904
Nlm_lex_error_MACRO( "unterminated string")
1907
len = spare-current_col + 1;
1908
current_token -> data.ptrvalue =
1910
StringNCpy(current_token -> data.ptrvalue,
1918
/* Number: count the digits (dex) and keep a copy of the digit text. */
case '0': case '1': case '2': case '3': case '4':
1919
case '5': case '6': case '7': case '8': case '9':
1920
skip_new_token = FALSE;
1921
current_token -> choice = GBPARSE_INT_NUMBER;
1922
for (dex=0, spare = current_col; isdigit(*spare); spare ++){
1925
current_token -> data.ptrvalue = MemNew(dex+1);
1926
StringNCpy(current_token -> data.ptrvalue, current_col, dex);
1927
/* Stops on the last digit; presumably the loop's own increment steps past it. */
current_col += dex -1;
1933
/* Keyword "join"; on a misspelling, skip ahead to the next '('. */
skip_new_token = FALSE;
1934
current_token -> choice = GBPARSE_INT_JOIN;
1935
if (StringNCmp(current_col,"join",(unsigned) 4)!=0){
1936
Nlm_lex_error_MACRO( "\"join\" misspelled")
1938
for(;*current_col && *current_col != '('; current_col++)
1939
; /* vi match ) empty body*/
1940
current_col -- ; /* back up 'cause ++ follows */
1950
/* Keywords "order" and "one-of". */
skip_new_token = FALSE;
1951
if (StringNCmp(current_col,"order",(unsigned) 5)!=0){
1952
if (StringNCmp(current_col,"one-of",(unsigned) 6)!=0){
1953
Nlm_lex_error_MACRO( "\"order\" or \"one-of\" misspelled")
1955
for(;*current_col && *current_col != '('; current_col++)
1956
; /* vi match ) empty body*/
1957
current_col -- ; /* back up 'cause ++ follows */
1959
current_token -> choice = GBPARSE_INT_ONE_OF ;
1963
current_token -> choice = GBPARSE_INT_ORDER;
1972
/* Keyword "replace". */
skip_new_token = FALSE;
1973
current_token -> choice = GBPARSE_INT_REPLACE ;
1974
/* NOTE(review): only 6 of the 7 characters of "replace" are compared. */
if (StringNCmp(current_col,"replace",(unsigned) 6)!=0){
1975
Nlm_lex_error_MACRO( "\"replace\" misspelled")
1977
for(;*current_col && *current_col != '('; current_col++)
1978
; /* vi match ) empty body*/
1979
current_col -- ; /* back up 'cause ++ follows */
1986
* GAP or GROUP or GI
1989
skip_new_token = FALSE;
1990
/* "gap" counts only when followed by '(', blank, tab or end of text. */
if(StringNCmp(current_col, "gap", 3) == 0 &&
1991
(current_col[3] == '(' ||
1992
current_col[3] == ' ' ||
1993
current_col[3] == '\t' ||
1994
current_col[3] == '\0'))
1996
current_token->choice = GBPARSE_INT_GAP;
1998
current_token->data.ptrvalue = MemNew(4);
1999
StringCpy(current_token->data.ptrvalue, "gap");
2002
/* "gi|" prefix: tokenised as an accession, then digits are consumed. */
if(StringNCmp(current_col, "gi|", 3) == 0) {
2003
current_token->choice = GBPARSE_INT_ACCESION;
2005
for (; IS_DIGIT(*current_col); current_col++) ;
2008
current_token -> choice = GBPARSE_INT_GROUP;
2009
if (StringNCmp(current_col,"group",(unsigned) 5)!=0){
2010
Nlm_lex_error_MACRO("\"group\" misspelled")
2012
for(;*current_col && *current_col != '('; current_col++)
2013
; /* vi match ) empty body*/
2014
current_col -- ; /* back up 'cause ++ follows */
2024
/* Keyword "complement". */
skip_new_token = FALSE;
2025
current_token -> choice = GBPARSE_INT_COMPL;
2026
if (StringNCmp(current_col,"complement",(unsigned) 10)!=0){
2027
Nlm_lex_error_MACRO("\"complement\" misspelled")
2029
for(;*current_col && *current_col != '('; current_col++)
2030
; /* vi match ) empty body*/
2031
current_col -- ; /* back up 'cause ++ follows */
2038
* internal bases ignored
2041
if (StringNCmp(current_col,"bases",(unsigned) 5)!=0){
2044
skip_new_token = TRUE;
2050
* ()^.,<> (bases (sites
2053
/* "(base" / "(bases": emitted as a JOIN token followed by a LEFT token. */
if (StringNCmp(current_col,"(base",(unsigned) 5)==0){
2054
skip_new_token = FALSE;
2055
current_token -> choice = GBPARSE_INT_JOIN;
2057
if (*current_col != '\0')
2058
if ( * (current_col +1) == 's')
2060
last_token = current_token;
2061
current_token = ValNodeNew(current_token);
2062
current_token -> choice = GBPARSE_INT_LEFT;
2063
/* NOTE(review): the literal is "(sites" but only 5 characters are
   compared, so "(site" also matches -- presumably intended, confirm. */
}else if (StringNCmp(current_col,"(sites",(unsigned) 5)==0){
2064
skip_new_token = FALSE;
2066
if (*current_col != '\0')
2067
if ( * (current_col +1) == ')'){
2069
current_token -> choice = GBPARSE_INT_SITES;
2071
/* Otherwise: SITES, then JOIN, then LEFT tokens are emitted in a row. */
current_token -> choice = GBPARSE_INT_SITES;
2072
last_token = current_token;
2073
current_token = ValNodeNew(current_token);
2074
current_token -> choice = GBPARSE_INT_JOIN;
2075
last_token = current_token;
2076
current_token = ValNodeNew(current_token);
2077
current_token -> choice = GBPARSE_INT_LEFT;
2078
if (*current_col != '\0'){
2079
if ( * (current_col +1) == ';'){
2081
}else if (StringNCmp(current_col +1," ;", (unsigned) 2) ==0){
2087
/* Single-character punctuation tokens follow. */
skip_new_token = FALSE;
2088
current_token -> choice = GBPARSE_INT_LEFT;
2093
skip_new_token = FALSE;
2094
current_token -> choice = GBPARSE_INT_RIGHT;
2099
skip_new_token = FALSE;
2100
current_token -> choice = GBPARSE_INT_CARET;
2104
skip_new_token = FALSE;
2105
current_token -> choice = GBPARSE_INT_DOT_DOT ;
2108
/* '.' is a single dot unless the text here is "..". */
skip_new_token = FALSE;
2109
if (StringNCmp(current_col,"..",(unsigned) 2)!=0){
2110
current_token -> choice = GBPARSE_INT_SINGLE_DOT ;
2112
current_token -> choice = GBPARSE_INT_DOT_DOT;
2118
skip_new_token = FALSE;
2119
current_token -> choice = GBPARSE_INT_GT;
2123
skip_new_token = FALSE;
2124
current_token -> choice = GBPARSE_INT_LT;
2130
skip_new_token = FALSE;
2131
current_token -> choice = GBPARSE_INT_COMMA;
2134
/* White space and '~' produce no token; the node is reused. */
case ' ': case '\t': case '\n': case '\r': case '~':
2135
skip_new_token = TRUE;
2139
/* The word "to" is tokenised like "..". */
if (StringNCmp(current_col,"to",(unsigned) 2)!=0){
2142
skip_new_token = FALSE;
2143
current_token -> choice = GBPARSE_INT_DOT_DOT;
2149
/* The word "site" (optionally "sites"). */
if (StringNCmp(current_col,"site",(unsigned) 4)!=0){
2152
skip_new_token = FALSE;
2153
current_token -> choice = GBPARSE_INT_SITES;
2155
if (*current_col != '\0')
2156
if ( * (current_col +1) == 's')
2158
if (*current_col != '\0'){
2159
if ( * (current_col +1) == ';'){
2161
}else if (StringNCmp(current_col +1," ;", (unsigned) 2) ==0){
2172
* all GenBank accessions start with a capital letter
2173
* and then have numbers
2175
/* new accessions start with 2 capital letters !! 1997 */
2176
/* new accessions have .version !! 2/15/1999 */
2177
skip_new_token = FALSE;
2178
current_token -> choice = GBPARSE_INT_ACCESION;
2179
/* Skip a one- or two-letter prefix before scanning the digits. */
if (IS_ALPHA(*(current_col + 1))) {
2180
spare = current_col + 2;
2183
spare = current_col + 1;
2186
for (; isdigit(*spare); spare ++){
2189
/* A '.' after the digits introduces the version number. */
if (*spare == '.') {
2191
for (spare++; isdigit(*spare); spare ++){
2196
Nlm_lex_error_MACRO( "ACCESSION missing \":\"" )
2200
/* Keep a copy of the accession text (dex characters). */
current_token -> data.ptrvalue = MemNew(dex+1);
2201
StringNCpy(current_token -> data.ptrvalue, current_col, dex);
2202
current_col += dex ;
2206
/*--move to past last "good" character---*/
2209
if ( ! * lexed && current_token){
2210
* lexed = current_token;
2212
/* A trailing node that was marked to be skipped is unlinked and freed. */
if (skip_new_token && current_token) {
2214
* last node points to a null (blank or white space token)
2217
last_token -> next = NULL;
2221
ValNodeFree(current_token);
2231
/*------------- Nlm_gbparselex_ver() -----------------------*/
/*
 * Lexer for location strings, with an accver switch.  linein is copied
 * into a private buffer (line_use), trailing ' ', '\n', '\r' and '~'
 * characters are trimmed, and the remaining text is walked one character
 * at a time.  Each recognised item becomes a ValNode whose choice is one
 * of the GBPARSE_INT_* codes; numbers, accessions and the "gap" keyword
 * also get a text copy in data.ptrvalue.  The head of the token list is
 * stored in *lexed.
 *
 * accver: the ".version" suffix of an accession is scanned only when
 * accver is not FALSE.
 *
 * skip_new_token marks a node that ended up unused (white space, ignored
 * words) so that the next loop pass reuses it instead of allocating.
 * forerrmacro is a stack ValNode used by Nlm_lex_error_MACRO as both head
 * and current token when reporting lexical errors.
 *
 * NOTE(review): isdigit() is called on plain char values; bytes with the
 * high bit set would be outside the domain <ctype.h> defines.
 */
2234
Nlm_gbparselex_ver(CharPtr linein, ValNodePtr PNTR lexed, Boolean accver)
2236
CharPtr current_col=0, points_at_term_null,spare, line_use = NULL;
2238
int retval = 0, len;
2239
ValNodePtr current_token = NULL, last_token = NULL;
2240
Boolean skip_new_token=FALSE;
2241
Boolean die_now=FALSE;
2242
ValNode forerrmacro;
2244
forerrmacro.choice =GBPARSE_INT_ACCESION ;
2247
/* Work on a private copy; the error macro temporarily writes into it. */
len = StringLen(linein);
2248
line_use = MemNew(len + 1);
2249
StringCpy(line_use, linein);
2251
Nlm_lex_error_MACRO( "Lex list not cleared on entry to Nlm_gbparselex_ver")
2252
ValNodeFree( * lexed);
2255
current_col = line_use ;
2256
forerrmacro.data.ptrvalue = line_use;
2258
* Clear terminal white space
2260
points_at_term_null = line_use + len;
2261
spare = points_at_term_null - 1;
2262
/* NOTE(review): the loop condition has no lower bound on spare; for an
   empty or all-blank line it would read before line_use -- confirm. */
while (*spare == ' ' || *spare == '\n' || *spare == '\r' || *spare == '~') {
2264
points_at_term_null --;
2268
/* Main scan: one pass per input character or multi-character token. */
while (current_col < points_at_term_null && ! die_now) {
2269
if ( ! skip_new_token){
2270
last_token = current_token;
2271
current_token = ValNodeNew(current_token);
2273
* lexed = current_token;
2275
switch ( *current_col){
2278
/* Quoted string: scan forward to the closing double quote. */
skip_new_token = FALSE;
2279
current_token -> choice = GBPARSE_INT_STRING;
2280
for (spare = current_col +1; spare < points_at_term_null;
2282
if ( *spare == '\"'){
2286
if (spare >= points_at_term_null){
2287
Nlm_lex_error_MACRO( "unterminated string")
2290
len = spare-current_col + 1;
2291
current_token -> data.ptrvalue =
2293
StringNCpy(current_token -> data.ptrvalue,
2301
/* Number: count the digits (dex) and keep a copy of the digit text. */
case '0': case '1': case '2': case '3': case '4':
2302
case '5': case '6': case '7': case '8': case '9':
2303
skip_new_token = FALSE;
2304
current_token -> choice = GBPARSE_INT_NUMBER;
2305
for (dex=0, spare = current_col; isdigit(*spare); spare ++){
2308
current_token -> data.ptrvalue = MemNew(dex+1);
2309
StringNCpy(current_token -> data.ptrvalue, current_col, dex);
2310
/* Stops on the last digit; presumably the loop's own increment steps past it. */
current_col += dex -1;
2316
/* Keyword "join"; on a misspelling, skip ahead to the next '('. */
skip_new_token = FALSE;
2317
current_token -> choice = GBPARSE_INT_JOIN;
2318
if (StringNCmp(current_col,"join",(unsigned) 4)!=0){
2319
Nlm_lex_error_MACRO( "\"join\" misspelled")
2321
for(;*current_col && *current_col != '('; current_col++)
2322
; /* vi match ) empty body*/
2323
current_col -- ; /* back up 'cause ++ follows */
2333
/* Keywords "order" and "one-of". */
skip_new_token = FALSE;
2334
if (StringNCmp(current_col,"order",(unsigned) 5)!=0){
2335
if (StringNCmp(current_col,"one-of",(unsigned) 6)!=0){
2336
Nlm_lex_error_MACRO( "\"order\" or \"one-of\" misspelled")
2338
for(;*current_col && *current_col != '('; current_col++)
2339
; /* vi match ) empty body*/
2340
current_col -- ; /* back up 'cause ++ follows */
2342
current_token -> choice = GBPARSE_INT_ONE_OF ;
2346
current_token -> choice = GBPARSE_INT_ORDER;
2355
/* Keyword "replace". */
skip_new_token = FALSE;
2356
current_token -> choice = GBPARSE_INT_REPLACE ;
2357
/* NOTE(review): only 6 of the 7 characters of "replace" are compared. */
if (StringNCmp(current_col,"replace",(unsigned) 6)!=0){
2358
Nlm_lex_error_MACRO( "\"replace\" misspelled")
2360
for(;*current_col && *current_col != '('; current_col++)
2361
; /* vi match ) empty body*/
2362
current_col -- ; /* back up 'cause ++ follows */
2369
* GAP or GROUP or GI
2372
skip_new_token = FALSE;
2373
/* "gap" counts only when followed by '(', blank, tab or end of text. */
if(StringNCmp(current_col, "gap", 3) == 0 &&
2374
(current_col[3] == '(' ||
2375
current_col[3] == ' ' ||
2376
current_col[3] == '\t' ||
2377
current_col[3] == '\0'))
2379
current_token->choice = GBPARSE_INT_GAP;
2381
current_token->data.ptrvalue = MemNew(4);
2382
StringCpy(current_token->data.ptrvalue, "gap");
2385
/* "gi|" prefix: tokenised as an accession, then digits are consumed. */
if(StringNCmp(current_col, "gi|", 3) == 0) {
2386
current_token->choice = GBPARSE_INT_ACCESION;
2388
for (; IS_DIGIT(*current_col); current_col++) ;
2391
current_token -> choice = GBPARSE_INT_GROUP;
2392
if (StringNCmp(current_col,"group",(unsigned) 5)!=0){
2393
Nlm_lex_error_MACRO("\"group\" misspelled")
2395
for(;*current_col && *current_col != '('; current_col++)
2396
; /* vi match ) empty body*/
2397
current_col -- ; /* back up 'cause ++ follows */
2407
/* Keyword "complement". */
skip_new_token = FALSE;
2408
current_token -> choice = GBPARSE_INT_COMPL;
2409
if (StringNCmp(current_col,"complement",(unsigned) 10)!=0){
2410
Nlm_lex_error_MACRO("\"complement\" misspelled")
2412
for(;*current_col && *current_col != '('; current_col++)
2413
; /* vi match ) empty body*/
2414
current_col -- ; /* back up 'cause ++ follows */
2421
* internal bases ignored
2424
if (StringNCmp(current_col,"bases",(unsigned) 5)!=0){
2427
skip_new_token = TRUE;
2433
* ()^.,<> (bases (sites
2436
/* "(base" / "(bases": emitted as a JOIN token followed by a LEFT token. */
if (StringNCmp(current_col,"(base",(unsigned) 5)==0){
2437
skip_new_token = FALSE;
2438
current_token -> choice = GBPARSE_INT_JOIN;
2440
if (*current_col != '\0')
2441
if ( * (current_col +1) == 's')
2443
last_token = current_token;
2444
current_token = ValNodeNew(current_token);
2445
current_token -> choice = GBPARSE_INT_LEFT;
2446
/* NOTE(review): the literal is "(sites" but only 5 characters are
   compared, so "(site" also matches -- presumably intended, confirm. */
}else if (StringNCmp(current_col,"(sites",(unsigned) 5)==0){
2447
skip_new_token = FALSE;
2449
if (*current_col != '\0')
2450
if ( * (current_col +1) == ')'){
2452
current_token -> choice = GBPARSE_INT_SITES;
2454
/* Otherwise: SITES, then JOIN, then LEFT tokens are emitted in a row. */
current_token -> choice = GBPARSE_INT_SITES;
2455
last_token = current_token;
2456
current_token = ValNodeNew(current_token);
2457
current_token -> choice = GBPARSE_INT_JOIN;
2458
last_token = current_token;
2459
current_token = ValNodeNew(current_token);
2460
current_token -> choice = GBPARSE_INT_LEFT;
2461
if (*current_col != '\0'){
2462
if ( * (current_col +1) == ';'){
2464
}else if (StringNCmp(current_col +1," ;", (unsigned) 2) ==0){
2470
/* Single-character punctuation tokens follow. */
skip_new_token = FALSE;
2471
current_token -> choice = GBPARSE_INT_LEFT;
2476
skip_new_token = FALSE;
2477
current_token -> choice = GBPARSE_INT_RIGHT;
2482
skip_new_token = FALSE;
2483
current_token -> choice = GBPARSE_INT_CARET;
2487
skip_new_token = FALSE;
2488
current_token -> choice = GBPARSE_INT_DOT_DOT ;
2491
/* '.' is a single dot unless the text here is "..". */
skip_new_token = FALSE;
2492
if (StringNCmp(current_col,"..",(unsigned) 2)!=0){
2493
current_token -> choice = GBPARSE_INT_SINGLE_DOT ;
2495
current_token -> choice = GBPARSE_INT_DOT_DOT;
2501
skip_new_token = FALSE;
2502
current_token -> choice = GBPARSE_INT_GT;
2506
skip_new_token = FALSE;
2507
current_token -> choice = GBPARSE_INT_LT;
2513
skip_new_token = FALSE;
2514
current_token -> choice = GBPARSE_INT_COMMA;
2517
/* White space and '~' produce no token; the node is reused. */
case ' ': case '\t': case '\n': case '\r': case '~':
2518
skip_new_token = TRUE;
2522
/* The word "to" is tokenised like "..". */
if (StringNCmp(current_col,"to",(unsigned) 2)!=0){
2525
skip_new_token = FALSE;
2526
current_token -> choice = GBPARSE_INT_DOT_DOT;
2532
/* The word "site" (optionally "sites"). */
if (StringNCmp(current_col,"site",(unsigned) 4)!=0){
2535
skip_new_token = FALSE;
2536
current_token -> choice = GBPARSE_INT_SITES;
2538
if (*current_col != '\0')
2539
if ( * (current_col +1) == 's')
2541
if (*current_col != '\0'){
2542
if ( * (current_col +1) == ';'){
2544
}else if (StringNCmp(current_col +1," ;", (unsigned) 2) ==0){
2555
* all GenBank accessions start with a capital letter
2556
* and then have numbers
2558
/* new accessions start with 2 capital letters !! 1997 */
2559
/* new accessions have .version !! 2/15/1999 */
2560
skip_new_token = FALSE;
2561
current_token -> choice = GBPARSE_INT_ACCESION;
2562
/* Skip a one- or two-letter prefix before scanning the digits. */
if (IS_ALPHA(*(current_col + 1))) {
2563
spare = current_col + 2;
2566
spare = current_col + 1;
2569
for (; isdigit(*spare); spare ++){
2572
/* The ".version" suffix is consumed only when the caller asked for it. */
if (accver != FALSE && *spare == '.') {
2574
for (spare++; isdigit(*spare); spare ++){
2579
Nlm_lex_error_MACRO( "ACCESSION missing \":\"" )
2583
/* Keep a copy of the accession text (dex characters). */
current_token -> data.ptrvalue = MemNew(dex+1);
2584
StringNCpy(current_token -> data.ptrvalue, current_col, dex);
2585
current_col += dex ;
2589
/*--move to past last "good" character---*/
2592
if ( ! * lexed && current_token){
2593
* lexed = current_token;
2595
/* A trailing node that was marked to be skipped is unlinked and freed. */
if (skip_new_token && current_token) {
2597
* last node points to a null (blank or white space token)
2600
last_token -> next = NULL;
2604
ValNodeFree(current_token);
2614
/*---- non_white()----*/
/*
 * Advances ch past white space.  The pointer is incremented before each
 * test, so the character ch points at on entry is never examined.
 *
 * NOTE(review): the inner "if (! *ch) break;" runs only when isspace()
 * was true for *ch, and '\0' is not normally white space, so the check
 * looks unreachable -- confirm.  isspace() also receives a plain char;
 * bytes with the high bit set are outside the domain <ctype.h> defines.
 */
2617
Nlm_non_white(CharPtr ch)
2619
while (isspace(*++ch))if (! *ch) break;
2624
/*------ gbparse_lexfree()-------*/
2626
NLM_EXTERN ValNodePtr
2627
Nlm_gbparse_lexfree(ValNodePtr anp)
2634
if ( anp -> choice == GBPARSE_INT_NUMBER ||
2635
anp -> choice == GBPARSE_INT_ACCESION){
2636
MemFree(anp->data.ptrvalue);