5137
5185
sfp->data.value.intvalue = j;
5140
} else if (sfp->data.choice == SEQFEAT_PUB && (StringICmp (qual, "pmid") == 0 || StringICmp (qual, "PubMed") == 0)) {
5141
if (sscanf (val, "%ld", &uid) == 1) {
5188
} else if (sfp->data.choice == SEQFEAT_PUB) {
5190
if (sscanf (val, "%ld", &uid) == 1) {
5191
pdp = (PubdescPtr) sfp->data.value.ptrvalue;
5193
ValNodeAddInt (&(pdp->pub), PUB_PMid, (Int4) uid);
5196
} else if (isMuid) {
5197
if (sscanf (val, "%ld", &uid) == 1) {
5198
pdp = (PubdescPtr) sfp->data.value.ptrvalue;
5200
ValNodeAddInt (&(pdp->pub), PUB_Muid, (Int4) uid);
5203
} else if (isAuthor || isAffil) {
5142
5204
pdp = (PubdescPtr) sfp->data.value.ptrvalue;
5143
5206
if (pdp != NULL) {
5144
ValNodeAddInt (&(pdp->pub), PUB_PMid, (Int4) uid);
5207
for (vnp = pdp->pub; vnp != NULL; vnp = vnp->next) {
5208
if (vnp->choice == PUB_Sub) {
5209
csp = (CitSubPtr) vnp->data.ptrvalue;
5216
csp->date = DateCurr ();
5217
ValNodeAddPointer (&(pdp->pub), PUB_Sub, (Pointer) csp);
5223
alp = AuthListNew ();
5231
ValNodeCopyStr (&(alp->names), 3, val);
5232
} else if (isAffil) {
5234
if (affil == NULL) {
5235
affil = AffilNew ();
5238
if (affil != NULL) {
5240
affil->affil = StringSave (val);
5147
5247
} else if (sfp->data.choice == SEQFEAT_PUB && (StringICmp (qual, "muid") == 0 || StringICmp (qual, "MEDLINE") == 0)) {
5148
if (sscanf (val, "%ld", &uid) == 1) {
5149
pdp = (PubdescPtr) sfp->data.value.ptrvalue;
5151
ValNodeAddInt (&(pdp->pub), PUB_Muid, (Int4) uid);
5154
5248
} else if (sfp->data.choice == SEQFEAT_BIOSRC && ParseQualIntoBioSource (sfp, qual, val)) {
5155
5249
} else if (sfp->data.choice == SEQFEAT_CDREGION && StringCmp (qual, "prot_desc") == 0) {
5156
5250
xref = sfp->xref;
8139
/* general purpose text finite state machine */
8140
/* based on Practical Algorithms for Programmers by Binstock and Rex */
8142
typedef struct fsagoto {
8145
struct fsagoto * next;
8146
} GotoItem, PNTR GotoPtr;
8148
typedef struct fsastate {
8150
ValNodePtr matchfound;
8152
} StateItem, PNTR StatePtr;
8154
#define FAIL_STATE -1
8156
static StatePtr GetState (
8157
StatePtr PNTR stateTable,
8164
sp = stateTable [state];
8166
sp = (StatePtr) MemNew (sizeof (StateItem));
8167
stateTable [state] = sp;
8173
static Int2 GotoState (StatePtr PNTR stateTable, Int2 state,
8174
Char ch, Boolean zeroFailureReturnsZero)
8180
sp = GetState (stateTable, state);
8181
if (sp == NULL) return 0;
8183
for (gp = sp->transition; gp != NULL; gp = gp->next) {
8184
if (gp->ch == ch) return gp->newstate;
8187
if (state == 0 && zeroFailureReturnsZero) return 0;
8193
#define FailState(stateTable,state) stateTable [state].onfailure
8196
static Int2 FailState (
8197
StatePtr PNTR stateTable,
8204
sp = GetState (stateTable, state);
8205
if (sp == NULL) return 0;
8207
return sp->onfailure;
8210
static void AddTransition (StatePtr PNTR stateTable, Int2 oldState,
8211
Char ch, Int2 newState)
8218
gp = (GotoPtr) MemNew (sizeof (GotoItem));
8219
if (gp == NULL) return;
8222
gp->newstate = newState;
8224
sp = GetState (stateTable, oldState);
8225
if (sp == NULL) return;
8227
prev = sp->transition;
8229
sp->transition = gp;
8231
while (prev->next != NULL) {
8238
static void AddOutput (StatePtr PNTR stateTable, Int2 state, CharPtr word)
8244
sp = GetState (stateTable, state);
8245
if (sp == NULL) return;
8247
for (vnp = sp->matchfound; vnp != NULL; vnp = vnp->next) {
8248
if (StringCmp (word, (CharPtr) vnp->data.ptrvalue) == 0) return;
8251
ValNodeCopyStr (&(sp->matchfound), 0, word);
8254
static Int2 EnterWord (StatePtr PNTR stateTable, CharPtr word,
8255
Int2 highState, Int2 maxState)
8266
/* try to overlay beginning of word onto existing table */
8268
for (ptr = word, ch = *ptr; ch != '\0'; ptr++, ch = *ptr) {
8269
next = GotoState (stateTable, state, ch, FALSE);
8270
if (next == FAIL_STATE) break;
8274
/* now create new states for remaining characters in word */
8276
for ( ; ch != '\0'; ptr++, ch = *ptr) {
8278
if (highState >= maxState) return highState;
8280
AddTransition (stateTable, state, ch, highState);
8284
/* at end of word record match information */
8286
AddOutput (stateTable, state, word);
8291
static void QueueAdd (Int2Ptr queue, Int2 qbeg, Int2 val)
8300
for ( ; queue [q] != 0; q = queue [q]) continue;
8306
static void FindFail (StatePtr PNTR stateTable, Int2 state,
8307
Int2 newState, Char ch)
8314
/* traverse existing failure path */
8316
next = GotoState (stateTable, state, ch, TRUE);
8318
while ((next = GotoState (stateTable, state, ch, TRUE)) == FAIL_STATE) {
8319
state = FailState (stateTable, state);
8322
/* add new failure state */
8324
sp = GetState (stateTable, newState);
8325
if (sp == NULL) return;
8327
sp->onfailure = next;
8329
/* add matches of substring at new state */
8331
sp = GetState (stateTable, next);
8332
if (sp == NULL) return;
8334
for (vnp = sp->matchfound; vnp != NULL; vnp = vnp->next) {
8335
AddOutput (stateTable, newState, (CharPtr) vnp->data.ptrvalue);
8339
static void ComputeFail (StatePtr PNTR stateTable, Int2Ptr queue, Int2 highState)
8343
Int2 qbeg, r, s, state;
8349
/* queue up states reached directly from state 0 (depth 1) */
8351
sp = GetState (stateTable, 0);
8352
if (sp == NULL) return;
8354
for (gp = sp->transition; gp != NULL; gp = gp->next) {
8357
sp = GetState (stateTable, s);
8358
if (sp == NULL) return;
8361
QueueAdd (queue, qbeg, s);
8364
while (queue [qbeg] != 0) {
8368
/* depth 1 states beget depth 2 states, etc. */
8370
sp = GetState (stateTable, r);
8371
if (sp == NULL) return;
8373
for (gp = sp->transition; gp != NULL; gp = gp->next) {
8375
QueueAdd (queue, qbeg, s);
8378
State Substring Transitions Failure
8384
For example, r = 2 (st), if 'a' would go to s = 3 (sta).
8385
From previous computation, 2 (st) fails to 6 (t).
8386
Thus, check state 6 (t) for any transitions using 'a'.
8387
Since 6 (t) 'a' -> 7 (ta), therefore set fail [3] -> 7.
8390
state = FailState (stateTable, r);
8391
FindFail (stateTable, state, s, gp->ch);
8396
typedef struct TextFsa {
8397
StatePtr PNTR stateTable;
8398
ValNodePtr siteList;
8403
static void PrimeStateTable (TextFsaPtr tbl)
8409
StatePtr PNTR stateTable;
8413
if (tbl == NULL || tbl->siteList == NULL || tbl->primed) return;
8415
for (maxState = 1, vnp = tbl->siteList; vnp != NULL; vnp = vnp->next) {
8416
word = (CharPtr) vnp->data.ptrvalue;
8417
maxState += StringLen (word);
8421
if (maxState > 32000) {
8425
stateTable = (StatePtr PNTR) MemNew (sizeof (StatePtr) * (size_t) maxState);
8426
queue = (Int2Ptr) MemNew (sizeof (Int2) * maxState);
8428
if (stateTable == NULL || queue == NULL) {
8429
MemFree (stateTable);
8431
Message (MSG_POST, "FiniteStateSearch unable to allocate buffers");
8435
for (highState = 0, vnp = tbl->siteList; vnp != NULL; vnp = vnp->next) {
8436
word = (CharPtr) vnp->data.ptrvalue;
8437
highState = EnterWord (stateTable, word, highState, maxState);
8440
if (highState >= maxState) {
8441
ErrPostEx (SEV_ERROR, 0, 0, "FiniteStateSearch cannot handle more than %d states", (int) highState);
8444
ComputeFail (stateTable, queue, highState);
8448
tbl->stateTable = stateTable;
8449
tbl->highState = highState;
8453
NLM_EXTERN TextFsaPtr TextFsaNew (void)
8458
tbl = (TextFsaPtr) MemNew (sizeof (TextFsaData));
8459
if (tbl == NULL) return NULL;
8460
tbl->stateTable = NULL;
8461
tbl->siteList = NULL;
8462
tbl->primed = FALSE;
8466
NLM_EXTERN void TextFsaAdd (TextFsaPtr tbl, CharPtr word)
8469
if (tbl == NULL) return;
8470
ValNodeCopyStr (&(tbl->siteList), 0, word);
8473
NLM_EXTERN Int2 TextFsaNext (TextFsaPtr tbl, Int2 currState,
8474
Char ch, ValNodePtr PNTR matches)
8479
StatePtr PNTR stateTable;
8481
if (matches != NULL) {
8484
if (tbl == NULL) return 0;
8485
if (! tbl->primed) {
8486
PrimeStateTable (tbl);
8488
stateTable = tbl->stateTable;
8489
if (stateTable == NULL) return 0;
8491
while ((next = GotoState (stateTable, currState, ch, TRUE)) == FAIL_STATE) {
8492
currState = FailState (stateTable, currState);
8495
if (matches != NULL) {
8497
sp = GetState (stateTable, next);
8498
if (sp == NULL) return next;
8500
*matches = sp->matchfound;
8506
NLM_EXTERN TextFsaPtr TextFsaFree (TextFsaPtr tbl)
8514
StatePtr PNTR stateTable;
8516
if (tbl == NULL) return NULL;
8518
stateTable = tbl->stateTable;
8519
if (stateTable != NULL) {
8520
highState = tbl->highState;
8522
for (state = 0; state < highState; state++) {
8523
sp = stateTable [state];
8524
if (sp == NULL) continue;
8526
gp = sp->transition;
8527
while (gp != NULL) {
8533
ValNodeFreeData (sp->matchfound);
8537
MemFree (stateTable);
8540
ValNodeFreeData (tbl->siteList);
8542
return MemFree (tbl);
8545
#if 0 /* original text fsa */
7993
8547
/* general purpose text finite state machine */
7994
8548
/* based on Practical Algorithms for Programmers by Binstock and Rex */