2
* ===========================================================================
5
* National Center for Biotechnology Information (NCBI)
7
* This software/database is a "United States Government Work" under the
8
* terms of the United States Copyright Act. It was written as part of
9
* the author's official duties as a United States Government employee and
10
* thus cannot be copyrighted. This software/database is freely available
11
* to the public for use. The National Library of Medicine and the U.S.
12
* Government do not place any restriction on its use or reproduction.
13
* We would, however, appreciate having the NCBI and the author cited in
14
* any work or product based on this material
16
* Although all reasonable efforts have been taken to ensure the accuracy
17
* and reliability of the software and data, the NLM and the U.S.
18
* Government do not and cannot warrant the performance or results that
19
* may be obtained by using this software or data. The NLM and the U.S.
20
* Government disclaim all warranties, express or implied, including
21
* warranties of performance, merchantability or fitness for any particular
24
* ===========================================================================
28
* Author: Jonathan Kans
30
* Version Creation Date: 5/5/00
37
* --------------------------------------------------------------------------
39
* ==========================================================================
57
#include <sys/times.h>
61
/* ML (Medline-style) to STD (standard) author name conversion code,
   modified from the original in medutil.c */
63
/* AllUpperCase: character test used when splitting author names.
   Visible behavior: returns FALSE when p is NULL, and returns FALSE as
   soon as a character ch fails IS_UPPER.
   NOTE(review): the parameter list, the loop that fetches ch, and the
   final return are not visible in this excerpt - presumably TRUE is
   returned when every character passes; confirm against the full file. */
static Boolean AllUpperCase (
70
if (p == NULL) return FALSE;
73
if (! IS_UPPER (ch)) return FALSE;
80
/* SplitMLAuthorName: splits one author name string into a last name,
   initials, and a suffix.  It is called as
   SplitMLAuthorName (token, last, initials, suffix), and the visible code
   reads 'name' and writes 'last', 'initials' and 'suffix'.
   Does nothing when name is NULL, or when the end of the string is reached
   while skipping leading characters.
   NOTE(review): the parameter list and several statements are not visible
   in this excerpt; the notes below describe only the visible lines. */
static void SplitMLAuthorName (
90
Char sbuf [20], ibuf [20];
92
/* Clear the ibuf field and transfer the entire name to 'last',
93
excluding leading and trailing spaces */
95
if (name == NULL) return;
102
/* Skips every leading character that compares <= ' ', not just blanks.
   NOTE(review): if Char is a signed type, bytes above 0x7F would also
   compare <= ' ' and be skipped - confirm this is intended. */
while (*name <= ' ') {
104
if (*name == '\0') return;
106
/* NOTE(review): no length is passed to StringCpy here; the caller must
   supply a 'last' buffer large enough for the whole name. */
StringCpy( last, name );
108
/* Scan backwards from the end of 'last' over trailing characters <= ' ' */
for (i=StringLen (last) - 1; ((i >= 0) && (last [i] <= ' ')); i--) {
112
/* Strip off the last token (initials or a name suffix such as Jr or Sr) */
114
p = StringRChr (last, (int) ' ');
115
if (p != NULL) { /* more than just last name */
117
/* Separate the token from the last name */
120
while ((p > last) && (*p == ' ')) {
125
/* If the last token is not all upper case, and there are more than
126
two tokens, see if the next to the last are initials (upper case) */
128
if (! AllUpperCase (p2) && (p = StringRChr (last, (int) ' ' )) != NULL) {
130
/* We have at least three tokens, is the next to last initials? */
132
if (AllUpperCase (p + 1)) {
134
/* Yes - concatenate the last two tokens as initials */
136
/* NOTE(review): ibuf and sbuf are 20-byte arrays and these copies pass
   no length - a token of 20 or more characters would overrun them unless
   it is bounded elsewhere; confirm. */
StringCpy (ibuf, p + 1);
137
StringCpy (sbuf, p2);
138
while (p > last && (*p == ' ')) {
145
if (ibuf [0] == '\0') { /* Only the last token goes in ibuf */
146
StringCpy (ibuf, p2);
150
/* now add periods to ibuf and convert suffix */
152
/* Walk ibuf (p2) while writing into 'initials' (p) */
for (p = initials, p2 = ibuf; *p2 != '\0'; p2++, p++) {
154
if (! IS_LOWER(*(p2 + 1))) { /* watch out for foreign names */
162
/* Map the raw suffix text in sbuf to the form stored in 'suffix':
   "1d".."3d" and "4th".."6th" become Roman numerals "I." .. "VI.",
   "Sr" and "Jr" gain a trailing period, and the final visible branch
   copies sbuf through unchanged. */
if (StringCmp (sbuf, "1d") == 0)
163
p = StringMove (suffix, "I.");
164
else if (StringCmp (sbuf, "2d") == 0)
165
p = StringMove (suffix, "II.");
166
else if (StringCmp (sbuf, "3d") == 0)
167
p = StringMove (suffix, "III.");
168
else if (StringCmp (sbuf, "4th") == 0)
169
p = StringMove (suffix, "IV.");
170
else if (StringCmp (sbuf, "5th") == 0)
171
p = StringMove (suffix, "V.");
172
else if (StringCmp (sbuf, "6th") == 0)
173
p = StringMove (suffix, "VI.");
174
else if (StringCmp (sbuf, "Sr") == 0)
175
p = StringMove (suffix, "Sr.");
176
else if (StringCmp (sbuf, "Jr") == 0)
177
p = StringMove (suffix, "Jr.");
179
p = StringMove (suffix, sbuf);
183
/* ConvertMLtoSTD: converts one author name string (token) into a ValNode
   whose data.ptrvalue is set to 'aup'.
   The name is split with SplitMLAuthorName; the last name is saved in
   nsp->names [0], non-empty initials in nsp->names [4], and a non-empty
   suffix in nsp->names [5].
   Returns NULL when token is NULL or when nsp is NULL.
   NOTE(review): the parameter list, the allocation of nsp and aup, and the
   final return are not visible in this excerpt. */
static ValNodePtr ConvertMLtoSTD (
190
Char last [80], initials [20], suffix [20];
195
if (token == NULL) return NULL;
196
/* Scan backwards from the end of token over trailing blanks */
for (eptr = token + StringLen (token) - 1;
197
eptr > token && *eptr == ' ';
200
/* NOTE(review): last/initials/suffix are fixed-size arrays (80/20/20) and
   no sizes are passed along - confirm the input cannot exceed them. */
SplitMLAuthorName (token, last, initials, suffix);
203
if (nsp == NULL) return NULL;
204
nsp->names [0] = StringSave (last);
205
if (initials [0] != '\0') {
206
nsp->names[4] = StringSave (initials);
208
if (suffix[0] != '\0') {
209
nsp->names[5] = StringSave (suffix);
211
if (nsp->names[0] != NULL) {
212
/* NOTE(review): no NULL check on pid is visible before it is written. */
pid = PersonIdNew ();
213
pid->choice = 2; /* name */
217
/* NOTE(review): no NULL check on vnp is visible before it is written. */
vnp = ValNodeNew (NULL);
218
vnp->data.ptrvalue = (Pointer) aup;
224
/* ChangeMedlineAuthorsToISO: rewrites the author list of a Medline entry
   in place.  Only lists with alp->choice == 2 are touched; the choice is
   changed to 1 ("std names"), each old name string is passed through
   ConvertMLtoSTD, and the old name list is released with ValNodeFreeData.
   Returns silently when mep, cap or alp is NULL.
   NOTE(review): how cap and alp are obtained from mep, and how curr is
   linked into alp->names, is not visible in this excerpt. */
static void ChangeMedlineAuthorsToISO (
231
ValNodePtr curr, oldnames, tmp, v;
233
if (mep == NULL) return;
235
if (cap == NULL) return;
237
if (alp == NULL || alp->choice != 2) return;
239
/* Keep the original list so it can be walked and then freed */
oldnames = alp->names;
241
alp->choice = 1; /* make std names */
243
for (tmp = oldnames; tmp != NULL; tmp = tmp->next) {
244
/* NOTE(review): ConvertMLtoSTD can return NULL; no check on curr is
   visible here - confirm on the hidden lines. */
curr = ConvertMLtoSTD ((CharPtr) tmp->data.ptrvalue);
245
if (alp->names == NULL) {
248
/* Walk to the last node of the new list; this happens once per name. */
for (v = alp->names; v->next != NULL; v = v->next) continue;
253
ValNodeFreeData (oldnames);
256
/* PMFetchOpenConnection: builds a pmfetch query for (db, uid) and returns
   the connection obtained from QUERY_OpenUrlQuery.
   Returns NULL when db has no text or uid < 1.  db is compared
   case-insensitively against "PubMed", "Protein", "Nucleotide" and
   "Popset"; any other value posts an SEV_ERROR message.
   The request goes to host www.ncbi.nlm.nih.gov, port 80, path
   /entrez/utils/pmfetch.fcgi, asking for report=asn1 in mode=text.
   NOTE(review): the parameter list, the declaration of 'query', and what
   follows the error message are not visible in this excerpt. */
NLM_EXTERN CONN PMFetchOpenConnection (
264
if (StringHasNoText (db) || uid < 1) return NULL;
265
if (StringICmp (db, "PubMed") != 0 &&
266
StringICmp (db, "Protein") != 0&&
267
StringICmp (db, "Nucleotide") != 0&&
268
StringICmp (db, "Popset") != 0) {
269
ErrPostEx (SEV_ERROR, 0, 0, "Unrecognized database %s", db);
272
/* NOTE(review): sprintf is unbounded; this is safe only if the rejected
   branch above returns (so db is one of four short names) and 'query' is
   sized for them - confirm on the hidden lines. */
sprintf (query, "db=%s&id=%ld&report=asn1&mode=text", db, (long) uid);
273
return QUERY_OpenUrlQuery ("www.ncbi.nlm.nih.gov", 80, "/entrez/utils/pmfetch.fcgi",
274
query, "Entrez2Tool", 30, eMIME_T_NcbiData,
275
eMIME_AsnText, eENCOD_None, 0);
278
/* CommonWaitForReply: polls a connection until it is readable.
   Returns eIO_Unknown when conn is NULL.  Otherwise it repeats
   CONN_Wait (conn, eIO_Read, &timeout) until the status is eIO_Success
   or 'max' (the difference between two GetSecs readings) reaches 300.
   NOTE(review): the parameter list, the initialisation of 'timeout' and
   'max', and the final return are not visible in this excerpt.  The
   EventRecord / WaitNextEvent lines are presumably inside a
   platform-specific conditional that is also not visible - confirm. */
static EIO_Status CommonWaitForReply (
283
time_t currtime, starttime;
288
EventRecord currEvent;
291
if (conn == NULL) return eIO_Unknown;
301
starttime = GetSecs ();
302
while ((status = CONN_Wait (conn, eIO_Read, &timeout)) != eIO_Success && max < 300) {
303
/* Track the elapsed time since the wait began */
currtime = GetSecs ();
304
max = currtime - starttime;
306
WaitNextEvent (0, &currEvent, 0, NULL);
313
/* PubMedWaitForReply: waits for a reply on conn and reads a PubmedEntry
   from it.
   Returns NULL when conn is NULL; pep starts as NULL and is only assigned
   when CommonWaitForReply reports eIO_Success.  The entry is read through
   an AsnIo connection opened in "r" mode, and its Medline authors are
   passed to ChangeMedlineAuthorsToISO.
   NOTE(review): the parameter list and the final return are not visible
   in this excerpt. */
NLM_EXTERN PubmedEntryPtr PubMedWaitForReply (
319
PubmedEntryPtr pep = NULL;
321
if (conn == NULL) return NULL;
323
if (CommonWaitForReply (conn) == eIO_Success) {
324
/* NOTE(review): aicp is dereferenced on the next visible line with no
   NULL check visible. */
aicp = QUERY_AsnIoConnOpen ("r", conn);
325
pep = PubmedEntryAsnRead (aicp->aip, NULL);
326
QUERY_AsnIoConnClose (aicp);
330
/* NOTE(review): pep is dereferenced here; a NULL guard may exist on the
   lines missing from this excerpt - confirm. */
ChangeMedlineAuthorsToISO ((MedlineEntryPtr) pep->medent);
335
/* PubSeqWaitForReply: waits for a reply on conn and reads a SeqEntry
   from it.
   Returns NULL when conn is NULL; sep starts as NULL and is only assigned
   when CommonWaitForReply reports eIO_Success.  The entry is read through
   an AsnIo connection opened in "r" mode.
   NOTE(review): the parameter list and the final return are not visible
   in this excerpt. */
NLM_EXTERN SeqEntryPtr PubSeqWaitForReply (
341
SeqEntryPtr sep = NULL;
343
if (conn == NULL) return NULL;
345
if (CommonWaitForReply (conn) == eIO_Success) {
346
/* NOTE(review): aicp is dereferenced on the next visible line with no
   NULL check visible. */
aicp = QUERY_AsnIoConnOpen ("r", conn);
347
sep = SeqEntryAsnRead (aicp->aip, NULL);
348
QUERY_AsnIoConnClose (aicp);
355
/* PubMedSynchronousQuery: fetches one PubMed record and blocks until the
   reply has been read.
   Returns NULL when uid < 1 or when no connection could be opened.
   Otherwise it opens a "PubMed" connection, sends the query, and reads
   the result with PubMedWaitForReply.
   logtimes is set when the environment variable NCBI_LOG_SYNC_QUERY_TIMES
   exists.
   NOTE(review): the parameter list, the conditionals around the timing
   code, the connection close, and the final return are not visible in
   this excerpt. */
NLM_EXTERN PubmedEntryPtr PubMedSynchronousQuery (
369
if (uid < 1) return NULL;
372
logtimes = (Boolean) ((getenv ("NCBI_LOG_SYNC_QUERY_TIMES")) != NULL);
375
conn = PMFetchOpenConnection ("PubMed", uid);
377
if (conn == NULL) return NULL;
379
QUERY_SendQuery (conn);
383
starttime = times (&timebuf);
387
pep = PubMedWaitForReply (conn);
391
stoptime = times (&timebuf);
392
/* The printed value is the difference of two times() results, i.e. clock
   ticks rather than seconds. */
printf ("PubMedWaitForReply %ld\n", (long) (stoptime - starttime));
399
/* PubSeqSynchronousQuery: fetches one sequence record from database db
   and blocks until the reply has been read.
   Returns NULL when db has no text, uid < 1, or no connection could be
   opened.  Otherwise it opens a connection for (db, uid), sends the
   query, and reads the result with PubSeqWaitForReply.
   logtimes is set when the environment variable NCBI_LOG_SYNC_QUERY_TIMES
   exists.
   NOTE(review): the parameter list, the conditionals around the timing
   code, the connection close, and the final return are not visible in
   this excerpt. */
NLM_EXTERN SeqEntryPtr PubSeqSynchronousQuery (
414
if (StringHasNoText (db) || uid < 1) return NULL;
417
logtimes = (Boolean) ((getenv ("NCBI_LOG_SYNC_QUERY_TIMES")) != NULL);
420
conn = PMFetchOpenConnection (db, uid);
422
if (conn == NULL) return NULL;
424
QUERY_SendQuery (conn);
428
starttime = times (&timebuf);
432
sep = PubSeqWaitForReply (conn);
436
stoptime = times (&timebuf);
437
/* The printed value is the difference of two times() results, i.e. clock
   ticks rather than seconds. */
printf ("PubSeqWaitForReply %ld\n", (long) (stoptime - starttime));
444
/* PubMedAsynchronousQuery: starts a PubMed fetch without waiting for the
   reply.  Opens a "PubMed" connection for uid, sends the query, and adds
   the connection to 'queue' together with resultproc and userdata.
   Returns FALSE when no connection could be opened.
   NOTE(review): the rest of the parameter list and the final return are
   not visible in this excerpt - presumably TRUE on success; confirm. */
NLM_EXTERN Boolean PubMedAsynchronousQuery (
447
QueryResultProc resultproc,
454
conn = PMFetchOpenConnection ("PubMed", uid);
456
if (conn == NULL) return FALSE;
458
QUERY_SendQuery (conn);
460
QUERY_AddToQueue (queue, conn, resultproc, userdata, TRUE);
465
/* PubMedCheckQueue: forwards to QUERY_CheckQueue (queue) and returns its
   result unchanged.
   NOTE(review): the parameter list is not visible in this excerpt. */
NLM_EXTERN Int4 PubMedCheckQueue (
470
return QUERY_CheckQueue (queue);
473
/* PubMedReadReply: reads a PubmedEntry from conn when conn is non-NULL
   and status is eIO_Success; otherwise pep keeps its initial NULL value.
   NOTE(review): the AsnIo connection is opened with "rb" here, whereas
   PubMedWaitForReply uses "r" and the query asks for mode=text - confirm
   which mode is intended.  No call to ChangeMedlineAuthorsToISO is
   visible here either.  The parameter list and the final return are not
   visible in this excerpt. */
NLM_EXTERN PubmedEntryPtr PubMedReadReply (
480
PubmedEntryPtr pep = NULL;
482
if (conn != NULL && status == eIO_Success) {
483
/* NOTE(review): aicp is dereferenced on the next visible line with no
   NULL check visible. */
aicp = QUERY_AsnIoConnOpen ("rb", conn);
484
pep = PubmedEntryAsnRead (aicp->aip, NULL);
485
QUERY_AsnIoConnClose (aicp);
490
/* PubSeqAsynchronousQuery: starts a sequence fetch without waiting for
   the reply.  Opens a connection for (db, uid), sends the query, and adds
   the connection to 'queue' together with resultproc and userdata.
   Returns FALSE when no connection could be opened.
   NOTE(review): the rest of the parameter list and the final return are
   not visible in this excerpt - presumably TRUE on success; confirm. */
NLM_EXTERN Boolean PubSeqAsynchronousQuery (
494
QueryResultProc resultproc,
501
conn = PMFetchOpenConnection (db, uid);
503
if (conn == NULL) return FALSE;
505
QUERY_SendQuery (conn);
507
QUERY_AddToQueue (queue, conn, resultproc, userdata, TRUE);
512
/* PubSeqCheckQueue: forwards to QUERY_CheckQueue (queue) and returns its
   result unchanged.
   NOTE(review): the parameter list is not visible in this excerpt. */
NLM_EXTERN Int4 PubSeqCheckQueue (
517
return QUERY_CheckQueue (queue);
520
/* PubSeqReadReply: reads a SeqEntry from conn when conn is non-NULL and
   status is eIO_Success; otherwise sep keeps its initial NULL value.
   NOTE(review): the AsnIo connection is opened with "rb" here, whereas
   PubSeqWaitForReply uses "r" and the query asks for mode=text - confirm
   which mode is intended.  The parameter list and the final return are
   not visible in this excerpt. */
NLM_EXTERN SeqEntryPtr PubSeqReadReply (
527
SeqEntryPtr sep = NULL;
529
if (conn != NULL && status == eIO_Success) {
530
/* NOTE(review): aicp is dereferenced on the next visible line with no
   NULL check visible. */
aicp = QUERY_AsnIoConnOpen ("rb", conn);
531
sep = SeqEntryAsnRead (aicp->aip, NULL);
532
QUERY_AsnIoConnClose (aicp);
537
/* object manager registerable fetch function */
539
/* Name string under which the fetch procedure is loaded by
   ObjMgrProcLoad and later looked up by ObjMgrProcFind. */
static CharPtr pubseqfetchproc = "PubSeqBioseqFetch";
541
/* PubSeqBioseqFetchFunc: fetch callback loaded by ObjMgrProcLoad.
   'data' is cast to an OMProcControlPtr whose input_data is a SeqId.
   For a SEQID_GI id the integer value is used directly as the uid; for
   other ids the SeqId is written out as a FASTA-long string and resolved
   with EntrezGetUIDforSeqIdString.  The record is fetched with
   PubSeqSynchronousQuery against "nucleotide" and/or "protein".
   On success the Bioseq found in the entry is stored in
   ompcp->output_data, the entity ID in ompcp->output_entityID, and
   OM_MSG_RET_DONE is returned.  OM_MSG_RET_ERROR is returned when ompcp,
   ompp or sip is NULL, when uid is 0, or when nothing was fetched.
   NOTE(review): the conditions that choose between the nucleotide and
   protein queries, and several declarations, are not visible in this
   excerpt. */
static Int2 LIBCALLBACK PubSeqBioseqFetchFunc (Pointer data)
546
OMProcControlPtr ompcp;
548
SeqEntryPtr sep = NULL;
553
ompcp = (OMProcControlPtr) data;
554
if (ompcp == NULL) return OM_MSG_RET_ERROR;
556
if (ompp == NULL) return OM_MSG_RET_ERROR;
557
sip = (SeqIdPtr) ompcp->input_data;
558
if (sip == NULL) return OM_MSG_RET_ERROR;
560
if (sip->choice == SEQID_GI) {
562
uid = sip->data.intvalue;
563
if (uid == 0) return OM_MSG_RET_ERROR;
565
sep = PubSeqSynchronousQuery ("nucleotide", uid);
567
sep = PubSeqSynchronousQuery ("protein", uid);
572
/* NOTE(review): sid is a duplicate of sip; where it is released is not
   visible, so confirm it is freed before the early return below. */
sid = SeqIdDup (sip);
573
SeqIdWrite (sid, id, PRINTID_FASTA_LONG, sizeof (id) - 1);
576
uid = EntrezGetUIDforSeqIdString ("nucleotide", id);
578
sep = PubSeqSynchronousQuery ("nucleotide", uid);
580
uid = EntrezGetUIDforSeqIdString ("protein", id);
581
if (uid == 0) return OM_MSG_RET_ERROR;
582
sep = PubSeqSynchronousQuery ("protein", uid);
586
if (sep == NULL) return OM_MSG_RET_ERROR;
587
/* NOTE(review): bsp is stored without a visible NULL check. */
bsp = BioseqFindInSeqEntry (sip, sep);
588
ompcp->output_data = (Pointer) bsp;
589
ompcp->output_entityID = ObjMgrGetEntityIDForChoice (sep);
590
return OM_MSG_RET_DONE;
593
/* PubSeqFetchEnable: loads PubSeqBioseqFetchFunc as an OMPROC_FETCH
   procedure (input OBJ_SEQID, output OBJ_BIOSEQ, default priority) under
   the name held in pubseqfetchproc.
   NOTE(review): the return statement is not visible in this excerpt. */
NLM_EXTERN Boolean PubSeqFetchEnable (void)
596
ObjMgrProcLoad (OMPROC_FETCH, pubseqfetchproc, pubseqfetchproc,
597
OBJ_SEQID, 0, OBJ_BIOSEQ, 0, NULL,
598
PubSeqBioseqFetchFunc, PROC_PRIORITY_DEFAULT);
602
/* PubSeqFetchDisable: looks up the OMPROC_FETCH procedure named by
   pubseqfetchproc and, if found, calls ObjMgrFreeUserData for its procid.
   Returns silently when the procedure is not found.
   NOTE(review): how 'omp' is obtained is not visible in this excerpt. */
NLM_EXTERN void PubSeqFetchDisable (void)
609
ompp = ObjMgrProcFind (omp, 0, pubseqfetchproc, OMPROC_FETCH);
610
if (ompp == NULL) return;
611
ObjMgrFreeUserData (0, ompp->procid, OMPROC_FETCH, 0);