2
* ===========================================================================
5
* National Center for Biotechnology Information (NCBI)
7
* This software/database is a "United States Government Work" under the
8
* terms of the United States Copyright Act. It was written as part of
9
* the author's official duties as a United States Government employee and
10
* thus cannot be copyrighted. This software/database is freely available
11
* to the public for use. The National Library of Medicine and the U.S.
12
* Government do not place any restriction on its use or reproduction.
13
* We would, however, appreciate having the NCBI and the author cited in
14
* any work or product based on this material
16
* Although all reasonable efforts have been taken to ensure the accuracy
17
* and reliability of the software and data, the NLM and the U.S.
18
* Government do not and cannot warrant the performance or results that
19
* may be obtained by using this software or data. The NLM and the U.S.
20
* Government disclaim all warranties, express or implied, including
21
* warranties of performance, merchantability or fitness for any particular
24
* ===========================================================================
26
* File Name: tax3api.c
28
* Author: Jonathan Kans
30
* Version Creation Date: 7/8/04
37
* --------------------------------------------------------------------------
38
* Date Name Description of modification
39
* ------- ---------- -----------------------------------------------------
42
* ==========================================================================
51
/* low-level connection functions */
53
/* Opens a connection to the taxonomy service.
 *
 * Returns whatever QUERY_OpenServiceQuery returns for the service name
 * "TaxService3", with NULL as the second argument and 30 as the third.
 * NOTE(review): 30 is presumably a timeout in seconds -- confirm against
 * the QUERY_OpenServiceQuery declaration.
 */
NLM_EXTERN CONN Tax3OpenConnection (
58
return QUERY_OpenServiceQuery ("TaxService3", NULL, 30);
65
/* Waits for a reply to become readable on conn and parses it as a
 * Taxon3Reply.
 *
 * conn: connection to wait on; a NULL conn yields NULL immediately.
 *
 * t3ry starts out NULL and is assigned only when CONN_Wait ends with
 * eIO_Success, so any other final status leaves it NULL.
 * NOTE(review): presumably t3ry is the return value -- confirm.
 */
NLM_EXTERN Taxon3ReplyPtr Tax3WaitForReply (
71
time_t currtime, starttime;
75
Taxon3ReplyPtr t3ry = NULL;
77
/* NOTE(review): EventRecord and WaitNextEvent look like classic Mac OS
 * event-manager names; presumably both are compiled only on that
 * platform -- confirm the enclosing conditional. */
EventRecord currEvent;
80
if (conn == NULL) return NULL;
90
starttime = GetSecs ();
91
/* Keep calling CONN_Wait for as long as it reports eIO_Timeout and the
 * elapsed time (difference of two GetSecs readings, stored in max) is
 * still below 300. */
while ((status = CONN_Wait (conn, eIO_Read, &timeout)) == eIO_Timeout && max < 300) {
92
currtime = GetSecs ();
93
max = currtime - starttime;
95
WaitNextEvent (0, &currEvent, 0, NULL);
98
/* Data is ready: read the reply through an ASN.1 "rb" stream opened on
 * the connection, then close that stream. */
if (status == eIO_Success) {
99
/* NOTE(review): aicp is dereferenced on the next statement without a
 * NULL check -- confirm QUERY_AsnIoConnOpen cannot fail here. */
aicp = QUERY_AsnIoConnOpen ("rb", conn);
100
t3ry = Taxon3ReplyAsnRead (aicp->aip, NULL);
101
QUERY_AsnIoConnClose (aicp);
108
/* high-level connection functions */
110
/* Sends one request to the taxonomy service and waits for its reply.
 *
 * t3rq: request to send; NULL makes the function return NULL.
 *
 * Also returns NULL when Tax3OpenConnection fails.  Otherwise the request
 * is written through an ASN.1 "wb" stream opened on the connection and
 * flushed, the stream is closed, the query is sent, and the result of
 * Tax3WaitForReply is stored in t3ry.
 * NOTE(review): presumably t3ry is the return value -- confirm.
 */
NLM_EXTERN Taxon3ReplyPtr Tax3SynchronousQuery (
111
Taxon3RequestPtr t3rq
119
if (t3rq == NULL) return NULL;
121
conn = Tax3OpenConnection ();
123
if (conn == NULL) return NULL;
125
/* NOTE(review): aicp is dereferenced below without a NULL check --
 * confirm QUERY_AsnIoConnOpen cannot fail here. */
aicp = QUERY_AsnIoConnOpen ("wb", conn);
127
Taxon3RequestAsnWrite (t3rq, aicp->aip, NULL);
129
/* Flush before closing the ASN.1 stream and before sending the query. */
AsnIoFlush (aicp->aip);
130
QUERY_AsnIoConnClose (aicp);
132
QUERY_SendQuery (conn);
134
t3ry = Tax3WaitForReply (conn);
139
/* Sends one request to the taxonomy service without waiting for the
 * reply; the open connection is handed to a query queue instead.
 *
 * t3rq:       request to send; NULL makes the function return FALSE.
 * resultproc: callback passed on to QUERY_AddToQueue.
 *
 * Also returns FALSE when Tax3OpenConnection fails.  The request is
 * written and sent with the same sequence of calls as in
 * Tax3SynchronousQuery, after which conn is registered with
 * QUERY_AddToQueue together with queue, resultproc and userdata.
 * NOTE(review): the meaning of the final TRUE argument to
 * QUERY_AddToQueue is not established here -- confirm against its
 * declaration.
 */
NLM_EXTERN Boolean Tax3AsynchronousQuery (
140
Taxon3RequestPtr t3rq,
142
QueryResultProc resultproc,
150
if (t3rq == NULL) return FALSE;
152
conn = Tax3OpenConnection ();
154
if (conn == NULL) return FALSE;
156
/* NOTE(review): aicp is dereferenced below without a NULL check --
 * confirm QUERY_AsnIoConnOpen cannot fail here. */
aicp = QUERY_AsnIoConnOpen ("wb", conn);
158
Taxon3RequestAsnWrite (t3rq, aicp->aip, NULL);
160
AsnIoFlush (aicp->aip);
161
QUERY_AsnIoConnClose (aicp);
163
QUERY_SendQuery (conn);
165
QUERY_AddToQueue (queue, conn, resultproc, userdata, TRUE);
170
/* Thin wrapper: returns the result of QUERY_CheckQueue for queue. */
NLM_EXTERN Int4 Tax3CheckQueue (
175
return QUERY_CheckQueue (queue);
178
/* Reads a Taxon3Reply from conn.
 *
 * The read is attempted only when conn is non-NULL and status equals
 * eIO_Success; otherwise t3ry keeps its initial NULL value.
 * NOTE(review): presumably t3ry is the return value -- confirm.
 */
NLM_EXTERN Taxon3ReplyPtr Tax3ReadReply (
185
Taxon3ReplyPtr t3ry = NULL;
187
if (conn != NULL && status == eIO_Success) {
188
/* NOTE(review): aicp is dereferenced on the next statement without a
 * NULL check -- confirm QUERY_AsnIoConnOpen cannot fail here. */
aicp = QUERY_AsnIoConnOpen ("rb", conn);
189
t3ry = Taxon3ReplyAsnRead (aicp->aip, NULL);
190
QUERY_AsnIoConnClose (aicp);
195
/* Builds a Taxon3Request holding at most one query element.
 *
 * Exactly one of the inputs is used, in this order of precedence:
 *   1. name, when StringDoesHaveText (name) is true -- added as a copied
 *      string with choice 2;
 *   2. taxid, when it is greater than 0 -- added as an integer with
 *      choice 1;
 *   3. orp, when it is non-NULL -- a copy made with AsnIoMemCopy is added
 *      as a pointer with choice 3, so the caller's OrgRef is not stored
 *      in the request.
 *
 * Returns NULL when Taxon3RequestNew fails.
 * NOTE(review): presumably t2rp is returned otherwise -- confirm.
 */
NLM_EXTERN Taxon3RequestPtr CreateTaxon3Request (
202
Taxon3RequestPtr t2rp;
204
t2rp = Taxon3RequestNew ();
205
if (t2rp == NULL) return NULL;
207
if (StringDoesHaveText (name)) {
208
ValNodeCopyStr (&(t2rp->request), 2, name);
209
} else if (taxid > 0) {
210
ValNodeAddInt (&(t2rp->request), 1, taxid);
211
} else if (orp != NULL) {
212
/* The parameter orp is reassigned to the copy; the original object is
 * left untouched. */
orp = AsnIoMemCopy ((Pointer) orp,
213
(AsnReadFunc) OrgRefAsnRead,
214
(AsnWriteFunc) OrgRefAsnWrite);
215
ValNodeAddPointer (&(t2rp->request), 3, (Pointer) orp);
221
/* Builds one Taxon3Request from a list of query elements.
 *
 * org_list: ValNode list walked from head to tail.
 *
 * Three kinds of element are appended to the request: an integer taken
 * from data.intvalue (choice 1), a copied string taken from
 * data.ptrvalue (choice 2), and an AsnIoMemCopy copy of the OrgRef in
 * data.ptrvalue (choice 3).
 * NOTE(review): presumably vnp->choice selects which of the three is used
 * for a given node -- confirm.
 *
 * Returns NULL when Taxon3RequestNew fails.
 */
NLM_EXTERN Taxon3RequestPtr CreateMultiTaxon3Request (ValNodePtr org_list)
224
Taxon3RequestPtr t3rp;
227
t3rp = Taxon3RequestNew ();
228
if (t3rp == NULL) return NULL;
230
for (vnp = org_list; vnp != NULL; vnp = vnp->next)
235
ValNodeAddInt (&(t3rp->request), 1, vnp->data.intvalue);
238
ValNodeCopyStr (&(t3rp->request), 2, vnp->data.ptrvalue);
241
/* The request gets its own copy, so org_list keeps its OrgRef. */
orp = AsnIoMemCopy (vnp->data.ptrvalue,
242
(AsnReadFunc) OrgRefAsnRead,
243
(AsnWriteFunc) OrgRefAsnWrite);
244
ValNodeAddPointer (&(t3rp->request), 3, (Pointer) orp);
251
/* Looks up every element of org_list in a single synchronous taxonomy
 * query and returns the results as a ValNode list.
 *
 * org_list: query elements accepted by CreateMultiTaxon3Request; a NULL
 *           list yields NULL.
 *
 * Returns NULL as well when the request cannot be created.  For each
 * element of the reply one node with choice 3 is appended to the result:
 * a NULL pointer for an error element (whose message is posted with
 * severity SEV_ERROR), or the OrgRef pointer taken from a data element.
 * NOTE(review): the result stores tdp->org while t3ry is freed before
 * returning; presumably tdp->org is detached from the reply first --
 * confirm.
 */
NLM_EXTERN ValNodePtr Taxon3GetOrgRefList (ValNodePtr org_list)
253
Taxon3RequestPtr t3rq;
256
OrgRefPtr t3orp = NULL;
259
ValNodePtr response_list = NULL;
261
if (org_list == NULL) return NULL;
263
t3rq = CreateMultiTaxon3Request (org_list);
264
if (t3rq == NULL) return NULL;
265
t3ry = Tax3SynchronousQuery (t3rq);
266
/* The request is no longer needed once the reply has been obtained. */
Taxon3RequestFree (t3rq);
268
for (trp = t3ry->reply; trp != NULL; trp = trp->next) {
269
switch (trp->choice) {
271
tep = (T3ErrorPtr) trp->data.ptrvalue;
273
/* The message comes from the server and must not be used as the format
 * string itself: any '%' in it would be interpreted by ErrPostEx.  Pass
 * it as the argument of a fixed "%s" format instead. */
ErrPostEx (SEV_ERROR, 0, 0, "%s", tep->message);
275
/* Keep one result node per reply element, even for errors. */
ValNodeAddPointer (&response_list, 3, NULL);
278
tdp = (T3DataPtr) trp->data.ptrvalue;
280
t3orp = (OrgRefPtr)(tdp->org);
281
ValNodeAddPointer (&response_list, 3, (Pointer) t3orp);
289
Taxon3ReplyFree (t3ry);
292
return response_list;
295
/* Looks up a single OrgRef with a synchronous taxonomy query.
 *
 * orp: OrgRef to look up; NULL yields NULL.
 *
 * Returns NULL as well when the request cannot be created.  The request
 * is built with CreateTaxon3Request (0, NULL, orp), i.e. from the OrgRef
 * alone.  Error elements of the reply are posted with severity SEV_ERROR;
 * for a data element t3orp is set to tdp->org.
 * NOTE(review): presumably t3orp is the return value and is detached from
 * the reply before Taxon3ReplyFree -- confirm.
 */
NLM_EXTERN OrgRefPtr Taxon3GetOrg (OrgRefPtr orp)
298
Taxon3RequestPtr t3rq;
301
OrgRefPtr t3orp = NULL;
305
if (orp == NULL) return NULL;
307
t3rq = CreateTaxon3Request (0, NULL, orp);
308
if (t3rq == NULL) return NULL;
309
t3ry = Tax3SynchronousQuery (t3rq);
310
/* The request is no longer needed once the reply has been obtained. */
Taxon3RequestFree (t3rq);
312
for (trp = t3ry->reply; trp != NULL; trp = trp->next) {
313
switch (trp->choice) {
315
tep = (T3ErrorPtr) trp->data.ptrvalue;
317
/* The message comes from the server and must not be used as the format
 * string itself: any '%' in it would be interpreted by ErrPostEx.  Pass
 * it as the argument of a fixed "%s" format instead. */
ErrPostEx (SEV_ERROR, 0, 0, "%s", tep->message);
321
tdp = (T3DataPtr) trp->data.ptrvalue;
323
t3orp = (OrgRefPtr)(tdp->org);
331
Taxon3ReplyFree (t3ry);
337
/* Decides whether two BioSources refer to the same organism.
 *
 * Each source's org->db list is scanned for a Dbtag whose db string is
 * "taxon"; the tag ids of the two Dbtags are compared.  A
 * case-insensitive comparison of the two taxnames is the alternative
 * branch.  Both sources and both org pointers are tested against NULL
 * first.
 *
 * NOTE(review): d1 and d2 are assigned on every loop iteration, so unless
 * the loops reset them they keep pointing at the last non-"taxon" Dbtag
 * when a list has no "taxon" entry -- confirm.
 * NOTE(review): d1->tag and d2->tag are dereferenced without a visible
 * NULL check -- confirm.
 */
static Boolean DoOrgIdsMatch(BioSourcePtr b1, BioSourcePtr b2)
339
DbtagPtr d1 = NULL, d2 = NULL;
342
if (b1 == NULL || b2 == NULL)
346
if (b1->org == NULL || b2->org == NULL)
350
/* Look for the "taxon" cross-reference of the first source. */
for (vnp = b1->org->db; vnp; vnp = vnp->next)
352
d1 = (DbtagPtr) vnp->data.ptrvalue;
353
if (StringCmp(d1->db, "taxon") == 0)
358
/* Same search for the second source. */
for (vnp = b2->org->db; vnp; vnp = vnp->next)
360
d2 = (DbtagPtr) vnp->data.ptrvalue;
361
if (StringCmp(d2->db, "taxon") == 0)
368
if (d1->tag->id == d2->tag->id)
373
else if (StringICmp(b1->org->taxname, b2->org->taxname) == 0)
380
/* Merges the contents of guest into host.
 *
 * - When host is NULL and guest is not, host becomes an AsnIoMemCopy copy
 *   of guest.
 * - genome and origin are taken from guest only where host has 0 and
 *   guest has a non-zero value.
 * - Every SubSource of guest is copied and the copies are appended after
 *   the last SubSource of host.
 * - When guest has an orgname, every OrgMod of guest is copied and the
 *   copies are appended after the last OrgMod on the host side.
 *
 * guest itself is only read; everything added to host is a copy.
 * NOTE(review): host->org and guest->org are dereferenced without a
 * visible NULL check -- confirm.
 */
static BioSourcePtr Tax3BioSourceMerge(BioSourcePtr host, BioSourcePtr guest)
382
SubSourcePtr ssp, sp, last_ssp;
383
OrgModPtr omp, homp, last_omp;
386
if (host == NULL && guest == NULL)
390
if (host == NULL && guest != NULL)
392
host = AsnIoMemCopy(guest, (AsnReadFunc) BioSourceAsnRead,
393
(AsnWriteFunc) BioSourceAsnWrite);
396
if (host != NULL && guest == NULL)
400
/* Fill in genome and origin only where host has no value of its own. */
if (host->genome == 0 && guest->genome != 0)
402
host->genome = guest->genome;
404
if (host->origin == 0 && guest->origin != 0)
406
host->origin = guest->origin;
408
/* Find the tail of host's SubSource list (NULL when the list is empty). */
last_ssp = host->subtype;
409
while (last_ssp != NULL && last_ssp->next != NULL)
411
last_ssp = last_ssp->next;
413
for (ssp = guest->subtype; ssp; ssp = ssp->next)
415
sp = AsnIoMemCopy(ssp, (AsnReadFunc) SubSourceAsnRead,
416
(AsnWriteFunc) SubSourceAsnWrite);
417
if (last_ssp == NULL)
427
if (guest->org->orgname)
429
if ((onp = host->org->orgname) == NULL)
432
host->org->orgname = onp;
435
/* Advance to the tail of the OrgMod list before appending. */
while (last_omp != NULL && last_omp->next != NULL)
437
last_omp = last_omp->next;
439
for (omp = guest->org->orgname->mod; omp; omp = omp->next)
441
homp = AsnIoMemCopy(omp, (AsnReadFunc) OrgModAsnRead,
442
(AsnWriteFunc) OrgModAsnWrite);
443
if (last_omp == NULL)
449
last_omp->next = homp;
458
/**************************************************************************
459
* Compare BioSources in one bioseq->descr using Taxonomy to find
461
* merge if organisms are the same or create a feature if different
463
**************************************************************************/
464
/* Per-entry callback that consolidates the source descriptors of one
 * Bioseq.
 *
 * sep:  entry being visited; tested with IS_Bioseq before use.
 * data: cast to a ValNodePtr (newlist) of descriptors that are attached
 *       to the end of bsp->descr.
 * index, indent: not used in the statements below.
 *
 * The Bioseq's repr is compared against raw, const and delta, and its
 * mol is tested with ISA_na.  After newlist has been attached, the
 * descriptor chain is walked: the BioSource of the first source
 * descriptor becomes first_biop; the BioSource of each later source
 * descriptor is detached from its descriptor, the descriptor is flagged
 * with idx.deleteme, and the BioSource is either merged into first_biop
 * (when DoOrgIdsMatch agrees) or stored as the data of a new
 * SEQFEAT_BIOSRC feature.
 */
NLM_EXTERN void Tax3MergeSourceDescr (SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
466
BioseqPtr bsp = NULL;
467
ValNodePtr vnp, newlist;
469
BioSourcePtr first_biop = NULL;
470
BioSourcePtr other_biop;
471
BioSourcePtr tmp_biop;
474
if (!IS_Bioseq(sep)) {
477
newlist = (ValNodePtr) data;
478
bsp = (BioseqPtr) sep->data.ptrvalue;
479
if ((bsp->repr != Seq_repr_raw) && (bsp->repr != Seq_repr_const)
480
&& (bsp->repr != Seq_repr_delta))
483
if (! ISA_na(bsp->mol))
486
/* add the descriptors in newlist to the end of the list in bsp->descr*/
487
if (bsp->descr == NULL)
489
bsp->descr = newlist;
493
/* Walk to the last existing descriptor. */
for (vnp = bsp->descr; vnp->next != NULL; vnp = vnp->next)
499
/* now find the first source descriptor in bsp->descr that has an org*/
500
/* note - we can't use SeqMgrGetNextDescriptor here because we have just
501
* added to the descriptors, so they are not indexed. */
502
for (vnp = bsp->descr; vnp != NULL; vnp = vnp->next)
504
if (vnp->choice != Seq_descr_source) continue;
505
/* A source descriptor without a BioSource is reported and flagged with
 * idx.deleteme. */
if (vnp->data.ptrvalue == NULL)
507
ErrPostStr(SEV_WARNING, 0, 0, "Source descriptor missing data");
510
ovp = (ObjValNodePtr) vnp;
511
ovp->idx.deleteme = TRUE;
514
if (first_biop == NULL)
516
first_biop = vnp->data.ptrvalue;
520
other_biop = vnp->data.ptrvalue;
521
/* detach biosource pointer from descr, so that it will not be freed
522
* when the descriptor is deleted.
524
vnp->data.ptrvalue = NULL;
527
ovp = (ObjValNodePtr) vnp;
528
ovp->idx.deleteme = TRUE;
530
if (DoOrgIdsMatch(first_biop, other_biop))
532
/* merge the two sources */
533
tmp_biop = Tax3BioSourceMerge(first_biop, other_biop);
534
if (tmp_biop == NULL)
536
ErrPostStr (SEV_WARNING, 0, 0, "Failed to merge biosources");
540
first_biop = tmp_biop;
542
/* BioSourceFree's result is stored back into other_biop. */
other_biop = BioSourceFree (other_biop);
544
/* create a source feature */
545
/* NOTE(review): sfp is dereferenced below without a visible NULL check
 * -- confirm CreateNewFeatureOnBioseq cannot fail here. */
sfp = CreateNewFeatureOnBioseq (bsp, SEQFEAT_BIOSRC, NULL);
548
sfp->data.value.ptrvalue = other_biop;
556
/* Searches the db list of orp for a Dbtag whose db string is "taxon".
 * NOTE(review): presumably the id of that Dbtag's tag is returned as the
 * taxonomy id -- confirm.
 */
static Int4 GetTaxIdFromOrgRef (OrgRefPtr orp)
564
for (vnp = orp->db; vnp != NULL; vnp = vnp->next)
566
d = (DbtagPtr) vnp->data.ptrvalue;
567
if (StringCmp(d->db, "taxon") == 0)
577
/* Determines the taxonomy id for an OrgRef by querying the taxonomy
 * service.
 *
 * orp: OrgRef to look up; NULL yields -1.
 *
 * The OrgRef obtained from Taxon3GetOrg is passed to GetTaxIdFromOrgRef
 * and then released with OrgRefFree; the caller's orp is not freed here.
 * NOTE(review): presumably tax_id is the return value -- confirm.
 */
NLM_EXTERN Int4 Taxon3GetTaxIdByOrgRef (OrgRefPtr orp)
582
if (orp == NULL) return -1;
584
orp_repl = Taxon3GetOrg (orp);
585
tax_id = GetTaxIdFromOrgRef (orp_repl);
586
OrgRefFree (orp_repl);
591
/* Looks up an organism by name through the taxonomy service.
 *
 * orgname: name placed in the taxname field of a temporary OrgRef used
 *          as the query.
 *
 * Returns NULL when the temporary OrgRef cannot be allocated.  The result
 * of Taxon3GetOrg is stored in org.
 * NOTE(review): presumably org is the return value -- confirm.
 */
NLM_EXTERN OrgRefPtr Taxon3GetOrgRefByName (CharPtr orgname)
593
OrgRefPtr request, org;
595
request = OrgRefNew ();
596
if (request == NULL) return NULL;
597
/* The caller's string is borrowed, not copied. */
request->taxname = orgname;
598
org = Taxon3GetOrg (request);
599
/* Clear the borrowed pointer before freeing the temporary OrgRef,
 * presumably so that OrgRefFree does not free the caller's string. */
request->taxname = NULL;
600
OrgRefFree (request);
604
/* Determines the taxonomy id for an organism name: the name is resolved
 * with Taxon3GetOrgRefByName and the resulting OrgRef is passed to
 * GetTaxIdFromOrgRef.
 * NOTE(review): presumably orp is freed afterwards and tax_id is the
 * return value -- confirm.
 */
NLM_EXTERN Int4 Taxon3GetTaxIdByName (CharPtr orgname)
609
orp = Taxon3GetOrgRefByName (orgname);
610
tax_id = GetTaxIdFromOrgRef (orp);
616
/* Callback that collects BioSource pointers into a ValNode list.
 *
 * biop:     BioSource to record; ignored when NULL.
 * userdata: address of the ValNodePtr heading the list; ignored when
 *           NULL.
 *
 * The pointer is appended with choice 4.  The BioSource is not copied, so
 * the list refers to the original object.
 */
static void AddBioSourceToList (BioSourcePtr biop, Pointer userdata)
618
ValNodePtr PNTR list;
620
if (biop == NULL || userdata == NULL) return;
621
list = (ValNodePtr PNTR) userdata;
622
ValNodeAddPointer (list, 4, (Pointer) biop);
625
/* Replaces the OrgRef of the BioSources in sep with the versions
 * returned by the taxonomy service.
 *
 * sep:      entry whose BioSources are collected with
 *           VisitBioSourcesInSep.
 * keep_syn: NOTE(review): presumably controls whether the syn list of
 *           the new OrgRef is kept or cleared -- confirm.
 *
 * All OrgRefs are sent in one Taxon3GetOrgRefList call.  The request and
 * response lists are compared by length and a message is posted when
 * they differ.  The two lists are then walked in parallel: a NULL
 * response is reported by name, otherwise the response OrgRef is moved
 * into the BioSource and the previous OrgRef is freed.
 */
NLM_EXTERN void Taxon3ReplaceOrgInSeqEntry (SeqEntryPtr sep, Boolean keep_syn)
627
ValNodePtr biop_list = NULL;
628
ValNodePtr request_list = NULL;
629
ValNodePtr response_list = NULL;
630
ValNodePtr biop_vnp, response_vnp;
632
OrgRefPtr swap_org, response_org;
634
VisitBioSourcesInSep (sep, &biop_list, AddBioSourceToList);
636
/* Build the request from the collected BioSources; request_list borrows
 * each biop->org pointer and does not copy it. */
for (biop_vnp = biop_list; biop_vnp != NULL; biop_vnp = biop_vnp->next)
638
biop = (BioSourcePtr) biop_vnp->data.ptrvalue;
639
ValNodeAddPointer (&request_list, 3, biop->org);
641
response_list = Taxon3GetOrgRefList (request_list);
643
if (ValNodeLen (response_list) != ValNodeLen (request_list))
645
Message (MSG_POST, "Unable to retrieve information from tax server");
649
for (biop_vnp = biop_list, response_vnp = response_list;
650
biop_vnp != NULL && response_vnp != NULL;
651
biop_vnp = biop_vnp->next, response_vnp = response_vnp->next)
653
biop = (BioSourcePtr) biop_vnp->data.ptrvalue;
654
swap_org = biop->org;
655
response_org = response_vnp->data.ptrvalue;
656
if (response_org == NULL)
658
Message (MSG_POST, "No tax server information for %s", biop->org->taxname);
662
/* Move the response into the BioSource and clear the list node's
 * pointer, so the OrgRef is referenced from one place only. */
biop->org = response_org;
663
response_vnp->data.ptrvalue = NULL;
664
OrgRefFree (swap_org);
667
biop->org->syn = ValNodeFreeData(biop->org->syn);
671
/* ValNodeFree is used for all three lists, not ValNodeFreeData. */
ValNodeFree (request_list);
672
ValNodeFree (response_list);
673
ValNodeFree (biop_list);