2
* ===========================================================================
5
* National Center for Biotechnology Information (NCBI)
7
* This software/database is a "United States Government Work" under the
8
* terms of the United States Copyright Act. It was written as part of
9
* the author's official duties as a United States Government employee and
10
* thus cannot be copyrighted. This software/database is freely available
11
* to the public for use. The National Library of Medicine and the U.S.
12
* Government do not place any restriction on its use or reproduction.
13
* We would, however, appreciate having the NCBI and the author cited in
14
* any work or product based on this material.
16
* Although all reasonable efforts have been taken to ensure the accuracy
17
* and reliability of the software and data, the NLM and the U.S.
18
* Government do not and cannot warrant the performance or results that
19
* may be obtained by using this software or data. The NLM and the U.S.
20
* Government disclaim all warranties, express or implied, including
21
* warranties of performance, merchantability or fitness for any particular
* purpose.
24
* ===========================================================================
26
* File Name: insdseqget.c
28
* Author: Jonathan Kans
30
* Version Creation Date: 11/4/02
34
* File Description: Demo to fetch by accession, write INSDSet XML
37
* --------------------------------------------------------------------------
38
* ==========================================================================
47
#include <objinsdseq.h>
58
static CharPtr ReadALine (
69
if (str == NULL || size < 1 || fp == NULL) return NULL;
71
rsult = FileGets (str, size, fp);
75
while (ch != '\0' && ch != '\n' && ch != '\r') {
84
typedef struct lookforids {
91
} LookForIDs, PNTR LookForIDsPtr;
93
static void LookForSeqIDs (BioseqPtr bsp, Pointer userdata)
100
lfip = (LookForIDsPtr) userdata;
101
if (ISA_na (bsp->mol)) {
104
if (ISA_aa (bsp->mol)) {
107
for (sip = bsp->id; sip != NULL; sip = sip->next) {
108
switch (sip->choice) {
120
tsip = (TextSeqIdPtr) sip->data.ptrvalue;
122
if (StringNCmp (tsip->accession, "NC_", 3) == 0) {
124
} else if (StringNCmp (tsip->accession, "NT_", 3) == 0) {
125
lfip->isNTorNW = TRUE;
126
} else if (StringNCmp (tsip->accession, "NW_", 3) == 0) {
127
lfip->isNTorNW = TRUE;
137
static void LookForGEDetc (
150
MemSet ((Pointer) &lfi, 0, sizeof (LookForIDs));
151
VisitBioseqsInSep (topsep, (Pointer) &lfi, LookForSeqIDs);
153
*isNTorNW = lfi.isNTorNW;
157
*isProt = lfi.isProt;
160
static void DoSeqEntryToGnbk (
167
CstType cust = SHOW_TRANCRIPTION | SHOW_PEPTIDE;
168
FlgType flags = SHOW_FAR_TRANSLATION | SHOW_CONTIG_AND_SEQ;
175
LckType locks = LOOKUP_FAR_COMPONENTS | LOOKUP_FAR_LOCATIONS | LOOKUP_FAR_PRODUCTS;
177
LookForGEDetc (sep, &isGED, &isNTorNW, &isNC, &isTPA, &isNuc, &isProt);
179
if (fmt == GENBANK_FMT && (! isNuc)) return;
180
if (fmt == GENPEPT_FMT && (! isProt)) return;
182
if (isNTorNW || isTPA) {
183
flags |= ONLY_NEAR_FEATURES;
185
flags |= NEAR_FEATURES_SUPPRESS;
188
SeqEntryToGnbk (sep, NULL, fmt, ENTREZ_MODE, SEGMENT_STYLE,
189
flags, locks, cust, extra, NULL);
192
static void DoQuery (
202
Entrez2BooleanReplyPtr e2br;
203
Entrez2IdListPtr e2lp;
204
Entrez2RequestPtr e2rq;
205
Entrez2ReplyPtr e2ry;
218
e2rq = EntrezCreateBooleanRequest (TRUE, FALSE, "Nucleotide", NULL, 0, 0, NULL, 0, 0);
219
if (e2rq == NULL) return;
221
EntrezAddToBooleanRequest (e2rq, NULL, ENTREZ_OP_LEFT_PAREN, NULL, NULL, NULL, 0, 0, NULL, NULL, TRUE, TRUE);
222
EntrezAddToBooleanRequest (e2rq, NULL, ENTREZ_OP_LEFT_PAREN, NULL, NULL, NULL, 0, 0, NULL, NULL, TRUE, TRUE);
224
str = ReadALine (line, sizeof (line), fp);
225
if (! StringHasNoText (str)) {
226
EntrezAddToBooleanRequest (e2rq, NULL, 0, "ACCN", str, NULL, 0, 0, NULL, NULL, TRUE, TRUE);
229
while (str != NULL) {
230
if (! StringHasNoText (str)) {
231
EntrezAddToBooleanRequest (e2rq, NULL, ENTREZ_OP_OR, NULL, NULL, NULL, 0, 0, NULL, NULL, TRUE, TRUE);
232
EntrezAddToBooleanRequest (e2rq, NULL, 0, "ACCN", str, NULL, 0, 0, NULL, NULL, TRUE, TRUE);
234
str = ReadALine (line, sizeof (line), fp);
237
EntrezAddToBooleanRequest (e2rq, NULL, ENTREZ_OP_RIGHT_PAREN, NULL, NULL, NULL, 0, 0, NULL, NULL, TRUE, TRUE);
238
EntrezAddToBooleanRequest (e2rq, NULL, ENTREZ_OP_AND, NULL, NULL, NULL, 0, 0, NULL, NULL, TRUE, TRUE);
239
EntrezAddToBooleanRequest (e2rq, NULL, ENTREZ_OP_LEFT_PAREN, NULL, NULL, NULL, 0, 0, NULL, NULL, TRUE, TRUE);
241
str = ReadALine (line, sizeof (line), dfp);
242
if (! StringHasNoText (str)) {
243
EntrezAddToBooleanRequest (e2rq, NULL, 0, "MDAT", str, NULL, 0, 0, NULL, NULL, TRUE, TRUE);
246
while (str != NULL) {
247
if (! StringHasNoText (str)) {
248
EntrezAddToBooleanRequest (e2rq, NULL, ENTREZ_OP_OR, NULL, NULL, NULL, 0, 0, NULL, NULL, TRUE, TRUE);
249
EntrezAddToBooleanRequest (e2rq, NULL, 0, "MDAT", str, NULL, 0, 0, NULL, NULL, TRUE, TRUE);
251
str = ReadALine (line, sizeof (line), dfp);
254
EntrezAddToBooleanRequest (e2rq, NULL, ENTREZ_OP_RIGHT_PAREN, NULL, NULL, NULL, 0, 0, NULL, NULL, TRUE, TRUE);
255
EntrezAddToBooleanRequest (e2rq, NULL, ENTREZ_OP_RIGHT_PAREN, NULL, NULL, NULL, 0, 0, NULL, NULL, TRUE, TRUE);
257
e2ry = EntrezSynchronousQuery (e2rq);
258
e2rq = Entrez2RequestFree (e2rq);
260
if (e2ry == NULL) return;
262
if (reply == NULL || reply->choice != E2Reply_eval_boolean) return;
263
e2br = EntrezExtractBooleanReply (e2ry);
264
if (e2br == NULL) return;
268
BSSeek (e2lp->uids, 0, SEEK_SET);
269
for (i = 0; i < e2lp->num; i++) {
270
uid = Nlm_BSGetUint4 (e2lp->uids);
271
if (uid < 1) continue;
273
sep = PubSeqSynchronousQuery (uid, 0, flags);
274
if (sep == NULL) continue;
277
DoSeqEntryToGnbk (sep, GENBANK_FMT, extra);
280
DoSeqEntryToGnbk (sep, GENPEPT_FMT, extra);
287
Entrez2BooleanReplyFree (e2br);
290
static void ProcessAccession (
304
Boolean is_numeric = TRUE;
314
while (ch != '\0' && is_numeric) {
315
if (! IS_DIGIT (ch)) {
323
if (sscanf (accn, "%ld", &val) == 1) {
327
sip = GetSeqIdForGI (gi);
329
SeqIdWrite (sip, tmp, PRINTID_TEXTID_ACC_VER, sizeof (tmp));
331
ptr = StringChr (tmp, '.');
334
sip = SeqIdFromAccessionDotVersion (tmp);
335
newgi = GetGIForSeqId (sip);
337
if (newgi == gi) return;
343
sip = SeqIdFromAccessionDotVersion (accn);
344
gi = GetGIForSeqId (sip);
347
sip = GetSeqIdForGI (gi);
349
SeqIdWrite (sip, id, PRINTID_TEXTID_ACC_VER, sizeof (id));
351
if (StringICmp (accn, id) == 0) return;
360
sep = PubSeqSynchronousQuery (gi, 0, flags);
361
if (sep == NULL) return;
364
DoSeqEntryToGnbk (sep, GENBANK_FMT, extra);
367
DoSeqEntryToGnbk (sep, GENPEPT_FMT, extra);
373
/*
 * Indices into the myargs command-line argument table.  Each value is used
 * as a subscript (e.g. myargs [i_argInputFile].strvalue), so it must match
 * the position of the corresponding entry in that table:
 *   0 -i input file, 1 -d date list, 2 -o output file,
 *   3 -n new records only, 4 -v fetch SNP variations, 5 -m molecule type.
 * Keep these in sync with the table whenever an entry is added or reordered.
 */
#define i_argInputFile 0
374
#define d_argDateFile 1
375
#define o_argOutputFile 2
376
#define n_argNewRecords 3
377
#define v_argVariations 4
378
#define m_argMolecule 5
381
{"Sequence File Name", "stdin", NULL, NULL,
382
FALSE, 'i', ARG_FILE_IN, 0.0, 0, NULL},
383
{"Date List", NULL, NULL, NULL,
384
TRUE, 'd', ARG_FILE_IN, 0.0, 0, NULL},
385
{"Output File Name", "stdout", NULL, NULL,
386
FALSE, 'o', ARG_FILE_OUT, 0.0, 0, NULL},
387
{"New Records Only", "F", NULL, NULL,
388
TRUE, 'n', ARG_BOOLEAN, 0.0, 0, NULL},
389
{"Fetch SNP Variations", "F", NULL, NULL,
390
TRUE, 'v', ARG_BOOLEAN, 0.0, 0, NULL},
391
{"Molecule (n Nucleotide, p Protein, b Both)", "n", NULL, NULL,
392
FALSE, 'm', ARG_STRING, 0.0, 0, NULL},
395
/*
 * Local prototype for AsnPrintNewLine, which this file calls on the output
 * AsnIo stream right after closing the top-level INSDSet struct.
 * NOTE(review): presumably declared here because no included header exports
 * it - confirm before moving or removing this declaration.
 */
NLM_EXTERN void AsnPrintNewLine PROTO((AsnIoPtr aip));
403
Boolean do_nuc = FALSE;
404
Boolean do_prot = FALSE;
416
ErrSetFatalLevel (SEV_MAX);
417
ErrClearOptFlags (EO_SHOW_USERSTR);
418
UseLocalAsnloadDataAndErrMsg ();
421
if (! AllObjLoad ()) {
422
Message (MSG_FATAL, "AllObjLoad failed");
425
if (! SubmitAsnLoad ()) {
426
Message (MSG_FATAL, "SubmitAsnLoad failed");
429
if (! SeqCodeSetLoad ()) {
430
Message (MSG_FATAL, "SeqCodeSetLoad failed");
433
if (! GeneticCodeTableLoad ()) {
434
Message (MSG_FATAL, "GeneticCodeTableLoad failed");
437
if (! objgbseqAsnLoad ()) {
438
Message (MSG_POSTERR, "objgbseqAsnLoad failed");
442
if (! GetArgs ("insdseqget", sizeof (myargs) / sizeof (Args), myargs)) {
446
fp = FileOpen (myargs [i_argInputFile].strvalue, "r");
451
if (! StringHasNoText (myargs [d_argDateFile].strvalue)) {
452
dfp = FileOpen (myargs [d_argDateFile].strvalue, "r");
458
if (GetAppParam ("NCBI", "SETTINGS", "XMLPREFIX", NULL, xmlbuf, sizeof (xmlbuf))) {
459
AsnSetXMLmodulePrefix (StringSave (xmlbuf));
462
MemSet ((Pointer) &xtra, 0, sizeof (XtraBlock));
463
MemSet ((Pointer) &gbsq, 0, sizeof (GBSeq));
465
aip = AsnIoOpen (myargs [o_argOutputFile].strvalue, "wx");
468
Message (MSG_POSTERR, "AsnIoOpen failed");
473
only_new = (Boolean) myargs [n_argNewRecords].intvalue;
474
get_var = (Boolean) myargs [v_argVariations].intvalue;
476
str = myargs [m_argMolecule].strvalue;
477
if (StringICmp (str, "n") == 0) {
479
} else if (StringICmp (str, "p") == 0) {
481
} else if (StringICmp (str, "b") == 0) {
488
PubSeqFetchEnable ();
491
atp = AsnLinkType (NULL, AsnFind ("INSDSet"));
492
xtra.atp = AsnLinkType (NULL, AsnFind ("INSDSet.E"));
493
if (atp == NULL || xtra.atp == NULL) {
494
Message (MSG_POSTERR, "AsnLinkType or AsnFind failed");
498
MemSet ((Pointer) &gbst, 0, sizeof (GBSet));
499
AsnOpenStruct (aip, atp, (Pointer) &gbst);
502
DoQuery (fp, dfp, extra, get_var, do_nuc, do_prot);
504
str = ReadALine (line, sizeof (line), fp);
505
while (str != NULL) {
506
if (! StringHasNoText (str)) {
507
ProcessAccession (str, extra, only_new, get_var, do_nuc, do_prot);
509
str = ReadALine (line, sizeof (line), fp);
513
AsnCloseStruct (aip, atp, NULL);
514
AsnPrintNewLine (aip);
520
PubSeqFetchDisable ();