43
44
* ==========================================================================
46
#ifndef _NEW_CdEntrez_
47
#define _NEW_CdEntrez_
55
48
#include <objall.h>
57
/* Flush threshold for sequence output: PrintSequence terminates and prints
   its residue buffer once its running count is >= this value. */
#define CHARSPERLINE 50
59
/* Context block passed as the opaque data pointer to the GetSeqFeat and
   GetFastaSeq callbacks.
   NOTE(review): the member declarations are not visible in this listing.
   Code in this file reads the members fp, aip, is_na and feat; confirm
   their types against the full source. */
typedef struct expstruct {
64
} ExpStruct, PNTR ExpStructPtr;
66
/* File-scope state for the CD-ROM scanning code.
   NOTE(review): ProcessFile has a parameter and ProcessFileList has a local
   array that are both also named root, so this file-scope buffer is hidden
   inside those functions. */
static Char root [PATH_MAX];
67
/* Assigned in Main from EntrezGetInfo (). */
static EntrezInfoPtr eip;
68
/* Assigned in Main from eip->div_info and indexed once per division. */
static EntrezDivInfo *div_info;
70
/* Scan-mode switches; Main loads them from myargs [0], [1] and [2]. */
static Boolean scanDNA = FALSE;
71
static Boolean scanPRT = FALSE;
72
static Boolean scanCDS = FALSE;
74
/*
 * PrintSequence
 *
 * Writes one sequence to fp: a header line made of ">", the Bioseq id in
 * PRINTID_FASTA_LONG form, a space and the Bioseq title, followed by lines
 * of residues read through a SeqPort.
 *
 *   bsp    Bioseq to print; nothing is written when it is NULL.
 *   sfp    feature whose location may be used to open the SeqPort
 *          (GetFastaSeq passes NULL here).
 *   fp     output stream; nothing is written when it is NULL.
 *   is_na  the Bioseq is printed only when ISA_na (bsp->mol) equals this.
 *
 * NOTE(review): the local declarations, the closing braces and several
 * branch conditions are missing from this listing, so the tests that choose
 * between the two "code =" and the two "spp =" assignments, and between the
 * three bracketed markers, cannot be seen here.
 */
static void PrintSequence (BioseqPtr bsp, SeqFeatPtr sfp,
75
FILE *fp, Boolean is_na)
87
if (bsp != NULL && fp != NULL) {
88
if ((Boolean) ISA_na (bsp->mol) == is_na) {
89
repr = Bioseq_repr (bsp);
90
/* Only raw and const representations are printed. */
if (repr == Seq_repr_raw || repr == Seq_repr_const) {
91
title = BioseqGetTitle (bsp);
92
/* Assemble the header line in buffer, tracking the write position in tmp. */
tmp = StringMove (buffer, ">");
93
tmp = SeqIdPrint (bsp->id, tmp, PRINTID_FASTA_LONG);
94
tmp = StringMove (tmp, " ");
95
/* NOTE(review): at most 200 characters of title are copied after the id;
   the declared size of buffer is not visible -- confirm it has room for
   the id text plus 200 characters and a terminator. */
StringNCpy (tmp, title, 200);
96
fprintf (fp, "%s\n", buffer);
98
code = Seq_code_iupacna;
100
code = Seq_code_iupacaa;
103
/* Port limited to the feature location ... */
spp = SeqPortNewByLoc (sfp->location, code);
105
/* ... or covering the whole Bioseq. */
spp = SeqPortNew (bsp, 0, -1, 0, code);
109
while ((residue = SeqPortGetResidue (spp)) != SEQPORT_EOF) {
110
if (! IS_residue (residue)) {
111
/* Non-residue value: print what has been collected so far, then a marker. */
buffer [count] = '\0';
112
fprintf (fp, "%s\n", buffer);
116
fprintf (fp, "[Gap]\n");
119
fprintf (fp, "[EOS]\n");
122
fprintf (fp, "[Invalid Residue]\n");
126
buffer [count] = residue;
128
/* Emit a full line once CHARSPERLINE residues have been collected. */
if (count >= CHARSPERLINE) {
129
buffer [count] = '\0';
130
fprintf (fp, "%s\n", buffer);
136
/* Print whatever is in buffer after the read loop. */
buffer [count] = '\0';
137
fprintf (fp, "%s\n", buffer);
146
/*
 * GetSeqFeat
 *
 * Callback registered for "Seq-feat" by SeqEntryToFeat through AsnExpOptNew.
 * When the callback is entered with START_STRUCT it takes the ExpStruct from
 * aeosp->data and the feature from aeosp->the_struct. If the feature's
 * data.choice equals esp->feat, it looks up the Bioseq for the feature
 * location and prints it with PrintSequence.
 *
 * NOTE(review): local declarations and closing braces are not visible here.
 */
static void LIBCALLBACK GetSeqFeat (AsnExpOptStructPtr aeosp)
153
if (aeosp->dvp->intvalue == START_STRUCT) {
154
esp = (ExpStructPtr) aeosp->data;
155
sfp = (SeqFeatPtr) aeosp->the_struct;
156
if (esp != NULL && esp->fp != NULL && sfp != NULL &&
157
sfp->data.choice == esp->feat) {
158
bsp = BioseqFind (SeqLocId (sfp->location));
160
/* PrintSequence does nothing when bsp is NULL. */
PrintSequence (bsp, sfp, esp->fp, esp->is_na);
166
/*
 * SeqEntryToFeat
 *
 * Prints the sequence under each feature of kind feat found in sep.
 * It allocates an ExpStruct, opens a null AsnIo, registers GetSeqFeat for
 * "Seq-feat" on that AsnIo and then writes sep to it. Presumably the write
 * is what makes GetSeqFeat fire once per feature -- confirm against the
 * AsnExpOpt documentation.
 *
 *   sep    entry to walk; nothing happens when it is NULL.
 *   fp     output stream; nothing happens when it is NULL.
 *   is_na  passed to the callback context (assignment not visible here).
 *   feat   feature choice to match (assignment not visible here).
 *
 * NOTE(review): lines are missing from this listing. Whether the MemNew
 * result is checked before esp->aip is written, and where aip, esp->aip and
 * esp are released, cannot be seen -- verify in the full source.
 */
static void SeqEntryToFeat (SeqEntryPtr sep, FILE *fp,
167
Boolean is_na, Uint1 feat)
174
if (sep != NULL && fp != NULL) {
175
esp = MemNew (sizeof (ExpStruct));
177
aip = AsnIoNullOpen ();
180
/* Text-output AsnIo layered over the caller's stream. */
esp->aip = AsnIoNew (ASNIO_TEXT_OUT, fp, NULL, NULL, NULL);
183
aeop = AsnExpOptNew (aip, "Seq-feat", (Pointer) esp, GetSeqFeat);
185
SeqEntryAsnWrite (sep, aip, NULL);
187
AsnExpOptFree (aip, aeop);
196
/*
 * GetFastaSeq
 *
 * SeqEntryExplore callback (registered by SeqEntryScan). For an entry that
 * is a Bioseq, it prints the Bioseq through PrintSequence with no feature
 * (sfp == NULL), using the stream and is_na flag from the ExpStruct passed
 * in data. The index and indent parameters are not used in the visible lines.
 */
static void LIBCALLBACK GetFastaSeq (SeqEntryPtr sep, Pointer data,
197
Int4 index, Int2 indent)
203
if (IS_Bioseq (sep)) {
204
bsp = (BioseqPtr) sep->data.ptrvalue;
205
esp = (ExpStructPtr) data;
206
if (bsp != NULL && esp != NULL && esp->fp != NULL) {
207
PrintSequence (bsp, NULL, esp->fp, esp->is_na);
212
/*
 * SeqEntryScan
 *
 * Explores sep with GetFastaSeq as the callback, passing the address of a
 * local context named es. Does nothing when sep or fp is NULL.
 *
 * NOTE(review): the declaration and initialisation of es (presumably from
 * fp and is_na) are not visible in this listing.
 */
static void SeqEntryScan (SeqEntryPtr sep, FILE *fp, Boolean is_na)
217
if (sep != NULL && fp != NULL) {
222
SeqEntryExplore (sep, (Pointer) &es, GetFastaSeq);
226
/*
 * ProcessSeqEntry
 *
 * Runs one scan mode on sep, chosen by the file-scope scan flags. The
 * branches form an else-if chain, so at most one mode runs per entry.
 *
 * NOTE(review): the first condition of the chain is not visible here;
 * presumably it tests scanDNA.
 */
static void ProcessSeqEntry (SeqEntryPtr sep, FILE *fp)
229
if (sep != NULL && fp != NULL) {
231
SeqEntryScan (sep, fp, TRUE);
232
} else if (scanPRT) {
233
SeqEntryScan (sep, fp, FALSE);
234
} else if (scanCDS) {
235
/* NOTE(review): the literal 3 is the feat value that GetSeqFeat compares
   with sfp->data.choice; presumably the coding-region choice -- confirm
   and prefer the named constant if one exists. */
SeqEntryToFeat (sep, fp, TRUE, 3);
240
/* Singly linked list node describing one file to scan.
   NOTE(review): only the next link is visible in this listing. Code in
   this file also reads the members cdnum, fdir and fname. */
typedef struct filelist {
244
struct filelist PNTR next;
245
} FileList, PNTR FileListPtr;
247
/*
 * ProcessFile
 *
 * Scans one file from the list. It opens outputfile in append mode, builds
 * the input path from root, flp->fdir and flp->fname, opens that path with
 * CASN_Open and, when the document type equals type, passes every SeqEntry
 * returned by CASN_NextSeqEntry to ProcessSeqEntry.
 *
 *   flp         list node naming the directory and file.
 *   root        root directory (hides the file-scope root).
 *   outputfile  name of the output file to append to.
 *
 * NOTE(review): the assignment of type, the release of each sep, the close
 * calls and the condition guarding the fatal message are not visible in
 * this listing.
 */
static void ProcessFile (FileListPtr flp, CharPtr root, CharPtr outputfile)
252
Char path [PATH_MAX];
257
fp = FileOpen (outputfile, "a");
259
/* NOTE(review): unbounded copy; ProcessFileList passes a PATH_MAX-sized
   array, which fits only before the directory and file name are added. */
StringCpy (path, root);
260
FileBuildPath (path, flp->fdir, NULL);
261
FileBuildPath (path, NULL, flp->fname);
262
if ((casnh = CASN_Open (path)) != NULL) {
268
if (CASN_DocType (casnh) == type) {
269
while ((sep = CASN_NextSeqEntry (casnh)) != NULL) {
270
ProcessSeqEntry (sep, fp);
278
Message (MSG_FATAL, "Unable to reopen output file");
283
/*
 * ProcessFileList
 *
 * Walks the file list and calls ProcessFile on each node. When a node's
 * cdnum differs from device, the volume is mounted with CdMountEntrezVolume,
 * which fills the local root buffer; a mount failure is reported as fatal.
 *
 * NOTE(review): the declaration and update of device and the step that
 * advances flp are not visible in this listing.
 */
static void ProcessFileList (FileListPtr flp, CharPtr outputfile)
288
/* Local buffer; hides the file-scope root. */
Char root [PATH_MAX];
294
while (flp != NULL) {
296
if (device != flp->cdnum) {
297
if (! CdMountEntrezVolume (flp->cdnum, root, sizeof (root))) {
298
Message (MSG_FATAL, "CdMountEntrezVolume failed");
302
ProcessFile (flp, root, outputfile);
309
/*
 * FileListNew
 *
 * Allocates a FileList node and fills in cdnum plus copies of fdir and fname
 * (made with StringSave, and only when the string is non-NULL and
 * non-empty). In the visible lines the node is linked in after skipping
 * forward while the next node's cdnum is <= cdnum.
 *
 * NOTE(review): EnumerateFiles and Main both call this with flp == NULL,
 * while the loop below dereferences flp->next. A guard is presumably on a
 * line missing from this listing -- confirm. The return statement is also
 * not visible.
 */
static FileListPtr FileListNew (FileListPtr flp, Int2 cdnum,
310
CharPtr fdir, CharPtr fname)
315
newnode = (FileListPtr) MemNew (sizeof (FileList));
316
if (newnode != NULL) {
318
while (flp->next != NULL && flp->next->cdnum <= cdnum) {
321
newnode->next = flp->next;
324
newnode->cdnum = cdnum;
325
if (fdir != NULL && *fdir != '\0') {
326
newnode->fdir = StringSave (fdir);
328
if (fname != NULL && *fname != '\0') {
329
newnode->fname = StringSave (fname);
335
/*
 * EnumerateFiles
 *
 * Callback handed to CdEnumFiles by Main together with the address of the
 * list head. It treats opaque_data as a pointer to the head and creates a
 * node for the reported file with FileListNew, either relative to *head or
 * with a NULL list.
 *
 * NOTE(review): the end of the parameter list, the condition choosing
 * between the two FileListNew calls, any update of *head and the return
 * value are not visible in this listing. fsize is not used in the visible
 * lines.
 */
static Boolean LIBCALLBACK EnumerateFiles (int cdnum, const char *fdir,
336
const char *fname, long fsize,
341
FileListPtr PNTR head;
343
head = (FileListPtr PNTR) opaque_data;
346
/* The casts drop const from fdir and fname; FileListNew only copies them. */
flp = FileListNew (*head, (Int2) cdnum, (CharPtr) fdir, (CharPtr) fname);
351
flp = FileListNew (NULL, (Int2) cdnum, (CharPtr) fdir, (CharPtr) fname);
358
/* Command-line table with four entries. Main reads entries 0..2 as the
   scanDNA / scanPRT / scanCDS switches and entry 3 as the output file name.
   NOTE(review): the definition of NUMARGS and the closing brace of this
   initializer are not visible in this listing. */
Args myargs [NUMARGS] = {
359
/* -d, boolean, default "F" */
{"Scan DNA", "F", NULL, NULL, TRUE, 'd', ARG_BOOLEAN, 0.0, 0, NULL},
360
/* -p, boolean, default "F" */
{"Scan Protein", "F", NULL, NULL, TRUE, 'p', ARG_BOOLEAN, 0.0, 0, NULL},
361
/* -c, boolean, default "F" */
{"Scan Coding Regions", "F", NULL, NULL, TRUE, 'c', ARG_BOOLEAN, 0.0, 0, NULL},
362
/* -o, output file, default "stdout" */
{"Output File", "stdout", NULL, NULL, FALSE, 'o', ARG_FILE_OUT, 0.0, 0, NULL}
56
/* Per-run settings and scratch state passed as userdata to the record and
   visitor callbacks.
   NOTE(review): the member declarations are not visible in this listing.
   Code in this file reads the members fp, id, binary, compressed and
   verbose; DoRecord writes up to 64 characters into id. */
typedef struct appflags {
62
} AppFlagData, PNTR AppFlagPtr;
64
/*
 * DoOneUser
 *
 * Visitor called through VisitUserObjectsInUop for each user object. It
 * writes one line to afp->fp built from the object's _class text (when
 * present), then either oip->str or the decimal value of oip->id, then a
 * space and afp->id, then a newline.
 *
 *   uop       user object; ignored when NULL.
 *   userdata  AppFlagPtr; ignored when NULL.
 *
 * NOTE(review): the declaration of buf, the assignment of oip and the
 * conditions around the individual fprintf calls are not visible in this
 * listing. StringCat and sprintf write into buf without a length limit --
 * confirm that buf is large enough for the class and type strings.
 */
static void DoOneUser (UserObjectPtr uop, Pointer userdata)
71
if (uop == NULL) return;
72
afp = (AppFlagPtr) userdata;
73
if (afp == NULL) return;
76
if (StringDoesHaveText (uop->_class)) {
77
StringCat (buf, uop->_class);
81
fprintf (afp->fp, "%s", buf);
86
/* Prefer the string form of the object id, else a positive numeric id. */
if (StringDoesHaveText (oip->str)) {
87
StringCat (buf, oip->str);
88
} else if (oip->id > 0) {
89
sprintf (buf, "%ld", (long) oip->id);
94
fprintf (afp->fp, "%s", buf);
97
fprintf (afp->fp, " %s", afp->id);
100
fprintf (afp->fp, "\n");
104
/*
 * DoOneDescriptor
 *
 * Descriptor visitor (registered by DoRecord). Only Seq_descr_user
 * descriptors are handled: the user object stored in the descriptor is
 * walked with VisitUserObjectsInUop, calling DoOneUser for each object.
 * Returns without doing anything when sdp, userdata or the user object is
 * NULL.
 */
static void DoOneDescriptor (SeqDescrPtr sdp, Pointer userdata)
110
if (sdp == NULL || sdp->choice != Seq_descr_user) return;
111
afp = (AppFlagPtr) userdata;
112
if (afp == NULL) return;
114
uop = (UserObjectPtr) sdp->data.ptrvalue;
115
if (uop == NULL) return;
117
VisitUserObjectsInUop (uop, (Pointer) afp, DoOneUser);
120
/*
 * DoOneFeature
 *
 * Feature visitor (registered by DoRecord). Walks user objects attached to
 * the feature with VisitUserObjectsInUop / DoOneUser: first a single uop,
 * then every object on the sfp->exts chain.
 *
 * NOTE(review): the assignment of uop before the first visit and any
 * condition around it are not visible in this listing.
 */
static void DoOneFeature (SeqFeatPtr sfp, Pointer userdata)
126
if (sfp == NULL) return;
127
afp = (AppFlagPtr) userdata;
128
if (afp == NULL) return;
132
VisitUserObjectsInUop (uop, (Pointer) afp, DoOneUser);
135
for (uop = sfp->exts; uop != NULL; uop = uop->next) {
136
VisitUserObjectsInUop (uop, (Pointer) afp, DoOneUser);
140
/*
 * DoRecord
 *
 * Per-record callback passed to ScanBioseqSetRelease by ProcessOneRecord.
 * It writes the id of the first Bioseq in sep into afp->id, so that
 * DoOneUser can print it, and then visits every descriptor and feature in
 * the record. Returns early when sep, userdata, the first Bioseq entry or
 * its Bioseq pointer is NULL.
 */
static void DoRecord (SeqEntryPtr sep, Pointer userdata)
147
if (sep == NULL) return;
148
afp = (AppFlagPtr) userdata;
149
if (afp == NULL) return;
151
fsep = FindNthBioseq (sep, 1);
152
if (fsep == NULL) return;
153
fbsp = (BioseqPtr) fsep->data.ptrvalue;
154
if (fbsp == NULL) return;
156
/* 64 is the length passed for afp->id; the member's declared size is not
   visible in this listing -- TODO confirm they agree. */
SeqIdWrite (fbsp->id, afp->id, PRINTID_FASTA_LONG, 64);
158
VisitDescriptorsInSep (sep, (Pointer) afp, DoOneDescriptor);
159
VisitFeaturesInSep (sep, (Pointer) afp, DoOneFeature);
162
/*
 * ProcessOneRecord
 *
 * Handles one input file. Main passes it to DirExplore as the per-file
 * callback and also calls it directly for a single input file. It writes
 * the file name to afp->fp, scans the release file with
 * ScanBioseqSetRelease using DoRecord as the per-record callback, and then
 * writes a blank line.
 *
 * NOTE(review): the parameter list is not visible in this listing; the body
 * uses the names filename and userdata. The return after the "Skipping"
 * message and the conditions around the remaining output calls are also
 * missing.
 */
static void ProcessOneRecord (
170
if (StringHasNoText (filename)) return;
171
afp = (AppFlagPtr) userdata;
172
if (afp == NULL) return;
174
/* File names containing gbest, gbgss or gbhtg are reported as skipped. */
if (StringStr (filename, "gbest") != NULL ||
175
StringStr (filename, "gbgss") != NULL ||
176
StringStr (filename, "gbhtg") != NULL) {
177
printf ("Skipping %s\n", filename);
181
printf ("%s\n", filename);
184
fprintf (afp->fp, "%s\n", filename);
187
ScanBioseqSetRelease (filename, afp->binary, afp->compressed, (Pointer) afp, DoRecord);
189
fprintf (afp->fp, "\n");
193
/* Named indices into the nine-entry argument table below. Each value is the
   position of the entry whose option letter matches the macro's prefix.
   Main uses them to read the parsed values out of myargs. */
#define p_argInputPath 0
194
#define i_argInputFile 1
195
#define o_argOutputFile 2
196
#define f_argFilter 3
197
#define x_argSuffix 4
198
#define u_argRecurse 5
199
#define b_argBinary 6
200
#define c_argCompressed 7
201
#define v_argVerbose 8
204
/* NOTE(review): the line that opens this initializer (presumably the
   declaration of myargs) and its closing brace are not visible in this
   listing. */
/* [0] -p, string */
{"Path to Files", NULL, NULL, NULL,
205
TRUE, 'p', ARG_STRING, 0.0, 0, NULL},
206
/* [1] -i, input file */
{"Input File Name", NULL, NULL, NULL,
207
TRUE, 'i', ARG_FILE_IN, 0.0, 0, NULL},
208
/* [2] -o, output file */
{"Output File Name", NULL, NULL, NULL,
209
TRUE, 'o', ARG_FILE_OUT, 0.0, 0, NULL},
210
/* [3] -f, string */
{"Substring Filter", NULL, NULL, NULL,
211
TRUE, 'f', ARG_STRING, 0.0, 0, NULL},
212
/* [4] -x, string, default ".aso" */
{"File Selection Suffix", ".aso", NULL, NULL,
213
TRUE, 'x', ARG_STRING, 0.0, 0, NULL},
214
/* [5] -u, boolean, default "F" */
{"Recurse", "F", NULL, NULL,
215
TRUE, 'u', ARG_BOOLEAN, 0.0, 0, NULL},
216
/* [6] -b, boolean, default "F" */
{"Bioseq-set is Binary", "F", NULL, NULL,
217
TRUE, 'b', ARG_BOOLEAN, 0.0, 0, NULL},
218
/* [7] -c, boolean, default "F" */
{"Bioseq-set is Compressed", "F", NULL, NULL,
219
TRUE, 'c', ARG_BOOLEAN, 0.0, 0, NULL},
220
/* [8] -v, boolean, default "F" */
{"Verbose", "F", NULL, NULL,
221
TRUE, 'v', ARG_BOOLEAN, 0.0, 0, NULL},
365
224
/*
 * Main
 *
 * Program entry point.
 *
 * NOTE(review): this listing appears to interleave two different bodies of
 * Main. The first parses a four-entry argument table indexed by literals
 * 0..3 and scans Entrez CD-ROM files. The second parses a table indexed by
 * the *_arg* macros and scans release files from a directory or a single
 * file. Local declarations, braces, else branches and return statements are
 * mostly missing, so the comments below describe only the visible calls.
 */
extern Int2 Main (void)
376
/* ---- first body: Entrez CD-ROM scan ---- */
if (GetArgs ("Scantest", NUMARGS, myargs)) {
377
scanDNA = (Boolean) myargs [0].intvalue;
378
scanPRT = (Boolean) myargs [1].intvalue;
379
scanCDS = (Boolean) myargs [2].intvalue;
381
/* Add up the three scan switches; the test applied to sum is not visible
   here (see the "You must choose one of the three options" message). */
for (i = 0; i < 3; i++) {
382
sum += myargs [i].intvalue;
385
if (AllObjLoad () && SeqCodeSetLoad ()) {
386
if (EntrezInit ("scantest", FALSE, &is_network)) {
388
Message (MSG_FATAL, "Network service does not allow scanning");
390
eip = EntrezGetInfo ();
391
if (eip != NULL && eip->div_info != NULL) {
392
/* Start the file list with a node whose cdnum is INT2_MIN. */
flp = FileListNew (NULL, INT2_MIN, NULL, NULL);
394
div_info = eip->div_info;
395
/* Collect the files of every division into the list through the
   EnumerateFiles callback. */
for (i = 0; i < eip->div_count; i++) {
396
StringNCpy (div, div_info [i].tag, sizeof (div) - 1);
398
CdEnumFiles (CdDir_rec, TYP_AA, div, EnumerateFiles, &flp);
400
CdEnumFiles (CdDir_rec, TYP_NT, div, EnumerateFiles, &flp);
403
/* Create the output file in write mode; ProcessFile later reopens the
   same name in append mode for each input file. */
fp = FileOpen (myargs[3].strvalue, "w");
406
ProcessFileList (flp, myargs[3].strvalue);
408
Message (MSG_FATAL, "Unable to create output file");
410
/* Release the file list. */
while (flp != NULL) {
413
MemFree (flp->fname);
419
Message (MSG_FATAL, "Unable to allocate file list pointer");
422
Message (MSG_FATAL, "Unable to obtain Entrez Info");
427
Message (MSG_FATAL, "Unable to connect to Entrez service");
430
Message (MSG_FATAL, "Unable to load parse tables");
433
Message (MSG_FATAL, "You must choose one of the three options");
229
/* ---- second body: release-file scan ---- */
CharPtr filter, infile, outfile, directory, suffix;
233
ErrSetFatalLevel (SEV_MAX);
234
ErrClearOptFlags (EO_SHOW_USERSTR);
235
ErrSetLogfile ("stderr", ELOG_APPEND);
236
UseLocalAsnloadDataAndErrMsg ();
239
/* Load the object and code tables; each failure is reported as fatal. */
if (! AllObjLoad ()) {
240
Message (MSG_FATAL, "AllObjLoad failed");
243
if (! SubmitAsnLoad ()) {
244
Message (MSG_FATAL, "SubmitAsnLoad failed");
247
if (! FeatDefSetLoad ()) {
248
Message (MSG_FATAL, "FeatDefSetLoad failed");
251
if (! SeqCodeSetLoad ()) {
252
Message (MSG_FATAL, "SeqCodeSetLoad failed");
255
if (! GeneticCodeTableLoad ()) {
256
Message (MSG_FATAL, "GeneticCodeTableLoad failed");
260
/* process command line arguments */
262
if (! GetArgs ("scantest", sizeof (myargs) / sizeof (Args), myargs)) {
266
/* Zero the settings block before filling it from the parsed arguments. */
MemSet ((Pointer) &afd, 0, sizeof (AppFlagData));
268
directory = (CharPtr) myargs [p_argInputPath].strvalue;
269
infile = (CharPtr) myargs [i_argInputFile].strvalue;
270
outfile = (CharPtr) myargs [o_argOutputFile].strvalue;
271
/* filter is assigned here but not used in the visible lines. */
filter = (CharPtr) myargs [f_argFilter].strvalue;
272
suffix = (CharPtr) myargs [x_argSuffix].strvalue;
273
dorecurse = (Boolean) myargs [u_argRecurse].intvalue;
274
afd.binary = (Boolean) myargs [b_argBinary].intvalue;
275
afd.compressed = (Boolean) myargs [c_argCompressed].intvalue;
276
afd.verbose = (Boolean) myargs[v_argVerbose].intvalue;
278
afd.fp = FileOpen (outfile, "w");
279
if (afd.fp == NULL) {
283
/* A directory takes precedence over a single input file. */
if (StringDoesHaveText (directory)) {
285
DirExplore (directory, NULL, suffix, dorecurse, ProcessOneRecord, (Pointer) &afd);
287
} else if (StringDoesHaveText (infile)) {
289
ProcessOneRecord (infile, &afd);