/*
* ===========================================================================
*
* National Center for Biotechnology Information (NCBI)
*
* This software/database is a "United States Government Work" under the
* terms of the United States Copyright Act. It was written as part of
* the author's official duties as a United States Government employee and
* thus cannot be copyrighted. This software/database is freely available
* to the public for use. The National Library of Medicine and the U.S.
* Government do not place any restriction on its use or reproduction.
* We would, however, appreciate having the NCBI and the author cited in
* any work or product based on this material.
*
* Although all reasonable efforts have been taken to ensure the accuracy
* and reliability of the software and data, the NLM and the U.S.
* Government do not and cannot warrant the performance or results that
* may be obtained by using this software or data. The NLM and the U.S.
* Government disclaim all warranties, express or implied, including
* warranties of performance, merchantability or fitness for any particular
* purpose.
*
* ===========================================================================
*
* File Name: asnmacro.c
*
* Author: Colleen Bollin
*
* Version Creation Date: 4/12/07
*
* --------------------------------------------------------------------------
* Date Name Description of modification
* ------- ---------- -----------------------------------------------------
*
* ==========================================================================
*/
55
#include <algo/blast/api/twoseq_api.h>
56
#define NLM_GENERATED_CODE_PROTO
61
#define ASNMACRO_APP_VER "1.0"
63
CharPtr ASNMACRO_APPLICATION = ASNMACRO_APP_VER;
66
/*
 * GetSeqAlign
 *
 * Aligns two Bioseqs with BLAST_TwoSequencesSearch and returns the
 * alignment it produces.
 *
 * bsp1, bsp2 : the two sequences to align.
 *
 * Returns NULL when either sequence is NULL, when no BLAST option block
 * could be obtained, or when the search leaves the result pointer unset.
 *
 * Option selection:
 *   - if either sequence is longer than 10000, use filter "m L",
 *     word size 20, an e-value cutoff taken from act_get_eval (60) and
 *     hint eNone; otherwise only the filter "m F" is set.
 *   - the program is eBlastn when bsp1 is nucleic acid, eBlastp otherwise.
 */
static SeqAlignPtr LIBCALLBACK GetSeqAlign (BioseqPtr bsp1, BioseqPtr bsp2)
{
  BLAST_SummaryOptions *options = NULL;
  SeqAlignPtr           salp = NULL;

  if (bsp1 == NULL || bsp2 == NULL) return NULL;

  BLAST_SummaryOptionsInit(&options);
  /* every statement below writes through options, so bail out if the
   * initialiser did not hand back a block */
  if (options == NULL) return NULL;

  if (bsp1->length > 10000 || bsp2->length > 10000)
  {
    options->filter_string = StringSave ("m L");
    options->word_size = 20;
    options->cutoff_evalue = act_get_eval (60);
    options->hint = eNone;
  }
  else
  {
    options->filter_string = StringSave ("m F");
  }

  if (ISA_na (bsp1->mol))
  {
    options->program = eBlastn;
  }
  else
  {
    options->program = eBlastp;
  }

  BLAST_TwoSequencesSearch(options, bsp1, bsp2, &salp);
  BLAST_SummaryOptionsFree(options);

  return salp;
}
99
/*
 * GetSeqAlignPiece
 *
 * Aligns two sequence locations with BLAST_TwoSeqLocSets and returns the
 * first alignment of the resulting array; ownership of that alignment is
 * moved to the caller before the array itself is freed.
 *
 * slp1, slp2 : the two locations to align.
 *
 * Returns NULL when either location is NULL, when the Bioseq for slp1
 * cannot be found, when no BLAST option block could be obtained, or when
 * the search returns no alignment array.
 */
static SeqAlignPtr LIBCALLBACK GetSeqAlignPiece (SeqLocPtr slp1, SeqLocPtr slp2)
{
  BLAST_SummaryOptions *options = NULL;
  SBlastSeqalignArray  *seqalign_arr = NULL;
  SeqAlignPtr           salp = NULL;
  BioseqPtr             bsp;

  if (slp1 == NULL || slp2 == NULL) return NULL;

  /* the Bioseq is only consulted for its molecule type (blastn vs blastp) */
  bsp = BioseqFindFromSeqLoc (slp1);
  if (bsp == NULL) return NULL;

  BLAST_SummaryOptionsInit(&options);
  if (options == NULL) return NULL;

  if (ISA_na (bsp->mol))
  {
    options->program = eBlastn;
  }
  else
  {
    options->program = eBlastp;
  }

  options->gapped_calculation = TRUE;
  options->cutoff_evalue = 10;
  options->gap_x_dropoff = 100;
  options->gap_open = 4;
  options->gap_extend = 1;
  options->nucleotide_mismatch = -1;
  options->word_size = 7;

  BLAST_TwoSeqLocSets(options, slp1, slp2, NULL, &seqalign_arr, NULL, NULL, NULL);

  if (seqalign_arr != NULL)
  {
    if (seqalign_arr->array != NULL)
    {
      /* detach the first alignment so freeing the array does not free it */
      salp = seqalign_arr->array[0];
      seqalign_arr->array[0] = NULL;
    }
    seqalign_arr = SBlastSeqalignArrayFree(seqalign_arr);
  }

  BLAST_SummaryOptionsFree(options);

  return salp;
}
147
/*
 * GlobalAlign2Seq
 *
 * Thin wrapper around Sqn_GlobalAlign2SeqEx that supplies this file's two
 * BLAST callbacks (GetSeqAlign for whole sequences, GetSeqAlignPiece for
 * sequence locations). bsp1, bsp2 and revcomp are passed through unchanged
 * and the callee's result is returned as is.
 */
static SeqAlignPtr GlobalAlign2Seq (BioseqPtr bsp1, BioseqPtr bsp2, BoolPtr revcomp)
{
  SeqAlignPtr alignment;

  alignment = Sqn_GlobalAlign2SeqEx (bsp1, bsp2, revcomp, GetSeqAlign, GetSeqAlignPiece);
  return alignment;
}
154
typedef struct outputstream {
162
} OutputStreamData, PNTR OutputStreamPtr;
164
typedef struct inputstream {
170
} InputStreamData, PNTR InputStreamPtr;
172
typedef struct asnstream {
176
AsnTypePtr atp_bss_se;
177
} AsnStreamData, PNTR AsnStreamPtr;
179
static FILE* OpenOneFile (
186
Char file [FILENAME_MAX], path [PATH_MAX];
191
if (suffix == NULL) {
195
StringNCpy_0 (path, directory, sizeof (path));
196
sprintf (file, "%s%s", base, suffix);
197
FileBuildPath (path, NULL, file);
199
return FileOpen (path, "r");
202
static AsnIoPtr AsnIoFromInputStream (
208
Char file [FILENAME_MAX], path [PATH_MAX];
211
if (isp == NULL) return NULL;
213
if (isp->is_binary) {
219
if (isp->base == NULL) {
220
aip = AsnIoOpen ("stdin", read_flag);
222
StringNCpy_0 (path, isp->directory, sizeof (path));
223
sprintf (file, "%s%s", isp->base, isp->suffix);
224
FileBuildPath (path, NULL, file);
225
aip = AsnIoOpen (path, read_flag);
231
static AsnIoPtr AsnIoFromOutputStream (OutputStreamPtr osp)
234
Char file [FILENAME_MAX], path [PATH_MAX];
237
if (osp == NULL) return NULL;
238
if (osp->aip == NULL) {
239
write_flag = osp->is_binary ? "wb" : "w";
240
if (StringDoesHaveText (osp->outfile)) {
241
StringNCpy_0 (path, osp->outfile, sizeof (path));
243
if (osp->base == NULL) {
244
aip = AsnIoOpen ("stdout", write_flag);
246
if (osp->outsuffix == NULL) {
249
StringNCpy_0 (path, osp->results_dir, sizeof (path));
250
sprintf (file, "%s%s%s", osp->base, osp->suffix, osp->outsuffix);
251
FileBuildPath (path, NULL, file);
252
aip = AsnIoOpen (path, write_flag);
254
Message (MSG_POSTERR, "Unable to write to %s.", path);
264
static void WriteOneFile (
272
aip = AsnIoFromOutputStream (osp);
274
SeqEntryAsnWrite (sep, aip, NULL);
277
if (aip != osp->aip) {
283
static Uint2 ProcessOneAsn (
291
Uint2 datatype, entityID = 0;
293
Int4 num_fields = 0, num_features = 0;
295
if (fp == NULL) return 0;
297
dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, &entityID, TRUE, FALSE, TRUE, FALSE);
298
if (dataptr == NULL) {
299
Message (MSG_POSTERR, "Unable to read data from %s.", path);
303
SeqMgrIndexFeatures (entityID, NULL);
304
sep = GetTopSeqEntryForEntityID (entityID);
305
ApplyMacroToSeqEntry (sep, macro, &num_fields, &num_features);
306
Message (MSG_POST, "For file %s, macro script affected %d fields and created %d features", path, num_fields, num_features);
311
/* return -1 if failure, 0 if success */
312
static Int4 ProcessOneRecord (
323
if (osp == NULL) return -1;
324
fp = OpenOneFile (directory, osp->base, osp->suffix);
325
if (fp == NULL) return -1;
327
entityID = ProcessOneAsn (fp, osp->base == NULL ? "input stream" : osp->base, macro);
331
if (entityID == 0) return -1;
333
/* finish processing */
335
sep = GetTopSeqEntryForEntityID (entityID);
337
WriteOneFile (osp, sep);
340
ObjMgrFreeByEntityID (entityID);
344
static Int4 ProcessStream (InputStreamPtr isp, OutputStreamPtr osp, AsnStreamPtr asp, ValNodePtr macro)
346
AsnTypePtr atp, atp_srch;
347
AsnIoPtr asn_in, asn_out;
352
Int4 num_fields = 0, num_features = 0;
353
Int4 tmp_fields, tmp_features;
355
if (isp == NULL || osp == NULL || asp == NULL) return 1;
357
asn_in = AsnIoFromInputStream (isp);
358
asn_out = AsnIoFromOutputStream (osp);
360
if (isp->is_seqentry) {
362
atp_srch = asp->atp_se;
366
atp_srch = asp->atp_bss_se;
369
while ((atp = AsnReadId(asn_in, asp->amp, atp)) != NULL && rval == 0) {
370
if (atp != atp_srch) {
371
AsnReadVal(asn_in, atp, &av);
372
AsnWrite(asn_out, atp, &av);
373
AsnKillValue(atp, &av);
376
if ((sep = SeqEntryAsnRead(asn_in, atp)) == NULL) {
377
Message (MSG_POSTERR, "SeqEntryAsnRead failure");
381
entityID = ObjMgrRegister (OBJ_SEQENTRY, sep);
384
ApplyMacroToSeqEntry (sep, macro, &tmp_fields, &tmp_features);
385
num_fields += tmp_fields;
386
num_features += tmp_features;
387
DeleteMarkedObjects (entityID, 0, NULL);
388
RenormalizeNucProtSets (sep, TRUE);
389
if (! SeqEntryAsnWrite(sep, asn_out, atp)) {
390
Message (MSG_POSTERR, "SeqEntryAsnWrite failure");
394
ObjMgrFreeByEntityID (entityID);
396
} /* Endwhile, AsnReadId */
399
if (asn_out != osp->aip) {
402
Message (MSG_POST, "Macro script affected %d fields and created %d features", num_fields, num_features);
406
/* return -1 on failure, 0 on success */
407
static Int4 FileRecurse (
416
Char path [PATH_MAX];
419
ValNodePtr head, vnp;
420
CharPtr orig_dir, orig_base;
423
/* get list of all files in source directory */
425
head = DirCatalog (directory);
427
for (vnp = head; vnp != NULL; vnp = vnp->next) {
428
if (vnp->choice == 0) {
429
str = (CharPtr) vnp->data.ptrvalue;
430
if (StringDoesHaveText (str)) {
432
/* does filename have desired substring? */
434
ptr = StringStr (str, osp->suffix);
438
/* make sure detected suffix is really at end of filename */
440
if (StringCmp (ptr, osp->suffix) == 0) {
443
/* process file that has desired suffix (usually .fsa) */
445
orig_dir = isp->directory;
446
isp->directory = directory;
447
orig_base = isp->base;
449
if (isp->is_binary) {
450
rval |= ProcessStream (isp, osp, asp, macro);
452
rval |= ProcessOneRecord (directory, osp, macro);
454
isp->directory = orig_dir;
455
isp->base = orig_base;
460
} else if (vnp->choice == 1) {
462
/* recurse into subdirectory */
464
StringNCpy_0 (path, directory, sizeof (path));
465
str = (CharPtr) vnp->data.ptrvalue;
466
FileBuildPath (path, str, NULL);
467
rval |= FileRecurse (path, isp, osp, asp, macro);
471
/* clean up file list */
473
ValNodeFreeData (head);
477
/*
 * SetUpAsnStreamData
 *
 * Fills *asp with the ASN.1 module pointer and the three type pointers
 * ("Bioseq-set", "Bioseq-set.seq-set.E", "Seq-entry") used when streaming
 * records.
 *
 * Returns TRUE when everything was resolved. Returns FALSE when asp is
 * NULL or, after posting an error message, at the first lookup that fails.
 */
static Boolean SetUpAsnStreamData (AsnStreamPtr asp)
{
  if (asp == NULL) {
    return FALSE;
  }

  if (! SeqSetAsnLoad ()) {
    Message (MSG_POSTERR, "Unable to load SeqSet parse tree");
    return FALSE;
  }

  asp->amp = AsnAllModPtr ();
  if (asp->amp == NULL) {
    Message (MSG_POSTERR, "Unable to obtain ASN.1 module pointer");
    return FALSE;
  }

  /* look up the ASN.1 types that the stream reader has to recognise */

  asp->atp_bss = AsnFind ("Bioseq-set");
  if (asp->atp_bss == NULL) {
    Message (MSG_POSTERR, "could not find type Bioseq-set");
    return FALSE;
  }

  asp->atp_bss_se = AsnFind ("Bioseq-set.seq-set.E");
  if (asp->atp_bss_se == NULL) {
    Message (MSG_POSTERR, "AsnFind failure: Bioseq-set.seq-set.E");
    return FALSE;
  }

  asp->atp_se = AsnFind ("Seq-entry");
  if (asp->atp_se == NULL) {
    Message (MSG_POSTERR, "AsnFind failure: Seq-entry");
    return FALSE;
  }

  return TRUE;
}
510
/*
 * ReadMacroFile
 *
 * Opens macro_file as text ASN.1 and reads a macro action list from it
 * with MacroActionListAsnRead.
 *
 * macro_file : path of the macro script.
 *
 * Returns the action list, or NULL after posting an error message when no
 * file name was given, the file cannot be opened, or no action list could
 * be read. The AsnIo handle opened here is closed before returning.
 */
static ValNodePtr ReadMacroFile (CharPtr macro_file)
{
  AsnIoPtr    aip;
  ValNodePtr  action_list;

  /* do not hand a NULL or blank name to AsnIoOpen */
  if (StringHasNoText (macro_file)) {
    Message (MSG_POSTERR, "No macro file specified");
    return NULL;
  }

  aip = AsnIoOpen (macro_file, "r");
  if (aip == NULL) {
    Message (MSG_POSTERR, "Unable to open %s", macro_file);
    return NULL;
  }

  action_list = MacroActionListAsnRead (aip, NULL);
  if (action_list == NULL) {
    Message (MSG_POSTERR, "Unable to read action list from %s.", macro_file);
  }

  /* release the handle on both the success and the failure path */
  AsnIoClose (aip);

  return action_list;
}
530
/* Args structure contains command-line arguments */
532
#define p_argInputPath 0
533
#define r_argOutputPath 1
534
#define i_argInputFile 2
535
#define o_argOutputFile 3
536
#define x_argSuffix 4
537
#define s_argOutSuffix 5
538
#define b_argInputBinary 6
539
#define e_argInputSeqEntry 7
540
#define d_argOutputBinary 8
541
#define m_argMacroFile 9
544
{"Path to Files", NULL, NULL, NULL,
545
TRUE, 'p', ARG_STRING, 0.0, 0, NULL},
546
{"Path for Results", NULL, NULL, NULL,
547
TRUE, 'r', ARG_STRING, 0.0, 0, NULL},
548
{"Single Input File", NULL, NULL, NULL,
549
TRUE, 'i', ARG_FILE_IN, 0.0, 0, NULL},
550
{"Single Output File", NULL, NULL, NULL,
551
TRUE, 'o', ARG_FILE_OUT, 0.0, 0, NULL},
552
{"Suffix", ".sqn", NULL, NULL,
553
TRUE, 'x', ARG_STRING, 0.0, 0, NULL},
554
{"Suffix for stripped files", "", NULL, NULL,
555
TRUE, 's', ARG_STRING, 0.0, 0, NULL},
556
{"Input is binary", "F", NULL, NULL,
557
TRUE, 'b', ARG_BOOLEAN, 0.0, 0, NULL},
558
{"Input is Seq-entry", "F", NULL, NULL,
559
TRUE, 'e', ARG_BOOLEAN, 0.0, 0, NULL},
560
/* flag is 'd' to match d_argOutputBinary; 'b' already belongs to "Input is binary" */
{"Output is binary", "F", NULL, NULL,
TRUE, 'd', ARG_BOOLEAN, 0.0, 0, NULL},
562
{"Macro file", "NULL", NULL, NULL,
563
TRUE, 'm', ARG_FILE_IN, 0.0, 0, NULL}
572
OutputStreamData osd;
577
ValNodePtr action_list;
581
ErrSetFatalLevel (SEV_MAX);
582
ErrClearOptFlags (EO_SHOW_USERSTR);
583
UseLocalAsnloadDataAndErrMsg ();
586
/* finish resolving internal connections in ASN.1 parse tables */
588
if (! AllObjLoad ()) {
589
Message (MSG_FATAL, "AllObjLoad failed");
592
if (! SubmitAsnLoad ()) {
593
Message (MSG_FATAL, "SubmitAsnLoad failed");
596
if (! FeatDefSetLoad ()) {
597
Message (MSG_FATAL, "FeatDefSetLoad failed");
600
if (! SeqCodeSetLoad ()) {
601
Message (MSG_FATAL, "SeqCodeSetLoad failed");
604
if (! GeneticCodeTableLoad ()) {
605
Message (MSG_FATAL, "GeneticCodeTableLoad failed");
609
SetUpAsnStreamData (&asd);
611
/* initialize OutputStreamData */
612
MemSet (&osd, 0, sizeof (osd));
614
/* initialize InputStreamData */
615
MemSet (&isd, 0, sizeof (isd));
617
/* process command line arguments */
619
sprintf (app, "asnmacro %s", ASNMACRO_APPLICATION);
620
if (! GetArgs (app, sizeof (myargs) / sizeof (Args), myargs)) {
624
macro_file = (CharPtr) myargs [m_argMacroFile].strvalue;
625
action_list = ReadMacroFile (macro_file);
627
directory = (CharPtr) myargs [p_argInputPath].strvalue;
628
osd.results_dir = (CharPtr) myargs [r_argOutputPath].strvalue;
629
if (StringHasNoText (osd.results_dir)) {
630
osd.results_dir = NULL;
632
osd.suffix = (CharPtr) myargs [x_argSuffix].strvalue;
633
osd.outsuffix = (CharPtr) myargs [s_argOutSuffix].strvalue;
634
osd.base = (CharPtr) myargs [i_argInputFile].strvalue;
635
osd.outfile = (CharPtr) myargs [o_argOutputFile].strvalue;
636
if (StringHasNoText (osd.outfile)) {
639
osd.is_binary = (Boolean) myargs [d_argOutputBinary].intvalue;
641
/* compare the text, not the addresses: == against a string literal only tests pointer identity */
if (StringCmp (osd.base, "stdin") == 0) {
645
/* if we don't have an output directory or an output file, and the user hasn't provided an
646
* output suffix, add a default.
648
if (osd.results_dir == NULL && osd.outfile == NULL && StringHasNoText (osd.outsuffix)) {
649
osd.outsuffix = ".processed";
652
isd.is_binary = (Boolean) myargs [b_argInputBinary].intvalue;
653
isd.is_seqentry = (Boolean) myargs [e_argInputSeqEntry].intvalue;
654
isd.directory = directory;
656
isd.suffix = osd.suffix;
658
if (StringDoesHaveText (osd.outfile)) {
659
osd.aip = AsnIoOpen (osd.outfile, "w");
660
if (osd.aip == NULL) {
661
Message (MSG_FATAL, "Unable to open output file");
665
if (StringHasNoText (osd.results_dir)) {
666
osd.results_dir = directory;
668
/* if we're putting the results in a separate directory, strip the directory name from the output base */
669
if (!StringHasNoText (osd.results_dir) && !StringHasNoText (osd.base)) {
671
ptr = StringRChr (osd.base, '\\');
673
ptr = StringRChr (osd.base, '/');
682
if (StringHasNoText(directory) && StringHasNoText(osd.base)) {
683
rval = ProcessStream (&isd, &osd, &asd, action_list);
684
} else if (StringDoesHaveText (osd.base)) {
685
ptr = StringRChr (osd.base, '.');
688
StringNCpy_0 (sfx, ptr, sizeof (sfx));
694
rval = ProcessStream (&isd, &osd, &asd, action_list);
696
rval = ProcessOneRecord (directory, &osd, action_list);
700
rval = FileRecurse (directory, &isd, &osd, &asd, action_list);
703
if (osd.aip != NULL) {
704
AsnIoFlush (osd.aip);
705
AsnIoClose (osd.aip);