46
46
#include <objall.h>
47
47
#include <objsset.h>
48
49
#include <objsub.h>
49
50
#include <sequtil.h>
50
51
#include <sqnutils.h>
51
53
#include <toasn3.h>
53
static void CleanupOneRecord (CharPtr directory, CharPtr filename)
56
#ifdef INTERNAL_NCBI_CLEANASN
57
#include <accpubseq.h>
60
#define CLEANASN_APP_VER "1.1"
62
CharPtr CLEANASN_APPLICATION = CLEANASN_APP_VER;
64
static void RemoveFeatUser (
70
if (sfp == NULL) return;
71
if (sfp->ext != NULL) {
72
sfp->ext = UserObjectFree (sfp->ext);
76
static void RemoveFeatDbxref (
85
if (sfp == NULL) return;
86
for (vnp = sfp->dbxref; vnp != NULL; vnp = next) {
88
dbt = (DbtagPtr) vnp->data.ptrvalue;
95
typedef struct dummysmfedata {
98
} DummySmfeData, PNTR DummySmfePtr;
100
static Boolean LIBCALLBACK CADummySMFEProc (
102
SeqMgrFeatContextPtr context
110
if (sfp == NULL || context == NULL) return TRUE;
111
dsp = context->userdata;
112
if (dsp == NULL) return TRUE;
114
len = SeqLocLen (sfp->location);
115
if (len < dsp->max) {
118
} else if (len == dsp->max) {
125
static void RemoveUnnecGeneXref (
132
SeqFeatXrefPtr curr, next;
134
SeqMgrFeatContext fcontext;
135
SeqFeatXrefPtr PNTR last;
136
GeneRefPtr grp, grpx;
140
if (sfp == NULL || sfp->data.choice == SEQFEAT_GENE) return;
141
grp = SeqMgrGetGeneXref (sfp);
142
if (grp == NULL || SeqMgrGeneIsSuppressed (grp)) return;
143
sfpx = SeqMgrGetOverlappingGene (sfp->location, &fcontext);
144
if (sfpx == NULL || sfpx->data.choice != SEQFEAT_GENE) return;
145
grpx = (GeneRefPtr) sfpx->data.value.ptrvalue;
146
if (grpx == NULL) return;
148
if ((StringDoesHaveText (grp->locus)) &&
149
(StringDoesHaveText (grpx->locus))) {
150
if ((StringICmp (grp->locus, grpx->locus) != 0)) return;
151
} else if (StringDoesHaveText (grp->locus_tag) &&
152
StringDoesHaveText (grpx->locus_tag)) {  /* test the overlapping gene's locus_tag (grpx), mirroring the locus branch; the original tested grp twice */
153
if ((StringICmp (grp->locus_tag, grpx->locus_tag) != 0)) return;
154
} else if (grp->syn != NULL && grpx->syn != NULL) {
155
syn1 = (CharPtr) grp->syn->data.ptrvalue;
156
syn2 = (CharPtr) grpx->syn->data.ptrvalue;
157
if ((StringDoesHaveText (syn1)) && (StringDoesHaveText (syn2))) {
158
if ((StringICmp (syn1, syn2) != 0)) return;
162
MemSet ((Pointer) &dsd, 0, sizeof (DummySmfeData));
165
count = SeqMgrGetAllOverlappingFeatures (sfp->location, FEATDEF_GENE,
166
NULL, 0, LOCATION_SUBSET,
167
(Pointer) &dsd, CADummySMFEProc);
169
if (dsd.num_at_max < 2) {
170
last = (SeqFeatXrefPtr PNTR) &(sfp->xref);
172
while (curr != NULL) {
174
if (curr->data.choice == SEQFEAT_GENE) {
177
SeqFeatXrefFree (curr);
179
last = &(curr->next);
186
static void CleanupOneRecord (
69
210
fp = FileOpen (path, "r");
70
211
if (fp == NULL) return;
72
dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, &entityID, FALSE, FALSE, TRUE, FALSE);
213
dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, &entityID, FALSE,
76
218
sep = GetTopSeqEntryForEntityID (entityID);
77
219
if (sep != NULL) {
78
SeriousSeqEntryCleanup (sep, NULL, NULL);
221
if (StringChr (clean, 'b') != NULL) {
222
BasicSeqEntryCleanup (sep);
224
if (StringChr (clean, 's') != NULL) {
225
SeriousSeqEntryCleanup (sep, NULL, NULL);
229
Taxon3ReplaceOrgInSeqEntry (sep, FALSE);
232
if (StringChr (link, 'o') != NULL) {
233
SeqMgrIndexFeatures (entityID, 0);
234
LinkCDSmRNAbyOverlap (sep);
236
if (StringChr (link, 'p') != NULL) {
237
SeqMgrIndexFeatures (entityID, 0);
238
LinkCDSmRNAbyProduct (sep);
240
if (StringChr (link, 'r') != NULL) {
241
SeqMgrIndexFeatures (entityID, 0);
242
ReassignFeatureIDs (sep);
245
if (StringChr (feat, 'u') != NULL) {
246
VisitFeaturesInSep (sep, NULL, RemoveFeatUser);
248
if (StringChr (feat, 'd') != NULL) {
249
VisitFeaturesInSep (sep, NULL, RemoveFeatDbxref);
251
if (StringChr (feat, 'r') != NULL) {
252
SeqMgrIndexFeatures (entityID, 0);
253
VisitFeaturesInSep (sep, NULL, RemoveUnnecGeneXref);
256
StringNCpy_0 (path, results, sizeof (path));
257
FileBuildPath (path, NULL, filename);
80
259
aip = AsnIoOpen (path, "w");
81
260
if (aip != NULL) {
92
271
ObjMgrFreeByEntityID (entityID);
274
/* Args structure contains command-line arguments */
276
#define p_argInputPath 0
277
#define r_argOutputPath 1
281
#define t_argTaxonLookup 5
282
#define R_argRemote 6
95
284
Args myargs [] = {
96
{"Path to files", NULL, NULL, NULL,
97
FALSE, 'p', ARG_STRING, 0.0, 0, NULL}
285
{"Path to Files", NULL, NULL, NULL,
286
FALSE, 'p', ARG_STRING, 0.0, 0, NULL},
287
{"Path for Results", NULL, NULL, NULL,
288
TRUE, 'r', ARG_STRING, 0.0, 0, NULL},
289
{"Cleanup (b BasicSeqEntryCleanup, s SeriousSeqEntryCleanup)", NULL, NULL, NULL,
290
TRUE, 'c', ARG_STRING, 0.0, 0, NULL},
291
{"Link (o LinkCDSmRNAbyOverlap, p LinkCDSmRNAbyProduct, r ReassignFeatureIDs)", NULL, NULL, NULL,
292
TRUE, 'l', ARG_STRING, 0.0, 0, NULL},
293
{"Feature (u Remove User Object, d Remove db_xref, r Remove Redundant Gene xref)", NULL, NULL, NULL,
294
TRUE, 'f', ARG_STRING, 0.0, 0, NULL},
295
{"Taxonomy Lookup", "F", NULL, NULL,
296
TRUE, 't', ARG_BOOLEAN, 0.0, 0, NULL},
297
{"Remote Fetching from ID", "F", NULL, NULL,
298
TRUE, 'R', ARG_BOOLEAN, 0.0, 0, NULL},
305
CharPtr clean, feat, link;
306
CharPtr directory, results;
307
ValNodePtr head, vnp;
308
Boolean remote, taxon;
105
312
ErrSetFatalLevel (SEV_MAX);
106
313
ErrClearOptFlags (EO_SHOW_USERSTR);
107
314
UseLocalAsnloadDataAndErrMsg ();
317
/* finish resolving internal connections in ASN.1 parse tables */
110
319
if (! AllObjLoad ()) {
111
320
Message (MSG_FATAL, "AllObjLoad failed");
127
if (! GetArgs ("cleanasn", sizeof (myargs) / sizeof (Args), myargs)) {
130
if (StringHasNoText (myargs [0].strvalue)) {
134
dir = DirCatalog ((CharPtr) myargs [0].strvalue);
135
for (vnp = dir; vnp != NULL; vnp = vnp->next) {
340
/* process command line arguments */
342
sprintf (app, "cleanasn %s", CLEANASN_APPLICATION);
343
if (! GetArgs (app, sizeof (myargs) / sizeof (Args), myargs)) {
347
directory = (CharPtr) myargs [p_argInputPath].strvalue;
348
if (StringHasNoText (directory)) {
349
Message (MSG_FATAL, "You must supply an input directory (-p).\nUse -p . to specify the current directory.\n\n");
352
results = (CharPtr) myargs [r_argOutputPath].strvalue;
353
if (StringHasNoText (results)) {
357
clean = myargs [c_argClean].strvalue;
358
link = myargs [l_argLink].strvalue;
359
feat = myargs [f_argFeat].strvalue;
361
taxon = (Boolean) myargs [t_argTaxonLookup].intvalue;
362
remote = (Boolean) myargs [R_argRemote].intvalue;
365
#ifdef INTERNAL_NCBI_CLEANASN
366
if (! PUBSEQBioseqFetchEnable ("cleanasn", FALSE)) {
367
Message (MSG_POSTERR, "PUBSEQBioseqFetchEnable failed");
371
PubSeqFetchEnable ();
375
head = DirCatalog (directory);
376
for (vnp = head; vnp != NULL; vnp = vnp->next) {
136
377
if (vnp->choice == 0) {
137
CleanupOneRecord (myargs [0].strvalue, (CharPtr) vnp->data.ptrvalue);
378
CleanupOneRecord (directory, results,
379
(CharPtr) vnp->data.ptrvalue,
380
clean, link, feat, taxon);
140
ValNodeFreeData (dir);
383
ValNodeFreeData (head);
386
#ifdef INTERNAL_NCBI_CLEANASN
387
PUBSEQBioseqFetchDisable ();
389
PubSeqFetchDisable ();