894
static void DoCleanup (
899
static ByteStorePtr Se2Bs (
907
if (sep == NULL) return NULL;
910
if (bs == NULL) return NULL;
911
aibp = AsnIoBSOpen ("w", bs);
912
if (aibp == NULL) return NULL;
914
SeqEntryAsnWrite (sep, aibp->aip, NULL);
916
AsnIoFlush (aibp->aip);
922
static void RemoveFeatureCitations (
928
if (sfp == NULL || sfp->cit == NULL) return;
930
sfp->cit = PubSetFree (sfp->cit);
934
static SeqEntryPtr CppBasicCleanup (
941
ByteStorePtr bs1, bs2;
943
SeqEntryPtr csep, nsep;
944
Char path1 [PATH_MAX];
945
Char path2 [PATH_MAX];
946
Char path3 [PATH_MAX];
948
if (sep == NULL || cfp == NULL) return NULL;
950
VisitFeaturesInSep (sep, NULL, RemoveFeatureCitations);
956
aop = AsnIoOpen (path1, "w");
957
SeqEntryAsnWrite (sep, aop, NULL);
960
sprintf (cmmd, "%s -i %s | cleanasn -a e -o %s",
961
"~/ncbi_cxx/compilers/xCode/build/bin/Debug/test_basic_cleanup",
965
sprintf (cmmd, "cleanasn -i %s -o %s -K b",
969
aip = AsnIoOpen (path3, "r");
970
csep = SeqEntryAsnRead (aip, NULL);
975
aip = AsnIoOpen (path2, "r");
976
nsep = SeqEntryAsnRead (aip, NULL);
982
if (cfp->logfp != NULL) {
983
fprintf (cfp->logfp, "EMPTY %s\n", cfp->buf);
986
} else if (! BSEqual (bs1, bs2)) {
987
if (cfp->logfp != NULL) {
988
fprintf (cfp->logfp, "BSEC DIFF %s\n", cfp->buf);
992
sprintf (cmmd, "echo '' >> ~/Desktop/diffclean.txt");
994
sprintf (cmmd, "echo '' >> ~/Desktop/diffclean.txt");
996
sprintf (cmmd, "echo '********** gi|%ld **********' >> ~/Desktop/diffclean.txt", (long) cfp->gi);
998
sprintf (cmmd, "echo '' >> ~/Desktop/diffclean.txt");
1000
sprintf (cmmd, "diff %s %s >> ~/Desktop/diffclean.txt", path3, path2);
1008
SeqEntryFree (csep);
1010
sprintf (cmmd, "rm %s; rm %s; rm %s", path1, path2, path3);
1017
static time_t DoCleanup (
895
1018
SeqEntryPtr sep,
1028
SeqEntryPtr fsep, nsep = NULL;
903
1029
SeqIdPtr sip, siphead;
905
if (sep == NULL || cfp == NULL) return;
1030
time_t starttime, stoptime;
1032
if (sep == NULL || cfp == NULL) return 0;
1034
starttime = GetSecs ();
907
1036
StringCpy (cfp->buf, "");
908
1038
fsep = FindNthBioseq (sep, 1);
909
1039
if (fsep != NULL && fsep->choice == 1) {
910
1040
bsp = (BioseqPtr) fsep->data.ptrvalue;
921
1054
if (StringChr (cfp->report, 'r') != NULL) {
922
1055
DoASNReport (sep, cfp);
1056
stoptime = GetSecs ();
1057
return stoptime - starttime;
925
1059
if (StringChr (cfp->report, 'g') != NULL) {
926
1060
DoGBFFReport (sep, cfp);
1061
stoptime = GetSecs ();
1062
return stoptime - starttime;
929
1064
if (StringChr (cfp->report, 'm') != NULL) {
930
1065
DoModernizeReport (sep, cfp);
1066
stoptime = GetSecs ();
1067
return stoptime - starttime;
934
1070
if (cfp->logfp != NULL) {
939
1075
if (StringChr (cfp->clean, 'b') != NULL) {
940
1076
BasicSeqEntryCleanup (sep);
1079
if (StringChr (cfp->clean, 'p') != NULL) {
1080
nsep = CppBasicCleanup (sep, cfp);
942
1083
if (StringChr (cfp->clean, 's') != NULL) {
943
1084
SeriousSeqEntryCleanup (sep, NULL, NULL);
1086
if (StringChr (cfp->clean, 'g') != NULL) {
1087
GpipeSeqEntryCleanup (sep);
945
1089
if (StringChr (cfp->clean, 'n') != NULL) {
946
1090
NormalizeDescriptorOrder (sep);
1092
if (StringChr (cfp->clean, 'u') != NULL) {
1093
RemoveAllNcbiCleanupUserObjects (sep);
949
1096
if (StringChr (cfp->modernize, 'g') != NULL) {
950
1097
VisitFeaturesInSep (sep, NULL, ModGenes);
1000
1147
SeqMgrIndexFeatures (entityID, 0);
1001
1148
DoAutoDef (sep, entityID);
1151
if (cfp->action_list != NULL) {
1152
ApplyMacroToSeqEntry (sep, cfp->action_list, NULL, NULL);
1155
stoptime = GetSecs ();
1159
SeqSubmitAsnWrite (ssp, aop, atp);
1160
} else if (nsep != NULL) {
1161
SeqEntryAsnWrite (nsep, aop, atp);
1162
SeqEntryFree (nsep);
1164
SeqEntryAsnWrite (sep, aop, atp);
1168
return stoptime - starttime;
1005
1171
static void CleanupSingleRecord (
1122
1292
sep = GetTopSeqEntryForEntityID (entityID);
1123
1293
if (sep != NULL && StringDoesHaveText (path)) {
1125
DoCleanup (sep, entityID, cfp);
1127
1295
aop = AsnIoOpen (path, "w");
1297
DoCleanup (sep, entityID, cfp, aop, NULL, ssp);
1128
1299
if (aop != NULL) {
1129
if (datatype == OBJ_SEQSUB) {
1130
SeqSubmitAsnWrite ((SeqSubmitPtr) dataptr, aop, NULL);
1132
SeqEntryAsnWrite (sep, aop, NULL);
1134
1300
AsnIoFlush (aop);
1135
1301
AsnIoClose (aop);
1269
1435
while ((atp = AsnReadId (aip, cfp->amp, atp)) != NULL) {
1270
1436
if (atp == cfp->atp_se) {
1438
SeqMgrHoldIndexing (TRUE);
1272
1439
sep = SeqEntryAsnRead (aip, atp);
1440
SeqMgrHoldIndexing (FALSE);
1273
1442
if (sep != NULL) {
1275
1444
entityID = ObjMgrGetEntityIDForChoice (sep);
1277
starttime = GetSecs ();
1278
DoCleanup (sep, entityID, cfp);
1279
stoptime = GetSecs ();
1446
timediff = DoCleanup (sep, entityID, cfp, aop, cfp->atp_se, NULL);
1281
if (stoptime - starttime > worsttime) {
1282
worsttime = stoptime - starttime;
1448
if (timediff > worsttime) {
1449
worsttime = timediff;
1283
1450
StringCpy (longest, cfp->buf);
1287
SeqEntryAsnWrite (sep, aop, cfp->atp_se);
1289
1454
ObjMgrFreeByEntityID (entityID);
1418
1584
TRUE, 'm', ARG_STRING, 0.0, 0, NULL},
1420
1586
" b BasicSeqEntryCleanup\n"
1587
" p C++ BasicCleanup\n"
1421
1588
" s SeriousSeqEntryCleanup\n"
1422
" n Normalize Descriptor Order", NULL, NULL, NULL,
1589
" g GpipeSeqEntryCleanup\n"
1590
" n Normalize Descriptor Order\n"
1591
" u Remove NcbiCleanup User Objects", NULL, NULL, NULL,
1423
1592
TRUE, 'K', ARG_STRING, 0.0, 0, NULL},
1442
1611
TRUE, 'D', ARG_STRING, 0.0, 0, NULL},
1443
1612
{"Miscellaneous\n"
1444
1613
" d Automatic Definition Line", NULL, NULL, NULL,
1445
TRUE, 'M', ARG_STRING, 0.0, 0, NULL},
1614
TRUE, 'X', ARG_STRING, 0.0, 0, NULL},
1615
{"Macro File", NULL, NULL, NULL,
1616
TRUE, 'M', ARG_FILE_IN, 0.0, 0, NULL},
1446
1617
{"Taxonomy Lookup", "F", NULL, NULL,
1447
1618
TRUE, 'T', ARG_BOOLEAN, 0.0, 0, NULL},
1448
1619
{"Publication Lookup", "F", NULL, NULL,
1452
1623
Int2 Main (void)
1626
ValNodePtr action_list;
1455
1628
Char app [64], mode, type;
1456
1629
CleanFlagData cfd;
1457
CharPtr directory, filter, infile, logfile, outfile, results, str, suffix;
1630
CharPtr directory, filter, infile, logfile, outfile,
1631
macro_file, results, str, suffix;
1458
1632
Boolean remote;
1459
1633
time_t runtime, starttime, stoptime;
1583
1757
cfd.link = myargs [N_argLink].strvalue;
1584
1758
cfd.feat = myargs [F_argFeat].strvalue;
1585
1759
cfd.desc = myargs [D_argDesc].strvalue;
1586
cfd.mods = myargs [M_argMods].strvalue;
1760
cfd.mods = myargs [X_argMods].strvalue;
1587
1761
cfd.taxon = (Boolean) myargs [T_argTaxonLookup].intvalue;
1588
1762
cfd.pub = (Boolean) myargs [P_argPubLookup].intvalue;
1764
macro_file = myargs [M_argMacro].strvalue;
1765
if (StringDoesHaveText (macro_file)) {
1766
aip = AsnIoOpen (macro_file, "r");
1768
Message (MSG_FATAL, "Unable to open macro file '%s'", macro_file);
1771
action_list = MacroActionListAsnRead (aip, NULL);
1773
if (action_list == NULL) {
1774
Message (MSG_FATAL, "Unable to read macro file '%s'", macro_file);
1776
cfd.action_list = action_list;
1590
1779
cfd.amp = AsnAllModPtr ();
1591
1780
cfd.atp_bss = AsnFind ("Bioseq-set");
1592
1781
cfd.atp_bsss = AsnFind ("Bioseq-set.seq-set");