5032
5033
VisitBioseqsInSep (sep, NULL, StripBadTitleFromProteinProducts);
5036
EC number replacement - copied from Sequin, with protection
5037
multiple reads if no replacement file available
5040
typedef struct ecrepdata {
5043
} EcRepData, PNTR EcRepPtr;
5045
static ValNodePtr ec_rep_list = NULL;
5046
static EcRepPtr PNTR ec_rep_data = NULL;
5047
static Int4 ec_rep_len = 0;
5048
static Boolean ec_rep_read = FALSE;
5050
static int LIBCALLBACK SortVnpByEcBefore (VoidPtr ptr1, VoidPtr ptr2)
5053
EcRepPtr erp1, erp2;
5055
ValNodePtr vnp1, vnp2;
5057
if (ptr1 == NULL || ptr2 == NULL) return 0;
5058
vnp1 = *((ValNodePtr PNTR) ptr1);
5059
vnp2 = *((ValNodePtr PNTR) ptr2);
5060
if (vnp1 == NULL || vnp2 == NULL) return 0;
5061
erp1 = (EcRepPtr) vnp1->data.ptrvalue;
5062
erp2 = (EcRepPtr) vnp2->data.ptrvalue;
5063
if (erp1 == NULL || erp2 == NULL) return 0;
5064
str1 = erp1->before;
5065
str2 = erp2->before;
5066
if (str1 == NULL || str2 == NULL) return 0;
5067
return StringCmp (str1, str2);
5070
static void SetupECReplacementTable (CharPtr file)
5077
ValNodePtr last = NULL;
5079
Char path [PATH_MAX];
5085
if (ec_rep_data != NULL) return;
5086
if (ec_rep_read) return;
5088
if (FindPath ("ncbi", "ncbi", "data", path, sizeof (path))) {
5089
FileBuildPath (path, NULL, file);
5090
sev = ErrSetMessageLevel (SEV_ERROR);
5091
fp = FileOpen (path, "r");
5092
ErrSetMessageLevel (sev);
5094
FileCacheSetup (&fc, fp);
5096
str = FileCacheReadLine (&fc, line, sizeof (line), NULL);
5097
while (str != NULL) {
5098
if (StringDoesHaveText (str)) {
5099
ptr = StringChr (str, '\t');
5103
/* only replace if a single destination number, not a split from one to many */
5104
if (StringChr (ptr, '\t') == NULL) {
5105
erp = (EcRepPtr) MemNew (sizeof (EcRepData));
5107
erp->before = StringSave (str);
5108
erp->after = StringSave (ptr);
5109
vnp = ValNodeAddPointer (&last, 0, (Pointer) erp);
5110
if (ec_rep_list == NULL) {
5118
str = FileCacheReadLine (&fc, line, sizeof (line), NULL);
5122
ec_rep_len = ValNodeLen (ec_rep_list);
5123
if (ec_rep_len > 0) {
5124
ec_rep_list = ValNodeSort (ec_rep_list, SortVnpByEcBefore);
5125
ec_rep_data = (EcRepPtr PNTR) MemNew (sizeof (EcRepPtr) * (ec_rep_len + 1));
5126
if (ec_rep_data != NULL) {
5127
for (vnp = ec_rep_list, i = 0; vnp != NULL; vnp = vnp->next, i++) {
5128
erp = (EcRepPtr) vnp->data.ptrvalue;
5129
ec_rep_data [i] = erp;
5139
static CharPtr GetECReplacement (CharPtr str)
5145
if (StringHasNoText (str)) return NULL;
5151
erp = ec_rep_data [(int) mid];
5152
if (erp != NULL && StringCmp (erp->before, str) < 0) {
5158
erp = ec_rep_data [(int) R];
5159
if (erp != NULL && StringCmp (erp->before, str) == 0 && StringChr (erp->after, '\t') == NULL) return erp->after;
5164
static void UpdateProtEC (SeqFeatPtr sfp, Pointer userdata)
5167
Int2 inf_loop_check;
5036
static void RemoveOrgFromEndOfProtein (SeqFeatPtr sfp, Pointer userdata)
5174
5049
if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT) return;
5175
5050
prp = (ProtRefPtr) sfp->data.value.ptrvalue;
5176
if (prp == NULL || prp->ec == NULL) return;
5177
for (vnp = prp->ec; vnp != NULL; vnp = vnp->next) {
5051
if (prp == NULL) return;
5053
for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
5178
5054
str = (CharPtr) vnp->data.ptrvalue;
5179
5055
if (StringHasNoText (str)) continue;
5180
rep = GetECReplacement (str);
5181
if (rep == NULL) continue;
5184
while (nxt != NULL && inf_loop_check < 10) {
5187
nxt = GetECReplacement (rep);
5189
vnp->data.ptrvalue = MemFree (vnp->data.ptrvalue);
5190
vnp->data.ptrvalue = StringSave (rep);
5056
len = StringLen (str);
5057
if (len < 5) continue;
5058
if (str [len - 1] != ']') continue;
5059
cp = StringRChr (str, '[');
5060
if (cp == NULL) continue;
5061
if (StringNCmp (cp, "[NAD", 4) == 0) continue;
5062
bsp = BioseqFindFromSeqLoc (sfp->location);
5063
if (bsp == NULL) continue;
5064
sdp = GetNextDescriptorUnindexed (bsp, Seq_descr_source, NULL);
5065
if (sdp == NULL) continue;
5066
biop = (BioSourcePtr) sdp->data.ptrvalue;
5067
if (biop == NULL) continue;
5069
if (orp == NULL) continue;
5070
if (StringHasNoText (orp->taxname)) continue;
5071
len = StringLen (orp->taxname);
5072
if (StringLen (cp) != len + 2) continue;
5073
if (StringNICmp (cp + 1, orp->taxname, len - 1) != 0) continue;
5075
TrimSpacesAroundString (orp->taxname);
5323
5211
SeqEntryExplore (sep, NULL, CleanupEmptyFeatCallback);
5324
5212
SeqEntryExplore (sep, NULL, MergeAdjacentAnnotsCallback);
5325
5213
/* VisitBioseqsInSep (sep, NULL, BarCodeTechToKeyword); */
5326
SetupECReplacementTable ("ecnum_replaced.txt");
5327
if (ec_rep_data != NULL && ec_rep_len > 0) {
5328
VisitFeaturesInSep (sep, NULL, UpdateProtEC);
5215
/* tbl2asn now processes EC numbers with reporting before SSEC */
5216
UpdateReplacedECNumbersEx (sep, NULL, NULL, TRUE, FALSE);
5219
if (GetAppProperty ("NcbiTbl2Asn") != NULL) {
5220
DeleteBadECNumbers (sep);
5330
5224
/* reindex, since CdEndCheck (from CdCheck) gets best overlapping gene */
5331
5225
SeqMgrIndexFeatures (entityID, NULL);
6022
5921
hasNulls = LocationHasNullsBetween (sfp->location);
6024
5923
if (sfp->data.choice == SEQFEAT_GENE) {
6025
slp = SeqLocMergeExEx (bsp, sfp->location, NULL, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE);
5924
slp = SeqLocMergeExEx (bsp, sfp->location, NULL, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE);
6026
5925
hasNulls = FALSE;
6027
5926
sfp->partial = FALSE;
6028
5927
} else if (sfp->data.choice == SEQFEAT_CDREGION) {
6029
slp = SeqLocMergeExEx (bsp, sfp->location, NULL, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE);
5928
slp = SeqLocMergeExEx (bsp, sfp->location, NULL, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE);
6030
5929
} else if (sfp->data.choice == SEQFEAT_RNA) {
6031
slp = SeqLocMergeExEx (bsp, sfp->location, NULL, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE);
5930
slp = SeqLocMergeExEx (bsp, sfp->location, NULL, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE);
6033
slp = SeqLocMergeExEx (bsp, sfp->location, NULL, FALSE, TRUE, FALSE, hasNulls, FALSE, FALSE);
5932
slp = SeqLocMergeExEx (bsp, sfp->location, NULL, FALSE, TRUE, FALSE, hasNulls, FALSE, FALSE, FALSE);
6035
5934
if (slp == NULL) {
6036
5935
ValNodeFree (partiallist);