3601
static Boolean CheckForInconsistentMolInfos (SeqEntryPtr sep, ValidStructPtr vsp, MolInfoPtr PNTR mipp, BioseqSetPtr top)
3606
SeqMgrDescContext dcontext;
3607
Uint2 entityID = 0, oldEntityID;
3608
MolInfoPtr firstmip;
3609
GatherContextPtr gcp;
3610
Uint4 itemID = 0, oldItemID;
3611
Uint2 itemtype = 0, oldItemtype;
3616
if (sep == NULL || vsp == NULL || mipp == NULL)
3620
if (IS_Bioseq_set (sep)) {
3621
bssp = (BioseqSetPtr) sep->data.ptrvalue;
3624
for (tmp = bssp->seq_set; tmp != NULL; tmp = tmp->next) {
3625
if (CheckForInconsistentMolInfos (tmp, vsp, mipp, top))
3631
if (!IS_Bioseq (sep))
3633
bsp = (BioseqPtr) sep->data.ptrvalue;
3637
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
3638
if (sdp == NULL) return FALSE;
3639
mip = (MolInfoPtr) sdp->data.ptrvalue;
3640
if (mip == NULL || mip->biomol == MOLECULE_TYPE_PEPTIDE) return FALSE;
3643
if (firstmip == NULL) {
3648
if (mip->biomol == firstmip->biomol) return FALSE;
3650
oldEntityID = gcp->entityID;
3651
oldItemID = gcp->itemID;
3652
oldItemtype = gcp->thistype;
3654
gcp->entityID = entityID;
3655
gcp->itemID = itemID;
3656
gcp->thistype = itemtype;
3659
gcp->entityID = top->idx.entityID;
3660
gcp->itemID = top->idx.itemID;
3661
gcp->thistype = OBJ_BIOSEQSET;
3664
/* only report the first one that doesn't match */
3666
ValidErr (vsp, SEV_WARNING, ERR_SEQ_PKG_InconsistentMolInfoBiomols, "Pop/phy/mut/eco set contains inconsistent MolInfo biomols");
3668
gcp->entityID = oldEntityID;
3669
gcp->itemID = oldItemID;
3670
gcp->thistype = oldItemtype;
3675
static void LookForMolInfoInconsistency (BioseqSetPtr bssp, ValidStructPtr vsp)
3678
MolInfoPtr mip = NULL;
3681
if (bssp == NULL) return;
3683
for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
3684
if (CheckForInconsistentMolInfos (sep, vsp, &mip, bssp))
3578
3689
static void ValidatePopSet (BioseqSetPtr bssp, ValidStructPtr vsp)
7712
7924
"West Bank\tWE\t34.8\t31.3\t35.6\t32.6",
7713
7925
"Western Sahara\tWI\t-17.2\t20.7\t-8.7\t27.7",
7714
7926
"Yemen\tYM\t41.8\t11.7\t54.5\t19.0",
7715
"Yugoslavia\tXX\t13.3\t40.8\t23.0\t46.9",
7716
7927
"Zambia\tZA\t21.9\t-18.1\t33.7\t-8.2",
7717
7928
"Zimbabwe\tZI\t25.2\t-22.5\t33.1\t-15.6",
7721
typedef struct ctdata {
7933
/* one CtBlock for each discontiguous area per country */
7935
typedef struct ctblock {
7936
CharPtr country; /* points to instance in countries list */
7727
} CtData, PNTR CtDataPtr;
7729
typedef struct ctlist {
7730
ValNodePtr countries;
7731
ValNodePtr boundaries;
7732
CtDataPtr PNTR ctarray;
7733
CtDataPtr PNTR bdarray;
7735
} CtList, PNTR CtListPtr;
7737
static int LIBCALLBACK SortCdpByCountry (VoidPtr ptr1, VoidPtr ptr2)
7741
CtDataPtr cdp1, cdp2;
7743
if (ptr1 == NULL || ptr2 == NULL) return 0;
7744
cdp1 = *((CtDataPtr PNTR) ptr1);
7745
cdp2 = *((CtDataPtr PNTR) ptr2);
7746
if (cdp1 == NULL || cdp2 == NULL) return 0;
7748
compare = StringICmp (cdp1->country, cdp2->country);
7751
} else if (compare < 0) {
7758
static int LIBCALLBACK SortCdpByBounds (VoidPtr ptr1, VoidPtr ptr2)
7761
CtDataPtr cdp1, cdp2;
7763
if (ptr1 == NULL || ptr2 == NULL) return 0;
7764
cdp1 = *((CtDataPtr PNTR) ptr1);
7765
cdp2 = *((CtDataPtr PNTR) ptr2);
7766
if (cdp1 == NULL || cdp2 == NULL) return 0;
7768
if (cdp1->minx > cdp2->minx) {
7770
} else if (cdp1->minx < cdp2->minx) {
7774
if (cdp1->maxx > cdp2->maxx) {
7776
} else if (cdp1->maxx < cdp2->maxx) {
7780
if (cdp1->miny > cdp2->miny) {
7782
} else if (cdp1->miny < cdp2->miny) {
7786
if (cdp1->maxy > cdp2->maxy) {
7788
} else if (cdp1->maxy < cdp2->maxy) {
7795
static CtListPtr CtLatLonDataFree (
7800
if (clp == NULL) return NULL;
7802
ValNodeFreeData (clp->countries);
7803
ValNodeFreeData (clp->boundaries);
7805
MemFree (clp->ctarray);
7806
MemFree (clp->bdarray);
7941
} CtBlock, PNTR CtBlockPtr;
7943
/* one CtGrid for each 10-degree-by-10-degree area touched by a CtBlock */
7945
typedef struct ctgrid {
7949
} CtGrid, PNTR CtGridPtr;
7951
/* main structure for country/lat-lon lookup */
7953
typedef struct ctset {
7954
ValNodePtr countries;
7957
CtBlockPtr PNTR bkarray; /* sorted by country name */
7958
CtGridPtr PNTR gdarray; /* sorted by geographic index */
7961
} CtSet, PNTR CtSetPtr;
7963
static int LIBCALLBACK SortCbpByCountry (
7970
CtBlockPtr cbp1, cbp2;
7972
if (ptr1 == NULL || ptr2 == NULL) return 0;
7973
cbp1 = *((CtBlockPtr PNTR) ptr1);
7974
cbp2 = *((CtBlockPtr PNTR) ptr2);
7975
if (cbp1 == NULL || cbp2 == NULL) return 0;
7977
compare = StringICmp (cbp1->country, cbp2->country);
7980
} else if (compare < 0) {
7987
static int CgpGridComp (
7994
if (cgp1 == NULL) return 0;
7996
if (cgp1->xindex > xindex) {
7998
} else if (cgp1->xindex < xindex) {
8002
if (cgp1->yindex > yindex) {
8004
} else if (cgp1->yindex < yindex) {
8011
static int LIBCALLBACK SortCgpByGrid (
8017
CtBlockPtr cbp1, cbp2;
8018
CtGridPtr cgp1, cgp2;
8021
if (ptr1 == NULL || ptr2 == NULL) return 0;
8022
cgp1 = *((CtGridPtr PNTR) ptr1);
8023
cgp2 = *((CtGridPtr PNTR) ptr2);
8024
if (cgp1 == NULL || cgp2 == NULL) return 0;
8026
compare = CgpGridComp (cgp1, cgp2->xindex, cgp2->yindex);
8029
} else if (compare < 0) {
8035
if (cbp1 == NULL || cbp2 == NULL) return 0;
8037
if (cbp1->minx > cbp2->minx) {
8039
} else if (cbp1->minx < cbp2->minx) {
8043
if (cbp1->maxx > cbp2->maxx) {
8045
} else if (cbp1->maxx < cbp2->maxx) {
8049
if (cbp1->miny > cbp2->miny) {
8051
} else if (cbp1->miny < cbp2->miny) {
8055
if (cbp1->maxy > cbp2->maxy) {
8057
} else if (cbp1->maxy < cbp2->maxy) {
8061
compare = StringICmp (cbp1->country, cbp2->country);
8064
} else if (compare < 0) {
8071
static Int2 LatLonDegreeToIndex (
8088
static CtSetPtr CtSetDataFree (
8093
if (csp == NULL) return NULL;
8095
ValNodeFreeData (csp->countries);
8096
ValNodeFreeData (csp->blocks);
8097
ValNodeFreeData (csp->grids);
8099
MemFree (csp->bkarray);
8100
MemFree (csp->gdarray);
7813
static Boolean ct_list_not_found = FALSE;
8107
static Boolean ct_set_not_found = FALSE;
7815
static CtListPtr GetCtLatLonDataInt (
8109
static CtSetPtr GetCtSetLatLonDataInt (
7818
8112
CharPtr PNTR local
7822
ValNodePtr boundaries = NULL;
7825
CtDataPtr PNTR ctarray;
7826
CtDataPtr PNTR bdarray;
7828
ValNodePtr countries = NULL;
7834
ValNodePtr lastbdry = NULL;
7835
ValNodePtr lastctry = NULL;
7838
Char path [PATH_MAX];
7846
clp = (CtListPtr) GetAppProperty (prop);
7847
if (clp != NULL) return clp;
7849
if (ct_list_not_found) return NULL;
8116
CtBlockPtr PNTR bkarray;
8117
ValNodePtr blocks = NULL;
8121
ValNodePtr countries = NULL;
8126
CtGridPtr PNTR gdarray;
8127
ValNodePtr grids = NULL;
8132
ValNodePtr lastblk = NULL;
8133
ValNodePtr lastctry = NULL;
8134
ValNodePtr lastgrd = NULL;
8139
Char path [PATH_MAX];
8149
csp = (CtSetPtr) GetAppProperty (prop);
8150
if (csp != NULL) return csp;
8152
if (ct_set_not_found) return NULL;
7851
8154
if (FindPath ("ncbi", "ncbi", "data", path, sizeof (path))) {
7852
8155
FileBuildPath (path, NULL, file);
7940
8270
FileClose (fp);
7943
if (countries == NULL || boundaries == NULL) {
7944
ct_list_not_found = TRUE;
8273
if (countries == NULL || blocks == NULL || grids == NULL) {
8274
ct_set_not_found = TRUE;
7948
clp = (CtListPtr) MemNew (sizeof (CtList));
7949
if (clp == NULL) return NULL;
8278
csp = (CtSetPtr) MemNew (sizeof (CtSet));
8279
if (csp == NULL) return NULL;
7951
8281
/* now populate, heap sort arrays */
7953
num = ValNodeLen (boundaries);
7955
clp->countries = countries;
7956
clp->boundaries = boundaries;
7957
clp->num = (Int2) num;
7959
ctarray = (CtDataPtr PNTR) MemNew (sizeof (CtDataPtr) * (num + 1));
7960
if (ctarray != NULL) {
7961
for (vnp = boundaries, i = 0; vnp != NULL; vnp = vnp->next, i++) {
7962
cdp = (CtDataPtr) vnp->data.ptrvalue;
7966
HeapSort (ctarray, (size_t) num, sizeof (CtDataPtr), SortCdpByCountry);
7967
clp->ctarray = ctarray;
7970
bdarray = (CtDataPtr PNTR) MemNew (sizeof (CtDataPtr) * (num + 1));
7971
if (bdarray != NULL) {
7972
for (vnp = boundaries, i = 0; vnp != NULL; vnp = vnp->next, i++) {
7973
cdp = (CtDataPtr) vnp->data.ptrvalue;
7977
HeapSort (bdarray, (size_t) num, sizeof (CtDataPtr), SortCdpByBounds);
7978
clp->bdarray = bdarray;
7981
SetAppProperty (prop, (Pointer) clp);
8283
num = ValNodeLen (blocks);
8285
csp->countries = countries;
8286
csp->blocks = blocks;
8287
csp->num_blocks = (Int2) num;
8289
bkarray = (CtBlockPtr PNTR) MemNew (sizeof (CtBlockPtr) * (num + 1));
8290
if (bkarray != NULL) {
8291
for (vnp = blocks, i = 0; vnp != NULL; vnp = vnp->next, i++) {
8292
cbp = (CtBlockPtr) vnp->data.ptrvalue;
8296
HeapSort (bkarray, (size_t) num, sizeof (CtBlockPtr), SortCbpByCountry);
8297
csp->bkarray = bkarray;
8300
num = ValNodeLen (grids);
8302
csp->num_grids = (Int2) num;
8304
gdarray = (CtGridPtr PNTR) MemNew (sizeof (CtGridPtr) * (num + 1));
8305
if (gdarray != NULL) {
8306
for (vnp = grids, i = 0; vnp != NULL; vnp = vnp->next, i++) {
8307
cgp = (CtGridPtr) vnp->data.ptrvalue;
8311
HeapSort (gdarray, (size_t) num, sizeof (CtGridPtr), SortCgpByGrid);
8312
csp->gdarray = gdarray;
8315
SetAppProperty (prop, (Pointer) csp);
7986
static CtListPtr GetCtLatLonData (
8320
static CtSetPtr GetCtSetLatLonData (
7991
return GetCtLatLonDataInt ("CountryLatLonList", "country_lat_lon.txt", ctry_lat_lon);
8325
return GetCtSetLatLonDataInt ("CountryLatLonList", "country_lat_lon.txt", ctry_lat_lon);
7994
8328
NLM_EXTERN Boolean IsCountryInLatLonList (
9167
9479
} else if (ssp->subtype == SUBSRC_isolation_source) {
9168
9480
is_iso_source = TRUE;
9169
} else if (ssp->subtype == SUBSRC_plasmid_name) {
9481
} else if (ssp->subtype == SUBSRC_plasmid_name) {
9170
9482
if (biop->genome != GENOME_plasmid) {
9171
9483
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Plasmid subsource but not plasmid location");
9485
} else if (ssp->subtype == SUBSRC_plastid_name) {
9486
if (StringCmp (ssp->name, "chloroplast") == 0) {
9487
if (biop->genome != GENOME_chloroplast) {
9488
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Plastid name subsource chloroplast but not chloroplast location");
9490
} else if (StringCmp (ssp->name, "chromoplast") == 0) {
9491
if (biop->genome != GENOME_chromoplast) {
9492
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Plastid name subsource chromoplast but not chromoplast location");
9494
} else if (StringCmp (ssp->name, "kinetoplast") == 0) {
9495
if (biop->genome != GENOME_kinetoplast) {
9496
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Plastid name subsource kinetoplast but not kinetoplast location");
9498
} else if (StringCmp (ssp->name, "plastid") == 0) {
9499
if (biop->genome != GENOME_plastid) {
9500
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Plastid name subsource plastid but not plastid location");
9502
} else if (StringCmp (ssp->name, "apicoplast") == 0) {
9503
if (biop->genome != GENOME_apicoplast) {
9504
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Plastid name subsource apicoplast but not apicoplast location");
9506
} else if (StringCmp (ssp->name, "leucoplast") == 0) {
9507
if (biop->genome != GENOME_leucoplast) {
9508
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Plastid name subsource leucoplast but not leucoplast location");
9510
} else if (StringCmp (ssp->name, "proplastid") == 0) {
9511
if (biop->genome != GENOME_proplastid) {
9512
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Plastid name subsource proplastid but not proplastid location");
9515
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Plastid name subsource contains unrecognized value");
9173
9517
} else if (ssp->subtype == SUBSRC_collection_date) {
9174
9518
if (! CollectionDateIsValid (ssp->name)) {
9175
9519
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BadCollectionDate, "Collection_date format is not in DD-Mmm-YYYY format");
9482
9885
if (is_env_sample && (! is_iso_source) && (! is_specific_host)) {
9483
9886
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "Environmental sample should also have isolation source or specific host annotated");
9888
if (has_metagenome_source && (! is_metagenomic)) {
9889
ValidErr (vsp, SEV_ERROR, ERR_SEQ_DESCR_BioSourceInconsistency, "Metagenome source should also have metagenomic qualifier");
9891
if (StringDoesHaveText (synonym) && StringDoesHaveText (gb_synonym)) {
9892
if (StringICmp (synonym, gb_synonym) == 0) {
9893
ValidErr (vsp, SEV_WARNING, ERR_SEQ_DESCR_BioSourceInconsistency, "OrgMod synonym is identical to OrgMod gb_synonym");
9486
9897
for (db = orp->db; db != NULL; db = db->next) {
9488
9899
dbt = (DbtagPtr) db->data.ptrvalue;
9489
9900
if (dbt != NULL && dbt->db != NULL) {
9491
for (i = 0; legalDbXrefs [i] != NULL; i++) {
9492
if (StringCmp (dbt->db, legalDbXrefs [i]) == 0) {
9495
} else if (StringICmp (dbt->db, legalDbXrefs [i]) == 0) {
9496
casecounts = legalDbXrefs [i];
9499
if (id == -1 || id < 4) {
9500
if (StringDoesHaveText (casecounts)) {
9501
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_IllegalDbXref, "Illegal db_xref type %s, legal capitalization is %s", dbt->db, casecounts);
9503
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_IllegalDbXref, "Illegal db_xref type %s", dbt->db);
9510
for (db = sfp->dbxref; db != NULL; db = db->next) {
9511
dbt = (DbtagPtr) db->data.ptrvalue;
9513
if (StringICmp (dbt->db, "taxon") == 0) {
9514
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_TaxonDbxrefOnFeature, "BioSource feature has taxon xref in common feature db_xref list");
9902
dbvalid = IsDbxrefValid (dbt->db, NULL, orp, FALSE, &dbxerr);
9903
if (dbxerr != NULL) {
9904
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_IllegalDbXref, dbxerr);
9905
dbxerr = MemFree (dbxerr);
15157
15712
return rsult;
15717
ValidateIntronEndsAtSpliceSiteOrGap
15718
(ValidStructPtr vsp,
15724
Int4 strt, stop, pos;
15725
Boolean partial5, partial3;
15729
if (slp == NULL) return;
15730
CheckSeqLocForPartial (slp, &partial5, &partial3);
15731
if (partial5 && partial3) return;
15733
sip = SeqLocId (slp);
15738
if (sip != NULL && (sip->choice != SEQID_GI || sip->data.intvalue > 0)) {
15739
bsp = BioseqLockById (sip);
15744
BioseqLabel (bsp, id_buf, sizeof (id_buf) - 1, OM_LABEL_CONTENT);
15746
strt = SeqLocStart (slp);
15747
stop = SeqLocStop (slp);
15749
strand = SeqLocStrand (slp);
15752
if (strand == Seq_strand_minus) {
15753
SeqPortStreamInt (bsp, stop - 1, stop, Seq_strand_minus, EXPAND_GAPS_TO_DASHES, (Pointer) buf, NULL);
15756
SeqPortStreamInt (bsp, strt, strt + 1, Seq_strand_plus, EXPAND_GAPS_TO_DASHES, (Pointer) buf, NULL);
15759
if ((buf[0] == '-' && buf[1] == '-')
15760
|| (buf[0] == 'G' && buf[1] == 'T')
15761
|| (buf[0] == 'G' && buf[1] == 'C')) {
15762
/* location is ok */
15763
} else if (pos == 0 || pos == bsp->length - 1) {
15764
ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_NotSpliceConsensusDonor,
15765
"Splice donor consensus (GT) not found at start of terminal intron, position %ld of %s", (long) (pos + 1), id_buf);
15767
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_NotSpliceConsensusDonor,
15768
"Splice donor consensus (GT) not found at start of intron, position %ld of %s", (long) (pos + 1), id_buf);
15772
if (strand == Seq_strand_minus) {
15773
SeqPortStreamInt (bsp, strt, strt + 1, Seq_strand_minus, EXPAND_GAPS_TO_DASHES, (Pointer) buf, NULL);
15776
SeqPortStreamInt (bsp, stop - 1, stop, Seq_strand_plus, EXPAND_GAPS_TO_DASHES, (Pointer) buf, NULL);
15779
if ((buf[0] == '-' && buf[1] == '-')
15780
|| (buf[0] == 'A' && buf[1] == 'G')) {
15781
/* location is ok */
15782
} else if (pos == 0 || pos == bsp->length - 1) {
15783
ValidErr (vsp, SEV_INFO, ERR_SEQ_FEAT_NotSpliceConsensusAcceptor,
15784
"Splice acceptor consensus (AG) not found at end of terminal intron, position %ld of %s, but at end of sequence", (long) (pos + 1), id_buf);
15786
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_NotSpliceConsensusAcceptor,
15787
"Splice acceptor consensus (AG) not found at end of intron, position %ld of %s", (long) (pos + 1), id_buf);
15790
BioseqUnlock (bsp);
15161
15795
static void CheckTrnaCodons (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPtr sfp, tRNAPtr trp)
16129
16763
return FALSE;
16132
static CharPtr legalDbXrefOnRefSeq [] = {
16139
static CharPtr badDbXref [] = {
16149
16766
static void CheckForIllegalDbxref (ValidStructPtr vsp, GatherContextPtr gcp, SeqFeatPtr sfp, ValNodePtr dbxref)
16152
CharPtr casecounts;
16156
16772
ValNodePtr vnp;
16158
16775
for (vnp = dbxref; vnp != NULL; vnp = vnp->next) {
16160
16777
db = (DbtagPtr) vnp->data.ptrvalue;
16161
16778
if (db != NULL && db->db != NULL) {
16163
for (i = 0; legalDbXrefs [i] != NULL; i++) {
16164
if (StringCmp (db->db, legalDbXrefs [i]) == 0) {
16167
} else if (StringICmp (db->db, legalDbXrefs [i]) == 0) {
16168
casecounts = legalDbXrefs [i];
16171
if (id == -1 && GPSorRefSeq (vsp->sep, sfp->location)) {
16172
for (i = 0; legalDbXrefOnRefSeq [i] != NULL; i++) {
16173
if (StringCmp (db->db, legalDbXrefOnRefSeq [i]) == 0) return;
16175
for (i = 0; legalRefSeqDbXrefs [i] != NULL; i++) {
16176
if (StringCmp (db->db, legalRefSeqDbXrefs [i]) == 0) return;
16179
if (id == -1 || (sfp->data.choice != SEQFEAT_CDREGION && id < 4)) {
16180
if (StringDoesHaveText (casecounts)) {
16181
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_IllegalDbXref, "Illegal db_xref type %s, legal capitalization is %s", db->db, casecounts);
16183
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_IllegalDbXref, "Illegal db_xref type %s", db->db);
16186
for (i = 0; badDbXref [i] != NULL; i++) {
16187
if (StringICmp (db->db, badDbXref [i]) == 0) {
16188
ValidErr (vsp, SEV_ERROR, ERR_SEQ_FEAT_IllegalDbXref,
16189
"db_xref type %s is only created by the flatfile generator, and should not be in the record as a separate xref", db->db);
16780
valid = IsDbxrefValid (db->db, sfp, NULL,
16781
GPSorRefSeq (vsp->sep, sfp->location),
16783
if (dbxerr != NULL) {
16784
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_IllegalDbXref, dbxerr);
16785
dbxerr = MemFree (dbxerr);
17333
17930
return FALSE;
17933
static CharPtr badGeneSyn [] = {
17953
"unknown function",
17960
static CharPtr badProtName [] = {
17961
"'hypothetical protein",
17964
"alternatively spliced",
17965
"bacteriophage hypothetical protein",
17968
"cnserved hypothetical protein",
17969
"conesrved hypothetical protein",
17970
"conserevd hypothetical protein",
17971
"conserved archaeal protein",
17972
"conserved domain protein",
17973
"conserved hypohetical protein",
17974
"conserved hypotehtical protein",
17975
"conserved hypotheical protein",
17976
"conserved hypothertical protein",
17977
"conserved hypothetcial protein",
17978
"conserved hypothetical exported protein",
17979
"conserved hypothetical integral membrane protein",
17980
"conserved hypothetical membrane protein",
17981
"conserved hypothetical phage protein",
17982
"conserved hypothetical prophage protein",
17983
"conserved hypothetical protein - phage associated",
17984
"conserved hypothetical protein fragment 3",
17985
"conserved hypothetical protein, fragment",
17986
"conserved hypothetical protein, putative",
17987
"conserved hypothetical protein, truncated",
17988
"conserved hypothetical protein, truncation",
17989
"conserved hypothetical protein; possible membrane protein",
17990
"conserved hypothetical protein; putative membrane protein",
17991
"conserved hypothetical protein.",
17992
"conserved hypothetical protein",
17993
"conserved hypothetical proteins",
17994
"conserved hypothetical protien",
17995
"conserved hypothetical transmembrane protein",
17996
"conserved hypothetical",
17997
"conserved hypotheticcal protein",
17998
"conserved hypthetical protein",
17999
"conserved in bacteria",
18000
"conserved membrane protein",
18001
"conserved protein of unknown function ; putative membrane protein",
18002
"conserved protein of unknown function",
18003
"conserved protein",
18004
"conserved unknown protein",
18005
"conservedhypothetical protein",
18006
"conserverd hypothetical protein",
18007
"conservered hypothetical protein",
18008
"consrved hypothetical protein",
18009
"converved hypothetical protein",
18012
"duplicated hypothetical protein",
18015
"homeodomain protein",
18018
"hyopthetical protein",
18020
"hypotheical protein",
18021
"hypothertical protein",
18022
"hypothetcical protein",
18023
"hypothetical protein",
18024
"hypothetical conserved protein",
18025
"hypothetical exported protein",
18026
"hypothetical novel protein",
18027
"hypothetical orf",
18028
"hypothetical phage protein",
18029
"hypothetical prophage protein",
18030
"hypothetical protein - phage associated",
18031
"hypothetical protein (fragment)",
18032
"hypothetical protein (multi-domain)",
18033
"hypothetical protein (phage associated)",
18034
"hypothetical protein fragment ",
18035
"hypothetical protein fragment 1",
18036
"hypothetical protein predicted by genemark",
18037
"hypothetical protein predicted by glimmer",
18038
"hypothetical protein predicted by glimmer/critica",
18039
"hypothetical protein-putative conserved hypothetical protein",
18040
"hypothetical protein, conserved",
18041
"hypothetical protein, phage associated",
18042
"hypothetical protein, truncated",
18043
"hypothetical protein.",
18044
"hypothetical protein",
18045
"hypothetical proteins",
18046
"hypothetical protien",
18047
"hypothetical transmembrane protein",
18049
"hypothetoical protein",
18050
"hypothteical protein",
18051
"identified by sequence similarity; putative; ORF located\nusing Blastx/FrameD",
18052
"identified by sequence similarity; putative; ORF located\nusing Blastx/Glimmer/Genemark",
18054
"membrane protein, putative",
18056
"narrowly conserved hypothetical protein ",
18058
"orf, conserved hypothetical protein",
18059
"orf, hypothetical protein",
18060
"orf, hypothetical, fragment",
18061
"orf, hypothetical",
18062
"orf, partial conserved hypothetical protein",
18063
"orf; hypothetical protein",
18064
"orf; unknown function",
18066
"partial cds, hypothetical",
18068
"partially conserved hypothetical protein",
18069
"phage hypothetical protein",
18070
"phage-related conserved hypothetical protein",
18071
"phage-related protein",
18073
"possible hypothetical protein",
18075
"predicted coding region",
18076
"predicted protein (pseudogene)",
18077
"predicted protein family",
18078
"predicted protein",
18079
"product uncharacterised protein family",
18081
"protein of unknown function",
18083
"putative conserved protein",
18084
"putative exported protein",
18085
"putative hypothetical protein",
18086
"putative membrane protein",
18087
"putative orf; unknown function",
18088
"putative phage protein",
18089
"putative protein",
18093
"repeats containing protein",
18095
"ribosomal protein",
18097
"small hypothetical protein",
18099
"transmembrane protein",
18102
"trp-repeat protein",
18103
"truncated conserved hypothetical protein",
18104
"truncated hypothetical protein",
18105
"uncharacterized conserved membrane protein",
18106
"uncharacterized conserved protein",
18107
"uncharacterized conserved secreted protein",
18108
"uncharacterized protein conserved in archaea",
18109
"uncharacterized protein conserved in bacteria",
18110
"uncharacterized protein",
18111
"unique hypothetical protein",
18112
"unique hypothetical",
18114
"unknown function",
18118
"unknown-related protein",
18119
"unknown, conserved protein",
18120
"unknown, hypothetical",
18121
"unknown; predicted coding region",
18124
"unnamed protein product",
18126
"very hypothetical protein",
18130
static Boolean NameInList (CharPtr name, CharPtr PNTR list, size_t numelements)
18135
if (StringHasNoText (name) || list == NULL || numelements < 1) return FALSE;
18138
R = numelements - 1; /* -1 because now NULL terminated */
18142
if (StringICmp (list [mid], name) < 0) {
18149
if (StringICmp (list [R], name) == 0) return TRUE;
17336
18154
NLM_EXTERN void ValidateSeqFeat (GatherContextPtr gcp)
17338
18156
Int2 type, i, j;
17588
18418
} else if (sfp->data.choice == SEQFEAT_CDREGION && sfp->excpt &&
17589
18419
StringStr (sfp->except_text, "rearrangement required for product") != NULL) {
17590
18420
} else if (sfp->data.choice == SEQFEAT_CDREGION && j == 0) {
17591
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem,
17592
"%s: %s", parterr[i], "5' partial is not at start AND"
17593
" is not at consensus splice site");
18421
if (no_nonconsensus_except) {
18422
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem,
18423
"%s: %s", parterr[i], "5' partial is not at start AND"
18424
" is not at consensus splice site");
17594
18426
} else if (sfp->data.choice == SEQFEAT_CDREGION && j == 1) {
17595
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem,
17596
"%s: %s", parterr[i], "3' partial is not at stop AND"
17597
" is not at consensus splice site");
18427
if (no_nonconsensus_except) {
18428
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem,
18429
"%s: %s", parterr[i], "3' partial is not at stop AND"
18430
" is not at consensus splice site");
17599
18433
ValidErr (vsp, SEV_WARNING, ERR_SEQ_FEAT_PartialProblem,
17600
18434
"%s: %s", parterr[i], parterrs[j]);