9288
/* Discrepancy Report */
9290
typedef void (*DiscrepancyCallback) (ValNodePtr item_list, Pointer userdata);
9291
typedef void (*DiscrepancyCallbackDataFree) (Pointer userdata);
9293
/* To add a new type of test, do ALL of the following:
9294
* 1. add an item to the DiscrepancyType enum (this will fill the disc_type value)
9295
* 2. add a collection function and declare it with the others
9296
* 3. add an item to discrepancy_info_list that corresponds with the position of the
9297
* new enum value. If you are combining multiple types in one collection function,
9298
* be sure to list them together.
9302
DISC_GENE_MISSING = 0,
9303
DISC_SUPERFLUOUS_GENE,
9304
DISC_GENE_MISSING_LOCUS_TAG,
9305
DISC_GENE_DUPLICATE_LOCUS_TAG,
9306
DISC_GENE_LOCUS_TAG_BAD_FORMAT,
9307
DISC_GENE_LOCUS_TAG_INCONSISTENT_PREFIX,
9308
DISC_NON_GENE_LOCUS_TAG,
9309
DISC_MISSING_PROTEIN_ID,
9310
DISC_INCONSISTENT_PROTEIN_ID_PREFIX,
9311
DISC_GENE_CDS_mRNA_LOCATION_CONFLICT,
9312
DISC_GENE_PRODUCT_CONFLICT,
9313
DISC_GENE_DUPLICATE_LOCUS,
9314
DISC_EC_NUMBER_NOTE,
9315
DISC_PSEUDO_MISMATCH,
9316
DISC_JOINED_FEATURES,
9317
DISC_OVERLAPPING_GENES,
9318
DISC_OVERLAPPING_CDS,
9320
DISC_INCONSISTENT_BIOSRC,
9321
DISC_SUSPECT_PRODUCT_NAME,
9322
DISC_INCONSISTENT_BIOSRC_DEFLINE,
9326
typedef void (*PerformDiscrepancyTest) PROTO ((SeqEntryPtr, ValNodePtr PNTR));
9328
typedef struct discrepancyinfo
9331
CharPtr setting_name;
9332
PerformDiscrepancyTest test_func;
9333
} DiscrepancyInfoData, PNTR DiscrepancyInfoPtr;
9335
/* declarations for discrepancy tests */
9336
static void AddMissingAndSuperfluousGeneDiscrepancies (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list);
9337
static void AddDiscrepanciesForMissingOrNonUniqueGeneLocusTags (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list);
9338
static void AddDiscrepanciesForNonGeneLocusTags (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list);
9339
static void FindMissingProteinIDs (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list);
9340
static void FindCDSmRNAGeneLocationDiscrepancies (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list);
9341
static void FindCDSGeneProductConflicts (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list);
9342
static void FindDuplicateGeneLocus (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list);
9343
static void AddECNumberNoteDiscrepancies (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list);
9344
static void FindPseudoDiscrepancies (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list);
9345
static void AddJoinedFeatureDiscrepancies (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list);
9346
static void AddOverlappingGeneDiscrepancies (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list);
9347
static void AddOverlappingCodingRegionDiscrepancies (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list);
9348
static void FindShortContigs (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list);
9349
static void FindNonmatchingContigSources (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list);
9350
static void FindSuspectProductNames (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list);
9351
static void FindInconsistentSourceAndDefline (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list);
9353
const DiscrepancyInfoData discrepancy_info_list[] =
9355
{ "Missing Genes", "MISSING_GENES", AddMissingAndSuperfluousGeneDiscrepancies },
9356
{ "Extra Genes", "EXTRA_GENES", AddDiscrepanciesForMissingOrNonUniqueGeneLocusTags },
9357
{ "Missing Locus Tags", "MISSING_LOCUS_TAGS", AddDiscrepanciesForMissingOrNonUniqueGeneLocusTags },
9358
{ "Duplicate Locus Tags", "DUPLICATE_LOCUS_TAGS", AddDiscrepanciesForMissingOrNonUniqueGeneLocusTags },
9359
{ "Bad Locus Tag Format", "BAD_LOCUS_TAG_FORMAT", AddDiscrepanciesForMissingOrNonUniqueGeneLocusTags },
9360
{ "Inconsistent Locus Tag Prefix", "INCONSISTENT_LOCUS_TAG_PREFIX", AddDiscrepanciesForMissingOrNonUniqueGeneLocusTags },
9361
{ "Nongene Locus Tag", "NON_GENE_LOCUS_TAG", AddDiscrepanciesForNonGeneLocusTags },
9362
{ "Missing Protein ID", "MISSING_PROTEIN_ID", FindMissingProteinIDs },
9363
{ "Inconsistent Protein ID", "INCONSISTENT_PROTEIN_ID", FindMissingProteinIDs },
9364
{ "Feature Location Conflict", "FEATURE_LOCATION_CONFLICT", FindCDSmRNAGeneLocationDiscrepancies },
9365
{ "Gene Product Conflict", "GENE_PRODUCT_CONFLICT", FindCDSGeneProductConflicts },
9366
{ "Duplicate Gene Locus", "DUPLICATE_GENE_LOCUS", FindDuplicateGeneLocus },
9367
{ "EC Number Note", "EC_NUMBER_NOTE", AddECNumberNoteDiscrepancies },
9368
{ "Pseudo Mismatch", "PSEUDO_MISMATCH", FindPseudoDiscrepancies },
9369
{ "Joined Features", "JOINED_FEATURES", AddJoinedFeatureDiscrepancies },
9370
{ "Overlapping Genes", "OVERLAPPING_GENES", AddOverlappingGeneDiscrepancies },
9371
{ "Overlapping CDS", "OVERLAPPING_CDS", AddOverlappingCodingRegionDiscrepancies },
9372
{ "Short Contig", "SHORT_CONTIG", FindShortContigs },
9373
{ "Inconsistent BioSource", "INCONSISTENT_BIOSOURCE", FindNonmatchingContigSources },
9374
{ "Suspect Product Name", "SUSPECT_PRODUCT_NAMES", FindSuspectProductNames },
9375
{ "Inconsistent Source And Definition Line", "INCONSISTENT_SOURCE_DEFLINE", FindInconsistentSourceAndDefline }
9380
typedef struct discrepancyitem
9382
DiscrepancyType disc_type;
9383
CharPtr description;
9384
ValNodePtr item_list;
9385
DiscrepancyCallback callback_func;
9386
DiscrepancyCallbackDataFree datafree_func;
9387
Pointer callback_data;
9389
ValNodePtr subcategories;
9392
} DiscrepancyItemData, PNTR DiscrepancyItemPtr;
9394
typedef struct discrepancyconfig
9396
Boolean conf_list[MAX_DISC_TYPE];
9397
Boolean use_feature_table_format;
9398
} DiscrepancyConfigData, PNTR DiscrepancyConfigPtr;
9400
typedef struct discrepancyreportform
9404
ValNodePtr discrepancy_list;
9415
Nlm_ColPtr PNTR col_fmt_array_array;
9416
DiscrepancyConfigPtr dcp;
9417
} DiscrepancyReportFormData, PNTR DiscrepancyReportFormPtr;
9420
/* Releases a discrepancy configuration.
 * Returns whatever MemFree returns, so callers can write
 * dcp = DiscrepancyConfigFree (dcp).
 */
static DiscrepancyConfigPtr DiscrepancyConfigFree (DiscrepancyConfigPtr dcp)
{
  DiscrepancyConfigPtr released;

  released = MemFree (dcp);
  return released;
}
9425
static DiscrepancyConfigPtr DiscrepancyConfigNew (void)
9427
DiscrepancyConfigPtr dcp;
9430
dcp = (DiscrepancyConfigPtr) MemNew (sizeof (DiscrepancyConfigData));
9431
for (i = 0; i < MAX_DISC_TYPE; i++)
9433
dcp->conf_list[i] = TRUE;
9435
dcp->use_feature_table_format = FALSE;
9439
static DiscrepancyConfigPtr ReadDiscrepancyConfig (void)
9441
DiscrepancyConfigPtr dcp;
9445
dcp = DiscrepancyConfigNew();
9448
for (i = 0; i < MAX_DISC_TYPE; i++)
9450
if (GetSequinAppParam ("DISCREPANCY_REPORT", discrepancy_info_list[i].setting_name, NULL, str, sizeof (str))) {
9451
if (StringICmp (str, "FALSE") == 0) {
9452
dcp->conf_list[i] = FALSE;
9456
if (GetSequinAppParam ("DISCREPANCY_REPORT", "USE_FEATURE_TABLE_FORMAT", NULL, str, sizeof (str))) {
9457
if (StringICmp (str, "TRUE") == 0) {
9458
dcp->use_feature_table_format = TRUE;
9465
/* Writes the configuration to the DISCREPANCY_REPORT section of the
 * SEQUINCUSTOM settings: one "TRUE"/"FALSE" entry per test, keyed by the
 * test's setting_name, plus the USE_FEATURE_TABLE_FORMAT flag.
 * Does nothing when dcp is NULL.
 */
static void SaveDiscrepancyConfig (DiscrepancyConfigPtr dcp)
{
  Int4    test_idx;
  CharPtr flag_text;

  if (dcp == NULL)
  {
    return;
  }

  for (test_idx = 0; test_idx < MAX_DISC_TYPE; test_idx++)
  {
    flag_text = dcp->conf_list[test_idx] ? "TRUE" : "FALSE";
    SetAppParam ("SEQUINCUSTOM", "DISCREPANCY_REPORT", discrepancy_info_list[test_idx].setting_name, flag_text);
  }

  flag_text = dcp->use_feature_table_format ? "TRUE" : "FALSE";
  SetAppParam ("SEQUINCUSTOM", "DISCREPANCY_REPORT", "USE_FEATURE_TABLE_FORMAT", flag_text);
}
9496
/* Button callback: clears every test checkbox.
 * The button's extra data is the array of MAX_DISC_TYPE checkboxes.
 */
static void UnselectDiscrepancyList(ButtoN b)
{
  ButtoN *checkboxes;
  Int4    idx;

  checkboxes = (ButtoN *) GetObjectExtra (b);
  if (checkboxes == NULL)
  {
    return;
  }

  for (idx = 0; idx < MAX_DISC_TYPE; idx++)
  {
    SetStatus (checkboxes[idx], FALSE);
  }
}
9511
/* Button callback: ticks every test checkbox.
 * The button's extra data is the array of MAX_DISC_TYPE checkboxes.
 */
static void SelectDiscrepancyList(ButtoN b)
{
  ButtoN *checkboxes;
  Int4    idx;

  checkboxes = (ButtoN *) GetObjectExtra (b);
  if (checkboxes == NULL)
  {
    return;
  }

  for (idx = 0; idx < MAX_DISC_TYPE; idx++)
  {
    SetStatus (checkboxes[idx], TRUE);
  }
}
9526
/* This function returns TRUE if there was a change to the discrepancy config,
9529
static Boolean EditDiscrepancyConfig (DiscrepancyConfigPtr dcp)
9533
ButtoN b, use_feature_table_format_btn;
9534
ModalAcceptCancelData acd;
9536
ButtoN test_options[MAX_DISC_TYPE];
9537
Boolean rval = FALSE;
9544
acd.accepted = FALSE;
9545
acd.cancelled = FALSE;
9547
w = ModalWindow(-20, -13, -10, -10, NULL);
9548
h = HiddenGroup (w, -1, 0, NULL);
9549
SetGroupSpacing (h, 10, 10);
9551
g = NormalGroup (h, 0, 10, "Discrepancy Tests to Run", programFont, NULL);
9552
SetGroupSpacing (g, 10, 10);
9553
for (i = 0; i < MAX_DISC_TYPE; i++)
9555
test_options[i] = CheckBox (g, discrepancy_info_list[i].conf_name, NULL);
9556
SetStatus (test_options[i], dcp->conf_list[i]);
9559
use_feature_table_format_btn = CheckBox (h, "Use feature table format for features in report", NULL);
9560
SetStatus (use_feature_table_format_btn, dcp->use_feature_table_format);
9562
k = HiddenGroup (h, 2, 0, NULL);
9563
b = PushButton (k, "Select All", SelectDiscrepancyList);
9564
SetObjectExtra (b, test_options, NULL);
9565
b = PushButton (k, "Unselect All", UnselectDiscrepancyList);
9566
SetObjectExtra (b, test_options, NULL);
9568
c = HiddenGroup (h, 3, 0, NULL);
9569
b = PushButton (c, "Accept", ModalAcceptButton);
9570
SetObjectExtra (b, &acd, NULL);
9571
b = PushButton (c, "Cancel", ModalCancelButton);
9572
SetObjectExtra (b, &acd, NULL);
9573
AlignObjects (ALIGN_CENTER, (HANDLE) g, (HANDLE) use_feature_table_format_btn, (HANDLE) k, (HANDLE) c, NULL);
9577
while (!acd.accepted && ! acd.cancelled)
9579
ProcessExternalEvent ();
9585
for (i = 0; i < MAX_DISC_TYPE; i++)
9587
dcp->conf_list [i] = GetStatus (test_options[i]);
9589
dcp->use_feature_table_format = GetStatus (use_feature_table_format_btn);
9591
SaveDiscrepancyConfig (dcp);
9599
static WindoW discrepancyReportWindow = NULL;
9602
/* Frees a list of discrepancy items, including each item's description,
 * its item_list nodes, its callback data (through datafree_func, when one
 * is set) and, recursively, its subcategories.
 * Returns the value produced by ValNodeFree for the head node, so callers
 * can write list = FreeDiscrepancyList (list).
 */
static ValNodePtr FreeDiscrepancyList (ValNodePtr list)
{
  DiscrepancyItemPtr dip;

  if (list == NULL)
  {
    return NULL;
  }

  /* Release the rest of the chain first and detach it explicitly, so the
   * ValNodeFree call at the end only ever sees this single node and cannot
   * walk into nodes that have already been released.
   */
  FreeDiscrepancyList (list->next);
  list->next = NULL;

  dip = (DiscrepancyItemPtr) list->data.ptrvalue;
  if (dip != NULL)
  {
    dip->description = MemFree (dip->description);
    if (dip->datafree_func != NULL)
    {
      (dip->datafree_func) (dip->callback_data);
    }
    dip->item_list = ValNodeFree (dip->item_list);
    dip->subcategories = FreeDiscrepancyList (dip->subcategories);
    dip = MemFree (dip);
    list->data.ptrvalue = NULL;
  }

  list = ValNodeFree (list);
  return list;
}
9631
static DiscrepancyItemPtr
9633
(DiscrepancyType disc_type,
9634
CharPtr description_fmt,
9635
ValNodePtr item_list)
9637
DiscrepancyItemPtr dip;
9639
dip = (DiscrepancyItemPtr) MemNew (sizeof (DiscrepancyItemData));
9642
dip->disc_type = disc_type;
9643
dip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (description_fmt) + 15));
9644
sprintf (dip->description, description_fmt, ValNodeLen (item_list));
9645
dip->callback_func = NULL;
9646
dip->datafree_func = NULL;
9647
dip->callback_data = NULL;
9648
dip->item_list = item_list;
9649
dip->subcategories = NULL;
9650
dip->expanded = FALSE;
9658
RemoveUnwantedDiscrepancyItems
9659
(ValNodePtr PNTR discrepancy_list,
9660
DiscrepancyConfigPtr dcp)
9662
ValNodePtr vnp, prev = NULL, vnp_next;
9663
DiscrepancyItemPtr dip;
9665
if (dcp == NULL || discrepancy_list == NULL || *discrepancy_list == NULL)
9670
for (vnp = *discrepancy_list; vnp != NULL; vnp = vnp_next)
9672
vnp_next = vnp->next;
9673
dip = (DiscrepancyItemPtr) vnp->data.ptrvalue;
9674
if (dip == NULL || ! dcp->conf_list[dip->disc_type])
9678
*discrepancy_list = vnp_next;
9682
prev->next = vnp_next;
9685
vnp = FreeDiscrepancyList (vnp);
9696
/* Appends to *list1 one new node per node of list2, carrying the same
 * choice and data pointer.  list2 itself is left untouched.
 * Does nothing when list1 is NULL.
 */
static void ValNodeLinkCopy (ValNodePtr PNTR list1, ValNodePtr list2)
{
  ValNodePtr src;

  if (list1 == NULL)
  {
    return;
  }

  for (src = list2; src != NULL; src = src->next)
  {
    ValNodeAddPointer (list1, src->choice, src->data.ptrvalue);
  }
}
9707
/* Returns TRUE when both lists have the same length and the data pointers
 * of corresponding nodes compare equal under StringCmp; two empty lists
 * match.
 */
static Boolean ValNodeStringListMatch (ValNodePtr vnp1, ValNodePtr vnp2)
{
  /* walk both lists in step until one runs out or a pair differs */
  while (vnp1 != NULL && vnp2 != NULL)
  {
    if (StringCmp (vnp1->data.ptrvalue, vnp2->data.ptrvalue) != 0)
    {
      return FALSE;
    }
    vnp1 = vnp1->next;
    vnp2 = vnp2->next;
  }

  /* a match requires that both lists ended together */
  if (vnp1 == NULL && vnp2 == NULL)
  {
    return TRUE;
  }
  return FALSE;
}
9728
/* Returns TRUE when two gene references agree on locus, allele, desc,
 * maploc, locus_tag, the pseudo flag, and the db and syn lists.
 * Two NULL references match; a NULL never matches a non-NULL reference.
 */
static Boolean GeneRefMatch (GeneRefPtr grp1, GeneRefPtr grp2)
{
  if (grp1 == NULL || grp2 == NULL)
  {
    /* only "both missing" counts as equal */
    if (grp1 == NULL && grp2 == NULL)
    {
      return TRUE;
    }
    return FALSE;
  }

  if (StringCmp (grp1->locus, grp2->locus) != 0) return FALSE;
  if (StringCmp (grp1->allele, grp2->allele) != 0) return FALSE;
  if (StringCmp (grp1->desc, grp2->desc) != 0) return FALSE;
  if (StringCmp (grp1->maploc, grp2->maploc) != 0) return FALSE;
  if (StringCmp (grp1->locus_tag, grp2->locus_tag) != 0) return FALSE;

  /* pseudo flags must be both set or both clear */
  if (grp1->pseudo && !grp2->pseudo) return FALSE;
  if (!grp1->pseudo && grp2->pseudo) return FALSE;

  /* NOTE(review): ValNodeStringListMatch hands each node's data pointer to
   * StringCmp; confirm that the db list really stores C strings.
   */
  if (!ValNodeStringListMatch (grp1->db, grp2->db)) return FALSE;
  if (!ValNodeStringListMatch (grp1->syn, grp2->syn)) return FALSE;

  return TRUE;
}
9757
/* Removes from *list every node whose gene feature carries a gene
 * reference matching grp (per GeneRefMatch), and frees the removed nodes.
 * The list nodes hold gene features (SeqFeatPtr), the same payload that
 * ExtractGeneFromListByGene compares against, so the gene reference is
 * taken from the feature's data before it is compared.
 * Does nothing when list or grp is NULL.
 */
static void ExtractGeneFromListByGeneRef (ValNodePtr PNTR list, GeneRefPtr grp)
{
  ValNodePtr prev = NULL, this_vnp, next_vnp;
  SeqFeatPtr gene;

  if (list == NULL || grp == NULL)
  {
    return;
  }

  this_vnp = *list;
  while (this_vnp != NULL)
  {
    next_vnp = this_vnp->next;
    gene = (SeqFeatPtr) this_vnp->data.ptrvalue;
    if (gene != NULL
        && gene->data.choice == SEQFEAT_GENE
        && GeneRefMatch ((GeneRefPtr) gene->data.value.ptrvalue, grp))
    {
      /* unlink the node, then free just that node */
      if (prev == NULL)
      {
        *list = next_vnp;
      }
      else
      {
        prev->next = next_vnp;
      }
      this_vnp->next = NULL;
      ValNodeFree (this_vnp);
    }
    else
    {
      prev = this_vnp;
    }
    this_vnp = next_vnp;
  }
}
9792
/* Removes from *list every node whose data pointer is exactly `gene`,
 * and frees the removed nodes.  Does nothing when list or gene is NULL.
 */
static void ExtractGeneFromListByGene (ValNodePtr PNTR list, SeqFeatPtr gene)
{
  ValNodePtr PNTR link;
  ValNodePtr      node;

  if (list == NULL || gene == NULL)
  {
    return;
  }

  /* `link` always addresses the pointer that leads to the current node,
   * so unlinking is the same operation at the head and in the middle.
   */
  link = list;
  while (*link != NULL)
  {
    node = *link;
    if (node->data.ptrvalue == gene)
    {
      *link = node->next;
      node->next = NULL;
      ValNodeFree (node);
    }
    else
    {
      link = &(node->next);
    }
  }
}
9828
CheckGenesForFeatureType
9829
(ValNodePtr PNTR features_without_genes,
9830
ValNodePtr PNTR superfluous_genes,
9833
Uint2 feature_subtype,
9834
Boolean makes_gene_not_superfluous)
9836
SeqFeatPtr sfp, gene_sfp;
9838
SeqMgrFeatContext context;
9840
if (features_without_genes == NULL
9841
|| superfluous_genes == NULL
9847
for (sfp = SeqMgrGetNextFeature (bsp, NULL, feature_type, feature_subtype, &context);
9849
sfp = SeqMgrGetNextFeature (bsp, sfp, feature_type, feature_subtype, &context))
9851
/* check for gene xref */
9852
grp = SeqMgrGetGeneXref (sfp);
9855
if (SeqMgrGeneIsSuppressed (grp))
9857
ValNodeAddPointer (features_without_genes, OBJ_SEQFEAT, sfp);
9861
ExtractGeneFromListByGeneRef (superfluous_genes, grp);
9866
gene_sfp = SeqMgrGetOverlappingGene (sfp->location, NULL);
9867
if (gene_sfp == NULL)
9869
ValNodeAddPointer (features_without_genes, OBJ_SEQFEAT, sfp);
9871
else if (makes_gene_not_superfluous)
9873
ExtractGeneFromListByGene (superfluous_genes, gene_sfp);
9879
typedef struct misssupergenes
9881
ValNodePtr missing_list;
9882
ValNodePtr super_list;
9883
} MissSuperGenesData, PNTR MissSuperGenesPtr;
9886
static void FindMissingGenes (BioseqPtr bsp, Pointer userdata)
9889
SeqMgrFeatContext context;
9890
ValNodePtr features_without_genes = NULL;
9891
ValNodePtr superfluous_genes = NULL;
9892
MissSuperGenesPtr msgp;
9894
if (bsp == NULL || userdata == NULL)
9899
msgp = (MissSuperGenesPtr) userdata;
9901
for (sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_GENE, FEATDEF_GENE, &context);
9903
sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_GENE, FEATDEF_GENE, &context))
9905
ValNodeAddPointer (&superfluous_genes, OBJ_SEQFEAT, sfp);
9908
CheckGenesForFeatureType (&features_without_genes, &superfluous_genes, bsp,
9909
SEQFEAT_CDREGION, 0, TRUE);
9910
CheckGenesForFeatureType (&features_without_genes, &superfluous_genes, bsp,
9911
SEQFEAT_RNA, 0, TRUE);
9912
CheckGenesForFeatureType (&features_without_genes, &superfluous_genes, bsp,
9913
SEQFEAT_IMP, FEATDEF_RBS, FALSE);
9915
ValNodeLink (&(msgp->missing_list), features_without_genes);
9916
ValNodeLink (&(msgp->super_list), superfluous_genes);
9921
GetPseudoAndNonPseudoGeneList
9922
(ValNodePtr super_list,
9923
ValNodePtr PNTR pseudo_list,
9924
ValNodePtr PNTR non_pseudo_list)
9930
if (pseudo_list == NULL || non_pseudo_list == NULL)
9934
*pseudo_list = NULL;
9935
*non_pseudo_list = NULL;
9937
for (vnp = super_list; vnp != NULL; vnp = vnp->next)
9939
if (vnp->choice == OBJ_SEQFEAT)
9941
gene = (SeqFeatPtr) vnp->data.ptrvalue;
9942
if (gene != NULL && gene->data.choice == SEQFEAT_GENE)
9944
grp = (GeneRefPtr) gene->data.value.ptrvalue;
9945
if (gene->pseudo || (grp != NULL && grp->pseudo))
9947
ValNodeAddPointer (pseudo_list, OBJ_SEQFEAT, gene);
9951
ValNodeAddPointer (non_pseudo_list, OBJ_SEQFEAT, gene);
9959
static void AddMissingAndSuperfluousGeneDiscrepancies (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list)
9961
DiscrepancyItemPtr dip, pseudo_dip, non_pseudo_dip;
9962
CharPtr missing_genes_fmt = "%d features have no genes.";
9963
CharPtr extra_genes_fmt = "%d gene features are not associated with a CDS or RNA feature.";
9964
CharPtr pseudo_extra_genes_fmt = "%d pseudo gene features are not associated with a CDS or RNA feature.";
9965
CharPtr non_pseudo_extra_genes_fmt = "%d non-pseudo gene features are not associated with a CDS or RNA feature.";
9966
MissSuperGenesData msgd;
9967
ValNodePtr non_pseudo_list = NULL, pseudo_list = NULL;
9969
if (discrepancy_list == NULL)
9974
msgd.missing_list = NULL;
9975
msgd.super_list = NULL;
9977
VisitBioseqsInSep (sep, &msgd, FindMissingGenes);
9979
if (msgd.missing_list != NULL)
9981
dip = NewDiscrepancyItem (DISC_GENE_MISSING, missing_genes_fmt, msgd.missing_list);
9984
ValNodeAddPointer (discrepancy_list, 0, dip);
9988
if (msgd.super_list != NULL)
9990
dip = NewDiscrepancyItem (DISC_SUPERFLUOUS_GENE, extra_genes_fmt, msgd.super_list);
9993
ValNodeAddPointer (discrepancy_list, 0, dip);
9994
GetPseudoAndNonPseudoGeneList (msgd.super_list, &pseudo_list, &non_pseudo_list);
9995
non_pseudo_dip = NewDiscrepancyItem (DISC_SUPERFLUOUS_GENE, non_pseudo_extra_genes_fmt, non_pseudo_list);
9996
non_pseudo_dip->level = 1;
9997
pseudo_dip = NewDiscrepancyItem (DISC_SUPERFLUOUS_GENE, pseudo_extra_genes_fmt, pseudo_list);
9998
pseudo_dip->level = 1;
9999
ValNodeAddPointer (&(dip->subcategories), 0, non_pseudo_dip);
10000
ValNodeAddPointer (&(dip->subcategories), 0, pseudo_dip);
10006
typedef struct prefixcheck
10009
ValNodePtr feature_list;
10010
} PrefixCheckData, PNTR PrefixCheckPtr;
10013
/* Frees a list of prefix-check records: each record's prefix string and
 * feature_list nodes, the record itself, and finally the list nodes.
 * Returns the value produced by ValNodeFree, so callers can write
 * list = FreePrefixCheckList (list).
 */
static ValNodePtr FreePrefixCheckList (ValNodePtr prefix_list)
{
  ValNodePtr     node;
  PrefixCheckPtr pcp;

  if (prefix_list == NULL)
  {
    return NULL;
  }

  /* first release what every node points at ... */
  for (node = prefix_list; node != NULL; node = node->next)
  {
    pcp = (PrefixCheckPtr) node->data.ptrvalue;
    if (pcp != NULL)
    {
      pcp->prefix = MemFree (pcp->prefix);
      pcp->feature_list = ValNodeFree (pcp->feature_list);
      pcp = MemFree (pcp);
    }
  }

  /* ... then the chain of nodes in one call */
  prefix_list = ValNodeFree (prefix_list);
  return prefix_list;
}
10036
/* Builds a discrepancy item for one prefix group.
 *   pcp        prefix record; its feature_list is handed over to the new
 *              item and cleared on the record
 *   bad_fmt    printf-style format taking the feature count (%d) and the
 *              prefix (%s), in that order
 *   disc_type  type recorded on the new item
 * Returns NULL when pcp is NULL, has no prefix text or no features, or
 * when the item cannot be allocated.
 */
static DiscrepancyItemPtr InconsistentPrefix (PrefixCheckPtr pcp, CharPtr bad_fmt, DiscrepancyType disc_type)
{
  DiscrepancyItemPtr dip = NULL;

  if (pcp == NULL || StringHasNoText (pcp->prefix) || pcp->feature_list == NULL)
  {
    return NULL;
  }

  dip = (DiscrepancyItemPtr) MemNew (sizeof (DiscrepancyItemData));
  if (dip != NULL)
  {
    /* use the caller's type: this helper serves both the locus-tag and
     * the protein-ID prefix checks
     */
    dip->disc_type = disc_type;
    dip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_fmt) + StringLen (pcp->prefix)+ 15));
    sprintf (dip->description, bad_fmt, ValNodeLen (pcp->feature_list), pcp->prefix);
    dip->callback_func = NULL;
    dip->datafree_func = NULL;
    dip->callback_data = NULL;
    dip->item_list = pcp->feature_list;
    pcp->feature_list = NULL;
  }
  return dip;
}
10060
/* Wraps InconsistentPrefix with the locus-tag wording and the
 * DISC_GENE_LOCUS_TAG_INCONSISTENT_PREFIX type.
 */
static DiscrepancyItemPtr InconsistentLocusTagPrefix (PrefixCheckPtr pcp)
{
  return InconsistentPrefix (pcp,
                             "%d features have locus tag prefix %s.",
                             DISC_GENE_LOCUS_TAG_INCONSISTENT_PREFIX);
}
10068
/* Wraps InconsistentPrefix with the protein-ID wording and the
 * DISC_INCONSISTENT_PROTEIN_ID_PREFIX type.
 */
static DiscrepancyItemPtr InconsistentProteinIDPrefix (PrefixCheckPtr pcp)
{
  return InconsistentPrefix (pcp,
                             "%d sequences have protein ID prefix %s.",
                             DISC_INCONSISTENT_PROTEIN_ID_PREFIX);
}
10076
typedef struct missinconstprotids
10078
ValNodePtr missing_list;
10079
ValNodePtr inconsistent_list;
10080
} MissInconstProtIDsData, PNTR MissInconstProtIDsPtr;
10083
static void FindMissingAndInconsistentProteinIDsCallback (BioseqPtr bsp, Pointer userdata)
10086
MissInconstProtIDsPtr mipip;
10087
DbtagPtr dbt = NULL;
10088
PrefixCheckPtr pcp;
10091
if (bsp == NULL || ! ISA_aa (bsp->mol) || userdata == NULL)
10096
mipip = (MissInconstProtIDsPtr) userdata;
10098
for (sip = bsp->id; sip != NULL && dbt == NULL; sip = sip->next)
10100
if (sip->choice == SEQID_GENERAL)
10102
dbt = (DbtagPtr) sip->data.ptrvalue;
10103
if (dbt != NULL && StringICmp (dbt->db, "TMSMART") == 0)
10111
ValNodeAddPointer (&(mipip->missing_list), OBJ_BIOSEQ, bsp);
10115
/* look for inconsistent prefixes */
10117
for (vnp = mipip->inconsistent_list; vnp != NULL && pcp == NULL; vnp = vnp->next)
10119
pcp = (PrefixCheckPtr) vnp->data.ptrvalue;
10120
if (pcp != NULL && StringCmp (pcp->prefix, dbt->db) == 0)
10122
ValNodeAddPointer (&pcp->feature_list, OBJ_BIOSEQ, bsp);
10131
pcp = (PrefixCheckPtr) MemNew (sizeof (PrefixCheckData));
10134
pcp->prefix = StringSave (dbt->db);
10135
pcp->feature_list = ValNodeNew (NULL);
10136
pcp->feature_list->choice = OBJ_BIOSEQ;
10137
pcp->feature_list->data.ptrvalue = bsp;
10138
pcp->feature_list->next = NULL;
10139
ValNodeAddPointer (&(mipip->inconsistent_list), 0, pcp);
10146
static void FindMissingProteinIDs (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list)
10148
CharPtr bad_fmt = "%d proteins have invalid IDs.";
10149
DiscrepancyItemPtr dip;
10150
MissInconstProtIDsData mipid;
10153
if (discrepancy_list == NULL) return;
10155
mipid.missing_list = NULL;
10156
mipid.inconsistent_list = NULL;
10158
VisitBioseqsInSep (sep, &mipid, FindMissingAndInconsistentProteinIDsCallback);
10160
if (mipid.missing_list != NULL)
10162
dip = (DiscrepancyItemPtr) MemNew (sizeof (DiscrepancyItemData));
10165
dip->disc_type = DISC_MISSING_PROTEIN_ID;
10166
dip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_fmt) + 15));
10167
sprintf (dip->description, bad_fmt, ValNodeLen (mipid.missing_list));
10168
dip->callback_func = NULL;
10169
dip->callback_data = NULL;
10170
dip->datafree_func = NULL;
10171
dip->item_list = mipid.missing_list;
10172
ValNodeAddPointer (discrepancy_list, 0, dip);
10177
if (mipid.inconsistent_list != NULL)
10179
if (mipid.inconsistent_list->next != NULL)
10181
for (vnp = mipid.inconsistent_list; vnp != NULL; vnp = vnp->next)
10183
dip = InconsistentProteinIDPrefix (vnp->data.ptrvalue);
10186
ValNodeAddPointer (discrepancy_list, 0, dip);
10190
mipid.inconsistent_list = FreePrefixCheckList (mipid.inconsistent_list);
10195
typedef struct locustagcheck
10197
ValNodePtr locus_tags_list;
10198
ValNodePtr missing_list;
10199
ValNodePtr duplicate_list;
10200
ValNodePtr bad_format_list;
10201
ValNodePtr inconsistent_prefix;
10202
} LocusTagCheckData, PNTR LocusTagCheckPtr;
10204
static void GeneLocusTagDiscrepancyCallback (ValNodePtr item_list, Pointer userdata)
10206
Message (MSG_OK, "I could launch the editor for the individual gene...");
10209
static Boolean IsLocusTagFormatBad (CharPtr locus_tag)
10212
Boolean after_underscore = FALSE;
10214
if (StringHasNoText (locus_tag))
10220
if (!isalpha (*cp))
10229
if (after_underscore)
10235
after_underscore = TRUE;
10236
if (*(cp + 1) == 0)
10242
else if (!isalpha (*cp) && !isdigit (*cp))
10248
if (after_underscore)
10258
/* Returns a newly allocated copy of the part of locus_tag that precedes
 * the first underscore.
 * Returns NULL when locus_tag has no text, starts with an underscore,
 * contains no underscore at all, or when the copy cannot be allocated.
 * The caller owns the returned string.
 */
static CharPtr GetLocusTagPrefix (CharPtr locus_tag)
{
  Int4    prefix_len;
  CharPtr prefix = NULL;

  if (StringHasNoText (locus_tag))
  {
    return NULL;
  }

  prefix_len = (Int4) StringCSpn (locus_tag, "_");
  if (prefix_len == 0 || prefix_len == (Int4) StringLen (locus_tag))
  {
    /* empty prefix, or no underscore to split on */
    return NULL;
  }

  prefix = (CharPtr) MemNew ((prefix_len + 1) * sizeof (Char));
  if (prefix == NULL)
  {
    return NULL;
  }
  StringNCpy (prefix, locus_tag, prefix_len);
  prefix [prefix_len] = 0;
  return prefix;
}
10282
/* Per-feature callback for the locus-tag checks; userdata is a
 * LocusTagCheckPtr.  Only gene features with a gene reference are examined.
 * A gene without locus-tag text goes on missing_list.  The visible code also
 * runs the format check (bad_format_list), groups genes by locus-tag prefix
 * (inconsistent_prefix) and looks for duplicates (locus_tags_list /
 * duplicate_list).
 */
static void CheckGeneLocusTag (SeqFeatPtr sfp, Pointer userdata)
10285
LocusTagCheckPtr ltcp;
10287
Boolean found_duplicate;
10288
SeqFeatPtr dup_sfp = NULL;
10290
PrefixCheckPtr pcp;
10292
if (sfp == NULL || userdata == NULL || sfp->data.choice != SEQFEAT_GENE || sfp->data.value.ptrvalue == NULL)
10297
ltcp = (LocusTagCheckPtr) userdata;
10299
grp = (GeneRefPtr) sfp->data.value.ptrvalue;
10300
if (StringHasNoText (grp->locus_tag))
10302
ValNodeAddPointer (&(ltcp->missing_list), OBJ_SEQFEAT, sfp);
10306
/* look for badly formatted locus tags */
10307
if (IsLocusTagFormatBad (grp->locus_tag))
10309
ValNodeAddPointer (&(ltcp->bad_format_list), OBJ_SEQFEAT, sfp);
10312
/* look for inconsistent locus tag prefixes */
10313
prefix = GetLocusTagPrefix (grp->locus_tag);
10314
if (prefix != NULL)
10317
/* search the existing prefix records; the loop stops once pcp is non-NULL */
for (vnp = ltcp->inconsistent_prefix; vnp != NULL && pcp == NULL; vnp = vnp->next)
10319
pcp = (PrefixCheckPtr) vnp->data.ptrvalue;
10320
if (pcp != NULL && StringCmp (pcp->prefix, prefix) == 0)
10322
ValNodeAddPointer (&pcp->feature_list, OBJ_SEQFEAT, sfp);
10323
/* a record with this prefix already exists, so the local copy is released */
prefix = MemFree (prefix);
10332
/* new record: it takes ownership of the prefix string and starts its feature list with sfp */
pcp = (PrefixCheckPtr) MemNew (sizeof (PrefixCheckData));
10335
pcp->prefix = prefix;
10336
pcp->feature_list = ValNodeNew (NULL);
10337
pcp->feature_list->choice = OBJ_SEQFEAT;
10338
pcp->feature_list->data.ptrvalue = sfp;
10339
pcp->feature_list->next = NULL;
10340
ValNodeAddPointer (&(ltcp->inconsistent_prefix), 0, pcp);
10347
/* scan the genes seen so far (locus_tags_list stores gene features) for a duplicate */
for (vnp = ltcp->locus_tags_list, found_duplicate = FALSE;
10348
vnp != NULL && !found_duplicate;
10351
dup_sfp = (SeqFeatPtr) vnp->data.ptrvalue;
10352
if (dup_sfp != NULL && dup_sfp->data.choice == SEQFEAT_GENE)
10354
grp = (GeneRefPtr) dup_sfp->data.value.ptrvalue;
10355
/* NOTE(review): grp has just been re-pointed at the earlier gene's
 * reference, and vnp->data.ptrvalue is that same earlier feature (a
 * SeqFeatPtr, not a string).  This looks like it should compare the
 * current gene's locus_tag with the earlier gene's locus_tag -- confirm
 * and fix with the full function in view.
 */
if (grp != NULL && StringCmp (vnp->data.ptrvalue, grp->locus_tag) == 0)
10357
found_duplicate = TRUE;
10358
if (vnp->choice != 0)
10377
if (!found_duplicate)
10379
ValNodeAddPointer (&(ltcp->locus_tags_list), 0, sfp);
10383
if (dup_sfp != NULL)
10385
ValNodeAddPointer (&(ltcp->duplicate_list), OBJ_SEQFEAT, dup_sfp);
10387
ValNodeAddPointer (&(ltcp->duplicate_list), OBJ_SEQFEAT, sfp);
10392
/* Runs CheckGeneLocusTag over every feature in sep and appends to
 * *discrepancy_list one item for each non-empty result:
 *   - genes without locus tags        (DISC_GENE_MISSING_LOCUS_TAG)
 *   - genes with duplicate locus tags (DISC_GENE_DUPLICATE_LOCUS_TAG)
 *   - badly formatted locus tags      (DISC_GENE_LOCUS_TAG_BAD_FORMAT)
 *   - one item per locus-tag prefix, but only when more than one prefix
 *     was seen
 * Does nothing when discrepancy_list is NULL.
 */
static void AddDiscrepanciesForMissingOrNonUniqueGeneLocusTags (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list)
{
  LocusTagCheckData  ltcd;
  DiscrepancyItemPtr dip = NULL;
  CharPtr            missing_fmt = "%d genes have no locus tags.";
  CharPtr            duplicate_fmt = "%d genes have duplicate locus tags.";
  CharPtr            bad_fmt = "%d locus tags are incorrectly formatted.";
  ValNodePtr         vnp;

  if (discrepancy_list == NULL) return;
  ltcd.locus_tags_list = NULL;
  ltcd.missing_list = NULL;
  ltcd.duplicate_list = NULL;
  ltcd.bad_format_list = NULL;
  ltcd.inconsistent_prefix = NULL;

  VisitFeaturesInSep (sep, &ltcd, CheckGeneLocusTag);
  /* the scratch list of genes seen is only needed while visiting */
  ltcd.locus_tags_list = ValNodeFree (ltcd.locus_tags_list);

  if (ltcd.missing_list != NULL)
  {
    dip = (DiscrepancyItemPtr) MemNew (sizeof (DiscrepancyItemData));
    if (dip != NULL)
    {
      dip->disc_type = DISC_GENE_MISSING_LOCUS_TAG;
      dip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (missing_fmt) + 15));
      sprintf (dip->description, missing_fmt, ValNodeLen (ltcd.missing_list));
      dip->callback_func = GeneLocusTagDiscrepancyCallback;
      dip->datafree_func = NULL;
      dip->callback_data = NULL;
      dip->item_list = ltcd.missing_list;
      ValNodeAddPointer (discrepancy_list, 0, dip);
    }
  }

  if (ltcd.duplicate_list != NULL)
  {
    dip = (DiscrepancyItemPtr) MemNew (sizeof (DiscrepancyItemData));
    if (dip != NULL)
    {
      dip->disc_type = DISC_GENE_DUPLICATE_LOCUS_TAG;
      dip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (duplicate_fmt) + 15));
      sprintf (dip->description, duplicate_fmt, ValNodeLen (ltcd.duplicate_list));
      dip->callback_func = GeneLocusTagDiscrepancyCallback;
      dip->datafree_func = NULL;
      dip->callback_data = NULL;
      dip->item_list = ltcd.duplicate_list;
      ValNodeAddPointer (discrepancy_list, 0, dip);
    }
  }

  if (ltcd.bad_format_list != NULL)
  {
    dip = (DiscrepancyItemPtr) MemNew (sizeof (DiscrepancyItemData));
    if (dip != NULL)
    {
      dip->disc_type = DISC_GENE_LOCUS_TAG_BAD_FORMAT;
      dip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_fmt) + 15));
      sprintf (dip->description, bad_fmt, ValNodeLen (ltcd.bad_format_list));
      dip->callback_func = NULL;
      dip->datafree_func = NULL;
      dip->callback_data = NULL;
      dip->item_list = ltcd.bad_format_list;
      ValNodeAddPointer (discrepancy_list, 0, dip);
    }
  }

  if (ltcd.inconsistent_prefix != NULL)
  {
    /* a single prefix is consistent by definition; report only when
     * at least two different prefixes were collected
     */
    if (ltcd.inconsistent_prefix->next != NULL)
    {
      for (vnp = ltcd.inconsistent_prefix; vnp != NULL; vnp = vnp->next)
      {
        dip = InconsistentLocusTagPrefix (vnp->data.ptrvalue);
        if (dip != NULL)
        {
          ValNodeAddPointer (discrepancy_list, 0, dip);
        }
      }
    }
    ltcd.inconsistent_prefix = FreePrefixCheckList (ltcd.inconsistent_prefix);
  }
}
10476
static void NonGeneLocusTagDiscrepancyCallback (ValNodePtr item_list, Pointer userdata)
10478
Message (MSG_OK, "I could launch the editor for the individual gene, or I could remove all non-gene locus tags, or I could convert all non-gene locus tags to notes...");
10481
static void AddDiscrepancyForNonGeneLocusTag (SeqFeatPtr sfp, Pointer userdata)
10483
ValNodePtr PNTR locus_tag_list;
10486
if (sfp == NULL || userdata == NULL || sfp->data.choice == SEQFEAT_GENE)
10491
locus_tag_list = (ValNodePtr PNTR) userdata;
10493
for (qual = sfp->qual; qual != NULL; qual = qual->next)
10495
if (StringICmp(qual->qual, "locus_tag") == 0)
10497
ValNodeAddPointer (locus_tag_list, OBJ_SEQFEAT, sfp);
10503
/* Collects, through AddDiscrepancyForNonGeneLocusTag, the non-gene
 * features in sep that carry a locus_tag qualifier and, if any were found,
 * appends one DISC_NON_GENE_LOCUS_TAG item to *discrepancy_list.
 * Does nothing when discrepancy_list is NULL, matching the other
 * collection functions and avoiding work whose result could not be stored.
 */
static void AddDiscrepanciesForNonGeneLocusTags (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list)
{
  ValNodePtr         locus_tag_list = NULL;
  CharPtr            bad_fmt = "%d non-gene features have locus tags.";
  DiscrepancyItemPtr dip;

  if (discrepancy_list == NULL) return;

  VisitFeaturesInSep (sep, &locus_tag_list, AddDiscrepancyForNonGeneLocusTag);
  if (locus_tag_list != NULL)
  {
    dip = (DiscrepancyItemPtr) MemNew (sizeof (DiscrepancyItemData));
    if (dip != NULL)
    {
      dip->disc_type = DISC_NON_GENE_LOCUS_TAG;
      dip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_fmt) + 15));
      sprintf (dip->description, bad_fmt, ValNodeLen (locus_tag_list));
      dip->callback_func = NonGeneLocusTagDiscrepancyCallback;
      dip->datafree_func = NULL;
      dip->callback_data = NULL;
      dip->item_list = locus_tag_list;
      ValNodeAddPointer (discrepancy_list, 0, dip);
    }
  }
}
10530
(SeqMgrFeatContextPtr feat_context,
10531
SeqMgrFeatContextPtr gene_context,
10534
SeqFeatPtr rbs_sfp;
10535
SeqMgrFeatContext rbs_context;
10537
if (feat_context == NULL || gene_context == NULL)
10541
else if (feat_context->strand != gene_context->strand)
10545
else if (gene_context->left == feat_context->left && gene_context->right == feat_context->right)
10549
else if ((gene_context->strand == Seq_strand_minus && gene_context->left == feat_context->left)
10550
|| (gene_context->strand != Seq_strand_minus && gene_context->right == feat_context->right))
10552
/* find RBS to extend gene on 5' end */
10553
for (rbs_sfp = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_RBS, &rbs_context);
10555
rbs_sfp = SeqMgrGetNextFeature (bsp, rbs_sfp, 0, FEATDEF_RBS, &rbs_context))
10557
if (rbs_context.strand != gene_context->strand)
10561
if (rbs_context.strand == Seq_strand_minus)
10563
if (rbs_context.right == gene_context->right
10564
&& rbs_context.left >= feat_context->right)
10571
if (rbs_context.left == gene_context->left
10572
&& rbs_context.right <= feat_context->left)
10583
CheckFeatureTypeForLocationDiscrepancies
10585
Uint2 feature_type,
10586
ValNodePtr PNTR feature_list)
10588
SeqMgrFeatContext context, gene_context;
10590
SeqFeatPtr sfp, gene_sfp;
10591
ValNodePtr found_genes = NULL;
10592
Boolean found_match;
10594
if (bsp == NULL || ISA_aa (bsp->mol) || feature_list == NULL)
10599
for (sfp = SeqMgrGetNextFeature (bsp, NULL, feature_type, 0, &context);
10601
sfp = SeqMgrGetNextFeature (bsp, sfp, feature_type, 0, &context))
10603
grp = SeqMgrGetGeneXref (sfp);
10606
gene_sfp = SeqMgrGetOverlappingGene (sfp->location, &gene_context);
10607
if (gene_sfp != NULL && !IsGeneLocationOk (&context, &gene_context, bsp))
10609
ValNodeAddPointer (feature_list, OBJ_SEQFEAT, sfp);
10610
ValNodeAddPointer (feature_list, OBJ_SEQFEAT, gene_sfp);
10613
else if (!SeqMgrGeneIsSuppressed (grp))
10615
found_match = FALSE;
10616
for (gene_sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_GENE, FEATDEF_GENE, &gene_context);
10617
gene_sfp != NULL && ! found_match;
10618
gene_sfp = SeqMgrGetNextFeature (bsp, gene_sfp, SEQFEAT_GENE, FEATDEF_GENE, &gene_context))
10620
if (GeneRefMatch (gene_sfp->data.value.ptrvalue, grp) && gene_context.strand == context.strand)
10622
if (IsGeneLocationOk (&context, &gene_context, bsp))
10624
found_match = TRUE;
10628
ValNodeAddPointer (&found_genes, OBJ_SEQFEAT, gene_sfp);
10632
if (found_match || found_genes == NULL)
10634
found_genes = ValNodeFree (found_genes);
10637
ValNodeAddPointer (feature_list, OBJ_SEQFEAT, sfp);
10638
ValNodeLink (feature_list, found_genes);
10644
/* Bioseq visitor: treats userdata as a ValNodePtr PNTR feature list and runs
 * CheckFeatureTypeForLocationDiscrepancies on bsp for coding regions and
 * then for RNAs. */
static void CDSmRNAGeneLocationDiscrepanciesCallback (BioseqPtr bsp, Pointer userdata)
10646
ValNodePtr PNTR feature_list;
10648
/* guard: requires a non-NULL bioseq passing ISA_na and non-NULL userdata
 * (guard body not visible - presumably an early return) */
if (bsp == NULL || ! ISA_na (bsp->mol) || userdata == NULL)
10653
feature_list = (ValNodePtr PNTR) userdata;
10655
CheckFeatureTypeForLocationDiscrepancies (bsp, SEQFEAT_CDREGION, feature_list);
10656
CheckFeatureTypeForLocationDiscrepancies (bsp, SEQFEAT_RNA, feature_list);
10660
/* Runs CDSmRNAGeneLocationDiscrepanciesCallback over every bioseq in sep and,
 * when anything was collected, appends one
 * DISC_GENE_CDS_mRNA_LOCATION_CONFLICT item to discrepancy_list. */
static void FindCDSmRNAGeneLocationDiscrepancies (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list)
10662
ValNodePtr feature_list = NULL;
10663
CharPtr bad_fmt = "%d coding regions, mRNAs, and genes have inconsistent locations.";
10664
DiscrepancyItemPtr dip;
10666
VisitBioseqsInSep (sep, &feature_list, CDSmRNAGeneLocationDiscrepanciesCallback);
10668
if (feature_list != NULL)
10670
dip = (DiscrepancyItemPtr) MemNew (sizeof (DiscrepancyItemData));
10673
dip->disc_type = DISC_GENE_CDS_mRNA_LOCATION_CONFLICT;
10674
/* buffer: format length plus 15 spare bytes for the expanded %d */
dip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_fmt) + 15));
10675
/* the reported count is the number of nodes in the collected list */
sprintf (dip->description, bad_fmt, ValNodeLen (feature_list));
10676
dip->callback_func = NULL;
10677
dip->datafree_func = NULL;
10678
dip->callback_data = NULL;
10679
/* the collected list is handed to the item rather than copied */
dip->item_list = feature_list;
10680
ValNodeAddPointer (discrepancy_list, 0, dip);
10686
/* Search record used while looking for coding regions that share a gene name
 * but differ in product name.
 * NOTE(review): code in this file also reads a `cds` member of this struct;
 * its declaration is not visible here. */
typedef struct cdsgeneproduct
10689
/* gene label; FindCDSGeneProductConflicts does not free it (no copy is made) */
CharPtr gene_locus;
10690
/* copy made with StringSave; released with MemFree by FindCDSGeneProductConflicts */
CharPtr product_name;
10691
} CDSGeneProductData, PNTR CDSGeneProductPtr;
10694
/* Finds a gene reference for sfp - from its gene xref, or from the overlapping
 * gene feature - and tests whether that reference has a locus.
 * NOTE(review): the branch structure and the return statements are not
 * visible here; callers in this file treat the result as a label string that
 * may be empty. */
static CharPtr GetGeneLabel (SeqFeatPtr sfp)
10697
SeqFeatPtr gene_sfp;
10699
grp = SeqMgrGetGeneXref (sfp);
10702
gene_sfp = SeqMgrGetOverlappingGene (sfp->location, NULL);
10703
if (gene_sfp != NULL)
10705
/* take the gene reference from the overlapping gene feature */
grp = gene_sfp->data.value.ptrvalue;
10710
if (!StringHasNoText (grp->locus))
10718
/* Feature visitor for coding regions: builds a CDSGeneProductData record
 * (gene label plus product label) and inserts it into the list passed through
 * userdata, next to existing records with the same gene label.
 * NOTE(review): the statements that advance vnp/prev, set new_choice and mark
 * conflicting nodes are not visible here - confirm against the full file. */
static void FindCDSGeneProductConflictsCallback (SeqFeatPtr sfp, Pointer userdata)
10720
CDSGeneProductPtr cgpp, cgpp_compare;
10721
SeqMgrFeatContext context;
10722
ValNodePtr PNTR cds_list;
10723
ValNodePtr prev = NULL, vnp;
10724
Boolean found_match = FALSE;
10725
Uint1 new_choice = 0;
10726
CharPtr gene_label;
10728
if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION || userdata == NULL)
10733
/* re-fetch the feature to fill in context (context.label is used below) */
sfp = SeqMgrGetDesiredFeature (sfp->idx.entityID, NULL, 0, 0, sfp, &context);
10739
gene_label = GetGeneLabel (sfp);
10740
/* features without a gene label are not recorded */
if (StringHasNoText (gene_label)) return;
10742
cgpp = (CDSGeneProductPtr) MemNew (sizeof (CDSGeneProductData));
10746
/* gene_locus stores the pointer as-is; product_name is a private copy */
cgpp->gene_locus = gene_label;
10747
cgpp->product_name = StringSave (context.label);
10749
cds_list = (ValNodePtr PNTR) userdata;
10750
/* first record: simply start the list with choice 0 */
if (*cds_list == NULL)
10752
ValNodeAddPointer (cds_list, 0, cgpp);
10757
/* look for an existing record with the same gene label but a different product */
while (vnp != NULL && !found_match)
10759
cgpp_compare = (CDSGeneProductPtr) vnp->data.ptrvalue;
10760
if (cgpp_compare != NULL
10761
&& StringCmp (cgpp_compare->gene_locus, cgpp->gene_locus) == 0
10762
&& StringCmp (cgpp_compare->product_name, cgpp->product_name) != 0)
10764
found_match = TRUE;
10775
/* insert at end of matches */
10776
while (found_match && vnp != NULL)
10778
cgpp_compare = (CDSGeneProductPtr) vnp->data.ptrvalue;
10779
if (cgpp_compare != NULL
10780
&& StringCmp (cgpp_compare->gene_locus, cgpp->gene_locus) != 0)
10782
found_match = FALSE;
10793
/* splice a new node holding the record in after prev */
vnp = ValNodeNew (NULL);
10794
vnp->choice = new_choice;
10795
vnp->data.ptrvalue = cgpp;
10796
vnp->next = prev->next;
10803
/* Collects coding-region records with FindCDSGeneProductConflictsCallback,
 * drops the ones extracted with choice 0, converts the remaining records into
 * plain feature nodes and, when any remain, appends one
 * DISC_GENE_PRODUCT_CONFLICT item to discrepancy_list. */
static void FindCDSGeneProductConflicts (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list)
10805
ValNodePtr cds_list = NULL, non_conflict = NULL, vnp;
10806
CDSGeneProductPtr cgpp;
10807
CharPtr bad_fmt = "%d coding regions have the same gene name as another coding region but a different product.";
10808
DiscrepancyItemPtr dip;
10810
VisitFeaturesInSep (sep, &cds_list, FindCDSGeneProductConflictsCallback);
10812
/* remove CDSs without conflicts */
10813
non_conflict = ValNodeExtractList (&cds_list, 0);
10814
/* NOTE(review): only the list nodes are freed here; whether the records they
 * point to are released elsewhere is not visible - confirm */
non_conflict = ValNodeFree (non_conflict);
10816
/* for each item, replace structure used for search with just the feature */
10817
for (vnp = cds_list; vnp != NULL; vnp = vnp->next)
10819
cgpp = (CDSGeneProductPtr) vnp->data.ptrvalue;
10822
vnp->data.ptrvalue = cgpp->cds;
10823
vnp->choice = OBJ_SEQFEAT;
10824
cgpp->product_name = MemFree (cgpp->product_name);
10825
/* note - we are not freeing gene_locus because we didn't make a copy */
10826
cgpp = MemFree (cgpp);
10830
if (cds_list != NULL)
10832
dip = (DiscrepancyItemPtr) MemNew (sizeof (DiscrepancyItemData));
10835
dip->disc_type = DISC_GENE_PRODUCT_CONFLICT;
10836
/* buffer: format length plus 15 spare bytes for the expanded %d */
dip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_fmt) + 15));
10837
sprintf (dip->description, bad_fmt, ValNodeLen (cds_list));
10838
dip->callback_func = NULL;
10839
dip->datafree_func = NULL;
10840
dip->callback_data = NULL;
10841
dip->item_list = cds_list;
10842
ValNodeAddPointer (discrepancy_list, 0, dip);
10848
/* Feature visitor for genes: inserts sfp into the list passed through
 * userdata, grouped with earlier genes that have the same locus. When a gene
 * with the same locus is already present, that node and the new node are
 * tagged OBJ_SEQFEAT; otherwise the new node keeps choice 0.
 * NOTE(review): the statements that advance vnp/prev are not visible here. */
static void DuplicateGeneLocusCallback (SeqFeatPtr sfp, Pointer userdata)
10850
ValNodePtr PNTR gene_list;
10851
SeqFeatPtr sfp_compare;
10852
GeneRefPtr grp, grp_compare;
10853
ValNodePtr prev = NULL, vnp;
10854
Boolean found_match = FALSE;
10855
Uint1 new_choice = 0;
10857
if (sfp == NULL || sfp->data.choice != SEQFEAT_GENE || sfp->data.value.ptrvalue == NULL
10858
|| userdata == NULL)
10863
gene_list = (ValNodePtr PNTR) userdata;
10864
grp = (GeneRefPtr) sfp->data.value.ptrvalue;
10865
/* the locus text is tested before any comparison (branch body not visible) */
if (StringHasNoText (grp->locus))
10870
/* first gene: start the list with choice 0 */
if (*gene_list == NULL)
10872
ValNodeAddPointer (gene_list, 0, sfp);
10877
/* search for an earlier gene with an identical locus */
while (vnp != NULL && !found_match)
10879
sfp_compare = (SeqFeatPtr) vnp->data.ptrvalue;
10880
grp_compare = (GeneRefPtr) sfp_compare->data.value.ptrvalue;
10881
if (StringCmp (grp_compare->locus, grp->locus) == 0)
10883
found_match = TRUE;
10884
/* flag both the existing gene and the one about to be inserted */
vnp->choice = OBJ_SEQFEAT;
10885
new_choice = OBJ_SEQFEAT;
10894
/* insert at end of matches */
10895
while (found_match && vnp != NULL)
10897
sfp_compare = (SeqFeatPtr) vnp->data.ptrvalue;
10898
grp_compare = (GeneRefPtr) sfp_compare->data.value.ptrvalue;
10899
if (StringCmp (grp_compare->locus, grp->locus) != 0)
10901
found_match = FALSE;
10912
/* splice a new node for sfp in after prev */
vnp = ValNodeNew (NULL);
10913
vnp->choice = new_choice;
10914
vnp->data.ptrvalue = sfp;
10915
vnp->next = prev->next;
10923
/* Collects genes with DuplicateGeneLocusCallback, discards the nodes still
 * carrying choice 0 and, when any genes remain, appends one
 * DISC_GENE_DUPLICATE_LOCUS item to discrepancy_list. */
static void FindDuplicateGeneLocus (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list)
10925
ValNodePtr gene_list = NULL, non_conflict = NULL;
10926
CharPtr bad_fmt = "%d genes have the same locus as another gene.";
10927
DiscrepancyItemPtr dip;
10929
VisitFeaturesInSep (sep, &gene_list, DuplicateGeneLocusCallback);
10931
/* remove Genes without conflicts */
10932
non_conflict = ValNodeExtractList (&gene_list, 0);
10933
non_conflict = ValNodeFree (non_conflict);
10935
if (gene_list != NULL)
10937
dip = (DiscrepancyItemPtr) MemNew (sizeof (DiscrepancyItemData));
10940
dip->disc_type = DISC_GENE_DUPLICATE_LOCUS;
10941
/* buffer: format length plus 15 spare bytes for the expanded %d */
dip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_fmt) + 15));
10942
sprintf (dip->description, bad_fmt, ValNodeLen (gene_list));
10943
dip->callback_func = NULL;
10944
dip->datafree_func = NULL;
10945
dip->callback_data = NULL;
10946
dip->item_list = gene_list;
10947
ValNodeAddPointer (discrepancy_list, 0, dip);
10954
/* Callback attached to DISC_EC_NUMBER_NOTE items. In the visible code it only
 * shows a placeholder message; item_list and userdata are not used there. */
static void ECNumberNoteDiscrepancyCallback (ValNodePtr item_list, Pointer userdata)
10956
Message (MSG_OK, "I could launch the editor for the individual feature, or I could convert the note to an EC number qual");
10960
/* Reports whether text appears to contain an EC number. The visible code
 * locates the first digit and the first '-', scans runs of digits/dashes
 * separated by '.', counts them in nums_found and compares the count with 4;
 * it can recurse on the remainder of the string.
 * NOTE(review): most branch bodies and all but one return statement are not
 * visible here - the exact acceptance rule must be confirmed in the full file. */
static Boolean ECNumberInText (CharPtr text)
10962
CharPtr cp_num, cp_dash, cp_start;
10963
Int4 nums_found = 0;
10965
if (StringHasNoText (text)) {
10969
/* cp_num: first decimal digit (or the terminator when there is none) */
cp_num = text + StrCSpn (text, "1234567890");
10970
cp_dash = StringStr (text, "-");
10971
/* choose the scan start from whichever of digit/dash comes first */
if (cp_dash == NULL || cp_dash > cp_num) {
10974
cp_start = cp_dash;
10977
while (*cp_start != 0) {
10979
while (isdigit (*cp_start) || *cp_start == '-') {
10982
if (*cp_start != '.') {
10983
if (nums_found == 4) {
10986
/* retry on the rest of the text */
return ECNumberInText (cp_start);
10992
if (nums_found == 4) {
11000
/* Feature visitor: adds sfp to the list passed through userdata when its
 * comment contains an EC number; otherwise, for a coding region with a
 * product, checks the protein feature's names and description on the product
 * bioseq and adds the coding region when one of them contains an EC number.
 * NOTE(review): whether the name loop stops after the first hit is not visible
 * here (a break may sit on a missing line); if it does not, the same feature
 * could be added more than once - confirm. */
static void FindECNumberNotes (SeqFeatPtr sfp, Pointer userdata)
11002
ValNodePtr PNTR ec_number_features;
11003
BioseqPtr prot_bsp;
11004
SeqMgrFeatContext fcontext;
11005
SeqFeatPtr prot_sfp;
11009
/* features without comment text are rejected by this guard */
if (sfp == NULL || userdata == NULL || StringHasNoText (sfp->comment))
11014
ec_number_features = (ValNodePtr PNTR) userdata;
11016
if (ECNumberInText (sfp->comment))
11018
ValNodeAddPointer (ec_number_features, OBJ_SEQFEAT, sfp);
11020
else if (sfp->data.choice == SEQFEAT_CDREGION && sfp->product != NULL)
11022
/* look at the first protein feature on the product bioseq */
prot_bsp = BioseqFindFromSeqLoc(sfp->product);
11023
prot_sfp = SeqMgrGetNextFeature(prot_bsp, NULL, SEQFEAT_PROT, FEATDEF_PROT, &fcontext);
11024
if (prot_sfp != NULL && prot_sfp->data.value.ptrvalue != NULL) {
11025
prp = (ProtRefPtr) prot_sfp->data.value.ptrvalue;
11026
for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
11027
if (ECNumberInText (vnp->data.ptrvalue)) {
11028
/* the coding region, not the protein feature, is what gets reported */
ValNodeAddPointer (ec_number_features, OBJ_SEQFEAT, sfp);
11032
if (ECNumberInText (prp->desc)) {
11033
ValNodeAddPointer (ec_number_features, OBJ_SEQFEAT, sfp);
11040
/* Collects features with FindECNumberNotes and, when any were found, appends
 * one DISC_EC_NUMBER_NOTE item (with ECNumberNoteDiscrepancyCallback attached)
 * to discrepancy_list. Does nothing when discrepancy_list is NULL. */
static void AddECNumberNoteDiscrepancies (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list)
11042
ValNodePtr ec_number_features = NULL;
11043
DiscrepancyItemPtr dip;
11044
CharPtr bad_fmt = "%d features have EC numbers in notes or products.";
11046
if (discrepancy_list == NULL) return;
11048
VisitFeaturesInSep (sep, &ec_number_features, FindECNumberNotes);
11050
if (ec_number_features != NULL)
11052
dip = (DiscrepancyItemPtr) MemNew (sizeof (DiscrepancyItemData));
11055
dip->disc_type = DISC_EC_NUMBER_NOTE;
11056
/* buffer: format length plus 15 spare bytes for the expanded %d */
dip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_fmt) + 15));
11057
sprintf (dip->description, bad_fmt, ValNodeLen (ec_number_features));
11058
dip->callback_func = ECNumberNoteDiscrepancyCallback;
11059
dip->datafree_func = NULL;
11060
dip->callback_data = NULL;
11061
dip->item_list = ec_number_features;
11062
ValNodeAddPointer (discrepancy_list, 0, dip);
11069
/* Feature visitor for coding regions and RNAs: when the feature's pseudo flag
 * differs from that of its gene, adds the feature and the gene to the list
 * passed through userdata.
 * NOTE(review): how the gene xref (grp) influences the choice of gene_sfp is
 * on lines not visible here. */
static void FindPseudoDiscrepanciesCallback (SeqFeatPtr sfp, Pointer userdata)
11071
ValNodePtr PNTR pseudo_features;
11073
SeqFeatPtr gene_sfp = NULL;
11074
Boolean found_conflict = FALSE;
11076
if (sfp == NULL || (sfp->data.choice != SEQFEAT_CDREGION && sfp->data.choice != SEQFEAT_RNA)
11077
|| userdata == NULL)
11082
grp = SeqMgrGetGeneXref (sfp);
11088
gene_sfp = SeqMgrGetOverlappingGene (sfp->location, NULL);
11089
if (gene_sfp == NULL)
11094
/* mismatch: exactly one of the feature and the gene is pseudo */
if ((sfp->pseudo && ! gene_sfp->pseudo)
11095
|| (!sfp->pseudo && gene_sfp->pseudo))
11097
pseudo_features = (ValNodePtr PNTR) userdata;
11098
ValNodeAddPointer (pseudo_features, OBJ_SEQFEAT, sfp);
11099
if (gene_sfp != NULL)
11101
ValNodeAddPointer (pseudo_features, OBJ_SEQFEAT, gene_sfp);
11107
/* Collects features with FindPseudoDiscrepanciesCallback and, when any were
 * found, appends one DISC_PSEUDO_MISMATCH item to discrepancy_list.
 * Does nothing when discrepancy_list is NULL. */
static void FindPseudoDiscrepancies (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list)
11109
ValNodePtr pseudo_features = NULL;
11110
DiscrepancyItemPtr dip;
11111
CharPtr bad_fmt = "%d CDSs, RNAs, and genes have mismatching pseudos.";
11113
if (discrepancy_list == NULL) return;
11115
VisitFeaturesInSep (sep, &pseudo_features, FindPseudoDiscrepanciesCallback);
11117
if (pseudo_features != NULL)
11119
dip = (DiscrepancyItemPtr) MemNew (sizeof (DiscrepancyItemData));
11122
dip->disc_type = DISC_PSEUDO_MISMATCH;
11123
/* buffer: format length plus 15 spare bytes for the expanded %d */
dip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_fmt) + 15));
11124
sprintf (dip->description, bad_fmt, ValNodeLen (pseudo_features));
11125
dip->callback_func = NULL;
11126
dip->datafree_func = NULL;
11127
dip->callback_data = NULL;
11128
dip->item_list = pseudo_features;
11129
ValNodeAddPointer (discrepancy_list, 0, dip);
11136
/* Feature visitor: adds sfp to the list passed through userdata when its
 * location is a SEQLOC_MIX. */
static void FindJoinedLocations (SeqFeatPtr sfp, Pointer userdata)
11138
ValNodePtr PNTR joined_features;
11140
if (sfp == NULL || userdata == NULL || sfp->location == NULL)
11145
joined_features = (ValNodePtr PNTR) userdata;
11146
/* only the top-level location choice is examined */
if (sfp->location->choice == SEQLOC_MIX)
11148
ValNodeAddPointer (joined_features, OBJ_SEQFEAT, sfp);
11152
/* Collects features with FindJoinedLocations and, when any were found,
 * appends one DISC_JOINED_FEATURES item to discrepancy_list.
 * Does nothing when discrepancy_list is NULL. */
static void AddJoinedFeatureDiscrepancies (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list)
11154
ValNodePtr joined_features = NULL;
11156
DiscrepancyItemPtr dip;
11157
CharPtr bad_fmt = "%d features have joined locations.";
11159
if (discrepancy_list == NULL) return;
11161
VisitFeaturesInSep (sep, &joined_features, FindJoinedLocations);
11163
if (joined_features != NULL)
11165
dip = (DiscrepancyItemPtr) MemNew (sizeof (DiscrepancyItemData));
11168
dip->disc_type = DISC_JOINED_FEATURES;
11169
/* buffer: format length plus 15 spare bytes for the expanded %d */
dip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_fmt) + 15));
11170
sprintf (dip->description, bad_fmt, ValNodeLen (joined_features));
11171
dip->callback_func = NULL;
11172
dip->datafree_func = NULL;
11173
dip->callback_data = NULL;
11174
dip->item_list = joined_features;
11175
ValNodeAddPointer (discrepancy_list, 0, dip);
11181
/* Bioseq visitor: lists the gene features on bsp, compares every pair, tags
 * both genes of a pair whose locations compare as anything other than
 * SLC_NO_MATCH, and links the tagged genes onto the list passed through
 * userdata. Untagged genes are dropped.
 * NOTE(review): the body of the strand test is not visible; presumably pairs
 * on different strands are skipped - confirm. */
static void FindOverlappingGenes (BioseqPtr bsp, Pointer userdata)
11183
SeqFeatPtr sfp, sfp_compare;
11184
SeqMgrFeatContext context;
11185
ValNodePtr PNTR overlapping_genes = NULL, non_overlap;
11186
ValNodePtr gene_list = NULL, vnp, vnp_next;
11188
if (bsp == NULL || userdata == NULL)
11193
overlapping_genes = (ValNodePtr PNTR) userdata;
11195
/* every gene starts out with choice 0 (not overlapping) */
for (sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_GENE, FEATDEF_GENE, &context);
11197
sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_GENE, FEATDEF_GENE, &context))
11199
ValNodeAddPointer (&gene_list, 0, sfp);
11202
/* pairwise comparison of each gene with every later gene in the list */
for (vnp = gene_list; vnp != NULL && vnp->next != NULL; vnp = vnp->next)
11204
sfp = (SeqFeatPtr) vnp->data.ptrvalue;
11205
for (vnp_next = vnp->next; vnp_next != NULL; vnp_next = vnp_next->next)
11207
sfp_compare = (SeqFeatPtr) vnp_next->data.ptrvalue;
11209
if (SeqLocStrand (sfp->location) != SeqLocStrand (sfp_compare->location))
11214
if (SeqLocCompare (sfp->location, sfp_compare->location) != SLC_NO_MATCH)
11216
vnp->choice = OBJ_SEQFEAT;
11217
vnp_next->choice = OBJ_SEQFEAT;
11222
/* strip the nodes still at choice 0, then hand the rest to the caller's list */
non_overlap = ValNodeExtractList (&gene_list, 0);
11223
non_overlap = ValNodeFree (non_overlap);
11224
ValNodeLink (overlapping_genes, gene_list);
11228
/* Runs FindOverlappingGenes over every bioseq in sep and, when any genes were
 * collected, appends one DISC_OVERLAPPING_GENES item to discrepancy_list. */
static void AddOverlappingGeneDiscrepancies (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list)
11230
DiscrepancyItemPtr dip;
11231
CharPtr bad_fmt = "%d genes overlap another gene on the same strand.";
11232
ValNodePtr overlapping_genes = NULL;
11234
/* guard body not visible - presumably an early return */
if (discrepancy_list == NULL)
11239
VisitBioseqsInSep (sep, &overlapping_genes, FindOverlappingGenes);
11241
if (overlapping_genes != NULL)
11243
dip = (DiscrepancyItemPtr) MemNew (sizeof (DiscrepancyItemData));
11246
dip->disc_type = DISC_OVERLAPPING_GENES;
11247
/* buffer: format length plus 15 spare bytes for the expanded %d */
dip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_fmt) + 15));
11248
sprintf (dip->description, bad_fmt, ValNodeLen (overlapping_genes));
11249
dip->callback_func = NULL;
11250
dip->datafree_func = NULL;
11251
dip->callback_data = NULL;
11252
dip->item_list = overlapping_genes;
11253
ValNodeAddPointer (discrepancy_list, 0, dip);
11259
/* Working record for the overlapping-coding-region test.
 * NOTE(review): code in this file also uses `sfp`, `left` and `right` members
 * of this struct; their declarations are not visible here. */
typedef struct cdsoverlap
11261
/* private copy of the product label (made with StringSave in CDSOverlapNew) */
CharPtr product_name;
11265
} CDSOverlapData, PNTR CDSOverlapPtr;
11268
/* Allocates a CDSOverlapData record, storing a copy of product_name and the
 * right coordinate.
 * NOTE(review): the assignments of sfp and left and the return statement are
 * not visible here - presumably they are stored and the record returned. */
static CDSOverlapPtr CDSOverlapNew (SeqFeatPtr sfp, CharPtr product_name, Int4 left, Int4 right)
11272
cop = (CDSOverlapPtr) MemNew (sizeof (CDSOverlapData));
11275
cop->product_name = StringSave (product_name);
11278
cop->right = right;
11284
/* Recursively frees a list of CDSOverlapData records: the tail first, then
 * this node's product_name copy, its record and the node itself.
 * NOTE(review): the NULL guards and the return statement are not visible. */
static ValNodePtr FreeCDSOverlapList (ValNodePtr vnp)
11290
vnp->next = FreeCDSOverlapList (vnp->next);
11291
cop = (CDSOverlapPtr) vnp->data.ptrvalue;
11294
cop->product_name = MemFree (cop->product_name);
11295
cop = MemFree (cop);
11296
/* clear the payload pointer before the node itself is released */
vnp->data.ptrvalue = NULL;
11298
vnp = ValNodeFree (vnp);
11304
/* Builds a new list of OBJ_SEQFEAT nodes holding the features of every
 * overlap record whose node has a non-zero choice; the input list is left for
 * the caller to free.
 * NOTE(review): the loop advance and the return statement are not visible. */
static ValNodePtr FeatureListFromOverlapList (ValNodePtr vnp)
11306
ValNodePtr feat_list = NULL;
11309
while (vnp != NULL)
11311
/* choice 0 marks records that were never flagged as overlapping */
if (vnp->choice != 0 && vnp->data.ptrvalue != NULL)
11313
cop = (CDSOverlapPtr) vnp->data.ptrvalue;
11314
ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, cop->sfp);
11322
/* Word tables used by OverlappingProductNameSimilar: two product names that
 * both contain a word from similar_product_words count as similar; a name
 * containing a word from ignore_similar_product_words is tested separately.
 * The num_* constants hold the element counts of the tables.
 * NOTE(review): most table entries are not visible here. */
static CharPtr similar_product_words[] =
11327
const int num_similar_product_words = sizeof (similar_product_words) / sizeof (CharPtr);
11329
static CharPtr ignore_similar_product_words[] =
11330
{ "hypothetical protein",
11334
const int num_ignore_similar_product_words = sizeof (ignore_similar_product_words) / sizeof (CharPtr);
11337
/* Decides whether two product names are similar enough for overlapping coding
 * regions to be reported. Visible checks, in order: both names empty; exactly
 * one name empty; both names containing a similarity word; either name
 * containing an ignore word; case-insensitive equality.
 * NOTE(review): the value returned by each check is on lines not visible
 * here - confirm against the full file. */
static Boolean OverlappingProductNameSimilar (CharPtr str1, CharPtr str2)
11340
Boolean str1_has_similarity_word = FALSE, str2_has_similarity_word = FALSE;
11342
if (StringHasNoText (str1) && StringHasNoText (str2))
11346
else if (StringHasNoText (str1) || StringHasNoText (str2))
11351
/* if both product names contain one of the special case similarity words,
11352
* the product names are similar. */
11353
for (i = 0; i < num_similar_product_words; i++)
11355
if (StringISearch (str1, similar_product_words [i]) != NULL)
11357
str1_has_similarity_word = TRUE;
11359
if (StringISearch (str2, similar_product_words [i]) != NULL)
11361
str2_has_similarity_word = TRUE;
11364
if (str1_has_similarity_word && str2_has_similarity_word)
11369
/* otherwise, if one of the product names contains one of special ignore similarity
11370
* words, the product names are not similar.
11372
for (i = 0; i < num_ignore_similar_product_words; i++)
11374
if (StringISearch (str1, ignore_similar_product_words[i]) != NULL
11375
|| StringISearch (str2, ignore_similar_product_words[i]) != NULL)
11381
if (StringICmp (str1, str2) == 0)
11392
/* Bioseq visitor: records every coding region on bsp (feature, label, left and
 * right), compares each with the later ones, tags both records of a pair
 * whose locations compare as anything other than SLC_NO_MATCH, and links the
 * tagged features onto the list passed through userdata.
 * NOTE(review): the bodies of the left/right, product-name and strand tests
 * are not visible; presumably each one skips the pair or ends the inner
 * loop - confirm. */
static void FindOverlappingCDSs (BioseqPtr bsp, Pointer userdata)
11395
SeqMgrFeatContext context;
11396
ValNodePtr PNTR overlapping_cds = NULL, cds_list;
11397
ValNodePtr overlap_list = NULL, vnp, vnp_next;
11398
CDSOverlapPtr cop, cop_compare;
11400
if (bsp == NULL || userdata == NULL)
11405
overlapping_cds = (ValNodePtr PNTR) userdata;
11407
/* every record starts out with choice 0 (not overlapping) */
for (sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_CDREGION, FEATDEF_CDS, &context);
11409
sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_CDREGION, FEATDEF_CDS, &context))
11411
ValNodeAddPointer (&overlap_list, 0, CDSOverlapNew (sfp, context.label, context.left, context.right));
11414
for (vnp = overlap_list; vnp != NULL && vnp->next != NULL; vnp = vnp->next)
11416
cop = (CDSOverlapPtr) vnp->data.ptrvalue;
11421
for (vnp_next = vnp->next; vnp_next != NULL; vnp_next = vnp_next->next)
11423
cop_compare = (CDSOverlapPtr) vnp_next->data.ptrvalue;
11424
if (cop_compare == NULL)
11428
/* the later record starts to the right of this record's end */
else if (cop_compare->left > cop->right)
11432
if (!OverlappingProductNameSimilar (cop->product_name, cop_compare->product_name))
11436
if (SeqLocStrand (cop->sfp->location) != SeqLocStrand (cop_compare->sfp->location))
11441
if (SeqLocCompare (cop->sfp->location, cop_compare->sfp->location) != SLC_NO_MATCH)
11443
vnp->choice = OBJ_SEQFEAT;
11444
vnp_next->choice = OBJ_SEQFEAT;
11449
/* copy out the flagged features, then release the working records */
cds_list = FeatureListFromOverlapList(overlap_list);
11450
if (cds_list != NULL)
11452
ValNodeLink (overlapping_cds, cds_list);
11454
overlap_list = FreeCDSOverlapList (overlap_list);
11458
/* Runs FindOverlappingCDSs over every bioseq in sep and, when any coding
 * regions were collected, appends one DISC_OVERLAPPING_CDS item (built with
 * NewDiscrepancyItem) to discrepancy_list. */
static void AddOverlappingCodingRegionDiscrepancies (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list)
11460
DiscrepancyItemPtr dip;
11461
CharPtr bad_fmt = "%d coding regions overlap another coding region with a similar or identical name.";
11462
ValNodePtr overlapping_cds = NULL;
11464
/* guard body not visible - presumably an early return */
if (discrepancy_list == NULL)
11469
VisitBioseqsInSep (sep, &overlapping_cds, FindOverlappingCDSs);
11471
if (overlapping_cds != NULL)
11473
dip = NewDiscrepancyItem (DISC_OVERLAPPING_CDS, bad_fmt, overlapping_cds);
11477
ValNodeAddPointer (discrepancy_list, 0, dip);
11483
/* Bioseq visitor: adds bsp to the list passed through userdata when it passes
 * ISA_na and its length is below 200. */
static void FindShortContigsCallback (BioseqPtr bsp, Pointer userdata)
11485
ValNodePtr PNTR bioseq_list;
11487
/* guard (body not visible - presumably an early return): bioseqs of length
 * 200 or more are rejected here */
if (bsp == NULL || !ISA_na (bsp->mol) || userdata == NULL || bsp->length >= 200)
11492
bioseq_list = (ValNodePtr PNTR) userdata;
11494
ValNodeAddPointer (bioseq_list, OBJ_BIOSEQ, bsp);
11497
/* Collects bioseqs with FindShortContigsCallback and, when any were found,
 * appends one DISC_SHORT_CONTIG item to discrepancy_list.
 * Does nothing when discrepancy_list is NULL. */
static void FindShortContigs (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list)
11499
DiscrepancyItemPtr dip;
11500
CharPtr bad_fmt = "%d contigs are shorter than 200 nt.";
11501
ValNodePtr bioseq_list = NULL;
11503
if (discrepancy_list == NULL) return;
11505
VisitBioseqsInSep (sep, &bioseq_list, FindShortContigsCallback);
11507
if (bioseq_list != NULL)
11509
dip = (DiscrepancyItemPtr) MemNew (sizeof (DiscrepancyItemData));
11512
dip->disc_type = DISC_SHORT_CONTIG;
11513
/* buffer: format length plus 15 spare bytes for the expanded %d */
dip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_fmt) + 15));
11514
sprintf (dip->description, bad_fmt, ValNodeLen (bioseq_list));
11515
dip->callback_func = NULL;
11516
dip->datafree_func = NULL;
11517
dip->callback_data = NULL;
11518
dip->item_list = bioseq_list;
11519
ValNodeAddPointer (discrepancy_list, 0, dip);
11524
/* Groups the source descriptors that share one biological source.
 * NOTE(review): code in this file also uses a `biop` member of this struct;
 * its declaration is not visible here. */
typedef struct biosrccheck
11527
/* OBJ_SEQDESC nodes for the descriptors whose source matched this group */
ValNodePtr sdp_list;
11528
} BioSrcCheckData, PNTR BioSrcCheckPtr;
11530
/* Recursively frees a list of BioSrcCheckData records: the tail first, then
 * each record's sdp_list, the record and the node.
 * NOTE(review): the guard body and the return statement are not visible. */
static ValNodePtr FreeBioSrcCheckList (ValNodePtr biosrc_list)
11532
BioSrcCheckPtr bscp;
11534
if (biosrc_list == NULL)
11539
biosrc_list->next = FreeBioSrcCheckList (biosrc_list->next);
11541
bscp = (BioSrcCheckPtr) biosrc_list->data.ptrvalue;
11544
/* only the list nodes of sdp_list are freed here */
bscp->sdp_list = ValNodeFree (bscp->sdp_list);
11545
bscp = MemFree (bscp);
11547
biosrc_list = ValNodeFree (biosrc_list);
11552
/* Bioseq visitor: fetches the source descriptor of bsp and files it under the
 * group in the list passed through userdata whose biological source matches
 * (BioSourceMatch); a new group is created otherwise.
 * NOTE(review): the NULL check on sdp, the place where `found` is set and the
 * condition guarding the new-group code are not visible here - confirm. */
static void FindInconsistentSourcesCallback (BioseqPtr bsp, Pointer userdata)
11554
ValNodePtr PNTR biosrc_list, vnp;
11556
BioSrcCheckPtr bscp;
11557
Boolean found = FALSE;
11558
SeqMgrDescContext context;
11560
if (bsp == NULL || !ISA_na (bsp->mol) || userdata == NULL)
11565
biosrc_list = (ValNodePtr PNTR) userdata;
11567
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &context);
11570
/* look for an existing group with a matching source */
for (vnp = *biosrc_list; vnp != NULL && !found; vnp = vnp->next)
11572
bscp = (BioSrcCheckPtr) vnp->data.ptrvalue;
11573
if (bscp != NULL && BioSourceMatch (sdp->data.ptrvalue, bscp->biop))
11575
ValNodeAddPointer (&(bscp->sdp_list), OBJ_SEQDESC, sdp);
11581
/* new group: the source pointer is stored, not copied */
bscp = (BioSrcCheckPtr) MemNew (sizeof (BioSrcCheckData));
11584
bscp->biop = sdp->data.ptrvalue;
11585
ValNodeAddPointer (&(bscp->sdp_list), OBJ_SEQDESC, sdp);
11586
ValNodeAddPointer (biosrc_list, 0, bscp);
11593
/* Builds a DISC_INCONSISTENT_BIOSRC item for one source group and moves the
 * group's sdp_list into the item, leaving bscp->sdp_list NULL.
 * NOTE(review): the guard body and the return statement are not visible;
 * presumably the item (or NULL for an empty group) is returned. */
static DiscrepancyItemPtr InconsistentBiosrc (BioSrcCheckPtr bscp)
11595
DiscrepancyItemPtr dip = NULL;
11596
CharPtr bad_fmt = "%d contigs have identical sources that do not match another contig source.";
11598
if (bscp == NULL || bscp->sdp_list == NULL)
11603
dip = (DiscrepancyItemPtr) MemNew (sizeof (DiscrepancyItemData));
11606
dip->disc_type = DISC_INCONSISTENT_BIOSRC;
11607
/* buffer: format length plus 15 spare bytes for the expanded %d */
dip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_fmt) + 15));
11608
sprintf (dip->description, bad_fmt, ValNodeLen (bscp->sdp_list));
11609
dip->callback_func = NULL;
11610
dip->datafree_func = NULL;
11611
dip->callback_data = NULL;
11612
/* transfer the list to the item so that freeing the group does not free it */
dip->item_list = bscp->sdp_list;
11613
bscp->sdp_list = NULL;
11619
/* Groups source descriptors with FindInconsistentSourcesCallback and, when
 * more than one group exists, appends one InconsistentBiosrc item per group
 * to discrepancy_list. The grouping list is freed afterwards.
 * Does nothing when discrepancy_list is NULL. */
static void FindNonmatchingContigSources (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list)
11621
DiscrepancyItemPtr dip;
11622
ValNodePtr biosrc_list = NULL, vnp;
11624
if (discrepancy_list == NULL) return;
11626
VisitBioseqsInSep (sep, &biosrc_list, FindInconsistentSourcesCallback);
11628
/* a single group means all sources agree, so nothing is reported */
if (biosrc_list != NULL && biosrc_list->next != NULL)
11630
for (vnp = biosrc_list; vnp != NULL; vnp = vnp->next)
11632
dip = InconsistentBiosrc (vnp->data.ptrvalue);
11633
ValNodeAddPointer (discrepancy_list, 0, dip);
11636
biosrc_list = FreeBioSrcCheckList (biosrc_list);
11640
/* Table of suspect product-name phrases, its element count, and the indices
 * of the two entries that FindSuspectProductNamesCallback handles with
 * dedicated character checks instead of a substring search.
 * NOTE(review): most entries are not visible here; presumably index 17 is the
 * "Brackets or parenthesis" entry and index 18 the "ending with period or
 * comma" entry - these constants must be kept in step with the table. */
static CharPtr suspect_product_names[] =
11659
"Brackets or parenthesis [] ()",
11660
"ending with period or comma",
11692
const int num_suspect_product_names = sizeof (suspect_product_names) / sizeof (CharPtr);
11694
const int brackets_name = 17;
11695
const int end_with_punct_name = 18;
11698
/* Feature visitor for protein features: userdata is an array of feature lists
 * with one slot per entry of suspect_product_names. For each entry k the
 * protein's names are tested and sfp is added to feature_list[k] on a hit.
 * Entry brackets_name tests for bracket/parenthesis characters, entry
 * end_with_punct_name tests the last character, entries shorter than four
 * characters use the case-sensitive StringSearch, and all others use
 * StringISearch.
 * NOTE(review): whether each name loop stops after the first hit is not
 * visible here (a break may sit on a missing line). */
static void FindSuspectProductNamesCallback (SeqFeatPtr sfp, Pointer userdata)
11700
ValNodePtr PNTR feature_list;
11706
if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT || sfp->data.value.ptrvalue == NULL
11707
|| userdata == NULL)
11712
prp = (ProtRefPtr) sfp->data.value.ptrvalue;
11713
feature_list = (ValNodePtr PNTR) userdata;
11715
for (k = 0; k < num_suspect_product_names; k++)
11717
if (k == brackets_name)
11719
for (vnp = prp->name; vnp != NULL; vnp = vnp->next)
11721
if (StringChr (vnp->data.ptrvalue, '[') != NULL
11722
|| StringChr (vnp->data.ptrvalue, ']') != NULL
11723
|| StringChr (vnp->data.ptrvalue, '(') != NULL
11724
|| StringChr (vnp->data.ptrvalue, ')') != NULL)
11726
ValNodeAddPointer (&(feature_list[k]), OBJ_SEQFEAT, sfp);
11731
else if (k == end_with_punct_name)
11733
for (vnp = prp->name; vnp != NULL; vnp = vnp->next)
11735
len = StringLen (vnp->data.ptrvalue);
11736
str = (CharPtr) vnp->data.ptrvalue;
11737
/* NOTE(review): with an empty or NULL name len is 0 and str[len - 1] reads
 * outside the string; no length check is visible here - confirm one exists
 * or add a len > 0 test */
if (str[len - 1] == '.' || str[len - 1] == ',')
11739
ValNodeAddPointer (&(feature_list[k]), OBJ_SEQFEAT, sfp);
11746
/* short phrases are matched case-sensitively, longer ones case-insensitively */
if (StringLen (suspect_product_names [k]) < 4)
11748
for (vnp = prp->name; vnp != NULL; vnp = vnp->next)
11750
if (StringSearch(vnp->data.ptrvalue, suspect_product_names[k]) != NULL)
11752
ValNodeAddPointer (&(feature_list[k]), OBJ_SEQFEAT, sfp);
11759
for (vnp = prp->name; vnp != NULL; vnp = vnp->next)
11761
if (StringISearch(vnp->data.ptrvalue, suspect_product_names[k]) != NULL)
11763
ValNodeAddPointer (&(feature_list[k]), OBJ_SEQFEAT, sfp);
11774
/* Builds a DISC_SUSPECT_PRODUCT_NAME item whose description names
 * product_name and the number of features in feature_list; the list is handed
 * to the item rather than copied.
 * NOTE(review): the guard body and the return statement are not visible;
 * presumably the item (or NULL when the guard fires) is returned. */
static DiscrepancyItemPtr SuspectProductName (CharPtr product_name, ValNodePtr feature_list)
11776
DiscrepancyItemPtr dip = NULL;
11777
CharPtr bad_fmt = "%d product names contain %s";
11779
if (feature_list == NULL || StringHasNoText (product_name))
11784
dip = (DiscrepancyItemPtr) MemNew (sizeof (DiscrepancyItemData));
11787
dip->disc_type = DISC_SUSPECT_PRODUCT_NAME;
11788
/* buffer: format and phrase lengths plus 15 spare bytes for the expanded %d */
dip->description = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_fmt) + StringLen (product_name) + 15));
11789
sprintf (dip->description, bad_fmt, ValNodeLen (feature_list), product_name);
11790
dip->callback_func = NULL;
11791
dip->datafree_func = NULL;
11792
dip->callback_data = NULL;
11793
dip->item_list = feature_list;
11799
/* Runs FindSuspectProductNamesCallback with one feature list per suspect
 * phrase, turns every non-empty list into a subcategory item, and appends a
 * single parent DISC_SUSPECT_PRODUCT_NAME item that carries a combined copy of
 * all lists plus the subcategories. Does nothing when discrepancy_list is
 * NULL or the per-phrase array cannot be allocated. */
static void FindSuspectProductNames (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list)
11801
ValNodePtr PNTR feature_list = NULL;
11802
ValNodePtr master_list = NULL;
11804
DiscrepancyItemPtr dip;
11805
ValNodePtr subcategories = NULL;
11807
if (discrepancy_list == NULL) return;
11809
feature_list = (ValNodePtr PNTR) MemNew (sizeof (ValNodePtr) * num_suspect_product_names);
11810
if (feature_list == NULL) return;
11812
/* initialize array for suspicious product names */
11813
for (k = 0; k < num_suspect_product_names; k++)
11815
feature_list[k] = NULL;
11818
VisitFeaturesInSep (sep, feature_list, FindSuspectProductNamesCallback);
11820
for (k = 0; k < num_suspect_product_names; k++)
11822
if (feature_list[k] != NULL)
11824
dip = SuspectProductName (suspect_product_names[k], feature_list[k]);
11827
ValNodeAddPointer (&subcategories, 0, dip);
11829
/* the parent gets its own copy of the nodes; the subcategory item keeps
 * feature_list[k] itself */
ValNodeLinkCopy (&master_list, feature_list[k]);
11833
if (master_list != NULL)
11835
dip = SuspectProductName ("suspect phrase or characters", master_list);
11838
dip->subcategories = subcategories;
11839
ValNodeAddPointer (discrepancy_list, 0, dip);
11843
/* only the array of list heads is released; the lists now belong to the items */
MemFree (feature_list);
11847
/* Builds a DISC_INCONSISTENT_BIOSRC_DEFLINE item for one source/title
 * descriptor pair. The description names the organism's taxname when it has
 * text, and the item lists both descriptors.
 * NOTE(review): the branch structure around the two `desc` assignments, the
 * guard bodies and the return statement are not visible; presumably the title
 * text is the fallback when no taxname is available - confirm. */
static DiscrepancyItemPtr InconsistentSourceDefline (SeqDescrPtr biop_sdp, SeqDescrPtr title_sdp)
11849
DiscrepancyItemPtr dip = NULL;
11850
CharPtr bad_fmt = "Organism description not found in definition line: %s.";
11852
CharPtr desc = NULL;
11854
if (biop_sdp == NULL || title_sdp == NULL)
11859
biop = (BioSourcePtr) biop_sdp->data.ptrvalue;
11860
if (biop != NULL && biop->org != NULL && !StringHasNoText (biop->org->taxname))
11862
desc = biop->org->taxname;
11866
desc = title_sdp->data.ptrvalue;
11868
if (StringHasNoText (desc)) {
11872
dip = (DiscrepancyItemPtr) MemNew (sizeof (DiscrepancyItemData));
11875
dip->disc_type = DISC_INCONSISTENT_BIOSRC_DEFLINE;
11876
/* NOTE(review): no spare bytes are added here; the buffer is large enough
 * only because the two characters of "%s" in bad_fmt are replaced by desc,
 * which leaves room for the terminator. Changing the format string could
 * overflow it. */
dip->description = (CharPtr)MemNew (StringLen (bad_fmt) + StringLen (desc));
11877
sprintf (dip->description, bad_fmt, desc);
11878
dip->callback_func = NULL;
11879
dip->datafree_func = NULL;
11880
dip->callback_data = NULL;
11881
dip->item_list = NULL;
11882
ValNodeAddPointer (&(dip->item_list), OBJ_SEQDESC, biop_sdp);
11883
ValNodeAddPointer (&(dip->item_list), OBJ_SEQDESC, title_sdp);
11889
/* Bioseq visitor: when the organism taxname of bsp's source descriptor does
 * not occur (case-sensitive StringStr) in its title descriptor, appends an
 * InconsistentSourceDefline item to the list passed through userdata.
 * Bioseqs lacking a source, an organism, a taxname or a title are examined no
 * further (the bodies of some of these guards are not visible here). */
static void FindInconsistentSourceAndDeflineCallback (BioseqPtr bsp, Pointer userdata)
11891
DiscrepancyItemPtr dip;
11892
ValNodePtr PNTR discrepancy_list;
11893
SeqDescrPtr biop_sdp, title_sdp;
11894
SeqMgrDescContext context;
11897
discrepancy_list = (ValNodePtr PNTR) userdata;
11898
if (bsp == NULL || discrepancy_list == NULL) return;
11900
biop_sdp = SeqMgrGetNextDescriptor(bsp, NULL, Seq_descr_source, &context);
11901
if (biop_sdp == NULL || biop_sdp->data.ptrvalue == NULL)
11905
biop = (BioSourcePtr) biop_sdp->data.ptrvalue;
11906
if (biop->org == NULL)
11910
if (StringHasNoText (biop->org->taxname))
11915
title_sdp = SeqMgrGetNextDescriptor(bsp, NULL, Seq_descr_title, &context);
11916
if (title_sdp == NULL) return;
11918
if (StringStr (title_sdp->data.ptrvalue, biop->org->taxname) == NULL)
11920
dip = InconsistentSourceDefline (biop_sdp, title_sdp);
11923
ValNodeAddPointer (discrepancy_list, 0, dip);
11929
/* Collects per-bioseq source/defline items with
 * FindInconsistentSourceAndDeflineCallback. A single item is linked directly
 * onto discrepancy_list; several items become the subcategories of one parent
 * DISC_INCONSISTENT_BIOSRC_DEFLINE item. Does nothing when discrepancy_list is
 * NULL.
 * NOTE(review): the body of the empty-list branch and the else keyword before
 * the parent-item code are not visible here. */
static void FindInconsistentSourceAndDefline (SeqEntryPtr sep, ValNodePtr PNTR discrepancy_list)
11931
ValNodePtr disc_pairs = NULL;
11932
CharPtr bad_fmt = "%d sources do not match definition lines.";
11933
DiscrepancyItemPtr dip;
11935
if (discrepancy_list == NULL) return;
11937
VisitBioseqsInSep (sep, &disc_pairs, FindInconsistentSourceAndDeflineCallback);
11939
if (disc_pairs == NULL)
11943
else if (disc_pairs->next == NULL)
11945
ValNodeLink (discrepancy_list, disc_pairs);
11949
/* disc_pairs is passed so the item can be built from it, but the pairs are
 * then kept as subcategories rather than as the parent's own item list */
dip = NewDiscrepancyItem (DISC_INCONSISTENT_BIOSRC_DEFLINE, bad_fmt, disc_pairs);
11950
dip->item_list = NULL;
11951
dip->subcategories = disc_pairs;
11953
ValNodeAddPointer (discrepancy_list, 0, dip);
11958
/* Assigns `level` to every item in discrepancy_list and, recursively,
 * level + 1 to each item's subcategories.
 * NOTE(review): a NULL check on dip may sit on a line not visible here. */
static void SetDiscrepancyLevels (ValNodePtr discrepancy_list, Int4 level)
11960
DiscrepancyItemPtr dip;
11962
while (discrepancy_list != NULL)
11964
dip = (DiscrepancyItemPtr) discrepancy_list->data.ptrvalue;
11967
dip->level = level;
11968
SetDiscrepancyLevels (dip->subcategories, level + 1);
11970
discrepancy_list = discrepancy_list->next;
11975
/* Note that this function contains a hack - it assumes that all of the
11976
* test types that use the same collection function are listed together.
11978
/* Runs the configured discrepancy tests on sep and returns the combined list
 * of items. A NULL dcp enables every test. A test function is skipped when it
 * is the same one that ran for the previous enabled entry, which is why
 * entries sharing a collection function must be adjacent in
 * discrepancy_info_list. */
static ValNodePtr CollectDiscrepancies (SeqEntryPtr sep, DiscrepancyConfigPtr dcp)
11980
ValNodePtr discrepancy_list = NULL;
11982
PerformDiscrepancyTest last_test_func = NULL;
11984
for (i = 0; i < MAX_DISC_TYPE; i++)
11986
if ((dcp == NULL || dcp->conf_list[i])
11987
&& discrepancy_info_list[i].test_func != NULL
11988
&& discrepancy_info_list[i].test_func != last_test_func)
11990
discrepancy_info_list[i].test_func (sep, &discrepancy_list);
11991
/* remember the function so an adjacent entry sharing it does not rerun it */
last_test_func = discrepancy_info_list[i].test_func;
11995
/* because some tests are run together, need to remove unwanted results */
11996
RemoveUnwantedDiscrepancyItems (&discrepancy_list, dcp);
11998
/* normalize the discrepancy levels so that they will be correctly displayed */
11999
SetDiscrepancyLevels (discrepancy_list, 0);
12000
return discrepancy_list;
12003
/* Static paragraph and two-column format records for the discrepancy
 * document: a 16-pixel first column and a 1000-pixel second column that is
 * marked as the last one.
 * NOTE(review): the meaning of the remaining positional initializers depends
 * on the Nlm_ParData / Nlm_ColData field order, which is not visible here. */
static Nlm_ParData discParFmt = {FALSE, FALSE, FALSE, FALSE, FALSE, 0, 0};
12004
static Nlm_ColData discColFmt[2] = {{16, 0, 0, 0, NULL, 'l', 0,0,0,0, FALSE},
12005
{1000, 0, 0, 0, NULL, 'l', 1,0,0,0, TRUE}};
12008
/* Counts items in discrepancy_list, recursing into subcategories. For an item
 * that is chosen (or when count_all is set) all of its subcategories are
 * counted with count_all = TRUE; otherwise an expanded item's subcategories
 * are counted with count_all = FALSE.
 * NOTE(review): the lines that add the item's own contribution and the return
 * statement are not visible here - confirm what exactly is summed. */
static Int4 CountChosenDiscrepancies (ValNodePtr discrepancy_list, Boolean count_all)
12010
Int4 num_chosen = 0;
12011
DiscrepancyItemPtr dip;
12013
while (discrepancy_list != NULL)
12015
dip = (DiscrepancyItemPtr) discrepancy_list->data.ptrvalue;
12018
if (dip->chosen || count_all)
12022
num_chosen += CountChosenDiscrepancies (dip->subcategories, TRUE);
12029
else if (dip->expanded)
12031
num_chosen += CountChosenDiscrepancies (dip->subcategories, FALSE);
12034
discrepancy_list = discrepancy_list->next;
12039
/* Returns the depth of the displayed discrepancy tree: one for this level
 * plus the deepest level count found among the subcategories of expanded
 * items.
 * NOTE(review): the value returned for a NULL list and the body of the skip
 * test inside the loop are not visible here. */
static Int4 CountLevels (ValNodePtr discrepancy_list)
12041
Int4 num_levels = 1, num, num_sublevels = 0;
12043
DiscrepancyItemPtr dip;
12045
if (discrepancy_list == NULL)
12050
for (vnp = discrepancy_list; vnp != NULL; vnp = vnp->next)
12052
dip = (DiscrepancyItemPtr) vnp->data.ptrvalue;
12053
/* items without subcategories, or collapsed ones, add no depth */
if (dip == NULL || dip->subcategories == NULL || !dip->expanded)
12057
num = CountLevels (dip->subcategories);
12058
if (num > num_sublevels) num_sublevels = num;
12061
/* one level for the top plus levels for the subcategories */
12063
return 1 + num_sublevels;
12067
/*
 * GetColumnFormatArrays
 * Allocates one column-format array per nesting level of the discrepancy
 * list.  The array for level n has n + 3 entries: n + 2 narrow 16-pixel
 * columns followed by one wrapping text column that takes the remaining
 * width of doc.
 *   num_levels - number of levels to build formats for
 *   doc        - document panel whose rectangle (inset by 4) gives the width
 * The result is released with FreeColumnFormatArrays.
 * NOTE(review): only num_levels == 0 is tested in the visible lines; a
 * negative value would reach MemNew - confirm callers never pass one.
 */
static Nlm_ColPtr PNTR GetColumnFormatArrays (Int4 num_levels, DoC doc)
12070
Nlm_ColPtr PNTR col_fmt_array_array = NULL;
12074
if (num_levels == 0)
12079
ObjectRect (doc, &r);
12080
InsetRect (&r, 4, 4);
12081
doc_width = r.right - r.left;
12083
col_fmt_array_array = (Nlm_ColPtr PNTR) MemNew (sizeof (Nlm_ColPtr) * num_levels);
12084
for (n = 0; n < num_levels; n++)
12086
/* n + 2 narrow columns plus the final text column */
col_fmt_array_array[n] = (Nlm_ColPtr) MemNew (sizeof (Nlm_ColData) * (n + 3));
12087
for (k = 0; k < n + 2; k++)
12089
col_fmt_array_array[n][k].pixWidth = 16;
12090
col_fmt_array_array[n][k].pixInset = 0;
12091
col_fmt_array_array[n][k].charWidth = 0;
12092
col_fmt_array_array[n][k].charInset = 0;
12093
col_fmt_array_array[n][k].font = NULL;
12094
col_fmt_array_array[n][k].just = 'l';
12095
col_fmt_array_array[n][k].wrap = 0;
12096
col_fmt_array_array[n][k].bar = 0;
12097
col_fmt_array_array[n][k].underline = 0;
12098
col_fmt_array_array[n][k].left = 0;
12099
col_fmt_array_array[n][k].last = 0;
12101
/* presumably reached after the inner loop, with k == n + 2: the last column
 * gets whatever width the narrow columns left over, wraps, and is marked last */
col_fmt_array_array[n][k].pixWidth = doc_width - ((n + 2) * 16);
12102
col_fmt_array_array[n][k].pixInset = 0;
12103
col_fmt_array_array[n][k].charWidth = 0;
12104
col_fmt_array_array[n][k].charInset = 0;
12105
col_fmt_array_array[n][k].font = NULL;
12106
col_fmt_array_array[n][k].just = 'l';
12107
col_fmt_array_array[n][k].wrap = 1;
12108
col_fmt_array_array[n][k].bar = 0;
12109
col_fmt_array_array[n][k].underline = 0;
12110
col_fmt_array_array[n][k].left = 0;
12111
col_fmt_array_array[n][k].last = 1;
12113
return col_fmt_array_array;
12117
/*
 * FreeColumnFormatArrays
 * Releases the per-level arrays built by GetColumnFormatArrays and then the
 * outer array.  Returns the result of the final MemFree so that callers can
 * assign it back to the pointer they hold.
 *   num_levels - number of entries in col_fmt_array_array; must match the
 *                value used when the arrays were allocated
 */
static Nlm_ColPtr PNTR FreeColumnFormatArrays (Nlm_ColPtr PNTR col_fmt_array_array, Int4 num_levels)
12121
/* nothing to release (the action taken on this branch is not visible here) */
if (col_fmt_array_array == NULL || num_levels < 1)
12125
for (n = 0; n < num_levels; n++)
12127
col_fmt_array_array [n] = MemFree (col_fmt_array_array [n]);
12129
col_fmt_array_array = MemFree (col_fmt_array_array);
12130
return col_fmt_array_array;
12133
/*
 * AddDiscrepancyItem
 * Appends one row for dip to drfp->doc and then recurses into its
 * subcategories one level deeper.
 *   level - nesting depth of dip; selects the column format array and the
 *           number of leading tab-separated empty cells
 * NOTE(review): whether the recursion is limited to expanded items is not
 * visible here - confirm.
 */
static void AddDiscrepancyItem (DiscrepancyReportFormPtr drfp, DiscrepancyItemPtr dip, Int4 level)
12143
/* exact fit: level tabs + " \t \t" (4) + description + "\n" (1) + NUL (1) */
item_text = (CharPtr) MemNew (sizeof (Char) * (StringLen (dip->description) + 6 + level));
12144
for (n = 0; n < level; n++)
12146
StringCat (item_text, "\t");
12148
/* two blank cells ahead of the description text */
StringCat (item_text, " \t \t");
12149
StringCat (item_text, dip->description);
12150
StringCat (item_text, "\n");
12151
AppendText (drfp->doc, item_text, &discParFmt, drfp->col_fmt_array_array [level], programFont);
12154
for (vnp = dip->subcategories; vnp != NULL; vnp = vnp->next)
12156
AddDiscrepancyItem (drfp, vnp->data.ptrvalue, level + 1);
12162
/*
 * PopulateDiscrepancyList
 * Fills drfp->doc with one row per item of discrepancy_list (and, through
 * AddDiscrepancyItem, their subcategories), rebuilding the column formats
 * first when the number of visible levels has changed.
 */
static void PopulateDiscrepancyList (DiscrepancyReportFormPtr drfp, ValNodePtr discrepancy_list)
12167
if (drfp == NULL || drfp->doc == NULL)
12174
/* note: the depth is measured on drfp->discrepancy_list, not on the
 * discrepancy_list argument */
num_levels = CountLevels (drfp->discrepancy_list);
12175
if (num_levels != drfp->num_levels)
12177
/* free with the old level count before storing the new one */
drfp->col_fmt_array_array = FreeColumnFormatArrays (drfp->col_fmt_array_array, drfp->num_levels);
12178
drfp->num_levels = num_levels;
12179
drfp->col_fmt_array_array = GetColumnFormatArrays (drfp->num_levels, drfp->doc);
12182
while (discrepancy_list != NULL)
12184
AddDiscrepancyItem (drfp, discrepancy_list->data.ptrvalue, 0);
12185
discrepancy_list = discrepancy_list->next;
12187
/* redraw every item now in the document */
GetDocParams (drfp->doc, &numItems, NULL);
12188
UpdateDocument (drfp->doc, 0, numItems);
12192
/* Paragraph format handed to AppendText for rows of the affected-items list. */
static Nlm_ParData discItemParFmt = {FALSE, FALSE, FALSE, FALSE, FALSE, 0, 0};
12193
/* Three-column layout used for feature rows in the affected-items list.
 * The pixWidth of all three entries is assigned in
 * PopulateDiscrepancyItemList before the format is used. */
static Nlm_ColData discItemColFmt [3] = {{0, 5, 10, 0, NULL, 'l', 1,0,0,0, FALSE},
12194
{0, 0, 10, 0, NULL, 'l', 1,0,0,0, FALSE},
12195
{0, 0, 10, 0, NULL, 'l', 1,0,0,0, TRUE}};
12198
/*
 * GetDiscrepancyItemText
 * Builds a newline-terminated, heap-allocated text row describing the object
 * referenced by vnp, chosen by vnp->choice:
 *   OBJ_SEQFEAT - "<feature type label>\t<context label>\t<location>\n"
 *   OBJ_BIOSEQ  - the sequence ID followed by "\n"
 *   OBJ_SEQDESC - the descriptor label followed by "\n"
 * Callers in this file release the result with MemFree.
 * NOTE(review): the return statement and the NULL checks on vnp and on the
 * pointers taken from it are not visible here - confirm.
 */
static CharPtr GetDiscrepancyItemText (ValNodePtr vnp)
12200
CharPtr row_text = NULL;
12203
SeqMgrFeatContext context;
12212
if (vnp->choice == OBJ_SEQFEAT)
12214
sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12217
/* look the feature up again so that context (and context.label) is filled in */
sfp = SeqMgrGetDesiredFeature (sfp->idx.entityID, NULL, 0, 0, sfp, &context);
12220
location = SeqLocPrintUseBestID (sfp->location);
12221
label = (CharPtr) FeatDefTypeLabel(sfp);
12222
row_text = (CharPtr) MemNew (sizeof (Char) *
12224
+ StringLen (context.label)
12225
+ StringLen (location)
12227
sprintf (row_text, "%s\t%s\t%s\n", label, context.label, location);
12228
/* location is released here; label is not freed in the visible lines */
location = MemFree (location);
12232
else if (vnp->choice == OBJ_BIOSEQ)
12234
bsp = (BioseqPtr) vnp->data.ptrvalue;
12237
/* 41 bytes: up to 39 for the ID, then "\n" and the terminating NUL */
row_text = (CharPtr) MemNew (sizeof (Char) * 41);
12238
SeqIdWrite (bsp->id, row_text, PRINTID_REPORT, 39);
12239
StringCat (row_text, "\n");
12242
else if (vnp->choice == OBJ_SEQDESC)
12244
sdp = (SeqDescrPtr) vnp->data.ptrvalue;
12247
/* 61 bytes: up to 59 for the label, then "\n" and the terminating NUL */
row_text = (CharPtr) MemNew (sizeof (Char) * 61);
12248
SeqDescLabel (sdp, row_text, 59, TRUE);
12249
StringCat (row_text, "\n");
12257
/*
 * PopulateDiscrepancyItemList
 * Fills doc (the affected-items panel) with one row per entry of
 * dip->item_list, or with the text "No items listed" when the list is empty.
 * Feature rows use the three-column discItemColFmt layout; other rows are
 * appended without a column format.
 * NOTE(review): the loop advance and the handling of a NULL doc/dip are not
 * visible here - confirm.
 */
static void PopulateDiscrepancyItemList (DoC doc, DiscrepancyItemPtr dip)
12275
if (dip->item_list == NULL)
12277
AppendText (doc, "No items listed", NULL, NULL, programFont);
12280
ObjectRect (doc, &r);
12281
InsetRect (&r, 4, 4);
12283
/* first column is five characters wide; the other two split the rest evenly */
discItemColFmt[0].pixWidth = 5 * stdCharWidth;
12284
discItemColFmt[1].pixWidth = (r.right - r.left - discItemColFmt[0].pixWidth) / 2;
12285
discItemColFmt[2].pixWidth = (r.right - r.left - discItemColFmt[0].pixWidth) / 2;
12287
vnp = dip->item_list;
12289
while (vnp != NULL)
12291
row_text = GetDiscrepancyItemText (vnp);
12292
if (row_text != NULL)
12294
if (vnp->choice == OBJ_SEQFEAT)
12296
AppendText (doc, row_text, &discItemParFmt, discItemColFmt, programFont);
12300
AppendText (doc, row_text, &discItemParFmt, NULL, programFont);
12302
/* the row text is owned here and released after it has been appended */
row_text = MemFree (row_text);
12306
GetDocParams (doc, &numItems, NULL);
12307
UpdateDocument (doc, 0, numItems);
12310
/*
 * GetSubDiscrepancy
 * Recursive helper for GetSelectedDiscrepancy.  Walks discrepancy_list while
 * *pitem is still positive, descending into the subcategories of expanded
 * items with the same counter.
 *   pitem - in/out counter shared across the recursion
 * NOTE(review): the statements that decrement *pitem, stop the walk and
 * return the match are not visible here - presumably *pitem counts down once
 * per displayed row; confirm.
 */
static DiscrepancyItemPtr GetSubDiscrepancy (ValNodePtr discrepancy_list, Int2Ptr pitem)
12312
DiscrepancyItemPtr dip = NULL;
12314
if (discrepancy_list == NULL || pitem == NULL)
12318
while (*pitem > 0 && discrepancy_list != NULL)
12321
dip = (DiscrepancyItemPtr) discrepancy_list->data.ptrvalue;
12324
/* only expanded items contribute their subcategories to the walk */
if (dip != NULL && dip->expanded)
12326
dip = GetSubDiscrepancy (dip->subcategories, pitem);
12329
discrepancy_list = discrepancy_list->next;
12338
/*
 * GetSelectedDiscrepancy
 * Looks up the discrepancy item that corresponds to document item number
 * item by delegating to GetSubDiscrepancy.
 *   item - passed by address so the helper can use it as its counter; the
 *          caller's own value is unaffected because this is a local copy
 * NOTE(review): the return statement is not visible here - presumably dip.
 */
static DiscrepancyItemPtr GetSelectedDiscrepancy (ValNodePtr discrepancy_list, Int2 item)
12340
DiscrepancyItemPtr dip = NULL;
12342
dip = GetSubDiscrepancy (discrepancy_list, &item);
12348
/*
 * ClickDiscrepancy
 * Click callback for the discrepancy list panel.  Records which item was
 * clicked and whether this is a double click on it, and on a single click
 * toggles either the item's chosen flag or its expanded flag depending on
 * the column that was hit.
 * NOTE(review): dblClick is not declared in the visible lines - presumably a
 * toolkit-level flag; confirm.
 */
static void ClickDiscrepancy (DoC d, PoinT pt)
12351
Int2 item, numItems;
12354
DiscrepancyReportFormPtr drfp;
12355
DiscrepancyItemPtr dip;
12357
drfp = GetObjectExtra (d);
12358
if (drfp != NULL) {
12359
MapDocPoint (d, pt, &item, &row, &col, NULL);
12360
/* a double click only counts when it lands on the previously clicked item */
if (item > 0 && row > 0 && drfp->clicked == item) {
12361
drfp->dblClick = dblClick;
12363
drfp->dblClick = FALSE;
12366
if (item > 0 && row > 0) {
12367
drfp->clicked = item;
12369
if (item > 0 && row > 0 && !dblClick)
12371
dip = GetSelectedDiscrepancy (drfp->discrepancy_list, item);
12374
/* NOTE(review): dip is dereferenced below; a NULL check is not visible in
 * this excerpt - confirm one exists */
/* column level + 1: toggle the chosen flag and redraw */
if (col == dip->level + 1)
12376
dip->chosen = !dip->chosen;
12377
GetDocParams (d, &numItems, NULL);
12378
UpdateDocument (d, 0, numItems);
12380
/* column level + 2: toggle expansion and rebuild the list */
else if (col == dip->level + 2)
12382
dip->expanded = !dip->expanded;
12383
PopulateDiscrepancyList (drfp, drfp->discrepancy_list);
12391
/*
 * ActOnDiscrepancy
 * Invokes the callback attached to the discrepancy shown as document item
 * number item, passing it the item's item_list and callback_data.  Does
 * nothing when no item is found or the item has no callback.
 */
static void ActOnDiscrepancy (ValNodePtr discrepancy_list, Int2 item)
12393
DiscrepancyItemPtr dip;
12395
dip = GetSelectedDiscrepancy (discrepancy_list, item);
12396
if (dip != NULL && dip->callback_func != NULL)
12398
(dip->callback_func) (dip->item_list, dip->callback_data);
12402
/*
 * InvalBorder
 * Computes a narrow rectangle at the left edge of item in document d, used
 * for the selection marker, when that item is currently visible.
 * NOTE(review): the assignments of r.top / r.bottom and the call that
 * invalidates the rectangle are not visible here - confirm.
 */
static void InvalBorder (DoC d, Int2 item)
12409
ObjectRect (d, &r);
12410
InsetRect (&r, 4, 4);
12411
if (ItemIsVisible (d, item, &top, &bottom, NULL)) {
12414
/* 4-pixel-wide strip at the left, grown by one pixel on every side */
r.right = r.left + 4;
12415
InsetRect (&r, -1, -1);
12420
/*
 * ReleaseDiscrepancy
 * Mouse-release callback for the discrepancy list panel.  When the release
 * lands on the item that was clicked, that item becomes the selection; a
 * release with no clicked item clears the selection.  Afterwards a double
 * click runs the item's action, while a plain selection refreshes the
 * affected-items panel.
 */
static void ReleaseDiscrepancy (DoC d, PoinT pt)
12426
DiscrepancyReportFormPtr drfp;
12428
drfp = GetObjectExtra (d);
12429
if (drfp != NULL) {
12431
MapDocPoint (d, pt, &item, &row, NULL, NULL);
12432
if (item > 0 && row > 0) {
12433
if (item == drfp->clicked) {
12434
/* remember the previous selection so its row can be redrawn as well */
old = drfp->selected;
12435
drfp->selected = item;
12438
UpdateDocument (d, item, item);
12440
UpdateDocument (d, old, old);
12441
UpdateDocument (d, item, item);
12446
} else if (drfp->clicked == 0) {
12447
if (drfp->selected != 0) {
12448
old = drfp->selected;
12449
drfp->selected = 0;
12450
InvalBorder (d, old);
12454
/* double click on a selected item: run its callback */
if (drfp->selected > 0 && drfp->dblClick)
12456
ActOnDiscrepancy (drfp->discrepancy_list, drfp->selected);
12458
/* single click: show the selected discrepancy's items, nothing pre-selected */
else if (drfp->selected > 0)
12460
drfp->item_selected = 0;
12461
PopulateDiscrepancyItemList (drfp->item_list,
12462
GetSelectedDiscrepancy (drfp->discrepancy_list,
12470
/*
 * DrawDiscrepancy
 * Shading callback for the discrepancy list panel.  For the first line of
 * each item it draws, shifted right by 16 pixels per nesting level, the
 * selection marker, the "chosen" checkbox (crossed when chosen) and, for
 * items that have subcategories, an open/closed box (a minus sign, turned
 * into a plus sign when the item is collapsed).
 * NOTE(review): the lines that copy *r into rct, move it between the boxes
 * and paint/frame the rectangles are not visible here - confirm.
 */
static void DrawDiscrepancy (DoC d, RectPtr r, Int2 item, Int2 firstLine)
12473
DiscrepancyReportFormPtr drfp;
12475
DiscrepancyItemPtr dip;
12478
drfp = (DiscrepancyReportFormPtr) GetObjectExtra (d);
12479
if (drfp != NULL && r != NULL && item > 0 && firstLine == 0) {
12482
dip = GetSelectedDiscrepancy (drfp->discrepancy_list, item);
12485
/* NOTE(review): dip is dereferenced here while later uses test dip != NULL;
 * confirm a guard exists on the lines not shown */
level_offset = dip->level * 16;
12486
rct.left += level_offset;
12487
rct.right += level_offset;
12490
/* draw selection */
12491
if (item == drfp->selected) {
12492
rct.right = rct.left + 4;
12496
/* draw chosen checkboxes */
12498
/* 10 x 10 pixel square */
rct.right = rct.left + 10;
12499
rct.bottom = rct.top + (rct.right - rct.left);
12502
if (dip != NULL && dip->chosen) {
12503
/* two diagonals form the X inside the box */
MoveTo (rct.left, rct.top);
12504
LineTo (rct.right - 1, rct.bottom - 1);
12505
MoveTo (rct.left, rct.bottom - 1);
12506
LineTo (rct.right - 1, rct.top);
12509
/* draw open/closed checkboxes */
12510
if (dip!= NULL && dip->subcategories != NULL)
12513
rct.right = rct.left + 10;
12514
rct.bottom = rct.top + (rct.right - rct.left);
12516
/* horizontal bar: always drawn */
MoveTo (rct.left, (rct.top + rct.bottom) / 2);
12517
LineTo (rct.right - 1, (rct.top + rct.bottom) / 2);
12518
if (!dip->expanded)
12520
/* vertical bar: added only when collapsed, turning the minus into a plus */
MoveTo ((rct.left + rct.right) / 2, rct.top);
12521
LineTo ((rct.left + rct.right) / 2, rct.bottom - 1);
12529
/*
 * DrawDiscrepancyItem
 * Shading callback for the affected-items panel.  On the first line of the
 * item that matches drfp->item_selected it prepares a 4-pixel-wide rectangle
 * at the left edge for the selection marker.
 * NOTE(review): the lines that copy *r into rct and paint it are not visible
 * here - confirm.
 */
static void DrawDiscrepancyItem (DoC d, RectPtr r, Int2 item, Int2 firstLine)
12532
DiscrepancyReportFormPtr drfp;
12535
drfp = (DiscrepancyReportFormPtr) GetObjectExtra (d);
12536
if (drfp != NULL && r != NULL && item > 0 && firstLine == 0) {
12539
/* draw selection */
12540
if (item == drfp->item_selected) {
12542
rct.right = rct.left + 4;
12549
/*
 * EditDiscrepancyItem
 * Launches the registered editor (OMPROC_EDIT) for the object referenced by
 * vnp, using the entityID / itemID stored in the object's idx field.
 * Handles features, bioseqs and descriptors; a descriptor is only edited
 * when its extended field is non-zero, in which case it is treated as an
 * ObjValNode carrying the idx information.
 * NOTE(review): NULL checks on vnp, sfp and bsp are not visible here.
 */
static void EditDiscrepancyItem (ValNodePtr vnp)
12560
if (vnp->choice == OBJ_SEQFEAT)
12562
sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12565
GatherProcLaunch (OMPROC_EDIT, FALSE, sfp->idx.entityID, sfp->idx.itemID,
12566
OBJ_SEQFEAT, 0, 0, OBJ_SEQFEAT, 0);
12569
else if (vnp->choice == OBJ_BIOSEQ)
12571
bsp = (BioseqPtr) vnp->data.ptrvalue;
12574
GatherProcLaunch (OMPROC_EDIT, FALSE, bsp->idx.entityID, bsp->idx.itemID,
12575
OBJ_BIOSEQ, 0, 0, OBJ_BIOSEQ, 0);
12578
else if (vnp->choice == OBJ_SEQDESC)
12580
sdp = (SeqDescrPtr) (vnp->data.ptrvalue);
12581
/* only extended descriptors are cast to ObjValNodePtr to reach idx */
if (sdp != NULL && sdp->extended != 0)
12583
ovp = (ObjValNodePtr) sdp;
12584
GatherProcLaunch (OMPROC_EDIT, FALSE, ovp->idx.entityID, ovp->idx.itemID,
12585
OBJ_SEQDESC, 0, 0, OBJ_SEQDESC, 0);
12592
/*
 * SetBioseqViewTargetByBioseq
 * Points the sequence viewer of bfp at bsp by writing the best ID of bsp as
 * text and passing that text to SetBioseqViewTarget.  Does nothing when
 * either argument is NULL.
 */
static void SetBioseqViewTargetByBioseq (BaseFormPtr bfp, BioseqPtr bsp)
12596
if (bsp != NULL && bfp != NULL)
12598
SeqIdWrite (SeqIdFindBest (bsp->id, 0), id_text, PRINTID_REPORT, sizeof (id_text));
12599
SetBioseqViewTarget (bfp, id_text);
12604
/*
 * GetFirstBioseqInSeqEntry
 * Depth-first search for the first Bioseq contained in sep: a Bioseq entry
 * yields its own data pointer, a Bioseq-set entry is searched member by
 * member until one yields a Bioseq.
 * NOTE(review): the return statement is not visible here - presumably bsp,
 * which stays NULL when nothing is found.
 */
static BioseqPtr GetFirstBioseqInSeqEntry (SeqEntryPtr sep)
12606
BioseqPtr bsp = NULL;
12609
if (sep == NULL || sep->data.ptrvalue == NULL)
12613
else if (IS_Bioseq (sep))
12615
bsp = sep->data.ptrvalue;
12617
else if (IS_Bioseq_set (sep))
12619
bssp = (BioseqSetPtr) sep->data.ptrvalue;
12620
/* stop at the first member that produces a Bioseq */
for (sep = bssp->seq_set; sep != NULL && bsp == NULL; sep = sep->next)
12622
bsp = GetFirstBioseqInSeqEntry (sep);
12629
/*
 * GetBioseqForDescriptor
 * Finds a Bioseq to associate with descriptor ovp, based on the descriptor's
 * parent: a Bioseq parent is used directly; for a Bioseq-set parent the
 * first Bioseq found among the set's members is used.
 * NOTE(review): the return statement is not visible here - presumably bsp,
 * which stays NULL when the parent is missing or of another type.
 */
static BioseqPtr GetBioseqForDescriptor (ObjValNodePtr ovp)
12631
BioseqPtr bsp = NULL;
12635
if (ovp == NULL || ovp->idx.parentptr == NULL)
12639
else if (ovp->idx.parenttype == OBJ_BIOSEQ) {
12640
bsp = (BioseqPtr) ovp->idx.parentptr;
12641
} else if (ovp->idx.parenttype == OBJ_BIOSEQSET) {
12642
bssp = (BioseqSetPtr) ovp->idx.parentptr;
12643
/* stop at the first member that produces a Bioseq */
for (sep = bssp->seq_set; sep != NULL && bsp == NULL; sep = sep->next)
12645
bsp = GetFirstBioseqInSeqEntry (sep);
12651
/*
 * ScrollToDiscrepancyItem
 * Makes the sequence viewer of bfp show the object referenced by vnp:
 *   OBJ_SEQFEAT - targets the Bioseq of the feature location and selects the
 *                 feature through the object manager
 *   OBJ_BIOSEQ  - targets that Bioseq
 *   OBJ_SEQDESC - (extended descriptors only) targets the Bioseq found by
 *                 GetBioseqForDescriptor and selects the descriptor
 */
static void ScrollToDiscrepancyItem (ValNodePtr vnp, BaseFormPtr bfp)
12658
if (vnp == NULL || bfp == NULL)
12662
if (vnp->choice == OBJ_SEQFEAT)
12664
sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12667
/* need to scroll to item */
12668
bsp = BioseqFindFromSeqLoc (sfp->location);
12669
SetBioseqViewTargetByBioseq (bfp, bsp);
12670
ObjMgrSelect (sfp->idx.entityID, sfp->idx.itemID, OBJ_SEQFEAT, 0, NULL);
12673
else if (vnp->choice == OBJ_BIOSEQ)
12675
bsp = (BioseqPtr) vnp->data.ptrvalue;
12676
SetBioseqViewTargetByBioseq (bfp, bsp);
12678
else if (vnp->choice == OBJ_SEQDESC)
12680
sdp = (SeqDescrPtr) (vnp->data.ptrvalue);
12681
/* only extended descriptors are cast to ObjValNodePtr to reach idx */
if (sdp != NULL && sdp->extended != 0)
12683
ovp = (ObjValNodePtr) sdp;
12684
bsp = GetBioseqForDescriptor (ovp);
12685
SetBioseqViewTargetByBioseq (bfp, bsp);
12686
ObjMgrSelect (ovp->idx.entityID, ovp->idx.itemID, OBJ_SEQDESC, 0, NULL);
12691
/*
 * ClickDiscrepancyItem
 * Click callback for the affected-items panel.  Marks the clicked row as the
 * selected item, redraws the panel when the selection changed, locates the
 * matching entry in the selected discrepancy's item_list, and then either
 * opens an editor for it or scrolls the sequence viewer to it.
 * NOTE(review): the condition choosing between EditDiscrepancyItem and
 * ScrollToDiscrepancyItem, and the body of the list walk, are not visible
 * here - presumably a double click edits; confirm.
 */
static void ClickDiscrepancyItem (DoC d, PoinT pt)
12694
Int2 item, last_selected, numItems;
12696
DiscrepancyReportFormPtr drfp;
12697
DiscrepancyItemPtr dip;
12700
drfp = GetObjectExtra (d);
12701
if (drfp != NULL) {
12702
MapDocPoint (d, pt, &item, &row, NULL, NULL);
12703
if (item > 0 && row > 0) {
12704
/* the item list shown belongs to the currently selected discrepancy */
dip = GetSelectedDiscrepancy (drfp->discrepancy_list, drfp->selected);
12705
if (dip != NULL && dip->item_list != NULL)
12707
vnp = dip->item_list;
12709
last_selected = drfp->item_selected;
12710
drfp->item_selected = item;
12712
if (item != last_selected)
12714
GetDocParams (d, &numItems, NULL);
12715
UpdateDocument (d, 0, numItems);
12718
/* find item in list */
12719
while (item > 1 && vnp != NULL)
12727
EditDiscrepancyItem (vnp);
12731
ScrollToDiscrepancyItem (vnp, drfp->bfp);
12740
/*
 * CleanupDiscrepancyReportForm
 * Cleanup callback installed on the report window.  Releases everything the
 * form owns (discrepancy list, configuration, column formats), removes the
 * object-manager user data registered for the form, clears the global window
 * handle and then runs the standard form cleanup.
 */
static void CleanupDiscrepancyReportForm (GraphiC g, VoidPtr data)
12743
DiscrepancyReportFormPtr drfp;
12745
drfp = (DiscrepancyReportFormPtr) data;
12746
if (drfp != NULL) {
12747
drfp->discrepancy_list = FreeDiscrepancyList (drfp->discrepancy_list);
12748
drfp->dcp = DiscrepancyConfigFree (drfp->dcp);
12749
drfp->col_fmt_array_array = FreeColumnFormatArrays (drfp->col_fmt_array_array, drfp->num_levels);
12750
ObjMgrFreeUserData (drfp->input_entityID, drfp->procid, drfp->proctype, drfp->userkey);
12751
/* lets CreateDiscrepancyReportWindow know no report window exists any more */
discrepancyReportWindow = NULL;
12753
StdCleanupFormProc (g, data);
12756
/*
 * RecheckDiscrepancyProc
 * Button callback that re-runs the discrepancy tests: the old list is freed,
 * a new one is collected for drfp->sep with the current configuration, and
 * both panels are refreshed.  When the new list is non-empty the first
 * discrepancy becomes the selection; the Reset of the item panel presumably
 * covers the empty case.
 *   b - any control whose object extra is the DiscrepancyReportFormPtr
 * NOTE(review): a NULL check on drfp is not visible here - confirm.
 */
static void RecheckDiscrepancyProc (ButtoN b)
12758
DiscrepancyReportFormPtr drfp;
12760
drfp = (DiscrepancyReportFormPtr) GetObjectExtra (b);
12763
drfp->discrepancy_list = FreeDiscrepancyList (drfp->discrepancy_list);
12764
drfp->discrepancy_list = CollectDiscrepancies (drfp->sep, drfp->dcp);
12765
PopulateDiscrepancyList (drfp, drfp->discrepancy_list);
12766
if (drfp->discrepancy_list != NULL)
12768
drfp->selected = 1;
12769
drfp->item_selected = 0;
12770
PopulateDiscrepancyItemList (drfp->item_list,
12771
GetSelectedDiscrepancy (drfp->discrepancy_list,
12776
Reset (drfp->item_list);
12782
/*
 * ValNodePointerDup
 * Makes a shallow copy of a ValNode chain: every node is duplicated, but
 * choice and data.ptrvalue are copied as-is, so the new nodes point at the
 * same objects as the originals.
 *
 *   vnp - head of the chain to copy (may be NULL)
 *
 * Returns the head of the new chain, or NULL when vnp is NULL or a node could
 * not be allocated (any partially built copy is released first).  The caller
 * owns the returned nodes but not the objects they point to.
 *
 * The copy is built iteratively so that stack use does not grow with the
 * length of the list.
 */
static ValNodePtr ValNodePointerDup (ValNodePtr vnp)
{
  ValNodePtr head = NULL;
  ValNodePtr tail = NULL;
  ValNodePtr node;

  for (; vnp != NULL; vnp = vnp->next)
  {
    node = ValNodeNew (NULL);
    if (node == NULL)
    {
      /* out of memory: ValNodeFree releases nodes only, never the shared data */
      head = ValNodeFree (head);
      return NULL;
    }
    node->choice = vnp->choice;
    node->data.ptrvalue = vnp->data.ptrvalue;
    node->next = NULL;

    /* append in order so the copy keeps the original sequence */
    if (tail == NULL)
    {
      head = node;
    }
    else
    {
      tail->next = node;
    }
    tail = node;
  }

  return head;
}
12796
/*
 * ReplaceDiscrepancyItemWithFeatureTableStrings
 * Works on a shallow copy of feat_list.  For each feature still present in
 * the copy, a feature-table (FTABLE_FMT) flat-file job is set up for the
 * Bioseq of its location; every paragraph of that job is matched against the
 * remaining features by entityID / itemtype / itemID, and a matching node
 * has its choice set to 0 and its data pointer replaced by the formatted
 * paragraph text.
 * Nodes with choice 0 therefore carry heap strings produced by
 * asn2gnbk_format; nodes that keep their original choice still point at the
 * original objects.
 * NOTE(review): the return statement and the NULL checks on bsp / ajp / bbp
 * are not visible here - presumably list_copy is returned; confirm.
 */
static ValNodePtr ReplaceDiscrepancyItemWithFeatureTableStrings (ValNodePtr feat_list)
12799
CstType custom_flags = 0;
12804
ValNodePtr vnp, list_copy = NULL, list_vnp;
12806
if (feat_list == NULL) return NULL;
12808
list_copy = ValNodePointerDup (feat_list);
12809
for (vnp = list_copy; vnp != NULL; vnp = vnp->next)
12811
/* nodes already converted have choice 0 and are skipped by this test */
if (vnp->choice == OBJ_SEQFEAT)
12813
sfp = (SeqFeatPtr) vnp->data.ptrvalue;
12814
bsp = BioseqFindFromSeqLoc (sfp->location);
12815
ajp = asn2gnbk_setup (bsp, NULL, NULL, FTABLE_FMT, DUMP_MODE, NORMAL_STYLE,
12816
0, 0, custom_flags, NULL);
12819
for (index = 0; index < ajp->numParagraphs; index++)
12821
bbp = ajp->paragraphArray [index];
12822
/* search from the current node onward; earlier nodes were handled already */
for (list_vnp = vnp; list_vnp != NULL; list_vnp = list_vnp->next)
12824
if (list_vnp->choice == OBJ_SEQFEAT)
12826
sfp = (SeqFeatPtr) list_vnp->data.ptrvalue;
12828
&& bbp->entityID == sfp->idx.entityID
12829
&& bbp->itemtype == sfp->idx.itemtype
12830
&& bbp->itemID == sfp->idx.itemID)
12832
/* replace list feature with description, change choice */
12833
list_vnp->choice = 0;
12834
list_vnp->data.ptrvalue = asn2gnbk_format (ajp, (Int4) index);
12839
asn2gnbk_cleanup (ajp);
12847
/*
 * WriteDiscrepancy
 * Writes one discrepancy to fp: its description on a line of its own, one
 * row per affected item, and a terminating blank line.
 *
 *   fp                    - open output stream; nothing is written when NULL
 *   dip                   - discrepancy to write; nothing is written when NULL
 *   use_feature_table_fmt - when TRUE, features are written as feature-table
 *                           text produced by
 *                           ReplaceDiscrepancyItemWithFeatureTableStrings
 *                           instead of the one-line summary from
 *                           GetDiscrepancyItemText
 *
 * Row text is always written through a "%s" format: it contains feature
 * labels, locations and other record-derived strings, and must never be
 * interpreted as a printf format itself.
 */
static void WriteDiscrepancy (FILE *fp, DiscrepancyItemPtr dip, Boolean use_feature_table_fmt)
{
  ValNodePtr vnp, list_copy = NULL;
  CharPtr    row_text;

  if (fp == NULL || dip == NULL)
  {
    return;
  }

  fprintf (fp, "%s\n", dip->description);
  vnp = dip->item_list;

  if (use_feature_table_fmt)
  {
    /* walk the converted shallow copy rather than the original item list */
    list_copy = ReplaceDiscrepancyItemWithFeatureTableStrings (vnp);
    vnp = list_copy;
  }

  while (vnp != NULL)
  {
    if (vnp->choice == 0 && use_feature_table_fmt)
    {
      /* converted node: take ownership of the formatted string */
      row_text = vnp->data.ptrvalue;
      vnp->data.ptrvalue = NULL;
    }
    else
    {
      row_text = GetDiscrepancyItemText (vnp);
    }
    if (row_text != NULL)
    {
      fprintf (fp, "%s", row_text);
      row_text = MemFree (row_text);
    }
    vnp = vnp->next;
  }

  /* release the nodes of the shallow copy; the objects that unconverted
   * nodes point to still belong to dip->item_list and are left untouched */
  list_copy = ValNodeFree (list_copy);

  fprintf (fp, "\n");
}
12888
/*
 * WriteDiscrepancyReport
 * Writes a list of discrepancies to fp, recursing into subcategories.
 * For an item whose subcategories contain anything to report, the item's
 * description is written as a heading (when the item is chosen or show_all
 * is set) and the subcategories are written below it; otherwise a chosen
 * item (or any item when show_all is set) is written with WriteDiscrepancy.
 *   show_all - report every item regardless of its chosen flag; a chosen
 *              parent turns this on for everything beneath it
 */
WriteDiscrepancyReport
12890
ValNodePtr discrepancy_list,
12892
Boolean use_feature_table_fmt)
12894
DiscrepancyItemPtr dip;
12898
if (fp == NULL || discrepancy_list == NULL)
12902
for (vnp = discrepancy_list; vnp != NULL; vnp = vnp->next)
12904
dip = (DiscrepancyItemPtr) vnp->data.ptrvalue;
12909
/* show_all | dip->chosen: bitwise OR of two Boolean values */
num_chosen = CountChosenDiscrepancies (dip->subcategories, show_all | dip->chosen);
12910
if (num_chosen > 0)
12912
if (dip->chosen || show_all)
12914
fprintf (fp, "%s\n", dip->description);
12916
WriteDiscrepancyReport (fp, dip->subcategories, show_all | dip->chosen, use_feature_table_fmt);
12919
else if (dip->chosen || show_all)
12921
WriteDiscrepancy (fp, dip, use_feature_table_fmt);
12927
/*
 * DiscrepancyReportExportProc
 * Export handler of the report form: writes the discrepancy report to a text
 * file.
 *   f        - report form; its object extra is the DiscrepancyReportFormPtr
 *   filename - target path; when it is empty the user is asked for one
 * When no discrepancy is chosen the user is asked whether to export all of
 * them.
 * NOTE(review): the return values, the closing of fp, and what happens after
 * the "r" probe and after a Cancel answer are not visible here - confirm.
 */
static Boolean DiscrepancyReportExportProc (ForM f, CharPtr filename)
12931
Char path [PATH_MAX];
12932
DiscrepancyReportFormPtr drfp;
12934
Boolean show_all = FALSE;
12936
drfp = (DiscrepancyReportFormPtr) GetObjectExtra (f);
12942
num_disc = CountChosenDiscrepancies (drfp->discrepancy_list, FALSE);
12946
if (ANS_CANCEL == Message (MSG_OKC, "No discrepancies selected! Export all?"))
12957
StringNCpy_0 (path, filename, sizeof (path));
12958
if (path [0] != '\0' || GetOutputFileName (path, sizeof (path), NULL)) {
12960
/* presumably a probe for an existing file before creating it - confirm */
fp = FileOpen (path, "r");
12964
FileCreate (path, "TEXT", "ttxt");
12967
fp = FileOpen (path, "w");
12969
WriteDiscrepancyReport (fp, drfp->discrepancy_list, show_all,
12970
(Boolean)(drfp->dcp != NULL && drfp->dcp->use_feature_table_format));
12979
/*
 * GenerateDiscrepancyReport
 * Callback of the "Generate Report" button: exports the report to the file
 * named in path and, when the export succeeds, opens that file in the
 * general text viewer under the title "Discrepancy Report".
 * NOTE(review): the lines that fill path (presumably with a temporary file
 * name) and that check drfp are not visible here - confirm.
 */
static void GenerateDiscrepancyReport (ButtoN b)
12981
DiscrepancyReportFormPtr drfp;
12982
Char path [PATH_MAX];
12984
drfp = (DiscrepancyReportFormPtr) GetObjectExtra (b);
12991
if (DiscrepancyReportExportProc (drfp->form, path))
12993
LaunchGeneralTextViewer (path, "Discrepancy Report");
12999
/*
 * CopyDiscrepancyReportToClipboard
 * Writes the complete report (show_all is TRUE, so every discrepancy is
 * included) to the file named in path and places that file's contents on the
 * clipboard.  Does nothing when there is no form or no discrepancy list.
 * NOTE(review): the lines that fill path, close fp before FileToClipboard,
 * and remove the file afterwards are not visible here - confirm.
 */
static void CopyDiscrepancyReportToClipboard (DiscrepancyReportFormPtr drfp)
13003
Char path [PATH_MAX];
13005
if (drfp == NULL || drfp->discrepancy_list == NULL) return;
13007
fp = FileOpen (path, "w");
13009
WriteDiscrepancyReport (fp, drfp->discrepancy_list, TRUE,
13010
(Boolean)(drfp->dcp != NULL && drfp->dcp->use_feature_table_format));
13012
FileToClipboard (path);
13018
/*
 * DiscrepancyReportFormMessage
 * Message handler of the report form.  Dispatches on mssg (export, print,
 * close, copy, paste, delete, ...) and forwards to drfp->appmessage when one
 * is set.
 * NOTE(review): the switch header, the break statements and several case
 * labels are not visible here, so which statements belong to which case
 * cannot be fully determined - confirm before changing any case.
 */
static void DiscrepancyReportFormMessage (ForM f, Int2 mssg)
13021
DiscrepancyReportFormPtr drfp;
13023
drfp = (DiscrepancyReportFormPtr) GetObjectExtra (f);
13024
if (drfp != NULL) {
13026
case VIB_MSG_EXPORT :
13027
DiscrepancyReportExportProc (f, NULL);
13029
case VIB_MSG_PRINT :
13031
case VIB_MSG_CLOSE :
13035
CopyDiscrepancyReportToClipboard (drfp);
13037
case VIB_MSG_COPY :
13038
CopyDiscrepancyReportToClipboard (drfp);
13040
case VIB_MSG_PASTE :
13042
case VIB_MSG_DELETE :
13043
/* NOTE(review): this uses ValNodeFree, whereas CleanupDiscrepancyReportForm
 * and RecheckDiscrepancyProc release the same list with FreeDiscrepancyList;
 * confirm the discrepancy items themselves are not leaked here */
drfp->discrepancy_list = ValNodeFree (drfp->discrepancy_list);
13045
Reset (drfp->item_list);
13048
if (drfp->appmessage != NULL) {
13049
drfp->appmessage (f, mssg);
13057
/*
 * ReactivateDiscrepancyReport
 * Re-targets the existing report window at the record of bfp: the form's
 * input_entityID and top SeqEntry are updated, the discrepancies are
 * recomputed and the window is shown.  When no window exists, or its form
 * data cannot be used, a new window is created through
 * CreateDiscrepancyReportWindow instead.
 * NOTE(review): the returns after the two CreateDiscrepancyReportWindow
 * calls and the condition guarding the Remove are not visible here.
 */
static void ReactivateDiscrepancyReport (BaseFormPtr bfp)
13059
DiscrepancyReportFormPtr drfp;
13061
if (discrepancyReportWindow == NULL)
13063
CreateDiscrepancyReportWindow (bfp);
13066
drfp = (DiscrepancyReportFormPtr) GetObjectExtra (discrepancyReportWindow);
13069
/* discard the unusable window and start over with a fresh one */
Remove (discrepancyReportWindow);
13070
discrepancyReportWindow = NULL;
13071
CreateDiscrepancyReportWindow (bfp);
13075
drfp->input_entityID = bfp->input_entityID;
13076
drfp->sep = GetTopSeqEntryForEntityID (drfp->input_entityID);
13078
/* populate discrepancy lists */
13079
RecheckDiscrepancyProc (drfp->recheck_btn);
13080
Show (discrepancyReportWindow);
13084
/*
 * EditDiscrepancyConfigBtn
 * Callback of the "Configure" button: lets the user edit the discrepancy
 * configuration of the report window and, when EditDiscrepancyConfig
 * returns TRUE, re-runs the tests.
 *   b - passed on to RecheckDiscrepancyProc, which reads the form data from
 *       the button's object extra
 */
static void EditDiscrepancyConfigBtn (ButtoN b)
13086
DiscrepancyReportFormPtr drfp;
13088
/* the form data is taken from the global report window, not from b */
drfp = (DiscrepancyReportFormPtr) GetObjectExtra (discrepancyReportWindow);
13089
if (drfp == NULL) return;
13091
if (EditDiscrepancyConfig (drfp->dcp))
13093
RecheckDiscrepancyProc (b);
13099
/* Defined elsewhere; called by CreateDiscrepancyReportWindow to add the menus
 * to the report window. */
extern void CreateStdValidatorFormMenus (WindoW w);
13103
/*
 * DiscrepancyReportMsgFunc
 * Object-manager message callback registered for the report form.  Recovers
 * the form from the user data attached to the message, makes the form's
 * window the current port while the message is handled, and restores the
 * previous port afterwards.
 * Returns OM_MSG_RET_ERROR when the user data or the form pointer is missing,
 * OM_MSG_RET_OK otherwise.
 * NOTE(review): the statements of the individual cases (and the labels of
 * the cases that call Remove) are not visible here - confirm.
 */
static Int2 LIBCALLBACK DiscrepancyReportMsgFunc (OMMsgStructPtr ommsp)
13105
WindoW currentport,
13107
OMUserDataPtr omudp;
13108
DiscrepancyReportFormPtr drfp = NULL;
13110
omudp = (OMUserDataPtr)(ommsp->omuserdata);
13111
if (omudp == NULL) return OM_MSG_RET_ERROR;
13112
drfp = (DiscrepancyReportFormPtr) omudp->userdata.ptrvalue;
13113
if (drfp == NULL) return OM_MSG_RET_ERROR;
13115
/* switch drawing to the report window, remembering the previous port */
currentport = ParentWindow (drfp->form);
13116
temport = SavePort (currentport);
13117
UseWindow (currentport);
13118
Select (drfp->form);
13119
switch (ommsp->message)
13121
case OM_MSG_UPDATE:
13123
case OM_MSG_DESELECT:
13126
case OM_MSG_SELECT:
13129
Remove (drfp->form);
13136
Remove (drfp->form);
13141
RestorePort (temport);
13142
UseWindow (temport);
13143
return OM_MSG_RET_OK;
13147
extern void CreateDiscrepancyReportWindow (BaseFormPtr bfp)
13149
DiscrepancyReportFormPtr drfp;
13155
OMUserDataPtr omudp;
13157
if (bfp == NULL) return;
13159
if (discrepancyReportWindow != NULL)
13161
ReactivateDiscrepancyReport (bfp);
13165
drfp = (DiscrepancyReportFormPtr) MemNew (sizeof (DiscrepancyReportFormData));
13172
drfp->input_entityID = bfp->input_entityID;
13173
drfp->sep = GetTopSeqEntryForEntityID (drfp->input_entityID);
13174
w = FixedWindow (-50, -33, -10, -10, "Discrepancy Report", StdCloseWindowProc);
13175
SetObjectExtra (w, drfp, CleanupDiscrepancyReportForm);
13176
drfp->form = (ForM) w;
13177
drfp->formmessage = DiscrepancyReportFormMessage;
13178
drfp->exportform = DiscrepancyReportExportProc;
13180
/* read in config file */
13181
drfp->dcp = ReadDiscrepancyConfig();
13183
/* register to receive update messages */
13184
drfp->userkey = OMGetNextUserKey ();
13186
drfp->proctype = OMPROC_EDIT;
13187
omudp = ObjMgrAddUserData (drfp->input_entityID, drfp->procid, drfp->proctype, drfp->userkey);
13188
if (omudp != NULL) {
13189
omudp->userdata.ptrvalue = (Pointer) drfp;
13190
omudp->messagefunc = DiscrepancyReportMsgFunc;
13195
CreateStdValidatorFormMenus (w);
13198
h = HiddenGroup (w, -1, 0, NULL);
13199
SetGroupSpacing (h, 10, 10);
13201
g = HiddenGroup (h, 2, 0, NULL);
13202
StaticPrompt (g, "Discrepancies", 0, popupMenuHeight, programFont, 'c');
13203
StaticPrompt (g, "Affected Items", 0, popupMenuHeight, programFont, 'c');
13205
drfp->doc = DocumentPanel (g, stdCharWidth * 30, stdLineHeight * 20);
13206
SetObjectExtra (drfp->doc, drfp, NULL);
13207
SetDocAutoAdjust (drfp->doc, FALSE);
13208
SetDocProcs (drfp->doc, ClickDiscrepancy, NULL, ReleaseDiscrepancy, NULL);
13209
SetDocShade (drfp->doc, DrawDiscrepancy, NULL, NULL, NULL);
13211
drfp->item_list = DocumentPanel (g, stdCharWidth * 30 + 5, stdLineHeight * 20);
13212
SetObjectExtra (drfp->item_list, drfp, NULL);
13213
SetDocAutoAdjust (drfp->item_list, FALSE);
13214
SetDocProcs (drfp->item_list, ClickDiscrepancyItem, NULL, NULL, NULL);
13215
SetDocShade (drfp->item_list, DrawDiscrepancyItem, NULL, NULL, NULL);
13217
c = HiddenGroup (h, 4, 0, NULL);
13218
SetGroupSpacing (c, 10, 10);
13219
b = PushButton (c, "Generate Report", GenerateDiscrepancyReport);
13220
SetObjectExtra (b, drfp, NULL);
13221
drfp->recheck_btn = PushButton (c, "Recheck", RecheckDiscrepancyProc);
13222
SetObjectExtra (drfp->recheck_btn, drfp, NULL);
13224
b = PushButton (c, "Configure", EditDiscrepancyConfigBtn);
13225
SetObjectExtra (b, drfp, NULL);
13227
PushButton (c, "Dismiss", StdCancelButtonProc);
13229
AlignObjects (ALIGN_CENTER, (HANDLE) g, (HANDLE) c, NULL);
13233
/* adjust column width for discrepancy list */
13234
ObjectRect (drfp->doc, &r);
13235
InsetRect (&r, 4, 4);
13236
discColFmt[1].pixWidth = r.right - r.left - discColFmt[0].pixWidth;
13238
/* populate discrepancy lists */
13239
RecheckDiscrepancyProc (drfp->recheck_btn);
13241
discrepancyReportWindow = w;