4278
4403
CleanupInference (gbq);
4279
4404
unlink = FALSE;
4406
} else if (StringICmp (gbq->qual, "transposon") == 0) {
4407
if (StringICmp (gbq->val, "class I integron") == 0 ||
4408
StringICmp (gbq->val, "class II integron") == 0 ||
4409
StringICmp (gbq->val, "class III integron") == 0 ||
4410
StringICmp (gbq->val, "class 1 integron") == 0 ||
4411
StringICmp (gbq->val, "class 2 integron") == 0 ||
4412
StringICmp (gbq->val, "class 3 integron") == 0) {
4413
len = StringLen ("integron") + StringLen (gbq->val) + 5;
4414
str = MemNew (sizeof (Char) * len);
4415
StringCpy (str, "integron");
4416
StringCat (str, ":");
4417
ptr = StringStr (gbq->val, " integron");
4421
StringCat (str, gbq->val);
4422
gbq->val = MemFree (gbq->val);
4424
gbq->qual = MemFree (gbq->qual);
4425
gbq->qual = StringSave ("mobile_element");
4428
len = StringLen ("transposon") + StringLen (gbq->val) + 5;
4429
str = MemNew (sizeof (Char) * len);
4430
StringCpy (str, "transposon");
4431
StringCat (str, ":");
4432
StringCat (str, gbq->val);
4433
gbq->val = MemFree (gbq->val);
4435
gbq->qual = MemFree (gbq->qual);
4436
gbq->qual = StringSave ("mobile_element");
4439
} else if (StringICmp (gbq->qual, "insertion_seq") == 0) {
4440
len = StringLen ("insertion sequence") + StringLen (gbq->val) + 5;
4441
str = MemNew (sizeof (Char) * len);
4442
StringCpy (str, "insertion sequence");
4443
StringCat (str, ":");
4444
StringCat (str, gbq->val);
4445
gbq->val = MemFree (gbq->val);
4447
gbq->qual = MemFree (gbq->qual);
4448
gbq->qual = StringSave ("mobile_element");
4282
4451
unlink = FALSE;
4453
if (StringICmp (gbq->qual, "mobile_element") == 0) {
4454
if (StringStr (gbq->val, " :") == 0 || StringStr (gbq->val, ": ") == 0) {
4455
len = StringLen (gbq->val) + 5;
4456
ptr = StringChr (gbq->val, ':');
4460
TrimSpacesAroundString (gbq->val);
4461
TrimSpacesAroundString (ptr);
4462
str = MemNew (sizeof (Char) * len);
4463
StringCpy (str, gbq->val);
4464
StringCat (str, ":");
4465
StringCat (str, ptr);
4466
gbq->val = MemFree (gbq->val);
4284
4471
if (rpt_unit_seq != NULL) {
4285
4472
CleanupRptUnit (rpt_unit_seq);
4998
static CharPtr unstructured_orgmod_list [] = {
4999
"?", "?", "strain", "substrain", "type", "subtype", "variety",
5000
"serotype", "serogroup", "serovar", "cultivar", "pathovar", "chemovar",
5001
"biovar", "biotype", "group", "subgroup", "isolate", "common name",
5002
"acronym", "dosage", "natural host", "sub-species", "specimen-voucher",
5003
"authority", "forma", "forma-specialis", "ecotype", "synonym",
5004
"anamorph", "teleomorph", "breed", "gb-acronym", "gb-anamorph",
5005
"gb-synonym", "old-lineage", "old-name", NULL
5294
Nlm_QualNameAssoc current_orgmod_subtype_alist[] = {
5296
{"Acronym", ORGMOD_acronym},
5297
{"Anamorph", ORGMOD_anamorph},
5298
{"Authority", ORGMOD_authority},
5299
{"Bio-material", ORGMOD_bio_material},
5300
{"Biotype", ORGMOD_biotype},
5301
{"Biovar", ORGMOD_biovar},
5302
{"Breed", ORGMOD_breed},
5303
{"Chemovar", ORGMOD_chemovar},
5304
{"Common", ORGMOD_common},
5305
{"Cultivar", ORGMOD_cultivar},
5306
{"Culture-collection", ORGMOD_culture_collection},
5307
{"Ecotype", ORGMOD_ecotype},
5308
{"Forma", ORGMOD_forma},
5309
{"Forma-specialis", ORGMOD_forma_specialis},
5310
{"Group", ORGMOD_group},
5311
{"Isolate", ORGMOD_isolate},
5312
{"Metagenome-source", ORGMOD_metagenome_source},
5313
{"Pathovar", ORGMOD_pathovar},
5314
{"Serogroup", ORGMOD_serogroup},
5315
{"Serotype", ORGMOD_serotype},
5316
{"Serovar", ORGMOD_serovar},
5317
{"Specific-host", ORGMOD_nat_host},
5318
{"Specimen-voucher", ORGMOD_specimen_voucher},
5319
{"Strain", ORGMOD_strain},
5320
{"Subgroup", ORGMOD_subgroup},
5321
{"Sub-species", ORGMOD_sub_species},
5322
{"Substrain", ORGMOD_substrain},
5323
{"Subtype", ORGMOD_subtype},
5324
{"Synonym", ORGMOD_synonym},
5325
{"Teleomorph", ORGMOD_teleomorph},
5326
{"Type", ORGMOD_type},
5327
{"Variety", ORGMOD_variety},
5330
Nlm_QualNameAssoc discouraged_orgmod_subtype_alist[] = {
5331
{"Old Lineage", ORGMOD_old_lineage},
5332
{"Old Name", ORGMOD_old_name},
5335
Nlm_QualNameAssoc discontinued_orgmod_subtype_alist[] = {
5336
{"Dosage", ORGMOD_dosage},
5340
/*
 * Alternate spellings accepted for OrgMod qualifier names.
 *
 * Each row appears to be { name, alias, value } in that order (the
 * Nlm_NameNameAssoc declaration is not visible here -- confirm against the
 * header).  StringHasOrgModPrefix in this file walks the table while
 * .name != NULL, matches the input against .alias, and on a match takes
 * .value as the subtype.  The final { NULL, NULL, 0 } row is the sentinel
 * that stops that walk.
 */
Nlm_NameNameAssoc orgmod_aliases[] = {
5341
{"Sub-species", "subspecies", ORGMOD_sub_species},
5342
{"Specific-host", "nat-host", ORGMOD_nat_host},
5343
{ NULL, NULL, 0 } };
5345
extern CharPtr GetOrgModQualName (Uint1 subtype)
5349
if (subtype == ORGMOD_other) {
5352
for (i = 0; current_orgmod_subtype_alist[i].name != NULL; i++) {
5353
if (current_orgmod_subtype_alist[i].value == subtype) {
5354
return current_orgmod_subtype_alist[i].name;
5357
for (i = 0; discouraged_orgmod_subtype_alist[i].name != NULL; i++) {
5358
if (discouraged_orgmod_subtype_alist[i].value == subtype) {
5359
return discouraged_orgmod_subtype_alist[i].name;
5363
for (i = 0; discontinued_orgmod_subtype_alist[i].name != NULL; i++) {
5364
if (discontinued_orgmod_subtype_alist[i].value == subtype) {
5365
return discontinued_orgmod_subtype_alist[i].name;
5373
extern void BioSourceHasOldOrgModQualifiers (BioSourcePtr biop, BoolPtr has_discouraged, BoolPtr has_discontinued)
5376
Boolean discouraged = FALSE, discontinued = FALSE;
5379
if (biop != NULL && biop->org != NULL && biop->org->orgname != NULL) {
5380
mod = biop->org->orgname->mod;
5381
while (mod != NULL && (!discouraged || !discontinued)) {
5382
for (i = 0; discouraged_orgmod_subtype_alist[i].name != NULL && !discouraged; i++) {
5383
if (mod->subtype == discouraged_orgmod_subtype_alist[i].value) {
5387
for (i = 0; discontinued_orgmod_subtype_alist[i].name != NULL && !discontinued; i++) {
5388
if (mod->subtype == discontinued_orgmod_subtype_alist[i].value) {
5389
discontinued = TRUE;
5396
if (has_discouraged != NULL) {
5397
*has_discouraged = discouraged;
5399
if (has_discontinued != NULL) {
5400
*has_discontinued = discontinued;
5405
static void StringHasOrgModPrefix (CharPtr str, CharPtr PNTR pval, Uint1Ptr p_subtypeval, Boolean skippref)
5409
Uint1 subtype_val = 0;
5411
for (i = 0; current_orgmod_subtype_alist[i].name != NULL && subtype_val == 0; i++) {
5412
val = StringHasPrefix (str, current_orgmod_subtype_alist [i].name, FALSE, skippref);
5414
subtype_val = current_orgmod_subtype_alist[i].value;
5417
if (subtype_val == 0) {
5418
for (i = 0; orgmod_aliases[i].name != NULL && subtype_val == 0; i++) {
5419
val = StringHasPrefix (str, orgmod_aliases [i].alias, FALSE, skippref);
5421
subtype_val = orgmod_aliases[i].value;
5428
if (p_subtypeval != NULL) {
5429
*p_subtypeval = subtype_val;
5008
5433
static void OrpModToOrgMod (ValNodePtr PNTR vnpp, OrgModPtr PNTR ompp)
5013
5437
ValNodePtr next;
5014
5438
Int2 numcommas;
5015
5439
Int2 numspaces;
5068
static CharPtr unstructured_subsource_list [] = {
5069
"?", "chromosome", "map", "clone", "subclone", "haplotype",
5070
"genotype", "sex", "cell-line", "cell-type", "tissue-type",
5071
"clone-lib", "dev-stage", "frequency", "germline", "rearranged",
5072
"lab-host", "pop-variant", "tissue-lib", "plasmid", "transposon",
5073
"ins-seq", "plastid", "country", "segment", "endogenous-virus",
5074
"transgenic", "environmental-sample", "isolation-source",
5075
"lat-lon", "collection-date", "collected-by", "identified-by",
5076
"fwd-primer-seq", "rev-primer-seq", "fwd-primer-name", "rev-primer-name",
5491
Nlm_QualNameAssoc current_subsource_subtype_alist[] = {
5493
{"Cell-line", SUBSRC_cell_line},
5494
{"Cell-type", SUBSRC_cell_type},
5495
{"Chromosome", SUBSRC_chromosome},
5496
{"Clone", SUBSRC_clone},
5497
{"Clone-lib", SUBSRC_clone_lib},
5498
{"Collected-by", SUBSRC_collected_by},
5499
{"Collection-date", SUBSRC_collection_date},
5500
{"Country", SUBSRC_country},
5501
{"Dev-stage", SUBSRC_dev_stage},
5502
{"Endogenous-virus-name", SUBSRC_endogenous_virus_name},
5503
{"Environmental-sample", SUBSRC_environmental_sample},
5504
{"Frequency", SUBSRC_frequency},
5505
{"Fwd-PCR-primer-name", SUBSRC_fwd_primer_name},
5506
{"Fwd-PCR-primer-seq", SUBSRC_fwd_primer_seq},
5507
{"Genotype", SUBSRC_genotype},
5508
{"Germline", SUBSRC_germline},
5509
{"Haplotype", SUBSRC_haplotype},
5510
{"Identified-by", SUBSRC_identified_by},
5511
{"Isolation-source", SUBSRC_isolation_source},
5512
{"Lab-host", SUBSRC_lab_host},
5513
{"Lat-Lon", SUBSRC_lat_lon},
5514
{"Map", SUBSRC_map},
5515
{"Metagenomic", SUBSRC_metagenomic},
5516
{"Plasmid-name", SUBSRC_plasmid_name},
5517
{"Pop-variant", SUBSRC_pop_variant},
5518
{"Rearranged", SUBSRC_rearranged},
5519
{"Rev-PCR-primer-name", SUBSRC_rev_primer_name},
5520
{"Rev-PCR-primer-seq", SUBSRC_rev_primer_seq},
5521
{"Segment", SUBSRC_segment},
5522
{"Sex", SUBSRC_sex},
5523
{"Subclone", SUBSRC_subclone},
5524
{"Tissue-lib", SUBSRC_tissue_lib},
5525
{"Tissue-type", SUBSRC_tissue_type},
5526
{"Transgenic", SUBSRC_transgenic},
5529
Nlm_QualNameAssoc discouraged_subsource_subtype_alist[] = {
5530
{"Plastid-name", SUBSRC_plastid_name},
5533
Nlm_QualNameAssoc discontinued_subsource_subtype_alist[] = {
5534
{"Ins-seq-name", SUBSRC_insertion_seq_name},
5535
{"Transposon-name", SUBSRC_transposon_name},
5538
/*
 * Alternate spellings accepted for SubSource qualifier names.
 *
 * Each row appears to be { name, alias, value } in that order (the
 * Nlm_NameNameAssoc declaration is not visible here -- confirm against the
 * header).  StringHasSubSourcePrefix in this file walks the table while
 * .name != NULL, matches the input against .alias, and on a match takes
 * .value as the subtype.  Several aliases may map to the same value (see the
 * two Lat-Lon rows).  The final { NULL, NULL, 0 } row is the sentinel that
 * stops that walk.
 */
Nlm_NameNameAssoc subsource_aliases[] = {
5539
{"Fwd-PCR-primer-name", "fwd-primer-name", SUBSRC_fwd_primer_name},
5540
{"Fwd-PCR-primer-seq", "fwd-primer-seq", SUBSRC_fwd_primer_seq},
5541
{"Rev-PCR-primer-name", "rev-primer-name", SUBSRC_rev_primer_name},
5542
{"Rev-PCR-primer-seq", "rev-primer-seq", SUBSRC_rev_primer_seq},
5543
{"Subclone", "sub-clone", SUBSRC_subclone},
5544
{"Lat-Lon", "Lat-long", SUBSRC_lat_lon},
5545
{"Lat-Lon", "Latitude-Longitude", SUBSRC_lat_lon },
5546
{ NULL, NULL, 0 } };
5548
extern CharPtr GetSubsourceQualName (Uint1 subtype)
5552
if (subtype == SUBSRC_other) {
5555
for (i = 0; current_subsource_subtype_alist[i].name != NULL; i++) {
5556
if (current_subsource_subtype_alist[i].value == subtype) {
5557
return current_subsource_subtype_alist[i].name;
5561
for (i = 0; discouraged_subsource_subtype_alist[i].name != NULL; i++) {
5562
if (discouraged_subsource_subtype_alist[i].value == subtype) {
5563
return discouraged_subsource_subtype_alist[i].name;
5567
for (i = 0; discontinued_subsource_subtype_alist[i].name != NULL; i++) {
5568
if (discontinued_subsource_subtype_alist[i].value == subtype) {
5569
return discontinued_subsource_subtype_alist[i].name;
5577
extern void BioSourceHasOldSubSourceQualifiers (BioSourcePtr biop, BoolPtr has_discouraged, BoolPtr has_discontinued)
5580
Boolean discouraged = FALSE, discontinued = FALSE;
5584
ssp = biop->subtype;
5585
while (ssp != NULL && (!discouraged || !discontinued)) {
5586
for (i = 0; discouraged_subsource_subtype_alist[i].name != NULL && !discouraged; i++) {
5587
if (ssp->subtype == discouraged_subsource_subtype_alist[i].value) {
5591
for (i = 0; discontinued_subsource_subtype_alist[i].name != NULL && !discontinued; i++) {
5592
if (ssp->subtype == discontinued_subsource_subtype_alist[i].value) {
5593
discontinued = TRUE;
5600
if (has_discouraged != NULL) {
5601
*has_discouraged = discouraged;
5603
if (has_discontinued != NULL) {
5604
*has_discontinued = discontinued;
5609
static void StringHasSubSourcePrefix (CharPtr str, CharPtr PNTR pval, Uint1Ptr p_subtypeval, Boolean skippref)
5613
Uint1 subtype_val = 0;
5615
for (i = 0; current_subsource_subtype_alist[i].name != NULL && subtype_val == 0; i++) {
5616
val = StringHasPrefix (str, current_subsource_subtype_alist [i].name,
5617
(Boolean) (current_subsource_subtype_alist[i].value == SUBSRC_germline ||
5618
current_subsource_subtype_alist[i].value == SUBSRC_rearranged ||
5619
current_subsource_subtype_alist[i].value == SUBSRC_transgenic ||
5620
current_subsource_subtype_alist[i].value == SUBSRC_environmental_sample ||
5621
current_subsource_subtype_alist[i].value == SUBSRC_metagenomic),
5624
subtype_val = current_subsource_subtype_alist[i].value;
5627
if (subtype_val == 0) {
5628
for (i = 0; subsource_aliases[i].name != NULL && subtype_val == 0; i++) {
5629
val = StringHasPrefix (str, subsource_aliases [i].alias,
5630
(Boolean) (subsource_aliases[i].value == SUBSRC_germline ||
5631
subsource_aliases[i].value == SUBSRC_rearranged ||
5632
subsource_aliases[i].value == SUBSRC_transgenic ||
5633
subsource_aliases[i].value == SUBSRC_environmental_sample ||
5634
subsource_aliases[i].value == SUBSRC_metagenomic),
5637
subtype_val = subsource_aliases[i].value;
5644
if (p_subtypeval != NULL) {
5645
*p_subtypeval = subtype_val;
5080
5649
static void OrpModToSubSource (ValNodePtr PNTR vnpp, SubSourcePtr PNTR sspp)
5085
5653
ValNodePtr next;
5086
5654
Int2 numcommas;
5087
5655
Int2 numspaces;
7554
static void CleanupGoTerms (
7555
UserFieldPtr entryhead
7559
UserFieldPtr entry, topufp, ufp;
7560
CharPtr goid, goref, str;
7564
if (entryhead == NULL) return;
7566
for (entry = entryhead; entry != NULL; entry = entry->next) {
7567
if (entry == NULL || entry->choice != 11) break;
7568
topufp = (UserFieldPtr) entry->data.ptrvalue;
7569
if (topufp == NULL) continue;
7573
for (ufp = topufp; ufp != NULL; ufp = ufp->next) {
7575
if (oip == NULL) continue;
7576
for (j = 0; goFieldType [j] != NULL; j++) {
7577
if (StringICmp (oip->str, goFieldType [j]) == 0) break;
7579
if (goFieldType [j] == NULL) continue;
7582
if (ufp->choice == 1) {
7583
goid = (CharPtr) ufp->data.ptrvalue;
7584
if (goid != NULL && *goid != '\0') {
7585
if (StringNICmp (goid, "GO:", 3) == 0) {
7586
str = StringSave (goid + 3);
7587
ufp->data.ptrvalue = (Pointer) str;
7594
if (ufp->choice == 1) {
7595
goref = (CharPtr) ufp->data.ptrvalue;
7596
if (goref != NULL && *goref != '\0') {
7597
if (StringNICmp (goref, "GO_REF:", 7) == 0) {
7598
str = StringSave (goref + 7);
7599
ufp->data.ptrvalue = (Pointer) str;
7612
static void CleanupGoTermsUfp (
7622
if (ufp == NULL || ufp->choice != 11) return;
7624
if (oip == NULL) return;
7625
for (i = 0; goQualType [i] != NULL; i++) {
7626
if (StringICmp (oip->str, goQualType [i]) == 0) break;
7628
if (goQualType [i] == NULL) return;
7630
entry = ufp->data.ptrvalue;
7631
if (entry == NULL || entry->choice != 11) return;
7633
CleanupGoTerms (entry);
7636
static void CleanupGoTermsSfp (
7644
if (uop == NULL) return;
7646
if (oip == NULL) return;
7647
if (StringCmp (oip->str, "GeneOntology") == 0) {
7648
VisitUserFieldsInUop (uop, userdata, CleanupGoTermsUfp);
6968
7652
static void CleanupFeatureStrings (SeqFeatPtr sfp, Boolean stripSerial, ValNodePtr PNTR publist)
8824
CharPtr ncrnaClassList[] = {
8826
"autocatalytically_spliced_intron",
8827
"hammerhead_ribozyme",
8845
Int4 NcrnaOTHER = sizeof (ncrnaClassList) / sizeof (CharPtr) - 1;
8848
extern Boolean IsStringInNcRNAClassList (CharPtr str)
8852
if (StringHasNoText (str)) return FALSE;
8853
for (p = ncrnaClassList; *p != NULL; p++)
8855
if (StringICmp (str, *p) == 0)
8864
static void AddNonCopiedQual (SeqFeatPtr sfp, CharPtr qual, CharPtr class_val)
8868
if (sfp == NULL || StringHasNoText (qual) || StringHasNoText (class_val))
8874
&& (StringCmp (gbq->qual, qual) != 0
8875
|| StringCmp (gbq->val, class_val) != 0))
8882
gbq->qual = StringSave (qual);
8883
gbq->val = StringSave (class_val);
8884
gbq->next = sfp->qual;
8891
/*
 * GetMiRNAProduct
 *
 * Derives a product name from an RNA description string that mentions
 * miRNA / microRNA.
 *
 *   - Empty or whitespace-only input yields NULL.
 *   - A leading "miRNA " or "microRNA " is stripped and the remainder is
 *     returned as a newly saved string.
 *   - Otherwise a trailing " miRNA" or " microRNA" is stripped, unless the
 *     string ends in "precursor miRNA" / "precursor microRNA", in which case
 *     no product is produced by that branch.
 *
 * The result is heap-allocated (StringSave / MemNew); ConvertToNcRNA in this
 * file releases it with MemFree after use.
 *
 * NOTE: only part of this function is visible in this view; the lines that
 * are not shown (local declarations, braces, the return) are left untouched.
 */
static CharPtr GetMiRNAProduct (CharPtr str)
8894
CharPtr product = NULL;
8896
if (StringHasNoText (str)) return NULL;
8897
if (StringNCmp (str, "miRNA ", 6) == 0)
8899
product = StringSave (str + 6);
8901
else if (StringNCmp (str, "microRNA ", 9) == 0)
8903
product = StringSave (str + 9);
8907
len = StringLen (str);
8908
/*
 * Suffix tests must be anchored at the END of the string (str + len - K).
 * "precursor miRNA" is 15 characters, so the exclusion compares the last 15.
 * The previous form (str - 15) read memory before the start of the buffer.
 */
if (len > 6 && StringCmp (str + len - 6, " miRNA") == 0
8909
&& (len < 15 || StringCmp (str + len - 15, "precursor miRNA") != 0))
8911
/* len - 6 characters of payload plus the terminating NUL */
product = (CharPtr) MemNew (sizeof (Char) * (len - 5));
8912
StringNCpy (product, str, len - 6);
8913
product[len - 6] = 0;
8915
/*
 * "precursor microRNA" is 18 characters; both the length guard and the
 * offset use 18 so the exclusion actually matches that suffix.  The previous
 * form (len < 21, str - 21) could never match and read before the buffer.
 */
else if (len > 9 && StringCmp (str + len - 9, " microRNA") == 0
8916
&& (len < 18 || StringCmp (str + len - 18, "precursor microRNA") != 0))
8918
/* len - 9 characters of payload plus the terminating NUL */
product = (CharPtr) MemNew (sizeof (Char) * (len - 8));
8919
StringNCpy (product, str, len - 9);
8920
product[len - 9] = 0;
8927
static Boolean ConvertToNcRNA (SeqFeatPtr sfp)
8931
Boolean was_converted = FALSE;
8932
CharPtr miRNAproduct = NULL;
8934
if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL)
8938
rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
8939
if (rrp->type == 5 || rrp->type == 6 || rrp->type == 7)
8943
AddNonCopiedQual (sfp, "ncRNA_class", "snRNA");
8945
else if (rrp->type == 6)
8947
AddNonCopiedQual (sfp, "ncRNA_class", "scRNA");
8949
else if (rrp->type == 7)
8951
AddNonCopiedQual (sfp, "ncRNA_class", "snoRNA");
8953
if (rrp->ext.choice == 1)
8955
AddNonCopiedQual (sfp, "product", rrp->ext.value.ptrvalue);
8956
rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
8958
rrp->ext.choice = 1;
8959
rrp->ext.value.ptrvalue = StringSave ("ncRNA");
8961
was_converted = TRUE;
8963
else if (rrp->type == 255 && rrp->ext.choice == 1)
8965
if (IsStringInNcRNAClassList (rrp->ext.value.ptrvalue))
8967
AddNonCopiedQual (sfp, "ncRNA_class", rrp->ext.value.ptrvalue);
8968
rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
8969
rrp->ext.value.ptrvalue = StringSave ("ncRNA");
8970
was_converted = TRUE;
8972
else if ((miRNAproduct = GetMiRNAProduct (rrp->ext.value.ptrvalue)) != NULL)
8974
AddNonCopiedQual (sfp, "ncRNA_class", "miRNA");
8975
rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
8976
rrp->ext.value.ptrvalue = StringSave ("ncRNA");
8977
AddNonCopiedQual (sfp, "product", miRNAproduct);
8978
miRNAproduct = MemFree (miRNAproduct);
8979
was_converted = TRUE;
8981
else if (StringCmp (rrp->ext.value.ptrvalue, "ncRNA") != 0
8982
&& StringCmp (rrp->ext.value.ptrvalue, "tmRNA") != 0
8983
&& StringCmp (rrp->ext.value.ptrvalue, "misc_RNA") != 0)
8985
AddNonCopiedQual (sfp, "product", rrp->ext.value.ptrvalue);
8986
rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
8987
rrp->ext.value.ptrvalue = StringSave ("misc_RNA");
8990
if (rrp->type == 255 && rrp->ext.choice == 0) {
8991
rrp->ext.choice = 1;
8992
rrp->ext.value.ptrvalue = StringSave ("misc_RNA");
8994
if (rrp->type == 255 && rrp->ext.choice == 1 &&
8995
StringCmp (rrp->ext.value.ptrvalue, "misc_RNA") == 0) {
8996
for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
8997
if (StringCmp (gbq->qual, "ncRNA_class") == 0) {
8998
rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
8999
rrp->ext.value.ptrvalue = StringSave ("ncRNA");
9000
was_converted = TRUE;
9001
} else if (StringCmp (gbq->qual, "tag_peptide") == 0) {
9002
rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
9003
rrp->ext.value.ptrvalue = StringSave ("tmRNA");
9004
was_converted = TRUE;
9008
return was_converted;
8083
9012
NLM_EXTERN void CleanUpSeqFeat (
8084
9013
SeqFeatPtr sfp,
8085
9014
Boolean isEmblOrDdbj,
8113
9042
if (sfp == NULL) return;
8115
9044
if (sfp->data.choice == SEQFEAT_IMP) {
8116
ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
8118
if (ifp->loc != NULL) {
8119
str = StringStr (ifp->loc, "replace");
8121
AddReplaceQual (sfp, str);
8122
ifp->loc = MemFree (ifp->loc);
8125
if (StringCmp (ifp->key, "CDS") == 0) {
8126
if (! isEmblOrDdbj) {
8127
sfp->data.value.ptrvalue = ImpFeatFree (ifp);
8128
sfp->data.choice = SEQFEAT_CDREGION;
8129
crp = CdRegionNew ();
8130
sfp->data.value.ptrvalue = crp;
8131
sfp->idx.subtype = FEATDEF_CDS;
8133
} else if (StringCmp (ifp->key, "allele") == 0 ||
8134
StringCmp (ifp->key, "mutation") == 0) {
8135
ifp->key = MemFree (ifp->key);
8136
ifp->key = StringSave ("variation");
8137
sfp->idx.subtype = FEATDEF_variation;
8138
} else if (StringCmp (ifp->key, "Import") == 0 ||
8139
StringCmp (ifp->key, "virion") == 0) {
8140
ifp->key = MemFree (ifp->key);
8141
ifp->key = StringSave ("misc_feature");
8142
sfp->idx.subtype = FEATDEF_misc_feature;
8143
} else if (StringHasNoText (ifp->loc)) {
8145
if (StringCmp (ifp->key, "precursor_RNA") == 0) {
8147
} else if (StringCmp (ifp->key, "mRNA") == 0) {
8149
} else if (StringCmp (ifp->key, "tRNA") == 0) {
8151
} else if (StringCmp (ifp->key, "rRNA") == 0) {
8153
} else if (StringCmp (ifp->key, "snRNA") == 0) {
8155
} else if (StringCmp (ifp->key, "scRNA") == 0) {
8157
} else if (StringCmp (ifp->key, "snoRNA") == 0) {
8159
} else if (StringCmp (ifp->key, "misc_RNA") == 0) {
8163
sfp->data.value.ptrvalue = ImpFeatFree (ifp);
8164
sfp->data.choice = SEQFEAT_RNA;
8166
sfp->data.value.ptrvalue = rrp;
8167
rrp->type = rrptype;
8168
sfp->idx.subtype = FindFeatDefType (sfp);
8171
if (StringCmp (ifp->key, "proprotein") == 0 || StringCmp (ifp->key, "preprotein") == 0) {
8173
} else if (StringCmp (ifp->key, "mat_peptide") == 0) {
8175
} else if (StringCmp (ifp->key, "sig_peptide") == 0) {
8177
} else if (StringCmp (ifp->key, "transit_peptide") == 0) {
8180
if (processed != 0 || StringCmp (ifp->key, "Protein") == 0) {
8181
bsp = BioseqFind (SeqLocId (sfp->location));
8182
if (bsp != NULL && ISA_aa (bsp->mol)) {
8183
sfp->data.value.ptrvalue = ImpFeatFree (ifp);
8184
sfp->data.choice = SEQFEAT_PROT;
8185
prp = ProtRefNew ();
8186
sfp->data.value.ptrvalue = prp;
8187
prp->processed = processed;
8188
sfp->idx.subtype = FindFeatDefType (sfp);
9045
ifp = (ImpFeatPtr) sfp->data.value.ptrvalue;
9047
if (ifp->loc != NULL) {
9048
str = StringStr (ifp->loc, "replace");
9050
AddReplaceQual (sfp, str);
9051
ifp->loc = MemFree (ifp->loc);
9054
if (StringCmp (ifp->key, "CDS") == 0) {
9055
if (! isEmblOrDdbj) {
9056
sfp->data.value.ptrvalue = ImpFeatFree (ifp);
9057
sfp->data.choice = SEQFEAT_CDREGION;
9058
crp = CdRegionNew ();
9059
sfp->data.value.ptrvalue = crp;
9060
sfp->idx.subtype = FEATDEF_CDS;
9062
} else if (StringCmp (ifp->key, "allele") == 0 ||
9063
StringCmp (ifp->key, "mutation") == 0) {
9064
ifp->key = MemFree (ifp->key);
9065
ifp->key = StringSave ("variation");
9066
sfp->idx.subtype = FEATDEF_variation;
9067
} else if (StringCmp (ifp->key, "Import") == 0 ||
9068
StringCmp (ifp->key, "virion") == 0) {
9069
ifp->key = MemFree (ifp->key);
9070
ifp->key = StringSave ("misc_feature");
9071
sfp->idx.subtype = FEATDEF_misc_feature;
9072
} else if (StringHasNoText (ifp->loc)) {
9074
if (StringCmp (ifp->key, "precursor_RNA") == 0) {
9076
} else if (StringCmp (ifp->key, "mRNA") == 0) {
9078
} else if (StringCmp (ifp->key, "tRNA") == 0) {
9080
} else if (StringCmp (ifp->key, "rRNA") == 0) {
9082
} else if (StringCmp (ifp->key, "snRNA") == 0) {
9084
} else if (StringCmp (ifp->key, "scRNA") == 0) {
9086
} else if (StringCmp (ifp->key, "snoRNA") == 0) {
9088
} else if (StringCmp (ifp->key, "misc_RNA") == 0) {
9092
sfp->data.value.ptrvalue = ImpFeatFree (ifp);
9093
sfp->data.choice = SEQFEAT_RNA;
9095
sfp->data.value.ptrvalue = rrp;
9096
rrp->type = rrptype;
9097
sfp->idx.subtype = FindFeatDefType (sfp);
9100
if (StringCmp (ifp->key, "proprotein") == 0 || StringCmp (ifp->key, "preprotein") == 0) {
9102
} else if (StringCmp (ifp->key, "mat_peptide") == 0) {
9104
} else if (StringCmp (ifp->key, "sig_peptide") == 0) {
9106
} else if (StringCmp (ifp->key, "transit_peptide") == 0) {
9109
if (processed != 0 || StringCmp (ifp->key, "Protein") == 0) {
9110
bsp = BioseqFind (SeqLocId (sfp->location));
9111
if (bsp != NULL && ISA_aa (bsp->mol)) {
9112
sfp->data.value.ptrvalue = ImpFeatFree (ifp);
9113
sfp->data.choice = SEQFEAT_PROT;
9114
prp = ProtRefNew ();
9115
sfp->data.value.ptrvalue = prp;
9116
prp->processed = processed;
9117
sfp->idx.subtype = FindFeatDefType (sfp);
8195
9124
if (crp != NULL && crp->frame == 0 && (! sfp->pseudo)) {
8196
crp->frame = GetFrameFromLoc (sfp->location);
9125
crp->frame = GetFrameFromLoc (sfp->location);
8198
9127
ModernizeFeatureGBQuals (sfp);
8199
9128
sfp->qual = SortFeatureGBQuals (sfp->qual);
8224
9153
} else if (sfp->data.choice == SEQFEAT_CDREGION) {
8225
crp = (CdRegionPtr) sfp->data.value.ptrvalue;
8227
crp->code_break = SortCodeBreaks (sfp, crp->code_break);
8228
CleanupDuplicatedCodeBreaks (&(crp->code_break));
8229
for (cbp = crp->code_break; cbp != NULL; cbp = cbp->next) {
8230
CleanupSeqLoc (cbp->loc);
8231
if (strand == Seq_strand_minus && id != NULL) {
8233
if (slp != NULL && slp->choice == SEQLOC_INT) {
8234
sip = SeqLocId (slp);
8235
if (sip != NULL && SeqIdComp (id, sip) == SIC_YES) {
8236
sintp = (SeqIntPtr) slp->data.ptrvalue;
8237
if (sintp != NULL) {
8238
sintp->strand = Seq_strand_minus;
9154
crp = (CdRegionPtr) sfp->data.value.ptrvalue;
9156
crp->code_break = SortCodeBreaks (sfp, crp->code_break);
9157
CleanupDuplicatedCodeBreaks (&(crp->code_break));
9158
for (cbp = crp->code_break; cbp != NULL; cbp = cbp->next) {
9159
CleanupSeqLoc (cbp->loc);
9160
if (strand == Seq_strand_minus && id != NULL) {
9162
if (slp != NULL && slp->choice == SEQLOC_INT) {
9163
sip = SeqLocId (slp);
9164
if (sip != NULL && SeqIdComp (id, sip) == SIC_YES) {
9165
sintp = (SeqIntPtr) slp->data.ptrvalue;
9166
if (sintp != NULL) {
9167
sintp->strand = Seq_strand_minus;
8245
9174
} else if (sfp->data.choice == SEQFEAT_RNA) {
8246
rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
9175
rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
8247
9176
if (rrp != NULL) {
8248
9177
if (rrp->pseudo) {
8249
9178
sfp->pseudo = TRUE;
8250
9179
rrp->pseudo = FALSE;
8253
if (rrp != NULL && rrp->ext.choice == 2) {
8254
trp = (tRNAPtr) rrp->ext.value.ptrvalue;
8255
if (trp != NULL && trp->anticodon != NULL) {
8256
CleanupSeqLoc (trp->anticodon);
8257
if (strand == Seq_strand_minus && id != NULL) {
8258
slp = trp->anticodon;
8259
if (slp != NULL && slp->choice == SEQLOC_INT) {
8260
sip = SeqLocId (slp);
8261
if (sip != NULL && SeqIdComp (id, sip) == SIC_YES) {
8262
sintp = (SeqIntPtr) slp->data.ptrvalue;
8263
if (sintp != NULL) {
8264
sintp->strand = Seq_strand_minus;
9182
if (rrp != NULL && rrp->ext.choice == 2) {
9183
trp = (tRNAPtr) rrp->ext.value.ptrvalue;
9184
if (trp != NULL && trp->anticodon != NULL) {
9185
CleanupSeqLoc (trp->anticodon);
9186
if (strand == Seq_strand_minus && id != NULL) {
9187
slp = trp->anticodon;
9188
if (slp != NULL && slp->choice == SEQLOC_INT) {
9189
sip = SeqLocId (slp);
9190
if (sip != NULL && SeqIdComp (id, sip) == SIC_YES) {
9191
sintp = (SeqIntPtr) slp->data.ptrvalue;
9192
if (sintp != NULL) {
9193
sintp->strand = Seq_strand_minus;
9200
if (ConvertToNcRNA (sfp)) {
9201
sfp->idx.subtype = FindFeatDefType (sfp);
8271
9203
} else if (sfp->data.choice == SEQFEAT_REGION ||
8272
9204
sfp->data.choice == SEQFEAT_SITE ||
8273
9205
sfp->data.choice == SEQFEAT_BOND ||
9726
10676
NLM_EXTERN SeqIdPtr MakeUniqueSeqID (CharPtr prefix)
9740
vn.choice = SEQID_GENBANK;
9742
vn.data.ptrvalue = &tsi;
9744
tsi.accession = NULL;
9746
tsi.version = INT2_MIN;
9748
len = StringLen (prefix);
9749
if (len > 0 && len < 32) {
9750
tmp = StringMove(buf, prefix);
9752
tmp = StringMove(buf, "tmpseq_");
9755
newid = ValNodeNew(NULL);
9756
oid = ObjectIdNew();
9757
oid->str = buf; /* allocate this later */
9758
newid->choice = SEQID_LOCAL;
9759
newid->data.ptrvalue = oid;
9761
tsi.name = buf; /* check for alternative form */
9763
for (ctr = 1; ctr < 32000; ctr++)
9765
sprintf(tmp, "%d", (int)ctr);
9766
if ((BioseqFindCore(newid) == NULL) && (BioseqFindCore(altid) == NULL))
9768
oid->str = StringSave(buf);
10690
vn.choice = SEQID_GENBANK;
10692
vn.data.ptrvalue = &tsi;
10694
tsi.accession = NULL;
10695
tsi.release = NULL;
10696
tsi.version = INT2_MIN;
10698
len = StringLen (prefix);
10699
if (len > 0 && len < 32) {
10700
tmp = StringMove(buf, prefix);
10702
tmp = StringMove(buf, "tmpseq_");
10705
newid = ValNodeNew(NULL);
10706
oid = ObjectIdNew();
10707
oid->str = buf; /* allocate this later */
10708
newid->choice = SEQID_LOCAL;
10709
newid->data.ptrvalue = oid;
10711
tsi.name = buf; /* check for alternative form */
10713
for (ctr = 1; ctr < 32000; ctr++)
10715
sprintf(tmp, "%d", (int)ctr);
10716
if ((BioseqFindCore(newid) == NULL) && (BioseqFindCore(altid) == NULL))
10718
oid->str = StringSave(buf);
9776
10726
NLM_EXTERN SeqIdPtr SeqIdFindWorst (SeqIdPtr sip)