~ubuntu-branches/ubuntu/oneiric/ncbi-tools6/oneiric

1.1.8 by Aaron M. Ucko
Import upstream version 6.1.20080302
1
/*   macro.c
2
* ===========================================================================
3
*
4
*                            PUBLIC DOMAIN NOTICE
5
*            National Center for Biotechnology Information (NCBI)
6
*
7
*  This software/database is a "United States Government Work" under the
8
*  terms of the United States Copyright Act.  It was written as part of
9
*  the author's official duties as a United States Government employee and
10
*  thus cannot be copyrighted.  This software/database is freely available
11
*  to the public for use. The National Library of Medicine and the U.S.
12
*  Government do not place any restriction on its use or reproduction.
13
*  We would, however, appreciate having the NCBI and the author cited in
14
*  any work or product based on this material
15
*
16
*  Although all reasonable efforts have been taken to ensure the accuracy
17
*  and reliability of the software and data, the NLM and the U.S.
18
*  Government do not and cannot warrant the performance or results that
19
*  may be obtained by using this software or data. The NLM and the U.S.
20
*  Government disclaim all warranties, express or implied, including
21
*  warranties of performance, merchantability or fitness for any particular
22
*  purpose.
23
*
24
* ===========================================================================
25
*
26
* File Name:  macro.c
27
*
28
* Author:  Colleen Bollin
29
*
30
* Version Creation Date:   11/8/2007
31
*
32
* $Revision: 1.58 $
33
*
34
* File Description: 
35
*
36
* Modifications:  
37
* --------------------------------------------------------------------------
38
* Date     Name        Description of modification
39
* -------  ----------  -----------------------------------------------------
40
*
41
*
42
* ==========================================================================
43
*/
44
45
#include <asn.h>
46
#include <objmacro.h>
47
#include <objfeat.h>
48
#include <subutil.h>
49
#include <objmgr.h>
50
#include <objfdef.h>
51
#include <gbftdef.h>
52
#include <sqnutils.h>
53
#include <edutil.h>
54
#include <gather.h>
55
#include <asn2gnbi.h>
56
#define NLM_GENERATED_CODE_PROTO
57
#include <macroapi.h>
58
#include <seqport.h>
59
60
/* structure and create/free functions for CGPSet, used for handling CDS-Gene-Prot sets */
61
typedef struct cgpset 
62
{
63
  ValNodePtr cds_list;
64
  ValNodePtr gene_list;
65
  ValNodePtr prot_list;
66
  ValNodePtr mrna_list;
67
} CGPSetData, PNTR CGPSetPtr;
68
69
70
71
static CGPSetPtr CGPSetNew (void)
72
{
73
  CGPSetPtr c;
74
75
  c = (CGPSetPtr) MemNew (sizeof (CGPSetData));
76
  c->cds_list = NULL;
77
  c->gene_list = NULL;
78
  c->prot_list = NULL;
79
  c->mrna_list = NULL;
80
  return c;
81
}
82
83
84
/* Releases a CGPSet: each of the four lists is handed to ValNodeFree and
 * the structure itself to MemFree.
 *
 * c: set to release; NULL is accepted and returned unchanged.
 * Returns whatever MemFree returns for the structure (or the NULL argument),
 * so callers can write  set = CGPSetFree (set);
 */
static CGPSetPtr CGPSetFree (CGPSetPtr c)
{
  if (c == NULL) {
    return c;
  }

  c->cds_list = ValNodeFree (c->cds_list);
  c->gene_list = ValNodeFree (c->gene_list);
  c->prot_list = ValNodeFree (c->prot_list);
  c->mrna_list = ValNodeFree (c->mrna_list);

  c = MemFree (c);
  return c;
}
95
96
97
/* Frees a ValNode list whose data.ptrvalue entries are CGPSets.
 *
 * vnp: head of the list (may be NULL).
 * Each node's CGPSet is released with CGPSetFree, then the node itself
 * with ValNodeFree.  Always returns NULL.
 */
static ValNodePtr FreeCGPSetList (ValNodePtr vnp)
{
  ValNodePtr node;
  ValNodePtr following;

  for (node = vnp; node != NULL; node = following) {
    following = node->next;
    /* unlink before freeing -- presumably so ValNodeFree does not
       continue into the rest of the chain */
    node->next = NULL;
    node->data.ptrvalue = CGPSetFree (node->data.ptrvalue);
    ValNodeFree (node);
  }
  return NULL;
}
110
111
112
/* generic functions for mapping constraints */
113
114
typedef struct feattypefeatdef {
115
  Int4 feattype;
116
  Int4 featdef;
117
  CharPtr featname;
118
} FeatTypeFeatDefData, PNTR FeatTypeFeatDefPtr;
119
120
static FeatTypeFeatDefData feattype_featdef[] = {
121
 { Feature_type_any , FEATDEF_ANY , "any" } , 
122
 { Feature_type_gene , FEATDEF_GENE , "gene" } , 
123
 { Feature_type_org , FEATDEF_ORG , "org" } , 
124
 { Feature_type_cds , FEATDEF_CDS , "CDS" } , 
125
 { Feature_type_prot , FEATDEF_PROT , "Protein" } , 
126
 { Feature_type_preRNA , FEATDEF_preRNA , "preRNA" } , 
127
 { Feature_type_mRNA , FEATDEF_mRNA , "mRNA" } , 
128
 { Feature_type_tRNA , FEATDEF_tRNA , "tRNA" } , 
129
 { Feature_type_rRNA , FEATDEF_rRNA , "rRNA" } , 
130
 { Feature_type_snRNA , FEATDEF_snRNA , "snRNA" } , 
131
 { Feature_type_scRNA , FEATDEF_scRNA , "scRNA" } , 
132
 { Feature_type_otherRNA , FEATDEF_otherRNA , "misc_RNA" } , 
133
 { Feature_type_pub , FEATDEF_PUB , "pub" } , 
134
 { Feature_type_seq , FEATDEF_SEQ , "seq" } , 
135
 { Feature_type_imp , FEATDEF_IMP , "imp" } , 
136
 { Feature_type_allele , FEATDEF_allele , "allele" } , 
137
 { Feature_type_attenuator , FEATDEF_attenuator , "attenuator" } , 
138
 { Feature_type_c_region , FEATDEF_C_region , "c_region" } , 
139
 { Feature_type_caat_signal , FEATDEF_CAAT_signal , "caat_signal" } , 
140
 { Feature_type_imp_CDS , FEATDEF_Imp_CDS , "imp_CDS" } , 
141
 { Feature_type_conflict , FEATDEF_conflict , "conflict" } , 
142
 { Feature_type_d_loop , FEATDEF_D_loop , "d_loop" } , 
143
 { Feature_type_d_segment , FEATDEF_D_segment , "d_segment" } , 
144
 { Feature_type_enhancer , FEATDEF_enhancer , "enhancer" } , 
145
 { Feature_type_exon , FEATDEF_exon , "exon" } , 
146
 { Feature_type_gC_signal , FEATDEF_GC_signal , "gC_signal" } , 
147
 { Feature_type_iDNA , FEATDEF_iDNA , "iDNA" } , 
148
 { Feature_type_intron , FEATDEF_intron , "intron" } , 
149
 { Feature_type_j_segment , FEATDEF_J_segment , "j_segment" } , 
150
 { Feature_type_ltr , FEATDEF_LTR , "ltr" } , 
151
 { Feature_type_mat_peptide , FEATDEF_mat_peptide , "mat_peptide" } , 
152
 { Feature_type_misc_binding , FEATDEF_misc_binding , "misc_binding" } , 
153
 { Feature_type_misc_difference , FEATDEF_misc_difference , "misc_difference" } , 
154
 { Feature_type_misc_feature , FEATDEF_misc_feature , "misc_feature" } , 
155
 { Feature_type_misc_recomb , FEATDEF_misc_recomb , "misc_recomb" } , 
156
 { Feature_type_misc_RNA , FEATDEF_misc_RNA , "misc_RNA" } , 
157
 { Feature_type_misc_signal , FEATDEF_misc_signal , "misc_signal" } , 
158
 { Feature_type_misc_structure , FEATDEF_misc_structure , "misc_structure" } , 
159
 { Feature_type_modified_base , FEATDEF_modified_base , "modified_base" } , 
160
 { Feature_type_mutation , FEATDEF_mutation , "mutation" } , 
161
 { Feature_type_n_region , FEATDEF_N_region , "n_region" } , 
162
 { Feature_type_old_sequence , FEATDEF_old_sequence , "old_sequence" } , 
163
 { Feature_type_polyA_signal , FEATDEF_polyA_signal , "polyA_signal" } , 
164
 { Feature_type_polyA_site , FEATDEF_polyA_site , "polyA_site" } , 
165
 { Feature_type_precursor_RNA , FEATDEF_precursor_RNA , "precursor_RNA" } , 
166
 { Feature_type_prim_transcript , FEATDEF_prim_transcript , "prim_transcript" } , 
167
 { Feature_type_primer_bind , FEATDEF_primer_bind , "primer_bind" } , 
168
 { Feature_type_promoter , FEATDEF_promoter , "promoter" } , 
169
 { Feature_type_protein_bind , FEATDEF_protein_bind , "protein_bind" } , 
170
 { Feature_type_rbs , FEATDEF_RBS , "rbs" } , 
171
 { Feature_type_repeat_region , FEATDEF_repeat_region , "repeat_region" } , 
172
 { Feature_type_repeat_unit , FEATDEF_repeat_unit , "repeat_unit" } , 
173
 { Feature_type_rep_origin , FEATDEF_rep_origin , "rep_origin" } , 
174
 { Feature_type_s_region , FEATDEF_S_region , "s_region" } , 
175
 { Feature_type_satellite , FEATDEF_satellite , "satellite" } , 
176
 { Feature_type_sig_peptide , FEATDEF_sig_peptide , "sig_peptide" } , 
177
 { Feature_type_source , FEATDEF_source , "source" } , 
178
 { Feature_type_stem_loop , FEATDEF_stem_loop , "stem_loop" } , 
179
 { Feature_type_sts , FEATDEF_STS , "sts" } , 
180
 { Feature_type_tata_signal , FEATDEF_TATA_signal , "tata_signal" } , 
181
 { Feature_type_terminator , FEATDEF_terminator , "terminator" } , 
182
 { Feature_type_transit_peptide , FEATDEF_transit_peptide , "transit_peptide" } , 
183
 { Feature_type_unsure , FEATDEF_unsure , "unsure" } , 
184
 { Feature_type_v_region , FEATDEF_V_region , "v_region" } , 
185
 { Feature_type_v_segment , FEATDEF_V_segment , "v_segment" } , 
186
 { Feature_type_variation , FEATDEF_variation , "variation" } , 
187
 { Feature_type_virion , FEATDEF_virion , "virion" } , 
188
 { Feature_type_n3clip , FEATDEF_3clip , "3clip" } , 
189
 { Feature_type_n3UTR , FEATDEF_3UTR , "3UTR" } , 
190
 { Feature_type_n5clip , FEATDEF_5clip , "5clip" } , 
191
 { Feature_type_n5UTR , FEATDEF_5UTR , "5UTR" } , 
192
 { Feature_type_n10_signal , FEATDEF_10_signal , "10_signal" } , 
193
 { Feature_type_n35_signal , FEATDEF_35_signal , "35_signal" } , 
194
 { Feature_type_site_ref , FEATDEF_site_ref , "site_ref" } , 
195
 { Feature_type_region , FEATDEF_REGION , "region" } , 
196
 { Feature_type_comment , FEATDEF_COMMENT , "comment" } , 
197
 { Feature_type_bond , FEATDEF_BOND , "bond" } , 
198
 { Feature_type_site , FEATDEF_SITE , "site" } , 
199
 { Feature_type_rsite , FEATDEF_RSITE , "rsite" } , 
200
 { Feature_type_user , FEATDEF_USER , "user" } , 
201
 { Feature_type_txinit , FEATDEF_TXINIT , "txinit" } , 
202
 { Feature_type_num , FEATDEF_NUM , "num" } , 
203
 { Feature_type_psec_str , FEATDEF_PSEC_STR , "psec_str" } , 
204
 { Feature_type_non_std_residue , FEATDEF_NON_STD_RESIDUE , "non_std_residue" } , 
205
 { Feature_type_het , FEATDEF_HET , "het" } , 
206
 { Feature_type_biosrc , FEATDEF_BIOSRC , "biosrc" } , 
207
 { Feature_type_preprotein , FEATDEF_preprotein , "preprotein" } , 
208
 { Feature_type_mat_peptide_aa , FEATDEF_mat_peptide_aa , "mat_peptide_aa" } , 
209
 { Feature_type_sig_peptide_aa , FEATDEF_sig_peptide_aa , "sig_peptide_aa" } , 
210
 { Feature_type_transit_peptide_aa , FEATDEF_transit_peptide_aa , "transit_peptide_aa" } , 
211
 { Feature_type_snoRNA , FEATDEF_snoRNA , "snoRNA" } , 
212
 { Feature_type_gap , FEATDEF_gap , "gap" } , 
213
 { Feature_type_operon , FEATDEF_operon , "operon" } , 
214
 { Feature_type_oriT , FEATDEF_oriT , "oriT" } , 
215
 { Feature_type_ncRNA , FEATDEF_ncRNA , "ncRNA" } , 
216
 { Feature_type_tmRNA , FEATDEF_tmRNA , "tmRNA" }};
217
218
/* Number of rows in feattype_featdef.  The expansion is parenthesized so
 * it stays a single operand wherever the macro is used.
 */
#define NUM_feattype_featdef (sizeof (feattype_featdef) / sizeof (FeatTypeFeatDefData))
219
220
/* Returns the FEATDEF value paired with a Feature_type value.
 *
 * feature_type: Feature_type_* value to look up.
 * Scans feattype_featdef in table order and returns the featdef of the
 * first row whose feattype matches; FEATDEF_BAD when no row matches.
 */
NLM_EXTERN Int4 GetFeatdefFromFeatureType (Int4 feature_type)
{
  Int4 row = 0;
  Int4 num_rows = NUM_feattype_featdef;

  while (row < num_rows) {
    if (feattype_featdef[row].feattype == feature_type) {
      return feattype_featdef[row].featdef;
    }
    row++;
  }
  return FEATDEF_BAD;
}
231
232
233
/* Returns the table name for a Feature_type value.
 *
 * feature_type: Feature_type_* value to look up.
 * Returns the featname of the first feattype_featdef row whose feattype
 * matches, or the literal "Unknown feature type" when none does.  The
 * returned pointer refers to static storage; callers must not free it.
 */
NLM_EXTERN CharPtr GetFeatureNameFromFeatureType (Int4 feature_type)
{
  CharPtr str = NULL;
  Int4 i;

  for (i = 0; i < NUM_feattype_featdef && str == NULL; i++) {
    if (feature_type == feattype_featdef[i].feattype) {
      /* take the name from the row that matched; indexing by the
         feature_type value itself picks an unrelated row and can run
         past the end of the table */
      str = feattype_featdef[i].featname;
    }
  }
  if (str == NULL) {
    str = "Unknown feature type";
  }
  return str;
}
248
249
250
/* Compares two names, ignoring the separator characters ' ', '-' and '_'
 * wherever they occur (including at the end of either string).  The
 * comparison of the remaining characters is case-sensitive.
 *
 * name1, name2: strings to compare; either may be NULL.
 * Returns TRUE when both are NULL or when the strings are equal once
 * separators are ignored; FALSE otherwise (including exactly one NULL).
 *
 * The terminator is checked right after the separators are skipped, so
 * the pointers never advance past the end of either string.
 */
static Boolean Matchnamestring (CharPtr name1, CharPtr name2)
{
  if (name1 == NULL && name2 == NULL) {
    return TRUE;
  }
  if (name1 == NULL || name2 == NULL) {
    return FALSE;
  }

  for (;;) {
    while (*name1 == ' ' || *name1 == '-' || *name1 == '_') {
      name1++;
    }
    while (*name2 == ' ' || *name2 == '-' || *name2 == '_') {
      name2++;
    }
    if (*name1 != *name2) {
      return FALSE;
    }
    if (*name1 == 0) {
      /* both strings ended together */
      return TRUE;
    }
    name1++;
    name2++;
  }
}
277
278
279
/* Returns the Feature_type value for a feature name.
 *
 * feat_name: name to look up; compared with Matchnamestring against each
 *            featname in feattype_featdef, in table order.
 * Returns the feattype of the first matching row, or -1 when none matches.
 */
NLM_EXTERN Int4 GetFeatureTypeByName (CharPtr feat_name)
{
  Int4 row = 0;
  Int4 num_rows = NUM_feattype_featdef;

  while (row < num_rows) {
    if (Matchnamestring (feattype_featdef[row].featname, feat_name)) {
      return feattype_featdef[row].feattype;
    }
    row++;
  }
  return -1;
}
290
291
292
/* Appends the import-feature entries of feattype_featdef to a choice list.
 *
 * feature_type_list: list to extend (via ValNodeLink).
 * Row 0 of the table and the gap entry are skipped.  A row is kept only
 * when FindFeatFromFeatDefType reports SEQFEAT_IMP for its featdef.  Each
 * kept row becomes a node whose choice is the feature type and whose data
 * is a StringSave copy of the feature name.  The new nodes are sorted
 * with SortVnpByString before being linked onto the caller's list.
 */
NLM_EXTERN void AddImportFeaturesToChoiceList (ValNodePtr PNTR feature_type_list)
{
  Int4       row;
  Int4       num_rows = NUM_feattype_featdef;
  Int4       feat_type;
  CharPtr    label;
  ValNodePtr import_list = NULL;

  for (row = 1; row < num_rows; row++) {
    feat_type = feattype_featdef[row].feattype;
    if (feat_type == Feature_type_gap) {
      continue;
    }
    if (FindFeatFromFeatDefType (feattype_featdef[row].featdef) != SEQFEAT_IMP) {
      continue;
    }
    label = GetFeatureNameFromFeatureType (feat_type);
    if (label == NULL) {
      continue;
    }
    ValNodeAddPointer (&import_list, feat_type, StringSave (label));
  }

  import_list = ValNodeSort (import_list, SortVnpByString);
  ValNodeLink (feature_type_list, import_list);
}
311
312
313
314
/* Tells whether a feature type belongs to the "most used" group that
 * SortVnpByFeatureName places ahead of all other features.
 *
 * val: Feature_type_* value (as stored in a ValNode choice).
 * Returns TRUE for gene, cds, prot, exon, intron, mRNA, rRNA and
 * otherRNA; FALSE for everything else.
 */
static Boolean IsMostUsedFeature (Uint1 val)
{
  switch (val) {
    case Feature_type_gene:
    case Feature_type_cds:
    case Feature_type_prot:
    case Feature_type_exon:
    case Feature_type_intron:
    case Feature_type_mRNA:
    case Feature_type_rRNA:
    case Feature_type_otherRNA:
      return TRUE;
    default:
      return FALSE;
  }
}
329
330
331
/* Comparison callback for ValNodeSort over feature choice-list nodes.
 *
 * ptr1, ptr2: each points at a ValNodePtr (the sort passes node pointers
 *             by address).
 * Ordering: nodes whose choice is a most-used feature type (see
 * IsMostUsedFeature) come before all others; within the same group the
 * names in data.ptrvalue are compared case-insensitively with StringICmp.
 * Returns 0 when either argument, either node or either name is NULL.
 */
static int LIBCALLBACK SortVnpByFeatureName (VoidPtr ptr1, VoidPtr ptr2)
{
  ValNodePtr  left;
  ValNodePtr  right;
  CharPtr     left_name;
  CharPtr     right_name;
  Boolean     left_common;
  Boolean     right_common;

  if (ptr1 == NULL || ptr2 == NULL) {
    return 0;
  }

  left = *((ValNodePtr PNTR) ptr1);
  right = *((ValNodePtr PNTR) ptr2);
  if (left == NULL || right == NULL) {
    return 0;
  }

  left_common = IsMostUsedFeature (left->choice);
  right_common = IsMostUsedFeature (right->choice);
  if (left_common && !right_common) {
    return -1;
  }
  if (!left_common && right_common) {
    return 1;
  }

  /* same group: fall back to the names */
  left_name = (CharPtr) left->data.ptrvalue;
  right_name = (CharPtr) right->data.ptrvalue;
  if (left_name == NULL || right_name == NULL) {
    return 0;
  }
  return StringICmp (left_name, right_name);
}
361
362
363
/* Appends an entry for every feature type to a choice list.
 *
 * feature_type_list: list to extend (via ValNodeLink).
 * Row 0 of feattype_featdef and the gap entry are skipped.  Each other
 * row becomes a node whose choice is the feature type and whose data is
 * a StringSave copy of the feature name.  The new nodes are sorted with
 * SortVnpByFeatureName before being linked onto the caller's list.
 */
NLM_EXTERN void AddAllFeaturesToChoiceList (ValNodePtr PNTR feature_type_list)
{
  Int4       row;
  Int4       num_rows = NUM_feattype_featdef;
  Int4       feat_type;
  CharPtr    label;
  ValNodePtr all_list = NULL;

  for (row = 1; row < num_rows; row++) {
    feat_type = feattype_featdef[row].feattype;
    if (feat_type == Feature_type_gap) {
      continue;
    }
    label = GetFeatureNameFromFeatureType (feat_type);
    if (label == NULL) {
      continue;
    }
    ValNodeAddPointer (&all_list, feat_type, StringSave (label));
  }

  all_list = ValNodeSort (all_list, SortVnpByFeatureName);
  ValNodeLink (feature_type_list, all_list);
}
379
380
381
typedef struct featqualgbqual {
382
  Int4 featqual;
383
  Int4 gbqual;
384
  CharPtr qualname;
385
} FeatQualGBQualData, PNTR FeatQualGBQualPtr;
386
387
static FeatQualGBQualData featqual_gbqual[] = {
388
 { Feat_qual_legal_allele , GBQUAL_allele , "allele" } , 
389
 { Feat_qual_legal_anticodon , GBQUAL_anticodon , "anticodon" } , 
390
 { Feat_qual_legal_bound_moiety , GBQUAL_bound_moiety , "bound-moiety" } , 
391
 { Feat_qual_legal_chromosome , GBQUAL_chromosome , "chromosome" } , 
392
 { Feat_qual_legal_citation , GBQUAL_citation , "citation" } , 
393
 { Feat_qual_legal_codon , GBQUAL_codon , "codon" } , 
394
 { Feat_qual_legal_codon_start , GBQUAL_codon_start , "codon-start" } , 
395
 { Feat_qual_legal_compare , GBQUAL_compare , "compare" } , 
396
 { Feat_qual_legal_cons_splice , GBQUAL_cons_splice , "cons-splice" } , 
397
 { Feat_qual_legal_db_xref , GBQUAL_db_xref , "db-xref" } , 
398
 { Feat_qual_legal_direction , GBQUAL_direction , "direction" } , 
399
 { Feat_qual_legal_environmental_sample , GBQUAL_environmental_sample , "environmental-sample" } , 
400
 { Feat_qual_legal_evidence , GBQUAL_evidence , "evidence" } , 
401
 { Feat_qual_legal_exception , GBQUAL_exception , "exception" } , 
402
 { Feat_qual_legal_experiment , GBQUAL_experiment , "experiment" } , 
403
 { Feat_qual_legal_focus , GBQUAL_focus , "focus" } , 
404
 { Feat_qual_legal_frequency , GBQUAL_frequency , "frequency" } , 
405
 { Feat_qual_legal_function , GBQUAL_function , "function" } , 
406
 { Feat_qual_legal_gene , GBQUAL_gene , "locus" } , 
407
 { Feat_qual_legal_inference , GBQUAL_inference , "inference" } , 
408
 { Feat_qual_legal_label , GBQUAL_label , "label" } , 
409
 { Feat_qual_legal_locus_tag , GBQUAL_locus_tag , "locus-tag" } , 
410
 { Feat_qual_legal_map , GBQUAL_map , "map" } , 
411
 { Feat_qual_legal_mobile_element , GBQUAL_mobile_element , "mobile-element" } , 
412
 { Feat_qual_legal_mod_base , GBQUAL_mod_base , "mod-base" } , 
413
 { Feat_qual_legal_mol_type , GBQUAL_mol_type , "mol-type" } , 
414
 { Feat_qual_legal_ncRNA_class , GBQUAL_ncRNA_class , "ncRNA-class" } , 
415
 { Feat_qual_legal_note , GBQUAL_note , "note" } , 
416
 { Feat_qual_legal_number , GBQUAL_number , "number" } , 
417
 { Feat_qual_legal_old_locus_tag , GBQUAL_old_locus_tag , "old-locus-tag" } , 
418
 { Feat_qual_legal_operon , GBQUAL_operon , "operon" } , 
419
 { Feat_qual_legal_organism , GBQUAL_organism , "organism" } , 
420
 { Feat_qual_legal_organelle , GBQUAL_organelle , "organelle" } , 
421
 { Feat_qual_legal_partial , GBQUAL_partial , "partial" } , 
422
 { Feat_qual_legal_phenotype , GBQUAL_phenotype , "phenotype" } , 
423
 { Feat_qual_legal_plasmid , GBQUAL_plasmid , "plasmid" } , 
424
 { Feat_qual_legal_product , GBQUAL_product , "product" } , 
425
 { Feat_qual_legal_protein_id , GBQUAL_protein_id , "protein-id" } , 
426
 { Feat_qual_legal_pseudo , GBQUAL_pseudo , "pseudo" } , 
427
 { Feat_qual_legal_rearranged , GBQUAL_rearranged , "rearranged" } , 
428
 { Feat_qual_legal_replace , GBQUAL_replace , "replace" } , 
429
 { Feat_qual_legal_rpt_family , GBQUAL_rpt_family , "rpt-family" } , 
430
 { Feat_qual_legal_rpt_type , GBQUAL_rpt_type , "rpt-type" } , 
431
 { Feat_qual_legal_rpt_unit , GBQUAL_rpt_unit , "rpt-unit" } , 
432
 { Feat_qual_legal_rpt_unit_seq , GBQUAL_rpt_unit_seq , "rpt-unit-seq" } , 
433
 { Feat_qual_legal_rpt_unit_range , GBQUAL_rpt_unit_range , "rpt-unit-range" } , 
434
 { Feat_qual_legal_segment , GBQUAL_segment , "segment" } , 
435
 { Feat_qual_legal_sequenced_mol , GBQUAL_sequenced_mol , "sequenced-mol" } , 
436
 { Feat_qual_legal_standard_name , GBQUAL_standard_name , "standard-name" } , 
437
 { Feat_qual_legal_transcript_id , GBQUAL_transcript_id , "transcript-id" } , 
438
 { Feat_qual_legal_transgenic , GBQUAL_transgenic , "transgenic" } , 
439
 { Feat_qual_legal_translation , GBQUAL_translation , "translation" } , 
440
 { Feat_qual_legal_transl_except , GBQUAL_transl_except , "transl-except" } , 
441
 { Feat_qual_legal_transl_table , GBQUAL_transl_table , "transl-table" } , 
442
 { Feat_qual_legal_usedin , GBQUAL_usedin , "usedin" } };
443
444
/* Number of rows in featqual_gbqual.  The expansion is parenthesized so
 * it stays a single operand wherever the macro is used.
 */
#define NUM_featqual_gbqual (sizeof (featqual_gbqual) / sizeof (FeatQualGBQualData))
445
446
447
/* Returns the number of rows in the featqual_gbqual table. */
NLM_EXTERN Int4 GetNumFeatQual (void)
{
  Int4 num_rows = NUM_featqual_gbqual;

  return num_rows;
}
451
452
453
/* Returns the GBQUAL value paired with a Feat_qual_legal value.
 *
 * featqual: Feat_qual_legal_* value to look up.
 * Returns the gbqual of the first featqual_gbqual row whose featqual
 * matches, or -1 when no row matches.
 */
static Int4 GetGBQualFromFeatQual (Int4 featqual)
{
  Int4 row = 0;
  Int4 num_rows = NUM_featqual_gbqual;

  while (row < num_rows) {
    if (featqual_gbqual[row].featqual == featqual) {
      return featqual_gbqual[row].gbqual;
    }
    row++;
  }
  return -1;
}
464
465
466
/* Returns the table name for a Feat_qual_legal value.
 *
 * featqual: Feat_qual_legal_* value to look up.
 * Returns the qualname of the first featqual_gbqual row whose featqual
 * matches (a pointer into the static table), or NULL when none matches.
 */
NLM_EXTERN CharPtr GetFeatQualName (Int4 featqual)
{
  Int4 row = 0;
  Int4 num_rows = NUM_featqual_gbqual;

  while (row < num_rows) {
    if (featqual_gbqual[row].featqual == featqual) {
      return featqual_gbqual[row].qualname;
    }
    row++;
  }
  return NULL;
}
477
478
479
/* Returns the Feat_qual_legal value for a qualifier name.
 *
 * qualname: name to look up; compared with Matchnamestring against each
 *           qualname in featqual_gbqual, in table order.
 * Returns the featqual of the first matching row, or -1 when none matches.
 */
NLM_EXTERN Int4 GetFeatQualByName (CharPtr qualname)
{
  Int4 row = 0;
  Int4 num_rows = NUM_featqual_gbqual;

  while (row < num_rows) {
    if (Matchnamestring (featqual_gbqual[row].qualname, qualname)) {
      return featqual_gbqual[row].featqual;
    }
    row++;
  }
  return -1;
}
490
491
492
/* Appends an entry for every feature qualifier to a choice list.
 *
 * field_list: list to extend.
 * Each featqual_gbqual row becomes a node whose choice is the featqual
 * value and whose data is a StringSave copy of the qualifier name, in
 * table order.
 *
 * The scan starts at row 0: unlike feattype_featdef, this table has no
 * "any" placeholder in its first row, so starting at 1 would leave the
 * first qualifier ("allele") out of the list.
 */
NLM_EXTERN void AddAllFeatureFieldsToChoiceList (ValNodePtr PNTR field_list)
{
  Int4 i;

  for (i = 0; i < NUM_featqual_gbqual; i++) {
    ValNodeAddPointer (field_list, featqual_gbqual[i].featqual, StringSave (featqual_gbqual[i].qualname));
  }
}
500
501
502
/* typeflag values for the source-qualifier table: they say which kind of
 * constant a row's subtype field holds.
 */
#define IS_ORGMOD 1   /* subtype is an ORGMOD_ value */
#define IS_SUBSRC 2   /* subtype is a SUBSRC_ value */
#define IS_OTHER  3   /* neither; such rows carry subtype 0 */
505
506
typedef struct srcqualscqual {
507
  Int4 srcqual;
508
  Int4 subtype;
509
  Int4 typeflag;
510
  CharPtr qualname;
511
} SrcQualSCQualData, PNTR SrcQualSCQualPtr;
512
513
static SrcQualSCQualData srcqual_scqual[] = {
514
 { Source_qual_acronym , ORGMOD_acronym , IS_ORGMOD , "acronym" } , 
515
 { Source_qual_anamorph , ORGMOD_anamorph , IS_ORGMOD , "anamorph" } , 
516
 { Source_qual_authority , ORGMOD_authority , IS_ORGMOD , "authority" } , 
517
 { Source_qual_bio_material , ORGMOD_bio_material , IS_ORGMOD , "bio-material" } , 
518
 { Source_qual_biotype , ORGMOD_biotype , IS_ORGMOD , "biotype" } , 
519
 { Source_qual_biovar , ORGMOD_biovar , IS_ORGMOD , "biovar" } , 
520
 { Source_qual_breed , ORGMOD_breed , IS_ORGMOD , "breed" } , 
521
 { Source_qual_cell_line , SUBSRC_cell_line , IS_SUBSRC , "cell-line" } , 
522
 { Source_qual_cell_type , SUBSRC_cell_type , IS_SUBSRC , "cell-type" } , 
523
 { Source_qual_chemovar , ORGMOD_chemovar , IS_ORGMOD , "chemovar" } , 
524
 { Source_qual_chromosome , SUBSRC_chromosome , IS_SUBSRC , "chromosome" } , 
525
 { Source_qual_clone , SUBSRC_clone , IS_SUBSRC , "clone" } , 
526
 { Source_qual_clone_lib , SUBSRC_clone_lib , IS_SUBSRC , "clone-lib" } , 
527
 { Source_qual_collected_by , SUBSRC_collected_by , IS_SUBSRC , "collected-by" } , 
528
 { Source_qual_collection_date , SUBSRC_collection_date , IS_SUBSRC , "collection-date" } , 
529
 { Source_qual_common , ORGMOD_common , IS_ORGMOD , "common" } , 
530
 { Source_qual_common_name , 0 , IS_OTHER , "common name" } , 
531
 { Source_qual_country , SUBSRC_country , IS_SUBSRC , "country" } , 
532
 { Source_qual_cultivar , ORGMOD_cultivar , IS_ORGMOD , "cultivar" } , 
533
 { Source_qual_culture_collection , ORGMOD_culture_collection , IS_ORGMOD , "culture-collection" } , 
534
 { Source_qual_dev_stage , SUBSRC_dev_stage , IS_SUBSRC , "dev-stage" } , 
535
 { Source_qual_division , 0 , IS_OTHER, "divistion" } ,
536
 { Source_qual_dosage , ORGMOD_dosage , IS_ORGMOD , "dosage" } , 
537
 { Source_qual_ecotype , ORGMOD_ecotype , IS_ORGMOD , "ecotype" } , 
538
 { Source_qual_endogenous_virus_name , SUBSRC_endogenous_virus_name , IS_SUBSRC , "endogenous-virus-name" } , 
539
 { Source_qual_environmental_sample , SUBSRC_environmental_sample , IS_SUBSRC , "environmental-sample" } , 
540
 { Source_qual_forma , ORGMOD_forma , IS_ORGMOD , "forma" } , 
541
 { Source_qual_forma_specialis , ORGMOD_forma_specialis , IS_ORGMOD , "forma-specialis" } , 
542
 { Source_qual_frequency , SUBSRC_frequency , IS_SUBSRC , "frequency" } , 
543
 { Source_qual_fwd_primer_name , SUBSRC_fwd_primer_name , IS_SUBSRC , "fwd-primer-name" } , 
544
 { Source_qual_fwd_primer_seq , SUBSRC_fwd_primer_seq , IS_SUBSRC , "fwd-primer-seq" } , 
545
 { Source_qual_gb_acronym , ORGMOD_gb_acronym , IS_ORGMOD , "gb-acronym" } , 
546
 { Source_qual_gb_anamorph , ORGMOD_gb_anamorph , IS_ORGMOD , "gb-anamorph" } , 
547
 { Source_qual_gb_synonym , ORGMOD_gb_synonym , IS_ORGMOD , "gb-synonym" } , 
548
 { Source_qual_genotype , SUBSRC_genotype , IS_SUBSRC , "genotype" } , 
549
 { Source_qual_germline , SUBSRC_germline , IS_SUBSRC , "germline" } , 
550
 { Source_qual_group , ORGMOD_group , IS_ORGMOD , "group" } , 
551
 { Source_qual_haplotype , SUBSRC_haplotype , IS_SUBSRC , "haplotype" } , 
552
 { Source_qual_identified_by , SUBSRC_identified_by , IS_SUBSRC , "identified-by" } , 
553
 { Source_qual_insertion_seq_name , SUBSRC_insertion_seq_name , IS_SUBSRC , "insertion-seq-name" } , 
554
 { Source_qual_isolate , ORGMOD_isolate , IS_ORGMOD , "isolate" } , 
555
 { Source_qual_isolation_source , SUBSRC_isolation_source , IS_SUBSRC , "isolation-source" } , 
556
 { Source_qual_lab_host , SUBSRC_lab_host , IS_SUBSRC , "lab-host" } , 
557
 { Source_qual_lat_lon , SUBSRC_lat_lon , IS_SUBSRC , "lat-lon" } , 
558
 { Source_qual_lineage , 0, IS_OTHER, "lineage" } ,
559
 { Source_qual_map , SUBSRC_map , IS_SUBSRC , "map" } , 
560
 { Source_qual_metagenome_source , ORGMOD_metagenome_source , IS_ORGMOD , "metagenome-source" } , 
561
 { Source_qual_metagenomic , SUBSRC_metagenomic , IS_SUBSRC , "metagenomic" } , 
562
 { Source_qual_old_lineage , ORGMOD_old_lineage , IS_ORGMOD , "old-lineage" } , 
563
 { Source_qual_old_name , ORGMOD_old_name , IS_ORGMOD , "old-name" } , 
564
 { Source_qual_orgmod_note , ORGMOD_other, IS_ORGMOD, "orgmod note" } ,
565
 { Source_qual_nat_host , ORGMOD_nat_host , IS_ORGMOD , "nat-host" } , 
566
 { Source_qual_pathovar , ORGMOD_pathovar , IS_ORGMOD , "pathovar" } , 
567
 { Source_qual_plasmid_name , SUBSRC_plasmid_name , IS_SUBSRC , "plasmid-name" } , 
568
 { Source_qual_plastid_name , SUBSRC_plastid_name , IS_SUBSRC , "plastid-name" } , 
569
 { Source_qual_pop_variant , SUBSRC_pop_variant , IS_SUBSRC , "pop-variant" } , 
570
 { Source_qual_rearranged , SUBSRC_rearranged , IS_SUBSRC , "rearranged" } , 
571
 { Source_qual_rev_primer_name , SUBSRC_rev_primer_name , IS_SUBSRC , "rev-primer-name" } , 
572
 { Source_qual_rev_primer_seq , SUBSRC_rev_primer_seq , IS_SUBSRC , "rev-primer-seq" } , 
573
 { Source_qual_segment , SUBSRC_segment , IS_SUBSRC , "segment" } , 
574
 { Source_qual_serogroup , ORGMOD_serogroup , IS_ORGMOD , "serogroup" } , 
575
 { Source_qual_serotype , ORGMOD_serotype , IS_ORGMOD , "serotype" } , 
576
 { Source_qual_serovar , ORGMOD_serovar , IS_ORGMOD , "serovar" } , 
577
 { Source_qual_sex , SUBSRC_sex , IS_SUBSRC , "sex" } , 
578
 { Source_qual_specimen_voucher , ORGMOD_specimen_voucher , IS_ORGMOD , "specimen-voucher" } , 
579
 { Source_qual_strain , ORGMOD_strain , IS_ORGMOD , "strain" } , 
580
 { Source_qual_subclone , SUBSRC_subclone , IS_SUBSRC , "subclone" } , 
581
 { Source_qual_subgroup , ORGMOD_subgroup , IS_ORGMOD , "subgroup" } , 
582
 { Source_qual_subsource_note , SUBSRC_other , IS_SUBSRC , "subsource note" } ,
583
 { Source_qual_sub_species , ORGMOD_sub_species , IS_ORGMOD , "sub-species" } , 
584
 { Source_qual_substrain , ORGMOD_substrain , IS_ORGMOD , "substrain" } , 
585
 { Source_qual_subtype , ORGMOD_subtype , IS_ORGMOD , "subtype" } , 
586
 { Source_qual_synonym , ORGMOD_synonym , IS_ORGMOD , "synonym" } , 
587
 { Source_qual_taxname , 0 , IS_OTHER , "taxname" } , 
588
 { Source_qual_teleomorph , ORGMOD_teleomorph , IS_ORGMOD , "teleomorph" } , 
589
 { Source_qual_tissue_lib , SUBSRC_tissue_lib , IS_SUBSRC , "tissue-lib" } , 
590
 { Source_qual_tissue_type , SUBSRC_tissue_type , IS_SUBSRC , "tissue-type" } , 
591
 { Source_qual_transgenic , SUBSRC_transgenic , IS_SUBSRC , "transgenic" } , 
592
 { Source_qual_transposon_name , SUBSRC_transposon_name , IS_SUBSRC , "transposon-name" } , 
593
 { Source_qual_type , ORGMOD_type , IS_ORGMOD , "type" } , 
594
 { Source_qual_variety , ORGMOD_variety , IS_ORGMOD , "variety" } };
595
596
/* Number of rows in srcqual_scqual.  The expansion is parenthesized so
 * it stays a single operand wherever the macro is used.
 */
#define NUM_srcqual_scqual (sizeof (srcqual_scqual) / sizeof (SrcQualSCQualData))
597
598
/* Returns the SUBSRC subtype for a Source_qual value.
 *
 * srcqual: Source_qual_* value to look up.
 * Only the first srcqual_scqual row with that srcqual is considered: its
 * subtype is returned when the row is flagged IS_SUBSRC.  Returns -1 when
 * that row has another flag or when no row matches.
 */
static Int4 GetSubSrcQualFromSrcQual (Int4 srcqual)
{
  Int4 row;
  Int4 num_rows = NUM_srcqual_scqual;

  for (row = 0; row < num_rows; row++) {
    if (srcqual_scqual[row].srcqual != srcqual) {
      continue;
    }
    /* first match decides the answer */
    if (srcqual_scqual[row].typeflag != IS_SUBSRC) {
      return -1;
    }
    return srcqual_scqual[row].subtype;
  }
  return -1;
}
613
614
615
/* Returns the ORGMOD subtype for a Source_qual value.
 *
 * srcqual: Source_qual_* value to look up.
 * Only the first srcqual_scqual row with that srcqual is considered: its
 * subtype is returned when the row is flagged IS_ORGMOD.  Returns -1 when
 * that row has another flag or when no row matches.
 */
static Int4 GetOrgModQualFromSrcQual (Int4 srcqual)
{
  Int4 row;
  Int4 num_rows = NUM_srcqual_scqual;

  for (row = 0; row < num_rows; row++) {
    if (srcqual_scqual[row].srcqual != srcqual) {
      continue;
    }
    /* first match decides the answer */
    if (srcqual_scqual[row].typeflag != IS_ORGMOD) {
      return -1;
    }
    return srcqual_scqual[row].subtype;
  }
  return -1;
}
630
631
632
/* Tells whether a source qualifier is one of the "non-text" qualifiers.
 *
 * srcqual: Source_qual_* value.
 * Returns TRUE for transgenic, germline, metagenomic,
 * environmental_sample and rearranged; FALSE for every other value.
 */
NLM_EXTERN Boolean IsNonTextSourceQual (Int4 srcqual)
{
  switch (srcqual) {
    case Source_qual_transgenic:
    case Source_qual_germline:
    case Source_qual_metagenomic:
    case Source_qual_environmental_sample:
    case Source_qual_rearranged:
      return TRUE;
    default:
      return FALSE;
  }
}
647
648
649
/* Returns the table name for a Source_qual value.
 *
 * srcqual: Source_qual_* value to look up.
 * Returns the qualname of the first srcqual_scqual row that matches and
 * has a non-NULL name, or the literal "Unknown source qualifier" when
 * there is none.  The result points at static storage.
 */
NLM_EXTERN CharPtr GetSourceQualName (Int4 srcqual)
{
  Int4 row;
  Int4 num_rows = NUM_srcqual_scqual;

  for (row = 0; row < num_rows; row++) {
    if (srcqual_scqual[row].srcqual == srcqual
        && srcqual_scqual[row].qualname != NULL) {
      return srcqual_scqual[row].qualname;
    }
  }
  return "Unknown source qualifier";
}
664
665
666
/* Returns the Source_qual value for a qualifier name.
 *
 * qualname: name to look up; compared with Matchnamestring against each
 *           qualname in srcqual_scqual, in table order.
 * Returns the srcqual of the first matching row, or -1 when none matches.
 */
NLM_EXTERN Int4 GetSourceQualTypeByName (CharPtr qualname)
{
  Int4 row = 0;
  Int4 num_rows = NUM_srcqual_scqual;

  while (row < num_rows) {
    if (Matchnamestring (srcqual_scqual[row].qualname, qualname)) {
      return srcqual_scqual[row].srcqual;
    }
    row++;
  }
  return -1;
}
677
678
679
/* Builds a list of all source qualifier names.
 *
 * Returns a new ValNode list with one node per srcqual_scqual row, in
 * table order; each node has choice 0 and a StringSave copy of the
 * qualifier name as its data.
 */
NLM_EXTERN ValNodePtr GetSourceQualList (void)
{
  ValNodePtr names = NULL;
  Int4       row = 0;
  Int4       num_rows = NUM_srcqual_scqual;

  while (row < num_rows) {
    ValNodeAddPointer (&names, 0, StringSave (srcqual_scqual[row].qualname));
    row++;
  }
  return names;
}
689
690
typedef struct srclocgenome {
691
  Int4 srcloc;
692
  Int4 genome;
693
  CharPtr name;
694
} SrcLocGenomeData, PNTR SrcLocGenomePtr;
695
696
static SrcLocGenomeData srcloc_genome[] = {
697
 { Source_location_unknown , GENOME_unknown , "unknown" } ,
698
 { Source_location_genomic , GENOME_genomic , "genomic" } ,
699
 { Source_location_chloroplast , GENOME_chloroplast , "chloroplast" } ,
700
 { Source_location_chromoplast , GENOME_chromoplast , "chromoplast" } ,
701
 { Source_location_kinetoplast , GENOME_kinetoplast , "kinetoplast" } ,
702
 { Source_location_mitochondrion , GENOME_mitochondrion , "mitochondrion" } ,
703
 { Source_location_plastid , GENOME_plastid , "plastid" } ,
704
 { Source_location_macronuclear , GENOME_macronuclear , "macronuclear" } ,
705
 { Source_location_extrachrom , GENOME_extrachrom , "extrachrom" } ,
706
 { Source_location_plasmid , GENOME_plasmid , "plasmid" } ,
707
 { Source_location_transposon , GENOME_transposon , "transposon" } ,
708
 { Source_location_insertion_seq , GENOME_insertion_seq , "insertion-seq" } ,
709
 { Source_location_cyanelle , GENOME_cyanelle , "cyanelle" } ,
710
 { Source_location_proviral , GENOME_proviral , "proviral" } ,
711
 { Source_location_virion , GENOME_virion , "virion" } ,
712
 { Source_location_nucleomorph , GENOME_nucleomorph , "nucleomorph" } ,
713
 { Source_location_apicoplast , GENOME_apicoplast , "apicoplast" } ,
714
 { Source_location_leucoplast , GENOME_leucoplast , "leucoplast" } ,
715
 { Source_location_proplastid , GENOME_proplastid , "proplastid" } ,
716
 { Source_location_endogenous_virus , GENOME_endogenous_virus , "endogenous-virus" } ,
717
 { Source_location_hydrogenosome , GENOME_hydrogenosome , "hydrogenosome" } ,
718
 { Source_location_chromosome , 21 , "chromosome" } ,
719
 { Source_location_chromatophore , 22 , "chromatophore" } };
720
721
/* Number of rows in srcloc_genome.  The expansion is parenthesized so
 * it stays a single operand wherever the macro is used.
 */
#define NUM_srcloc_genome (sizeof (srcloc_genome) / sizeof (SrcLocGenomeData))
722
723
/* Returns the genome value paired with a Source_location value.
 *
 * srcloc: Source_location_* value to look up.
 * Returns the genome of the first srcloc_genome row whose srcloc matches,
 * or -1 when no row matches.
 */
NLM_EXTERN Int4 GenomeFromSrcLoc (Int4 srcloc)
{
  Int4 i;

  for (i = 0; i < NUM_srcloc_genome; i++) {
    if (srcloc_genome[i].srcloc == srcloc) {
      return srcloc_genome[i].genome;
    }
  }
  return -1;
}
734
735
736
/* Returns the location name for a genome value.
 *
 * genome: genome value to look up.
 * Returns the name of the first srcloc_genome row whose genome matches
 * (a pointer into the static table), or NULL when no row matches.
 */
NLM_EXTERN CharPtr LocNameFromGenome (Int4 genome)
{
  Int4 row = 0;
  Int4 num_rows = NUM_srcloc_genome;

  while (row < num_rows) {
    if (srcloc_genome[row].genome == genome) {
      return srcloc_genome[row].name;
    }
    row++;
  }
  return NULL;
}
747
748
749
/* Looks up a genome value by location name.  Names are compared with
 * StringICmp; the first matching row wins.  Returns -1 when nothing matches. */
static Int4 GenomeFromLocName (CharPtr loc_name)
{
  Int4 pos = 0;

  while (pos < NUM_srcloc_genome) {
    if (StringICmp (srcloc_genome[pos].name, loc_name) == 0) {
      return srcloc_genome[pos].genome;
    }
    pos++;
  }
  return -1;
}
760
761
762
/* Builds a ValNode list with one node per srcloc_genome row, in table order.
 * Each node is added with the row's srcloc value and a StringSave copy of its
 * name.  When for_remove is set, the Source_location_unknown row is labelled
 * "any" instead of its table name. */
NLM_EXTERN ValNodePtr GetLocationList (Boolean for_remove)
{
  ValNodePtr head = NULL;
  CharPtr    label;
  Int4       row;

  for (row = 0; row < NUM_srcloc_genome; row++) {
    label = srcloc_genome[row].name;
    if (for_remove && srcloc_genome[row].srcloc == Source_location_unknown) {
      label = "any";
    }
    ValNodeAddPointer (&head, srcloc_genome[row].srcloc, StringSave (label));
  }
  return head;
}
776
777
778
typedef struct srcorigorigin {
779
  Int4 srcorig;
780
  Int4 origin;
781
  CharPtr name;
782
} SrcOrigOriginData, PNTR SrcrigOriginPtr;
783
784
static SrcOrigOriginData srcorig_origin[] = {
785
 { Source_origin_unknown , 0 , "unknown" } ,
786
 { Source_origin_natural , 1 , "natural" } ,
787
 { Source_origin_natmut , 2 , "natmut" } ,
788
 { Source_origin_mut , 3 , "mut" } ,
789
 { Source_origin_artificial , 4 , "artificial" } ,
790
 { Source_origin_synthetic , 5 , "synthetic" } ,
791
 { Source_origin_other , 255 , "other" } };
792
793
/* Number of rows in srcorig_origin.  Parenthesized so the quotient stays
 * intact when the macro is used inside a larger expression. */
#define NUM_srcorig_origin (sizeof (srcorig_origin) / sizeof (srcorig_origin[0]))
794
795
/* Translates a Source_origin_* value into the origin code stored in the same
 * srcorig_origin row.  Returns -1 when no row has this srcorig. */
NLM_EXTERN Int4 OriginFromSrcOrig (Int4 srcorig)
{
  Int4 row;
  Int4 result = -1;

  for (row = 0; row < NUM_srcorig_origin; row++) {
    if (srcorig_origin[row].srcorig == srcorig) {
      result = srcorig_origin[row].origin;
      break;
    }
  }
  return result;
}
806
807
808
/* Returns the name string of the first srcorig_origin row whose origin code
 * equals the argument (the table's own string, not a copy), or NULL when no
 * row matches. */
NLM_EXTERN CharPtr OriginNameFromOrigin (Int4 origin)
{
  Int4    row;
  CharPtr found_name = NULL;

  for (row = 0; row < NUM_srcorig_origin; row++) {
    if (srcorig_origin[row].origin == origin) {
      found_name = srcorig_origin[row].name;
      break;
    }
  }
  return found_name;
}
819
820
821
/* Looks up an origin code by name.  Names are compared with StringCmp; the
 * first matching row wins.  Returns -1 when nothing matches. */
static Int4 OriginFromOriginName (CharPtr origin_name)
{
  Int4 pos = 0;

  while (pos < NUM_srcorig_origin) {
    if (StringCmp (srcorig_origin[pos].name, origin_name) == 0) {
      return srcorig_origin[pos].origin;
    }
    pos++;
  }
  return -1;
}
832
833
834
/* Builds a ValNode list with one node per srcorig_origin row, in table order.
 * Each node is added with the row's srcorig value and a StringSave copy of its
 * name.  When for_remove is set, the Source_origin_unknown row is labelled
 * "any" instead of its table name. */
NLM_EXTERN ValNodePtr GetOriginList (Boolean for_remove)
{
  ValNodePtr head = NULL;
  CharPtr    label;
  Int4       row;

  for (row = 0; row < NUM_srcorig_origin; row++) {
    label = srcorig_origin[row].name;
    if (for_remove && srcorig_origin[row].srcorig == Source_origin_unknown) {
      label = "any";
    }
    ValNodeAddPointer (&head, srcorig_origin[row].srcorig, StringSave (label));
  }
  return head;
}
848
849
850
typedef struct cdsgeneprotfieldname {
851
  Int4 field;
852
  CharPtr name;
853
} CDSGeneProtFieldNameData, PNTR CDSGeneProtFieldNamePtr;
854
855
static CDSGeneProtFieldNameData cdsgeneprotfield_name[] = {
856
{ CDSGeneProt_field_cds_comment , "CDS comment" } ,
857
{ CDSGeneProt_field_gene_locus , "gene locus" } ,
858
{ CDSGeneProt_field_gene_description , "gene description" } ,
859
{ CDSGeneProt_field_gene_comment , "gene comment" } ,
860
{ CDSGeneProt_field_gene_allele , "allele" } ,
861
{ CDSGeneProt_field_gene_maploc , "maploc" } ,
862
{ CDSGeneProt_field_gene_locus_tag , "locus tag" } ,
863
{ CDSGeneProt_field_gene_synonym , "synonym" } ,
864
{ CDSGeneProt_field_gene_old_locus_tag , "old locus tag" } ,
865
{ CDSGeneProt_field_mrna_product , "mRNA product" } ,
866
{ CDSGeneProt_field_mrna_comment , "mRNA comment" } ,
867
{ CDSGeneProt_field_prot_name , "protein name" } ,
868
{ CDSGeneProt_field_prot_description , "protein description" } ,
869
{ CDSGeneProt_field_prot_ec_number , "protein EC number" } ,
870
{ CDSGeneProt_field_prot_activity , "protein activity" } ,
871
{ CDSGeneProt_field_prot_comment , "protein comment" } ,
872
{ CDSGeneProt_field_mat_peptide_name , "mat-peptide name" } ,
873
{ CDSGeneProt_field_mat_peptide_description ,  "mat-peptide description" } ,
874
{ CDSGeneProt_field_mat_peptide_ec_number , "mat-peptide EC number" } ,
875
{ CDSGeneProt_field_mat_peptide_activity , "mat-peptide activity" } ,
876
{ CDSGeneProt_field_mat_peptide_comment , "mat-peptide comment" } };
877
878
/* Number of rows in cdsgeneprotfield_name.  Parenthesized so the quotient
 * stays intact when the macro is used inside a larger expression. */
#define NUM_cdsgeneprotfield_name (sizeof (cdsgeneprotfield_name) / sizeof (cdsgeneprotfield_name[0]))
879
880
/* Returns the name string of the cdsgeneprotfield_name row whose field value
 * equals the argument (the table's own string, not a copy), or NULL when the
 * field is not in the table. */
NLM_EXTERN CharPtr CDSGeneProtNameFromField (Int4 field)
{
  Int4    row;
  CharPtr found_name = NULL;

  for (row = 0; row < NUM_cdsgeneprotfield_name; row++) {
    if (cdsgeneprotfield_name[row].field == field) {
      found_name = cdsgeneprotfield_name[row].name;
      break;
    }
  }
  return found_name;
}
891
892
893
/* Appends one node per cdsgeneprotfield_name row to *field_list, in table
 * order.  Each node is added with the row's field value and a StringSave copy
 * of its name. */
NLM_EXTERN void AddAllCDSGeneProtFieldsToChoiceList (ValNodePtr PNTR field_list)
{
  Int4 row = 0;

  while (row < NUM_cdsgeneprotfield_name) {
    ValNodeAddPointer (field_list,
                       cdsgeneprotfield_name[row].field,
                       StringSave (cdsgeneprotfield_name[row].name));
    row++;
  }
}
901
902
903
typedef struct cdsgeneprotfeatname {
904
  Int4 feature_type;
905
  CharPtr name;
906
} CDSGeneProtFeatNameData, PNTR CDSGeneProtFeatNamePtr;
907
908
static CDSGeneProtFeatNameData cdsgeneprotfeat_name[] = {
909
{ CDSGeneProt_feature_type_constraint_gene , "gene" } ,
910
{ CDSGeneProt_feature_type_constraint_mRNA , "mRNA" } ,
911
{ CDSGeneProt_feature_type_constraint_cds , "CDS" } ,
912
{ CDSGeneProt_feature_type_constraint_prot , "protein" } ,
913
{ CDSGeneProt_feature_type_constraint_mat_peptide , "mat-peptide" }};
914
915
/* Number of rows in cdsgeneprotfeat_name.  Parenthesized so the quotient
 * stays intact when the macro is used inside a larger expression. */
#define NUM_cdsgeneprotfeat_name (sizeof (cdsgeneprotfeat_name) / sizeof (cdsgeneprotfeat_name[0]))
916
917
/* Returns the name string of the cdsgeneprotfeat_name row whose feature_type
 * equals the argument (the table's own string, not a copy), or NULL when the
 * feature type is not in the table. */
NLM_EXTERN CharPtr CDSGeneProtFeatureNameFromFeatureType (Int4 feature_type)
{
  Int4    row;
  CharPtr found_name = NULL;

  for (row = 0; row < NUM_cdsgeneprotfeat_name; row++) {
    if (cdsgeneprotfeat_name[row].feature_type == feature_type) {
      found_name = cdsgeneprotfeat_name[row].name;
      break;
    }
  }
  return found_name;
}
928
929
930
/* Appends one node per cdsgeneprotfeat_name row to *field_list, in table
 * order.  Each node is added with the row's feature_type value and a
 * StringSave copy of its name. */
NLM_EXTERN void AddAllCDSGeneProtFeaturesToChoiceList (ValNodePtr PNTR field_list)
{
  Int4 row = 0;

  while (row < NUM_cdsgeneprotfeat_name) {
    ValNodeAddPointer (field_list,
                       cdsgeneprotfeat_name[row].feature_type,
                       StringSave (cdsgeneprotfeat_name[row].name));
    row++;
  }
}
938
939
940
/* Builds the FeatureField that corresponds to a CDSGeneProt_field_* value.
 *
 * Every handled field maps to a (Feature_type_*, Feat_qual_legal_*) pair; the
 * switch only selects that pair and the FeatureField is constructed once at
 * the end, instead of repeating the construction in every case.
 *
 * Returns a FeatureField created with FeatureFieldNew whose field node (from
 * ValNodeNew) has choice FeatQualChoice_legal_qual and the selected qualifier
 * as its integer value.  Returns NULL when cds_gene_prot_field is not one of
 * the handled values or when FeatureFieldNew returns NULL.  If only ValNodeNew
 * returns NULL, the FeatureField is returned with a NULL field rather than
 * dereferencing the missing node.
 */
NLM_EXTERN FeatureFieldPtr FeatureFieldFromCDSGeneProtField (Uint2 cds_gene_prot_field)
{
  FeatureFieldPtr f;
  Uint2           feat_type;
  Int4            legal_qual;

  switch (cds_gene_prot_field) {
    /* CDS */
    case CDSGeneProt_field_cds_comment:
      feat_type = Feature_type_cds;
      legal_qual = Feat_qual_legal_note;
      break;

    /* gene */
    case CDSGeneProt_field_gene_locus:
      feat_type = Feature_type_gene;
      legal_qual = Feat_qual_legal_gene;
      break;
    case CDSGeneProt_field_gene_description:
      feat_type = Feature_type_gene;
      legal_qual = Feat_qual_legal_gene_description;
      break;
    case CDSGeneProt_field_gene_comment:
      feat_type = Feature_type_gene;
      legal_qual = Feat_qual_legal_note;
      break;
    case CDSGeneProt_field_gene_allele:
      feat_type = Feature_type_gene;
      legal_qual = Feat_qual_legal_allele;
      break;
    case CDSGeneProt_field_gene_maploc:
      feat_type = Feature_type_gene;
      legal_qual = Feat_qual_legal_map;
      break;
    case CDSGeneProt_field_gene_locus_tag:
      feat_type = Feature_type_gene;
      legal_qual = Feat_qual_legal_locus_tag;
      break;
    case CDSGeneProt_field_gene_synonym:
      feat_type = Feature_type_gene;
      legal_qual = Feat_qual_legal_synonym;
      break;
    case CDSGeneProt_field_gene_old_locus_tag:
      feat_type = Feature_type_gene;
      legal_qual = Feat_qual_legal_old_locus_tag;
      break;

    /* mRNA */
    case CDSGeneProt_field_mrna_product:
      feat_type = Feature_type_mRNA;
      legal_qual = Feat_qual_legal_product;
      break;
    case CDSGeneProt_field_mrna_comment:
      feat_type = Feature_type_mRNA;
      legal_qual = Feat_qual_legal_note;
      break;

    /* protein */
    case CDSGeneProt_field_prot_name:
      feat_type = Feature_type_prot;
      legal_qual = Feat_qual_legal_product;
      break;
    case CDSGeneProt_field_prot_description:
      feat_type = Feature_type_prot;
      legal_qual = Feat_qual_legal_description;
      break;
    case CDSGeneProt_field_prot_ec_number:
      feat_type = Feature_type_prot;
      legal_qual = Feat_qual_legal_ec_number;
      break;
    case CDSGeneProt_field_prot_activity:
      feat_type = Feature_type_prot;
      legal_qual = Feat_qual_legal_activity;
      break;
    case CDSGeneProt_field_prot_comment:
      feat_type = Feature_type_prot;
      legal_qual = Feat_qual_legal_note;
      break;

    /* mat-peptide */
    case CDSGeneProt_field_mat_peptide_name:
      feat_type = Feature_type_mat_peptide;
      legal_qual = Feat_qual_legal_product;
      break;
    case CDSGeneProt_field_mat_peptide_description:
      feat_type = Feature_type_mat_peptide;
      legal_qual = Feat_qual_legal_description;
      break;
    case CDSGeneProt_field_mat_peptide_ec_number:
      feat_type = Feature_type_mat_peptide;
      legal_qual = Feat_qual_legal_ec_number;
      break;
    case CDSGeneProt_field_mat_peptide_activity:
      feat_type = Feature_type_mat_peptide;
      legal_qual = Feat_qual_legal_activity;
      break;
    case CDSGeneProt_field_mat_peptide_comment:
      feat_type = Feature_type_mat_peptide;
      legal_qual = Feat_qual_legal_note;
      break;

    default:
      /* unrecognized field: nothing to build */
      return NULL;
  }

  f = FeatureFieldNew ();
  if (f == NULL) {
    return NULL;
  }
  f->type = feat_type;
  f->field = ValNodeNew (NULL);
  if (f->field != NULL) {
    f->field->choice = FeatQualChoice_legal_qual;
    f->field->data.intvalue = legal_qual;
  }
  return f;
}
1095
1096
1097
/* Molinfo fields */
1098
typedef struct moleculetypebiomol {
1099
  Int4 molecule_type;
1100
  Int4 biomol;
1101
  CharPtr name;
1102
} MoleculeTypeBiomolData, PNTR MoleculeTypeBiomolPtr;
1103
1104
static MoleculeTypeBiomolData moleculetype_biomol[] = {
1105
 { Molecule_type_unknown , 0, "unknown" } ,
1106
 { Molecule_type_genomic , MOLECULE_TYPE_GENOMIC , "genomic" } ,
1107
 { Molecule_type_precursor_RNA , MOLECULE_TYPE_PRE_MRNA , "precursor RNA" } ,
1108
 { Molecule_type_mRNA , MOLECULE_TYPE_MRNA , "mRNA" } ,
1109
 { Molecule_type_rRNA , MOLECULE_TYPE_RRNA , "rRNA" } ,
1110
 { Molecule_type_tRNA , MOLECULE_TYPE_TRNA , "tRNA" } ,
1111
 { Molecule_type_snRNA , MOLECULE_TYPE_SNRNA , "snRNA" } ,
1112
 { Molecule_type_scRNA , MOLECULE_TYPE_SCRNA , "scRNA" } ,
1113
 { Molecule_type_genomic_mRNA , MOLECULE_TYPE_GENOMIC_MRNA_MIX , "genomic mRNA" } ,
1114
 { Molecule_type_cRNA , MOLECULE_TYPE_CRNA , "cRNA" } ,
1115
 { Molecule_type_scRNA , MOLECULE_TYPE_SCRNA , "scRNA" } ,
1116
 { Molecule_type_scRNA , MOLECULE_TYPE_SCRNA , "scRNA" } ,
1117
 { Molecule_type_scRNA , MOLECULE_TYPE_SCRNA , "scRNA" } ,
1118
 { Molecule_type_snoRNA , MOLECULE_TYPE_SNORNA, "snoRNA" } ,
1119
 { Molecule_type_transcribed_RNA, MOLECULE_TYPE_TRANSCRIBED_RNA, "transcribed RNA" } ,
1120
 { Molecule_type_ncRNA, MOLECULE_TYPE_NCRNA, "ncRNA" } ,
1121
 { Molecule_type_transfer_messenger_RNA, MOLECULE_TYPE_TMRNA, "tmRNA" } ,
1122
 { Molecule_type_other, MOLECULE_TYPE_OTHER_GENETIC_MATERIAL, "other" }
1123
};
1124
1125
1126
/* Number of rows in moleculetype_biomol.  Parenthesized so the quotient stays
 * intact when the macro is used inside a larger expression. */
#define NUM_moleculetype_biomol (sizeof (moleculetype_biomol) / sizeof (moleculetype_biomol[0]))
1127
1128
/* Translates a Molecule_type_* value into the biomol code stored in the first
 * moleculetype_biomol row with that molecule_type.  Returns -1 when no row
 * matches. */
NLM_EXTERN Int4 BiomolFromMoleculeType (Int4 molecule_type)
{
  Int4 row;
  Int4 result = -1;

  for (row = 0; row < NUM_moleculetype_biomol; row++) {
    if (moleculetype_biomol[row].molecule_type == molecule_type) {
      result = moleculetype_biomol[row].biomol;
      break;
    }
  }
  return result;
}
1139
1140
1141
/* Returns the name string of the first moleculetype_biomol row whose biomol
 * code equals the argument (the table's own string, not a copy), or NULL when
 * no row matches. */
NLM_EXTERN CharPtr BiomolNameFromBiomol (Int4 biomol)
{
  Int4    row;
  CharPtr found_name = NULL;

  for (row = 0; row < NUM_moleculetype_biomol; row++) {
    if (moleculetype_biomol[row].biomol == biomol) {
      found_name = moleculetype_biomol[row].name;
      break;
    }
  }
  return found_name;
}
1152
1153
1154
/* Looks up a biomol code by name.  Names are compared with StringCmp; the
 * first matching row wins.  Returns -1 when nothing matches. */
static Int4 BiomolFromBiomolName (CharPtr biomol_name)
{
  Int4 pos = 0;

  while (pos < NUM_moleculetype_biomol) {
    if (StringCmp (moleculetype_biomol[pos].name, biomol_name) == 0) {
      return moleculetype_biomol[pos].biomol;
    }
    pos++;
  }
  return -1;
}
1165
1166
1167
/* Builds a ValNode list with one node per moleculetype_biomol row, in table
 * order.  Each node is added with the row's molecule_type value and a
 * StringSave copy of its name. */
NLM_EXTERN ValNodePtr GetMoleculeTypeList (void)
{
  ValNodePtr head = NULL;
  Int4       row = 0;

  while (row < NUM_moleculetype_biomol) {
    ValNodeAddPointer (&head,
                       moleculetype_biomol[row].molecule_type,
                       StringSave (moleculetype_biomol[row].name));
    row++;
  }
  return head;
}
1177
1178
1179
/* Technique fields */
1180
typedef struct techniquetypetech {
1181
  Int4 technique_type;
1182
  Int4 tech;
1183
  CharPtr name;
1184
} TechniqueTypeTechData, PNTR TechniqueTypeTechPtr;
1185
1186
static TechniqueTypeTechData techniquetype_tech[] = {
1187
 { Technique_type_unknown , MI_TECH_unknown , "unknown" } ,
1188
 { Technique_type_standard , MI_TECH_standard , "standard" } ,
1189
 { Technique_type_est , MI_TECH_est , "EST" } ,
1190
 { Technique_type_sts , MI_TECH_sts , "STS" } ,
1191
 { Technique_type_survey , MI_TECH_survey , "survey" } ,
1192
 { Technique_type_genetic_map , MI_TECH_genemap , "genetic map" } ,
1193
 { Technique_type_physical_map , MI_TECH_physmap , "physical map" } ,
1194
 { Technique_type_derived , MI_TECH_derived , "derived" } ,
1195
 { Technique_type_concept_trans , MI_TECH_concept_trans , "concept-trans" } ,
1196
 { Technique_type_seq_pept , MI_TECH_seq_pept , "seq-pept" } ,
1197
 { Technique_type_both , MI_TECH_both , "both" } ,
1198
 { Technique_type_seq_pept_overlap , MI_TECH_seq_pept_overlap , "seq-pept-overlap" } ,
1199
 { Technique_type_seq_pept_homol , MI_TECH_seq_pept_homol, "seq-pept-homol" } ,
1200
 { Technique_type_concept_trans_a, MI_TECH_concept_trans_a, "concept-trans-a" } ,
1201
 { Technique_type_htgs_1, MI_TECH_htgs_1, "HTGS-1" } ,
1202
 { Technique_type_htgs_2, MI_TECH_htgs_2, "HTGS-2" } ,
1203
 { Technique_type_htgs_3, MI_TECH_htgs_3, "HTGS-3" } ,
1204
 { Technique_type_fli_cDNA, MI_TECH_fli_cdna, "fli-cDNA" } ,
1205
 { Technique_type_htgs_0, MI_TECH_htgs_0, "HTGS-0" } ,
1206
 { Technique_type_htc, MI_TECH_htc, "HTC" } ,
1207
 { Technique_type_wgs, MI_TECH_wgs, "WGS" } ,
1208
 { Technique_type_barcode, MI_TECH_barcode, "BARCODE" } ,
1209
 { Technique_type_composite_wgs_htgs, MI_TECH_composite_wgs_htgs, "composite WGS-HTGS" } ,
1210
 { Technique_type_tsa, MI_TECH_tsa, "TSA" } ,
1211
 { Technique_type_other, MI_TECH_other, "other" } 
1212
};
1213
1214
1215
/* Number of rows in techniquetype_tech.  Parenthesized so the quotient stays
 * intact when the macro is used inside a larger expression. */
#define NUM_techniquetype_tech (sizeof (techniquetype_tech) / sizeof (techniquetype_tech[0]))
1216
1217
/* Translates a Technique_type_* value into the tech code stored in the same
 * techniquetype_tech row.  Returns -1 when no row has this technique_type. */
NLM_EXTERN Int4 TechFromTechniqueType (Int4 technique_type)
{
  Int4 row;
  Int4 result = -1;

  for (row = 0; row < NUM_techniquetype_tech; row++) {
    if (techniquetype_tech[row].technique_type == technique_type) {
      result = techniquetype_tech[row].tech;
      break;
    }
  }
  return result;
}
1228
1229
1230
/* Returns the name string of the first techniquetype_tech row whose tech code
 * equals the argument (the table's own string, not a copy), or NULL when no
 * row matches. */
NLM_EXTERN CharPtr TechNameFromTech (Int4 tech)
{
  Int4    row;
  CharPtr found_name = NULL;

  for (row = 0; row < NUM_techniquetype_tech; row++) {
    if (techniquetype_tech[row].tech == tech) {
      found_name = techniquetype_tech[row].name;
      break;
    }
  }
  return found_name;
}
1241
1242
1243
/* Looks up a tech code by name.  Names are compared with StringCmp; the first
 * matching row wins.  Returns -1 when nothing matches. */
static Int4 TechFromTechName (CharPtr tech_name)
{
  Int4 pos = 0;

  while (pos < NUM_techniquetype_tech) {
    if (StringCmp (techniquetype_tech[pos].name, tech_name) == 0) {
      return techniquetype_tech[pos].tech;
    }
    pos++;
  }
  return -1;
}
1254
1255
1256
/* Builds a ValNode list with one node per techniquetype_tech row, in table
 * order.  Each node is added with the row's technique_type value and a
 * StringSave copy of its name. */
NLM_EXTERN ValNodePtr GetTechniqueTypeList (void)
{
  ValNodePtr head = NULL;
  Int4       row = 0;

  while (row < NUM_techniquetype_tech) {
    ValNodeAddPointer (&head,
                       techniquetype_tech[row].technique_type,
                       StringSave (techniquetype_tech[row].name));
    row++;
  }
  return head;
}
1266
1267
1268
/* Completedness fields */
1269
typedef struct completednesstypecompleteness {
1270
  Int4 completedness_type;
1271
  Int4 completeness;
1272
  CharPtr name;
1273
} CompletednessTypeCompletenessData, PNTR CompletednessTypeCompletenessPtr;
1274
1275
static CompletednessTypeCompletenessData completednesstype_completeness[] = {
1276
 { Completedness_type_unknown, 0, "unknown" } ,
1277
 { Completedness_type_complete, 1, "complete" } ,
1278
 { Completedness_type_partial, 2, "partial" } ,
1279
 { Completedness_type_no_left, 3, "no left" } ,
1280
 { Completedness_type_no_right, 4, "no right" } ,
1281
 { Completedness_type_no_ends, 5, "no ends" } ,
1282
 { Completedness_type_has_left, 6, "has left" } ,
1283
 { Completedness_type_has_right, 7, "has right" } ,
1284
 { Completedness_type_other, 255, "other" }
1285
};
1286
1287
/* Number of rows in completednesstype_completeness.  Parenthesized so the
 * quotient stays intact when the macro is used inside a larger expression. */
#define NUM_completednesstype_completeness (sizeof (completednesstype_completeness) / sizeof (completednesstype_completeness[0]))
1288
1289
/* Translates a Completedness_type_* value into the completeness code stored
 * in the same completednesstype_completeness row.  Returns -1 when no row has
 * this completedness_type. */
NLM_EXTERN Int4 CompletenessFromCompletednessType (Int4 completedness_type)
{
  Int4 row;
  Int4 result = -1;

  for (row = 0; row < NUM_completednesstype_completeness; row++) {
    if (completednesstype_completeness[row].completedness_type == completedness_type) {
      result = completednesstype_completeness[row].completeness;
      break;
    }
  }
  return result;
}
1300
1301
1302
/* Returns the name string of the first completednesstype_completeness row
 * whose completeness code equals the argument (the table's own string, not a
 * copy), or NULL when no row matches. */
NLM_EXTERN CharPtr CompletenessNameFromCompleteness (Int4 completeness)
{
  Int4    row;
  CharPtr found_name = NULL;

  for (row = 0; row < NUM_completednesstype_completeness; row++) {
    if (completednesstype_completeness[row].completeness == completeness) {
      found_name = completednesstype_completeness[row].name;
      break;
    }
  }
  return found_name;
}
1313
1314
1315
/* Looks up a completeness code by name.  Names are compared with StringCmp;
 * the first matching row wins.  Returns -1 when nothing matches. */
static Int4 CompletenessFromCompletenessName (CharPtr completeness_name)
{
  Int4 pos = 0;

  while (pos < NUM_completednesstype_completeness) {
    if (StringCmp (completednesstype_completeness[pos].name, completeness_name) == 0) {
      return completednesstype_completeness[pos].completeness;
    }
    pos++;
  }
  return -1;
}
1326
1327
1328
/* Builds a ValNode list with one node per completednesstype_completeness row,
 * in table order.  Each node is added with the row's completedness_type value
 * and a StringSave copy of its name. */
NLM_EXTERN ValNodePtr GetCompletednessTypeList (void)
{
  ValNodePtr head = NULL;
  Int4       row = 0;

  while (row < NUM_completednesstype_completeness) {
    ValNodeAddPointer (&head,
                       completednesstype_completeness[row].completedness_type,
                       StringSave (completednesstype_completeness[row].name));
    row++;
  }
  return head;
}
1338
1339
1340
/* Molecule class fields */
1341
typedef struct moleculeclasstypemol {
1342
  Int4 moleculeclass_type;
1343
  Int4 mol;
1344
  CharPtr name;
1345
} MoleculeClassTypeMolData, PNTR MoleculeClassTypeMolPtr;
1346
1347
static MoleculeClassTypeMolData moleculeclasstype_mol[] = {
1348
 { Molecule_class_type_unknown, 0, "unknown" } ,
1349
 { Molecule_class_type_dna, MOLECULE_CLASS_DNA, "DNA" } ,
1350
 { Molecule_class_type_rna, MOLECULE_CLASS_RNA, "RNA" } ,
1351
 { Molecule_class_type_protein, MOLECULE_CLASS_PROTEIN, "protein" } ,
1352
 { Molecule_class_type_nucleotide, MOLECULE_CLASS_NUC, "nucleotide" } ,
1353
 { Molecule_class_type_other, 255, "other" } 
1354
};
1355
1356
1357
/* Number of rows in moleculeclasstype_mol.  Parenthesized so the quotient
 * stays intact when the macro is used inside a larger expression. */
#define NUM_moleculeclasstype_mol (sizeof (moleculeclasstype_mol) / sizeof (moleculeclasstype_mol[0]))
1358
1359
/* Translates a Molecule_class_type_* value into the mol code stored in the
 * same moleculeclasstype_mol row.  Returns -1 when no row has this
 * moleculeclass_type. */
NLM_EXTERN Int4 MolFromMoleculeClassType (Int4 moleculeclass_type)
{
  Int4 row;
  Int4 result = -1;

  for (row = 0; row < NUM_moleculeclasstype_mol; row++) {
    if (moleculeclasstype_mol[row].moleculeclass_type == moleculeclass_type) {
      result = moleculeclasstype_mol[row].mol;
      break;
    }
  }
  return result;
}
1370
1371
1372
/* Returns the name string of the first moleculeclasstype_mol row whose mol
 * code equals the argument (the table's own string, not a copy), or NULL when
 * no row matches. */
NLM_EXTERN CharPtr MolNameFromMol (Int4 mol)
{
  Int4    row;
  CharPtr found_name = NULL;

  for (row = 0; row < NUM_moleculeclasstype_mol; row++) {
    if (moleculeclasstype_mol[row].mol == mol) {
      found_name = moleculeclasstype_mol[row].name;
      break;
    }
  }
  return found_name;
}
1383
1384
1385
/* Looks up a mol code by name.  Names are compared with StringCmp; the first
 * matching row wins.  Returns -1 when nothing matches. */
static Int4 MolFromMolName (CharPtr mol_name)
{
  Int4 pos = 0;

  while (pos < NUM_moleculeclasstype_mol) {
    if (StringCmp (moleculeclasstype_mol[pos].name, mol_name) == 0) {
      return moleculeclasstype_mol[pos].mol;
    }
    pos++;
  }
  return -1;
}
1396
1397
1398
/* Builds a ValNode list with one node per moleculeclasstype_mol row, in table
 * order.  Each node is added with the row's moleculeclass_type value and a
 * StringSave copy of its name. */
NLM_EXTERN ValNodePtr GetMoleculeClassTypeList (void)
{
  ValNodePtr head = NULL;
  Int4       row = 0;

  while (row < NUM_moleculeclasstype_mol) {
    ValNodeAddPointer (&head,
                       moleculeclasstype_mol[row].moleculeclass_type,
                       StringSave (moleculeclasstype_mol[row].name));
    row++;
  }
  return head;
}
1408
1409
1410
/* Topology fields */
1411
typedef struct topologytypetopology {
1412
  Int4 topology_type;
1413
  Int4 topology;
1414
  CharPtr name;
1415
} TopologyTypeTopologyData, PNTR TopologyTypeTopologyPtr;
1416
1417
static TopologyTypeTopologyData topologytype_topology[] = {
1418
 { Topology_type_unknown, 0, "unknown" } ,
1419
 { Topology_type_linear, TOPOLOGY_LINEAR, "linear" } ,
1420
 { Topology_type_circular, TOPOLOGY_CIRCULAR, "circular" } ,
1421
 { Topology_type_tandem, TOPOLOGY_TANDEM, "tandem" } ,
1422
 { Topology_type_other, 255, "other" } 
1423
};
1424
1425
/* Number of rows in topologytype_topology.  Parenthesized so the quotient
 * stays intact when the macro is used inside a larger expression. */
#define NUM_topologytype_topology (sizeof (topologytype_topology) / sizeof (topologytype_topology[0]))
1426
1427
/* Translates a Topology_type_* value into the topology code stored in the
 * same topologytype_topology row.  Returns -1 when no row has this
 * topology_type. */
NLM_EXTERN Int4 TopologyFromTopologyType (Int4 topology_type)
{
  Int4 row;
  Int4 result = -1;

  for (row = 0; row < NUM_topologytype_topology; row++) {
    if (topologytype_topology[row].topology_type == topology_type) {
      result = topologytype_topology[row].topology;
      break;
    }
  }
  return result;
}
1438
1439
1440
/* Returns the name string of the first topologytype_topology row whose
 * topology code equals the argument (the table's own string, not a copy), or
 * NULL when no row matches. */
NLM_EXTERN CharPtr TopologyNameFromTopology (Int4 topology)
{
  Int4    row;
  CharPtr found_name = NULL;

  for (row = 0; row < NUM_topologytype_topology; row++) {
    if (topologytype_topology[row].topology == topology) {
      found_name = topologytype_topology[row].name;
      break;
    }
  }
  return found_name;
}
1451
1452
1453
/* Looks up a topology code by name.  Names are compared with StringCmp; the
 * first matching row wins.  Returns -1 when nothing matches. */
static Int4 TopologyFromTopologyName (CharPtr topology_name)
{
  Int4 pos = 0;

  while (pos < NUM_topologytype_topology) {
    if (StringCmp (topologytype_topology[pos].name, topology_name) == 0) {
      return topologytype_topology[pos].topology;
    }
    pos++;
  }
  return -1;
}
1464
1465
1466
/* Builds a ValNode list with one node per topologytype_topology row, in table
 * order.  Each node is added with the row's topology_type value and a
 * StringSave copy of its name. */
NLM_EXTERN ValNodePtr GetTopologyTypeList (void)
{
  ValNodePtr head = NULL;
  Int4       row = 0;

  while (row < NUM_topologytype_topology) {
    ValNodeAddPointer (&head,
                       topologytype_topology[row].topology_type,
                       StringSave (topologytype_topology[row].name));
    row++;
  }
  return head;
}
1476
1477
1478
/* strand fields */
1479
typedef struct strandtypestrand {
1480
  Int4 strand_type;
1481
  Int4 strand;
1482
  CharPtr name;
1483
} StrandTypeStrandData, PNTR StrandTypeStrandPtr;
1484
1485
static StrandTypeStrandData strandtype_strand[] = {
1486
 { Strand_type_unknown, 0, "unknown" } ,
1487
 { Strand_type_single, STRANDEDNESS_SINGLE, "single" } ,
1488
 { Strand_type_double__, STRANDEDNESS_DOUBLE, "double" } ,
1489
 { Strand_type_mixed, 3, "mixed" } ,
1490
 { Strand_type_mixed_rev, 4, "mixed-rev" } ,
1491
 { Strand_type_other, 255, "other" } 
1492
};
1493
1494
/* Number of rows in strandtype_strand.  Parenthesized so the quotient stays
 * intact when the macro is used inside a larger expression. */
#define NUM_strandtype_strand (sizeof (strandtype_strand) / sizeof (strandtype_strand[0]))
1495
1496
/* Translates a Strand_type_* value into the strand code stored in the same
 * strandtype_strand row.  Returns -1 when no row has this strand_type. */
NLM_EXTERN Int4 StrandFromStrandType (Int4 strand_type)
{
  Int4 row;
  Int4 result = -1;

  for (row = 0; row < NUM_strandtype_strand; row++) {
    if (strandtype_strand[row].strand_type == strand_type) {
      result = strandtype_strand[row].strand;
      break;
    }
  }
  return result;
}
1507
1508
1509
/* Returns the name string of the first strandtype_strand row whose strand
 * code equals the argument (the table's own string, not a copy), or NULL when
 * no row matches. */
NLM_EXTERN CharPtr StrandNameFromStrand (Int4 strand)
{
  Int4    row;
  CharPtr found_name = NULL;

  for (row = 0; row < NUM_strandtype_strand; row++) {
    if (strandtype_strand[row].strand == strand) {
      found_name = strandtype_strand[row].name;
      break;
    }
  }
  return found_name;
}
1520
1521
1522
/* Looks up a strand code by name.  Names are compared with StringCmp; the
 * first matching row wins.  Returns -1 when nothing matches. */
static Int4 StrandFromStrandName (CharPtr strand_name)
{
  Int4 pos = 0;

  while (pos < NUM_strandtype_strand) {
    if (StringCmp (strandtype_strand[pos].name, strand_name) == 0) {
      return strandtype_strand[pos].strand;
    }
    pos++;
  }
  return -1;
}
1533
1534
1535
/* Builds a ValNode list with one node per strandtype_strand row, in table
 * order.  Each node is added with the row's strand_type value and a
 * StringSave copy of its name. */
NLM_EXTERN ValNodePtr GetStrandTypeList (void)
{
  ValNodePtr head = NULL;
  Int4       row = 0;

  while (row < NUM_strandtype_strand) {
    ValNodeAddPointer (&head,
                       strandtype_strand[row].strand_type,
                       StringSave (strandtype_strand[row].name));
    row++;
  }
  return head;
}
1545
1546
1547
/* Returns the name string for the value carried by a Molinfo field node.
 *
 * field->choice selects which pair of lookup tables is used and
 * field->data.intvalue is the value being named.  The result is whatever the
 * corresponding *NameFrom* lookup returns (NULL when the value is unknown).
 * Returns NULL for a NULL field or a choice that is not handled here.
 */
static CharPtr GetSequenceQualValName (ValNodePtr field)
{
  if (field == NULL) {
    return NULL;
  }

  switch (field->choice) {
    case MolinfoField_molecule:
      return BiomolNameFromBiomol (BiomolFromMoleculeType (field->data.intvalue));
    case MolinfoField_technique:
      return TechNameFromTech (TechFromTechniqueType (field->data.intvalue));
    case MolinfoField_completedness:
      return CompletenessNameFromCompleteness (CompletenessFromCompletednessType (field->data.intvalue));
    case MolinfoField_mol_class:
      return MolNameFromMol (MolFromMoleculeClassType (field->data.intvalue));
    case MolinfoField_topology:
      return TopologyNameFromTopology (TopologyFromTopologyType (field->data.intvalue));
    case MolinfoField_strand:
      return StrandNameFromStrand (StrandFromStrandType (field->data.intvalue));
  }
  return NULL;
}
1574
1575
1576
/* Builds a "<field name> <value name>" label for a Molinfo field node.
 *
 * field->choice selects the field name and the lookup tables used to name
 * field->data.intvalue.  An unhandled choice yields
 * "invalid field invalid value"; a handled choice whose value has no name
 * yields "<field name> Invalid value".
 *
 * Returns a buffer obtained from MemNew, or NULL when field is NULL or the
 * allocation fails.
 */
static CharPtr GetSequenceQualName (ValNodePtr field)
{
  CharPtr str = NULL, fieldname = "invalid field", val = "invalid value";
  CharPtr fmt = "%s %s";

  if (field == NULL) return NULL;
  switch (field->choice) {
    case MolinfoField_molecule:
      fieldname = "molecule";
      val = BiomolNameFromBiomol (BiomolFromMoleculeType (field->data.intvalue));
      break;
    case MolinfoField_technique:
      fieldname = "technique";
      val = TechNameFromTech (TechFromTechniqueType (field->data.intvalue));
      break;
    case MolinfoField_completedness:
      fieldname = "completeness";
      val = CompletenessNameFromCompleteness (CompletenessFromCompletednessType (field->data.intvalue));
      break;
    case MolinfoField_mol_class:
      fieldname = "class";
      val = MolNameFromMol (MolFromMoleculeClassType (field->data.intvalue));
      break;
    case MolinfoField_topology:
      fieldname = "topology";
      val = TopologyNameFromTopology (TopologyFromTopologyType (field->data.intvalue));
      break;
    case MolinfoField_strand:
      fieldname = "strand";
      val = StrandNameFromStrand (StrandFromStrandType (field->data.intvalue));
      break;
    default:
      /* keep the "invalid field" / "invalid value" placeholders */
      break;
  }
  if (val == NULL) {
    /* the lookups return NULL for values that are not in their tables */
    val = "Invalid value";
  }

  /* The output needs len(fieldname) + 1 separator + len(val) + 1 terminator
   * characters; adding the full length of fmt (5) covers those 2 extras. */
  str = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (fieldname) + StringLen (val)));
  if (str != NULL) {
    /* only format when the allocation succeeded */
    sprintf (str, fmt, fieldname, val);
  }
  return str;
}
1615
1616
1617
/* Simple constraints */
1618
/* Reports whether the match_len characters at 'found' inside the string that
 * begins at 'start' are delimited as a whole word, i.e. neither the character
 * just before the match nor the one just after it is a letter or digit.
 *
 * A zero match_len is always a whole-word match.  A NULL start or found
 * pointer (with non-zero match_len) is never one.
 */
static Boolean IsWholeWordMatch (CharPtr start, CharPtr found, Int4 match_len)
{
  Boolean       rval = TRUE;
  unsigned char char_after;
  unsigned char char_before;

  if (match_len == 0)
  {
    rval = TRUE;
  }
  else if (start == NULL || found == NULL)
  {
    rval = FALSE;
  }
  else
  {
    /* The <ctype.h> classifiers are only defined for values representable
     * as unsigned char (or EOF); a plain Char may be negative, so the
     * neighbours are converted before being classified. */
    char_after = (unsigned char) *(found + match_len);
    if (found != start)
    {
      char_before = (unsigned char) *(found - 1);
      if (isalpha (char_before) || isdigit (char_before))
      {
        rval = FALSE;
      }
    }
    if (char_after != 0 && (isalpha (char_after) || isdigit (char_after)))
    {
      rval = FALSE;
    }
  }
  return rval;
}
1650
1651
1652
/* A string constraint is "empty" when it is NULL or its match_text has no
 * text; returns TRUE in those cases and FALSE otherwise.
 */
NLM_EXTERN Boolean IsStringConstraintEmpty (StringConstraintPtr scp)
{
  if (scp != NULL && !StringHasNoText (scp->match_text)) {
    return FALSE;
  }
  return TRUE;
}
1657
1658
1659
/* Tests str against the positive sense of a string constraint (the
 * not_present flag is NOT applied here).
 *
 * An empty constraint matches everything; otherwise a string without text
 * matches nothing.  The remaining behaviour depends on scp->match_location:
 *   contains - match_text occurs in str (some occurrence must be a whole
 *              word when whole_word is set)
 *   starts   - the first occurrence of match_text is at the start of str
 *   ends     - some occurrence of match_text runs to the end of str
 *   equals   - str compares equal to match_text
 *   inlist   - str occurs as a whole word inside match_text, or
 *              IsStringInSpanInList (defined elsewhere) accepts it
 * case_sensitive selects between the exact and case-insensitive search and
 * comparison functions.  Any other match_location yields FALSE.
 */
NLM_EXTERN Boolean DoesSingleStringMatchConstraint (CharPtr str, StringConstraintPtr scp)
{
  CharPtr hit;
  CharPtr needle;
  Boolean exact;
  Boolean matched = FALSE;
  Char    trailing = 0;

  if (IsStringConstraintEmpty (scp)) return TRUE;
  if (StringHasNoText (str)) return FALSE;

  needle = scp->match_text;
  exact = scp->case_sensitive;

  switch (scp->match_location)
  {
    case String_location_contains:
      hit = exact ? StringSearch (str, needle) : StringISearch (str, needle);
      if (hit == NULL)
      {
        matched = FALSE;
      }
      else if (!scp->whole_word)
      {
        matched = TRUE;
      }
      else
      {
        /* walk the occurrences until one is delimited as a whole word */
        matched = IsWholeWordMatch (str, hit, StringLen (needle));
        while (!matched && hit != NULL)
        {
          hit = exact ? StringSearch (hit + 1, needle) : StringISearch (hit + 1, needle);
          if (hit != NULL)
          {
            matched = IsWholeWordMatch (str, hit, StringLen (needle));
          }
        }
      }
      break;

    case String_location_starts:
      hit = exact ? StringSearch (str, needle) : StringISearch (str, needle);
      if (hit == str)
      {
        matched = scp->whole_word ? IsWholeWordMatch (str, hit, StringLen (needle)) : TRUE;
      }
      break;

    case String_location_ends:
      hit = exact ? StringSearch (str, needle) : StringISearch (str, needle);
      while (hit != NULL && !matched)
      {
        trailing = *(hit + StringLen (needle));
        if (trailing != 0)
        {
          /* this occurrence is not at the end; try the next one */
          hit = exact ? StringSearch (hit + 1, needle) : StringISearch (hit + 1, needle);
          continue;
        }
        matched = scp->whole_word ? IsWholeWordMatch (str, hit, StringLen (needle)) : TRUE;
        /* stop the search, we're at the end of the string */
        hit = NULL;
      }
      break;

    case String_location_equals:
      if ((exact ? StringCmp (str, needle) : StringICmp (str, needle)) == 0)
      {
        matched = TRUE;
      }
      break;

    case String_location_inlist:
      /* roles are swapped here: str is looked up inside match_text */
      hit = exact ? StringSearch (needle, str) : StringISearch (needle, str);
      if (hit == NULL)
      {
        matched = FALSE;
      }
      else
      {
        matched = IsWholeWordMatch (needle, hit, StringLen (str));
        while (!matched && hit != NULL)
        {
          hit = exact ? StringSearch (hit + 1, str) : StringISearch (hit + 1, str);
          if (hit != NULL)
          {
            matched = IsWholeWordMatch (needle, hit, StringLen (str));
          }
        }
      }
      if (!matched)
      {
        /* look for spans */
        matched = IsStringInSpanInList (str, needle);
      }
      break;
  }
  return matched;
}
1821
1822
1823
/* Full constraint test: evaluates DoesSingleStringMatchConstraint and then
 * inverts the answer when the (non-NULL) constraint has not_present set.
 */
NLM_EXTERN Boolean DoesStringMatchConstraint (CharPtr str, StringConstraintPtr scp)
{
  Boolean matched = DoesSingleStringMatchConstraint (str, scp);

  if (scp == NULL || !scp->not_present) {
    return matched;
  }
  return !matched;
}
1833
1834
1835
/* Tests a ValNode list of strings against a constraint by joining every
 * element with "; " into one temporary string and matching that string.
 *
 * An empty constraint matches any list (including NULL); otherwise a NULL
 * list never matches.
 */
static Boolean DoesStringListMatchConstraint (ValNodePtr list, StringConstraintPtr scp)
{
  Int4       total = 1;     /* room for the terminator */
  CharPtr    joined;
  Boolean    matched;
  ValNodePtr item;

  if (IsStringConstraintEmpty (scp)) {
    return TRUE;
  }
  if (list == NULL) return FALSE;

  /* every element is budgeted its own length plus a "; " separator */
  item = list;
  while (item != NULL) {
    total += StringLen (item->data.ptrvalue) + 2;
    item = item->next;
  }

  /* NOTE(review): concatenating onto the fresh buffer presumes MemNew hands
   * back zero-filled memory - confirm against the toolkit. */
  joined = (CharPtr) MemNew (sizeof (Char) * total);
  item = list;
  while (item != NULL) {
    StringCat (joined, item->data.ptrvalue);
    if (item->next != NULL) {
      StringCat (joined, "; ");
    }
    item = item->next;
  }

  matched = DoesStringMatchConstraint (joined, scp);
  joined = MemFree (joined);
  return matched;
}
1863
1864
1865
/* Checks the strand of a location against the strand part of a location
 * constraint.
 *
 * A NULL location never matches.  A NULL constraint, or one whose strand is
 * Strand_constraint_any, always matches.  Otherwise a minus-strand location
 * satisfies only Strand_constraint_minus and every other strand value
 * satisfies only Strand_constraint_plus.
 */
static Boolean DoesStrandMatchConstraint (SeqLocPtr slp, LocationConstraintPtr lcp)
{
  Uint2 strand;

  if (slp == NULL) {
    return FALSE;
  }
  if (lcp == NULL || lcp->strand == Strand_constraint_any) {
    return TRUE;
  }

  strand = SeqLocStrand (slp);
  if (strand == Seq_strand_minus) {
    if (lcp->strand == Strand_constraint_minus) {
      return TRUE;
    }
    return FALSE;
  }
  if (lcp->strand == Strand_constraint_plus) {
    return TRUE;
  }
  return FALSE;
}
1906
1907
1908
/* Checks a Bioseq's molecule type against a sequence-type constraint value.
 *
 * NULL never matches; Seqtype_constraint_any matches every non-NULL Bioseq;
 * Seqtype_constraint_nuc requires ISA_na (bsp->mol) and
 * Seqtype_constraint_prot requires ISA_aa (bsp->mol).
 */
static Boolean DoesBioseqMatchSequenceType (BioseqPtr bsp, Uint2 seq_type)
{
  if (bsp == NULL) {
    return FALSE;
  }
  if (seq_type == Seqtype_constraint_any) {
    return TRUE;
  }
  if (seq_type == Seqtype_constraint_nuc && ISA_na (bsp->mol)) {
    return TRUE;
  }
  if (seq_type == Seqtype_constraint_prot && ISA_aa (bsp->mol)) {
    return TRUE;
  }
  return FALSE;
}
1925
1926
1927
/* Checks the sequence a location refers to against the seq_type part of a
 * location constraint.
 *
 * A NULL location never matches.  A NULL constraint, or one whose seq_type is
 * Seqtype_constraint_any, always matches.  Otherwise the Bioseq found from
 * the location is tested with DoesBioseqMatchSequenceType.
 */
static Boolean DoesSequenceTypeMatchContraint (SeqLocPtr slp, LocationConstraintPtr lcp)
{
  BioseqPtr target;

  if (slp == NULL) {
    return FALSE;
  }
  if (lcp == NULL || lcp->seq_type == Seqtype_constraint_any) {
    return TRUE;
  }

  target = BioseqFindFromSeqLoc (slp);
  return DoesBioseqMatchSequenceType (target, lcp->seq_type);
}
1947
1948
/* A location satisfies a location constraint when it is non-NULL and either
 * there is no constraint or both the strand test and the sequence-type test
 * pass.
 */
static Boolean DoesLocationMatchConstraint (SeqLocPtr slp, LocationConstraintPtr lcp)
{
  if (slp == NULL) {
    return FALSE;
  }
  if (lcp == NULL) {
    return TRUE;
  }
  if (!DoesStrandMatchConstraint (slp, lcp)) {
    return FALSE;
  }
  if (!DoesSequenceTypeMatchContraint (slp, lcp)) {
    return FALSE;
  }
  return TRUE;
}
1963
1964
1965
/* Tests an arbitrary object against a location constraint.
 *
 * choice:     OBJ_SEQFEAT (data is a SeqFeatPtr), OBJ_SEQDESC (data is a
 *             SeqDescrPtr) or 0 (data is a CGPSetPtr, a CDS-gene-prot group).
 * data:       the object itself; NULL never matches.
 * constraint: NULL, or strand and seq_type both "any", always matches.
 *
 * The object is first resolved to a Bioseq, which must satisfy the seq_type
 * part of the constraint (an unresolved Bioseq fails).  A strand constraint
 * is then evaluated on the coding region of a protein Bioseq, on every CDS,
 * gene and mRNA of a group, or on the feature location; descriptors cannot
 * satisfy a strand constraint.
 */
static Boolean DoesObjectMatchLocationConstraint (Uint1 choice, Pointer data, LocationConstraintPtr constraint)
{
  SeqFeatPtr  sfp;
  SeqDescrPtr sdp;
  CGPSetPtr   cgp;
  BioseqPtr  bsp = NULL;
  BioseqSetPtr bssp;
  ValNodePtr    vnp;
  ObjValNodePtr ovp;
  SeqMgrFeatContext context;
  Boolean     cds_on_minus;

  if (data == NULL) return FALSE;
 
  if (constraint == NULL 
      || (constraint->strand == Strand_constraint_any
          && constraint->seq_type == Seqtype_constraint_any)) {
    return TRUE;
  }

  if (choice == OBJ_SEQFEAT) {
    sfp = (SeqFeatPtr) data;
    bsp = BioseqFindFromSeqLoc (sfp->location);
  } else if (choice == OBJ_SEQDESC) {
    sdp = (SeqDescrPtr) data;
    if (sdp->extended != 0) {
      ovp = (ObjValNodePtr) sdp;
      if (ovp->idx.parenttype == OBJ_BIOSEQSET) {
        bssp = (BioseqSetPtr) ovp->idx.parentptr;
        /* NOTE(review): the first set member is accepted when it is a
         * Bioseq-set, yet its pointer is then used as a BioseqPtr.  This
         * looks like it was meant to test for a Bioseq; left unchanged
         * pending confirmation of the intended behaviour. */
        if (bssp != NULL && bssp->seq_set != NULL && IS_Bioseq_set (bssp->seq_set)) {
          bsp = (BioseqPtr) bssp->seq_set->data.ptrvalue;
        }
      } else if (ovp->idx.parenttype == OBJ_BIOSEQ) {
        bsp = (BioseqPtr) ovp->idx.parentptr;
      }
    }
  } else if (choice == 0) {
    /* a CDS-gene-prot group can only satisfy the "any" sequence type */
    if (constraint->seq_type != Seqtype_constraint_any) {
      return FALSE;
    }
    cgp = (CGPSetPtr) data;
    /* use the first available feature of the group to locate the Bioseq */
    if (cgp->cds_list != NULL && cgp->cds_list->data.ptrvalue != NULL) {
      sfp = (SeqFeatPtr) cgp->cds_list->data.ptrvalue;
      bsp = BioseqFindFromSeqLoc (sfp->location);
    } else if (cgp->gene_list != NULL && cgp->gene_list->data.ptrvalue != NULL) {
      sfp = (SeqFeatPtr) cgp->gene_list->data.ptrvalue;
      bsp = BioseqFindFromSeqLoc (sfp->location);
    } else if (cgp->mrna_list != NULL && cgp->mrna_list->data.ptrvalue != NULL) {
      sfp = (SeqFeatPtr) cgp->mrna_list->data.ptrvalue;
      bsp = BioseqFindFromSeqLoc (sfp->location);
    } else if (cgp->prot_list != NULL && cgp->prot_list->data.ptrvalue != NULL) {
      sfp = (SeqFeatPtr) cgp->prot_list->data.ptrvalue;
      bsp = BioseqFindFromSeqLoc (sfp->location);
    }
  }
  /* also rejects a NULL bsp, so bsp may be dereferenced below */
  if (!DoesBioseqMatchSequenceType(bsp, constraint->seq_type)) {
    return FALSE;
  }
  if (constraint->strand != Strand_constraint_any && ISA_aa (bsp->mol)) {      
    sfp = SeqMgrGetCDSgivenProduct (bsp, &context);
    /* Only trust the context when a coding region was actually found; a
     * protein without one is treated as not being on the minus strand. */
    cds_on_minus = (Boolean) (sfp != NULL && context.strand == Seq_strand_minus);
    if (constraint->strand == Strand_constraint_minus && !cds_on_minus) {
      return FALSE;
    }
    if (constraint->strand == Strand_constraint_plus && cds_on_minus) {
      return FALSE;
    }
  } else if (constraint->strand != Strand_constraint_any) {
    if (choice == 0) {
      /* strand for CDS-Gene-Prot group */
      cgp = (CGPSetPtr) data;
      for (vnp = cgp->cds_list; vnp != NULL; vnp = vnp->next) {
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
        if (sfp != NULL && !DoesStrandMatchConstraint (sfp->location, constraint)) {
          return FALSE;
        }
      }
      for (vnp = cgp->gene_list; vnp != NULL; vnp = vnp->next) {
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
        if (sfp != NULL && !DoesStrandMatchConstraint (sfp->location, constraint)) {
          return FALSE;
        }
      }
      for (vnp = cgp->mrna_list; vnp != NULL; vnp = vnp->next) {
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
        if (sfp != NULL && !DoesStrandMatchConstraint (sfp->location, constraint)) {
          return FALSE;
        }
      }
    } else if (choice == OBJ_SEQFEAT) {
      sfp = (SeqFeatPtr) data;
      if (!DoesStrandMatchConstraint (sfp->location, constraint)) {
        return FALSE;
      }
    } else {
      /* descriptors can't meet strand constraints */
      return FALSE;
    }
  }
  return TRUE;
}
2064
2065
2066
/* for parsing and editing */
2067
/* Extracts the part of str selected by a text portion (optional left and
 * right marker texts) and returns it as a newly allocated string.
 *
 * Returns NULL when str has no text, when a required marker is not found (or
 * fails the whole-word test), or when the selected span is empty.  A NULL
 * text_portion returns a copy of the whole string.  The include_left and
 * include_right flags decide whether the marker texts themselves are part of
 * the result; case_sensitive selects the search function.
 */
static CharPtr GetTextPortionFromString (CharPtr str, TextPortionPtr text_portion)
{
  CharPtr left, right;
  CharPtr begin, end;
  CharPtr result;
  Int4    span;

  if (StringHasNoText (str)) {
    return NULL;
  }
  if (text_portion == NULL) {
    return StringSave (str);
  }

  left = text_portion->left_text;
  right = text_portion->right_text;

  /* locate the left edge; without a left marker it is the string start */
  begin = str;
  if (left != NULL && left [0] != 0)
  {
    begin = text_portion->case_sensitive ? StringSearch (str, left)
                                         : StringISearch (str, left);
    if (text_portion->whole_word && ! IsWholeWordMatch (str, begin, StringLen (left)))
    {
      begin = NULL;
    }
  }
  if (begin == NULL)
  {
    return NULL;
  }
  if (!text_portion->include_left)
  {
    begin += StringLen (left);
  }

  /* locate the right edge; without a right marker take the rest */
  if (right == NULL || right [0] == 0)
  {
    span = StringLen (begin);
  }
  else
  {
    end = text_portion->case_sensitive ? StringSearch (begin, right)
                                       : StringISearch (begin, right);
    if (text_portion->whole_word && ! IsWholeWordMatch (str, end, StringLen (right)))
    {
      end = NULL;
    }

    if (end == NULL)
    {
      span = 0;
    }
    else
    {
      span = (Int4)(end - begin);
      if (text_portion->include_right)
      {
        span += StringLen (right);
      }
    }
  }

  if (span <= 0)
  {
    return NULL;
  }
  result = (CharPtr) MemNew (sizeof (Char) * (span + 1));
  StringNCpy (result, begin, span);
  result[span] = 0;
  return result;
}
2152
2153
2154
2155
/* Finds where the span selected by a text portion begins inside str.
 *
 * Returns a pointer into str: the first occurrence of left_text (or just
 * past it when include_left is not set), or str itself when there is no
 * left_text.  Returns NULL when either argument is NULL, when left_text is
 * not found, or when right_text is given but does not occur at or after the
 * start position.
 *
 * NOTE(review): the search here is always the exact StringSearch; the
 * case_sensitive and whole_word flags of the text portion are not consulted.
 */
static CharPtr FindTextPortionLocationInString (CharPtr str, TextPortionPtr text_portion)
{
  CharPtr start;

  /* this function returns a pointer, so the failure value is NULL */
  if (str == NULL || text_portion == NULL) return NULL;

  if (text_portion->left_text != NULL) {
    start = StringSearch (str, text_portion->left_text);
    if (start != NULL && !text_portion->include_left) {
      start += StringLen (text_portion->left_text);
    }
  } else {
    start = str;
  }

  /* a right marker that cannot be found invalidates the whole span */
  if (start != NULL
      && text_portion->right_text != NULL
      && StringSearch (start, text_portion->right_text) == NULL) {
    start = NULL;
  }
  return start;
}
2181
2182
2183
/* Removes, in place, the span of src_text selected by a text portion.
 *
 * The cut starts at the position reported by FindTextPortionLocationInString.
 * Without a right_text the string is truncated there; otherwise everything
 * from the end of the span (after right_text when include_right is set,
 * before it otherwise) is moved down over the cut.  Nothing happens when
 * either argument is NULL or the span cannot be located.
 */
static void ReplaceStringForParse(CharPtr src_text, TextPortionPtr text_portion)
{
  CharPtr         src, dst;
  
  if (src_text == NULL || text_portion == NULL) {
    return;
  }

  dst = FindTextPortionLocationInString (src_text, text_portion);
  if (dst == NULL) return;
  if (text_portion->right_text == NULL) {
    *dst = 0;
  } else {
    /* Search from the cut point, as FindTextPortionLocationInString does.
     * Searching from the start of the string could find an earlier
     * occurrence of right_text, leaving src before dst; the forward copy
     * below would then overwrite its own terminator and run off the end. */
    src = StringSearch (dst, text_portion->right_text);
    if (src != NULL) {
      if (text_portion->include_right) {
        src += StringLen (text_portion->right_text);
      }
      /* src is never before dst, so a forward overlapping copy is safe */
      while (*src != 0) {
        *dst = *src;
        dst++;
        src++;
      }
      *dst = 0;
    }
  }
}
2210
2211
2212
/* generic functions for getting string values */
2213
/* Returns the buffer size (terminator included) needed to render a Dbtag as
 * "<db>:<tag>", or 0 for a NULL Dbtag.
 *
 * A string tag contributes its own length.  A numeric tag is budgeted for
 * the longest text that "%d" can produce for a 32-bit value.
 */
static Int4 GetDbtagStringLen (DbtagPtr db_tag)
{
  Int4 len;
  
  if (db_tag == NULL)
  {
    return 0;
  }
  
  /* database name, the ':' separator and the terminator */
  len = StringLen (db_tag->db) + 2;
  if (db_tag->tag != NULL)
  {
    if (db_tag->tag->str != NULL)
    {
      len += StringLen (db_tag->tag->str);
    }
    else
    {
      /* ten digits plus a possible minus sign ("-2147483648") */
      len += 11;
    }
  }
  return len;
}
2236
2237
2238
/* Renders a Dbtag as a newly allocated "<db>:<tag>" string, where the tag is
 * its string form when present and its numeric id otherwise.  A Dbtag with
 * no tag object renders as "<db>:".
 *
 * Returns NULL for a NULL Dbtag, a zero size from GetDbtagStringLen, or a
 * failed allocation.  The caller owns the result.
 */
static CharPtr GetDbtagString (DbtagPtr db_tag)
{
  Int4    needed;
  CharPtr text;
  
  if (db_tag == NULL) {
    return NULL;
  }
  
  needed = GetDbtagStringLen (db_tag);
  if (needed == 0) {
    return NULL;
  }
  
  text = (CharPtr) MemNew (needed * sizeof (Char));
  if (text == NULL) {
    return NULL;
  }

  StringCpy (text, db_tag->db);
  StringCat (text, ":");
  if (db_tag->tag == NULL) {
    return text;
  }
  if (db_tag->tag->str != NULL) {
    StringCat (text, db_tag->tag->str);
  } else {
    /* numeric tag: print it directly after the colon */
    sprintf (text + StringLen (text), "%d", db_tag->tag->id);
  }
  return text;
}
2266
2267
2268
/* generic functions for setting field values */
2269
/* Applies new_val to the string *existing_val according to an
 * ExistingTextOption.
 *
 * existing_val:  address of the owned string to update; NULL gives FALSE.
 * new_val:       the text to store.  When combined with existing text a
 *                NULL new_val is treated as an empty string.
 * existing_text: how to combine with existing text - replace it, append or
 *                prefix the new text with a "; ", " ", ": " or empty
 *                separator, or leave the old value alone.
 *
 * If *existing_val has no text it is simply replaced by a copy of new_val,
 * whatever the option.  Returns TRUE when *existing_val was changed; FALSE
 * for leave_old, an unrecognized option or a failed allocation.
 */
static Boolean SetStringValue (CharPtr PNTR existing_val, CharPtr new_val, Uint2 existing_text)
{
  CharPtr sep = NULL;          /* text placed between the two parts */
  Boolean new_first = FALSE;   /* TRUE for the prefix_* options */
  CharPtr first, second;
  CharPtr tmp;
  Int4    len;

  if (existing_val == NULL) {
    return FALSE;
  }

  if (StringHasNoText (*existing_val)
      || existing_text == ExistingTextOption_replace_old) {
    *existing_val = MemFree (*existing_val);
    *existing_val = StringSave (new_val);
    return TRUE;
  }

  switch (existing_text) {
    case ExistingTextOption_append_semi :
      sep = "; ";
      break;
    case ExistingTextOption_append_space :
      sep = " ";
      break;
    case ExistingTextOption_append_colon :
      sep = ": ";
      break;
    case ExistingTextOption_append_none :
      sep = "";
      break;
    case ExistingTextOption_prefix_semi :
      sep = "; ";
      new_first = TRUE;
      break;
    case ExistingTextOption_prefix_space :
      sep = " ";
      new_first = TRUE;
      break;
    case ExistingTextOption_prefix_colon :
      sep = ": ";
      new_first = TRUE;
      break;
    case ExistingTextOption_prefix_none :
      sep = "";
      new_first = TRUE;
      break;
    case ExistingTextOption_leave_old :
    default :
      return FALSE;
  }

  /* Passing a NULL pointer for "%s" is undefined, and on C libraries that
   * print "(null)" it would overrun a buffer sized from StringLen, which
   * counts NULL as zero characters. */
  if (new_val == NULL) {
    new_val = "";
  }

  if (new_first) {
    first = new_val;
    second = *existing_val;
  } else {
    first = *existing_val;
    second = new_val;
  }

  len = StringLen (first) + StringLen (sep) + StringLen (second) + 1;
  tmp = (CharPtr) MemNew (sizeof (Char) * len);
  if (tmp == NULL) {
    return FALSE;
  }
  sprintf (tmp, "%s%s%s", first, sep, second);
  MemFree (*existing_val);
  *existing_val = tmp;
  return TRUE;
}
2376
2377
2378
/* Applies new_val to a ValNode list of strings.
 *
 * list:          address of the list head; NULL gives FALSE.
 * scp:           optional constraint selecting what may be changed.
 * new_val:       text to store.
 * existing_text: ExistingTextOption describing how to combine.
 *
 * An empty list with an empty constraint simply receives new_val as its
 * first element.  For append_semi / prefix_semi / replace_old the list is
 * treated as a whole: when it matches the constraint, new_val is added at
 * the end, added at the front, or replaces every element.  leave_old changes
 * nothing.  For the remaining options each matching element is edited
 * individually through SetStringValue.
 *
 * Returns TRUE when the list was changed.
 */
static Boolean SetStringsInValNodeStringList (ValNodePtr PNTR list, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text)
{
  ValNodePtr vnp;
  CharPtr    cp;
  Boolean rval = FALSE;
  
  if (list == NULL)
  {
    return FALSE;
  }

  if (*list == NULL && (scp == NULL || StringHasNoText (scp->match_text))) {
    ValNodeAddPointer (list, 0, StringSave (new_val));
    rval = TRUE;
  } else if (existing_text == ExistingTextOption_append_semi) {
    if (DoesStringListMatchConstraint (*list, scp)) {
      ValNodeAddPointer (list, 0, StringSave (new_val));
      rval = TRUE;
    }
  } else if (existing_text == ExistingTextOption_prefix_semi) {
    if (DoesStringListMatchConstraint (*list, scp)) {
      vnp = ValNodeNew (NULL);
      /* leave the list untouched if no node could be allocated */
      if (vnp != NULL) {
        vnp->data.ptrvalue = StringSave (new_val);
        vnp->next = *list;
        *list = vnp;
        rval = TRUE;
      }
    }
  } else if (existing_text == ExistingTextOption_replace_old) {
    if (DoesStringListMatchConstraint (*list, scp)) {
      /* build the replacement first so that an allocation failure does
       * not throw away the existing values */
      vnp = ValNodeNew (NULL);
      if (vnp != NULL) {
        vnp->data.ptrvalue = StringSave (new_val);
        *list = ValNodeFreeData (*list);
        *list = vnp;
        rval = TRUE;
      }
    }
  } else if (existing_text == ExistingTextOption_leave_old) {
    rval = FALSE;
  } else {
    for (vnp = *list; vnp != NULL; vnp = vnp->next)
    {
      cp = (CharPtr) vnp->data.ptrvalue;
      if (DoesStringMatchConstraint (cp, scp)) {
        rval |= SetStringValue (&cp, new_val, existing_text);
        /* SetStringValue may have replaced the string */
        vnp->data.ptrvalue = cp;
      }
    }
  }
  return rval;
}
2427
2428
2429
/* Applies new_val to the values of GenBank qualifiers in a GBQual list.
 *
 * list:          address of the qualifier list head; NULL gives FALSE.
 * field:         FeatQualChoice_legal_qual (data.intvalue is a feature
 *                qualifier code, translated with GetGBQualFromFeatQual) or
 *                FeatQualChoice_illegal_qual (data.ptrvalue is used as a
 *                string constraint on the qualifier name).
 * scp:           optional constraint on the qualifier value.
 * new_val:       text to store.
 * existing_text: ExistingTextOption passed on to SetStringValue.
 *
 * For a legal qualifier, when no existing value was changed and the
 * constraint is NULL or has a NULL match_text, a new qualifier carrying
 * new_val is appended to the list.
 *
 * Returns TRUE when the list was changed.
 */
static Boolean SetStringInGBQualList (GBQualPtr PNTR list, ValNodePtr field, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text)
{
  Boolean rval = FALSE;
  Int4 gbqual;
  CharPtr qual_name = NULL;
  GBQualPtr gbq, last_gbq = NULL;

  /* list is dereferenced below, so guard it like the sibling list editors */
  if (list == NULL || field == NULL) return FALSE;

  if (field->choice == FeatQualChoice_legal_qual) 
  {
    gbqual = GetGBQualFromFeatQual (field->data.intvalue);
    if (gbqual > -1) {
      qual_name = ParFlat_GBQual_names [gbqual].name;
      for (gbq = *list; gbq != NULL; gbq = gbq->next) {
        if (StringCmp (gbq->qual, qual_name) == 0
            && DoesStringMatchConstraint (gbq->val, scp)) {
          rval |= SetStringValue (&(gbq->val), new_val, existing_text);
        }
        /* remember the tail in case a qualifier has to be appended */
        last_gbq = gbq;
      }
      if (!rval && (scp == NULL || scp->match_text == NULL)) {
        gbq = GBQualNew ();
        if (gbq != NULL) {
          gbq->qual = StringSave (qual_name);
          gbq->val = StringSave (new_val);
          if (last_gbq == NULL) {
            *list = gbq;
          } else {
            last_gbq->next = gbq;
          }
          rval = TRUE;
        }
      }
    }
  } else if (field->choice == FeatQualChoice_illegal_qual) {
    for (gbq = *list; gbq != NULL; gbq = gbq->next) {
      if (DoesStringMatchConstraint (gbq->qual, field->data.ptrvalue)
          && DoesStringMatchConstraint (gbq->val, scp)) {
        rval |= SetStringValue (&(gbq->val), new_val, existing_text);
      }
    }
  }

  return rval;
}
2473
2474
2475
/* Returns TRUE when str has text and consists solely of decimal digits;
 * FALSE otherwise (including for NULL or text-free strings).
 */
static Boolean IsAllDigits (CharPtr str)
{
  CharPtr cp;

  if (StringHasNoText (str)) return FALSE;

  cp = str;
  /* isdigit is only defined for unsigned char values (or EOF); a plain
   * char may be negative, so convert before classifying */
  while (*cp != 0 && isdigit ((unsigned char) *cp)) {
    cp++;
  }
  if (*cp == 0) {
    return TRUE;
  } else {
    return FALSE;
  }
}
2491
2492
2493
/* Applies value to an ObjectId, which stores either a numeric id or a
 * string.
 *
 * The current content (the id printed as text when it is positive, the
 * string otherwise) is combined with value through SetStringValue.  On
 * success the result is stored back: as a numeric id when it consists only
 * of digits and fits in a 32-bit signed value, as a string otherwise.
 *
 * Returns TRUE when the ObjectId was changed, FALSE for a NULL ObjectId or
 * when SetStringValue made no change.
 */
static Boolean SetObjectIdString (ObjectIdPtr oip, CharPtr value, Uint2 existing_text)
{
  const Int4 max_id = 2147483647;   /* largest 32-bit signed value */
  Boolean rval = FALSE;
  Boolean fits;
  Char    num[15];
  CharPtr tmp = NULL;
  CharPtr cp;
  Int4    id_val = 0;
  Int4    digit;

  if (oip == NULL) {
    return FALSE;
  }

  if (oip->id > 0) {
    sprintf (num, "%d", oip->id);
    tmp = StringSave (num);
  } else {
    tmp = StringSaveNoNull (oip->str);
  }
  if (SetStringValue (&tmp, value, existing_text)) {
    oip->str = MemFree (oip->str);        
    oip->id = 0;
    fits = IsAllDigits (tmp);
    /* Convert by hand with a range check: a digit string too long for a
     * 32-bit id would otherwise be turned into a wrong number.  Such
     * values are kept as text. */
    for (cp = tmp; fits && *cp != 0; cp++) {
      digit = *cp - '0';
      if (id_val > (max_id - digit) / 10) {
        fits = FALSE;
      } else {
        id_val = id_val * 10 + digit;
      }
    }
    if (fits) {
      oip->id = id_val;
    } else {
      /* ownership of the text moves to the ObjectId */
      oip->str = tmp;
      tmp = NULL;
    }
    rval = TRUE;
  }
  tmp = MemFree (tmp);
  return rval;
}
2523
2524
2525
/* Applies value to a Dbtag (database name plus tag).
 *
 * A value of the form "<db>:<tag>" is split at the first colon and the two
 * halves are applied to the database name and to the tag respectively.
 * A value without a colon is first tried as a database name: it is kept
 * when SetStringValue changes the name and IsDbxrefValid returns non-zero
 * for the result; otherwise it is applied to the tag.  A missing tag object
 * is created on demand.
 *
 * Returns TRUE when the Dbtag was changed; FALSE for a NULL Dbtag or a
 * value without text.
 */
static Boolean SetDbtagString (DbtagPtr db_tag, CharPtr value, Uint2 existing_text)
{
  Boolean changed = FALSE;
  CharPtr colon;
  Int4    validity;
  CharPtr candidate;
  CharPtr copy;
  
  if (db_tag == NULL || StringHasNoText (value)) {
    return FALSE;
  }

  colon = StringChr (value, ':');
  if (colon != NULL) {
    /* split a private copy so the caller's string stays intact */
    copy = StringSave (value);
    colon = StringChr (copy, ':');
    *colon = 0;
    colon++;
    changed = SetStringValue (&(db_tag->db), copy, existing_text);
    if (db_tag->tag == NULL) {
      db_tag->tag = ObjectIdNew ();
    }
    changed |= SetObjectIdString (db_tag->tag, colon, existing_text);
    copy = MemFree (copy);
    return changed;
  }

  /* no colon: try the value as a database name on a scratch copy */
  candidate = StringSave (db_tag->db);
  if (SetStringValue (&candidate, value, existing_text)) {
    validity = IsDbxrefValid (candidate, NULL, NULL, TRUE, NULL);
    if (validity != 0) {
      db_tag->db = MemFree (db_tag->db);
      db_tag->db = candidate;
      candidate = NULL;
      changed = TRUE;
    }
  }
  if (!changed) {
    if (db_tag->tag == NULL) {
      db_tag->tag = ObjectIdNew();
    }
    changed = SetObjectIdString (db_tag->tag, value, existing_text);
  }
  candidate = MemFree (candidate);
  return changed;
}
2570
2571
2572
/* Applies value to the db_xref list of a feature.
 *
 * With an empty constraint, a new Dbtag is created and appended when the
 * feature has no db_xrefs yet or the option is append_semi (the new Dbtag
 * is discarded again if value could not be applied to it).  Otherwise every
 * existing Dbtag whose "<db>:<tag>" text satisfies the constraint is edited
 * with SetDbtagString.
 *
 * Returns TRUE when anything was changed; FALSE for a NULL feature.
 */
static Boolean SetDbxrefString (SeqFeatPtr sfp, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
{
  ValNodePtr node;
  Boolean    changed = FALSE;
  Boolean    wanted;
  Boolean    unconstrained;
  DbtagPtr   fresh;
  CharPtr    text;
  
  if (sfp == NULL) {
    return FALSE;
  }

  unconstrained = (Boolean) (scp == NULL || StringHasNoText (scp->match_text));

  if (unconstrained
      && (sfp->dbxref == NULL || existing_text == ExistingTextOption_append_semi)) {
    fresh = DbtagNew ();
    changed = SetDbtagString (fresh, value, existing_text);
    if (changed) {
      ValNodeAddPointer (&(sfp->dbxref), 0, fresh);
    } else {
      fresh = DbtagFree (fresh);
    }
    return changed;
  }

  node = sfp->dbxref;
  while (node != NULL) {
    wanted = TRUE;
    if (scp != NULL) {
      /* the constraint is tested on the rendered "<db>:<tag>" text */
      text = GetDbtagString (node->data.ptrvalue);
      if (!DoesStringMatchConstraint (text, scp)) {
        wanted = FALSE;
      }
      text = MemFree (text);
    }
    if (wanted) {
      changed |= SetDbtagString (node->data.ptrvalue, value, existing_text);
    }
    node = node->next;
  }
  return changed;
}
2608
2609
2610
2611
/* Returns a newly allocated copy of the first string in a ValNode list that
 * has text and satisfies the constraint, or NULL when there is none.
 */
static CharPtr GetFirstValNodeStringMatch (ValNodePtr vnp, StringConstraintPtr scp)
{
  CharPtr    found = NULL;
  ValNodePtr item;

  for (item = vnp; item != NULL && found == NULL; item = item->next) {
    if (StringHasNoText (item->data.ptrvalue)) {
      continue;
    }
    if (DoesStringMatchConstraint (item->data.ptrvalue, scp)) {
      found = StringSave (item->data.ptrvalue);
    }
  }
  return found;
}
2623
2624
2625
/* Unlinks and frees every element of a ValNode string list whose string has
 * text and satisfies the constraint.
 *
 * Returns TRUE when at least one element was removed; FALSE otherwise or
 * when list is NULL.  *list is updated when the head is removed.
 */
static Boolean RemoveValNodeStringMatch (ValNodePtr PNTR list, StringConstraintPtr scp)
{
  ValNodePtr PNTR link;      /* the pointer that leads to the current node */
  ValNodePtr      current;
  Boolean         removed = FALSE;

  if (list == NULL) return FALSE;

  link = list;
  while (*link != NULL) {
    current = *link;
    if (!StringHasNoText (current->data.ptrvalue) 
        && DoesStringMatchConstraint (current->data.ptrvalue, scp)) {
      /* splice the node out, then free it on its own */
      *link = current->next;
      current->next = NULL;
      current = ValNodeFreeData (current);
      removed = TRUE;
    } else {
      link = &(current->next);
    }
  }
  return removed;
}
2651
2652
2653
/* Returns a newly allocated copy of the value of the first qualifier whose
 * name equals qual_name (ignoring case), whose value has text, and whose
 * value satisfies the constraint; NULL when no qualifier qualifies.
 */
static CharPtr GetFirstGBQualMatch (GBQualPtr qual, CharPtr qual_name, StringConstraintPtr scp)
{
  CharPtr   found = NULL;
  GBQualPtr item;

  for (item = qual; item != NULL && found == NULL; item = item->next) {
    if (StringICmp (item->qual, qual_name) != 0) {
      continue;
    }
    if (StringHasNoText (item->val)) {
      continue;
    }
    if (DoesStringMatchConstraint (item->val, scp)) {
      found = StringSave (item->val);
    }
  }
  return found;
}
2666
2667
2668
/* Like GetFirstGBQualMatch, but the qualifier name is selected with a string
 * constraint, not an exact name: returns a newly allocated copy of the first
 * value that has text and satisfies scp among qualifiers whose name
 * satisfies qual_name; NULL when there is none.
 */
static CharPtr GetFirstGBQualMatchConstraintName (GBQualPtr qual, StringConstraintPtr qual_name, StringConstraintPtr scp)
{
  CharPtr   found = NULL;
  GBQualPtr item;

  for (item = qual; item != NULL && found == NULL; item = item->next) {
    if (!DoesStringMatchConstraint (item->qual, qual_name)) {
      continue;
    }
    if (StringHasNoText (item->val)) {
      continue;
    }
    if (DoesStringMatchConstraint (item->val, scp)) {
      found = StringSave (item->val);
    }
  }
  return found;
}
2681
2682
2683
/* Removes from *list every qualifier named qual_name (case-insensitive) whose
 * value has text and satisfies scp.  Removed qualifiers are released with
 * GBQualFree.  Returns TRUE if anything was removed.
 */
static Boolean RemoveGBQualMatch (GBQualPtr PNTR list, CharPtr qual_name, StringConstraintPtr scp)
{
  GBQualPtr PNTR link;    /* address of the pointer that leads to the current qualifier */
  GBQualPtr      curr;
  Boolean        removed_any = FALSE;

  if (list == NULL) {
    return FALSE;
  }

  link = list;
  while ((curr = *link) != NULL) {
    if (StringICmp (curr->qual, qual_name) != 0
        || StringHasNoText (curr->val)
        || !DoesStringMatchConstraint (curr->val, scp)) {
      /* not a match: leave it in place and move on */
      link = &(curr->next);
      continue;
    }
    /* detach first so that only this one qualifier is freed */
    *link = curr->next;
    curr->next = NULL;
    GBQualFree (curr);
    removed_any = TRUE;
  }
  return removed_any;
}
2710
2711
2712
/* Removes from *list every qualifier whose name satisfies the qual_name
 * constraint and whose value has text and satisfies scp.  Removed qualifiers
 * are released with GBQualFree.  Returns TRUE if anything was removed.
 */
static Boolean RemoveGBQualMatchConstraintName (GBQualPtr PNTR list, StringConstraintPtr qual_name, StringConstraintPtr scp)
{
  GBQualPtr PNTR link;    /* address of the pointer that leads to the current qualifier */
  GBQualPtr      curr;
  Boolean        removed_any = FALSE;

  if (list == NULL) {
    return FALSE;
  }

  link = list;
  while ((curr = *link) != NULL) {
    if (!DoesStringMatchConstraint (curr->qual, qual_name)
        || StringHasNoText (curr->val)
        || !DoesStringMatchConstraint (curr->val, scp)) {
      /* not a match: leave it in place and move on */
      link = &(curr->next);
      continue;
    }
    /* detach first so that only this one qualifier is freed */
    *link = curr->next;
    curr->next = NULL;
    GBQualFree (curr);
    removed_any = TRUE;
  }
  return removed_any;
}
2739
2740
2741
/* Builds a semicolon-separated list of the feature's db_xref strings that
 * satisfy scp.
 *
 * Returns a newly allocated string, or NULL when sfp is NULL, has no
 * db_xrefs, no db_xref string matches, or the allocation fails.
 */
static CharPtr GetDbxrefString (SeqFeatPtr sfp, StringConstraintPtr scp)
{
  ValNodePtr vnp;
  Int4       len = 0;
  CharPtr    str = NULL, cp;
  
  if (sfp == NULL || sfp->dbxref == NULL) {
    return NULL;
  }
  
  /* first pass: total length of all matching strings, one separator each */
  for (vnp = sfp->dbxref; vnp != NULL; vnp = vnp->next) {
    cp = GetDbtagString (vnp->data.ptrvalue);
    if (cp != NULL && DoesStringMatchConstraint(cp, scp)) {
      len += StringLen (cp) + 1;
    }
    cp = MemFree (cp);
  }
  
  if (len == 0) {
    return NULL;
  }
  
  /* second pass: concatenate "value;" for every matching db_xref */
  str = (CharPtr) MemNew ((len + 1) * sizeof (Char));
  if (str != NULL) {
    for (vnp = sfp->dbxref; vnp != NULL; vnp = vnp->next) {
      cp = GetDbtagString (vnp->data.ptrvalue);
      if (cp != NULL && DoesStringMatchConstraint(cp, scp)) {
        StringCat (str, cp);
        StringCat (str, ";");
      }
      cp = MemFree (cp);
    }
  }
  if (StringLen (str) > 1) {
    /* remove final semicolon: it is the last character, i.e. index len - 1
     * (index len - 2 would also chop the last character of the last value)
     */
    str [StringLen (str) - 1] = 0;
  }
  return str;
}
2780
2781
2782
/* Removes from sfp->dbxref every db_xref whose string form (as produced by
 * GetDbtagString) satisfies scp, freeing both the Dbtag and its list node.
 *
 * Returns TRUE if at least one db_xref was removed, FALSE otherwise
 * (including when sfp is NULL or has no db_xrefs).
 */
static Boolean RemoveDbxrefString (SeqFeatPtr sfp, StringConstraintPtr scp)
{
  ValNodePtr vnp, vnp_prev = NULL, vnp_next;
  CharPtr    cp;
  Boolean    rval = FALSE;
  
  if (sfp == NULL || sfp->dbxref == NULL) {
    return FALSE;
  }
  
  vnp = sfp->dbxref;
  while (vnp != NULL) {
    /* remember the successor now: vnp may be freed below */
    vnp_next = vnp->next;
    cp = GetDbtagString (vnp->data.ptrvalue);
    if (DoesStringMatchConstraint(cp, scp)) {
      if (vnp_prev == NULL) {
        sfp->dbxref = vnp->next;
      } else {
        vnp_prev->next = vnp->next;
      }
      vnp->next = NULL;
      vnp->data.ptrvalue = DbtagFree (vnp->data.ptrvalue);
      vnp = ValNodeFree (vnp);
      rval = TRUE;
    } else {
      vnp_prev = vnp;
    }
    /* the string from GetDbtagString is a temporary owned by this loop */
    cp = MemFree (cp);
    /* advance; without this the loop never terminates */
    vnp = vnp_next;
  }
  return rval;  
}
2812
2813
2814
/* Returns a newly allocated copy of the product name of an RNA feature if it
 * satisfies scp, or NULL otherwise.
 *
 * When the RnaRef ext is unset, is a blank name, or is one of the generic
 * names "ncRNA", "tmRNA" or "misc_RNA", the name is looked up in the
 * "product" qualifier.  Otherwise a specific ext name (choice 1) is used, or,
 * for a tRNA ext (choice 2), "tRNA-" followed by the feature's indexed label.
 */
static CharPtr GetRNAProductString (SeqFeatPtr sfp, StringConstraintPtr scp)
{
  RnaRefPtr         rrp;
  SeqMgrFeatContext context;
  CharPtr           ext_name = NULL;
  CharPtr           product = NULL;
  Boolean           has_specific_name = FALSE;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL) {
    return NULL;
  }

  rrp = sfp->data.value.ptrvalue;

  /* a "specific" name is a non-blank choice-1 name that is not a generic RNA class */
  if (rrp->ext.choice == 1) {
    ext_name = rrp->ext.value.ptrvalue;
    if (!StringHasNoText (ext_name)
        && StringCmp (ext_name, "ncRNA") != 0
        && StringCmp (ext_name, "tmRNA") != 0
        && StringCmp (ext_name, "misc_RNA") != 0) {
      has_specific_name = TRUE;
    }
  }

  if (rrp->ext.choice == 0 || (rrp->ext.choice == 1 && !has_specific_name)) {
    product = GetFirstGBQualMatch (sfp->qual, "product", scp);
  }
  if (product != NULL) {
    return product;
  }

  if (has_specific_name) {
    product = StringSave (ext_name);
  } else if (rrp->ext.choice == 2 && rrp->ext.value.ptrvalue != NULL) {
    if (SeqMgrGetDesiredFeature (sfp->idx.entityID, NULL, 0, 0, sfp, &context) != NULL
        && !StringHasNoText (context.label)
        && StringCmp (context.label, "tRNA") != 0) {
      product = (CharPtr) MemNew (sizeof (Char) + (StringLen (context.label) + 6));
      sprintf (product, "tRNA-%s", context.label);
    }
  }

  /* the constraint is applied to whatever name was assembled above */
  if (!DoesStringMatchConstraint (product, scp)) {
    product = MemFree (product);
  }
  return product;
}
2854
2855
2856
/* Returns TRUE when name_string is blank, or when it starts with "trna-"
 * (case-insensitive), is exactly 8 characters long, and ParseTRnaString
 * returns a non-zero result for it.  Returns FALSE otherwise.
 */
static Boolean IsParseabletRNAName (CharPtr name_string)
{
  if (StringHasNoText (name_string)) {
    return TRUE;
  }
  if (StringNICmp (name_string, "trna-", 5) == 0
      && StringLen (name_string) == 8
      && ParseTRnaString (name_string, NULL, NULL, TRUE) != 0) {
    return TRUE;
  }
  return FALSE;
}
2879
2880
2881
/* Sets the product name of an RNA feature to new_val.
 *
 * sfp           - feature to modify; must be an RNA feature with an RnaRef
 * scp           - constraint the existing product text must satisfy
 * new_val       - text to apply
 * existing_text - passed through to SetStringValue / SetStringInGBQualList;
 *                 presumably selects how new_val combines with existing
 *                 text - confirm against those functions
 *
 * When the RnaRef ext is unset, blank, or one of the generic names "ncRNA",
 * "tmRNA", "misc_RNA", the "product" qualifier is tried first.  If that does
 * not apply the value, the ext name (choice 1) or the tRNA ext (choice 2) is
 * updated instead.
 *
 * Returns TRUE if a value was applied, FALSE otherwise.
 */
static Boolean SetRNAProductString (SeqFeatPtr sfp, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text)
{
  RnaRefPtr  rrp;
  Boolean rval = FALSE;
  ValNode vn;
  CharPtr cp, tmp;
  tRNAPtr trp;
  Boolean justTrnaText = FALSE;
  Uint1   codon [6];

  if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL) {
    return FALSE;
  }

  rrp = sfp->data.value.ptrvalue;
  if (rrp->ext.choice == 0 
      || (rrp->ext.choice == 1 && StringHasNoText (rrp->ext.value.ptrvalue))
      || (rrp->ext.choice == 1 
          && (StringCmp (rrp->ext.value.ptrvalue, "ncRNA") == 0 
              || StringCmp (rrp->ext.value.ptrvalue, "tmRNA") == 0
              || StringCmp (rrp->ext.value.ptrvalue, "misc_RNA") == 0))) {
    /* generic or missing ext name: the product lives in a GenBank qualifier */
    vn.choice = FeatQualChoice_legal_qual;
    vn.data.intvalue = Feat_qual_legal_product;

    rval = SetStringInGBQualList (&(sfp->qual), &vn, scp, new_val, existing_text);
  }

  if (!rval) {
    if ((rrp->ext.choice == 0 || (rrp->ext.choice == 1 && StringHasNoText (rrp->ext.value.ptrvalue)))
        && (scp == NULL || scp->match_text == NULL)) {
      /* nothing to match against and no constraint text: store the name directly */
      rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
      rrp->ext.value.ptrvalue = StringSave (new_val);
      rrp->ext.choice = 1;
      rval = TRUE;
    } else if (rrp->ext.choice == 1 
                && StringCmp (rrp->ext.value.ptrvalue, "ncRNA") != 0
                && StringCmp (rrp->ext.value.ptrvalue, "tmRNA") != 0
                && StringCmp (rrp->ext.value.ptrvalue, "misc_RNA") != 0
                && DoesStringMatchConstraint (rrp->ext.value.ptrvalue, scp)) {
      cp = rrp->ext.value.ptrvalue;
      /* NOTE(review): the result of SetStringValue is overwritten by TRUE
       * just below, so this branch reports success even if SetStringValue
       * returned FALSE - confirm whether that is intended.
       */
      rval = SetStringValue (&cp, new_val, existing_text);
      rrp->ext.value.ptrvalue = cp;
      rval = TRUE;
    } else if (rrp->ext.choice == 2) {
      /* work on a private copy of the current tRNA product name */
      tmp = GetRNAProductString (sfp, NULL);
      if (DoesStringMatchConstraint (tmp, scp)
          && SetStringValue (&tmp, new_val, existing_text)) {
        trp = (tRNAPtr) rrp->ext.value.ptrvalue;
        if (trp == NULL) {
          trp = MemNew (sizeof (tRNA));
          trp->aatype = 0;
          MemSet (trp->codon, 255, sizeof (trp->codon));
          trp->anticodon = NULL;
          rrp->ext.value.ptrvalue = trp;
        }

        if (!IsParseabletRNAName(tmp))
        {
          if (trp->anticodon == NULL
              && trp->codon[0] == 255
              && trp->codon[1] == 255
              && trp->codon[2] == 255
              && trp->codon[3] == 255
              && trp->codon[4] == 255
              && trp->codon[5] == 255)
          {
            /* tRNA ext holds no anticodon or codons: replace it with a plain name */
            trp = MemFree (trp);
            rrp->ext.choice = 1;
            rrp->ext.value.ptrvalue = tmp;
            /* ownership of tmp moved into the RnaRef */
            tmp = NULL;
            rval = TRUE;
          }
          else
          {
            /* keep the tRNA ext; store the unparseable name as a qualifier */
            vn.choice = FeatQualChoice_legal_qual;
            vn.data.intvalue = Feat_qual_legal_product;
            if (SetStringInGBQualList (&(sfp->qual), &vn, scp, new_val, existing_text)) {
              trp->aa = 0;
              rval = TRUE;
            }
          }
        }
        else
        {
          trp->aa = ParseTRnaString (tmp, &justTrnaText, codon, TRUE);
          trp->aatype = 2;
          rval = TRUE;
        }
      }
      /* free the working copy on every path, including when the constraint
       * or SetStringValue check above failed (previously leaked); this is a
       * no-op when ownership was transferred and tmp is NULL
       */
      tmp = MemFree (tmp);
    }
  }
  return rval;
}
2975
2976
2977
/* Removes the product name of an RNA feature when it satisfies scp.
 *
 * When the RnaRef ext is unset, blank, or one of the generic names "ncRNA",
 * "tmRNA", "misc_RNA", matching "product" qualifiers are removed.  When the
 * ext carries a specific name (choice 1) that satisfies scp, the name is
 * freed and the ext choice is reset to 0.
 *
 * Returns TRUE if something was removed, FALSE otherwise.
 */
static Boolean RemoveRNAProductString (SeqFeatPtr sfp, StringConstraintPtr scp)
{
  RnaRefPtr rrp;
  CharPtr   ext_name = NULL;
  Boolean   has_specific_name = FALSE;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL) {
    return FALSE;
  }

  rrp = sfp->data.value.ptrvalue;

  /* a "specific" name is a non-blank choice-1 name that is not a generic RNA class */
  if (rrp->ext.choice == 1) {
    ext_name = rrp->ext.value.ptrvalue;
    if (!StringHasNoText (ext_name)
        && StringCmp (ext_name, "ncRNA") != 0
        && StringCmp (ext_name, "tmRNA") != 0
        && StringCmp (ext_name, "misc_RNA") != 0) {
      has_specific_name = TRUE;
    }
  }

  if (rrp->ext.choice == 0 || (rrp->ext.choice == 1 && !has_specific_name)) {
    return RemoveGBQualMatch (&(sfp->qual), "product", scp);
  }

  if (has_specific_name && DoesStringMatchConstraint (ext_name, scp)) {
    rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
    rrp->ext.choice = 0;
    return TRUE;
  }
  return FALSE;
}
3008
3009
3010
/* Finds a protein feature on protbsp.
 *
 * The indexed lookup (SeqMgrGetNextFeature with FEATDEF_PROT) is tried first.
 * If it finds nothing, the Bioseq's annotations of type 1 are walked by hand
 * and the first protein feature that has a ProtRef with processed == 0 is
 * returned.  Returns NULL if protbsp is NULL or no such feature exists.
 */
static SeqFeatPtr GetProtFeature (BioseqPtr protbsp)
{
  SeqMgrFeatContext fcontext;
  SeqAnnotPtr       sap;
  SeqFeatPtr        found;
  ProtRefPtr        prp;

  if (protbsp == NULL) {
    return NULL;
  }

  found = SeqMgrGetNextFeature (protbsp, NULL, 0, FEATDEF_PROT, &fcontext);

  /* fallback: only entered while nothing has been found yet */
  for (sap = protbsp->annot; sap != NULL && found == NULL; sap = sap->next) {
    if (sap->type != 1) {
      continue;
    }
    for (found = sap->data; found != NULL; found = found->next) {
      if (found->data.choice != SEQFEAT_PROT) {
        continue;
      }
      prp = found->data.value.ptrvalue;
      if (prp != NULL && prp->processed == 0) {
        break;
      }
    }
  }
  return found;
}
3037
3038
3039
/* Returns the ProtRef associated with a feature, or NULL if there is none.
 *
 * For a protein feature this is the feature's own data.  For a coding region
 * it is the data of the first protein xref; if that yields nothing and the
 * feature has a product, the ProtRef of the protein feature found on the
 * product Bioseq is used.  Other feature types yield NULL.
 */
static ProtRefPtr GetProtRefForFeature (SeqFeatPtr sfp)
{
  SeqFeatXrefPtr xref;
  SeqFeatPtr     protsfp;
  BioseqPtr      protbsp;
  ProtRefPtr     prp = NULL;

  if (sfp == NULL) {
    return NULL;
  }
  if (sfp->data.choice == SEQFEAT_PROT) {
    return (ProtRefPtr) sfp->data.value.ptrvalue;
  }
  if (sfp->data.choice != SEQFEAT_CDREGION) {
    return NULL;
  }

  /* only the first protein xref is considered, even if its data is NULL */
  for (xref = sfp->xref; xref != NULL; xref = xref->next) {
    if (xref->data.choice == SEQFEAT_PROT) {
      prp = xref->data.value.ptrvalue;
      break;
    }
  }

  if (prp == NULL && sfp->product != NULL) {
    protbsp = BioseqFindFromSeqLoc (sfp->product);
    protsfp = GetProtFeature (protbsp);
    if (protsfp != NULL) {
      prp = protsfp->data.value.ptrvalue;
    }
  }
  return prp;
}
3068
3069
3070
NLM_EXTERN CharPtr GetQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp)
3071
{
3072
  CharPtr   str = NULL;
3073
  GeneRefPtr grp = NULL;
3074
  ProtRefPtr prp = NULL;
3075
  Int4      gbqual;
3076
3077
  if (sfp == NULL || field == NULL || field->field == NULL)
3078
  {
3079
    return NULL;
3080
  }
3081
  if (field->type != Feature_type_any && sfp->idx.subtype != GetFeatdefFromFeatureType (field->type))
3082
  {
3083
    return NULL;
3084
  }
3085
3086
  // for gene fields
3087
  if (sfp->idx.subtype == FEATDEF_GENE) {
3088
    grp = sfp->data.value.ptrvalue;
3089
  } else {
3090
    grp = SeqMgrGetGeneXref (sfp);
3091
  }
3092
3093
  // for protein fields
3094
  prp = GetProtRefForFeature (sfp);
3095
3096
  /* fields common to all features */
3097
  /* note, also known as comment */
3098
  if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_note)
3099
      || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("note", field->field->data.ptrvalue)))
3100
  {
3101
    if (!StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp))
3102
    {
3103
      str = StringSave (sfp->comment);
3104
    }
3105
  }
3106
  /* db-xref */
3107
  if (str == NULL
3108
      && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_db_xref)
3109
          || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("db_xref", field->field->data.ptrvalue))))
3110
  {
3111
    str = GetDbxrefString (sfp, scp);
3112
  }
3113
  /* exception */
3114
  if (str == NULL 
3115
      && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_exception)
3116
          || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("exception", field->field->data.ptrvalue))))
3117
  {
3118
    if (!StringHasNoText (sfp->except_text) && DoesStringMatchConstraint(sfp->except_text, scp))
3119
    {
3120
      str = StringSave (sfp->except_text);
3121
    }
3122
  }
3123
  /* evidence */
3124
  if (str == NULL
3125
      && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_evidence)
3126
          || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("evidence", field->field->data.ptrvalue))))
3127
  {
3128
    if (sfp->exp_ev == 1)
3129
    {
3130
      str = StringSave ("experimental");
3131
    }
3132
    else if (sfp->exp_ev == 2)
3133
    {
3134
      str = StringSave ("non-experimental");
3135
    }
3136
    if (!DoesStringMatchConstraint(str, scp)) {
3137
      str = MemFree (str);
3138
    }
3139
  }
3140
3141
  /* fields common to some features */
3142
  /* product */
3143
  if (str == NULL
3144
      && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_product)
3145
          || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("product", field->field->data.ptrvalue))))
3146
  {
3147
    if (prp != NULL) {
3148
      str = GetFirstValNodeStringMatch (prp->name, scp);
3149
    } else if (sfp->data.choice == SEQFEAT_RNA) {
3150
      str = GetRNAProductString (sfp, scp);
3151
    }
3152
  }
3153
3154
  /* Gene fields */
3155
  /* locus */
3156
  if (str == NULL 
3157
       && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_gene)
3158
           || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus", field->field->data.ptrvalue)))
3159
       && grp != NULL)
3160
  {
3161
    if (!StringHasNoText (grp->locus) && DoesStringMatchConstraint(grp->locus, scp))
3162
    {
3163
      str = StringSave (grp->locus);
3164
    }
3165
  }
3166
  /* description */
3167
  if (str == NULL 
3168
       && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_gene_description)
3169
           || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->field->data.ptrvalue)))
3170
       && grp != NULL)
3171
  {
3172
    if (!StringHasNoText (grp->desc) && DoesStringMatchConstraint(grp->desc, scp))
3173
    {
3174
      str = StringSave (grp->desc);
3175
    }
3176
  }
3177
  /* maploc */
3178
  if (str == NULL 
3179
       && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_map)
3180
           || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("map", field->field->data.ptrvalue)))
3181
       && grp != NULL)
3182
  {
3183
    if (!StringHasNoText (grp->maploc) && DoesStringMatchConstraint(grp->maploc, scp))
3184
    {
3185
      str = StringSave (grp->maploc);
3186
    }
3187
  }
3188
  /* allele */
3189
  if (str == NULL 
3190
       && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_allele)
3191
           || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("allele", field->field->data.ptrvalue)))
3192
       && grp != NULL)
3193
  {
3194
    if (!StringHasNoText (grp->allele) && DoesStringMatchConstraint(grp->allele, scp))
3195
    {
3196
      str = StringSave (grp->allele);
3197
    }
3198
  }
3199
  /* locus_tag */
3200
  if (str == NULL 
3201
       && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_locus_tag)
3202
           || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus_tag", field->field->data.ptrvalue)))
3203
       && grp != NULL)
3204
  {
3205
    if (!StringHasNoText (grp->locus_tag) && DoesStringMatchConstraint(grp->locus_tag, scp))
3206
    {
3207
      str = StringSave (grp->locus_tag);
3208
    }
3209
  }
3210
  /* synonym */
3211
  if (str == NULL 
3212
       && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_synonym)
3213
           || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("synonym", field->field->data.ptrvalue)))
3214
       && grp != NULL)
3215
  {
3216
    str = GetFirstValNodeStringMatch (grp->syn, scp);
3217
  }
3218
3219
3220
  /* protein fields */
3221
  /* note - product handled above */
3222
  /* description */
3223
  if (str == NULL 
3224
       && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_description)
3225
           || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->field->data.ptrvalue)))
3226
       && prp != NULL)
3227
  {
3228
    if (!StringHasNoText (prp->desc) && DoesStringMatchConstraint(prp->desc, scp)) {
3229
      str = StringSave (prp->desc);
3230
    }
3231
  }
3232
  /* ec_number */
3233
  if (str == NULL 
3234
       && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_ec_number)
3235
           || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ec_number", field->field->data.ptrvalue)))
3236
       && prp != NULL)
3237
  {
3238
    str = GetFirstValNodeStringMatch (prp->ec, scp);
3239
  }
3240
  /* activity */
3241
  if (str == NULL 
3242
       && ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_activity)
3243
           || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("activity", field->field->data.ptrvalue)))
3244
       && prp != NULL)
3245
  {
3246
    str = GetFirstValNodeStringMatch (prp->activity, scp);
3247
  }
3248
  
3249
3250
  /* actual GenBank qualifiers */
3251
  if (str == NULL)
3252
  {
3253
    if (field->field->choice == FeatQualChoice_legal_qual) 
3254
    {
3255
      gbqual = GetGBQualFromFeatQual (field->field->data.intvalue);
3256
      if (gbqual > -1) {
3257
        str = GetFirstGBQualMatch (sfp->qual, ParFlat_GBQual_names [gbqual].name, scp);
3258
      } else {
3259
        /* need to do something with non-qualifier qualifiers */
3260
      }
3261
    } else {
3262
      str = GetFirstGBQualMatchConstraintName (sfp->qual, field->field->data.ptrvalue, scp);
3263
    }
3264
  }
3265
  return str;
3266
}
3267
3268
3269
static Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp)
3270
{
3271
  Boolean rval = FALSE;
3272
  GeneRefPtr grp = NULL;
3273
  ProtRefPtr prp = NULL;
3274
  RnaRefPtr  rrp;
3275
  tRNAPtr trp;
3276
  Int4      gbqual;
3277
3278
  if (sfp == NULL || field == NULL || field->field == NULL)
3279
  {
3280
    return FALSE;
3281
  }
3282
  if (field->type != Feature_type_any && sfp->idx.subtype != GetFeatdefFromFeatureType (field->type))
3283
  {
3284
    return FALSE;
3285
  }
3286
3287
  // for gene fields
3288
  if (sfp->idx.subtype == FEATDEF_GENE) {
3289
    grp = sfp->data.value.ptrvalue;
3290
  } else {
3291
    grp = SeqMgrGetGeneXref (sfp);
3292
  }
3293
3294
  // for protein fields
3295
  prp = GetProtRefForFeature (sfp);
3296
3297
  // for RNA fields
3298
  if (sfp->data.choice == SEQFEAT_RNA) {
3299
    rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
3300
  }
3301
3302
  /* fields common to all features */
3303
  /* note, also known as comment */
3304
  if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_note)
3305
      || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("note", field->field->data.ptrvalue)))
3306
  {
3307
    if (!StringHasNoText (sfp->comment) && DoesStringMatchConstraint (sfp->comment, scp))
3308
    {
3309
      sfp->comment = MemFree (sfp->comment);
3310
      rval = TRUE;
3311
    }
3312
  }
3313
  /* db-xref */
3314
  if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_db_xref)
3315
      || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("db_xref", field->field->data.ptrvalue)))
3316
  {
3317
    rval = RemoveDbxrefString (sfp, scp);
3318
  }
3319
  /* exception */
3320
  if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_exception)
3321
          || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("exception", field->field->data.ptrvalue)))
3322
  {
3323
    if (!StringHasNoText (sfp->except_text) && DoesStringMatchConstraint (sfp->except_text, scp))
3324
    {
3325
      sfp->except_text = MemFree (sfp->except_text);
3326
      rval = TRUE;
3327
    }
3328
  }
3329
  /* evidence */
3330
  if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_evidence)
3331
          || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("evidence", field->field->data.ptrvalue)))
3332
  {
3333
    if ((sfp->exp_ev == 1 && DoesStringMatchConstraint("experimental", scp))
3334
        || (sfp->exp_ev == 2 && DoesStringMatchConstraint("non-experimental", scp))) {
3335
      sfp->exp_ev = 0;
3336
      rval = TRUE;
3337
    }
3338
  }
3339
3340
  /* fields common to some features */
3341
  /* product */
3342
  if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_product)
3343
          || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("product", field->field->data.ptrvalue)))
3344
  {
3345
    if (prp != NULL) {
3346
      rval = RemoveValNodeStringMatch (&(prp->name), scp);
3347
    } else if (sfp->data.choice == SEQFEAT_RNA) {
3348
      rval = RemoveRNAProductString (sfp, scp);
3349
    }
3350
  }
3351
3352
  /* Gene fields */
3353
  /* locus */
3354
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_gene)
3355
       || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus", field->field->data.ptrvalue)))
3356
      && grp != NULL)
3357
  {
3358
    if (!StringHasNoText (grp->locus) && DoesStringMatchConstraint (grp->locus, scp)) {
3359
      grp->locus = MemFree (grp->locus);
3360
      rval = TRUE;
3361
    }
3362
  }
3363
  /* description */
3364
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_gene_description)
3365
       || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->field->data.ptrvalue)))
3366
      && grp != NULL)
3367
  {
3368
    if (!StringHasNoText (grp->desc) && DoesStringMatchConstraint(grp->desc, scp))
3369
    {
3370
      grp->desc = MemFree (grp->desc);
3371
      rval = TRUE;
3372
    }
3373
  }
3374
  /* maploc */
3375
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_map)
3376
           || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("map", field->field->data.ptrvalue)))
3377
       && grp != NULL)
3378
  {
3379
    if (!StringHasNoText (grp->maploc) && DoesStringMatchConstraint(grp->maploc, scp))
3380
    {
3381
      grp->maploc = MemFree (grp->maploc);
3382
      rval = TRUE;
3383
    }
3384
  }
3385
  /* allele */
3386
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_allele)
3387
           || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("allele", field->field->data.ptrvalue)))
3388
      && grp != NULL)
3389
  {
3390
    if (!StringHasNoText (grp->allele) && DoesStringMatchConstraint(grp->allele, scp))
3391
    {
3392
      grp->allele = MemFree (grp->allele);
3393
      rval = TRUE;
3394
    }
3395
  }
3396
  /* locus_tag */
3397
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_locus_tag)
3398
           || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus_tag", field->field->data.ptrvalue)))
3399
       && grp != NULL)
3400
  {
3401
    if (!StringHasNoText (grp->locus_tag) && DoesStringMatchConstraint(grp->locus_tag, scp))
3402
    {
3403
      grp->locus_tag = MemFree (grp->locus_tag);
3404
      rval = TRUE;
3405
    }
3406
  }
3407
  /* synonym */
3408
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_synonym)
3409
           || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("synonym", field->field->data.ptrvalue)))
3410
       && grp != NULL)
3411
  {
3412
    rval = RemoveValNodeStringMatch (&(grp->syn), scp);
3413
  }
3414
3415
  /* protein fields */
3416
  /* note - product handled above */
3417
  /* description */
3418
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_description)
3419
           || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->field->data.ptrvalue)))
3420
       && prp != NULL)
3421
  {
3422
    if (!StringHasNoText (prp->desc) && DoesStringMatchConstraint(prp->desc, scp)) {
3423
      prp->desc = MemFree (prp->desc);
3424
      rval = TRUE;
3425
    }
3426
  }
3427
  /* ec_number */
3428
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_ec_number)
3429
           || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ec_number", field->field->data.ptrvalue)))
3430
       && prp != NULL)
3431
  {
3432
    rval = RemoveValNodeStringMatch (&(prp->ec), scp);
3433
  }
3434
  /* activity */
3435
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_activity)
3436
           || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("activity", field->field->data.ptrvalue)))
3437
       && prp != NULL)
3438
  {
3439
    rval = RemoveValNodeStringMatch (&(prp->activity), scp);
3440
  }
3441
  
3442
  /* RNA fields */
3443
  /* anticodon */
3444
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_anticodon)
3445
           || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("anticodon", field->field->data.ptrvalue)))
3446
       && rrp != NULL && rrp->ext.choice == 2)
3447
  {
3448
    trp = (tRNAPtr) rrp->ext.value.ptrvalue;
3449
    if (trp != NULL && trp->anticodon != NULL) {
3450
      trp->anticodon = SeqLocFree (trp->anticodon);
3451
      rval = TRUE;
3452
    }
3453
  }
3454
  /* codons recognized */
3455
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_anticodon)
3456
           || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("anticodon", field->field->data.ptrvalue)))
3457
       && rrp != NULL && rrp->ext.choice == 2)
3458
  {
3459
    trp = (tRNAPtr) rrp->ext.value.ptrvalue;
3460
    if (trp != NULL && (trp->codon[0] != 255 || trp->codon[1] != 255 || trp->codon[2] != 255
3461
                        || trp->codon[3] != 255 || trp->codon[4] != 255 || trp->codon[5] != 255)) {
3462
      trp->codon [0] = 255;
3463
      trp->codon [1] = 255;
3464
      trp->codon [2] = 255;
3465
      trp->codon [3] = 255;
3466
      trp->codon [4] = 255;
3467
      trp->codon [5] = 255;
3468
      rval = TRUE;
3469
    }
3470
  }
3471
3472
  if (!rval) {
3473
    /* actual GenBank qualifiers */
3474
    if (field->field->choice == FeatQualChoice_legal_qual) 
3475
    {
3476
      gbqual = GetGBQualFromFeatQual (field->field->data.intvalue);
3477
      if (gbqual > -1) {
3478
        rval = RemoveGBQualMatch (&(sfp->qual), ParFlat_GBQual_names [gbqual].name, scp);
3479
      } else {
3480
        /* need to do something with non-qualifier qualifiers */
3481
      }
3482
    } else {
3483
      rval = RemoveGBQualMatchConstraintName (&(sfp->qual), field->field->data.ptrvalue, scp);
3484
    }
3485
  }
3486
3487
  return rval;
3488
}
3489
3490
3491
/* Tries reading frames 1, 2 and 3 on a coding region and keeps the one whose
 * translation (ProteinFromCdRegionEx) is longest.
 *
 * Returns TRUE and leaves the winning frame set when no other frame ties
 * with the longest length.  Otherwise the original frame is restored and
 * FALSE is returned.  Also returns FALSE when sfp is NULL, is not a coding
 * region, or has no CdRegion data.
 */
static Boolean ChooseBestFrame (SeqFeatPtr sfp)
{
  CdRegionPtr  crp;
  ByteStorePtr bs;
  Int4         prot_len [3];
  Int4         longest = 0;
  Uint1        best_frame = 0;
  Uint1        saved_frame;
  Uint1        frame;
  Boolean      is_unique = TRUE;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) {
    return FALSE;
  }
  crp = sfp->data.value.ptrvalue;
  if (crp == NULL) {
    return FALSE;
  }
  saved_frame = crp->frame;

  /* translate in each frame; the first frame to reach the maximum wins */
  for (frame = 1; frame <= 3; frame++) {
    crp->frame = frame;
    bs = ProteinFromCdRegionEx (sfp, FALSE, FALSE);
    prot_len [frame - 1] = BSLen (bs);
    BSFree (bs);
    if (prot_len [frame - 1] > longest) {
      longest = prot_len [frame - 1];
      best_frame = frame;
    }
  }

  /* any other frame reaching the same length makes the choice ambiguous */
  for (frame = 1; frame <= 3; frame++) {
    if (frame != best_frame && prot_len [frame - 1] == longest) {
      is_unique = FALSE;
    }
  }

  crp->frame = is_unique ? best_frame : saved_frame;
  return is_unique;
}
3529
3530
3531
static Boolean SetQualOnFeature (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
3532
{
3533
  Boolean rval = FALSE;
3534
  GeneRefPtr grp = NULL;
3535
  ProtRefPtr prp = NULL;
3536
  CharPtr    tmp;
3537
  CdRegionPtr crp;
3538
3539
  if (sfp == NULL || field == NULL || field->field == NULL)
3540
  {
3541
    return FALSE;
3542
  }
3543
  if (field->type != Feature_type_any && sfp->idx.subtype != GetFeatdefFromFeatureType (field->type))
3544
  {
3545
    return FALSE;
3546
  }
3547
3548
  // for gene fields
3549
  if (sfp->idx.subtype == FEATDEF_GENE) {
3550
    grp = sfp->data.value.ptrvalue;
3551
  } else {
3552
    grp = SeqMgrGetGeneXref (sfp);
3553
  }
3554
3555
  // for protein fields
3556
  prp = GetProtRefForFeature (sfp);
3557
3558
  /* fields common to all features */
3559
  /* note, also known as comment */
3560
  if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_note)
3561
      || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("note", field->field->data.ptrvalue)))
3562
  {
3563
    if (DoesStringMatchConstraint(sfp->comment, scp))
3564
    {
3565
      rval = SetStringValue ( &(sfp->comment), value, existing_text);
3566
    }
3567
  }
3568
  /* db-xref */
3569
  if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_db_xref)
3570
          || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("db_xref", field->field->data.ptrvalue)))
3571
  {
3572
    rval = SetDbxrefString (sfp, scp, value, existing_text);
3573
  }
3574
  /* exception */
3575
  if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_exception)
3576
          || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("exception", field->field->data.ptrvalue)))
3577
  {
3578
    if (DoesStringMatchConstraint(sfp->except_text, scp))
3579
    {
3580
      rval = SetStringValue ( &(sfp->except_text), value, existing_text);
3581
    }
3582
  }
3583
  /* evidence */
3584
  if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_evidence)
3585
          || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("evidence", field->field->data.ptrvalue)))
3586
  {
3587
    tmp = NULL;
3588
    if (sfp->exp_ev == 1)
3589
    {
3590
      tmp = StringSave ("experimental");
3591
    }
3592
    else if (sfp->exp_ev == 2)
3593
    {
3594
      tmp = StringSave ("non-experimental");
3595
    }
3596
    if (DoesStringMatchConstraint(tmp, scp)) {
3597
      rval = SetStringValue (&tmp, value, existing_text);
3598
      if (rval) {
3599
        rval = FALSE;
3600
        if (StringICmp (tmp, "experimental") == 0) {
3601
          sfp->exp_ev = 1;
3602
          rval = TRUE;
3603
        } else if (StringICmp (tmp, "non-experimental") == 0) {
3604
          sfp->exp_ev = 2;
3605
          rval = TRUE;
3606
        } else if (StringHasNoText (tmp)) {
3607
          sfp->exp_ev = 0;
3608
          rval = TRUE;
3609
        }
3610
      }
3611
    }
3612
    tmp = MemFree (tmp);
3613
  }
3614
  
3615
3616
  /* fields common to some features */
3617
  /* product */
3618
  if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_product)
3619
          || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("product", field->field->data.ptrvalue)))
3620
  {
3621
    if (prp != NULL) {
3622
      rval = SetStringsInValNodeStringList (&(prp->name), scp, value, existing_text);
3623
    } else if (sfp->data.choice == SEQFEAT_RNA) {
3624
      rval = SetRNAProductString (sfp, scp, value, existing_text);
3625
    }
3626
  }
3627
3628
  /* Gene fields */
3629
  /* locus */
3630
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_gene)
3631
           || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus", field->field->data.ptrvalue)))
3632
       && grp != NULL)
3633
  {
3634
    if (DoesStringMatchConstraint(grp->locus, scp))
3635
    {
3636
      rval = SetStringValue (&(grp->locus), value, existing_text);
3637
    }
3638
  }
3639
  /* description */
3640
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_gene_description)
3641
           || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->field->data.ptrvalue)))
3642
       && grp != NULL)
3643
  {
3644
    if (DoesStringMatchConstraint(grp->desc, scp))
3645
    {
3646
      rval = SetStringValue (&(grp->desc), value, existing_text);
3647
    }
3648
  }
3649
  /* maploc */
3650
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_map)
3651
           || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("map", field->field->data.ptrvalue)))
3652
       && grp != NULL)
3653
  {
3654
    if (DoesStringMatchConstraint(grp->maploc, scp))
3655
    {
3656
      rval = SetStringValue (&(grp->maploc), value, existing_text);
3657
    }
3658
  }
3659
  /* allele */
3660
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_allele)
3661
           || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("allele", field->field->data.ptrvalue)))
3662
       && grp != NULL)
3663
  {
3664
    if (DoesStringMatchConstraint(grp->allele, scp))
3665
    {
3666
      rval = SetStringValue (&(grp->allele), value, existing_text);
3667
    }
3668
  }
3669
  /* locus_tag */
3670
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_locus_tag)
3671
           || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus_tag", field->field->data.ptrvalue)))
3672
       && grp != NULL)
3673
  {
3674
    if (DoesStringMatchConstraint(grp->locus_tag, scp))
3675
    {
3676
      rval = SetStringValue (&(grp->locus_tag), value, existing_text);
3677
    }
3678
  }
3679
  /* synonym */
3680
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_synonym)
3681
           || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("synonym", field->field->data.ptrvalue)))
3682
       && grp != NULL)
3683
  {
3684
    rval = SetStringsInValNodeStringList (&(grp->syn), scp, value, existing_text);
3685
  }
3686
3687
3688
  /* protein fields */
3689
  /* note - product handled above */
3690
  /* description */
3691
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_description)
3692
           || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->field->data.ptrvalue)))
3693
       && prp != NULL)
3694
  {
3695
    if (DoesStringMatchConstraint(prp->desc, scp)) {
3696
      rval = SetStringValue (&(prp->desc), value, existing_text);
3697
    }
3698
  }
3699
  /* ec_number */
3700
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_ec_number)
3701
           || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ec_number", field->field->data.ptrvalue)))
3702
       && prp != NULL)
3703
  {
3704
    rval = SetStringsInValNodeStringList (&(prp->ec), scp, value, existing_text);
3705
  }
3706
  /* activity */
3707
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_activity)
3708
           || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("activity", field->field->data.ptrvalue)))
3709
       && prp != NULL)
3710
  {
3711
    rval = SetStringsInValNodeStringList (&(prp->activity), scp, value, existing_text);
3712
  }
3713
 
3714
  if (field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_codon_start
3715
      && sfp->data.choice == SEQFEAT_CDREGION) 
3716
  {
3717
    crp = (CdRegionPtr) sfp->data.value.ptrvalue;
3718
    if (StringICmp (value, "best") == 0)
3719
    {
3720
      rval = ChooseBestFrame (sfp);
3721
    }
3722
    else if (StringCmp (value, "1") == 0) 
3723
    {
3724
      crp->frame = 1;
3725
      rval = TRUE;
3726
    }
3727
    else if (StringCmp (value, "2") == 0) 
3728
    {
3729
      crp->frame = 2;
3730
      rval = TRUE;
3731
    }
3732
    else if (StringCmp (value, "3") == 0)
3733
    {
3734
      crp->frame = 3;
3735
      rval = TRUE;
3736
    } 
3737
  } 
3738
3739
  /* actual GenBank qualifiers */
3740
  if (!rval)
3741
  {
3742
    rval = SetStringInGBQualList (&(sfp->qual), field->field, scp, value, existing_text);
3743
  }
3744
  return rval;
3745
}
3746
3747
3748
/* GetSourceQualFromBioSource
 *
 * Looks up the source qualifier selected by scp in biop and returns a newly
 * allocated copy of its text (made with StringSave), or NULL if the qualifier
 * is absent, blank, or does not satisfy constraint.
 *
 * For text qualifiers, taxname/common name/lineage/division are read directly
 * from the OrgRef/OrgName; any other qualifier is looked up first as an OrgMod
 * subtype and, failing that mapping, as a SubSource subtype.  The first
 * modifier that matches is returned.  A modifier with blank text yields the
 * string "TRUE" when IsNonTextSourceQual reports the qualifier as non-text.
 *
 * For location and origin, the name produced by LocNameFromGenome /
 * OriginNameFromOrigin is returned when it satisfies constraint.
 *
 * Returns NULL when biop or scp is NULL, or for any other choice value.
 */
NLM_EXTERN CharPtr GetSourceQualFromBioSource (BioSourcePtr biop, SourceQualChoicePtr scp, StringConstraintPtr constraint)
{
  CharPtr      result = NULL;
  CharPtr      candidate = NULL;   /* borrowed pointer, copied before returning */
  CharPtr      fixed_name;
  SubSourcePtr sub;
  OrgModPtr    om;
  Int4         qual;
  Int4         om_type = -1, sub_type = -1;
  Boolean      have_org, have_orgname;

  if (biop == NULL || scp == NULL) {
    return NULL;
  }

  if (scp->choice == SourceQualChoice_location) {
    fixed_name = LocNameFromGenome (biop->genome);
    if (DoesStringMatchConstraint (fixed_name, constraint)) {
      result = StringSave (fixed_name);
    }
    return result;
  }

  if (scp->choice == SourceQualChoice_origin) {
    fixed_name = OriginNameFromOrigin (biop->origin);
    if (DoesStringMatchConstraint (fixed_name, constraint)) {
      result = StringSave (fixed_name);
    }
    return result;
  }

  if (scp->choice != SourceQualChoice_textqual) {
    return NULL;
  }

  qual = scp->data.intvalue;
  have_org = (Boolean) (biop->org != NULL);
  have_orgname = (Boolean) (have_org && biop->org->orgname != NULL);

  if (qual == Source_qual_taxname) {
    if (have_org) {
      candidate = biop->org->taxname;
    }
  } else if (qual == Source_qual_common_name) {
    if (have_org) {
      candidate = biop->org->common;
    }
  } else if (qual == Source_qual_lineage) {
    if (have_orgname) {
      candidate = biop->org->orgname->lineage;
    }
  } else if (qual == Source_qual_division) {
    if (have_orgname) {
      candidate = biop->org->orgname->div;
    }
  } else {
    /* modifier lists: OrgMod if the qualifier maps to one, else SubSource */
    om_type = GetOrgModQualFromSrcQual (qual);
    if (om_type == -1) {
      sub_type = GetSubSrcQualFromSrcQual (qual);
      for (sub = biop->subtype; sub != NULL && result == NULL; sub = sub->next) {
        if (sub->subtype != sub_type) {
          continue;
        }
        if (!StringHasNoText (sub->name)) {
          if (DoesStringMatchConstraint (sub->name, constraint)) {
            result = StringSave (sub->name);
          }
        } else if (IsNonTextSourceQual (qual)
                   && DoesStringMatchConstraint ("TRUE", constraint)) {
          result = StringSave ("TRUE");
        }
      }
    } else if (have_orgname) {
      for (om = biop->org->orgname->mod; om != NULL && result == NULL; om = om->next) {
        if (om->subtype != om_type) {
          continue;
        }
        if (!StringHasNoText (om->subname)) {
          if (DoesStringMatchConstraint (om->subname, constraint)) {
            result = StringSave (om->subname);
          }
        } else if (IsNonTextSourceQual (qual)
                   && DoesStringMatchConstraint ("TRUE", constraint)) {
          result = StringSave ("TRUE");
        }
      }
    }
    return result;
  }

  /* the four direct OrgRef/OrgName fields share the same acceptance test */
  if (!StringHasNoText (candidate) && DoesStringMatchConstraint (candidate, constraint)) {
    result = StringSave (candidate);
  }
  return result;
}
3837
3838
3839
/* RemoveSourceQualFromBioSource
 *
 * Removes the source qualifier selected by scp from biop when its current
 * value satisfies constraint.
 *
 * Text qualifiers: taxname, common name, lineage and division are freed and
 * set to NULL (only if currently non-blank).  Any other qualifier is mapped
 * to an OrgMod subtype or, failing that, a SubSource subtype, and every
 * modifier of that subtype whose text satisfies constraint is unlinked from
 * its list and freed.
 *
 * Location / origin: the value is reset to 0 when its name satisfies
 * constraint and either scp->data.intvalue is 0 or the current value equals
 * the one scp->data.intvalue translates to.
 *
 * Returns TRUE if anything was removed, FALSE otherwise (including when biop
 * or scp is NULL).
 */
static Boolean RemoveSourceQualFromBioSource (BioSourcePtr biop, SourceQualChoicePtr scp, StringConstraintPtr constraint)
{
  SubSourcePtr  sub;
  SubSourcePtr *sub_link;      /* slot that currently points at sub */
  OrgModPtr     om;
  OrgModPtr    *om_link;       /* slot that currently points at om */
  CharPtr      *field_slot = NULL;
  CharPtr       fixed_name;
  Int4          qual;
  Int4          om_type = -1, sub_type = -1;
  Boolean       have_org, have_orgname;
  Boolean       removed = FALSE;

  if (biop == NULL || scp == NULL) {
    return FALSE;
  }

  if (scp->choice == SourceQualChoice_location) {
    fixed_name = LocNameFromGenome (biop->genome);
    if (DoesStringMatchConstraint (fixed_name, constraint)
        && (scp->data.intvalue == 0 || biop->genome == GenomeFromSrcLoc (scp->data.intvalue))) {
      biop->genome = 0;
      removed = TRUE;
    }
    return removed;
  }

  if (scp->choice == SourceQualChoice_origin) {
    fixed_name = OriginNameFromOrigin (biop->origin);
    if (DoesStringMatchConstraint (fixed_name, constraint)
        && (scp->data.intvalue == 0 || biop->origin == OriginFromSrcOrig (scp->data.intvalue))) {
      biop->origin = 0;
      removed = TRUE;
    }
    return removed;
  }

  if (scp->choice != SourceQualChoice_textqual) {
    return FALSE;
  }

  qual = scp->data.intvalue;
  have_org = (Boolean) (biop->org != NULL);
  have_orgname = (Boolean) (have_org && biop->org->orgname != NULL);

  if (qual == Source_qual_taxname) {
    if (have_org) {
      field_slot = &(biop->org->taxname);
    }
  } else if (qual == Source_qual_common_name) {
    if (have_org) {
      field_slot = &(biop->org->common);
    }
  } else if (qual == Source_qual_lineage) {
    if (have_orgname) {
      field_slot = &(biop->org->orgname->lineage);
    }
  } else if (qual == Source_qual_division) {
    if (have_orgname) {
      field_slot = &(biop->org->orgname->div);
    }
  } else {
    om_type = GetOrgModQualFromSrcQual (qual);
    if (om_type == -1) {
      sub_type = GetSubSrcQualFromSrcQual (qual);
      sub_link = &(biop->subtype);
      while ((sub = *sub_link) != NULL) {
        if (sub->subtype == sub_type
            && DoesStringMatchConstraint (sub->name, constraint)) {
          /* detach the node before freeing so only this node is released */
          *sub_link = sub->next;
          sub->next = NULL;
          sub = SubSourceFree (sub);
          removed = TRUE;
        } else {
          sub_link = &(sub->next);
        }
      }
    } else if (have_orgname) {
      om_link = &(biop->org->orgname->mod);
      while ((om = *om_link) != NULL) {
        if (om->subtype == om_type
            && DoesStringMatchConstraint (om->subname, constraint)) {
          /* detach the node before freeing so only this node is released */
          *om_link = om->next;
          om->next = NULL;
          om = OrgModFree (om);
          removed = TRUE;
        } else {
          om_link = &(om->next);
        }
      }
    }
    return removed;
  }

  /* direct OrgRef/OrgName string fields */
  if (field_slot != NULL
      && !StringHasNoText (*field_slot)
      && DoesStringMatchConstraint (*field_slot, constraint)) {
    *field_slot = MemFree (*field_slot);
    removed = TRUE;
  }
  return removed;
}
3943
3944
3945
/* SetSourceQualInBioSource
 *
 * Applies value to the source qualifier selected by scp in biop.
 *
 *   constraint     the current value must satisfy this before it is changed
 *   value          new text (for text qualifiers)
 *   existing_text  passed through unchanged to SetStringValue
 *
 * Text qualifiers: taxname, common name, lineage and division are edited in
 * place when the owning OrgRef/OrgName exists.  Any other qualifier is mapped
 * to an OrgMod subtype or, failing that, a SubSource subtype.  Every existing
 * modifier of that subtype whose text satisfies constraint is edited; if the
 * edit succeeds and leaves a text qualifier blank, the modifier is unlinked
 * and freed.  When no existing modifier qualified and constraint is empty, a
 * new modifier is created and appended (creating the OrgRef/OrgName first if
 * needed for an OrgMod).
 *
 * Location / origin: when the name of the current value satisfies constraint,
 * the value is replaced by the one scp->data.intvalue translates to.
 *
 * Returns the result of the last edit attempted (TRUE for location/origin
 * replacement); FALSE if nothing was attempted or biop/scp is NULL.
 */
NLM_EXTERN Boolean SetSourceQualInBioSource (BioSourcePtr biop, SourceQualChoicePtr scp, StringConstraintPtr constraint, CharPtr value, Uint2 existing_text)
{
  SubSourcePtr  sub;
  SubSourcePtr *sub_link;          /* slot that currently points at sub */
  OrgModPtr     om;
  OrgModPtr    *om_link = NULL;    /* slot that currently points at om */
  CharPtr      *field_slot = NULL;
  CharPtr       fixed_name;
  Int4          qual;
  Int4          om_type = -1, sub_type = -1;
  Boolean       have_org, have_orgname;
  Boolean       changed = FALSE, matched_existing = FALSE;

  if (biop == NULL || scp == NULL) {
    return FALSE;
  }

  if (scp->choice == SourceQualChoice_location) {
    fixed_name = LocNameFromGenome (biop->genome);
    if (DoesStringMatchConstraint (fixed_name, constraint)) {
      biop->genome = GenomeFromSrcLoc (scp->data.intvalue);
      changed = TRUE;
    }
    return changed;
  }

  if (scp->choice == SourceQualChoice_origin) {
    fixed_name = OriginNameFromOrigin (biop->origin);
    if (DoesStringMatchConstraint (fixed_name, constraint)) {
      biop->origin = OriginFromSrcOrig (scp->data.intvalue);
      changed = TRUE;
    }
    return changed;
  }

  if (scp->choice != SourceQualChoice_textqual) {
    return FALSE;
  }

  qual = scp->data.intvalue;
  have_org = (Boolean) (biop->org != NULL);
  have_orgname = (Boolean) (have_org && biop->org->orgname != NULL);

  if (qual == Source_qual_taxname) {
    if (have_org) {
      field_slot = &(biop->org->taxname);
    }
  } else if (qual == Source_qual_common_name) {
    if (have_org) {
      field_slot = &(biop->org->common);
    }
  } else if (qual == Source_qual_lineage) {
    if (have_orgname) {
      field_slot = &(biop->org->orgname->lineage);
    }
  } else if (qual == Source_qual_division) {
    if (have_orgname) {
      field_slot = &(biop->org->orgname->div);
    }
  } else {
    om_type = GetOrgModQualFromSrcQual (qual);
    if (om_type == -1) {
      /* SubSource modifier */
      sub_type = GetSubSrcQualFromSrcQual (qual);
      if (sub_type >= 0) {
        sub_link = &(biop->subtype);
        while ((sub = *sub_link) != NULL) {
          if (sub->subtype == sub_type
              && DoesStringMatchConstraint (sub->name, constraint)) {
            changed = SetStringValue (&(sub->name), value, existing_text);
            matched_existing = TRUE;
            if (changed && StringHasNoText (sub->name) && !IsNonTextSourceQual (qual)) {
              /* a text qualifier edited down to nothing is dropped */
              *sub_link = sub->next;
              sub->next = NULL;
              sub = SubSourceFree (sub);
              continue;
            }
          }
          sub_link = &(sub->next);
        }
        if (!matched_existing && IsStringConstraintEmpty (constraint)) {
          /* nothing was unlinked above, so sub_link is the end of the list */
          sub = SubSourceNew ();
          sub->subtype = sub_type;
          changed = SetStringValue (&(sub->name), value, existing_text);
          *sub_link = sub;
        }
      }
    } else {
      /* OrgMod modifier */
      if (have_orgname) {
        om_link = &(biop->org->orgname->mod);
        while ((om = *om_link) != NULL) {
          if (om->subtype == om_type
              && DoesStringMatchConstraint (om->subname, constraint)) {
            changed = SetStringValue (&(om->subname), value, existing_text);
            matched_existing = TRUE;
            if (changed && StringHasNoText (om->subname) && !IsNonTextSourceQual (qual)) {
              /* a text qualifier edited down to nothing is dropped */
              *om_link = om->next;
              om->next = NULL;
              om = OrgModFree (om);
              continue;
            }
          }
          om_link = &(om->next);
        }
      }
      if (!matched_existing && IsStringConstraintEmpty (constraint)) {
        if (biop->org == NULL) {
          biop->org = OrgRefNew ();
        }
        if (biop->org->orgname == NULL) {
          biop->org->orgname = OrgNameNew ();
        }
        om = OrgModNew ();
        om->subtype = om_type;
        changed = SetStringValue (&(om->subname), value, existing_text);
        if (om_link == NULL) {
          /* list was not walked above; the new modifier becomes its head */
          om_link = &(biop->org->orgname->mod);
        }
        *om_link = om;
      }
    }
    return changed;
  }

  /* direct OrgRef/OrgName string fields */
  if (field_slot != NULL && DoesStringMatchConstraint (*field_slot, constraint)) {
    changed = SetStringValue (field_slot, value, existing_text);
  }
  return changed;
}
4079
4080
4081
/* GetSequenceForObject
 *
 * Finds the Bioseq associated with an object.
 *
 *   choice  kind of object that data points to:
 *             OBJ_BIOSEQ   data is the Bioseq itself
 *             OBJ_SEQFEAT  Bioseq found from the feature's location
 *             OBJ_SEQDESC  parent Bioseq recorded in the descriptor's index
 *                          (only for extended descriptors whose parent is a
 *                          Bioseq)
 *             0            data is treated as a CGPSet; the first feature in
 *                          its cds_list, then mrna_list, then gene_list whose
 *                          location resolves to a Bioseq decides the result
 *   data    the object
 *
 * Returns the Bioseq, or NULL if data is NULL, choice is not one of the
 * above, or no Bioseq could be found.
 */
static BioseqPtr GetSequenceForObject (Uint1 choice, Pointer data)
{
  BioseqPtr     bsp = NULL;
  SeqFeatPtr    sfp;
  SeqDescrPtr   sdp;
  ObjValNodePtr ovp;
  CGPSetPtr     cgp;
  ValNodePtr    vnp;
  ValNodePtr    feature_lists [3];
  Int2          k;

  if (data == NULL) return NULL;

  switch (choice) {
    case OBJ_BIOSEQ:
      bsp = (BioseqPtr) data;
      break;
    case OBJ_SEQFEAT:
      sfp = (SeqFeatPtr) data;
      bsp = BioseqFindFromSeqLoc (sfp->location);
      break;
    case OBJ_SEQDESC:
      sdp = (SeqDescrPtr) data;
      if (sdp->extended) {
        ovp = (ObjValNodePtr) sdp;
        if (ovp->idx.parenttype == OBJ_BIOSEQ && ovp->idx.parentptr != NULL) {
          bsp = ovp->idx.parentptr;
        }
      }
      break;
    case 0:
      cgp = (CGPSetPtr) data;
      /* search order: coding regions, then mRNAs, then genes.  The gene
       * list used to be unreachable because of a stray break after the
       * mRNA loop, so gene-only sets never resolved to a sequence.
       */
      feature_lists [0] = cgp->cds_list;
      feature_lists [1] = cgp->mrna_list;
      feature_lists [2] = cgp->gene_list;
      for (k = 0; k < 3 && bsp == NULL; k++) {
        for (vnp = feature_lists [k]; vnp != NULL && bsp == NULL; vnp = vnp->next) {
          sfp = vnp->data.ptrvalue;
          if (sfp != NULL) {
            bsp = BioseqFindFromSeqLoc (sfp->location);
          }
        }
      }
      break;
  }
  return bsp;
}
4134
4135
4136
/* GetBioSourceFromObject
 *
 * Returns the BioSource that applies to an object.
 *
 *   choice  kind of object that data points to (OBJ_SEQDESC, OBJ_SEQFEAT, or
 *           any value accepted by GetSequenceForObject)
 *   data    the object
 *
 * If the object is itself a source descriptor or a biosource feature, its own
 * BioSource is returned.  Otherwise (or if that BioSource pointer is NULL) the
 * sequence for the object is located with GetSequenceForObject and the
 * BioSource of the first source descriptor reported for it by
 * SeqMgrGetNextDescriptor is returned.
 *
 * Returns NULL if data is NULL or no BioSource is found.
 */
NLM_EXTERN BioSourcePtr GetBioSourceFromObject (Uint1 choice, Pointer data)
{
  BioSourcePtr      source = NULL;
  SeqDescrPtr       desc;
  SeqFeatPtr        feat;
  BioseqPtr         seq = NULL;
  SeqMgrDescContext dcontext;

  if (data == NULL) {
    return NULL;
  }

  /* first see whether the object carries the BioSource itself */
  if (choice == OBJ_SEQDESC) {
    desc = (SeqDescrPtr) data;
    if (desc->choice == Seq_descr_source) {
      source = desc->data.ptrvalue;
    }
  } else if (choice == OBJ_SEQFEAT) {
    feat = (SeqFeatPtr) data;
    if (feat->data.choice == SEQFEAT_BIOSRC) {
      source = feat->data.value.ptrvalue;
    }
  }
  if (source != NULL) {
    return source;
  }

  /* otherwise use the source descriptor that applies to the object's sequence */
  seq = GetSequenceForObject (choice, data);
  desc = SeqMgrGetNextDescriptor (seq, NULL, Seq_descr_source, &dcontext);
  if (desc != NULL && desc->choice == Seq_descr_source) {
    source = desc->data.ptrvalue;
  }
  return source;
}
4170
4171
4172
/* functions for dealing with CDS-Gene-Prot sets */
4173
static CharPtr GetFieldValueFromCGPSet (CGPSetPtr c, Uint2 field, StringConstraintPtr scp)
4174
{
4175
  CharPtr str = NULL;
4176
  ValNodePtr vnp;
4177
  SeqFeatPtr sfp;
4178
  GeneRefPtr grp;
4179
  RnaRefPtr  rrp;
4180
  ProtRefPtr prp;
4181
  
4182
  if (c == NULL) return NULL;
4183
  switch (field) {
4184
    case CDSGeneProt_field_cds_comment:
4185
      for (vnp = c->cds_list; vnp != NULL && str == NULL; vnp = vnp->next) {
4186
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
4187
        if (sfp != NULL && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp))
4188
        {
4189
          str = StringSave (sfp->comment);
4190
        }
4191
      }
4192
      break;
4193
    case CDSGeneProt_field_gene_locus:
4194
      for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) {
4195
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
4196
        if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE
4197
            && (grp = sfp->data.value.ptrvalue) != NULL
4198
            && !StringHasNoText (grp->locus) 
4199
            && DoesStringMatchConstraint(grp->locus, scp))
4200
        {
4201
          str = StringSave (grp->locus);
4202
        }
4203
      }
4204
      break;
4205
    case CDSGeneProt_field_gene_description:
4206
      for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) {
4207
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
4208
        if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE
4209
            && (grp = sfp->data.value.ptrvalue) != NULL
4210
            && !StringHasNoText (grp->desc) 
4211
            && DoesStringMatchConstraint(grp->desc, scp))
4212
        {
4213
          str = StringSave (grp->desc);
4214
        }
4215
      }
4216
      break;
4217
    case CDSGeneProt_field_gene_comment:
4218
      for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) {
4219
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
4220
        if (sfp != NULL && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp))
4221
        {
4222
          str = StringSave (sfp->comment);
4223
        }
4224
      }
4225
      break;
4226
    case CDSGeneProt_field_gene_allele:
4227
      for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) {
4228
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
4229
        if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE
4230
            && (grp = sfp->data.value.ptrvalue) != NULL
4231
            && !StringHasNoText (grp->allele) 
4232
            && DoesStringMatchConstraint(grp->allele, scp))
4233
        {
4234
          str = StringSave (grp->allele);
4235
        }
4236
      }
4237
      break;
4238
    case CDSGeneProt_field_gene_maploc:
4239
      for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) {
4240
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
4241
        if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE
4242
            && (grp = sfp->data.value.ptrvalue) != NULL
4243
            && !StringHasNoText (grp->maploc) 
4244
            && DoesStringMatchConstraint(grp->maploc, scp))
4245
        {
4246
          str = StringSave (grp->maploc);
4247
        }
4248
      }
4249
      break;
4250
    case CDSGeneProt_field_gene_locus_tag:
4251
      for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) {
4252
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
4253
        if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE
4254
            && (grp = sfp->data.value.ptrvalue) != NULL
4255
            && !StringHasNoText (grp->locus_tag) 
4256
            && DoesStringMatchConstraint(grp->locus_tag, scp))
4257
        {
4258
          str = StringSave (grp->locus_tag);
4259
        }
4260
      }
4261
      break;
4262
    case CDSGeneProt_field_gene_synonym:
4263
      for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) {
4264
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
4265
        if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE
4266
            && (grp = sfp->data.value.ptrvalue) != NULL)
4267
        {
4268
          str = GetFirstValNodeStringMatch (grp->syn, scp);
4269
        }
4270
      }
4271
      break;
4272
    case CDSGeneProt_field_gene_old_locus_tag:
4273
      for (vnp = c->gene_list; vnp != NULL && str == NULL; vnp = vnp->next) {
4274
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
4275
        if (sfp != NULL) {
4276
          str = GetFirstGBQualMatch (sfp->qual, "old-locus-tag", scp);
4277
        }
4278
      }
4279
      break;
4280
    case CDSGeneProt_field_mrna_product:
4281
      for (vnp = c->mrna_list; vnp != NULL && str == NULL; vnp = vnp->next) {
4282
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
4283
        if (sfp != NULL && sfp->data.choice == SEQFEAT_RNA
4284
            && (rrp = sfp->data.value.ptrvalue) != NULL
4285
            && rrp->ext.choice == 1
4286
            && !StringHasNoText (rrp->ext.value.ptrvalue) 
4287
            && DoesStringMatchConstraint(rrp->ext.value.ptrvalue, scp))
4288
        {
4289
          str = StringSave (rrp->ext.value.ptrvalue);
4290
        }
4291
      }
4292
      break;
4293
    case CDSGeneProt_field_mrna_comment:
4294
      for (vnp = c->mrna_list; vnp != NULL && str == NULL; vnp = vnp->next) {
4295
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
4296
        if (sfp != NULL && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp))
4297
        {
4298
          str = StringSave (sfp->comment);
4299
        }
4300
      }
4301
      break;
4302
    case CDSGeneProt_field_prot_name:
4303
      for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) {
4304
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
4305
        if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
4306
            && sfp->idx.subtype == FEATDEF_PROT
4307
            && (prp = sfp->data.value.ptrvalue) != NULL)
4308
        {
4309
          str = GetFirstValNodeStringMatch (prp->name, scp);
4310
        }
4311
      }
4312
      break;
4313
    case CDSGeneProt_field_prot_description:
4314
      for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) {
4315
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
4316
        if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
4317
            && sfp->idx.subtype == FEATDEF_PROT
4318
            && (prp = sfp->data.value.ptrvalue) != NULL
4319
            && !StringHasNoText (prp->desc) && DoesStringMatchConstraint(prp->desc, scp)) {
4320
          str = StringSave (prp->desc);
4321
        }
4322
      }
4323
      break;
4324
    case CDSGeneProt_field_prot_ec_number:
4325
      for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) {
4326
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
4327
        if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
4328
            && sfp->idx.subtype == FEATDEF_PROT
4329
            && (prp = sfp->data.value.ptrvalue) != NULL)
4330
        {
4331
          str = GetFirstValNodeStringMatch (prp->ec, scp);
4332
        }
4333
      }
4334
      break;
4335
    case CDSGeneProt_field_prot_activity:
4336
      for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) {
4337
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
4338
        if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
4339
            && sfp->idx.subtype == FEATDEF_PROT
4340
            && (prp = sfp->data.value.ptrvalue) != NULL)
4341
        {
4342
          str = GetFirstValNodeStringMatch (prp->activity, scp);
4343
        }
4344
      }
4345
      break;
4346
    case CDSGeneProt_field_prot_comment:
4347
      for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) {
4348
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
4349
        if (sfp != NULL && sfp->idx.subtype == FEATDEF_PROT
4350
            && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp))
4351
        {
4352
          str = StringSave (sfp->comment);
4353
        }
4354
      }
4355
      break;
4356
    case CDSGeneProt_field_mat_peptide_name:
4357
      for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) {
4358
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
4359
        if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
4360
            && sfp->idx.subtype == FEATDEF_mat_peptide_aa
4361
            && (prp = sfp->data.value.ptrvalue) != NULL)
4362
        {
4363
          str = GetFirstValNodeStringMatch (prp->name, scp);
4364
        }
4365
      }
4366
      break;
4367
    case CDSGeneProt_field_mat_peptide_description:
4368
      for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) {
4369
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
4370
        if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
4371
            && sfp->idx.subtype == FEATDEF_mat_peptide_aa
4372
            && (prp = sfp->data.value.ptrvalue) != NULL
4373
            && !StringHasNoText (prp->desc) && DoesStringMatchConstraint(prp->desc, scp)) {
4374
          str = StringSave (prp->desc);
4375
        }
4376
      }
4377
      break;
4378
    case CDSGeneProt_field_mat_peptide_ec_number:
4379
      for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) {
4380
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
4381
        if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
4382
            && sfp->idx.subtype == FEATDEF_mat_peptide_aa
4383
            && (prp = sfp->data.value.ptrvalue) != NULL)
4384
        {
4385
          str = GetFirstValNodeStringMatch (prp->ec, scp);
4386
        }
4387
      }
4388
      break;
4389
    case CDSGeneProt_field_mat_peptide_activity:
4390
      for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) {
4391
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
4392
        if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
4393
            && sfp->idx.subtype == FEATDEF_mat_peptide_aa
4394
            && (prp = sfp->data.value.ptrvalue) != NULL)
4395
        {
4396
          str = GetFirstValNodeStringMatch (prp->activity, scp);
4397
        }
4398
      }
4399
      break;
4400
    case CDSGeneProt_field_mat_peptide_comment:
4401
      for (vnp = c->prot_list; vnp != NULL && str == NULL; vnp = vnp->next) {
4402
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
4403
        if (sfp != NULL && sfp->idx.subtype == FEATDEF_mat_peptide_aa
4404
            && !StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp))
4405
        {
4406
          str = StringSave (sfp->comment);
4407
        }
4408
      }
4409
      break;
4410
  }
4411
  return str;
4412
}
4413
4414
4415
/* Removes the values of one CDS-gene-prot field from the features of a set.
 *
 * c     - the CDS/gene/mRNA/protein feature set; NULL yields FALSE.
 * field - one of the CDSGeneProt_field_* constants handled below; any other
 *         value leaves the set untouched and yields FALSE.
 * scp   - string constraint; only values for which DoesStringMatchConstraint
 *         succeeds are removed.
 *
 * Returns TRUE when at least one value was removed.
 */
static Boolean RemoveFieldValueFromCGPSet (CGPSetPtr c, Uint2 field, StringConstraintPtr scp)
{
  Boolean    removed_any = FALSE;
  ValNodePtr feat_list;
  ValNodePtr node;
  SeqFeatPtr feat;
  GeneRefPtr gene_ref;
  RnaRefPtr  rna_ref;
  ProtRefPtr prot_ref;
  CharPtr   *text;
  int        prot_subtype = FEATDEF_PROT;

  if (c == NULL) {
    return FALSE;
  }

  /* Step 1: choose the feature list the requested field lives on. */
  switch (field) {
    case CDSGeneProt_field_cds_comment:
      feat_list = c->cds_list;
      break;
    case CDSGeneProt_field_gene_locus:
    case CDSGeneProt_field_gene_description:
    case CDSGeneProt_field_gene_comment:
    case CDSGeneProt_field_gene_allele:
    case CDSGeneProt_field_gene_maploc:
    case CDSGeneProt_field_gene_locus_tag:
    case CDSGeneProt_field_gene_synonym:
    case CDSGeneProt_field_gene_old_locus_tag:
      feat_list = c->gene_list;
      break;
    case CDSGeneProt_field_mrna_product:
    case CDSGeneProt_field_mrna_comment:
      feat_list = c->mrna_list;
      break;
    case CDSGeneProt_field_prot_name:
    case CDSGeneProt_field_prot_description:
    case CDSGeneProt_field_prot_ec_number:
    case CDSGeneProt_field_prot_activity:
    case CDSGeneProt_field_prot_comment:
      feat_list = c->prot_list;
      break;
    case CDSGeneProt_field_mat_peptide_name:
    case CDSGeneProt_field_mat_peptide_description:
    case CDSGeneProt_field_mat_peptide_ec_number:
    case CDSGeneProt_field_mat_peptide_activity:
    case CDSGeneProt_field_mat_peptide_comment:
      /* mature peptides share prot_list and are told apart by subtype */
      feat_list = c->prot_list;
      prot_subtype = FEATDEF_mat_peptide_aa;
      break;
    default:
      return FALSE;
  }

  /* Step 2: visit every feature in that list.  Single-string fields are
   * handled by pointing "text" at the string slot and clearing it after the
   * dispatch; list-valued fields call their remover directly.
   */
  for (node = feat_list; node != NULL; node = node->next) {
    feat = (SeqFeatPtr) node->data.ptrvalue;
    if (feat == NULL) {
      continue;
    }
    text = NULL;

    switch (field) {
      case CDSGeneProt_field_cds_comment:
      case CDSGeneProt_field_gene_comment:
      case CDSGeneProt_field_mrna_comment:
        text = &(feat->comment);
        break;

      case CDSGeneProt_field_prot_comment:
      case CDSGeneProt_field_mat_peptide_comment:
        /* comment fields are filtered by subtype only */
        if (feat->idx.subtype == prot_subtype) {
          text = &(feat->comment);
        }
        break;

      case CDSGeneProt_field_gene_old_locus_tag:
        removed_any |= RemoveGBQualMatch (&(feat->qual), "old-locus-tag", scp);
        break;

      case CDSGeneProt_field_gene_locus:
      case CDSGeneProt_field_gene_description:
      case CDSGeneProt_field_gene_allele:
      case CDSGeneProt_field_gene_maploc:
      case CDSGeneProt_field_gene_locus_tag:
      case CDSGeneProt_field_gene_synonym:
        if (feat->data.choice != SEQFEAT_GENE
            || (gene_ref = feat->data.value.ptrvalue) == NULL) {
          break;
        }
        switch (field) {
          case CDSGeneProt_field_gene_locus:
            text = &(gene_ref->locus);
            break;
          case CDSGeneProt_field_gene_description:
            text = &(gene_ref->desc);
            break;
          case CDSGeneProt_field_gene_allele:
            text = &(gene_ref->allele);
            break;
          case CDSGeneProt_field_gene_maploc:
            text = &(gene_ref->maploc);
            break;
          case CDSGeneProt_field_gene_locus_tag:
            text = &(gene_ref->locus_tag);
            break;
          case CDSGeneProt_field_gene_synonym:
            removed_any |= RemoveValNodeStringMatch (&(gene_ref->syn), scp);
            break;
          default:
            break;
        }
        break;

      case CDSGeneProt_field_mrna_product:
        /* the product string is only present when ext.choice is 1 */
        if (feat->data.choice == SEQFEAT_RNA
            && (rna_ref = feat->data.value.ptrvalue) != NULL
            && rna_ref->ext.choice == 1
            && !StringHasNoText (rna_ref->ext.value.ptrvalue)
            && DoesStringMatchConstraint (rna_ref->ext.value.ptrvalue, scp)) {
          rna_ref->ext.value.ptrvalue = MemFree (rna_ref->ext.value.ptrvalue);
          rna_ref->ext.choice = 0;
          removed_any = TRUE;
        }
        break;

      case CDSGeneProt_field_prot_name:
      case CDSGeneProt_field_prot_description:
      case CDSGeneProt_field_prot_ec_number:
      case CDSGeneProt_field_prot_activity:
      case CDSGeneProt_field_mat_peptide_name:
      case CDSGeneProt_field_mat_peptide_description:
      case CDSGeneProt_field_mat_peptide_ec_number:
      case CDSGeneProt_field_mat_peptide_activity:
        if (feat->data.choice != SEQFEAT_PROT
            || feat->idx.subtype != prot_subtype
            || (prot_ref = feat->data.value.ptrvalue) == NULL) {
          break;
        }
        switch (field) {
          case CDSGeneProt_field_prot_name:
          case CDSGeneProt_field_mat_peptide_name:
            removed_any |= RemoveValNodeStringMatch (&(prot_ref->name), scp);
            break;
          case CDSGeneProt_field_prot_description:
          case CDSGeneProt_field_mat_peptide_description:
            text = &(prot_ref->desc);
            break;
          case CDSGeneProt_field_prot_ec_number:
          case CDSGeneProt_field_mat_peptide_ec_number:
            removed_any |= RemoveValNodeStringMatch (&(prot_ref->ec), scp);
            break;
          case CDSGeneProt_field_prot_activity:
          case CDSGeneProt_field_mat_peptide_activity:
            removed_any |= RemoveValNodeStringMatch (&(prot_ref->activity), scp);
            break;
          default:
            break;
        }
        break;

      default:
        break;
    }

    /* shared tail for single-string fields: drop the text if it is
     * non-blank and satisfies the constraint
     */
    if (text != NULL
        && !StringHasNoText (*text)
        && DoesStringMatchConstraint (*text, scp)) {
      *text = MemFree (*text);
      removed_any = TRUE;
    }
  }

  return removed_any;
}
4669
4670
4671
/* Creates a new gene feature for a CDS-gene-prot set.
 *
 * The first non-NULL feature pointer found in c->cds_list, or failing that
 * in c->mrna_list, is used as the template: its location is handed to
 * BioseqFindFromSeqLoc to locate the Bioseq and to CreateNewFeatureOnBioseq
 * for the new gene.  A fresh GeneRef is attached to the new feature.
 *
 * Returns the new gene, or NULL when c is NULL, no template feature exists,
 * the Bioseq cannot be found, or the feature cannot be created.  The gene is
 * not added to c->gene_list here.
 */
static SeqFeatPtr CreateGeneForCGPSet (CGPSetPtr c)
{
  SeqFeatPtr template_feat = NULL;
  SeqFeatPtr new_gene;
  BioseqPtr  target;
  ValNodePtr node;

  if (c == NULL) {
    return NULL;
  }

  /* prefer a coding region as the template ... */
  node = c->cds_list;
  while (node != NULL && template_feat == NULL) {
    template_feat = node->data.ptrvalue;
    node = node->next;
  }
  /* ... and fall back to an mRNA */
  node = c->mrna_list;
  while (node != NULL && template_feat == NULL) {
    template_feat = node->data.ptrvalue;
    node = node->next;
  }
  if (template_feat == NULL) {
    return NULL;
  }

  target = BioseqFindFromSeqLoc (template_feat->location);
  if (target == NULL) {
    return NULL;
  }

  new_gene = CreateNewFeatureOnBioseq (target, SEQFEAT_GENE, template_feat->location);
  if (new_gene != NULL) {
    new_gene->data.value.ptrvalue = GeneRefNew ();
  }
  return new_gene;
}
4696
4697
4698
/* Applies a value to one CDS-gene-prot field on the features of a set.
 *
 * c             - the CDS/gene/mRNA/protein feature set; NULL yields FALSE.
 * field         - one of the CDSGeneProt_field_* constants handled below;
 *                 any other value leaves the set untouched.
 * scp           - string constraint; single-string fields are only changed
 *                 when DoesStringMatchConstraint accepts the current value.
 *                 For list-valued fields the constraint is forwarded to
 *                 SetStringsInValNodeStringList.
 * value         - the text to apply.
 * existing_text - forwarded unchanged to SetStringValue,
 *                 SetStringsInValNodeStringList and SetRNAProductString.
 *
 * Special cases:
 *  - gene locus: when the set has no gene and scp is NULL, a gene is created
 *    with CreateGeneForCGPSet and appended to c->gene_list first.
 *  - gene old-locus-tag: every "old-locus-tag" GBQual whose value satisfies
 *    scp is updated; when the feature has no such qualifier, scp is NULL and
 *    value has text, a new qualifier is appended.  (Earlier this case called
 *    RemoveGBQualMatch, i.e. a "set" request deleted the qualifier.)
 *
 * Returns TRUE when at least one value was changed or added.
 */
static Boolean SetFieldValueInCGPSet (CGPSetPtr c, Uint2 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
{
  Boolean    rval = FALSE;
  Boolean    found;
  ValNodePtr vnp;
  SeqFeatPtr sfp;
  GeneRefPtr grp;
  ProtRefPtr prp;
  GBQualPtr  gbq, last_gbq;
  
  if (c == NULL) return FALSE;
  switch (field) {
    case CDSGeneProt_field_cds_comment:
      for (vnp = c->cds_list; vnp != NULL; vnp = vnp->next) {
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
        if (sfp != NULL && DoesStringMatchConstraint(sfp->comment, scp))
        {
          rval |= SetStringValue ( &(sfp->comment), value, existing_text);
        }
      }
      break;
    case CDSGeneProt_field_gene_locus:
      /* with no gene and no constraint, make a gene to carry the locus */
      if (c->gene_list == NULL && scp == NULL) {
        sfp = CreateGeneForCGPSet (c);
        if (sfp != NULL) {
          ValNodeAddPointer (&(c->gene_list), 0, sfp);
        }
      }
      for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) {
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
        if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE
            && (grp = sfp->data.value.ptrvalue) != NULL
            && DoesStringMatchConstraint(grp->locus, scp))
        {
          rval |= SetStringValue ( &(grp->locus), value, existing_text);
        }
      }
      break;
    case CDSGeneProt_field_gene_description:
      for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) {
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
        if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE
            && (grp = sfp->data.value.ptrvalue) != NULL
            && DoesStringMatchConstraint(grp->desc, scp))
        {
          rval |= SetStringValue ( &(grp->desc), value, existing_text);
        }
      }
      break;
    case CDSGeneProt_field_gene_comment:
      for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) {
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
        if (sfp != NULL && DoesStringMatchConstraint(sfp->comment, scp))
        {
          rval |= SetStringValue ( &(sfp->comment), value, existing_text);
        }
      }
      break;
    case CDSGeneProt_field_gene_allele:
      for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) {
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
        if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE
            && (grp = sfp->data.value.ptrvalue) != NULL
            && DoesStringMatchConstraint(grp->allele, scp))
        {
          rval |= SetStringValue (&(grp->allele), value, existing_text);
        }
      }
      break;
    case CDSGeneProt_field_gene_maploc:
      for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) {
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
        if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE
            && (grp = sfp->data.value.ptrvalue) != NULL
            && DoesStringMatchConstraint(grp->maploc, scp))
        {
          rval |= SetStringValue ( &(grp->maploc), value, existing_text);
        }
      }
      break;
    case CDSGeneProt_field_gene_locus_tag:
      for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) {
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
        if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE
            && (grp = sfp->data.value.ptrvalue) != NULL
            && DoesStringMatchConstraint(grp->locus_tag, scp))
        {
          rval |= SetStringValue ( &(grp->locus_tag), value, existing_text);
        }
      }
      break;
    case CDSGeneProt_field_gene_synonym:
      for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) {
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
        if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE
            && (grp = sfp->data.value.ptrvalue) != NULL)
        {
          rval |= SetStringsInValNodeStringList (&(grp->syn), scp, value, existing_text);
        }
      }
      break;
    case CDSGeneProt_field_gene_old_locus_tag:
      /* NOTE(review): the qualifier name literal mirrors the one used by the
       * get/remove code paths in this file for the same field - confirm it
       * is the spelling stored in GBQual.qual.
       */
      for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) {
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
        if (sfp != NULL) {
          found = FALSE;
          last_gbq = NULL;
          for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
            if (StringCmp (gbq->qual, "old-locus-tag") == 0) {
              found = TRUE;
              if (DoesStringMatchConstraint (gbq->val, scp)) {
                rval |= SetStringValue (&(gbq->val), value, existing_text);
              }
            }
            last_gbq = gbq;   /* remember the tail for a possible append */
          }
          /* nothing to update: add the qualifier, but only for an
           * unconstrained request with real text (same rule the gene
           * locus case uses for creating a gene)
           */
          if (!found && scp == NULL && !StringHasNoText (value)) {
            gbq = GBQualNew ();
            if (gbq != NULL) {
              gbq->qual = StringSave ("old-locus-tag");
              gbq->val = StringSave (value);
              if (last_gbq == NULL) {
                sfp->qual = gbq;
              } else {
                last_gbq->next = gbq;
              }
              rval = TRUE;
            }
          }
        }
      }
      break;
    case CDSGeneProt_field_mrna_product:
      /* sfp is passed on without a NULL check; SetRNAProductString applies
       * the constraint itself
       */
      for (vnp = c->mrna_list; vnp != NULL; vnp = vnp->next) {
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
        rval |= SetRNAProductString (sfp, scp, value, existing_text);
      }
      break;
    case CDSGeneProt_field_mrna_comment:
      for (vnp = c->mrna_list; vnp != NULL; vnp = vnp->next) {
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
        if (sfp != NULL && DoesStringMatchConstraint(sfp->comment, scp))
        {
          rval |= SetStringValue ( &(sfp->comment), value, existing_text);
        }
      }
      break;
    case CDSGeneProt_field_prot_name:
      for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
        if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
            && sfp->idx.subtype == FEATDEF_PROT
            && (prp = sfp->data.value.ptrvalue) != NULL)
        {
          rval |= SetStringsInValNodeStringList (&(prp->name), scp, value, existing_text);
        }
      }
      break;
    case CDSGeneProt_field_prot_description:
      for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
        if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
            && sfp->idx.subtype == FEATDEF_PROT
            && (prp = sfp->data.value.ptrvalue) != NULL
            && DoesStringMatchConstraint(prp->desc, scp)) {
          rval |= SetStringValue ( &(prp->desc), value, existing_text);
        }
      }
      break;
    case CDSGeneProt_field_prot_ec_number:
      for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
        if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
            && sfp->idx.subtype == FEATDEF_PROT
            && (prp = sfp->data.value.ptrvalue) != NULL)
        {
          rval |= SetStringsInValNodeStringList (&(prp->ec), scp, value, existing_text);
        }
      }
      break;
    case CDSGeneProt_field_prot_activity:
      for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
        if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
            && sfp->idx.subtype == FEATDEF_PROT
            && (prp = sfp->data.value.ptrvalue) != NULL)
        {
          rval |= SetStringsInValNodeStringList (&(prp->activity), scp, value, existing_text);
        }
      }
      break;
    case CDSGeneProt_field_prot_comment:
      /* comment fields are filtered by subtype only */
      for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
        if (sfp != NULL && sfp->idx.subtype == FEATDEF_PROT
            && DoesStringMatchConstraint(sfp->comment, scp))
        {
          rval |= SetStringValue ( &(sfp->comment), value, existing_text);
        }
      }
      break;
    case CDSGeneProt_field_mat_peptide_name:
      for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
        if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
            && sfp->idx.subtype == FEATDEF_mat_peptide_aa
            && (prp = sfp->data.value.ptrvalue) != NULL)
        {
          rval |= SetStringsInValNodeStringList (&(prp->name), scp, value, existing_text);
        }
      }
      break;
    case CDSGeneProt_field_mat_peptide_description:
      for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
        if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
            && sfp->idx.subtype == FEATDEF_mat_peptide_aa
            && (prp = sfp->data.value.ptrvalue) != NULL
            && DoesStringMatchConstraint(prp->desc, scp)) {
          rval |= SetStringValue ( &(prp->desc), value, existing_text);
        }
      }
      break;
    case CDSGeneProt_field_mat_peptide_ec_number:
      for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
        if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
            && sfp->idx.subtype == FEATDEF_mat_peptide_aa
            && (prp = sfp->data.value.ptrvalue) != NULL)
        {
          rval |= SetStringsInValNodeStringList (&(prp->ec), scp, value, existing_text);
        }
      }
      break;
    case CDSGeneProt_field_mat_peptide_activity:
      for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
        if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT
            && sfp->idx.subtype == FEATDEF_mat_peptide_aa
            && (prp = sfp->data.value.ptrvalue) != NULL)
        {
          rval |= SetStringsInValNodeStringList (&(prp->activity), scp, value, existing_text);
        }
      }
      break;
    case CDSGeneProt_field_mat_peptide_comment:
      for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) {
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
        if (sfp != NULL && sfp->idx.subtype == FEATDEF_mat_peptide_aa
            && DoesStringMatchConstraint(sfp->comment, scp))
        {
          rval |= SetStringValue ( &(sfp->comment), value, existing_text);
        }
      }
      break;
  }
  return rval;
}
4931
4932
4933
/* Returns the data pointer of the first Seq_descr_molinfo descriptor found
 * in bsp->descr, or NULL when bsp is NULL or the chain holds no such
 * descriptor.  Only the descriptor chain hanging directly off bsp is
 * searched.
 */
static MolInfoPtr GetMolInfoForBioseq (BioseqPtr bsp)
{
  SeqDescrPtr desc;

  if (bsp == NULL) {
    return NULL;
  }
  for (desc = bsp->descr; desc != NULL; desc = desc->next) {
    if (desc->choice == Seq_descr_molinfo) {
      return (MolInfoPtr) desc->data.ptrvalue;
    }
  }
  return NULL;
}
4948
  
4949
4950
/* Returns a newly saved copy (StringSave) of the display name for one
 * sequence qualifier of a Bioseq.
 *
 * bsp   - the Bioseq to inspect.
 * field - field->choice selects the qualifier (MolinfoField_*).
 *
 * molecule, technique and completedness are read from the MolInfo returned
 * by GetMolInfoForBioseq; mol_class, topology and strand are read from the
 * Bioseq itself.  Returns NULL when either argument is NULL, the choice is
 * not handled, the needed MolInfo is missing, or the name lookup yields
 * NULL.
 */
static CharPtr GetSequenceQualFromBioseq (BioseqPtr bsp, ValNodePtr field)
{
  CharPtr    name = NULL;
  MolInfoPtr molinfo;

  if (bsp == NULL || field == NULL) {
    return NULL;
  }

  switch (field->choice) {
    case MolinfoField_molecule:
    case MolinfoField_technique:
    case MolinfoField_completedness:
      /* these three live on the MolInfo descriptor */
      molinfo = GetMolInfoForBioseq (bsp);
      if (molinfo == NULL) {
        break;
      }
      if (field->choice == MolinfoField_molecule) {
        name = BiomolNameFromBiomol (molinfo->biomol);
      } else if (field->choice == MolinfoField_technique) {
        name = TechNameFromTech (molinfo->tech);
      } else {
        name = CompletenessNameFromCompleteness (molinfo->completeness);
      }
      break;
    case MolinfoField_mol_class:
      name = MolNameFromMol (bsp->mol);
      break;
    case MolinfoField_topology:
      name = TopologyNameFromTopology (bsp->topology);
      break;
    case MolinfoField_strand:
      name = StrandNameFromStrand (bsp->strand);
      break;
  }

  /* hand the caller its own copy of the looked-up name */
  return (name == NULL) ? NULL : StringSave (name);
}
4989
4990
4991
/* Resets one sequence qualifier of a Bioseq to 0.
 *
 * bsp   - the Bioseq to modify.
 * field - field->choice selects the qualifier (MolinfoField_*).
 *
 * molecule, technique and completedness are cleared on the MolInfo returned
 * by GetMolInfoForBioseq and report FALSE when there is no MolInfo;
 * mol_class, topology and strand are cleared on the Bioseq and always
 * report TRUE.  Returns FALSE for NULL arguments or an unhandled choice.
 */
static Boolean RemoveSequenceQualFromBioseq (BioseqPtr bsp, ValNodePtr field)
{
  MolInfoPtr molinfo;

  if (bsp == NULL || field == NULL) {
    return FALSE;
  }

  switch (field->choice) {
    case MolinfoField_molecule:
    case MolinfoField_technique:
    case MolinfoField_completedness:
      molinfo = GetMolInfoForBioseq (bsp);
      if (molinfo == NULL) {
        return FALSE;
      }
      if (field->choice == MolinfoField_molecule) {
        molinfo->biomol = 0;
      } else if (field->choice == MolinfoField_technique) {
        molinfo->tech = 0;
      } else {
        molinfo->completeness = 0;
      }
      return TRUE;
    case MolinfoField_mol_class:
      bsp->mol = 0;
      return TRUE;
    case MolinfoField_topology:
      bsp->topology = 0;
      return TRUE;
    case MolinfoField_strand:
      bsp->strand = 0;
      return TRUE;
  }
  return FALSE;
}
5035
5036
5037
/* Creates a Seq_descr_molinfo descriptor on bsp with
 * CreateNewDescriptorOnBioseq and stores a new MolInfo (MolInfoNew) in it.
 *
 * Returns the new MolInfo, or NULL when bsp is NULL, the descriptor could
 * not be created, or MolInfoNew returned NULL.  The descriptor result used
 * to be dereferenced unchecked.
 */
static MolInfoPtr AddMolInfoToBioseq (BioseqPtr bsp)
{
  SeqDescrPtr sdp;
  MolInfoPtr  m;

  if (bsp == NULL) return NULL;

  sdp = CreateNewDescriptorOnBioseq (bsp, Seq_descr_molinfo);
  if (sdp == NULL) return NULL;   /* nothing to attach the MolInfo to */

  m = MolInfoNew ();
  sdp->data.ptrvalue = m;
  return m;
}
5047
5048
5049
/* Applies the sequence qualifier carried by field to bsp.
 * field->choice selects which qualifier is set and field->data.intvalue is
 * the macro-level value, which is translated by the matching *From*Type
 * conversion function before being stored.
 * For molecule, technique and completedness the value is stored on the
 * Bioseq's MolInfo; one is added with AddMolInfoToBioseq when
 * GetMolInfoForBioseq finds none.
 * Returns TRUE when a value was stored, FALSE for NULL arguments, an
 * unrecognized choice, or when no MolInfo could be obtained.
 */
static Boolean SetSequenceQualOnBioseq (BioseqPtr bsp, ValNodePtr field)
{
  MolInfoPtr m;
  Boolean    rval = FALSE;

  if (bsp == NULL || field == NULL) return FALSE;

  switch (field->choice) {
    case MolinfoField_molecule:
    case MolinfoField_technique:
    case MolinfoField_completedness:
      m = GetMolInfoForBioseq (bsp);
      if (m == NULL) {
        m = AddMolInfoToBioseq (bsp);
      }
      if (m == NULL) {
        /* could not find or create a MolInfo; report failure instead of dereferencing NULL */
        break;
      }
      if (field->choice == MolinfoField_molecule) {
        m->biomol = BiomolFromMoleculeType (field->data.intvalue);
      } else if (field->choice == MolinfoField_technique) {
        m->tech = TechFromTechniqueType (field->data.intvalue);
      } else {
        m->completeness = CompletenessFromCompletednessType (field->data.intvalue);
      }
      rval = TRUE;
      break;
    case MolinfoField_mol_class:
      bsp->mol = MolFromMoleculeClassType (field->data.intvalue);
      rval = TRUE;
      break;
    case MolinfoField_topology:
      bsp->topology = TopologyFromTopologyType (field->data.intvalue);
      rval = TRUE;
      break;
    case MolinfoField_strand:
      bsp->strand = StrandFromStrandType (field->data.intvalue);
      rval = TRUE;
      break;
  }
  return rval;
}
5096
5097
5098
/* Builds a newly allocated FieldType describing the "from" half of a
 * field pair.
 * Returns NULL when fieldpair is NULL, when it carries no data, or when
 * its choice (or, for molinfo pairs, the inner choice) is not handled.
 */
NLM_EXTERN FieldTypePtr GetFromFieldFromFieldPair (FieldPairTypePtr fieldpair)
{
  FieldTypePtr            result = NULL;
  SourceQualPairPtr       src_pair;
  SourceQualChoicePtr     src_field;
  FeatureFieldPairPtr     feat_pair;
  FeatureFieldPtr         feat_field;
  CDSGeneProtFieldPairPtr cgp_pair;
  MolinfoFieldPairPtr     mol_pair;
  ValNodePtr              mol_field;
  Uint1                   mol_choice = 0;
  Int4                    mol_value = 0;
  Boolean                 mol_known = TRUE;

  if (fieldpair == NULL) {
    return NULL;
  }

  if (fieldpair->choice == FieldPairType_source_qual) {
    src_pair = (SourceQualPairPtr) fieldpair->data.ptrvalue;
    if (src_pair != NULL) {
      src_field = ValNodeNew (NULL);
      src_field->choice = SourceQualChoice_textqual;
      src_field->data.intvalue = src_pair->field_from;
      result = ValNodeNew (NULL);
      result->choice = FieldType_source_qual;
      result->data.ptrvalue = src_field;
    }
  } else if (fieldpair->choice == FieldPairType_feature_field) {
    feat_pair = (FeatureFieldPairPtr) fieldpair->data.ptrvalue;
    if (feat_pair != NULL) {
      feat_field = FeatureFieldNew ();
      feat_field->type = feat_pair->type;
      /* copy the qualifier choice so the result does not share it with the pair */
      feat_field->field = (FeatQualChoicePtr) AsnIoMemCopy (feat_pair->field_from, (AsnReadFunc) FeatQualChoiceAsnRead, (AsnWriteFunc) FeatQualChoiceAsnWrite);
      result = ValNodeNew (NULL);
      result->choice = FieldType_feature_field;
      result->data.ptrvalue = feat_field;
    }
  } else if (fieldpair->choice == FieldPairType_cds_gene_prot) {
    cgp_pair = (CDSGeneProtFieldPairPtr) fieldpair->data.ptrvalue;
    if (cgp_pair != NULL) {
      result = ValNodeNew (NULL);
      result->choice = FieldType_cds_gene_prot;
      result->data.intvalue = cgp_pair->field_from;
    }
  } else if (fieldpair->choice == FieldPairType_molinfo_field) {
    mol_pair = (MolinfoFieldPairPtr) fieldpair->data.ptrvalue;
    if (mol_pair != NULL && mol_pair->data.ptrvalue != NULL) {
      /* translate the pair choice into the single-field choice and pick its "from" value */
      switch (mol_pair->choice) {
        case MolinfoFieldPair_molecule:
          mol_choice = MolinfoField_molecule;
          mol_value = ((MolinfoMoleculePairPtr) mol_pair->data.ptrvalue)->from;
          break;
        case MolinfoFieldPair_technique:
          mol_choice = MolinfoField_technique;
          mol_value = ((MolinfoTechniquePairPtr) mol_pair->data.ptrvalue)->from;
          break;
        case MolinfoFieldPair_completedness:
          mol_choice = MolinfoField_completedness;
          mol_value = ((MolinfoCompletednessPairPtr) mol_pair->data.ptrvalue)->from;
          break;
        case MolinfoFieldPair_mol_class:
          mol_choice = MolinfoField_mol_class;
          mol_value = ((MolinfoMolClassPairPtr) mol_pair->data.ptrvalue)->from;
          break;
        case MolinfoFieldPair_topology:
          mol_choice = MolinfoField_topology;
          mol_value = ((MolinfoTopologyPairPtr) mol_pair->data.ptrvalue)->from;
          break;
        case MolinfoFieldPair_strand:
          mol_choice = MolinfoField_strand;
          mol_value = ((MolinfoStrandPairPtr) mol_pair->data.ptrvalue)->from;
          break;
        default:
          mol_known = FALSE;
          break;
      }
      if (mol_known) {
        mol_field = ValNodeNew (NULL);
        mol_field->choice = mol_choice;
        mol_field->data.intvalue = mol_value;
        result = ValNodeNew (NULL);
        result->choice = FieldType_molinfo_field;
        result->data.ptrvalue = mol_field;
      }
    }
  }
  return result;
}
5187
5188
5189
/* Builds a newly allocated FieldType describing the "to" half of a
 * field pair.
 * Returns NULL when fieldpair is NULL, when it carries no data, or when
 * its choice (or, for molinfo pairs, the inner choice) is not handled.
 */
NLM_EXTERN FieldTypePtr GetToFieldFromFieldPair (FieldPairTypePtr fieldpair)
{
  FieldTypePtr            result = NULL;
  SourceQualPairPtr       src_pair;
  SourceQualChoicePtr     src_field;
  FeatureFieldPairPtr     feat_pair;
  FeatureFieldPtr         feat_field;
  CDSGeneProtFieldPairPtr cgp_pair;
  MolinfoFieldPairPtr     mol_pair;
  ValNodePtr              mol_field;
  Uint1                   mol_choice = 0;
  Int4                    mol_value = 0;
  Boolean                 mol_known = TRUE;

  if (fieldpair == NULL) {
    return NULL;
  }

  if (fieldpair->choice == FieldPairType_source_qual) {
    src_pair = (SourceQualPairPtr) fieldpair->data.ptrvalue;
    if (src_pair != NULL) {
      src_field = ValNodeNew (NULL);
      src_field->choice = SourceQualChoice_textqual;
      src_field->data.intvalue = src_pair->field_to;
      result = ValNodeNew (NULL);
      result->choice = FieldType_source_qual;
      result->data.ptrvalue = src_field;
    }
  } else if (fieldpair->choice == FieldPairType_feature_field) {
    feat_pair = (FeatureFieldPairPtr) fieldpair->data.ptrvalue;
    if (feat_pair != NULL) {
      feat_field = FeatureFieldNew ();
      feat_field->type = feat_pair->type;
      /* copy the qualifier choice so the result does not share it with the pair */
      feat_field->field = (FeatQualChoicePtr) AsnIoMemCopy (feat_pair->field_to, (AsnReadFunc) FeatQualChoiceAsnRead, (AsnWriteFunc) FeatQualChoiceAsnWrite);
      result = ValNodeNew (NULL);
      result->choice = FieldType_feature_field;
      result->data.ptrvalue = feat_field;
    }
  } else if (fieldpair->choice == FieldPairType_cds_gene_prot) {
    cgp_pair = (CDSGeneProtFieldPairPtr) fieldpair->data.ptrvalue;
    if (cgp_pair != NULL) {
      result = ValNodeNew (NULL);
      result->choice = FieldType_cds_gene_prot;
      result->data.intvalue = cgp_pair->field_to;
    }
  } else if (fieldpair->choice == FieldPairType_molinfo_field) {
    mol_pair = (MolinfoFieldPairPtr) fieldpair->data.ptrvalue;
    if (mol_pair != NULL && mol_pair->data.ptrvalue != NULL) {
      /* translate the pair choice into the single-field choice and pick its "to" value */
      switch (mol_pair->choice) {
        case MolinfoFieldPair_molecule:
          mol_choice = MolinfoField_molecule;
          mol_value = ((MolinfoMoleculePairPtr) mol_pair->data.ptrvalue)->to;
          break;
        case MolinfoFieldPair_technique:
          mol_choice = MolinfoField_technique;
          mol_value = ((MolinfoTechniquePairPtr) mol_pair->data.ptrvalue)->to;
          break;
        case MolinfoFieldPair_completedness:
          mol_choice = MolinfoField_completedness;
          mol_value = ((MolinfoCompletednessPairPtr) mol_pair->data.ptrvalue)->to;
          break;
        case MolinfoFieldPair_mol_class:
          mol_choice = MolinfoField_mol_class;
          mol_value = ((MolinfoMolClassPairPtr) mol_pair->data.ptrvalue)->to;
          break;
        case MolinfoFieldPair_topology:
          mol_choice = MolinfoField_topology;
          mol_value = ((MolinfoTopologyPairPtr) mol_pair->data.ptrvalue)->to;
          break;
        case MolinfoFieldPair_strand:
          mol_choice = MolinfoField_strand;
          mol_value = ((MolinfoStrandPairPtr) mol_pair->data.ptrvalue)->to;
          break;
        default:
          mol_known = FALSE;
          break;
      }
      if (mol_known) {
        mol_field = ValNodeNew (NULL);
        mol_field->choice = mol_choice;
        mol_field->data.intvalue = mol_value;
        result = ValNodeNew (NULL);
        result->choice = FieldType_molinfo_field;
        result->data.ptrvalue = mol_field;
      }
    }
  }
  return result;
}
5278
5279
5280
/* Maps a FieldPairType choice onto the corresponding FieldType choice.
 * Returns 0 for any choice that has no mapping.
 */
static Uint1 FieldTypeChoiceFromFieldPairTypeChoice (Uint1 field_pair_choice)
{
  if (field_pair_choice == FieldPairType_source_qual) {
    return FieldType_source_qual;
  }
  if (field_pair_choice == FieldPairType_feature_field) {
    return FieldType_feature_field;
  }
  if (field_pair_choice == FieldPairType_cds_gene_prot) {
    return FieldType_cds_gene_prot;
  }
  if (field_pair_choice == FieldPairType_molinfo_field) {
    return FieldType_molinfo_field;
  }
  return 0;
}
5301
5302
5303
/* Returns the FieldType choice that an AECR action operates on.
 * Apply, edit and remove actions carry a single field, whose choice is
 * returned directly.  Convert, copy, swap and parse actions carry a field
 * pair, whose choice is translated with
 * FieldTypeChoiceFromFieldPairTypeChoice.
 * Returns 0 when the action is NULL or empty, when the field or field
 * pair is missing, or when the action choice is not recognized.
 */
NLM_EXTERN Uint1 FieldTypeFromAECRAction (AECRActionPtr action)
{
  Uint1 field_type = 0;
  ApplyActionPtr a;
  EditActionPtr  e;
  ConvertActionPtr v;
  CopyActionPtr c;
  SwapActionPtr s;
  RemoveActionPtr r;
  AECRParseActionPtr p;

  if (action == NULL || action->action == NULL || action->action->data.ptrvalue == NULL) {
    return 0;
  }
  switch (action->action->choice) {
    case ActionChoice_apply:
      a = (ApplyActionPtr) action->action->data.ptrvalue;
      if (a->field != NULL) {
        field_type = a->field->choice;
      }
      break;
    case ActionChoice_edit:
      e = (EditActionPtr) action->action->data.ptrvalue;
      if (e->field != NULL) {
        field_type = e->field->choice;
      }
      break;
    case ActionChoice_convert:
      v = (ConvertActionPtr) action->action->data.ptrvalue;
      /* guard the pair the same way the single-field actions guard ->field */
      if (v->fields != NULL) {
        field_type = FieldTypeChoiceFromFieldPairTypeChoice (v->fields->choice);
      }
      break;
    case ActionChoice_copy:
      c = (CopyActionPtr) action->action->data.ptrvalue;
      if (c->fields != NULL) {
        field_type = FieldTypeChoiceFromFieldPairTypeChoice (c->fields->choice);
      }
      break;
    case ActionChoice_swap:
      s = (SwapActionPtr) action->action->data.ptrvalue;
      if (s->fields != NULL) {
        field_type = FieldTypeChoiceFromFieldPairTypeChoice (s->fields->choice);
      }
      break;
    case ActionChoice_remove:
      r = (RemoveActionPtr) action->action->data.ptrvalue;
      if (r->field != NULL) {
        field_type = r->field->choice;
      }
      break;
    case ActionChoice_parse:
      p = (AECRParseActionPtr) action->action->data.ptrvalue;
      if (p->fields != NULL) {
        field_type = FieldTypeChoiceFromFieldPairTypeChoice (p->fields->choice);
      }
      break;
  }
  return field_type;
}
5355
5356
5357
/* Fetches the text value of field from the object (choice, data).
 * choice is an object-manager type (OBJ_SEQFEAT, OBJ_BIOSEQ, ...) or 0
 * when data is a CGPSetPtr.  scp is passed through to the underlying
 * getters as a string constraint; the molinfo getter takes no constraint.
 * Returns NULL when data or field is missing, when the object type does
 * not suit the field type, or when the underlying getter returns NULL.
 * NOTE(review): the returned string appears to be heap-allocated (other
 * code in this file releases such values with MemFree) - confirm ownership
 * against the individual getters.
 */
static CharPtr GetFieldValueForObject (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp)
{
  CharPtr str = NULL;
  FeatureFieldPtr feature_field;

  /* this function returns a pointer, so the failure value is NULL, not FALSE */
  if (data == NULL || field == NULL || field->data.ptrvalue == NULL) return NULL;

  switch (field->choice) {
    case FieldType_source_qual :
      str = GetSourceQualFromBioSource (GetBioSourceFromObject (choice, data), (SourceQualChoicePtr) field->data.ptrvalue, scp);
      break;
    case FieldType_feature_field :
      if (choice == OBJ_SEQFEAT) {
        str = GetQualFromFeature ((SeqFeatPtr) data, (FeatureFieldPtr) field->data.ptrvalue, scp);
      }
      break;
    case FieldType_cds_gene_prot :
      if (choice == 0) {
        str = GetFieldValueFromCGPSet ((CGPSetPtr) data, field->data.intvalue, scp);
      } else if (choice == OBJ_SEQFEAT) {
        /* a lone feature: translate the CDS-gene-prot field into a temporary feature field */
        feature_field = FeatureFieldFromCDSGeneProtField (field->data.intvalue);
        str = GetQualFromFeature ((SeqFeatPtr) data, feature_field, scp);
        feature_field = FeatureFieldFree (feature_field);
      }
      break;
    case FieldType_molinfo_field :
      if (choice == OBJ_BIOSEQ) {
        str = GetSequenceQualFromBioseq ((BioseqPtr) data, field->data.ptrvalue);
      }
      break;
  }
  return str;
}
5390
5391
5392
/* Removes the value of field from the object (choice, data).
 * choice is an object-manager type or 0 when data is a CGPSetPtr; scp is
 * handed to the underlying remover as a string constraint (the molinfo
 * remover takes none).
 * Returns whatever the underlying remover reports, or FALSE when the
 * arguments are missing or the object type does not suit the field type.
 */
static Boolean RemoveFieldValueForObject (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp)
{
  FeatureFieldPtr tmp_field;
  Boolean         removed = FALSE;

  if (data == NULL || field == NULL || field->data.ptrvalue == NULL) {
    return FALSE;
  }

  if (field->choice == FieldType_source_qual) {
    removed = RemoveSourceQualFromBioSource (GetBioSourceFromObject (choice, data), (SourceQualChoicePtr) field->data.ptrvalue, scp);
  } else if (field->choice == FieldType_feature_field) {
    if (choice == OBJ_SEQFEAT) {
      removed = RemoveQualFromFeature ((SeqFeatPtr) data, (FeatureFieldPtr) field->data.ptrvalue, scp);
    }
  } else if (field->choice == FieldType_cds_gene_prot) {
    if (choice == 0) {
      removed = RemoveFieldValueFromCGPSet ((CGPSetPtr) data, field->data.intvalue, scp);
    } else if (choice == OBJ_SEQFEAT) {
      /* a lone feature: go through a temporary feature field */
      tmp_field = FeatureFieldFromCDSGeneProtField (field->data.intvalue);
      removed = RemoveQualFromFeature ((SeqFeatPtr) data, tmp_field, scp);
      tmp_field = FeatureFieldFree (tmp_field);
    }
  } else if (field->choice == FieldType_molinfo_field) {
    if (choice == OBJ_BIOSEQ) {
      removed = RemoveSequenceQualFromBioseq ((BioseqPtr) data, field->data.ptrvalue);
    }
  }
  return removed;
}
5425
5426
5427
/* Stores value into field on the object (choice, data).
 * choice is an object-manager type or 0 when data is a CGPSetPtr; scp and
 * existing_text are forwarded unchanged to the underlying setters.
 * For molinfo fields neither value nor existing_text is used: the setter
 * is given only the field itself.
 * Returns whatever the underlying setter reports, or FALSE when the
 * arguments are missing or the object type does not suit the field type.
 */
static Boolean SetFieldValueForObject (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
{
  FeatureFieldPtr tmp_field;
  Boolean         applied = FALSE;

  if (data == NULL || field == NULL || field->data.ptrvalue == NULL) {
    return FALSE;
  }

  if (field->choice == FieldType_source_qual) {
    applied = SetSourceQualInBioSource (GetBioSourceFromObject (choice, data), (SourceQualChoicePtr) field->data.ptrvalue, scp, value, existing_text);
  } else if (field->choice == FieldType_feature_field) {
    if (choice == OBJ_SEQFEAT) {
      applied = SetQualOnFeature ((SeqFeatPtr) data, (FeatureFieldPtr) field->data.ptrvalue, scp, value, existing_text);
    }
  } else if (field->choice == FieldType_cds_gene_prot) {
    if (choice == 0) {
      applied = SetFieldValueInCGPSet ((CGPSetPtr) data, field->data.intvalue, scp, value, existing_text);
    } else if (choice == OBJ_SEQFEAT) {
      /* a lone feature: go through a temporary feature field */
      tmp_field = FeatureFieldFromCDSGeneProtField (field->data.intvalue);
      applied = SetQualOnFeature ((SeqFeatPtr) data, tmp_field, scp, value, existing_text);
      tmp_field = FeatureFieldFree (tmp_field);
    }
  } else if (field->choice == FieldType_molinfo_field) {
    if (choice == OBJ_BIOSEQ) {
      applied = SetSequenceQualOnBioseq ((BioseqPtr) data, field->data.ptrvalue);
    }
  }
  return applied;
}
5460
5461
5462
/* Reports whether the object (choice, data) is of a kind that can carry
 * the given field:
 *   source qual    - a BioSource feature or a source descriptor
 *   feature field  - a feature whose subtype matches the field's feature
 *                    type (or the field accepts any feature type)
 *   CDS-gene-prot  - a CGP set (choice == 0)
 *   molinfo        - a Bioseq
 * Returns FALSE for NULL arguments and for unrecognized field choices.
 */
static Boolean IsObjectAppropriateForFieldValue (Uint1 choice, Pointer data, FieldTypePtr field)
{
  SeqFeatPtr      feat;
  SeqDescrPtr     descr;
  FeatureFieldPtr wanted;

  if (data == NULL || field == NULL) {
    return FALSE;
  }

  if (field->choice == FieldType_source_qual) {
    if (choice == OBJ_SEQFEAT) {
      feat = (SeqFeatPtr) data;
      if (feat->data.choice == SEQFEAT_BIOSRC) {
        return TRUE;
      }
    } else if (choice == OBJ_SEQDESC) {
      descr = (SeqDescrPtr) data;
      if (descr->choice == Seq_descr_source) {
        return TRUE;
      }
    }
    return FALSE;
  }

  if (field->choice == FieldType_feature_field) {
    if (choice != OBJ_SEQFEAT) {
      return FALSE;
    }
    feat = (SeqFeatPtr) data;
    wanted = (FeatureFieldPtr) field->data.ptrvalue;
    if (wanted == NULL) {
      return FALSE;
    }
    if (wanted->type == Feature_type_any || GetFeatdefFromFeatureType (wanted->type) == feat->idx.subtype) {
      return TRUE;
    }
    return FALSE;
  }

  if (field->choice == FieldType_cds_gene_prot) {
    if (choice == 0) {
      return TRUE;
    }
    return FALSE;
  }

  if (field->choice == FieldType_molinfo_field) {
    if (choice == OBJ_BIOSEQ) {
      return TRUE;
    }
    return FALSE;
  }

  return FALSE;
}
5507
5508
5509
/* Reports whether the object (choice, data) can carry the "from" field of
 * fieldpair.  A temporary FieldType is built from the pair, checked with
 * IsObjectAppropriateForFieldValue, and released again.
 */
static Boolean IsObjectAppropriateForFieldPair (Uint1 choice, Pointer data, FieldPairTypePtr fieldpair)
{
  FieldTypePtr from_field = GetFromFieldFromFieldPair (fieldpair);
  Boolean      suitable = IsObjectAppropriateForFieldValue (choice, data, from_field);

  from_field = FieldTypeFree (from_field);
  return suitable;
}
5519
5520
5521
/* Reports whether two FieldTypes refer to the same field.
 *   source qual   - same inner choice; text quals must also name the same
 *                   qualifier, location and origin match unconditionally
 *   feature field - feature types equal (or either is "any") and both
 *                   name the same legal qualifier
 *   CDS-gene-prot - same field value
 * Returns FALSE for NULL arguments, differing choices, and every other
 * field choice.
 * NOTE(review): FieldType_molinfo_field is not handled here, so two
 * molinfo fields never match - confirm this is intended.
 */
static Boolean DoFieldTypesMatch (FieldTypePtr field1, FieldTypePtr field2)
{
  SourceQualChoicePtr src_a, src_b;
  FeatureFieldPtr     feat_a, feat_b;

  if (field1 == NULL || field2 == NULL) {
    return FALSE;
  }
  if (field1->choice != field2->choice) {
    return FALSE;
  }

  if (field1->choice == FieldType_source_qual) {
    src_a = (SourceQualChoicePtr) field1->data.ptrvalue;
    src_b = (SourceQualChoicePtr) field2->data.ptrvalue;
    if (src_a == NULL || src_b == NULL || src_a->choice != src_b->choice) {
      return FALSE;
    }
    if (src_a->choice == SourceQualChoice_textqual) {
      return (src_a->data.intvalue == src_b->data.intvalue) ? TRUE : FALSE;
    }
    if (src_a->choice == SourceQualChoice_location || src_a->choice == SourceQualChoice_origin) {
      return TRUE;
    }
    return FALSE;
  }

  if (field1->choice == FieldType_feature_field) {
    feat_a = (FeatureFieldPtr) field1->data.ptrvalue;
    feat_b = (FeatureFieldPtr) field2->data.ptrvalue;
    if (feat_a == NULL || feat_b == NULL) {
      return FALSE;
    }
    if (feat_a->type != feat_b->type
        && feat_a->type != Feature_type_any
        && feat_b->type != Feature_type_any) {
      return FALSE;
    }
    if (feat_a->field == NULL || feat_b->field == NULL) {
      return FALSE;
    }
    if (feat_a->field->choice != FeatQualChoice_legal_qual || feat_b->field->choice != FeatQualChoice_legal_qual) {
      return FALSE;
    }
    return (feat_a->field->data.intvalue == feat_b->field->data.intvalue) ? TRUE : FALSE;
  }

  if (field1->choice == FieldType_cds_gene_prot) {
    return (field1->data.intvalue == field2->data.intvalue) ? TRUE : FALSE;
  }

  return FALSE;
}
5567
5568
5569
/* Reports whether biop carries a modifier for the given source qualifier.
 * srcqual is first translated to an OrgMod subtype; if that yields -1 it
 * is translated to a SubSource subtype instead, and the matching list on
 * biop is searched for an entry with that subtype.
 */
static Boolean IsNonTextSourceQualPresent (BioSourcePtr biop, Int4 srcqual)
{
  Int4         wanted;
  OrgModPtr    orgmod;
  SubSourcePtr subsrc;

  if (biop == NULL) {
    return FALSE;
  }

  wanted = GetOrgModQualFromSrcQual (srcqual);
  if (wanted != -1) {
    /* OrgMod qualifier: look on the organism name, if there is one */
    if (biop->org == NULL || biop->org->orgname == NULL) {
      return FALSE;
    }
    for (orgmod = biop->org->orgname->mod; orgmod != NULL; orgmod = orgmod->next) {
      if (orgmod->subtype == wanted) {
        return TRUE;
      }
    }
    return FALSE;
  }

  /* not an OrgMod qualifier: treat it as a SubSource qualifier */
  wanted = GetSubSrcQualFromSrcQual (srcqual);
  for (subsrc = biop->subtype; subsrc != NULL; subsrc = subsrc->next) {
    if (subsrc->subtype == wanted) {
      return TRUE;
    }
  }
  return FALSE;
}
5597
5598
5599
/* Reports whether the source qualifier selected by scp is present on biop.
 *   text qual - non-text qualifiers are looked up by subtype; text
 *               qualifiers count as present when their value has text
 *   location  - biop->genome is non-zero
 *   origin    - biop->origin is non-zero
 * A NULL scp matches any non-NULL biop; a NULL biop never matches.
 */
static Boolean IsSourceQualPresent (BioSourcePtr biop, SourceQualChoicePtr scp)
{
  CharPtr text;
  Boolean has_text;

  if (biop == NULL) {
    return FALSE;
  }
  if (scp == NULL) {
    return TRUE;
  }

  if (scp->choice == SourceQualChoice_location) {
    return (biop->genome != 0) ? TRUE : FALSE;
  }
  if (scp->choice == SourceQualChoice_origin) {
    return (biop->origin != 0) ? TRUE : FALSE;
  }
  if (scp->choice != SourceQualChoice_textqual) {
    return FALSE;
  }

  if (IsNonTextSourceQual (scp->data.intvalue)) {
    return IsNonTextSourceQualPresent (biop, scp->data.intvalue);
  }

  /* text qualifier: fetch the value, test it, and release it */
  text = GetSourceQualFromBioSource (biop, scp, NULL);
  has_text = StringHasNoText (text) ? FALSE : TRUE;
  text = MemFree (text);
  return has_text;
}
5632
5633
5634
typedef struct objecthasstring
5635
{
5636
  StringConstraintPtr scp;
5637
  Boolean             found;
5638
} ObjectHasStringData, PNTR ObjectHasStringPtr;
5639
5640
5641
/* ASN.1 export callback: for every string-typed value handed to it, tests
 * the string against the constraint stored in the ObjectHasStringData that
 * pAEOS->data points to, and records a hit in its found flag.  Values of
 * any other base type are ignored.
 */
static void LIBCALLBACK AsnWriteConstraintCallBack (AsnExpOptStructPtr pAEOS)
{
  ObjectHasStringPtr search = (ObjectHasStringPtr) pAEOS->data;
  CharPtr            text;

  if (!ISA_STRINGTYPE (AsnFindBaseIsa (pAEOS->atp))) {
    return;
  }

  text = (CharPtr) pAEOS->dvp->ptrvalue;
  /* an earlier hit is never cleared, only added to */
  if (DoesSingleStringMatchConstraint (text, search->scp)) {
    search->found = TRUE;
  }
}
5654
5655
5656
/* Reports whether any string inside the object (choice, data) satisfies
 * the string constraint scp.
 *
 * choice == 0: data is a CGPSetPtr.  Every feature in its gene, CDS, mRNA
 * and protein lists is tested recursively as an OBJ_SEQFEAT.  For a
 * "not present" constraint all tested features must match; otherwise one
 * matching feature is enough.
 *
 * Otherwise the object is written to a null ASN.1 stream through the
 * object manager's writer for that type, with AsnWriteConstraintCallBack
 * testing each string as it goes by.  If nothing matched and the object is
 * a feature, two extra sources are tried: for a coding region, the protein
 * feature on its product; for a tRNA, the indexed feature label, with and
 * without a "tRNA-" prefix.  The result is inverted for a "not present"
 * constraint.
 *
 * Returns FALSE for NULL data or an unregistered object type, and TRUE for
 * a NULL scp.
 */
static Boolean DoesObjectMatchStringConstraint (Uint1 choice, Pointer data, StringConstraintPtr scp)
{
  ObjMgrPtr           omp;
  ObjMgrTypePtr       omtp;
  AsnIoPtr            aip;
  AsnExpOptPtr        aeop;
  ObjectHasStringData ohsd;
  SeqFeatPtr          sfp, prot;
  SeqMgrFeatContext   fcontext;
  CharPtr             search_txt;
  CGPSetPtr           c;
  ValNodePtr          vnp;
  ValNodePtr          feature_lists[4];
  Int4                i;
  Boolean             all_match = TRUE, any_match = FALSE, rval;
  BioseqPtr           protbsp;

  if (data == NULL) return FALSE;
  if (scp == NULL) return TRUE;

  if (choice == 0) {
    /* CDS-Gene-Prot set */
    c = (CGPSetPtr) data;
    feature_lists[0] = c->gene_list;
    feature_lists[1] = c->cds_list;
    feature_lists[2] = c->mrna_list;
    feature_lists[3] = c->prot_list;
    /* stop as soon as both answers are known: something matched and something did not */
    for (i = 0; i < 4; i++) {
      for (vnp = feature_lists[i]; vnp != NULL && (!any_match || all_match); vnp = vnp->next) {
        if (DoesObjectMatchStringConstraint (OBJ_SEQFEAT, vnp->data.ptrvalue, scp)) {
          any_match = TRUE;
        } else {
          all_match = FALSE;
        }
      }
    }
    if (scp->not_present) {
      rval = all_match;
    } else {
      rval = any_match;
    }
  } else {
    omp = ObjMgrGet ();
    omtp = ObjMgrTypeFind (omp, choice, NULL, NULL);
    if (omtp == NULL) return FALSE;
    aip = AsnIoNullOpen ();
    aeop = AsnExpOptNew (aip, NULL, NULL, AsnWriteConstraintCallBack);
    ohsd.found = FALSE;
    ohsd.scp = scp;
    if (aeop != NULL) {
      aeop->user_data = (Pointer) &ohsd;
    }

    (omtp->asnwrite) (data, aip, NULL);

    if (!ohsd.found && omtp->datatype == OBJ_SEQFEAT)
    {
      sfp = (SeqFeatPtr) data;
      if (sfp->data.choice == SEQFEAT_CDREGION) {
        /* also scan the protein feature on the coding region's product */
        protbsp = BioseqFindFromSeqLoc (sfp->product);
        prot = SeqMgrGetNextFeature (protbsp, NULL, 0, FEATDEF_PROT, &fcontext);
        if (prot != NULL) {
          (omtp->asnwrite) (prot, aip, NULL);
        }
      } else {
        if (SeqMgrFeaturesAreIndexed(sfp->idx.entityID) == 0) {
          SeqMgrIndexFeatures (sfp->idx.entityID, NULL);
        }
        if (sfp->idx.subtype == FEATDEF_tRNA) {
          sfp = SeqMgrGetDesiredFeature (sfp->idx.entityID, NULL, sfp->idx.itemID, 0, sfp, &fcontext);
          /* only trust fcontext (and its label) when the indexed lookup found the feature */
          if (sfp != NULL) {
            ohsd.found = DoesSingleStringMatchConstraint (fcontext.label, ohsd.scp);
            if (!ohsd.found && sfp->idx.subtype == FEATDEF_tRNA)
            {
              /* room for "tRNA-" (5 characters), the label, and the terminator */
              search_txt = (CharPtr) MemNew ((StringLen (fcontext.label) + 6) * sizeof (Char));
              if (search_txt != NULL)
              {
                sprintf (search_txt, "tRNA-%s", fcontext.label);
                ohsd.found = DoesSingleStringMatchConstraint (search_txt, ohsd.scp);
                search_txt = MemFree (search_txt);
              }
            }
          }
        }
      }
    }
    AsnIoClose (aip);
    if (scp->not_present) {
      rval = !ohsd.found;
    } else {
      rval = ohsd.found;
    }
  }
  return rval;
}
5763
5764
5765
/* A source constraint is empty when it is NULL, or when it names neither
 * field and its string constraint is itself empty.
 */
NLM_EXTERN Boolean IsSourceConstraintEmpty (SourceConstraintPtr scp)
{
  if (scp == NULL) {
    return TRUE;
  }
  if (scp->field1 != NULL || scp->field2 != NULL) {
    return FALSE;
  }
  return IsStringConstraintEmpty (scp->constraint) ? TRUE : FALSE;
}
5777
5778
/* Reports whether biop satisfies the source constraint scp.
 *
 *   both fields set - the two qualifier values (fetched under the string
 *                     constraint, if there is one) must compare equal
 *   one field set   - without a string constraint the qualifier must be
 *                     present; with one, a value satisfying it must exist,
 *                     or, for a "not present" constraint, the qualifier
 *                     may be absent altogether
 *   no field set    - without a string constraint everything matches;
 *                     with one, the whole BioSource is scanned as a source
 *                     descriptor
 *
 * Returns FALSE for a NULL biop and TRUE for a NULL scp.
 */
NLM_EXTERN Boolean DoesBiosourceMatchConstraint (BioSourcePtr biop, SourceConstraintPtr scp)
{
  SourceQualChoicePtr lone_field;
  StringConstraintPtr text_filter;
  CharPtr             val_a, val_b;
  ValNode             wrapper;
  Boolean             unconstrained;
  Boolean             matched = FALSE;

  if (biop == NULL) {
    return FALSE;
  }
  if (scp == NULL) {
    return TRUE;
  }

  unconstrained = IsStringConstraintEmpty (scp->constraint);

  if (scp->field1 != NULL && scp->field2 != NULL) {
    /* looking for quals to match */
    text_filter = unconstrained ? NULL : scp->constraint;
    val_a = GetSourceQualFromBioSource (biop, scp->field1, text_filter);
    val_b = GetSourceQualFromBioSource (biop, scp->field2, text_filter);
    if (StringCmp (val_a, val_b) == 0) {
      matched = TRUE;
    }
    val_a = MemFree (val_a);
    val_b = MemFree (val_b);
    return matched;
  }

  if (scp->field1 == NULL && scp->field2 == NULL) {
    if (unconstrained) {
      /* nothing specified, automatic match */
      return TRUE;
    }
    /* generic string constraint: present the BioSource as a source descriptor */
    wrapper.choice = Seq_descr_source;
    wrapper.next = NULL;
    wrapper.extended = 0;
    wrapper.data.ptrvalue = biop;
    return DoesObjectMatchStringConstraint (OBJ_SEQDESC, &wrapper, scp->constraint);
  }

  /* exactly one field is set */
  lone_field = (scp->field1 != NULL) ? scp->field1 : scp->field2;

  if (unconstrained) {
    /* looking for qual present */
    return IsSourceQualPresent (biop, lone_field);
  }

  val_a = GetSourceQualFromBioSource (biop, lone_field, scp->constraint);
  if (val_a != NULL) {
    if (!StringHasNoText (val_a)) {
      matched = TRUE;
    }
  } else if (scp->constraint->not_present) {
    /* no value passed the constraint; accept only if the qualifier is absent altogether */
    val_a = GetSourceQualFromBioSource (biop, lone_field, NULL);
    if (val_a == NULL) {
      matched = TRUE;
    }
  }
  val_a = MemFree (val_a);
  return matched;
}
5852
5853
5854
/* Reports whether a CDS-gene-prot set satisfies a pseudo constraint.
 * constraint->feature selects which features of the set are examined:
 *   gene        - c->gene_list
 *   mRNA        - c->mrna_list
 *   cds         - c->cds_list
 *   prot        - c->prot_list, features with subtype FEATDEF_PROT
 *   mat_peptide - c->prot_list, features with subtype FEATDEF_mat_peptide_aa
 * The set matches when "some examined feature is pseudo" agrees with
 * constraint->is_pseudo.  An unrecognized feature selector examines
 * nothing, so it matches only a constraint asking for non-pseudo.
 * Returns FALSE for a NULL set and TRUE for a NULL constraint.
 */
static Boolean DoesCGPSetMatchPseudoConstraint (CGPSetPtr c, CDSGeneProtPseudoConstraintPtr constraint)
{
  Boolean    any_pseudo = FALSE;
  Boolean    check_subtype = FALSE;
  Int4       wanted_subtype = 0;
  ValNodePtr list = NULL;
  ValNodePtr vnp;
  SeqFeatPtr sfp;
  Boolean    rval = FALSE;

  if (c == NULL) return FALSE;
  if (constraint == NULL) return TRUE;

  /* pick the list that actually holds the requested kind of feature */
  switch (constraint->feature) {
    case CDSGeneProt_feature_type_constraint_gene :
      list = c->gene_list;
      break;
    case CDSGeneProt_feature_type_constraint_mRNA :
      list = c->mrna_list;
      break;
    case CDSGeneProt_feature_type_constraint_cds :
      list = c->cds_list;
      break;
    case CDSGeneProt_feature_type_constraint_prot :
      list = c->prot_list;
      check_subtype = TRUE;
      wanted_subtype = FEATDEF_PROT;
      break;
    case CDSGeneProt_feature_type_constraint_mat_peptide :
      list = c->prot_list;
      check_subtype = TRUE;
      wanted_subtype = FEATDEF_mat_peptide_aa;
      break;
  }

  for (vnp = list; vnp != NULL && !any_pseudo; vnp = vnp->next) {
    sfp = (SeqFeatPtr) vnp->data.ptrvalue;
    if (sfp != NULL && sfp->pseudo
        && (!check_subtype || sfp->idx.subtype == wanted_subtype)) {
      any_pseudo = TRUE;
    }
  }

  if ((any_pseudo && constraint->is_pseudo)
      || (!any_pseudo && !constraint->is_pseudo)) {
    rval = TRUE;
  }
  return rval;
}
5913
5914
5915
/* A CDS-gene-prot qualifier constraint is empty when it is NULL, or when
 * it names neither field and its string constraint is itself empty.
 */
NLM_EXTERN Boolean IsCDSGeneProtQualConstraintEmpty (CDSGeneProtQualConstraintPtr constraint)
{
  if (constraint == NULL) {
    return TRUE;
  }
  if (constraint->field1 != NULL || constraint->field2 != NULL) {
    return FALSE;
  }
  return IsStringConstraintEmpty (constraint->constraint) ? TRUE : FALSE;
}
5924
5925
5926
/* Tests a CDS-gene-prot set against a qualifier constraint.
 *
 * c          - set to examine; NULL never matches.
 * constraint - constraint to apply; NULL always matches.
 *
 * With an empty string constraint:
 *   - one field named:  matches when that field has a value in the set;
 *   - both fields named: matches when both field values compare equal;
 *   - no field named:   matches automatically.
 * With a non-empty string constraint:
 *   - one field named:  matches when the field yields non-blank text under
 *                       the constraint, or, for a not_present constraint,
 *                       when the field has no value at all;
 *   - both fields named: matches when the constrained values compare equal;
 *   - no field named:   the whole set is tested against the string constraint.
 */
static Boolean DoesCGPSetMatchQualConstraint (CGPSetPtr c, CDSGeneProtQualConstraintPtr constraint)
{
  Boolean rval = FALSE;
  CharPtr str, str1, str2;

  if (c == NULL) return FALSE;
  if (constraint == NULL) return TRUE;

  if (IsStringConstraintEmpty (constraint->constraint)) {
    /* looking for qual present */
    if (constraint->field1 != NULL && constraint->field2 == NULL) {
      str = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, NULL);
      if (str != NULL) {
        rval = TRUE;
        str = MemFree (str);
      }
    } else if (constraint->field2 != NULL && constraint->field1 == NULL) {
      /* same presence test as for field1; previously this branch never
       * reported a match even when the field had a value
       */
      str = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, NULL);
      if (str != NULL) {
        rval = TRUE;
        str = MemFree (str);
      }
    /* looking for quals to match */
    } else if (constraint->field1 != NULL && constraint->field2 != NULL) {
      str1 = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, NULL);
      str2 = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, NULL);
      if (StringCmp (str1, str2) == 0) {
        rval = TRUE;
      }
      str1 = MemFree (str1);
      str2 = MemFree (str2);
    } else {
      /* nothing specified, automatic match */
      rval = TRUE;
    }
  } else {
    if (constraint->field1 != NULL && constraint->field2 == NULL) {
      str1 = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, constraint->constraint);
      if (str1 == NULL) {
        if (constraint->constraint->not_present) {
          /* a not_present constraint is also satisfied by a missing field */
          str1 = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, NULL);
          if (str1 == NULL) {
            rval = TRUE;
          }
        }
      } else if (!StringHasNoText (str1)) {
        rval = TRUE;
      }
      str1 = MemFree (str1);
    } else if (constraint->field2 != NULL && constraint->field1 == NULL) {
      str2 = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, constraint->constraint);
      if (str2 == NULL) {
        if (constraint->constraint->not_present) {
          /* a not_present constraint is also satisfied by a missing field */
          str2 = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, NULL);
          if (str2 == NULL) {
            rval = TRUE;
          }
        }
      } else if (!StringHasNoText (str2)) {
        rval = TRUE;
      }
      str2 = MemFree (str2);
    } else if (constraint->field1 != NULL && constraint->field2 != NULL) {
      str1 = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, constraint->constraint);
      str2 = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, constraint->constraint);
      if (StringCmp (str1, str2) == 0) {
        rval = TRUE;
      }
      str1 = MemFree (str1);
      str2 = MemFree (str2);
    } else {
      /* generic string constraint */
      rval = DoesObjectMatchStringConstraint (0, c, constraint->constraint);
    }
  }
  return rval;
}
6004
6005
6006
/* Reports whether a sequence constraint imposes no restriction.
 * Returns TRUE for a NULL constraint, or when the molecule type is unset or
 * "any", the feature type is "any", and the ID string constraint is empty.
 */
NLM_EXTERN Boolean IsSequenceConstraintEmpty (SequenceConstraintPtr constraint)
{
  Boolean restricts_moltype;
  Boolean restricts_feature;

  if (constraint == NULL) {
    return TRUE;
  }

  restricts_moltype = (Boolean) (constraint->seqtype != NULL
                                 && constraint->seqtype->choice != SequenceConstraintMolTypeConstraint_any);
  restricts_feature = (Boolean) (constraint->feature != Feature_type_any);

  if (restricts_moltype || restricts_feature) {
    return FALSE;
  }
  return IsStringConstraintEmpty (constraint->id) ? TRUE : FALSE;
}
6014
6015
6016
/* Tests the IDs in a SeqId list against a string constraint.
 *
 * sip               - head of the ID list; NULL returns FALSE.
 * string_constraint - constraint to apply; NULL returns TRUE.
 *
 * Each ID is written on its own in PRINTID_FASTA_LONG form (at most 40
 * characters) and tried against the constraint in progressively looser
 * spellings: as written, then without a trailing '|' and without everything
 * up to and including the first '|', then (only when the search text holds
 * no '.') truncated at the first '.'.
 *
 * Returns: for an ordinary constraint, TRUE as soon as any ID matches and
 * FALSE if none does; for a not_present constraint the result is inverted.
 */
extern Boolean DoesSeqIDListMeetStringConstraint (SeqIdPtr sip, StringConstraintPtr string_constraint)
{
  Char       id [41];
  CharPtr    cp, cp_dst;
  SeqIdPtr   tmp;
  Boolean    match, changed;
  Int4       len;

  if (sip == NULL) 
  {
    return FALSE;
  }
  if (string_constraint == NULL)
  {
    return TRUE;
  }
  
  while (sip != NULL)
  {
    /* temporarily disconnect ID from list so only this ID is written */
    tmp = sip->next;
    sip->next = NULL;
    id [0] = '\0';
    SeqIdWrite (sip, id, PRINTID_FASTA_LONG, sizeof (id) - 1);
    sip->next = tmp;

    match = DoesSingleStringMatchConstraint (id, string_constraint);
    if (!match) 
    {
      changed = FALSE;
      /* remove terminating pipe character; the length check keeps an
       * empty ID from being indexed at position -1
       */
      len = (Int4) StringLen (id);
      if (len > 0 && id [len - 1] == '|') 
      {
        id [len - 1] = 0;
        changed = TRUE;
      }
      /* remove leading pipe identifier */
      cp = StringChr (id, '|');
      if (cp != NULL)
      {
        changed = TRUE;
        cp++;
        cp_dst = id;
        /* shift the remainder down to the start of the buffer */
        while (*cp != 0) 
        {
          *cp_dst = *cp;
          cp_dst++;
          cp++;
        }
        *cp_dst = 0;
      }  
      if (changed) 
      {
        match = DoesSingleStringMatchConstraint (id, string_constraint);
      }

      /* if search text doesn't have ., try ID without version */
      if (!match && StringChr (string_constraint->match_text, '.') == NULL) 
      {
        cp = StringChr (id, '.');
        if (cp != NULL) 
        {
          *cp = 0;
          match = DoesSingleStringMatchConstraint (id, string_constraint);
        }
      }       
    }

    if (match)
    {
      /* a hit satisfies a normal constraint and violates a not_present one */
      return string_constraint->not_present ? FALSE : TRUE;
    }
    sip = sip->next;
  }

  /* no ID matched */
  return string_constraint->not_present ? TRUE : FALSE;
}
6104
6105
6106
typedef struct rnatypebiomol {
6107
  Int4 rnatype;
6108
  Uint1 biomol;
6109
  CharPtr rnamolname;
6110
} RnaTypeBiomolData, PNTR RnaTypeBiomolPtr;
6111
6112
static RnaTypeBiomolData rna_type_biomol[] = {
6113
{ Sequence_constraint_rnamol_genomic , MOLECULE_TYPE_GENOMIC, "Genomic RNA" } ,
6114
{ Sequence_constraint_rnamol_precursor_RNA , MOLECULE_TYPE_PRE_MRNA , "Precursor RNA" } ,
6115
{ Sequence_constraint_rnamol_mRNA , MOLECULE_TYPE_MRNA , "mRNA [cDNA]" } ,
6116
{ Sequence_constraint_rnamol_rRNA , MOLECULE_TYPE_RRNA , "Ribosomal RNA" } ,
6117
{ Sequence_constraint_rnamol_tRNA , MOLECULE_TYPE_TRNA , "Transfer RNA" } ,
6118
{ Sequence_constraint_rnamol_snRNA , MOLECULE_TYPE_SNRNA , "Small nuclear RNA" } ,
6119
{ Sequence_constraint_rnamol_scRNA , MOLECULE_TYPE_SCRNA , "Small cytoplasmic RNA" } ,
6120
{ Sequence_constraint_rnamol_genomic_mRNA , MOLECULE_TYPE_GENOMIC_MRNA_MIX , "Genomic-mRNA" } ,
6121
{ Sequence_constraint_rnamol_cRNA , MOLECULE_TYPE_CRNA , "cRNA" } ,
6122
{ Sequence_constraint_rnamol_snoRNA , MOLECULE_TYPE_SNORNA , "Small nucleolar RNA" } ,
6123
{ Sequence_constraint_rnamol_transcribed_RNA , MOLECULE_TYPE_TRANSCRIBED_RNA , "Transcribed RNA" } ,
6124
{ Sequence_constraint_rnamol_ncRNA , MOLECULE_TYPE_NCRNA , "Non-coding  RNA" } ,
6125
{ Sequence_constraint_rnamol_transfer_messenger_RNA , MOLECULE_TYPE_TMRNA , "Transfer-messenger RNA" } } ;
6126
6127
/* Number of rows in rna_type_biomol.  The expansion is parenthesized so it
 * behaves as a single operand inside larger expressions.
 */
#define NUM_rna_type_biomol (sizeof (rna_type_biomol) / sizeof (rna_type_biomol[0]))
6128
6129
6130
/* Looks up the biomol code recorded in rna_type_biomol for an RNA type.
 * Returns the first matching row's biomol, or 0 when no row matches.
 */
NLM_EXTERN Uint1 GetBiomolForRnaType (Int4 rnatype) 
{
  Int4    row = 0;
  Uint1   biomol = 0;
  Boolean found = FALSE;

  while (!found && row < NUM_rna_type_biomol) {
    if (rna_type_biomol[row].rnatype == rnatype) {
      biomol = rna_type_biomol[row].biomol;
      found = TRUE;
    }
    row++;
  }
  return biomol;
}
6141
6142
6143
/* Looks up the display name recorded in rna_type_biomol for an RNA type.
 * Returns the table's own string for the first matching row (not a copy),
 * or the literal "invalid RNA type" when no row matches.
 */
NLM_EXTERN CharPtr GetBiomolNameForRnaType (Int4 rnatype)
{
  CharPtr name = "invalid RNA type";
  Int4    row;

  for (row = 0; row < NUM_rna_type_biomol; row++) {
    if (rna_type_biomol[row].rnatype != rnatype) {
      continue;
    }
    name = rna_type_biomol[row].rnamolname;
    break;
  }
  return name;
}
6154
6155
/* Appends one entry per RNA subtype to *field_list: first an "Any RNA" entry
 * tagged Sequence_constraint_rnamol_any, then every row of rna_type_biomol in
 * table order.  Each entry carries a newly saved copy of the name.
 * Does nothing when field_list is NULL.
 */
NLM_EXTERN void AddAllRNASubtypesToChoiceList (ValNodePtr PNTR field_list)
{
  Int4 row;

  if (field_list != NULL) {
    ValNodeAddPointer (field_list, Sequence_constraint_rnamol_any, StringSave ("Any RNA"));
    row = 0;
    while (row < NUM_rna_type_biomol) {
      ValNodeAddPointer (field_list,
                         rna_type_biomol[row].rnatype,
                         StringSave (rna_type_biomol[row].rnamolname));
      row++;
    }
  }
}
6166
6167
6168
/* Tests a Bioseq against a sequence constraint.
 *
 * Returns FALSE for a NULL bsp and TRUE for an empty constraint.  Otherwise
 * the sequence must pass, in this order:
 *   1. the molecule-type test (nucleotide / DNA / RNA / protein; an RNA
 *      constraint with a specific RNA type additionally requires a molinfo
 *      descriptor whose biomol equals GetBiomolForRnaType of that type);
 *   2. the feature test (a feature of the requested type must be found);
 *   3. the ID test (the Bioseq's ID list must meet the ID string constraint).
 */
static Boolean DoesSequenceMatchSequenceConstraint (BioseqPtr bsp, SequenceConstraintPtr constraint)
{
  SeqMgrFeatContext fcontext;
  SeqDescrPtr       sdp;
  SeqMgrDescContext dcontext;
  MolInfoPtr        mip;
  Boolean           moltype_ok = TRUE;

  if (bsp == NULL) {
    return FALSE;
  }
  if (IsSequenceConstraintEmpty (constraint)) {
    return TRUE;
  }

  /* 1. molecule type */
  if (constraint->seqtype != NULL && constraint->seqtype->choice != SequenceConstraintMolTypeConstraint_any) {
    switch (constraint->seqtype->choice) {
      case SequenceConstraintMolTypeConstraint_nucleotide :
        if (ISA_aa (bsp->mol)) {
          moltype_ok = FALSE;
        }
        break;
      case SequenceConstraintMolTypeConstraint_dna :
        if (bsp->mol != Seq_mol_dna) {
          moltype_ok = FALSE;
        }
        break;
      case SequenceConstraintMolTypeConstraint_rna :
        if (bsp->mol != Seq_mol_rna) {
          moltype_ok = FALSE;
        } else if (constraint->seqtype->data.intvalue != Sequence_constraint_rnamol_any) {
          /* a specific RNA type was requested: compare against molinfo */
          sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
          if (sdp == NULL || sdp->data.ptrvalue == NULL || sdp->choice != Seq_descr_molinfo) {
            moltype_ok = FALSE;
          } else {
            mip = (MolInfoPtr) sdp->data.ptrvalue;
            if (GetBiomolForRnaType (constraint->seqtype->data.intvalue) != mip->biomol) {
              moltype_ok = FALSE;
            }
          }
        }
        break;
      case SequenceConstraintMolTypeConstraint_protein :
        if (!ISA_aa (bsp->mol)) {
          moltype_ok = FALSE;
        }
        break;
    }
    if (!moltype_ok) {
      return FALSE;
    }
  }

  /* 2. required feature type */
  if (constraint->feature != Feature_type_any
      && SeqMgrGetNextFeature (bsp, NULL, 0, GetFeatdefFromFeatureType (constraint->feature), &fcontext) == NULL) {
    return FALSE;
  }

  /* 3. sequence ID */
  if (IsStringConstraintEmpty (constraint->id)) {
    return TRUE;
  }
  return DoesSeqIDListMeetStringConstraint (bsp->id, constraint->id) ? TRUE : FALSE;
}
6226
6227
/* Tests whether any sequence inside a Bioseq-set meets a sequence constraint.
 * Members that are Bioseqs are tested directly; members that are sets are
 * searched recursively.  The search stops at the first match.
 * Returns FALSE for a NULL set and TRUE for an empty constraint.
 */
static Boolean DoesSequenceInSetMatchSequenceConstraint (BioseqSetPtr bssp, SequenceConstraintPtr constraint)
{
  SeqEntryPtr member;
  Boolean     matched;

  if (bssp == NULL) {
    return FALSE;
  }
  if (IsSequenceConstraintEmpty (constraint)) {
    return TRUE;
  }

  for (member = bssp->seq_set; member != NULL; member = member->next) {
    matched = FALSE;
    if (IS_Bioseq (member)) {
      matched = DoesSequenceMatchSequenceConstraint ((BioseqPtr) member->data.ptrvalue, constraint);
    } else if (IS_Bioseq_set (member)) {
      matched = DoesSequenceInSetMatchSequenceConstraint ((BioseqSetPtr) member->data.ptrvalue, constraint);
    }
    if (matched) {
      return matched;
    }
  }
  return FALSE;
}
6244
6245
6246
/* Tests the sequence associated with an object against a sequence constraint.
 *
 * The object's sequence is obtained with GetSequenceForObject.  When none is
 * found and the object is an extended descriptor whose parent is a
 * Bioseq-set, the constraint is satisfied if any sequence in that set
 * satisfies it.  Returns FALSE for NULL data, TRUE for an empty constraint.
 */
static Boolean DoesObjectMatchSequenceConstraint (Uint1 choice, Pointer data, SequenceConstraintPtr constraint)
{
  BioseqPtr     bsp;
  SeqDescrPtr   sdp;
  ObjValNodePtr ovp;

  if (data == NULL) {
    return FALSE;
  }
  if (IsSequenceConstraintEmpty (constraint)) {
    return TRUE;
  }

  bsp = GetSequenceForObject (choice, data);
  if (bsp != NULL) {
    return DoesSequenceMatchSequenceConstraint (bsp, constraint);
  }

  /* no sequence: only a set-level descriptor can still match */
  if (choice != OBJ_SEQDESC) {
    return FALSE;
  }
  sdp = (SeqDescrPtr) data;
  if (!sdp->extended) {
    return FALSE;
  }
  ovp = (ObjValNodePtr) sdp;
  if (ovp->idx.parenttype != OBJ_BIOSEQSET || ovp->idx.parentptr == NULL) {
    return FALSE;
  }
  return DoesSequenceInSetMatchSequenceConstraint ((BioseqSetPtr) ovp->idx.parentptr, constraint);
}
6272
6273
6274
/* Dispatches one constraint to the matcher for its kind.
 *
 * choice / data - object type and object to test; NULL data never matches.
 * constraint    - constraint node; NULL always matches.
 *
 * The two cdsgeneprot constraint kinds apply only when choice is 0 (the
 * object is then passed on as a CDS-gene-prot set); any other object type
 * fails them.  A constraint kind not listed here matches.
 */
static Boolean DoesObjectMatchConstraint (Uint1 choice, Pointer data, ConstraintChoicePtr constraint)
{
  if (data == NULL) {
    return FALSE;
  }
  if (constraint == NULL) {
    return TRUE;
  }

  switch (constraint->choice) {
    case ConstraintChoice_string :
      return DoesObjectMatchStringConstraint (choice, data, constraint->data.ptrvalue);
    case ConstraintChoice_location :
      return DoesObjectMatchLocationConstraint (choice, data, constraint->data.ptrvalue);
    case ConstraintChoice_source :
      return DoesBiosourceMatchConstraint (GetBioSourceFromObject (choice, data), constraint->data.ptrvalue);
    case ConstraintChoice_cdsgeneprot_qual :
      if (choice != 0) {
        return FALSE;
      }
      return DoesCGPSetMatchQualConstraint (data, constraint->data.ptrvalue);
    case ConstraintChoice_cdsgeneprot_pseudo :
      if (choice != 0) {
        return FALSE;
      }
      return DoesCGPSetMatchPseudoConstraint (data, constraint->data.ptrvalue);
    case ConstraintChoice_sequence :
      return DoesObjectMatchSequenceConstraint (choice, data, constraint->data.ptrvalue);
    default :
      break;
  }
  return TRUE;
}
6311
6312
6313
/* Tests an object against every constraint in a set; all must match.
 * Each node of the list is itself handed to DoesObjectMatchConstraint.
 * Returns FALSE for NULL data, TRUE for an empty (NULL) set, and stops at
 * the first constraint that fails.
 */
static Boolean DoesObjectMatchConstraintChoiceSet (Uint1 choice, Pointer data, ConstraintChoiceSetPtr csp)
{
  ConstraintChoiceSetPtr node;

  if (data == NULL) {
    return FALSE;
  }

  for (node = csp; node != NULL; node = node->next) {
    if (!DoesObjectMatchConstraint (choice, data, node)) {
      return FALSE;
    }
  }
  return TRUE;
}
6325
6326
6327
/* Finds the string constraint in a constraint set that applies to a field.
 *
 * field - field of interest.
 * csp   - constraint set; each list node is itself a constraint choice
 *         (node->choice gives the kind, node->data.ptrvalue the constraint),
 *         the same layout DoesObjectMatchConstraintChoiceSet relies on.
 *
 * A plain string constraint always applies.  A source constraint applies
 * when one of its fields matches `field`.  A CDS-gene-prot qualifier
 * constraint applies when `field` is a cds_gene_prot field equal to one of
 * the constraint's fields.  When several nodes apply, the last one wins.
 *
 * Returns a pointer owned by the constraint set (callers must not free it),
 * or NULL when nothing applies.
 */
NLM_EXTERN StringConstraintPtr FindStringConstraintInConstraintSetForField (FieldTypePtr field, ConstraintChoiceSetPtr csp)
{
  StringConstraintPtr scp = NULL;
  SourceConstraintPtr source_constraint;
  CDSGeneProtQualConstraintPtr cgp_constraint;

  while (csp != NULL) {
    /* the list node is the constraint choice; it was previously
     * misread through csp->data.ptrvalue
     */
    switch (csp->choice) {
      case ConstraintChoice_string :
        scp = csp->data.ptrvalue;
        break;
      case ConstraintChoice_source :
        source_constraint = (SourceConstraintPtr) csp->data.ptrvalue;
        if (source_constraint != NULL && source_constraint->constraint != NULL
            && ((source_constraint->field1 != NULL
                 && DoFieldTypesMatch (field, source_constraint->field1))
                || (source_constraint->field2 != NULL
                 && DoFieldTypesMatch (field, source_constraint->field2)))) {
          scp = source_constraint->constraint;
        } 
        break;
      case ConstraintChoice_cdsgeneprot_qual :
        /* the constraint comes from the node, not from the field */
        cgp_constraint = (CDSGeneProtQualConstraintPtr) csp->data.ptrvalue;
        if (field != NULL
            && field->choice == FieldType_cds_gene_prot
            && cgp_constraint != NULL && cgp_constraint->constraint != NULL
            && ((cgp_constraint->field1 != NULL && cgp_constraint->field1->data.intvalue == field->data.intvalue)
                || (cgp_constraint->field2 != NULL && cgp_constraint->field2->data.intvalue == field->data.intvalue))) {
          scp = cgp_constraint->constraint;
        }
        break;
      default :
        break;
    }
    csp = csp->next;
  }
  return scp;
}
6364
6365
6366
/* Finds the string constraint in a constraint set that applies to the
 * "from" field of a field pair.  The temporary field extracted from the
 * pair is freed before returning; the result is whatever
 * FindStringConstraintInConstraintSetForField returns for that field.
 */
NLM_EXTERN StringConstraintPtr FindStringConstraintInConstraintSetForFieldPair (FieldPairTypePtr fieldpair, ConstraintChoiceSetPtr csp)
{
  FieldTypePtr        from_field = GetFromFieldFromFieldPair (fieldpair);
  StringConstraintPtr found      = FindStringConstraintInConstraintSetForField (from_field, csp);

  from_field = FieldTypeFree (from_field);
  return found;
}
6376
 
6377
6378
/* Builds a string constraint that selects the text a field edit would find.
 *
 * edit - the edit; NULL, or an edit with no find text, yields NULL.
 *
 * The new constraint holds a copy of edit->find_txt, is case sensitive, is
 * not whole-word and not negated, and its match location mirrors
 * edit->location (anywhere -> contains, beginning -> starts, end -> ends).
 *
 * Returns a newly allocated constraint owned by the caller, or NULL when
 * there is nothing to search for or the allocation fails.
 */
NLM_EXTERN StringConstraintPtr StringConstraintFromFieldEdit (FieldEditPtr edit)
{
  StringConstraintPtr scp;

  if (edit == NULL || edit->find_txt == NULL) return NULL;
  scp = StringConstraintNew ();
  if (scp == NULL) return NULL;
  scp->match_text = StringSave (edit->find_txt);

  switch (edit->location) {
    case Field_edit_location_anywhere :
      scp->match_location = String_location_contains;
      break;
    case Field_edit_location_beginning :
      scp->match_location = String_location_starts;
      break;
    case Field_edit_location_end :
      scp->match_location = String_location_ends;
      break;
    default :
      /* unrecognized location: leave match_location as StringConstraintNew set it */
      break;
  }

  scp->case_sensitive = TRUE;
  scp->whole_word = FALSE;
  scp->not_present = FALSE;

  return scp;
}
6404
6405
6406
/* Returns a newly allocated copy of str with a find/replace edit applied.
 *
 * str  - text to edit; it is not modified.
 * edit - the edit; NULL yields a plain copy of str.
 *
 * Occurrences of edit->find_txt are located with StringISearch and replaced
 * by edit->repl_txt according to edit->location:
 *   beginning - only an occurrence starting at the first character;
 *   end       - only an occurrence finishing at the last character;
 *   otherwise - every occurrence, scanning left to right and resuming after
 *               each inserted replacement.
 * An empty find text leaves the copy unchanged.  If memory for a replacement
 * cannot be obtained, the text edited so far is returned.
 *
 * The caller owns the returned string.
 */
static CharPtr ApplyEditToString (CharPtr str, FieldEditPtr edit)
{
  CharPtr cp_found, new_str;
  Int4 found_len, replace_len, new_len, offset;

  if (edit == NULL) return StringSave (str);

  str = StringSave (str);
  found_len = StringLen (edit->find_txt);
  replace_len = StringLen (edit->repl_txt);

  /* nothing to search in, or nothing to search for */
  if (str == NULL || found_len == 0) return str;

  cp_found = StringISearch (str, edit->find_txt);
  if (edit->location == Field_edit_location_beginning
      && cp_found != str) {
    cp_found = NULL;
  } 
  while (cp_found != NULL)
  {
    if (edit->location == Field_edit_location_end
        && cp_found != str + StringLen (str) - found_len) {
      /* not at the tail; advance a single character so an occurrence that
       * overlaps this one and does end the string is still found
       */
      cp_found = StringISearch (cp_found + 1, edit->find_txt);
      continue;
    }

    offset = (Int4) (cp_found - str);
    new_len = StringLen (str) + 1 - found_len + replace_len;
    new_str = (CharPtr) MemNew (new_len * sizeof (Char));
    if (new_str == NULL)
    {
      /* searching again from the same spot would never terminate */
      break;
    }
    /* MemNew zero-fills, so the copied prefix is already terminated */
    if (offset > 0)
    {
      StringNCpy (new_str, str, offset);
    }
    StringCat (new_str, edit->repl_txt);
    StringCat (new_str, cp_found + found_len);
    str = MemFree (str);
    str = new_str;

    if (edit->location == Field_edit_location_beginning)
    {
      /* only the leading occurrence is eligible */
      break;
    }
    /* resume after the inserted text so it is never rescanned */
    cp_found = StringISearch (str + offset + replace_len, edit->find_txt);
  }
  return str;
}
6447
6448
6449
typedef struct objectcollection {
6450
  AECRActionPtr action;
6451
  ValNodePtr object_list;
6452
} ObjectCollectionData, PNTR ObjectCollectionPtr;
6453
6454
6455
/* Decides whether one object belongs in the collection for an AECR action
 * and, if so, appends it to o->object_list (node choice = objecttype).
 *
 * objecttype / objectdata - the candidate object.
 * o                       - collection state holding the action and list.
 *
 * For every action kind the object must be appropriate for the action's
 * field(s) and satisfy the action's constraint set.  In addition:
 *   edit    - the field must hold non-blank text matching the edit's find text;
 *   convert - the "from" field must hold non-blank text;
 *   parse   - the requested portion of the "from" field must be non-blank.
 * Nothing happens when any of objectdata, o, o->action or the action's
 * choice node is NULL.
 */
static void AECRActionObjectCollectionItemCallback (Uint1 objecttype, Pointer objectdata, ObjectCollectionPtr o)
{
  ApplyActionPtr a;
  EditActionPtr e;
  ConvertActionPtr v;
  CopyActionPtr c;
  SwapActionPtr s;
  RemoveActionPtr r;
  AECRParseActionPtr p;
  CharPtr str, portion;
  StringConstraintPtr scp;
  FieldTypePtr field_from = NULL, field_to = NULL;

  if (objectdata == NULL || o == NULL) return;
  if (o->action == NULL || o->action->action == NULL) return;

  /* check to make sure object is appropriate for field and meets filter */
  switch (o->action->action->choice) {
    case ActionChoice_apply :
      a = (ApplyActionPtr) o->action->action->data.ptrvalue;
      if (a != NULL
          && IsObjectAppropriateForFieldValue (objecttype, objectdata, a->field)
          && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) {
        ValNodeAddPointer (&(o->object_list), objecttype, objectdata);
      }
      break;
    case ActionChoice_edit :
      e = (EditActionPtr) o->action->action->data.ptrvalue;
      if (e != NULL
          && IsObjectAppropriateForFieldValue (objecttype, objectdata, e->field)
          && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) {
        scp = StringConstraintFromFieldEdit (e->edit);
        str = GetFieldValueForObject (objecttype, objectdata, e->field, scp);
        if (!StringHasNoText (str)) {
          ValNodeAddPointer (&(o->object_list), objecttype, objectdata);
        }
        str = MemFree (str);
        /* this constraint was allocated just above, so it is ours to release */
        scp = StringConstraintFree (scp);
      }
      break;
    case ActionChoice_convert :
      v = (ConvertActionPtr) o->action->action->data.ptrvalue;
      if (v != NULL
          && (field_from = GetFromFieldFromFieldPair(v->fields)) != NULL
          && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_from)
          && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) {
        /* borrowed from the constraint set: must not be freed here */
        scp = FindStringConstraintInConstraintSetForField (field_from, o->action->constraint);
        str = GetFieldValueForObject (objecttype, objectdata, field_from, scp);
        if (!StringHasNoText (str)) {
          ValNodeAddPointer (&(o->object_list), objecttype, objectdata);
        }
        str = MemFree (str);
      }
      field_from = FieldTypeFree (field_from);
      break;
    case ActionChoice_copy :
      /* NOTE(review): field_to is obtained with the "from" accessor here and
       * in the swap and parse cases below, so both checks test the same
       * field - confirm whether a "to" accessor was intended.
       */
      c = (CopyActionPtr) o->action->action->data.ptrvalue;
      if (c != NULL
          && (field_from = GetFromFieldFromFieldPair(c->fields)) != NULL
          && (field_to = GetFromFieldFromFieldPair(c->fields)) != NULL
          && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_from)
          && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_to)
          && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) {
        ValNodeAddPointer (&(o->object_list), objecttype, objectdata);
      }
      field_from = FieldTypeFree (field_from);
      field_to = FieldTypeFree (field_to);
      break;
    case ActionChoice_swap :
      s = (SwapActionPtr) o->action->action->data.ptrvalue;
      if (s != NULL
          && (field_from = GetFromFieldFromFieldPair(s->fields)) != NULL
          && (field_to = GetFromFieldFromFieldPair(s->fields)) != NULL
          && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_from)
          && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_to)
          && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) {
        ValNodeAddPointer (&(o->object_list), objecttype, objectdata);
      }
      field_from = FieldTypeFree (field_from);
      field_to = FieldTypeFree (field_to);
      break;
    case ActionChoice_remove :
      r = (RemoveActionPtr) o->action->action->data.ptrvalue;
      if (r != NULL
          && IsObjectAppropriateForFieldValue (objecttype, objectdata, r->field)
          && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) {
        ValNodeAddPointer (&(o->object_list), objecttype, objectdata);
      }
      break;
    case ActionChoice_parse :
      p = (AECRParseActionPtr) o->action->action->data.ptrvalue;
      if (p != NULL
          && (field_from = GetFromFieldFromFieldPair(p->fields)) != NULL
          && (field_to = GetFromFieldFromFieldPair(p->fields)) != NULL
          && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_from)
          && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_to)
          && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) {
        /* borrowed from the constraint set: must not be freed here */
        scp = FindStringConstraintInConstraintSetForField (field_from, o->action->constraint);
        str = GetFieldValueForObject (objecttype, objectdata, field_from, scp);
        portion = GetTextPortionFromString (str, p->portion);
        if (!StringHasNoText (portion)) {
          ValNodeAddPointer (&(o->object_list), objecttype, objectdata);
        }
        portion = MemFree (portion);
        str = MemFree (str);
      }
      field_from = FieldTypeFree (field_from);
      field_to = FieldTypeFree (field_to);
      break;
    default :
      break;
  }

}
6565
6566
6567
/* Feature visitor: offers the feature to the shared item callback as an
 * OBJ_SEQFEAT object.  `data` carries the ObjectCollectionPtr.  Ignored when
 * either argument is NULL.
 */
static void AECRActionObjectCollectionFeatureCallback (SeqFeatPtr sfp, Pointer data)
{
  if (sfp != NULL && data != NULL) {
    AECRActionObjectCollectionItemCallback (OBJ_SEQFEAT, sfp, (ObjectCollectionPtr) data);
  }
}
6576
6577
6578
/* Descriptor visitor: offers the descriptor to the shared item callback as
 * an OBJ_SEQDESC object.  `data` carries the ObjectCollectionPtr.  Ignored
 * when either argument is NULL.
 */
static void AECRActionObjectCollectionDescriptorCallback (SeqDescrPtr sdp, Pointer data)
{
  if (sdp != NULL && data != NULL) {
    AECRActionObjectCollectionItemCallback (OBJ_SEQDESC, sdp, (ObjectCollectionPtr) data);
  }
}
6587
6588
6589
/* Bioseq visitor: offers the Bioseq to the shared item callback as an
 * OBJ_BIOSEQ object.  `data` carries the ObjectCollectionPtr.  Ignored when
 * either argument is NULL.
 */
static void AECRObjectCollectionBioseqCallback (BioseqPtr bsp, Pointer data)
{
  if (bsp != NULL && data != NULL) {
    AECRActionObjectCollectionItemCallback (OBJ_BIOSEQ, bsp, (ObjectCollectionPtr) data);
  }
}
6598
6599
6600
/* Collects the objects under a SeqEntry that an AECR action applies to.
 *
 * For an action on a molinfo field the Bioseqs are visited; for any other
 * field type the features are visited first, then the descriptors.  Each
 * visited object is screened by the collection callbacks.
 *
 * Returns the list built by those callbacks (node choice = object type), or
 * NULL when action is NULL or nothing qualifies.
 */
NLM_EXTERN ValNodePtr GetObjectListForAECRAction (SeqEntryPtr sep, AECRActionPtr action)
{
  ObjectCollectionData collected;

  if (action == NULL) {
    return NULL;
  }

  collected.action = action;
  collected.object_list = NULL;

  if (FieldTypeFromAECRAction (action) != FieldType_molinfo_field) {
    VisitFeaturesInSep (sep, &collected, AECRActionObjectCollectionFeatureCallback);
    VisitDescriptorsInSep (sep, &collected, AECRActionObjectCollectionDescriptorCallback);
  } else {
    VisitBioseqsInSep (sep, &collected, AECRObjectCollectionBioseqCallback);
  }
  return collected.object_list;
}
6616
6617
6618
typedef struct buildcgpset
6619
{
6620
  ValNodePtr cds_list;
6621
  ValNodePtr mrna_list;
6622
  ValNodePtr gene_list;
6623
} BuildCGPSetData, PNTR BuildCGPSetPtr;
6624
6625
/* Feature visitor that sorts features into the lists of a BuildCGPSetData.
 *
 * Coding regions go to cds_list, genes to gene_list, mRNAs to mrna_list, and
 * any other feature with a gene xref to gene_list; everything else is
 * dropped.  Features flagged idx.deleteme, and NULL arguments, are ignored.
 */
static void BuildCGPSetCallback (SeqFeatPtr sfp, Pointer userdata)
{
  BuildCGPSetPtr  lists;
  ValNodePtr PNTR target = NULL;

  if (sfp == NULL || sfp->idx.deleteme || userdata == NULL) {
    return;
  }
  lists = (BuildCGPSetPtr) userdata;

  /* pick the destination list; tests are ordered, first hit wins */
  if (sfp->data.choice == SEQFEAT_CDREGION) {
    target = &(lists->cds_list);
  } else if (sfp->data.choice == SEQFEAT_GENE) {
    target = &(lists->gene_list);
  } else if (sfp->idx.subtype == FEATDEF_mRNA) {
    target = &(lists->mrna_list);
  } else if (SeqMgrGetGeneXref (sfp) != NULL) {
    target = &(lists->gene_list);
  }

  if (target != NULL) {
    ValNodeAddPointer (target, OBJ_SEQFEAT, sfp);
  }
}
6648
6649
6650
/* Builds a CDS-gene-prot set around a coding region.
 *
 * cds             - coding region feature; anything else yields NULL.
 * indexing_needed - optional out flag, set to TRUE when a protein feature
 *                   had to be created on the product sequence.
 *
 * The set receives the coding region, its gene (if any), the overlapping
 * mRNA (if any) and, when the product sequence is found, its full-length
 * protein feature followed by its mat_peptide features.  A missing
 * full-length protein feature is created.  The gene and mRNA placed in the
 * set have idx.deleteme set as an "already grouped" marker.
 *
 * Returns the new set, or NULL when cds is unsuitable or the set cannot be
 * allocated.
 */
static CGPSetPtr BuildCGPSetFromCodingRegion (SeqFeatPtr cds, BoolPtr indexing_needed)
{
  SeqMgrFeatContext fcontext;
  SeqFeatPtr        gene = NULL, mrna, prot;
  BioseqPtr         protbsp;
  CGPSetPtr         cdsp;
  ProtRefPtr        prp;

  if (cds == NULL || cds->data.choice != SEQFEAT_CDREGION) return NULL;

  cdsp = (CGPSetPtr) MemNew (sizeof (CGPSetData));
  if (cdsp == NULL) return NULL;
  ValNodeAddPointer (&(cdsp->cds_list), 0, cds);

  gene = GetGeneForFeature (cds);
  if (gene != NULL)
  {
    ValNodeAddPointer (&(cdsp->gene_list), 0, gene);
    /* mark gene, so that we'll know it isn't lonely */
    gene->idx.deleteme = TRUE;
  }

  mrna = SeqMgrGetOverlappingmRNA (cds->location, &fcontext);
  if (mrna != NULL)
  {
    ValNodeAddPointer (&(cdsp->mrna_list), 0, mrna);
    /* mark mrna, so that we'll know it's already in a set */
    mrna->idx.deleteme = TRUE;
  }

  if (cds->product != NULL)
  {
    protbsp = BioseqFindFromSeqLoc (cds->product);
    if (protbsp != NULL)
    {
      prot = SeqMgrGetNextFeature (protbsp, NULL, SEQFEAT_PROT, FEATDEF_PROT, &fcontext);
      /* if there is no full-length protein feature, make one */
      if (prot == NULL)
      {
        prot = CreateNewFeatureOnBioseq (protbsp, SEQFEAT_PROT, NULL);
        if (prot != NULL)
        {
          /* allocate the ProtRef only once there is a feature to own it,
           * so nothing is leaked when feature creation fails
           */
          prp = ProtRefNew ();
          prot->data.value.ptrvalue = prp;
          if (indexing_needed != NULL)
          {
            *indexing_needed = TRUE;
          }
        }
      }
      if (prot != NULL)
      {
        ValNodeAddPointer (&(cdsp->prot_list), 0, prot);
      }
      
      /* also add in mat_peptides from protein feature */
      prot = SeqMgrGetNextFeature (protbsp, NULL, SEQFEAT_PROT, FEATDEF_mat_peptide_aa, &fcontext);
      while (prot != NULL)
      {
        ValNodeAddPointer (&(cdsp->prot_list), 0, prot);
        prot = SeqMgrGetNextFeature (protbsp, prot, SEQFEAT_PROT, FEATDEF_mat_peptide_aa, &fcontext);
      }
    }
  }  
  return cdsp;
}
6715
6716
6717
/* Builds a CDS-gene-prot set around an mRNA that is not yet in a set.
 *
 * mrna - mRNA feature; NULL, a feature already flagged idx.deleteme, or a
 *        feature that is not an mRNA yields NULL.
 *
 * The set receives the mRNA and its gene (if any); that gene has
 * idx.deleteme set as an "already grouped" marker.
 *
 * Returns the new set, or NULL when mrna is unsuitable or the set cannot be
 * allocated.
 */
static CGPSetPtr BuildCGPSetFrommRNA (SeqFeatPtr mrna)
{
  SeqFeatPtr        gene;
  CGPSetPtr          cdsp;

  if (mrna == NULL || mrna->idx.deleteme || mrna->idx.subtype != FEATDEF_mRNA) return NULL;

  cdsp = (CGPSetPtr) MemNew (sizeof (CGPSetData));
  if (cdsp == NULL) return NULL;
  ValNodeAddPointer (&(cdsp->mrna_list), 0, mrna);

  gene = GetGeneForFeature (mrna);
  if (gene != NULL)
  {
    ValNodeAddPointer (&(cdsp->gene_list), 0, gene);
    /* mark gene, so that we'll know it isn't lonely */
    gene->idx.deleteme = TRUE;
  }

  return cdsp;
}
6737
6738
6739
/* Clears the idx.deleteme flag on every feature referenced by a ValNode
 * list.  Nodes holding a NULL pointer are skipped.
 */
static void UnmarkFeatureList (ValNodePtr list)
{
  ValNodePtr node;
  SeqFeatPtr feat;

  for (node = list; node != NULL; node = node->next)
  {
    feat = node->data.ptrvalue;
    if (feat == NULL)
    {
      continue;
    }
    feat->idx.deleteme = FALSE;
  }
}
6753
6754
6755
/* Builds the list of CGP sets for the record identified by entityID.
 *
 * Features are collected by BuildCGPSetCallback into three lists (coding
 * regions, mRNAs, genes).  Sets are then built in three passes:
 *   1. one set per coding region;
 *   2. one set per mRNA that is not flagged idx.deleteme after pass 1;
 *   3. one set per gene that is not flagged idx.deleteme after passes 1-2.
 * The idx.deleteme flags are used only as temporary "already placed" marks
 * and are cleared again before the collection lists are released.
 * Finally every set that does not satisfy constraint is unlinked and freed.
 *
 * Returns the surviving list (choice 0 nodes whose data.ptrvalue is a
 * CGPSetPtr), or NULL when nothing remains.  The list is released with
 * FreeCGPSetList.
 */
static ValNodePtr BuildCGPSetList (Uint2 entityID, ValNodePtr constraint)
{
  SeqEntryPtr    sep;
  BuildCGPSetData b;
  CGPSetPtr       cdsp;
  ValNodePtr     vnp, vnp_next, vnp_prev;
  ValNodePtr     cdset_list = NULL;
  SeqFeatPtr     cds, gene, mrna;
  Boolean        need_indexing = FALSE;

  sep = GetTopSeqEntryForEntityID (entityID);

  b.cds_list = NULL;
  b.gene_list = NULL;
  b.mrna_list = NULL;

  VisitFeaturesInSep (sep, &b, BuildCGPSetCallback);

  /* build cdsets that have coding regions */
  for (vnp = b.cds_list; vnp != NULL; vnp = vnp->next)
  {
    cds = (SeqFeatPtr) vnp->data.ptrvalue;
    if (cds == NULL) continue;
    cdsp = BuildCGPSetFromCodingRegion (cds, &need_indexing);
    if (cdsp != NULL)
    {
      ValNodeAddPointer (&cdset_list, 0, cdsp);
    }
  }
  if (need_indexing)
  {
    /* indexing because we have created full-length protein features */
    SeqMgrIndexFeatures (entityID, NULL);
  }

  /* build cdsets for mrna features that don't have coding regions */
  for (vnp = b.mrna_list; vnp != NULL; vnp = vnp->next)
  {
    mrna = (SeqFeatPtr) vnp->data.ptrvalue;
    if (mrna == NULL || mrna->idx.deleteme) continue;
    cdsp = BuildCGPSetFrommRNA (mrna);
    if (cdsp != NULL)
    {
      ValNodeAddPointer (&cdset_list, 0, cdsp);
    }
  }

  /* build cdsets for lonely genes / features with gene xrefs that are not coding regions or mrnas */
  for (vnp = b.gene_list; vnp != NULL; vnp = vnp->next)
  {
    gene = (SeqFeatPtr) vnp->data.ptrvalue;
    if (gene == NULL || gene->idx.deleteme) continue;
    cdsp = CGPSetNew ();
    if (cdsp == NULL) continue;   /* allocation failed: skip this gene rather than dereference NULL */
    ValNodeAddPointer (&(cdsp->gene_list), 0, gene);
    ValNodeAddPointer (&cdset_list, 0, cdsp);
  }

  /* now unmark features */
  UnmarkFeatureList (b.cds_list);
  UnmarkFeatureList (b.mrna_list);
  UnmarkFeatureList (b.gene_list);

  b.cds_list = ValNodeFree (b.cds_list);
  b.mrna_list = ValNodeFree (b.mrna_list);
  b.gene_list = ValNodeFree (b.gene_list);

  /* now remove sets that don't match our choice constraint */
  vnp_prev = NULL;
  for (vnp = cdset_list; vnp != NULL; vnp = vnp_next)
  {
    vnp_next = vnp->next;
    if (!DoesObjectMatchConstraintChoiceSet (0, vnp->data.ptrvalue, constraint))
    {
      /* unlink the node, then free it as a one-element list */
      if (vnp_prev == NULL)
      {
        cdset_list = vnp_next;
      }
      else
      {
        vnp_prev->next = vnp_next;
      }
      vnp->next = NULL;
      FreeCGPSetList (vnp);
    }
    else
    {
      vnp_prev = vnp;
    }
  }

  return cdset_list;
}
6847
6848
6849
/* Applies an "apply" action to every object in object_list.
 *
 * For each node, SetFieldValueForObject is called with the node's choice and
 * data pointer, action->field, the string constraint scp, action->value and
 * action->existing_text.
 *
 * Returns the number of objects for which SetFieldValueForObject reported
 * success; 0 when action or object_list is NULL.
 */
NLM_EXTERN Int4 DoApplyActionToObjectList (ApplyActionPtr action, ValNodePtr object_list, StringConstraintPtr scp)
{
  ValNodePtr vnp;
  Int4       num_succeed = 0;

  if (action == NULL || object_list == NULL) return 0;

  for (vnp = object_list; vnp != NULL; vnp = vnp->next) {
    if (SetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, action->field, scp, action->value, action->existing_text)) {
      num_succeed++;
    }
  }
  return num_succeed;
}
6865
6866
6867
/* Applies an "edit" action to every object in object_list.
 *
 * A string constraint is derived from action->edit and used both to read the
 * current field value and to write the edited one.  An object counts as a
 * success only when ApplyEditToString actually changed the text and the new
 * value could be stored (replacing the old one).
 *
 * Returns the number of successfully edited objects; 0 when action or
 * object_list is NULL.
 */
NLM_EXTERN Int4 DoEditActionToObjectList (EditActionPtr action, ValNodePtr object_list)
{
  ValNodePtr vnp;
  Int4       num_succeed = 0;
  StringConstraintPtr scp;
  CharPtr    str, new_str;

  if (action == NULL || object_list == NULL) return 0;
  scp = StringConstraintFromFieldEdit (action->edit);

  for (vnp = object_list; vnp != NULL; vnp = vnp->next) {
    str = GetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, action->field, scp);
    new_str = ApplyEditToString (str, action->edit);
    if (StringCmp (str, new_str) != 0
        && SetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, action->field, scp, new_str, ExistingTextOption_replace_old)) {
      num_succeed++;
    }
    new_str = MemFree (new_str);
    str = MemFree (str);
  }

  /* NOTE(review): StringConstraintFromFieldEdit is assumed to return a newly
   * allocated constraint owned by this function (confirm against its
   * definition); release it so it is not leaked on every call.
   */
  scp = StringConstraintFree (scp);
  return num_succeed;
}
6891
6892
6893
/* Applies a "convert" action to every object in object_list.
 *
 * The source and destination fields are extracted from action->fields.
 *   - For a molinfo field pair, the destination is written (replacing the
 *     old value) only on objects whose current source value equals the name
 *     returned by GetSequenceQualValName for the source field.
 *   - For every other field pair, the source value is written to the
 *     destination using action->existing_text and then removed from the
 *     source; both steps must succeed for the object to be counted.
 *
 * scp is used only in the non-molinfo branch, when reading and removing the
 * source value.
 *
 * Returns the number of converted objects; 0 when action, object_list or
 * action->fields is NULL.
 */
NLM_EXTERN Int4 DoConvertActionToObjectList (ConvertActionPtr action, ValNodePtr object_list, StringConstraintPtr scp)
{
  ValNodePtr vnp;
  Int4       num_succeed = 0;
  CharPtr    str, from_val;
  FieldTypePtr field_from, field_to;

  if (action == NULL || object_list == NULL || action->fields == NULL) return 0;

  field_from = GetFromFieldFromFieldPair (action->fields);
  field_to = GetToFieldFromFieldPair (action->fields);

  if (action->fields->choice == FieldPairType_molinfo_field) {
    /* field_from is dereferenced below, so skip the pass when it is missing */
    if (field_from != NULL) {
      for (vnp = object_list; vnp != NULL; vnp = vnp->next) {
        str = GetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, field_from, NULL);
        from_val = GetSequenceQualValName (field_from->data.ptrvalue);
        if (StringCmp (str, from_val) == 0
            && SetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, field_to, NULL, str, ExistingTextOption_replace_old)) {
          num_succeed++;
        }
        str = MemFree (str);
      }
    }
  } else {
    for (vnp = object_list; vnp != NULL; vnp = vnp->next) {
      str = GetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, field_from, scp);
      if (SetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, field_to, NULL, str, action->existing_text)
          && RemoveFieldValueForObject (vnp->choice, vnp->data.ptrvalue, field_from, scp)) {
        num_succeed++;
      }
      str = MemFree (str);
    }
  }

  field_from = FieldTypeFree (field_from);
  field_to = FieldTypeFree (field_to);

  return num_succeed;
}
6933
6934
6935
/* Applies a "copy" action to every object in object_list.
 *
 * For each object the source field value (read with scp) is written to the
 * destination field using action->existing_text.  The source is left
 * untouched.
 *
 * Returns the number of objects for which the write succeeded; 0 when
 * action or object_list is NULL.
 */
NLM_EXTERN Int4 DoCopyActionToObjectList (CopyActionPtr action, ValNodePtr object_list, StringConstraintPtr scp)
{
  ValNodePtr vnp;
  Int4       num_succeed = 0;
  CharPtr    str;
  FieldTypePtr field_from, field_to;

  if (action == NULL || object_list == NULL) return 0;
  field_from = GetFromFieldFromFieldPair (action->fields);
  field_to = GetToFieldFromFieldPair (action->fields);

  for (vnp = object_list; vnp != NULL; vnp = vnp->next) {
    str = GetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, field_from, scp);
    if (SetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, field_to, NULL, str, action->existing_text)) {
      num_succeed++;
    }
    str = MemFree (str);
  }

  field_from = FieldTypeFree (field_from);
  field_to = FieldTypeFree (field_to);
  return num_succeed;
}
6960
6961
6962
/* Applies a "swap" action to every object in object_list.
 *
 * Both field values are read first (the source with scp, the destination
 * without a constraint), then each is written to the other field, replacing
 * the old text.  An object is counted only when both writes succeed; the
 * second write is not attempted when the first one fails.
 *
 * Returns the number of swapped objects; 0 when action or object_list is
 * NULL.
 */
NLM_EXTERN Int4 DoSwapActionToObjectList (SwapActionPtr action, ValNodePtr object_list, StringConstraintPtr scp)
{
  ValNodePtr vnp;
  Int4       num_succeed = 0;
  CharPtr    str1, str2;
  FieldTypePtr field_from, field_to;

  if (action == NULL || object_list == NULL) return 0;
  field_from = GetFromFieldFromFieldPair (action->fields);
  field_to = GetToFieldFromFieldPair (action->fields);

  for (vnp = object_list; vnp != NULL; vnp = vnp->next) {
    str1 = GetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, field_from, scp);
    str2 = GetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, field_to, NULL);
    if (SetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, field_to, NULL, str1, ExistingTextOption_replace_old)
        && SetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, field_from, scp, str2, ExistingTextOption_replace_old)) {
      num_succeed++;
    }
    str1 = MemFree (str1);
    str2 = MemFree (str2);
  }
  field_from = FieldTypeFree (field_from);
  field_to = FieldTypeFree (field_to);
  return num_succeed;
}
6989
6990
6991
/* Applies a "remove" action to every object in object_list by calling
 * RemoveFieldValueForObject with action->field and the constraint scp.
 *
 * Returns the number of objects for which the removal reported success;
 * 0 when action or object_list is NULL.
 */
NLM_EXTERN Int4 DoRemoveActionToObjectList (RemoveActionPtr action, ValNodePtr object_list, StringConstraintPtr scp)
{
  ValNodePtr vnp;
  Int4       num_succeed = 0;

  if (action == NULL || object_list == NULL) return 0;

  for (vnp = object_list; vnp != NULL; vnp = vnp->next) {
    if (RemoveFieldValueForObject (vnp->choice, vnp->data.ptrvalue, action->field, scp)) {
      num_succeed++;
    }
  }
  return num_succeed;
}
7007
7008
7009
/* Applies a "parse" action to every object in object_list.
 *
 * For each object the source field value is read (with scp) and the portion
 * selected by action->portion is extracted.  When a portion is found:
 *   - if action->remove_from_parsed is set, the first occurrence of the
 *     portion is cut out of the source text and the shortened text is
 *     written back to the source field;
 *   - the extracted portion is written to the destination field using
 *     action->existing_text.
 *
 * Returns the number of objects whose destination field was written
 * successfully; 0 when action or object_list is NULL.
 */
NLM_EXTERN Int4 DoParseActionToObjectList (AECRParseActionPtr action, ValNodePtr object_list, StringConstraintPtr scp)
{
  ValNodePtr vnp;
  CharPtr    str1, str2, cp, dst, src;
  Int4       num_succeed = 0;
  FieldTypePtr field_from, field_to;

  if (action == NULL || object_list == NULL) return 0;
  field_from = GetFromFieldFromFieldPair (action->fields);
  field_to = GetToFieldFromFieldPair (action->fields);

  for (vnp = object_list; vnp != NULL; vnp = vnp->next) {
    str1 = GetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, field_from, scp);
    str2 = GetTextPortionFromString (str1, action->portion);
    if (str2 != NULL) {
      if (action->remove_from_parsed) {
        cp = StringSearch (str1, str2);
        /* only edit the source when the portion is literally present in it */
        if (cp != NULL) {
          /* Close the gap left by the portion.  Source and destination
           * overlap, so the tail is shifted with an explicit forward copy
           * instead of a string-copy routine.
           */
          dst = cp;
          src = cp + StringLen (str2);
          while (*src != 0) {
            *dst++ = *src++;
          }
          *dst = 0;
          SetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, field_from, scp, str1, ExistingTextOption_replace_old);
        }
      }
      /* the destination receives the parsed portion, not the source text */
      if (SetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, field_to, NULL, str2, action->existing_text)) {
        num_succeed++;
      }
    }
    str1 = MemFree (str1);
    str2 = MemFree (str2);
  }
  field_from = FieldTypeFree (field_from);
  field_to = FieldTypeFree (field_to);
  return num_succeed;
}
7041
7042
7043
/* Runs one AECR (apply/edit/convert/copy/swap/remove/parse) action on sep.
 *
 * The objects to operate on are gathered first: a CGP-set list when the
 * action's field type is FieldType_cds_gene_prot, otherwise whatever
 * GetObjectListForAECRAction returns.  The action is then dispatched on
 * act->action->choice to the matching Do...ActionToObjectList routine.
 * Only the apply action looks up a string constraint in act->constraint;
 * the other list-based actions are called with a NULL constraint.
 *
 * Returns the success count reported by the dispatched routine; 0 when act
 * or act->action is NULL or the action choice is not recognized.
 */
static Int4 ApplyAECRActionToSeqEntry (AECRActionPtr act, SeqEntryPtr sep)
{
  StringConstraintPtr scp;
  ApplyActionPtr      a;
  ValNodePtr          object_list = NULL;
  Uint1               field_type;
  Uint2               entityID;
  Int4                num_succeed = 0;

  if (act == NULL || act->action == NULL) return 0;
  field_type = FieldTypeFromAECRAction (act);
  if (field_type == FieldType_cds_gene_prot) {
    entityID = ObjMgrGetEntityIDForChoice(sep);
    object_list = BuildCGPSetList (entityID, act->constraint);
  } else {
    object_list = GetObjectListForAECRAction (sep, act);
  }

  switch (act->action->choice) {
    case ActionChoice_apply:
      a = (ApplyActionPtr) act->action->data.ptrvalue;
      if (a != NULL) {
        scp = FindStringConstraintInConstraintSetForField (a->field, act->constraint);
        num_succeed = DoApplyActionToObjectList (a, object_list, scp);
        scp = StringConstraintFree (scp);
      }
      break;
    case ActionChoice_edit:
      num_succeed = DoEditActionToObjectList (act->action->data.ptrvalue, object_list);
      break;
    case ActionChoice_convert:
      num_succeed = DoConvertActionToObjectList (act->action->data.ptrvalue, object_list, NULL);
      break;
    case ActionChoice_swap:
      num_succeed = DoSwapActionToObjectList (act->action->data.ptrvalue, object_list, NULL);
      break;
    case ActionChoice_copy:
      num_succeed = DoCopyActionToObjectList (act->action->data.ptrvalue, object_list, NULL);
      break;
    case ActionChoice_remove:
      num_succeed = DoRemoveActionToObjectList (act->action->data.ptrvalue, object_list, NULL);
      break;
    case ActionChoice_parse:
      /* parse actions carry an AECRParseAction payload, so use the parse routine */
      num_succeed = DoParseActionToObjectList (act->action->data.ptrvalue, object_list, NULL);
      break;
    default:
      break;
  }

  if (field_type == FieldType_cds_gene_prot) {
    /* the nodes own CGP sets built by BuildCGPSetList; release those too */
    if (object_list != NULL) {
      FreeCGPSetList (object_list);
    }
    object_list = NULL;
  } else {
    object_list = ValNodeFree (object_list);
  }
  return num_succeed;
}
7090
7091
7092
/* This section handles parsing where the source field and destination field may not be on the same
7093
 * group of objects. */
7094
typedef struct parsesourceinfo 
7095
{
7096
  BioseqPtr   bsp;
7097
  SeqFeatPtr  sfp;
7098
  SeqDescrPtr sdp;
7099
  SeqIdPtr    sip;
7100
  ValNodePtr  dest_list;
7101
  CharPtr     parse_src_txt;
7102
} ParseSourceInfoData, PNTR ParseSourceInfoPtr;
7103
7104
/* Allocates a ParseSourceInfo and stores the given pointers in it.
 * parse_src_txt is stored as-is (not copied); dest_list starts out empty.
 * Returns NULL when the allocation fails, in which case parse_src_txt
 * remains the caller's to free.
 */
static ParseSourceInfoPtr ParseSourceInfoNew (BioseqPtr bsp, SeqFeatPtr sfp, SeqDescrPtr sdp, SeqIdPtr sip, CharPtr parse_src_txt)
{
  ParseSourceInfoPtr info;

  info = (ParseSourceInfoPtr) MemNew (sizeof (ParseSourceInfoData));
  if (info == NULL)
  {
    return NULL;
  }

  info->bsp = bsp;
  info->sdp = sdp;
  info->sfp = sfp;
  info->sip = sip;
  info->dest_list = NULL;
  info->parse_src_txt = parse_src_txt;
  return info;
}
7119
7120
7121
/* Releases a ParseSourceInfo: the dest_list nodes (via ValNodeFree), the
 * parse_src_txt string and the structure itself.  The Bioseq, feature,
 * descriptor and Seq-id it points at are left alone.
 * Returns the result of the final MemFree, or NULL for a NULL argument.
 */
static ParseSourceInfoPtr ParseSourceInfoFree (ParseSourceInfoPtr psip)
{
  if (psip == NULL) return NULL;

  psip->dest_list = ValNodeFree (psip->dest_list);
  psip->parse_src_txt = MemFree (psip->parse_src_txt);
  return MemFree (psip);
}
7131
7132
/* Creates a new ParseSourceInfo that refers to the same Bioseq, feature,
 * descriptor and Seq-id as psip.  The destination list and the source text
 * are NOT copied: both are NULL in the result.
 * Returns NULL when psip is NULL or the allocation fails.
 */
static ParseSourceInfoPtr ParseSourceInfoCopy (ParseSourceInfoPtr psip)
{
  ParseSourceInfoPtr dup;

  if (psip == NULL) return NULL;

  dup = (ParseSourceInfoPtr) MemNew (sizeof (ParseSourceInfoData));
  if (dup == NULL) return NULL;

  dup->bsp = psip->bsp;
  dup->sfp = psip->sfp;
  dup->sdp = psip->sdp;
  dup->sip = psip->sip;
  dup->dest_list = NULL;
  dup->parse_src_txt = NULL;
  return dup;
}
7150
7151
/* Frees a list whose nodes each hold a ParseSourceInfoPtr: every payload is
 * released with ParseSourceInfoFree and every node with ValNodeFree.
 * Always returns NULL.
 */
static ValNodePtr ParseSourceListFree (ValNodePtr vnp)
{
  ValNodePtr node = vnp;
  ValNodePtr following;

  while (node != NULL) {
    following = node->next;
    /* detach first so that ValNodeFree releases only this node */
    node->next = NULL;
    node->data.ptrvalue = ParseSourceInfoFree (node->data.ptrvalue);
    ValNodeFree (node);
    node = following;
  }
  return NULL;
}
7163
7164
7165
static void 
7166
GetDeflineSourcesForBioseq 
7167
(BioseqPtr              bsp,
7168
 TextPortionPtr         portion,
7169
 ValNodePtr PNTR source_list)
7170
{
7171
  SeqDescrPtr        sdp;
7172
  SeqMgrDescContext  dcontext;
7173
  CharPtr            str;
7174
  ParseSourceInfoPtr psip;
7175
  
7176
  if (bsp == NULL || source_list == NULL)
7177
  {
7178
    return;
7179
  }
7180
  
7181
  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_title, &dcontext);
7182
  while (sdp != NULL)
7183
  {
7184
    str = GetTextPortionFromString (sdp->data.ptrvalue, portion);    
7185
    if (str != NULL) {
7186
      psip = ParseSourceInfoNew (bsp, NULL, sdp, NULL, str);
7187
      if (psip != NULL) {
7188
        ValNodeAddPointer (source_list, 0, psip);
7189
      } else {
7190
        str = MemFree (str);
7191
      }
7192
    }
7193
    sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_title, &dcontext);
7194
  }
7195
}
7196
7197
7198
/* Returns a newly allocated string describing sip, or NULL.
 *
 * NULL is returned when sip is NULL or its choice differs from id_type, and
 * for general IDs when the Dbtag is missing or (tag being non-NULL) its db
 * does not equal tag.
 *
 * For general and local IDs the text comes from the ObjectId: its string
 * when present, otherwise its integer id printed in decimal.  When no
 * ObjectId is available (including any other id_type), the ID is formatted
 * with SeqIdWrite using PRINTID_REPORT.
 *
 * The result is produced by StringSave.
 */
static CharPtr GetIDSrc (SeqIdPtr sip, Uint1 id_type, CharPtr tag)
{
  DbtagPtr    dbt = NULL;
  ObjectIdPtr oip = NULL;
  Char        id_str[128];

  if (sip == NULL || sip->choice != id_type) return NULL;

  if (id_type == SEQID_GENERAL)
  {
    dbt = (DbtagPtr) sip->data.ptrvalue;
    if (dbt == NULL || (tag != NULL && StringCmp (dbt->db, tag) != 0)) return NULL;
    oip = dbt->tag;
  }
  else if (id_type == SEQID_LOCAL)
  {
    oip = sip->data.ptrvalue;
  }

  if (oip == NULL)
  {
    SeqIdWrite (sip, id_str, PRINTID_REPORT, sizeof (id_str));
    return StringSave (id_str);
  }

  if (oip->str != NULL)
  {
    return StringSave (oip->str);
  }

  /* cast so the conversion specifier matches regardless of how the id's
   * integer type is defined on this platform
   */
  sprintf (id_str, "%ld", (long) oip->id);
  return StringSave (id_str);
}
7237
7238
7239
static void
7240
GetIDSourcesForBioseq
7241
(BioseqPtr       bsp,
7242
 TextPortionPtr  portion,
7243
 Uint1           id_type,
7244
 CharPtr         tag,
7245
 ValNodePtr PNTR source_list)
7246
{
7247
  SeqIdPtr           sip;
7248
  ParseSourceInfoPtr psip;
7249
  CharPtr            src_str = NULL, str;
7250
  
7251
  if (bsp == NULL || source_list == NULL)
7252
  {
7253
    return;
7254
  }
7255
  
7256
  sip = bsp->id;
7257
  while (sip != NULL)
7258
  {
7259
    if ((src_str = GetIDSrc (sip, id_type, tag)) != NULL) { 
7260
      str = GetTextPortionFromString (src_str, portion); 
7261
      if (str != NULL) {
7262
        psip = ParseSourceInfoNew (bsp, NULL, NULL, sip, str);
7263
        if (psip != NULL) {
7264
          ValNodeAddPointer (source_list, 0, psip);
7265
        } else {
7266
          str = MemFree (str);
7267
        }
7268
      }
7269
      src_str = MemFree (src_str);
7270
    }
7271
    sip = sip->next;
7272
  }
7273
}
7274
7275
7276
static void
7277
GetLocalIDSourcesForBioseq
7278
(BioseqPtr       bsp,
7279
 TextPortionPtr  tp,
7280
 ValNodePtr PNTR source_list)
7281
{
7282
  GetIDSourcesForBioseq (bsp, tp, SEQID_LOCAL, NULL, source_list);
7283
}
7284
7285
7286
static void GetNcbiFileSourceForBioseq
7287
(BioseqPtr       bsp,
7288
 TextPortionPtr  tp,
7289
 ValNodePtr PNTR source_list)
7290
{
7291
  GetIDSourcesForBioseq (bsp, tp, SEQID_GENERAL, "NCBIFILE", source_list);
7292
}
7293
7294
7295
/* Calls ReplaceStringForParse (with tp) on the "AdditionalComment" fields of
 * a Bankit comment descriptor.
 * The descriptor must be a user descriptor whose user object has a type
 * string of "Submission" and a class other than "SMART_V1.0"; anything else
 * is left untouched.
 */
static void StripBankitCommentForParse (SeqDescrPtr sdp, TextPortionPtr tp)
{
  UserObjectPtr      user_obj;
  ObjectIdPtr        id;
  UserFieldPtr       field;

  if (sdp == NULL || sdp->choice != Seq_descr_user || tp == NULL) return;

  /* Bankit Comments */
  user_obj = (UserObjectPtr) sdp->data.ptrvalue;
  if (user_obj == NULL || StringCmp (user_obj->_class, "SMART_V1.0") == 0) return;

  id = user_obj->type;
  if (id == NULL || StringCmp (id->str, "Submission") != 0) return;

  for (field = user_obj->data; field != NULL; field = field->next) {
    id = field->label;
    if (id == NULL || StringCmp (id->str, "AdditionalComment") != 0) continue;
    ReplaceStringForParse (field->data.ptrvalue, tp);
  }
}
7319
7320
7321
/* Calls ReplaceStringForParse (with tp) on every field labelled
 * comment_field inside a structured-comment descriptor.
 * The descriptor must be a user descriptor carrying a user object whose
 * type string is "StructuredComment"; anything else is left untouched.
 * Does nothing when sdp or tp is NULL or comment_field has no text.
 */
static void StripStructuredCommentForParse (SeqDescrPtr sdp, CharPtr comment_field, TextPortionPtr tp)
{
  UserObjectPtr      uop;
  ObjectIdPtr        oip;
  UserFieldPtr       ufp;

  if (sdp == NULL || sdp->choice != Seq_descr_user || tp == NULL || StringHasNoText (comment_field)) {
    return;
  }

  uop = (UserObjectPtr) sdp->data.ptrvalue;
  if (uop == NULL) {
    /* user descriptor without a user object: nothing to strip */
    return;
  }

  oip = uop->type;
  if (oip != NULL && StringCmp (oip->str, "StructuredComment") == 0) {
    for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
      oip = ufp->label;
      if (oip != NULL && StringCmp (oip->str, comment_field) == 0) {
        ReplaceStringForParse (ufp->data.ptrvalue, tp);
      }
    }
  }
}
7342
7343
7344
static void
7345
GetBankitCommentSourcesForBioseq 
7346
(BioseqPtr       bsp,
7347
 TextPortionPtr  tp,
7348
 ValNodePtr PNTR source_list)
7349
{
7350
  SeqDescrPtr        sdp;
7351
  SeqMgrDescContext  dcontext;
7352
  ParseSourceInfoPtr psip;
7353
  UserObjectPtr      uop;
7354
  ObjectIdPtr        oip;
7355
  UserFieldPtr       ufp;
7356
  CharPtr            str = NULL;
7357
  
7358
  if (bsp == NULL || source_list == NULL) {
7359
    return;
7360
  }
7361
  
7362
  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
7363
  while (sdp != NULL) {
7364
    if (sdp->extended != 0) {
7365
      /* Bankit Comments */
7366
      uop = (UserObjectPtr) sdp->data.ptrvalue;
7367
      if (uop != NULL && StringCmp (uop->_class, "SMART_V1.0") != 0) {
7368
        oip = uop->type;
7369
        if (oip != NULL && StringCmp (oip->str, "Submission") == 0) {
7370
          for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
7371
            oip = ufp->label;
7372
            if (oip != NULL && StringCmp (oip->str, "AdditionalComment") == 0) {
7373
              str = GetTextPortionFromString (ufp->data.ptrvalue, tp);
7374
              if (str != NULL) {
7375
                psip = ParseSourceInfoNew (bsp, NULL, sdp, NULL, str);
7376
                if (psip == NULL) {
7377
                  str = MemFree (str);
7378
                } else {
7379
                  ValNodeAddPointer (source_list, 0, psip);
7380
                }
7381
              }
7382
            }
7383
          }
7384
        }
7385
      }
7386
    }
7387
    sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
7388
  }
7389
}
7390
7391
7392
static void 
7393
GetCommentSourcesForBioseq 
7394
(BioseqPtr       bsp,
7395
 TextPortionPtr  tp,
7396
 ValNodePtr PNTR source_list)
7397
{
7398
  SeqDescrPtr        sdp;
7399
  SeqFeatPtr         sfp;
7400
  SeqMgrFeatContext  fcontext;
7401
  SeqMgrDescContext  dcontext;
7402
  ParseSourceInfoPtr psip;
7403
  CharPtr            str;
7404
  
7405
  if (bsp == NULL || source_list == NULL) {
7406
    return;
7407
  }
7408
  
7409
  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_comment, &dcontext);
7410
  while (sdp != NULL) {
7411
    str = GetTextPortionFromString (sdp->data.ptrvalue, tp);
7412
    if (str != NULL) {
7413
      psip = ParseSourceInfoNew (bsp, NULL, sdp, NULL, str);
7414
      if (psip == NULL) {
7415
        str = MemFree (str);
7416
      } else {
7417
        ValNodeAddPointer (source_list, 0, psip);
7418
      }
7419
    }
7420
    sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_comment, &dcontext);
7421
  }
7422
  
7423
  sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_COMMENT, 0, &fcontext);
7424
  while (sfp != NULL) {
7425
    str = GetTextPortionFromString (sfp->data.value.ptrvalue, tp);
7426
    if (str != NULL) {
7427
      psip = ParseSourceInfoNew (bsp, sfp, NULL, NULL, str);
7428
      if (psip == NULL) {
7429
        str = MemFree (str);
7430
      } else {
7431
        ValNodeAddPointer (source_list, 0, psip);
7432
      }
7433
    }
7434
    sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_COMMENT, 0, &fcontext);
7435
  }
7436
  GetBankitCommentSourcesForBioseq (bsp, tp, source_list);
7437
}
7438
7439
7440
static void 
7441
GetStructuredCommentSourcesForBioseq 
7442
(BioseqPtr       bsp,
7443
 TextPortionPtr  tp,
7444
 CharPtr         comment_field,
7445
 ValNodePtr PNTR source_list)
7446
{
7447
  SeqDescrPtr        sdp;
7448
  UserObjectPtr      uop;
7449
  ObjectIdPtr        oip;
7450
  UserFieldPtr       ufp;
7451
  SeqMgrDescContext  dcontext;
7452
  CharPtr            str;
7453
  ParseSourceInfoPtr psip;
7454
  
7455
  if (bsp == NULL || source_list == NULL)
7456
  {
7457
    return;
7458
  }
7459
  
7460
  sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext);
7461
  while (sdp != NULL) {  
7462
    if (sdp->extended != 0
7463
        && sdp->data.ptrvalue != NULL) {
7464
      uop = (UserObjectPtr) sdp->data.ptrvalue;
7465
      oip = uop->type;
7466
      if (oip != NULL && StringCmp (oip->str, "StructuredComment") == 0) {
7467
        for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
7468
          oip = ufp->label;
7469
          if (oip != NULL && StringCmp (oip->str, comment_field) == 0) {
7470
            str = GetTextPortionFromString (ufp->data.ptrvalue, tp);
7471
            if (str != NULL) {
7472
              psip = ParseSourceInfoNew (bsp, NULL, sdp, NULL, str);
7473
              if (psip == NULL) {
7474
                str = MemFree (str);
7475
              } else {
7476
                ValNodeAddPointer (source_list, 0, psip);
7477
              }
7478
            }
7479
          }
7480
        }
7481
      }
7482
    }
7483
    sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext);
7484
  }
7485
}
7486
7487
7488
const CharPtr nomial_keywords[] = {
7489
"f. sp. ",
7490
"var.",
7491
"pv.",
7492
"bv.",
7493
"serovar",
7494
"subsp." };
7495
7496
const Int4 num_nomial_keywords = sizeof(nomial_keywords) / sizeof (CharPtr);
7497
7498
/* Returns a pointer into taxname at the position where the name proper
 * (first two words, plus any "<keyword> <word>" groups that follow) ends.
 *
 * Returns NULL when taxname has fewer than three space-separated words.
 * Otherwise the result points either at the space that follows the last
 * word counted as part of the name, or at the terminating NUL when the name
 * runs to the end of the string.  No memory is allocated; the result
 * aliases taxname.
 */
static CharPtr GetTextAfterNomial (CharPtr taxname)
{
  CharPtr cursor, after_nomial;
  Int4    k;
  Int4    kw_len;
  Boolean keep_scanning;

  /* step over the first word and the spaces behind it */
  cursor = StringChr (taxname, ' ');
  if (cursor == NULL) return NULL;
  while (*cursor == ' ') cursor++;

  /* a name of only two words has nothing after it */
  cursor = StringChr (cursor, ' ');
  if (cursor == NULL) return NULL;

  after_nomial = cursor;
  while (*cursor == ' ') cursor++;

  do {
    keep_scanning = FALSE;
    /* when the next word is a nomial keyword, skip it together with the
     * first word that follows it
     */
    for (k = 0; k < num_nomial_keywords && *after_nomial != 0; k++) {
      kw_len = StringLen (nomial_keywords[k]);
      if (StringNCmp (cursor, nomial_keywords[k], kw_len) != 0) continue;

      cursor += kw_len;
      while (*cursor == ' ') cursor++;

      after_nomial = StringChr (cursor, ' ');
      if (after_nomial == NULL) {
        /* the name extends to the end of the string */
        after_nomial = cursor + StringLen (cursor);
      } else {
        cursor = after_nomial;
        while (*cursor == ' ') cursor++;
        keep_scanning = TRUE;
      }
    }
  } while (keep_scanning);

  return after_nomial;
}
7545
7546
7547
static void 
7548
GetOrgParseSourcesForBioSource 
7549
(BioSourcePtr    biop,
7550
 BioseqPtr       bsp,
7551
 SeqDescrPtr     sdp,
7552
 SeqFeatPtr      sfp,
7553
 ParseSrcOrgPtr  o,
7554
 TextPortionPtr  tp,
7555
 ValNodePtr PNTR source_list)
7556
{
7557
  CharPtr str = NULL, portion, tmp;
7558
  ValNode vn;
7559
  ParseSourceInfoPtr psip;
7560
7561
  if (biop == NULL || o == NULL || o->field == NULL || source_list == NULL) return;
7562
7563
  switch (o->field->choice) {
7564
    case ParseSrcOrgChoice_source_qual :
7565
      vn.choice = SourceQualChoice_textqual;
7566
      vn.data.intvalue = o->field->data.intvalue;
7567
      vn.next = NULL;
7568
      str = GetSourceQualFromBioSource (biop, &vn, NULL);
7569
      break;
7570
    case ParseSrcOrgChoice_taxname_after_binomial :
7571
      vn.choice = SourceQualChoice_textqual;
7572
      vn.data.intvalue = Source_qual_taxname;
7573
      vn.next = NULL;
7574
      str = GetSourceQualFromBioSource (biop, &vn, NULL);
7575
      tmp = GetTextAfterNomial (str);
7576
      tmp = StringSave (tmp);
7577
      str = MemFree (str);
7578
      str = tmp;
7579
      break;
7580
  }
7581
  portion = GetTextPortionFromString (str, tp);
7582
  if (portion != NULL) {
7583
    psip = ParseSourceInfoNew (bsp, sfp, sdp, NULL, portion);
7584
    if (psip == NULL) {
7585
      portion = MemFree (portion);
7586
    } else {
7587
      ValNodeAddPointer (source_list, 0, psip);
7588
    }
7589
  }
7590
  str = MemFree (str);
7591
}
7592
7593
7594
/* Collects organism parse sources for bsp from its BioSource descriptors
 * and/or BioSource features, depending on o->type:
 * Object_type_constraint_any selects both,
 * Object_type_constraint_descriptor only descriptors and
 * Object_type_constraint_feature only features.  Descriptors are visited
 * before features.
 * Does nothing when bsp, o or source_list is NULL.
 */
static void GetOrgParseSourcesForBioseq (BioseqPtr bsp, ParseSrcOrgPtr o, TextPortionPtr tp, ValNodePtr PNTR source_list)
{
  SeqDescrPtr        desc;
  SeqFeatPtr         feat;
  SeqMgrFeatContext  fcontext;
  SeqMgrDescContext  dcontext;

  if (bsp == NULL || o == NULL || source_list == NULL) return;

  if (o->type == Object_type_constraint_any || o->type == Object_type_constraint_descriptor)
  {
    desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
    while (desc != NULL)
    {
      GetOrgParseSourcesForBioSource (desc->data.ptrvalue, bsp, desc, NULL, o, tp, source_list);
      desc = SeqMgrGetNextDescriptor (bsp, desc, Seq_descr_source, &dcontext);
    }
  }

  if (o->type == Object_type_constraint_any || o->type == Object_type_constraint_feature)
  {
    feat = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &fcontext);
    while (feat != NULL)
    {
      GetOrgParseSourcesForBioSource (feat->data.value.ptrvalue, bsp, NULL, feat, o, tp, source_list);
      feat = SeqMgrGetNextFeature (bsp, feat, SEQFEAT_BIOSRC, 0, &fcontext);
    }
  }
}
7619
7620
7621
typedef struct parsesrccollection {
7622
  ParseSrcPtr src;
7623
  TextPortionPtr portion;
7624
  ValNodePtr src_list;
7625
} ParseSrcCollectionData, PNTR ParseSrcCollectionPtr;
7626
7627
7628
/* Bioseq callback: gathers the parse sources of the kind named in the
 * ParseSrcCollection passed as userdata and appends them to its src_list.
 * Deflines, local IDs and Bankit comments are skipped for protein Bioseqs;
 * local IDs are additionally skipped for segmented Bioseqs.
 * Does nothing when bsp, userdata or the collection's src is NULL, or when
 * the source choice is not one of those handled below.
 */
static void FindParseSourceBioseqCallback (BioseqPtr bsp, Pointer userdata)
{
  ParseSrcCollectionPtr collection;
  ValNodePtr PNTR       sources;

  if (bsp == NULL || userdata == NULL) return;

  collection = (ParseSrcCollectionPtr) userdata;
  if (collection->src == NULL) return;

  sources = &(collection->src_list);

  switch (collection->src->choice) {
    case ParseSrc_defline:
      if (!ISA_aa (bsp->mol)) {
        GetDeflineSourcesForBioseq (bsp, collection->portion, sources);
      }
      break;
    case ParseSrc_local_id:
      if (! ISA_aa (bsp->mol) && bsp->repr != Seq_repr_seg) {
        GetLocalIDSourcesForBioseq (bsp, collection->portion, sources);
      }
      break;
    case ParseSrc_file_id:
      GetNcbiFileSourceForBioseq (bsp, collection->portion, sources);
      break;
    case ParseSrc_org:
      GetOrgParseSourcesForBioseq (bsp, collection->src->data.ptrvalue, collection->portion, sources);
      break;
    case ParseSrc_comment:
      GetCommentSourcesForBioseq (bsp, collection->portion, sources);
      break;
    case ParseSrc_structured_comment:
      GetStructuredCommentSourcesForBioseq (bsp, collection->portion, collection->src->data.ptrvalue, sources);
      break;
    case ParseSrc_bankit_comment:
      if (!ISA_aa (bsp->mol)) {
        GetBankitCommentSourcesForBioseq (bsp, collection->portion, sources);
      }
      break;
    default:
      break;
  }
}
7671
7672
7673
/* BioSource callback: appends the taxname of biop to the ValNode list that
 * userdata points at.  The taxname pointer is stored directly (not copied),
 * so the list does not own the strings.
 * BioSources without an org or with an empty taxname are ignored.
 */
static void GetOrgNamesInRecordCallback (BioSourcePtr biop, Pointer userdata)
{
  ValNodePtr PNTR names;

  if (biop == NULL || userdata == NULL) return;
  if (biop->org == NULL || StringHasNoText (biop->org->taxname)) return;

  names = (ValNodePtr PNTR) userdata;
  ValNodeAddPointer (names, 0, biop->org->taxname);
}
7687
7688
7689
/* Converts every alphabetic character of the NUL-terminated string cp to
 * upper case, in place.  Does nothing when cp is NULL.
 */
static void SetToUpper (CharPtr cp)
{
  unsigned char ch;

  if (cp == NULL) return;
  while (*cp != 0) {
    /* the <ctype.h> functions require a value representable as unsigned
     * char; passing a negative plain char is undefined
     */
    ch = (unsigned char) *cp;
    if (isalpha (ch)) {
      *cp = (char) toupper (ch);
    }
    cp++;
  }
}
7699
7700
7701
static void 
7702
FixCapitalizationInString 
7703
(CharPtr PNTR pTitle,
7704
 Uint2 capitalization,
7705
 ValNodePtr   org_names)
7706
{
7707
  if (pTitle == NULL || capitalization == Cap_change_none) return;
7708
7709
  switch (capitalization) {
7710
    case Cap_change_tolower:
7711
      ResetCapitalization (FALSE, *pTitle);
7712
      FixAbbreviationsInElement (pTitle);
7713
      FixOrgNamesInString (*pTitle, org_names);
7714
      break;
7715
    case Cap_change_toupper:
7716
      SetToUpper (*pTitle);
7717
      FixAbbreviationsInElement (pTitle);
7718
      FixOrgNamesInString (*pTitle, org_names);
7719
      break;
7720
    case Cap_change_firstcap:
7721
      ResetCapitalization (TRUE, *pTitle);
7722
      FixAbbreviationsInElement (pTitle);
7723
      FixOrgNamesInString (*pTitle, org_names);
7724
      break;
7725
  }
7726
}
7727
7728
7729
/* Appends every title descriptor returned by SeqMgrGetNextDescriptor for bsp
 * to dest_list, tagged OBJ_SEQDESC.  Does nothing when either argument is
 * NULL.
 */
static void AddDeflineDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list)
{
  SeqMgrDescContext  context;
  SeqDescrPtr        title;

  if (bsp != NULL && dest_list != NULL) {
    for (title = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_title, &context);
         title != NULL;
         title = SeqMgrGetNextDescriptor (bsp, title, Seq_descr_title, &context)) {
      ValNodeAddPointer (dest_list, OBJ_SEQDESC, title);
    }
  }
}
7744
7745
7746
/* Appends to dest_list (tagged OBJ_SEQFEAT) every feature on bsp whose
 * featdef matches the feature type named in featfield.  Does nothing when
 * any argument is NULL.
 */
static void AddFeatureDestinationsForBioseq (BioseqPtr bsp, FeatureFieldLegalPtr featfield, ValNodePtr PNTR dest_list)
{
  SeqMgrFeatContext context;
  SeqFeatPtr        feat;
  Int4              wanted_featdef;

  if (bsp == NULL || featfield == NULL || dest_list == NULL) {
    return;
  }

  wanted_featdef = GetFeatdefFromFeatureType (featfield->type);
  feat = SeqMgrGetNextFeature (bsp, NULL, 0, wanted_featdef, &context);
  while (feat != NULL) {
    ValNodeAddPointer (dest_list, OBJ_SEQFEAT, feat);
    feat = SeqMgrGetNextFeature (bsp, feat, 0, wanted_featdef, &context);
  }
}
7761
7762
7763
/* Appends the BioSource holders found on bsp to dest_list:
 *   - source descriptors (tagged OBJ_SEQDESC) when object_type is
 *     Object_type_constraint_any or Object_type_constraint_descriptor;
 *   - BioSource features (tagged OBJ_SEQFEAT) when object_type is
 *     Object_type_constraint_any or Object_type_constraint_feature.
 * Does nothing when bsp or dest_list is NULL.
 */
static void GetBioSourceDestinationsForBioseq (BioseqPtr bsp, Uint2 object_type, ValNodePtr PNTR dest_list)
{
  SeqMgrDescContext  desc_context;
  SeqMgrFeatContext  feat_context;
  SeqDescrPtr        src_desc;
  SeqFeatPtr         src_feat;
  Boolean            any_type;

  if (bsp == NULL || dest_list == NULL) return;

  any_type = (Boolean) (object_type == Object_type_constraint_any);

  if (any_type || object_type == Object_type_constraint_descriptor) {
    for (src_desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &desc_context);
         src_desc != NULL;
         src_desc = SeqMgrGetNextDescriptor (bsp, src_desc, Seq_descr_source, &desc_context)) {
      ValNodeAddPointer (dest_list, OBJ_SEQDESC, src_desc);
    }
  }

  if (any_type || object_type == Object_type_constraint_feature) {
    for (src_feat = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &feat_context);
         src_feat != NULL;
         src_feat = SeqMgrGetNextFeature (bsp, src_feat, SEQFEAT_BIOSRC, 0, &feat_context)) {
      ValNodeAddPointer (dest_list, OBJ_SEQFEAT, src_feat);
    }
  }
}
7795
7796
7797
/* Fills psip->dest_list with the objects that the parsed text of psip should
 * be written to, according to the destination description dst.
 *
 *   ParseDest_defline   title descriptors of psip->bsp
 *   ParseDest_org       the source descriptor / BioSource feature the text
 *                       was taken from, when psip has one and the type
 *                       constraint allows it; otherwise the BioSources found
 *                       on psip->bsp that satisfy the constraint
 *   ParseDest_featqual  features of the requested type on psip->bsp
 *   ParseDest_dbxref    every BioSource descriptor and feature on psip->bsp
 *
 * Does nothing when psip or dst is NULL, or when a ParseDest_org destination
 * carries no ParseDstOrg data.
 */
static void AddParseDestinations (ParseSourceInfoPtr psip, ParseDestPtr dst)
{
  ParseDstOrgPtr o;

  if (psip == NULL || dst == NULL) return;

  switch (dst->choice) {
    case ParseDest_defline :
      AddDeflineDestinationsForBioseq (psip->bsp, &(psip->dest_list));
      break;
    case ParseDest_org :
      o = (ParseDstOrgPtr) dst->data.ptrvalue;
      if (o == NULL) {
        /* nothing describes which source qualifier to set; the previous
         * code dereferenced o unconditionally here.
         */
        break;
      }
      if ((o->type == Object_type_constraint_any || o->type == Object_type_constraint_descriptor)
          && psip->sdp != NULL && psip->sdp->choice == Seq_descr_source) {
        ValNodeAddPointer (&(psip->dest_list), OBJ_SEQDESC, psip->sdp);
      } else if ((o->type == Object_type_constraint_any || o->type == Object_type_constraint_feature)
                 && psip->sfp != NULL && psip->sfp->data.choice == SEQFEAT_BIOSRC) {
        ValNodeAddPointer (&(psip->dest_list), OBJ_SEQFEAT, psip->sfp);
      } else {
        GetBioSourceDestinationsForBioseq (psip->bsp, o->type, &(psip->dest_list));
      }
      break;
    case ParseDest_featqual :
      AddFeatureDestinationsForBioseq (psip->bsp, dst->data.ptrvalue, &(psip->dest_list));
      break;
    case ParseDest_dbxref :
      GetBioSourceDestinationsForBioseq (psip->bsp, Object_type_constraint_any, &(psip->dest_list));
      break;
  }
}
7827
7828
7829
/* Returns TRUE when exactly one node of source->dest_list has a choice value
 * greater than 1, FALSE otherwise (including a NULL source or empty list).
 * Counting stops as soon as a second such node is seen.
 *
 * NOTE(review): callers in this file mark removed destinations by setting
 * choice to 0 and store live ones as OBJ_SEQDESC / OBJ_SEQFEAT; the "> 1"
 * threshold presumably relies on both constants exceeding 1 - confirm.
 */
static Boolean SourceHasOneUndeletedDestination (ParseSourceInfoPtr source)
{
  ValNodePtr dest;
  Int4       live = 0;

  if (source == NULL) {
    return FALSE;
  }

  for (dest = source->dest_list; dest != NULL && live < 2; dest = dest->next) {
    if (dest->choice > 1) {
      live++;
    }
  }

  return (Boolean) (live == 1);
}
7858
7859
7860
/* Makes sure no destination object is claimed by two different parse
 * sources.  For every pair of sources (A earlier in the list, B later) and
 * every destination they share (same choice and same object pointer), the
 * two source texts are joined as "A;B" and:
 *
 *   - if A has exactly one live destination, A takes the joined text and the
 *     destination is dropped from B;
 *   - else if B has exactly one live destination, B takes the joined text
 *     and the destination is dropped from A;
 *   - otherwise the destination is dropped from both and a new source that
 *     owns only this destination, with the joined text, is inserted into the
 *     list directly after A.
 *
 * Dropped destinations are flagged with choice 0 and purged after each pair
 * has been examined; a source left without destinations is flagged with
 * choice 1 and removed from *source_list (and freed) at the end.
 */
static void CombineSourcesForDestinations (ValNodePtr PNTR source_list)
{
  ValNodePtr         src_a, src_b, dst_a, dst_b;
  ValNodePtr         inserted, discard;
  ParseSourceInfoPtr info_a, info_b, shared_info;
  CharPtr            joined;

  for (src_a = *source_list; src_a != NULL; src_a = src_a->next)
  {
    info_a = (ParseSourceInfoPtr) src_a->data.ptrvalue;
    if (info_a == NULL || info_a->dest_list == NULL)
    {
      continue;
    }

    for (src_b = src_a->next; src_b != NULL; src_b = src_b->next)
    {
      /* a non-zero choice means this source is already marked for deletion */
      if (src_b->choice > 0)
      {
        continue;
      }
      info_b = (ParseSourceInfoPtr) src_b->data.ptrvalue;
      if (info_b == NULL || info_b->dest_list == NULL)
      {
        continue;
      }

      for (dst_a = info_a->dest_list; dst_a != NULL; dst_a = dst_a->next)
      {
        /* choice 0 marks a destination already dropped from A */
        if (dst_a->choice == 0)
        {
          continue;
        }
        for (dst_b = info_b->dest_list; dst_b != NULL; dst_b = dst_b->next)
        {
          /* only live destinations of B that are the very same object */
          if (dst_b->choice == 0
              || dst_b->choice != dst_a->choice
              || dst_b->data.ptrvalue != dst_a->data.ptrvalue)
          {
            continue;
          }

          /* room for both texts, the ';' separator and the terminator */
          joined = (CharPtr) (MemNew (sizeof (Char)
                              * (StringLen (info_a->parse_src_txt)
                                 + StringLen (info_b->parse_src_txt)
                                 + 2)));
          StringCpy (joined, info_a->parse_src_txt);
          StringCat (joined, ";");
          StringCat (joined, info_b->parse_src_txt);

          if (SourceHasOneUndeletedDestination (info_a))
          {
            /* A only feeds this destination: let A carry both texts */
            MemFree (info_a->parse_src_txt);
            info_a->parse_src_txt = joined;
            dst_b->choice = 0;
          }
          else if (SourceHasOneUndeletedDestination (info_b))
          {
            /* B only feeds this destination: let B carry both texts */
            MemFree (info_b->parse_src_txt);
            info_b->parse_src_txt = joined;
            dst_a->choice = 0;
          }
          else
          {
            /* both feed other destinations too: split the shared one off
             * into a source of its own, placed right after A
             */
            shared_info = ParseSourceInfoNew (NULL, NULL, NULL, NULL, joined);
            ValNodeAddPointer (&(shared_info->dest_list),
                               dst_a->choice,
                               dst_a->data.ptrvalue);
            dst_a->choice = 0;
            dst_b->choice = 0;
            inserted = ValNodeNew (NULL);
            inserted->choice = 0;
            inserted->data.ptrvalue = shared_info;
            inserted->next = src_a->next;
            src_a->next = inserted;
          }
        }
      }

      /* purge the destinations flagged above and flag emptied sources */
      discard = ValNodeExtractList (&(info_a->dest_list), 0);
      discard = ValNodeFree (discard);
      if (info_a->dest_list == NULL)
      {
        src_a->choice = 1;
      }
      discard = ValNodeExtractList (&(info_b->dest_list), 0);
      discard = ValNodeFree (discard);
      if (info_b->dest_list == NULL)
      {
        src_b->choice = 1;
      }
    }
  }

  /* finally drop every source that was flagged for deletion */
  discard = ValNodeExtractList (source_list, 1);
  discard = ParseSourceListFree (discard);
}
7983
7984
7985
/* Given a descriptor that lives on a BioseqSet, returns the "parts" set of
 * the segmented set it belongs to, or NULL when there is none.
 *
 * The descriptor must be an extended (ObjValNode) node whose recorded parent
 * is a BioseqSet.  If that parent is a nuc-prot set whose first member is
 * itself a set, the search continues in that member.  The set reached this
 * way must be a segset; its first child set of class "parts" is returned.
 */
static BioseqSetPtr GetPartsForSourceDescriptorOnSegSet (SeqDescrPtr sdp)
{
  ObjValNodePtr ovp;
  BioseqSetPtr  bssp, child;
  SeqEntryPtr   sep;

  if (sdp == NULL || sdp->extended != 1) return NULL;

  ovp = (ObjValNodePtr) sdp;
  if (ovp->idx.parentptr == NULL || ovp->idx.parenttype != OBJ_BIOSEQSET) return NULL;
  bssp = (BioseqSetPtr) ovp->idx.parentptr;

  /* step down from a nuc-prot wrapper into its first member set */
  if (bssp->_class == BioseqseqSet_class_nuc_prot
      && IS_Bioseq_set (bssp->seq_set)
      && bssp->seq_set->data.ptrvalue != NULL) {
    bssp = (BioseqSetPtr) bssp->seq_set->data.ptrvalue;
  }

  if (bssp->_class != BioseqseqSet_class_segset) return NULL;

  for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
    if (!IS_Bioseq_set (sep) || sep->data.ptrvalue == NULL) {
      continue;
    }
    child = (BioseqSetPtr) sep->data.ptrvalue;
    if (child->_class == BioseqseqSet_class_parts) {
      return child;
    }
  }

  return NULL;
}
8021
8022
8023
/* Returns the first source descriptor found in the descriptor chain attached
 * directly to the Bioseq or BioseqSet held by sep, or NULL when sep is NULL,
 * empty, of another kind, or carries no source descriptor.
 */
static SeqDescrPtr FindSourceDescriptorInSeqEntry (SeqEntryPtr sep)
{
  SeqDescrPtr  sdp;

  if (sep == NULL || sep->data.ptrvalue == NULL) {
    return NULL;
  }

  if (IS_Bioseq (sep)) {
    sdp = ((BioseqPtr) sep->data.ptrvalue)->descr;
  } else if (IS_Bioseq_set (sep)) {
    sdp = ((BioseqSetPtr) sep->data.ptrvalue)->descr;
  } else {
    return NULL;
  }

  for (; sdp != NULL; sdp = sdp->next) {
    if (sdp->choice == Seq_descr_source) {
      break;
    }
  }
  return sdp;
}
8044
8045
8046
/* Duplicates descriptor sdp (via AsnIoMemCopy) and links the duplicate onto
 * the end of the descriptor chain of the Bioseq or BioseqSet held by sep.
 * Returns the duplicate, or NULL when sep is NULL, empty or of another kind
 * (in which case no copy is made).
 */
static SeqDescrPtr PropagateToSeqEntry (SeqEntryPtr sep, SeqDescrPtr sdp)
{
  ValNodePtr PNTR descr_chain = NULL;
  SeqDescrPtr     new_sdp;

  if (sep == NULL || sep->data.ptrvalue == NULL) {
    return NULL;
  }

  /* locate the descriptor chain that will receive the copy */
  if (IS_Bioseq (sep)) {
    descr_chain = &(((BioseqPtr) sep->data.ptrvalue)->descr);
  } else if (IS_Bioseq_set (sep)) {
    descr_chain = &(((BioseqSetPtr) sep->data.ptrvalue)->descr);
  }
  if (descr_chain == NULL) {
    return NULL;
  }

  new_sdp = AsnIoMemCopy ((Pointer) sdp,
                          (AsnReadFunc) SeqDescrAsnRead,
                          (AsnWriteFunc) SeqDescrAsnWrite);
  ValNodeLink (descr_chain, new_sdp);
  return new_sdp;
}
8069
8070
8071
/* For each parse source, looks at its source-descriptor destinations that
 * sit on a segmented set with a parts set.  For such a destination:
 *   - if the part that is the source's own Bioseq (psip->bsp) has no source
 *     descriptor yet, the set-level descriptor is copied onto that part and
 *     the copy becomes an additional destination;
 *   - the set-level descriptor itself is taken out of play by setting the
 *     destination node's choice to 0.
 * The additional destinations are appended to the source's dest_list once
 * all of its existing destinations have been examined.
 */
static void PropagateSourceOnSegSetForParse (ValNodePtr parse_source_list)
{
  ParseSourceInfoPtr psip;
  ValNodePtr         src_node, dst_node;
  ValNodePtr         added;
  SeqDescrPtr        master_sdp, part_sdp;
  SeqEntryPtr        part;
  BioseqSetPtr       parts_set;

  for (src_node = parse_source_list; src_node != NULL; src_node = src_node->next) {
    psip = (ParseSourceInfoPtr) src_node->data.ptrvalue;
    if (psip == NULL) {
      continue;
    }

    added = NULL;
    for (dst_node = psip->dest_list; dst_node != NULL; dst_node = dst_node->next) {
      if (dst_node->choice != OBJ_SEQDESC) {
        continue;
      }
      master_sdp = (SeqDescrPtr) dst_node->data.ptrvalue;
      if (master_sdp == NULL || master_sdp->choice != Seq_descr_source) {
        continue;
      }
      parts_set = GetPartsForSourceDescriptorOnSegSet (master_sdp);
      if (parts_set == NULL) {
        continue;
      }

      for (part = parts_set->seq_set; part != NULL; part = part->next) {
        if (IS_Bioseq (part)
            && part->data.ptrvalue == psip->bsp
            && FindSourceDescriptorInSeqEntry (part) == NULL) {
          part_sdp = PropagateToSeqEntry (part, master_sdp);
          ValNodeAddPointer (&added, OBJ_SEQDESC, part_sdp);
        }
      }

      /* choice 0 keeps the master descriptor from being a destination */
      dst_node->choice = 0;
    }

    /* hand the newly created part descriptors over to this source */
    ValNodeLink (&psip->dest_list, added);
  }
}
8113
8114
8115
/* Sets the value of the db_xref named db_name on the organism of biop.
 *
 * biop           BioSource to change; an OrgRef is created if it has none
 * db_name        database name of the xref (must have text)
 * str            value to store (must have text)
 * existing_text  passed through to SetStringValue to decide how str is
 *                combined with a value that is already present
 *
 * An existing Dbtag is reused when its db equals db_name and it already has
 * a tag; otherwise a new Dbtag is appended to the organism's db list.  A tag
 * that currently holds a positive integer id is first converted to its
 * string form so that the string value can be edited.
 *
 * Returns the result of SetStringValue, or FALSE when an argument is
 * missing or blank.
 */
static Boolean SetDBxrefForBioSource (BioSourcePtr biop, CharPtr db_name, CharPtr str, Uint2 existing_text)
{
  ValNodePtr    dbx;
  DbtagPtr      dbtag = NULL;
  Boolean       found = FALSE;
  Char          buf[20];
  Boolean       rval = FALSE;

  if (biop == NULL || StringHasNoText (db_name) || StringHasNoText (str)) {
    return FALSE;
  }

  if (biop->org == NULL)
  {
    biop->org = OrgRefNew();
  }
  dbx = biop->org->db;
  while (dbx != NULL && !found)
  {
    dbtag = (DbtagPtr) dbx->data.ptrvalue;
    if (dbtag != NULL && dbtag->tag != NULL
        && StringCmp (dbtag->db, db_name) == 0)
    {
      found = TRUE;
    }
    if (!found)
    {
      dbx = dbx->next;
    }
  }
  if (!found)
  {
    dbtag = DbtagNew();
    dbtag->db = StringSave (db_name);      
    ValNodeAddPointer (&(biop->org->db), 0, dbtag);
  }
  if (dbtag->tag == NULL)
  {
    dbtag->tag = ObjectIdNew();
  }
  /* if it was a number before, make it a string now */
  if (dbtag->tag->id > 0 && dbtag->tag->str == NULL)
  {
    /* id is an integer: it must be formatted with an integer conversion.
     * The previous "%s" made sprintf treat the number as a string pointer.
     * The cast keeps the call correct whatever integer type id has.
     */
    sprintf (buf, "%ld", (long) dbtag->tag->id);
    dbtag->tag->id = 0;
    dbtag->tag->str = StringSave (buf);
  }
  rval = SetStringValue (&(dbtag->tag->str), str, existing_text);
  return rval;
}
8165
8166
8167
/* Writes str into every object of dest_list, in the field described by
 * field, and returns the number of objects for which the write succeeded.
 *
 * dest_list      ValNode list of destination objects (choice is the object
 *                type, data.ptrvalue the object)
 * field          which field to set:
 *                  ParseDest_defline   the text of title descriptors
 *                  ParseDest_org       a source qualifier of the BioSource
 *                  ParseDest_featqual  a legal feature qualifier
 *                  ParseDest_dbxref    the db_xref named by field's data
 * str            text to store
 * existing_text  how to combine str with text that is already present
 *
 * Returns 0 when dest_list or field is NULL.
 */
static Int4 SetFieldForDestList (ValNodePtr dest_list, ParseDestPtr field, CharPtr str, Uint2 existing_text)
{
  ValNodePtr vnp;
  SeqDescrPtr sdp;
  CharPtr     cp;
  BioSourcePtr biop;
  ParseDstOrgPtr o;
  FeatureFieldLegalPtr fl;
  FeatureField f;
  Int4         num_succeeded = 0;

  if (dest_list == NULL || field == NULL) return 0;

  switch (field->choice) {
    case ParseDest_defline :
      for (vnp = dest_list; vnp != NULL; vnp = vnp->next) {
        if (vnp->choice == OBJ_SEQDESC && vnp->data.ptrvalue != NULL) {
          sdp = (SeqDescrPtr) vnp->data.ptrvalue;
          if (sdp->choice == Seq_descr_title) {
            /* SetStringValue may replace the string, so edit a local
             * pointer and store it back afterwards
             */
            cp = sdp->data.ptrvalue;
            if (SetStringValue (&cp, str, existing_text)) {
              num_succeeded++;
            }
            sdp->data.ptrvalue = cp;
          }
        }
      }
      break;
    case ParseDest_org :
      o = (ParseDstOrgPtr) field->data.ptrvalue;
      if (o != NULL) {
        for (vnp = dest_list; vnp != NULL; vnp = vnp->next) {
          biop = GetBioSourceFromObject (vnp->choice, vnp->data.ptrvalue);
          if (SetSourceQualInBioSource (biop, o->field, NULL, str, existing_text)) {
            num_succeeded++;
          }
        }
      }
      break;
    case ParseDest_featqual:
      fl = (FeatureFieldLegalPtr) field->data.ptrvalue;
      if (fl != NULL) {
        /* build a temporary FeatureField naming the legal qualifier */
        f.type = fl->type;
        f.field = ValNodeNew(NULL);
        if (f.field == NULL) {
          /* allocation failed: nothing can be set (previously this was
           * dereferenced unchecked)
           */
          break;
        }
        f.field->next = NULL;
        f.field->choice = FeatQualChoice_legal_qual;
        f.field->data.intvalue = fl->field;        
        for (vnp = dest_list; vnp != NULL; vnp = vnp->next) {
          if (SetQualOnFeature (vnp->data.ptrvalue, &f, NULL, str, existing_text)) {
            num_succeeded++;
          }
        }
        f.field = ValNodeFree (f.field);
      }
      break;
    case ParseDest_dbxref:
      if (!StringHasNoText (field->data.ptrvalue)) {
        for (vnp = dest_list; vnp != NULL; vnp = vnp->next) {
          biop = GetBioSourceFromObject (vnp->choice, vnp->data.ptrvalue);
          if (SetDBxrefForBioSource (biop, field->data.ptrvalue, str, existing_text)) {
            num_succeeded++;
          }
        }
      }
      break;
  }
  return num_succeeded;
}
8235
8236
8237
/* Removes from the object a parse source was read from the portion of text
 * described by text_portion (by way of ReplaceStringForParse or the matching
 * Strip...ForParse helper).  Which object is edited depends on field->choice:
 *
 *   ParseSrc_defline             the title descriptor in psip->sdp
 *   ParseSrc_org                 the source qualifier named by the field, on
 *                                the BioSource held by psip->sdp or, failing
 *                                that, by psip->sfp
 *   ParseSrc_comment             a user-object or comment descriptor in
 *                                psip->sdp, and/or a comment feature in
 *                                psip->sfp
 *   ParseSrc_bankit_comment      the user-object descriptor in psip->sdp
 *   ParseSrc_structured_comment  the user-object descriptor in psip->sdp
 *
 * Does nothing when any argument is NULL.
 */
static void StripFieldForSrcList (ParseSourceInfoPtr psip, ParseSrcPtr field, TextPortionPtr text_portion)
{
  ParseSrcOrgPtr org_field;
  BioSourcePtr   biop;
  CharPtr        qual_txt;
  SeqDescrPtr    sdp;
  SeqFeatPtr     sfp;

  if (psip == NULL || field == NULL || text_portion == NULL) return;

  sdp = psip->sdp;
  sfp = psip->sfp;

  switch (field->choice) {
    case ParseSrc_defline :
      if (sdp != NULL && sdp->choice == Seq_descr_title) {
        ReplaceStringForParse (sdp->data.ptrvalue, text_portion);
      }
      break;

    case ParseSrc_org :
      org_field = (ParseSrcOrgPtr) field->data.ptrvalue;
      if (org_field == NULL) {
        break;
      }
      /* pick the BioSource: descriptor first, feature second */
      if (sdp != NULL && sdp->choice == Seq_descr_source) {
        biop = (BioSourcePtr) sdp->data.ptrvalue;
      } else if (sfp != NULL && sfp->data.choice == SEQFEAT_BIOSRC) {
        biop = (BioSourcePtr) sfp->data.value.ptrvalue;
      } else {
        break;
      }
      /* fetch the qualifier text, cut the parsed portion out of it, and
       * store the remainder back in place of the old value
       */
      qual_txt = GetSourceQualFromBioSource (biop, org_field->field, NULL);
      ReplaceStringForParse (qual_txt, text_portion);
      SetSourceQualInBioSource (biop, org_field->field, NULL, qual_txt, ExistingTextOption_replace_old);
      qual_txt = MemFree (qual_txt);
      break;

    case ParseSrc_comment:
      if (sdp != NULL) {
        switch (sdp->choice) {
          case Seq_descr_user:
            StripBankitCommentForParse (sdp, text_portion);
            break;
          case Seq_descr_comment:
            ReplaceStringForParse (sdp->data.ptrvalue, text_portion);
            break;
        }
      }
      if (sfp != NULL && sfp->data.choice == SEQFEAT_COMMENT) {
        ReplaceStringForParse (sfp->data.value.ptrvalue, text_portion);
      }
      break;

    case ParseSrc_bankit_comment:
      if (sdp != NULL && sdp->choice == Seq_descr_user) {
        StripBankitCommentForParse (sdp, text_portion);
      }
      break;

    case ParseSrc_structured_comment:
      if (sdp != NULL && sdp->choice == Seq_descr_user) {
        StripStructuredCommentForParse (sdp, field->data.ptrvalue, text_portion);
      }
      break;
  }
}
8293
8294
8295
/* Runs one parse action on sep: collects the source texts, optionally fixes
 * their capitalization, finds the destination objects for each source,
 * merges sources that share a destination, writes the texts into the
 * destination field and, if requested, removes the parsed text from the
 * objects it was read from.
 *
 * Returns the number of destination objects that were successfully set, or
 * 0 when action, action->dest or sep is NULL.
 */
static Int4 ApplyParseActionToSeqEntry (ParseActionPtr action, SeqEntryPtr sep)
{
  ParseSrcCollectionData psd;
  ParseSourceInfoPtr     psip;
  ValNodePtr             orgnames = NULL, source_list_for_removal = NULL, vnp;
  Int4                   num_succeeded = 0;

  /* action->dest is dereferenced below, so it must be present as well */
  if (action == NULL || sep == NULL || action->dest == NULL) return 0;

  psd.src = action->src;
  psd.portion = action->portion;
  psd.src_list = NULL;

  /* first, we need to get a list of the parse sources */  
  VisitBioseqsInSep (sep, &psd, FindParseSourceBioseqCallback);

  if (action->capitalization != Cap_change_none) {
    /* if we will be fixing capitalization, get org names to use in fixes */
    VisitBioSourcesInSep (sep, &orgnames, GetOrgNamesInRecordCallback);
  }

  /* for each parse source, we need to get a list of the destinations */
  for (vnp = psd.src_list; vnp != NULL; vnp = vnp->next)
  {
    if (vnp->data.ptrvalue == NULL) continue;
    psip = (ParseSourceInfoPtr) vnp->data.ptrvalue;
    if (action->remove_from_parsed) {
        /* keep an unmodified copy: the text is stripped from the original
         * object only after the destinations have been written
         */
        ValNodeAddPointer (&source_list_for_removal, 0, ParseSourceInfoCopy (psip));
    }
    /* fix source text */
    FixCapitalizationInString (&(psip->parse_src_txt), action->capitalization, orgnames);

    /* find destinations */
    AddParseDestinations (psip, action->dest);

  }

  /* free orgname list if we created it */
  orgnames = ValNodeFree (orgnames);

  CombineSourcesForDestinations (&(psd.src_list));

  if (action->dest->choice == ParseDest_org) {
    PropagateSourceOnSegSetForParse (psd.src_list);
  }
  
  /* now do the parsing */
  for (vnp = psd.src_list; vnp != NULL; vnp = vnp->next) {
    /* entries without data are skipped, as in the other loops here */
    if (vnp->data.ptrvalue == NULL) continue;
    psip = (ParseSourceInfoPtr) vnp->data.ptrvalue;
    num_succeeded += SetFieldForDestList (psip->dest_list, action->dest, psip->parse_src_txt, action->existing_text);
  }

  /* now remove strings from sources */
  for (vnp = source_list_for_removal; vnp != NULL; vnp = vnp->next)
  {
    if (vnp->data.ptrvalue == NULL) continue;
    psip = (ParseSourceInfoPtr) vnp->data.ptrvalue;
    StripFieldForSrcList (psip, action->src, action->portion);
  }

  /* the collected source list is no longer needed; it was leaked before */
  psd.src_list = ParseSourceListFree (psd.src_list);

  /* NOTE(review): source_list_for_removal is still not released here.
   * Whether ParseSourceInfoCopy makes copies that are safe to hand to
   * ParseSourceListFree is not visible from this function - confirm and
   * free it as well.
   */
  return num_succeeded;
}
8356
8357
8358
/* Gives the coding region feature cds a genetic_code value derived from the
 * record it is located on.
 *
 * A CdRegion is created if the feature has none.  The Bioseq of the feature
 * location is looked up (nothing more happens if it cannot be found), the
 * genetic code of its top parent entry is obtained with
 * SeqEntryToGeneticCode, and stored as a ValNode with choice 254 whose data
 * points to a second ValNode with choice 2 holding the code number.
 * Non-coding-region features and NULL are ignored.
 */
static void SetCdRegionGeneticCode (SeqFeatPtr cds)
{
  CdRegionPtr crp;
  BioseqPtr   target_bsp;
  SeqEntryPtr top_sep;
  ValNodePtr  wrapper, id_node;
  Int4        code_num;

  if (cds == NULL) return;
  if (cds->data.choice != SEQFEAT_CDREGION) return;

  if (cds->data.value.ptrvalue == NULL) {
    cds->data.value.ptrvalue = CdRegionNew();
  }
  crp = (CdRegionPtr) cds->data.value.ptrvalue;

  target_bsp = BioseqFindFromSeqLoc (cds->location);
  if (target_bsp == NULL) return;

  top_sep = GetBestTopParentForData (target_bsp->idx.entityID, target_bsp);
  code_num = SeqEntryToGeneticCode (top_sep, NULL, NULL, 0);

  wrapper = ValNodeNew (NULL);
  if (wrapper != NULL) {
    id_node = ValNodeNew (NULL);
    if (id_node != NULL) {
      id_node->choice = 2;
      id_node->data.intvalue = code_num;
    }
    wrapper->choice = 254;
    wrapper->data.ptrvalue = id_node;
  }
  crp->genetic_code = wrapper;
}
8388
8389
  
8390
/* Creates the type-specific data object for a freshly made feature.
 *
 * sfp           feature to fill in; its idx.subtype is set to the featdef
 *               that corresponds to feature_type
 * feature_type  macro-language feature type
 *
 * Gene features get a GeneRef, coding regions a CdRegion with a genetic
 * code, RNA features an RnaRef whose type (and, for ncRNA-like features,
 * ext name) matches the featdef, and import features an ImpFeat keyed with
 * the feature name.  For snRNA and scRNA an "ncRNA_class" qualifier naming
 * the class is added to the feature's qualifier list.  Other feature kinds
 * receive no data object.  A NULL sfp is ignored.
 */
static void CreateDataForFeature (SeqFeatPtr sfp, Int4 feature_type)
{
  Int4 featdef, seqfeattype;
  CharPtr    label = NULL;
  RnaRefPtr  rrp;
  GBQualPtr  gbq;
  ImpFeatPtr ifp;

  if (sfp == NULL) return;

  featdef = GetFeatdefFromFeatureType (feature_type);
  sfp->idx.subtype = featdef;
  seqfeattype = FindFeatFromFeatDefType (featdef);
  switch (seqfeattype) {
    case SEQFEAT_GENE:
      sfp->data.value.ptrvalue = GeneRefNew();
      break;
    case SEQFEAT_CDREGION:
      sfp->data.value.ptrvalue = CdRegionNew();
      SetCdRegionGeneticCode (sfp);
      break;
    case SEQFEAT_RNA:
      rrp = RnaRefNew();
      rrp->ext.choice = 0;
      sfp->data.value.ptrvalue = rrp;
      switch (featdef) {
        case FEATDEF_preRNA:
          rrp->type = RNA_TYPE_premsg;
          break;
        case FEATDEF_mRNA:
          rrp->type = RNA_TYPE_mRNA;
          break;
        case FEATDEF_tRNA:
          rrp->type = RNA_TYPE_tRNA;
          break;
        case FEATDEF_rRNA:
          rrp->type = RNA_TYPE_rRNA;
          break;
        case FEATDEF_snRNA:
        case FEATDEF_scRNA:
          rrp->type = RNA_TYPE_other;
          rrp->ext.choice = 1;
          rrp->ext.value.ptrvalue = StringSave ("ncRNA");
          gbq = GBQualNew ();
          if (gbq != NULL) {
            gbq->qual = StringSave ("ncRNA_class");
            gbq->val = StringSave (featdef == FEATDEF_snRNA ? "snRNA" : "scRNA");
            /* attach the qualifier to the feature; previously it was built
             * and then dropped, losing the class and leaking the GBQual
             */
            gbq->next = sfp->qual;
            sfp->qual = gbq;
          }
          break;
        case FEATDEF_tmRNA:
          rrp->type = RNA_TYPE_other;
          rrp->ext.choice = 1;
          rrp->ext.value.ptrvalue = StringSave ("tmRNA");
          break;
        case FEATDEF_ncRNA:
          rrp->type = RNA_TYPE_other;
          rrp->ext.choice = 1;
          rrp->ext.value.ptrvalue = StringSave ("ncRNA");
          break;
      }
      break;
    case SEQFEAT_IMP:
      ifp = ImpFeatNew();
      sfp->data.value.ptrvalue = ifp;
      label = GetFeatureNameFromFeatureType (feature_type);
      ifp->key = StringSave (label);
      break;
  }
}
8462
8463
8464
/* Completes a newly created coding region: translates it, builds a protein
 * Bioseq from the translation, adds that Bioseq (with a MolInfo descriptor)
 * to parent_sep, makes it the product of cds and puts a protein feature on
 * it.
 *
 * cds         coding region feature; NULL is ignored
 * parent_sep  entry the protein is added to
 *
 * The partial flags of the cds location are carried over to the MolInfo
 * completeness and to the protein feature.  The function returns early,
 * without further effect, when the translation or one of the objects cannot
 * be created.
 */
static void ExtraCDSCreationActions (SeqFeatPtr cds, SeqEntryPtr parent_sep)
{
  ByteStorePtr       bs;
  CharPtr            prot, ptr;
  BioseqPtr          bsp;
  Char               ch;
  Int4               i;
  SeqEntryPtr        psep, nsep;
  MolInfoPtr         mip;
  ValNodePtr         vnp, descr;
  SeqFeatPtr         prot_sfp;
  ProtRefPtr         prp;
  Boolean            partial5, partial3;

  if (cds == NULL) return;

  CheckSeqLocForPartial (cds->location, &partial5, &partial3);

  /* Create corresponding protein sequence data for the CDS */

  bs = ProteinFromCdRegionEx (cds, TRUE, FALSE);
  if (NULL == bs)
    return;

  prot = BSMerge (bs, NULL);
  bs = BSFree (bs);
  if (NULL == prot)
    return;

  /* upper-case the residues and drop a trailing stop ('*') */
  ptr = prot;
  ch = *ptr;
  while (ch != '\0') {
    *ptr = TO_UPPER (ch);
    ptr++;
    ch = *ptr;
  }
  i = StringLen (prot);
  if (i > 0 && prot [i - 1] == '*') {
    prot [i - 1] = '\0';
  }
  bs = BSNew (1000);
  if (bs != NULL) {
    ptr = prot;
    BSWrite (bs, (VoidPtr) ptr, (Int4) StringLen (ptr));
  }
  /* the residues now live in the byte store; the merged buffer is ours to
   * release (it was never freed before)
   */
  prot = MemFree (prot);

  /* Create the product protein Bioseq */
  
  bsp = BioseqNew ();
  if (NULL == bsp) {
    bs = BSFree (bs);
    return;
  }
  
  bsp->repr = Seq_repr_raw;
  bsp->mol = Seq_mol_aa;
  bsp->seq_data_type = Seq_code_ncbieaa;
  bsp->seq_data = (SeqDataPtr) bs;
  bsp->length = BSLen (bs);
  bs = NULL;
  bsp->id = MakeNewProteinSeqId (cds->location, NULL);
  SeqMgrAddToBioseqIndex (bsp);
  
  /* Create a new SeqEntry for the Prot Bioseq */
  
  psep = SeqEntryNew ();
  if (NULL == psep)
    return;
  
  psep->choice = 1;
  psep->data.ptrvalue = (Pointer) bsp;
  SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) bsp, psep);
  
  /* Add a descriptor to the protein Bioseq */
  
  mip = MolInfoNew ();
  if (NULL == mip)
    return;
  
  mip->biomol = 8;
  mip->tech = 8;
  if (partial5 && partial3) {
    mip->completeness = 5;
  } else if (partial5) {
    mip->completeness = 3;
  } else if (partial3) {
    mip->completeness = 4;
  }
  vnp = CreateNewDescriptor (psep, Seq_descr_molinfo);
  if (NULL == vnp) {
    /* no descriptor to own the MolInfo: release it */
    mip = MolInfoFree (mip);
    return;
  }
  
  vnp->data.ptrvalue = (Pointer) mip;
  
  /* BioSource and pubs are taken off parent_sep while the protein is added
   * and put back afterwards
   */
  
  descr = ExtractBioSourceAndPubs (parent_sep);

  AddSeqEntryToSeqEntry (parent_sep, psep, TRUE);
  nsep = FindNucSeqEntry (parent_sep);
  ReplaceBioSourceAndPubs (parent_sep, descr);
  SetSeqFeatProduct (cds, bsp);
  
  prp = ProtRefNew ();
  
  if (prp != NULL) {
    prot_sfp = CreateNewFeature (psep, NULL, SEQFEAT_PROT, NULL);
    if (prot_sfp != NULL) {
      prot_sfp->data.value.ptrvalue = (Pointer) prp;
      SetSeqLocPartial (prot_sfp->location, partial5, partial3);
      prot_sfp->partial = (partial5 || partial3);
    } else {
      /* no feature to own the ProtRef: release it */
      prp = ProtRefFree (prp);
    }
  }
}
8576
8577
8578
/* Builds the location for a feature that action is about to apply to bsp.
 *
 * For an interval location the interval runs from the smaller to the larger
 * of the two stored coordinates; for a whole-sequence location it runs from
 * 0 to bsp->length - 1.  The strand is plus or minus according to
 * action->plus_strand, and the partial flags of the action are applied with
 * SetSeqLocPartial.  Returns NULL when bsp, action or action->location is
 * NULL; the result is also NULL for other location choices or an interval
 * without data.
 */
static SeqLocPtr LocationFromApplyFeatureAction (BioseqPtr bsp, ApplyFeatureActionPtr action)
{
  LocationIntervalPtr interval;
  SeqLocPtr           slp = NULL;
  Uint1               strand;
  Int4                start, stop;

  if (bsp == NULL || action == NULL || action->location == NULL) return NULL;

  strand = action->plus_strand ? Seq_strand_plus : Seq_strand_minus;

  switch (action->location->choice) {
    case LocationChoice_interval:
      interval = (LocationIntervalPtr) action->location->data.ptrvalue;
      if (interval != NULL) {
        /* accept the end points in either order */
        start = MIN (interval->from, interval->to);
        stop = MAX (interval->from, interval->to);
        slp = SeqLocIntNew (start, stop, strand, SeqIdFindWorst (bsp->id));
      }
      break;
    case LocationChoice_whole_sequence:
      slp = SeqLocIntNew (0, bsp->length - 1, strand, SeqIdFindWorst (bsp->id));
      break;
  }

  SetSeqLocPartial (slp, action->partial5, action->partial3);
  return slp;
}
8603
8604
8605
/* Tells whether the feature described by action may be added to bsp.
 * Returns FALSE for NULL arguments.  When the action allows redundant
 * features the answer is always TRUE; otherwise it is TRUE only if bsp has
 * no feature of the action's type yet.
 */
static Boolean OkToApplyToBioseq (ApplyFeatureActionPtr action, BioseqPtr bsp)
{
  SeqMgrFeatContext context;
  SeqFeatPtr        existing;
  Int4              featdef;

  if (action == NULL || bsp == NULL) return FALSE;

  if (action->add_redundant) {
    return TRUE;
  }

  featdef = GetFeatdefFromFeatureType (action->type);
  existing = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &context);
  return (Boolean) (existing == NULL);
}
8623
8624
/* Adds the member Bioseqs of a parts set to bsp_list (tagged OBJ_BIOSEQ),
 * provided the action applies to parts and OkToApplyToBioseq accepts them.
 *
 * When action->only_seg_num is greater than -1 only the member at that
 * zero-based position is considered; otherwise every member is.  Nothing is
 * done when an argument is NULL, the action does not apply to parts, or the
 * set is not of class "parts".
 */
static void AddParts (ApplyFeatureActionPtr action, BioseqSetPtr parts, ValNodePtr PNTR bsp_list)
{
  SeqEntryPtr member;
  Int4        pos;

  if (action == NULL || !action->apply_to_parts) return;
  if (parts == NULL || parts->_class != BioseqseqSet_class_parts) return;
  if (bsp_list == NULL) return;

  if (action->only_seg_num > -1) {
    /* walk to the requested segment, if the set is long enough */
    member = parts->seq_set;
    for (pos = 0; pos < action->only_seg_num && member != NULL; pos++) {
      member = member->next;
    }
    if (member != NULL && IS_Bioseq (member) && OkToApplyToBioseq (action, member->data.ptrvalue)) {
      ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, member->data.ptrvalue);
    }
    return;
  }

  member = parts->seq_set;
  while (member != NULL) {
    if (IS_Bioseq (member) && OkToApplyToBioseq (action, member->data.ptrvalue)) {
      ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, member->data.ptrvalue);
    }
    member = member->next;
  }
}
8653
8654
8655
/* Appends to *bsp_list the Bioseq(s) that should receive the feature when
 * bsp was selected, depending on the set that directly contains bsp:
 *   - parent is a segset: the parts (via AddParts) when
 *     action->apply_to_parts is set, otherwise bsp itself;
 *   - parent is a parts set: the parts when action->apply_to_parts is set,
 *     otherwise the first entry of the set enclosing the parts set, if
 *     that entry is a Bioseq;
 *   - any other parent, or no Bioseq-set parent: bsp itself.
 * A Bioseq is only added when OkToApplyToBioseq accepts it.
 */
static void AddSequenceOrParts (ApplyFeatureActionPtr action, BioseqPtr bsp, ValNodePtr PNTR bsp_list)
{
  BioseqSetPtr bssp, parts;
  SeqEntryPtr  sep;
  BioseqPtr    master;

  if (action == NULL || bsp == NULL || bsp_list == NULL) return;

  if (bsp->idx.parenttype == OBJ_BIOSEQSET && bsp->idx.parentptr != NULL) {
    bssp = (BioseqSetPtr) bsp->idx.parentptr;
    if (bssp->_class == BioseqseqSet_class_segset) {
      if (action->apply_to_parts) {
        /* the first Bioseq-set member of the segset is handed to AddParts,
         * which verifies that it really is a parts set */
        sep = bssp->seq_set;
        while (sep != NULL && !IS_Bioseq_set (sep)) {
          sep = sep->next;
        }
        if (sep != NULL) {
          AddParts (action, sep->data.ptrvalue, bsp_list);
        }
      } else {
        if (OkToApplyToBioseq (action, bsp)) {
          ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, bsp);
        }
      }
    } else if (bssp->_class == BioseqseqSet_class_parts) {
      if (action->apply_to_parts) {
        AddParts (action, bssp, bsp_list);
      } else {
        parts = bssp;
        if (parts->idx.parenttype == OBJ_BIOSEQSET && parts->idx.parentptr != NULL) {
          bssp = (BioseqSetPtr) parts->idx.parentptr;
          if (bssp->seq_set != NULL && IS_Bioseq (bssp->seq_set)) {
            master = (BioseqPtr) bssp->seq_set->data.ptrvalue;
            if (OkToApplyToBioseq (action, master)) {
              /* store the Bioseq that was just checked; the previous code
               * stored the bsp_list pointer itself, which the caller would
               * then have treated as a BioseqPtr */
              ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, master);
            }
          }
        }
      }
    } else {
      if (OkToApplyToBioseq (action, bsp)) {
        ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, bsp);
      }
    }
  } else {
    if (OkToApplyToBioseq (action, bsp)) {
      ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, bsp);
    }
  }
}
8701
8702
/* Walks the chain of SeqEntries starting at sep and collects target Bioseqs
 * into *bsp_list through AddSequenceOrParts:
 *   - a Bioseq entry is passed on directly;
 *   - a segset contributes its first Bioseq member;
 *   - a nuc-prot set contributes its first member only: either that Bioseq,
 *     or, if the first member is a segset whose first entry is a Bioseq,
 *     that Bioseq;
 *   - any other set is descended into recursively.
 * Does nothing when action is NULL or sep is NULL.
 */
static void AddSequenceOrPartsFromSeqEntry (ApplyFeatureActionPtr action, SeqEntryPtr sep, ValNodePtr PNTR bsp_list)
{
  BioseqSetPtr set, nuc_set;
  SeqEntryPtr  member;

  if (action == NULL) return;

  for (; sep != NULL; sep = sep->next) {
    if (IS_Bioseq (sep)) {
      AddSequenceOrParts (action, sep->data.ptrvalue, bsp_list);
      continue;
    }
    if (!IS_Bioseq_set (sep)) {
      continue;
    }

    set = (BioseqSetPtr) sep->data.ptrvalue;
    if (set->_class == BioseqseqSet_class_segset) {
      /* locate the first Bioseq member of the segmented set */
      for (member = set->seq_set; member != NULL; member = member->next) {
        if (IS_Bioseq (member)) {
          break;
        }
      }
      if (member != NULL) {
        AddSequenceOrParts (action, member->data.ptrvalue, bsp_list);
      }
    } else if (set->_class == BioseqseqSet_class_nuc_prot) {
      /* only the first member of a nuc-prot set is examined */
      member = set->seq_set;
      if (member == NULL) {
        continue;
      }
      if (IS_Bioseq_set (member)) {
        /* first member is itself a set: accept it only if it is a segset
         * that begins with a Bioseq */
        nuc_set = (BioseqSetPtr) member->data.ptrvalue;
        if (nuc_set != NULL && nuc_set->_class == BioseqseqSet_class_segset
            && nuc_set->seq_set != NULL && IS_Bioseq (nuc_set->seq_set)) {
          AddSequenceOrParts (action, nuc_set->seq_set->data.ptrvalue, bsp_list);
        }
      } else if (IS_Bioseq (member)) {
        AddSequenceOrParts (action, member->data.ptrvalue, bsp_list);
      }
    } else {
      /* generic set: collect from each of its members */
      AddSequenceOrPartsFromSeqEntry (action, set->seq_set, bsp_list);
    }
  }
}
8746
  
8747
8748
/* Brings the protein product of a coding region up to date.
 * Ignores anything that is not a coding region feature.
 * If no product Bioseq can be found from cds->product,
 * ExtraCDSCreationActions is run for the coding region (provided the
 * Bioseq for its location can be found).  Otherwise the product's sequence
 * data and length are replaced with a fresh translation, and the protein
 * feature is either created (copying the coding region's partialness) or,
 * if its length no longer matches, given a new full-length location.
 */
static void AdjustProteinSequenceForReadingFrame (SeqFeatPtr cds)
{
  BioseqPtr    protbsp, bsp;
  ByteStorePtr bs;
  SeqFeatPtr   prot_sfp;
  Boolean      partial5, partial3;

  if (cds == NULL || cds->data.choice != SEQFEAT_CDREGION) return;

  protbsp = BioseqFindFromSeqLoc (cds->product);

  if (protbsp == NULL) {
    bsp = BioseqFindFromSeqLoc (cds->location);
    if (bsp != NULL) {
      ExtraCDSCreationActions (cds, GetBestTopParentForData (bsp->idx.entityID, bsp));
    }
    return;
  }

  /* swap in the new translation */
  bs = ProteinFromCdRegionExWithTrailingCodonHandling (cds,
                                            TRUE,
                                            FALSE,
                                            FALSE);
  protbsp->seq_data = (SeqDataPtr) BSFree ((ByteStorePtr)(protbsp->seq_data));
  protbsp->seq_data = (SeqDataPtr) bs;
  protbsp->length = BSLen (bs);

  prot_sfp = GetProtFeature (protbsp);
  if (prot_sfp == NULL) {
    CheckSeqLocForPartial (cds->location, &partial5, &partial3);
    prot_sfp = CreateNewFeatureOnBioseq (protbsp, SEQFEAT_PROT, NULL);
    if (prot_sfp == NULL) {
      /* feature creation failed; nothing to fill in */
      return;
    }
    prot_sfp->data.value.ptrvalue = ProtRefNew ();
    SetSeqLocPartial (prot_sfp->location, partial5, partial3);
    prot_sfp->partial = (partial5 || partial3);
  } else if (SeqLocLen (prot_sfp->location) != protbsp->length) {
    /* existing protein feature no longer spans the whole product */
    prot_sfp->location = SeqLocFree (prot_sfp->location);
    prot_sfp->location = SeqLocIntNew (0, protbsp->length - 1, Seq_strand_plus, SeqIdFindWorst (protbsp->id));
  }
}
8787
8788
8789
/* Creates the feature described by action on every applicable Bioseq in sep.
 * The target Bioseqs come either from the explicit id list in
 * action->seq_list or from a walk over sep (see AddSequenceOrParts and
 * AddSequenceOrPartsFromSeqEntry).  Each qualifier in action->fields is set
 * on the new feature; gene and gene-description qualifiers on a non-gene
 * feature are placed on a gene feature created at the same location.
 * Coding regions are additionally retranslated and adjusted for gaps.
 * Returns the number of Bioseqs on which a feature was created, or 0 when
 * sep or action is NULL.
 */
static Int4 ApplyApplyFeatureActionToSeqEntry (ApplyFeatureActionPtr action, SeqEntryPtr sep)
{
  ValNodePtr bsp_list = NULL, vnp, field_vnp;
  Int4       featdef, seqfeattype;
  BioseqPtr  bsp;
  SeqFeatPtr sfp;
  SeqLocPtr  slp;
  FeatQualLegalValPtr q;
  FeatureField f;
  SeqIdPtr   sip;
  SeqFeatPtr gene;
  Int4       num_created = 0;

  if (sep == NULL || action == NULL) return 0;

  /* first, get list of Bioseqs to apply features to */
  /* relevant values : seq_list, add_redundant, apply_to_parts, only_seg_num */
  if (action->seq_list != NULL && action->seq_list->choice == SequenceListChoice_list) {
    for (vnp = action->seq_list->data.ptrvalue; vnp != NULL; vnp = vnp->next) {
      /* NOTE(review): sip is never released here; if CreateSeqIdFromText
       * hands back a newly allocated SeqId this leaks - confirm ownership */
      sip = CreateSeqIdFromText (vnp->data.ptrvalue, sep);
      bsp = BioseqFind (sip);
      if (bsp != NULL) {
        AddSequenceOrParts (action, bsp, &bsp_list);
      }
    }
  } else {
    AddSequenceOrPartsFromSeqEntry (action, sep, &bsp_list);
  }

  /* now add feature to each bioseq in list */
  for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) {
    bsp = vnp->data.ptrvalue;
    if (bsp == NULL) continue;
    featdef = GetFeatdefFromFeatureType (action->type);
    seqfeattype = FindFeatFromFeatDefType (featdef);
    slp = LocationFromApplyFeatureAction (bsp, action);
    sfp = CreateNewFeatureOnBioseq (bsp, seqfeattype, slp);
    if (sfp == NULL) continue;
    CreateDataForFeature (sfp, action->type);
    /* any extra actions */
    if (action->type == Feature_type_cds) {
      ExtraCDSCreationActions (sfp, GetBestTopParentForData (bsp->idx.entityID, bsp));
    }
    gene = NULL;
    for (field_vnp = action->fields; field_vnp != NULL; field_vnp = field_vnp->next) {
      q = (FeatQualLegalValPtr) field_vnp->data.ptrvalue;
      if (q == NULL) continue;

      /* temporary field descriptor, released at the end of this iteration */
      f.field = ValNodeNew (NULL);
      if (f.field == NULL) continue;
      f.field->next = NULL;
      f.field->choice = FeatQualChoice_legal_qual;
      f.field->data.intvalue = q->qual;
      if (sfp->data.choice != SEQFEAT_GENE
          && (q->qual == Feat_qual_legal_gene || q->qual == Feat_qual_legal_gene_description)) {
        /* gene qualifiers for a non-gene feature go on a companion gene,
         * created on first use */
        if (gene == NULL) {
          gene = CreateNewFeatureOnBioseq (bsp, SEQFEAT_GENE, slp);
          if (gene != NULL) {
            CreateDataForFeature (gene, Feature_type_gene);
          }
        }
        if (gene != NULL) {
          f.type = Feature_type_gene;
          SetQualOnFeature (gene, &f, NULL, q->val, ExistingTextOption_replace_old);
        }
      } else {
        f.type = action->type;
        SetQualOnFeature (sfp, &f, NULL, q->val, ExistingTextOption_replace_old);
      }
      f.field = ValNodeFree (f.field);
    }
    if (action->type == Feature_type_cds) {
      /* retranslate, to account for change in reading frame */
      AdjustProteinSequenceForReadingFrame (sfp);
      /* after the feature has been created, then adjust it for gaps */
      /* Note - this step may result in multiple coding regions being created. */
      AdjustCDSLocationsForUnknownGapsCallback (sfp, NULL);
    }
    num_created++;
  }

  /* release the list nodes only; the Bioseqs belong to the record */
  bsp_list = ValNodeFree (bsp_list);
  return num_created;
}
8867
8868
8869
typedef struct convertandremovefeaturecollection {
8870
  Uint1 featdef;
8871
  ValNodePtr constraint_set;
8872
  ValNodePtr feature_list;
8873
} ConvertAndRemoveFeatureCollectionData, PNTR ConvertAndRemoveFeatureCollectionPtr;
8874
8875
/* Feature-visit callback: data points at a
 * ConvertAndRemoveFeatureCollectionData.  A feature whose subtype equals
 * the requested featdef and which satisfies the constraint set is appended
 * to the collection's feature_list.
 */
static void ConvertAndRemoveFeatureCollectionCallback (SeqFeatPtr sfp, Pointer data)
{
  ConvertAndRemoveFeatureCollectionPtr collection;

  collection = (ConvertAndRemoveFeatureCollectionPtr) data;
  if (sfp == NULL || collection == NULL) return;

  if (sfp->idx.subtype != collection->featdef) return;
  if (!DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, collection->constraint_set)) return;

  ValNodeAddPointer (&(collection->feature_list), OBJ_SEQFEAT, sfp);
}
8886
8887
8888
/* Removes every feature in sep that has the type named by action and
 * satisfies action->constraint.  Matching features are first collected,
 * then flagged with idx.deleteme, and finally removed by
 * DeleteMarkedObjects.  Returns the number of features flagged, or 0 when
 * action is NULL.
 */
static Int4 ApplyRemoveFeatureActionToSeqEntry (RemoveFeatureActionPtr action, SeqEntryPtr sep)
{
  ConvertAndRemoveFeatureCollectionData d;
  ValNodePtr vnp;
  SeqFeatPtr sfp;
  Int4       num_deleted = 0;

  if (action == NULL) return 0;

  d.featdef = GetFeatdefFromFeatureType (action->type);
  d.constraint_set = action->constraint;
  d.feature_list = NULL;

  VisitFeaturesInSep (sep, &d, ConvertAndRemoveFeatureCollectionCallback);
  for (vnp = d.feature_list; vnp != NULL; vnp = vnp->next) {
    sfp = vnp->data.ptrvalue;
    if (sfp != NULL) {
      sfp->idx.deleteme = TRUE;
      num_deleted ++;
    }
  }
  /* the collection list was never released before; free its nodes now that
   * the features have been flagged (the features are not owned by it) */
  d.feature_list = ValNodeFree (d.feature_list);

  DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL);
  return num_deleted;
}
8912
8913
8914
/* Tests a location strand value against a "from strand" selector.
 *   from_any     : always matches
 *   from_unknown : matches Seq_strand_unknown only
 *   from_plus    : matches every value except Seq_strand_minus
 *   from_minus   : matches Seq_strand_minus only
 *   from_both    : matches Seq_strand_both only
 * Any other selector never matches.
 */
static Boolean DoesStrandMatch (Int4 strand_choice, Uint1 strand_val)
{
  switch (strand_choice) {
    case Feature_location_strand_from_any:
      return TRUE;
    case Feature_location_strand_from_unknown:
      return (strand_val == Seq_strand_unknown) ? TRUE : FALSE;
    case Feature_location_strand_from_plus:
      return (strand_val != Seq_strand_minus) ? TRUE : FALSE;
    case Feature_location_strand_from_minus:
      return (strand_val == Seq_strand_minus) ? TRUE : FALSE;
    case Feature_location_strand_from_both:
      return (strand_val == Seq_strand_both) ? TRUE : FALSE;
    default:
      return FALSE;
  }
}
8950
8951
8952
/* Computes the strand a location should have after applying a "to strand"
 * selector to its current value strand_val.
 *   to_reverse : plus and unknown become minus, minus becomes plus, every
 *                other value is returned unchanged
 *   to_unknown / to_plus / to_minus / to_both : the corresponding constant
 * Any other selector yields Seq_strand_unknown.
 */
static Uint1 GetNewStrandValue (Int4 strand_choice, Uint1 strand_val)
{
  if (strand_choice == Feature_location_strand_to_reverse) {
    if (strand_val == Seq_strand_plus || strand_val == Seq_strand_unknown) {
      return Seq_strand_minus;
    }
    if (strand_val == Seq_strand_minus) {
      return Seq_strand_plus;
    }
    return strand_val;
  }

  switch (strand_choice) {
    case Feature_location_strand_to_plus:
      return Seq_strand_plus;
    case Feature_location_strand_to_minus:
      return Seq_strand_minus;
    case Feature_location_strand_to_both:
      return Seq_strand_both;
    case Feature_location_strand_to_unknown:
    default:
      return Seq_strand_unknown;
  }
}
8988
8989
8990
static Boolean ConvertLocationStrand (SeqLocPtr slp, Int4 fromStrand, Int4 toStrand)
8991
{
8992
  SeqLocPtr      loc;
8993
  PackSeqPntPtr  psp;
8994
  SeqBondPtr     sbp;
8995
  SeqIntPtr      sinp;
8996
  SeqPntPtr      spp;
8997
  Boolean        rval = FALSE;
8998
  Uint1          strand_orig;
8999
9000
  while (slp != NULL) {
9001
    switch (slp->choice) {
9002
      case SEQLOC_NULL :
9003
        break;
9004
      case SEQLOC_EMPTY :
9005
      case SEQLOC_WHOLE :
9006
        break;
9007
      case SEQLOC_INT :
9008
        sinp = (SeqIntPtr) slp->data.ptrvalue;
9009
        if (sinp != NULL && DoesStrandMatch (fromStrand, sinp->strand)) 
9010
        {
9011
          strand_orig = sinp->strand;
9012
          sinp->strand = GetNewStrandValue (toStrand, sinp->strand);
9013
          if (strand_orig != sinp->strand) {
9014
            rval = TRUE;
9015
          }
9016
        }
9017
        break;
9018
      case SEQLOC_PNT :
9019
        spp = (SeqPntPtr) slp->data.ptrvalue;
9020
        if (spp != NULL && DoesStrandMatch (fromStrand, spp->strand))
9021
        {
9022
          strand_orig = spp->strand;
9023
          spp->strand = GetNewStrandValue (toStrand, spp->strand);
9024
          if (strand_orig != spp->strand) {
9025
            rval = TRUE;
9026
          }
9027
        }
9028
        break;
9029
      case SEQLOC_PACKED_PNT :
9030
        psp = (PackSeqPntPtr) slp->data.ptrvalue;
9031
        if (psp != NULL && DoesStrandMatch (fromStrand, psp->strand)) 
9032
        {
9033
          strand_orig = psp->strand;
9034
          psp->strand = GetNewStrandValue (toStrand, psp->strand);
9035
          if (strand_orig != psp->strand) {
9036
            rval = TRUE;
9037
          }
9038
        }
9039
        break;
9040
      case SEQLOC_PACKED_INT :
9041
      case SEQLOC_MIX :
9042
      case SEQLOC_EQUIV :
9043
        loc = (SeqLocPtr) slp->data.ptrvalue;
9044
        while (loc != NULL) {
9045
          rval |= ConvertLocationStrand (loc, fromStrand, toStrand);
9046
          loc = loc->next;
9047
        }
9048
        break;
9049
      case SEQLOC_BOND :
9050
        sbp = (SeqBondPtr) slp->data.ptrvalue;
9051
        if (sbp != NULL) {
9052
          spp = (SeqPntPtr) sbp->a;
9053
          if (spp != NULL && DoesStrandMatch (fromStrand, spp->strand)) 
9054
          {
9055
            strand_orig = spp->strand;
9056
            spp->strand = GetNewStrandValue (toStrand, spp->strand);
9057
            if (strand_orig != spp->strand) {
9058
              rval = TRUE;
9059
            }
9060
          }
9061
          spp = (SeqPntPtr) sbp->b;
9062
          if (spp != NULL && DoesStrandMatch (fromStrand, spp->strand)) 
9063
          {
9064
            strand_orig = spp->strand;
9065
            spp->strand = GetNewStrandValue (toStrand, spp->strand);
9066
            if (strand_orig != spp->strand) {
9067
              rval = TRUE;
9068
            }
9069
          }
9070
        }
9071
        break;
9072
      case SEQLOC_FEAT :
9073
        break;
9074
      default :
9075
        break;
9076
    }
9077
    slp = slp->next;
9078
  }
9079
  return rval;
9080
}
9081
9082
9083
/* Applies a strand edit (edit->strand_from to edit->strand_to) to the
 * location of sfp.  Returns the result of ConvertLocationStrand, or FALSE
 * when either argument is NULL.
 */
static Boolean ApplyEditLocationStrandToSeqFeat (EditLocationStrandPtr edit, SeqFeatPtr sfp)
{
  if (edit == NULL || sfp == NULL) return FALSE;

  return ConvertLocationStrand (sfp->location, edit->strand_from, edit->strand_to);
}
9094
9095
9096
/* Reports whether the 5' end of slp lies at the end of bsp.
 * On the minus strand the 5' end is SeqLocStop and must equal
 * bsp->length - 1; on any other strand it is SeqLocStart and must be 0.
 * Returns FALSE when slp or bsp is NULL.
 */
static Boolean At5EndOfSequence (SeqLocPtr slp, BioseqPtr bsp)
{
  Int4 five_prime_pos;

  if (slp == NULL || bsp == NULL) return FALSE;

  if (SeqLocStrand (slp) == Seq_strand_minus) {
    five_prime_pos = SeqLocStop (slp);
    return (five_prime_pos == bsp->length - 1) ? TRUE : FALSE;
  }

  five_prime_pos = SeqLocStart (slp);
  return (five_prime_pos == 0) ? TRUE : FALSE;
}
9119
9120
9121
/* Returns TRUE when sfp is a coding region whose translation (from
 * ProteinFromCdRegionEx) begins with 'M'.  Returns FALSE for NULL, for
 * other feature types, and when no translation is produced.
 */
static Boolean HasGoodStartCodon (SeqFeatPtr sfp)
{
  ByteStorePtr translation;
  CharPtr      residues;
  Boolean      starts_with_met;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) return FALSE;

  translation = ProteinFromCdRegionEx (sfp, TRUE, FALSE);
  if (translation == NULL) return FALSE;

  residues = BSMerge (translation, NULL);
  translation = BSFree (translation);

  starts_with_met = FALSE;
  if (residues != NULL && residues[0] == 'M') {
    starts_with_met = TRUE;
  }
  residues = MemFree (residues);
  return starts_with_met;
}
9140
9141
9142
/* Makes the location of sfp 5' partial when the constraint in action holds:
 *   all           : always
 *   at_end        : the 5' end is at the end of the sequence
 *   bad_start     : sfp is a coding region without a good start codon
 *   frame_not_one : sfp is a coding region whose frame is neither 0 nor 1
 * If the location is not already 5' partial it is marked so and, when
 * action->extend is set and the Bioseq was found, extended with
 * ExtendSeqLocToEnd.  Returns TRUE if the location was changed.
 */
static Boolean ApplyPartial5SetActionToSeqFeat (Partial5SetActionPtr action, SeqFeatPtr sfp)
{
  Boolean      rval = FALSE;
  Boolean      make_partial = FALSE;
  BioseqPtr    bsp;
  CdRegionPtr  crp;
  Boolean      partial5, partial3;

  if (action == NULL || sfp == NULL) return FALSE;
  bsp = BioseqFindFromSeqLoc (sfp->location);

  switch (action->constraint) {
    case Partial_5_set_constraint_all:
      make_partial = TRUE;
      break;
    case Partial_5_set_constraint_at_end:
      make_partial = At5EndOfSequence (sfp->location, bsp);
      break;
    case Partial_5_set_constraint_bad_start:
      /* "bad start" means the start codon is NOT good; the test used to be
       * inverted.  Restricted to coding regions because
       * HasGoodStartCodon returns FALSE for every other feature type. */
      if (sfp->data.choice == SEQFEAT_CDREGION && !HasGoodStartCodon (sfp)) {
        make_partial = TRUE;
      }
      break;
    case Partial_5_set_constraint_frame_not_one:
      if (sfp->data.choice == SEQFEAT_CDREGION) {
        crp = (CdRegionPtr) sfp->data.value.ptrvalue;
        if (crp != NULL && crp->frame != 0 && crp->frame != 1) {
          make_partial = TRUE;
        }
      }
      break;
    default:
      break;
  }

  if (make_partial) {
    CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
    if (!partial5) {
      SetSeqLocPartial (sfp->location, TRUE, partial3);
      if (action->extend && bsp != NULL) {
        ExtendSeqLocToEnd (sfp->location, bsp, TRUE);
      }
      rval = TRUE;
    }
  }
  return rval;
}
9186
9187
9188
/* Clears the 5' partial state of sfp's location when the constraint named
 * by action holds:
 *   all        : always
 *   not_at_end : the 5' end is not at the end of the sequence
 *   good_start : sfp is a coding region with a good start codon
 * Returns TRUE if the location was 5' partial and has been cleared.
 */
static Boolean ApplyClear5PartialToSeqFeat (Int4 action, SeqFeatPtr sfp)
{
  Boolean rval = FALSE, clear_partial = FALSE;
  Boolean partial5, partial3;

  if (sfp == NULL) return FALSE;

  switch (action) {
    case Partial_5_clear_constraint_all:
      clear_partial = TRUE;
      break;
    case Partial_5_clear_constraint_not_at_end:
      clear_partial = !At5EndOfSequence(sfp->location, BioseqFindFromSeqLoc (sfp->location));
      break;
    case Partial_5_clear_constraint_good_start:
      /* clear only when the start codon IS good; the test used to be
       * negated, which cleared exactly the features that lacked one */
      clear_partial = HasGoodStartCodon(sfp);
      break;
    default:
      break;
  }
  if (clear_partial) {
    CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
    if (partial5) {
      SetSeqLocPartial (sfp->location, FALSE, partial3);
      rval = TRUE;
    }
  }
  return rval;
}
9215
9216
9217
/* Reports whether the 3' end of slp lies at the end of bsp.
 * On the minus strand the 3' end is SeqLocStart and must be 0; on any
 * other strand it is SeqLocStop and must equal bsp->length - 1.
 * Returns FALSE when slp or bsp is NULL.
 */
static Boolean At3EndOfSequence (SeqLocPtr slp, BioseqPtr bsp)
{
  Int4 three_prime_pos;

  if (slp == NULL || bsp == NULL) return FALSE;

  if (SeqLocStrand (slp) == Seq_strand_minus) {
    three_prime_pos = SeqLocStart (slp);
    return (three_prime_pos == 0) ? TRUE : FALSE;
  }

  three_prime_pos = SeqLocStop (slp);
  return (three_prime_pos == bsp->length - 1) ? TRUE : FALSE;
}
9240
9241
9242
/* Returns TRUE when sfp is a coding region whose translation (from
 * ProteinFromCdRegionEx) ends with '*'.  Returns FALSE for NULL, for other
 * feature types, and when the translation is missing or empty.
 */
static Boolean HasGoodStopCodon (SeqFeatPtr sfp)
{
  ByteStorePtr bs;
  CharPtr      prot;
  Int4         len;
  Boolean      has_stop = FALSE;

  if (sfp != NULL && sfp->data.choice == SEQFEAT_CDREGION) {
    bs = ProteinFromCdRegionEx (sfp, TRUE, FALSE);
    if (bs != NULL) {
      prot = BSMerge (bs, NULL);
      bs = BSFree (bs);
      if (prot != NULL) {
        /* an empty translation has no last residue; indexing len - 1
         * would read before the start of the buffer */
        len = (Int4) StringLen (prot);
        if (len > 0 && prot[len - 1] == '*') {
          has_stop = TRUE;
        }
      }
      prot = MemFree (prot);
    }
  }
  return has_stop;
}
9261
9262
9263
/* Makes the location of sfp 3' partial when the constraint in action holds:
 *   all     : always
 *   at_end  : the 3' end is at the end of the sequence
 *   bad_end : sfp is a coding region without a good stop codon
 * If the location is not already 3' partial it is marked so and, when
 * action->extend is set and the Bioseq was found, extended with
 * ExtendSeqLocToEnd.  Returns TRUE if the location was changed.
 */
static Boolean ApplyPartial3SetActionToSeqFeat (Partial3SetActionPtr action, SeqFeatPtr sfp)
{
  Boolean      rval = FALSE;
  Boolean      make_partial = FALSE;
  BioseqPtr    bsp;
  Boolean      partial5, partial3;

  if (action == NULL || sfp == NULL) return FALSE;
  bsp = BioseqFindFromSeqLoc (sfp->location);

  switch (action->constraint) {
    case Partial_3_set_constraint_all:
      make_partial = TRUE;
      break;
    case Partial_3_set_constraint_at_end:
      make_partial = At3EndOfSequence (sfp->location, bsp);
      break;
    case Partial_3_set_constraint_bad_end:
      /* "bad end" means the stop codon is NOT good; the test used to be
       * inverted.  Restricted to coding regions because HasGoodStopCodon
       * returns FALSE for every other feature type. */
      if (sfp->data.choice == SEQFEAT_CDREGION && !HasGoodStopCodon (sfp)) {
        make_partial = TRUE;
      }
      break;
    default:
      break;
  }

  if (make_partial) {
    CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
    if (!partial3) {
      SetSeqLocPartial (sfp->location, partial5, TRUE);
      if (action->extend && bsp != NULL) {
        ExtendSeqLocToEnd (sfp->location, bsp, FALSE);
      }
      rval = TRUE;
    }
  }
  return rval;
}
9299
9300
9301
/* Clears the 3' partial state of sfp's location when the constraint named
 * by action holds:
 *   all        : always
 *   not_at_end : the 3' end is not at the end of the sequence
 *   good_end   : sfp is a coding region with a good stop codon
 * Returns TRUE if the location was 3' partial and has been cleared.
 */
static Boolean ApplyClear3PartialToSeqFeat (Int4 action, SeqFeatPtr sfp)
{
  Boolean rval = FALSE, clear_partial = FALSE;
  Boolean partial5, partial3;

  if (sfp == NULL) return FALSE;

  switch (action) {
    case Partial_3_clear_constraint_all:
      clear_partial = TRUE;
      break;
    case Partial_3_clear_constraint_not_at_end:
      clear_partial = !At3EndOfSequence(sfp->location, BioseqFindFromSeqLoc (sfp->location));
      break;
    case Partial_3_clear_constraint_good_end:
      /* clear only when the stop codon IS good; the test used to be
       * negated, which cleared exactly the features that lacked one */
      clear_partial = HasGoodStopCodon(sfp);
      break;
    default:
      break;
  }
  if (clear_partial) {
    CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
    if (partial3) {
      SetSeqLocPartial (sfp->location, partial5, FALSE);
      rval = TRUE;
    }
  }
  return rval;
}
9328
9329
9330
/* Rebuilds the location of sfp according to convert_location:
 *   join  : only if the location has NULLs between its pieces; merged
 *           with the last SeqLocMerge flag FALSE
 *   order : only if the location has no NULLs between its pieces; merged
 *           with the last SeqLocMerge flag TRUE
 *   merge : only if the location is not a single interval; merged with
 *           the first SeqLocMerge flag TRUE
 * For join and order on a segmented Bioseq the result is further mapped
 * with SegLocToPartsEx and the feature is flagged partial if NULLs remain;
 * fuzz is then stripped.  In every case the 5'/3' partial state seen
 * before the conversion is re-applied.
 * Returns TRUE if the location was replaced, FALSE otherwise (including
 * when sfp is NULL or its Bioseq cannot be found).
 */
static Boolean ApplyConvertLocationToSeqFeat (Int4 convert_location, SeqFeatPtr sfp)
{
  Boolean   had_nulls, want_nulls;
  Boolean   was_partial5, was_partial3;
  SeqLocPtr new_loc;
  BioseqPtr bsp;

  if (sfp == NULL) return FALSE;
  bsp = BioseqFindFromSeqLoc (sfp->location);
  if (bsp == NULL) return FALSE;

  CheckSeqLocForPartial (sfp->location, &was_partial5, &was_partial3);
  had_nulls = LocationHasNullsBetween (sfp->location);

  switch (convert_location) {
    case Convert_location_type_join :
    case Convert_location_type_order :
      want_nulls = (convert_location == Convert_location_type_order) ? TRUE : FALSE;
      /* nothing to do when the location is already in the requested form */
      if (want_nulls ? had_nulls : !had_nulls) {
        return FALSE;
      }

      new_loc = SeqLocMerge (bsp, sfp->location, NULL, FALSE, FALSE, want_nulls);
      sfp->location = SeqLocFree (sfp->location);
      sfp->location = new_loc;
      if (bsp->repr == Seq_repr_seg) {
        new_loc = SegLocToPartsEx (bsp, sfp->location, want_nulls);
        sfp->location = SeqLocFree (sfp->location);
        sfp->location = new_loc;
        had_nulls = LocationHasNullsBetween (sfp->location);
        sfp->partial = (sfp->partial || had_nulls);
      }
      FreeAllFuzz (sfp->location);
      SetSeqLocPartial (sfp->location, was_partial5, was_partial3);
      return TRUE;

    case Convert_location_type_merge :
      if (sfp->location->choice == SEQLOC_INT) {
        return FALSE;
      }
      new_loc = SeqLocMerge (bsp, sfp->location, NULL, TRUE, FALSE, FALSE);
      sfp->location = SeqLocFree (sfp->location);
      sfp->location = new_loc;
      SetSeqLocPartial (sfp->location, was_partial5, was_partial3);
      return TRUE;

    default:
      break;
  }
  return FALSE;
}
9396
9397
9398
/* Dispatches a location edit to the matching handler, chosen by
 * action->choice.  Strand and set-partial edits carry their parameters in
 * action->data.ptrvalue; clear-partial and convert edits carry an integer
 * in action->data.intvalue.  Returns the handler's result, or FALSE for a
 * NULL argument or an unrecognised choice.
 */
static Boolean ApplyLocationEditTypeToSeqFeat (ValNodePtr action, SeqFeatPtr sfp)
{
  if (action == NULL || sfp == NULL) return FALSE;

  switch (action->choice) {
    case LocationEditType_strand:
      return ApplyEditLocationStrandToSeqFeat (action->data.ptrvalue, sfp);
    case LocationEditType_set_5_partial:
      return ApplyPartial5SetActionToSeqFeat (action->data.ptrvalue, sfp);
    case LocationEditType_clear_5_partial:
      return ApplyClear5PartialToSeqFeat (action->data.intvalue, sfp);
    case LocationEditType_set_3_partial:
      return ApplyPartial3SetActionToSeqFeat (action->data.ptrvalue, sfp);
    case LocationEditType_clear_3_partial:
      return ApplyClear3PartialToSeqFeat (action->data.intvalue, sfp);
    case LocationEditType_convert:
      return ApplyConvertLocationToSeqFeat (action->data.intvalue, sfp);
    default:
      return FALSE;
  }
}
9428
9429
9430
/* Applies a location-editing action to every feature in sep that is
 * collected by ConvertAndRemoveFeatureCollectionCallback for the action's
 * feature type and constraint set.
 *
 * Returns the number of features for which the edit reported a change
 * (0 when action is NULL).
 */
static Int4 ApplyEditFeatureLocationActionToSeqEntry (EditFeatureLocationActionPtr action, SeqEntryPtr sep)
{
  ConvertAndRemoveFeatureCollectionData d;
  ValNodePtr vnp;
  SeqFeatPtr sfp;
  Int4       num_affected = 0;

  if (action == NULL) return 0;

  d.featdef = GetFeatdefFromFeatureType (action->type);
  d.constraint_set = action->constraint;
  d.feature_list = NULL;

  VisitFeaturesInSep (sep, &d, ConvertAndRemoveFeatureCollectionCallback);
  for (vnp = d.feature_list; vnp != NULL; vnp = vnp->next) {
    sfp = vnp->data.ptrvalue;
    if (sfp != NULL && ApplyLocationEditTypeToSeqFeat (action->action, sfp)) {
      num_affected++;
    }
  }

  /* The list nodes were allocated for this call only; release them so the
   * collection does not leak.  The features themselves are used here as
   * borrowed pointers and must not be freed, hence ValNodeFree rather
   * than ValNodeFreeData.
   */
  d.feature_list = ValNodeFree (d.feature_list);
  return num_affected;
}
9452
9453
9454
/* Runs every action in the macro list against sep, in list order.
 *
 * pNumFields (optional) receives the combined count reported by the AECR,
 * parse and edit-location actions; pNumFeat (optional) receives the
 * combined count reported by the add-feature and remove-feature actions.
 * Actions with any other choice value are skipped.
 */
NLM_EXTERN void ApplyMacroToSeqEntry (SeqEntryPtr sep, ValNodePtr macro, Int4Ptr pNumFields, Int4Ptr pNumFeat)
{
  ValNodePtr step;
  Int4       field_total = 0;
  Int4       feat_total = 0;

  for (step = macro; step != NULL; step = step->next) {
    switch (step->choice) {
      case MacroActionChoice_aecr:
        field_total += ApplyAECRActionToSeqEntry ((AECRActionPtr) step->data.ptrvalue, sep);
        break;
      case MacroActionChoice_parse:
        field_total += ApplyParseActionToSeqEntry ((ParseActionPtr) step->data.ptrvalue, sep);
        break;
      case MacroActionChoice_edit_location:
        field_total += ApplyEditFeatureLocationActionToSeqEntry ((EditFeatureLocationActionPtr) step->data.ptrvalue, sep);
        break;
      case MacroActionChoice_add_feature:
        feat_total += ApplyApplyFeatureActionToSeqEntry ((ApplyFeatureActionPtr) step->data.ptrvalue, sep);
        /* feature indexes are rebuilt right after features are added */
        SeqMgrIndexFeatures (ObjMgrGetEntityIDForChoice(sep), NULL);
        break;
      case MacroActionChoice_remove_feature:
        feat_total += ApplyRemoveFeatureActionToSeqEntry ((RemoveFeatureActionPtr) step->data.ptrvalue, sep);
        break;
      default:
        break;
    }
  }

  if (pNumFields != NULL) {
    *pNumFields = field_total;
  }
  if (pNumFeat != NULL) {
    *pNumFeat = feat_total;
  }
}
9486
9487
9488
/* for generating text descriptions of macro objects */
9489
/* Produces a newly allocated text description of a source qualifier
 * choice: the qualifier name for a text qual, "location <name>" for a
 * location, "origin <name>" for an origin.
 *
 * Returns NULL when field is NULL or its choice is none of those three.
 */
NLM_EXTERN CharPtr SummarizeSourceQual (ValNodePtr field)
{
  CharPtr text = NULL;
  CharPtr name;
  Int4    code;
  CharPtr location_tmpl = "location %s";
  CharPtr origin_tmpl = "origin %s";

  if (field == NULL) {
    return NULL;
  }

  if (field->choice == SourceQualChoice_textqual) {
    text = StringSave (GetSourceQualName (field->data.intvalue));
  } else if (field->choice == SourceQualChoice_location) {
    code = GenomeFromSrcLoc (field->data.intvalue);
    name = LocNameFromGenome (code);
    /* the "%s" in the template is replaced by name, so template length
     * plus name length leaves room for the terminating NUL */
    text = (CharPtr) MemNew (sizeof (Char) * (StringLen (location_tmpl) + StringLen (name)));
    sprintf (text, location_tmpl, name);
  } else if (field->choice == SourceQualChoice_origin) {
    code = OriginFromSrcOrig (field->data.intvalue);
    name = OriginNameFromOrigin (code);
    text = (CharPtr) MemNew (sizeof (Char) * (StringLen (origin_tmpl) + StringLen (name)));
    sprintf (text, origin_tmpl, name);
  }

  return text;
}
9516
9517
9518
/* Builds a newly allocated display label for a feature qualifier choice.
 *
 *   field == NULL            -> "missing field"
 *   legal qualifier          -> "<feature_name> <qualifier name>"
 *   illegal qualifier        -> "constrained field on <feature_name>"
 *   any other choice         -> "illegal field value"
 *
 * A NULL feature_name is shown as "Unknown feature"; a qualifier whose
 * name lookup yields NULL is shown as "Unknown field type".
 */
NLM_EXTERN CharPtr FeatureFieldLabel (CharPtr feature_name, ValNodePtr field)
{
  CharPtr qual_name;
  CharPtr result = NULL;
  CharPtr legal_fmt = "%s %s";
  CharPtr illegal_fmt = "constrained field on %s";

  if (field == NULL) {
    return StringSave ("missing field");
  }
  if (feature_name == NULL) {
    feature_name = "Unknown feature";
  }

  switch (field->choice) {
    case FeatQualChoice_legal_qual:
      qual_name = GetFeatQualName (field->data.intvalue);
      if (qual_name == NULL) {
        qual_name = "Unknown field type";
      }
      result = (CharPtr) MemNew (sizeof (Char) * (StringLen (legal_fmt) + StringLen (feature_name) + StringLen (qual_name)));
      sprintf (result, legal_fmt, feature_name, qual_name);
      break;
    case FeatQualChoice_illegal_qual:
      result = (CharPtr) MemNew (sizeof (Char) * (StringLen (illegal_fmt) + StringLen (feature_name)));
      sprintf (result, illegal_fmt, feature_name);
      break;
    default:
      result = StringSave ("illegal field value");
      break;
  }
  return result;
}
9544
9545
9546
/* A feature field counts as empty when the pointer itself is NULL or
 * when it has no qualifier choice attached (field->field is NULL).
 */
NLM_EXTERN Boolean IsFeatureFieldEmpty (FeatureFieldPtr field)
{
  if (field != NULL && field->field != NULL) {
    return FALSE;
  }
  return TRUE;
}
9552
9553
9554
/* Reports whether a field-type choice carries no usable field.
 *
 *   NULL pointer           -> empty
 *   source qual            -> empty when its data pointer is NULL
 *   feature field          -> empty when IsFeatureFieldEmpty says so
 *   CDS-gene-prot field    -> never empty
 *   any other choice       -> empty
 */
NLM_EXTERN Boolean IsFieldTypeEmpty (FieldTypePtr field)
{
  if (field == NULL) {
    return TRUE;
  }

  switch (field->choice) {
    case FieldType_source_qual:
      return (field->data.ptrvalue != NULL) ? FALSE : TRUE;
    case FieldType_feature_field:
      return IsFeatureFieldEmpty (field->data.ptrvalue) ? TRUE : FALSE;
    case FieldType_cds_gene_prot:
      return FALSE;
    default:
      break;
  }
  return TRUE;
}
9576
9577
9578
/* Produces a text description of a field-type choice.
 *
 * A NULL node, or a feature field with no qualifier attached, is
 * described as "missing field"; an unrecognized choice as
 * "Invalid field type".  For the CDS-gene-prot and molinfo choices a
 * fixed "Invalid ..." string is substituted when the name lookup yields
 * NULL.
 *
 * NOTE(review): every branch visible here returns allocated storage
 * except possibly the molinfo branch, which returns whatever
 * GetSequenceQualName hands back -- confirm that it allocates.
 */
NLM_EXTERN CharPtr SummarizeFieldType (ValNodePtr vnp)
{
  FeatureFieldPtr feat_field;
  CharPtr         summary = NULL;
  CharPtr         feat_name = NULL;

  if (vnp == NULL) {
    return StringSave ("missing field");
  }

  switch (vnp->choice) {
    case FieldType_source_qual:
      summary = SummarizeSourceQual (vnp->data.ptrvalue);
      break;

    case FieldType_feature_field:
      feat_field = (FeatureFieldPtr) vnp->data.ptrvalue;
      if (feat_field != NULL && feat_field->field != NULL) {
        feat_name = GetFeatureNameFromFeatureType (feat_field->type);
        summary = FeatureFieldLabel (feat_name, feat_field->field);
      } else {
        summary = StringSave ("missing field");
      }
      break;

    case FieldType_cds_gene_prot:
      summary = StringSaveNoNull (CDSGeneProtNameFromField (vnp->data.intvalue));
      if (summary == NULL) {
        summary = StringSave ("Invalid CDS-Gene-Prot Field");
      }
      break;

    case FieldType_molinfo_field:
      summary = GetSequenceQualName (vnp->data.ptrvalue);
      if (summary == NULL) {
        summary = StringSave ("Invalid Sequence Qual Field");
      }
      break;

    default:
      summary = StringSave ("Invalid field type");
      break;
  }

  return summary;
}
9619
9620
9621
/* for table readers that use the macro language functions */
9622
/* Allocates a table-column configuration with no target field,
 * "replace old" as the existing-text policy and blank-skipping enabled.
 * The remaining members keep whatever MemNew leaves in them
 * (presumably zero -- TODO confirm against MemNew).
 * Release with TabColumnConfigFree.
 */
NLM_EXTERN TabColumnConfigPtr TabColumnConfigNew (void)
{
  TabColumnConfigPtr config = (TabColumnConfigPtr) MemNew (sizeof (TabColumnConfigData));

  config->skip_blank = TRUE;
  config->existing_text = ExistingTextOption_replace_old;
  config->field = NULL;

  return config;
}
9632
9633
9634
9635
/* Releases a column configuration together with the field it owns.
 * Accepts NULL.  Returns the result of MemFree (NULL for a NULL input)
 * so callers can write "t = TabColumnConfigFree (t);".
 */
NLM_EXTERN TabColumnConfigPtr TabColumnConfigFree (TabColumnConfigPtr t)
{
  if (t == NULL) {
    return NULL;
  }

  t->field = FieldTypeFree (t->field);
  return (TabColumnConfigPtr) MemFree (t);
}
9643
9644
9645
/* Returns an independent duplicate of orig (NULL when orig is NULL).
 * Scalar settings are copied member by member; the field is duplicated
 * through an ASN.1 write/read round trip so the copy shares no field
 * storage with the original.
 */
NLM_EXTERN TabColumnConfigPtr TabColumnConfigCopy (TabColumnConfigPtr orig)
{
  TabColumnConfigPtr dup;

  if (orig == NULL) {
    return NULL;
  }

  dup = TabColumnConfigNew ();
  dup->match_type = orig->match_type;
  dup->existing_text = orig->existing_text;
  dup->skip_blank = orig->skip_blank;
  dup->match_mrna = orig->match_mrna;
  dup->field = AsnIoMemCopy (orig->field, (AsnReadFunc) FieldTypeAsnRead, (AsnWriteFunc) FieldTypeAsnWrite);

  return dup;
}
9659
9660
9661
/* Frees a list of column configurations: each node's configuration is
 * released with TabColumnConfigFree and then the node itself is freed.
 * Always returns NULL.
 */
NLM_EXTERN ValNodePtr TabColumnConfigListFree (ValNodePtr columns)
{
  ValNodePtr node;
  ValNodePtr following;

  for (node = columns; node != NULL; node = following) {
    following = node->next;
    node->data.ptrvalue = TabColumnConfigFree (node->data.ptrvalue);
    /* detach before freeing so only this one node is released */
    node->next = NULL;
    ValNodeFree (node);
  }
  return NULL;
}
9674
9675
9676
/* Returns a new list holding a TabColumnConfigCopy of every entry of
 * orig, in the same order.  Nodes of the new list have choice 0.
 * An empty input yields NULL.
 */
NLM_EXTERN ValNodePtr TabColumnConfigListCopy (ValNodePtr orig)
{
  ValNodePtr src;
  ValNodePtr copies = NULL;

  for (src = orig; src != NULL; src = src->next) {
    ValNodeAddPointer (&copies, 0, TabColumnConfigCopy (src->data.ptrvalue));
  }
  return copies;
}
9688
9689
9690
9691
/* This checks the column names and returns a list of the feature fields */
9692
NLM_EXTERN ValNodePtr ValidateFeatureFieldColumnNames (ValNodePtr header_line, ValNodePtr PNTR perr_list)
9693
{
9694
  ValNodePtr         header_vnp;
9695
  ValNodePtr         err_list = NULL, col_list = NULL;
9696
  Boolean            rval = TRUE;
9697
  TabColumnConfigPtr t;
9698
  FeatureFieldPtr    field;
9699
  Int4               featqual, feat_type;
9700
  CharPtr            first_space;
9701
  
9702
  if (header_line == NULL)
9703
  {
9704
    return FALSE;
9705
  }
9706
  
9707
  header_vnp = header_line->data.ptrvalue;
9708
  if (header_vnp == NULL || header_vnp->next == NULL)
9709
  {
9710
    return FALSE;
9711
  }
9712
  
9713
  /* skip ID column */
9714
  header_vnp = header_vnp->next;
9715
  while (header_vnp != NULL && rval)
9716
  {
9717
    first_space = StringChr (header_vnp->data.ptrvalue, ' ');
9718
    if (first_space != NULL) {
9719
      *first_space = 0;
9720
      feat_type = GetFeatureTypeByName (header_vnp->data.ptrvalue);
9721
      featqual = GetFeatQualByName (first_space + 1);
9722
      *first_space = ' ';
9723
      if (feat_type < 0 || featqual < 0) {
9724
        /* unable to recognize column name */
9725
        ValNodeAddPointer (&err_list, 0, StringSave (header_vnp->data.ptrvalue));
9726
        /* if we're not able to send back a list of errors, just quit now */
9727
        if (perr_list == NULL) {
9728
          rval = FALSE;
9729
        }
9730
      } else if (err_list == NULL) {
9731
        /* if we've already found errors, don't bother collecting more fields */
9732
        field = FeatureFieldNew ();
9733
        field->type = feat_type;
9734
        field->field = ValNodeNew (NULL);
9735
        field->field->choice = FeatQualChoice_legal_qual;
9736
        field->field->data.intvalue = featqual;
9737
        t = TabColumnConfigNew ();
9738
        t->field = ValNodeNew (NULL);
9739
        t->field->choice = FieldType_feature_field;
9740
        t->field->data.ptrvalue = field;
9741
        ValNodeAddPointer (&col_list, 0, t);
9742
      }
9743
    } else {
9744
      featqual = GetFeatQualByName (header_vnp->data.ptrvalue);
9745
      if (featqual < 0) {
9746
        /* unable to recognize column name */
9747
        ValNodeAddPointer (&err_list, 0, StringSave (header_vnp->data.ptrvalue));
9748
        /* if we're not able to send back a list of errors, just quit now */
9749
        if (perr_list == NULL) {
9750
          rval = FALSE;
9751
        }
9752
      } else if (err_list == NULL) {
9753
        /* if we've already found errors, don't bother collecting more fields */
9754
        field = FeatureFieldNew ();
9755
        field->type = Feature_type_any;
9756
        field->field = ValNodeNew (NULL);
9757
        field->field->choice = FeatQualChoice_legal_qual;
9758
        field->field->data.intvalue = featqual;
9759
        t = TabColumnConfigNew ();
9760
        t->field = ValNodeNew (NULL);
9761
        t->field->choice = FieldType_feature_field;
9762
        t->field->data.ptrvalue = field;
9763
        ValNodeAddPointer (&col_list, 0, t);
9764
      }
9765
    }
9766
    header_vnp = header_vnp->next;
9767
  }
9768
  if (err_list != NULL) {
9769
    col_list = TabColumnConfigListFree (col_list);
9770
    if (perr_list != NULL) {
9771
      *perr_list = err_list;
9772
    } else {
9773
      err_list = ValNodeFreeData (err_list);
9774
    }
9775
  }
9776
  return col_list;
9777
}
9778
9779
typedef struct findgenelocustag {
9780
  CharPtr locus_tag;
9781
  ValNodePtr gene_list;
9782
} FindGeneLocusTagData, PNTR FindGeneLocusTagPtr;
9783
9784
static void FindGeneByLocusTagBioseqCallback (BioseqPtr bsp, Pointer userdata)
9785
{
9786
  FindGeneLocusTagPtr p;
9787
  SeqFeatPtr          gene;
9788
  SeqMgrFeatContext   fcontext;
9789
9790
  if (bsp == NULL || userdata == NULL || !ISA_na (bsp->mol)) {
9791
    return;
9792
  }
9793
9794
  p = (FindGeneLocusTagPtr) userdata;
9795
9796
  gene = SeqMgrGetGeneByLocusTag (bsp, p->locus_tag, &fcontext);
9797
  if (gene != NULL) {
9798
    ValNodeAddPointer (&p->gene_list, OBJ_SEQFEAT, gene);
9799
  }
9800
}
9801
9802
9803
typedef struct objbystr {
9804
  ValNodePtr obj_list;
9805
  CharPtr    str;
9806
} ObjByStrData, PNTR ObjByStrPtr;
9807
9808
static void GetFeaturesByDbxrefCallback (SeqFeatPtr sfp, Pointer userdata)
9809
{
9810
  ObjByStrPtr p;
9811
  ValNodePtr    vnp;
9812
  DbtagPtr      dbt;
9813
  Char          buf[20];
9814
  Boolean       found = FALSE;
9815
9816
  if (sfp == NULL || sfp->dbxref == NULL || userdata == NULL) return;
9817
  p = (ObjByStrPtr) userdata;
9818
9819
  if (StringHasNoText (p->str)) return;
9820
9821
  for (vnp = sfp->dbxref; vnp != NULL && !found; vnp = vnp->next) {
9822
    dbt = (DbtagPtr) vnp->data.ptrvalue;
9823
    if (dbt != NULL && dbt->tag != NULL) {
9824
      if (dbt->tag->id > 0) {
9825
        sprintf (buf, "%d", dbt->tag->id);
9826
        if (StringCmp (buf, p->str) == 0) {
9827
          found = TRUE;
9828
        }
9829
      } else if (StringCmp (dbt->tag->str, p->str) == 0) {
9830
        found = TRUE;
9831
      }
9832
    }
9833
  }
9834
  if (found) {
9835
    ValNodeAddPointer (&(p->obj_list), OBJ_SEQFEAT, sfp);
9836
  }
9837
9838
}
9839
9840
9841
/* Visits every feature in sep and returns a list (choice OBJ_SEQFEAT) of
 * those selected by GetFeaturesByDbxrefCallback for the given dbxref
 * text.  The list holds borrowed feature pointers.
 */
static ValNodePtr GetFeaturesByDbxref (SeqEntryPtr sep, CharPtr dbxref)
{
  ObjByStrData query;

  query.obj_list = NULL;
  query.str = dbxref;

  VisitFeaturesInSep (sep, &query, GetFeaturesByDbxrefCallback);

  return query.obj_list;
}
9850
9851
9852
/* Descriptor visitor: appends sdp (choice OBJ_SEQDESC) to the list in
 * userdata (an ObjByStrPtr) when it is a source descriptor whose
 * organism taxname equals the search string.  Ignored when the search
 * string has no text.
 */
static void GetBioSourcesByTaxNameDescriptorCallback (SeqDescrPtr sdp, Pointer userdata)
{
  ObjByStrPtr  query;
  BioSourcePtr src;

  if (sdp == NULL || userdata == NULL) return;
  if (sdp->choice != Seq_descr_source) return;

  query = (ObjByStrPtr) userdata;
  if (StringHasNoText (query->str)) return;

  src = (BioSourcePtr) sdp->data.ptrvalue;
  if (src == NULL || src->org == NULL) return;

  if (StringCmp (src->org->taxname, query->str) == 0) {
    ValNodeAddPointer (&(query->obj_list), OBJ_SEQDESC, sdp);
  }
}
9868
9869
9870
/* Feature visitor: appends sfp (choice OBJ_SEQFEAT) to the list in
 * userdata (an ObjByStrPtr) when it is a biosource feature whose
 * organism taxname equals the search string.  Ignored when the search
 * string has no text.
 */
static void GetBioSourcesByTaxNameFeatureCallback (SeqFeatPtr sfp, Pointer userdata)
{
  ObjByStrPtr  query;
  BioSourcePtr src;

  if (sfp == NULL || userdata == NULL) return;
  if (sfp->data.choice != SEQFEAT_BIOSRC) return;

  query = (ObjByStrPtr) userdata;
  if (StringHasNoText (query->str)) return;

  src = (BioSourcePtr) sfp->data.value.ptrvalue;
  if (src == NULL || src->org == NULL) return;

  if (StringCmp (src->org->taxname, query->str) == 0) {
    ValNodeAddPointer (&(query->obj_list), OBJ_SEQFEAT, sfp);
  }
}
9886
9887
9888
/* Collects every biosource in sep whose taxname equals the given text:
 * source descriptors first (choice OBJ_SEQDESC), then biosource
 * features (choice OBJ_SEQFEAT).  The list holds borrowed pointers.
 */
static ValNodePtr GetBioSourcesByTaxName (SeqEntryPtr sep, CharPtr taxname)
{
  ObjByStrData query;

  query.obj_list = NULL;
  query.str = taxname;

  /* descriptors are visited before features, which fixes the list order */
  VisitDescriptorsInSep (sep, &query, GetBioSourcesByTaxNameDescriptorCallback);
  VisitFeaturesInSep (sep, &query, GetBioSourcesByTaxNameFeatureCallback);

  return query.obj_list;
}
9899
9900
9901
9902
static ValNodePtr 
9903
FindMatchForRow 
9904
(ValNodePtr  match_type,
9905
 Uint2       entityID,
9906
 SeqEntryPtr sep)
9907
{
9908
  ValNodePtr match_list = NULL;
9909
  SeqIdPtr   sip;
9910
  BioseqPtr  bsp, nbsp = NULL;
9911
  FindGeneLocusTagData fd;
9912
  SeqFeatPtr           sfp;
9913
  SeqMgrFeatContext    fcontext;
9914
9915
  if (match_type == NULL || sep == NULL) return NULL;
9916
9917
  switch (match_type->choice) {
9918
    case eTableMatchFeatureID:
9919
      sfp = SeqMgrGetFeatureByFeatID (entityID, NULL, match_type->data.ptrvalue, NULL, &fcontext);
9920
      if (sfp != NULL) {
9921
        ValNodeAddPointer (&match_list, OBJ_SEQFEAT, sfp);
9922
      }
9923
      break;
9924
    case eTableMatchGeneLocusTag:
9925
      fd.locus_tag = match_type->data.ptrvalue;
9926
      fd.gene_list = NULL;
9927
      VisitBioseqsInSep (sep, &fd, FindGeneByLocusTagBioseqCallback);
9928
      ValNodeLink (&match_list, fd.gene_list);
9929
      break;
9930
    case eTableMatchProteinID:
9931
    case eTableMatchNucID:
9932
      sip = CreateSeqIdFromText (match_type->data.ptrvalue, sep);
9933
      bsp = BioseqFind (sip);
9934
      sip = SeqIdFree (sip);
9935
      if (bsp != NULL) 
9936
      {
9937
        ValNodeAddPointer (&match_list, OBJ_BIOSEQ, bsp);
9938
      }
9939
      break;
9940
    case eTableMatchDbxref:
9941
      match_list = GetFeaturesByDbxref (sep, match_type->data.ptrvalue);
9942
      break;
9943
    case eTableMatchBioSource:
9944
      match_list = GetBioSourcesByTaxName (sep, match_type->data.ptrvalue);
9945
      break;
9946
  }
9947
  return match_list;
9948
}
9949
9950
9951
/* Lists the features of kind featdef that belong to a protein Bioseq.
 *
 * For protein-type feature definitions the features are taken directly
 * from bsp.  Otherwise the coding region whose product is bsp is looked
 * up, and the result is that coding region itself (FEATDEF_CDS), its
 * gene (FEATDEF_GENE) or its overlapping mRNA (FEATDEF_mRNA).  Any other
 * featdef has no related feature and yields an empty list.
 *
 * Returns a list of borrowed feature pointers (choice OBJ_SEQFEAT), or
 * NULL when bsp is NULL, is not a protein, or nothing is found.
 */
static ValNodePtr GetFeatureListForProteinBioseq (Uint1 featdef, BioseqPtr bsp)
{
  ValNodePtr        feat_list = NULL;
  SeqFeatPtr        sfp = NULL;
  SeqFeatPtr        cds;
  SeqMgrFeatContext fcontext;
  Int4              seqfeattype;

  if (bsp == NULL || !ISA_aa (bsp->mol)) 
  {
    return NULL;
  }

  seqfeattype = FindFeatFromFeatDefType (featdef);
  if (seqfeattype == SEQFEAT_PROT)
  {
    for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &fcontext);
         sfp != NULL;
         sfp = SeqMgrGetNextFeature (bsp, sfp, 0, featdef, &fcontext))
    {
      ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, sfp);
    }
    return feat_list;
  }

  cds = SeqMgrGetCDSgivenProduct (bsp, NULL);
  if (cds == NULL) 
  {
    return NULL;
  }

  switch (featdef)
  {
    case FEATDEF_CDS:
      sfp = cds;
      break;
    case FEATDEF_GENE:
      sfp = GetGeneForFeature (cds);
      break;
    case FEATDEF_mRNA:
      sfp = SeqMgrGetOverlappingmRNA (cds->location, &fcontext);
      break;
    default:
      /* no relation defined for this feature kind; sfp must be NULL here
       * rather than left uninitialized, or the test below reads garbage */
      sfp = NULL;
      break;
  }

  if (sfp != NULL)
  {
    ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, sfp);
  }
  return feat_list;
}
9998
9999
10000
/* Lists the features of kind featdef associated with a non-protein
 * Bioseq.
 *
 * For protein-type feature definitions, every coding region on bsp is
 * followed to its product Bioseq and that protein's features of the
 * requested kind are collected.  For all other definitions the matching
 * features located on bsp itself are collected.
 *
 * Returns a list of borrowed feature pointers, or NULL when bsp is NULL
 * or is a protein.
 */
static ValNodePtr GetFeatureListForNucleotideBioseq (Uint1 featdef, BioseqPtr bsp)
{
  ValNodePtr        collected = NULL;
  SeqFeatPtr        feat;
  SeqMgrFeatContext context;
  Boolean           via_cds;
  Uint1             scan_def;
  BioseqPtr         product;

  if (bsp == NULL || ISA_aa (bsp->mol)) 
  {
    return NULL;
  }

  /* protein features are reached through the coding regions, so in that
   * case the scan on this Bioseq is for CDS features */
  via_cds = (Boolean) (FindFeatFromFeatDefType (featdef) == SEQFEAT_PROT);
  scan_def = via_cds ? FEATDEF_CDS : featdef;

  feat = SeqMgrGetNextFeature (bsp, NULL, 0, scan_def, &context);
  while (feat != NULL)
  {
    if (via_cds)
    {
      product = BioseqFindFromSeqLoc (feat->product);
      ValNodeLink (&collected, GetFeatureListForProteinBioseq (featdef, product));
    }
    else
    {
      ValNodeAddPointer (&collected, OBJ_SEQFEAT, feat);
    }
    feat = SeqMgrGetNextFeature (bsp, feat, 0, scan_def, &context);
  }
  return collected;
}
10035
10036
10037
/* Lists the features of kind featdef, on the gene's Bioseq, for which
 * GetGeneForFeature returns this gene.
 *
 * Features are walked in the order SeqMgrGetNextFeature delivers them;
 * the walk stops at the first feature whose left edge is not below the
 * gene's upper coordinate, and features whose right edge lies below the
 * gene's lower coordinate are skipped without the gene lookup.
 *
 * Returns a list of borrowed feature pointers (choice OBJ_SEQFEAT), or
 * NULL when gene is NULL or nothing qualifies.
 */
static ValNodePtr GetFeaturesForGene (SeqFeatPtr gene, Uint1 featdef)
{
  BioseqPtr         host;
  SeqFeatPtr        feat;
  ValNodePtr        collected = NULL;
  SeqMgrFeatContext context;
  Int4              from, to, lo, hi;

  if (gene == NULL) {
    return NULL;
  }

  host = BioseqFindFromSeqLoc (gene->location);
  from = SeqLocStart (gene->location);
  to = SeqLocStop (gene->location);

  /* normalize so that lo <= hi regardless of how the ends were reported */
  if (to < from) {
    lo = to;
    hi = from;
  } else {
    lo = from;
    hi = to;
  }

  feat = SeqMgrGetNextFeature (host, NULL, 0, featdef, &context);
  while (feat != NULL && context.left < hi)
  {
    if (context.right >= lo && gene == GetGeneForFeature (feat))
    {
      ValNodeAddPointer (&collected, OBJ_SEQFEAT, feat);
    }
    feat = SeqMgrGetNextFeature (host, feat, 0, featdef, &context);
  }
  return collected;
}
10067
10068
10069
/* Lists the features of kind featdef that are related to a gene.
 *
 *   FEATDEF_GENE               -> the gene itself
 *   protein-type definitions   -> features of that kind on the product
 *                                 Bioseq of each of the gene's coding
 *                                 regions
 *   anything else              -> features of that kind for the gene, as
 *                                 found by GetFeaturesForGene
 *
 * Returns a list of borrowed feature pointers (choice OBJ_SEQFEAT), or
 * NULL when gene is NULL or nothing is found.
 */
static ValNodePtr GetFeatureListForGene (Uint1 featdef, SeqFeatPtr gene)
{
  ValNodePtr feat_list = NULL, cds_list, vnp;
  SeqFeatPtr sfp, cds;
  SeqMgrFeatContext fcontext;
  BioseqPtr         protbsp;

  if (gene == NULL) 
  {
    return NULL;
  }

  if (featdef == FEATDEF_GENE)
  {
    ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, gene);
  }
  /* Compare the looked-up feature type against SEQFEAT_PROT.  The
   * comparison used to sit inside the call's parentheses, which passed a
   * 0/1 value to the lookup and tested the lookup's result for non-zero.
   */
  else if (FindFeatFromFeatDefType (featdef) == SEQFEAT_PROT)
  {
    cds_list = GetFeaturesForGene (gene, FEATDEF_CDS);
    for (vnp = cds_list; vnp != NULL; vnp = vnp->next) 
    {
      cds = vnp->data.ptrvalue;
      if (cds != NULL)
      {
        protbsp = BioseqFindFromSeqLoc (cds->product);
        for (sfp = SeqMgrGetNextFeature (protbsp, NULL, 0, featdef, &fcontext);
             sfp != NULL;
             sfp = SeqMgrGetNextFeature (protbsp, sfp, 0, featdef, &fcontext))
        {
          ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, sfp);
        }
      }
    }
    /* only the temporary list nodes are freed; the CDS features stay */
    cds_list = ValNodeFree (cds_list);
  }
  else
  {
    feat_list = GetFeaturesForGene (gene, featdef);
  }

  return feat_list;
}
10111
10112
10113
/* Recursively collects the features of kind featdef from every member of
 * a Bioseq-set.
 *
 * For protein-type feature definitions only protein Bioseqs contribute;
 * for all other definitions only non-protein Bioseqs contribute.  Nested
 * sets are descended into.  Entries with no data are skipped.
 *
 * Returns a list of borrowed feature pointers, or NULL when bssp is NULL
 * or nothing is found.
 */
static ValNodePtr AddFeaturesFromBioseqSet (BioseqSetPtr bssp, Uint1 featdef)
{
  SeqEntryPtr member;
  BioseqPtr   seq;
  Boolean     want_protein;
  Boolean     is_protein;
  ValNodePtr  collected = NULL;

  if (bssp == NULL) {
    return NULL;
  }

  want_protein = (Boolean) (FindFeatFromFeatDefType (featdef) == SEQFEAT_PROT);

  for (member = bssp->seq_set; member != NULL; member = member->next) {
    if (member->data.ptrvalue == NULL) {
      continue;
    }

    if (IS_Bioseq_set (member) && !IS_Bioseq (member)) {
      ValNodeLink (&collected, AddFeaturesFromBioseqSet (member->data.ptrvalue, featdef));
      continue;
    }
    if (!IS_Bioseq (member)) {
      continue;
    }

    seq = member->data.ptrvalue;
    is_protein = (Boolean) (ISA_aa (seq->mol) ? TRUE : FALSE);
    if (want_protein && is_protein) {
      ValNodeLink (&collected, GetFeatureListForProteinBioseq (featdef, seq));
    } else if (!want_protein && !is_protein) {
      ValNodeLink (&collected, GetFeatureListForNucleotideBioseq (featdef, seq));
    }
  }
  return collected;
}
10140
10141
10142
/* Given matched biosource objects, lists the features of the field's
 * feature type that lie in each biosource's scope.
 *
 *   biosource feature    -> features on the Bioseq the feature is
 *                           located on
 *   source descriptor    -> (extended descriptors only) features in the
 *                           parent Bioseq-set, or on the parent Bioseq
 *
 * Items with any other choice are ignored.  Returns a list of borrowed
 * feature pointers, or NULL when either argument is NULL or nothing is
 * found.
 */
static ValNodePtr GetFeatureListForBioSourceObjects (ValNodePtr item_list, FeatureFieldPtr field)
{
  ValNodePtr    item;
  SeqFeatPtr    feat;
  SeqDescrPtr   descr;
  BioseqPtr     seq;
  ObjValNodePtr owner;
  ValNodePtr    collected = NULL;

  if (item_list == NULL || field == NULL) {
    return NULL;
  }

  for (item = item_list; item != NULL; item = item->next) {
    switch (item->choice) {
      case OBJ_SEQFEAT:
        feat = item->data.ptrvalue;
        if (feat != NULL) {
          seq = BioseqFindFromSeqLoc (feat->location);
          ValNodeLink (&collected, GetFeatureListForNucleotideBioseq (GetFeatdefFromFeatureType(field->type), seq));
        }
        break;

      case OBJ_SEQDESC:
        descr = item->data.ptrvalue;
        if (descr == NULL || descr->extended == 0) {
          break;
        }
        /* an extended descriptor is viewed as an ObjValNode to reach
         * its parent information */
        owner = (ObjValNodePtr) descr;
        if (owner->idx.parenttype == OBJ_BIOSEQSET) {
          ValNodeLink (&collected, AddFeaturesFromBioseqSet (owner->idx.parentptr, GetFeatdefFromFeatureType(field->type)));
        } else if (owner->idx.parenttype == OBJ_BIOSEQ) {
          seq = (BioseqPtr) owner->idx.parentptr;
          ValNodeLink (&collected, GetFeatureListForNucleotideBioseq (GetFeatdefFromFeatureType(field->type), seq));
        }
        break;

      default:
        break;
    }
  }
  return collected;
}
10175
10176
10177
/* Shallow-copies a ValNode list: new nodes are allocated, but each one
 * carries the same choice and the same data pointer as its original.
 * The objects pointed to are shared, not duplicated.
 * Returns NULL for an empty list.
 */
static ValNodePtr ValNodeCopyPtr (ValNodePtr orig)
{
  ValNodePtr      head = NULL;
  ValNodePtr PNTR link = &head;   /* where the next new node gets attached */
  ValNodePtr      node;

  for (; orig != NULL; orig = orig->next) {
    node = ValNodeNew (NULL);
    node->choice = orig->choice;
    node->data.ptrvalue = orig->data.ptrvalue;
    *link = node;
    link = &(node->next);
  }
  return head;
}
10195
10196
10197
/* Turns a row's matched objects into the list of features that a
 * feature-field column should act on.
 *
 *   feature ID / dbxref  -> the matched features themselves (new list
 *                           nodes, same feature pointers)
 *   gene locus tag       -> features of the field's type for each gene
 *   protein ID           -> features of the field's type for each
 *                           protein Bioseq
 *   nucleotide ID        -> features of the field's type for each
 *                           nucleotide Bioseq
 *   biosource            -> features of the field's type in each
 *                           biosource's scope
 *
 * Returns NULL when either list argument is NULL or the match type is
 * not handled.  match_list itself is not modified.
 */
static ValNodePtr GetFeatureListForRowAndColumn (Uint1 match_type, ValNodePtr match_list, FeatureFieldPtr field)
{
  ValNodePtr targets = NULL;
  ValNodePtr item;

  if (match_list == NULL || field == NULL) {
    return NULL;
  }

  switch (match_type) {
    case eTableMatchFeatureID:
    case eTableMatchDbxref:
      targets = ValNodeCopyPtr (match_list);
      break;

    case eTableMatchGeneLocusTag:
      item = match_list;
      while (item != NULL) {
        ValNodeLink (&targets, GetFeatureListForGene (GetFeatdefFromFeatureType(field->type), item->data.ptrvalue));
        item = item->next;
      }
      break;

    case eTableMatchProteinID:
      item = match_list;
      while (item != NULL) {
        ValNodeLink (&targets, GetFeatureListForProteinBioseq (GetFeatdefFromFeatureType(field->type), item->data.ptrvalue));
        item = item->next;
      }
      break;

    case eTableMatchNucID:
      item = match_list;
      while (item != NULL) {
        ValNodeLink (&targets, GetFeatureListForNucleotideBioseq (GetFeatdefFromFeatureType(field->type), item->data.ptrvalue));
        item = item->next;
      }
      break;

    case eTableMatchBioSource:
      targets = GetFeatureListForBioSourceObjects (match_list, field);
      break;

    default:
      break;
  }
  return targets;
}
10231
10232
10233
/* Appends every source descriptor that SeqMgrGetNextDescriptor reports
 * for bsp to *feature_list, with choice OBJ_SEQDESC.
 * Does nothing when feature_list is NULL.
 */
static void AddBioSourcesForBioseq (BioseqPtr bsp, ValNodePtr PNTR feature_list)
{
  SeqDescrPtr       descr;
  SeqMgrDescContext dcontext;

  if (feature_list == NULL) {
    return;
  }

  descr = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &dcontext);
  while (descr != NULL) {
    ValNodeAddPointer (feature_list, OBJ_SEQDESC, descr);
    descr = SeqMgrGetNextDescriptor (bsp, descr, Seq_descr_source, &dcontext);
  }
}
10245
10246
/* Appends the biosource(s) relevant to a feature to *feature_list.
 * A biosource feature is added itself (choice OBJ_SEQFEAT); for any
 * other feature, the source descriptors of the Bioseq it is located on
 * are added instead.  Does nothing when either argument is NULL.
 */
static void AddBioSourcesForFeature (SeqFeatPtr sfp, ValNodePtr PNTR feature_list)
{
  if (sfp == NULL || feature_list == NULL) {
    return;
  }

  if (sfp->data.choice != SEQFEAT_BIOSRC) {
    AddBioSourcesForBioseq (BioseqFindFromSeqLoc (sfp->location), feature_list);
    return;
  }

  ValNodeAddPointer (feature_list, OBJ_SEQFEAT, sfp);
}
10259
10260
10261
/* Turns a row's matched objects into the list of biosources that a
 * source-qualifier column should act on.
 *
 *   feature ID / gene locus tag / dbxref
 *                         -> biosources for each matched feature
 *   protein ID / nuc ID   -> source descriptors of each matched Bioseq
 *   biosource             -> the matched biosources themselves (new
 *                            list nodes, same object pointers)
 *
 * Returns NULL when either list argument is NULL or the match type is
 * not handled.  match_list itself is not modified.
 */
static ValNodePtr GetBioSourceListForRowAndColumn (Uint1 match_type, ValNodePtr match_list, FeatureFieldPtr field)
{
  ValNodePtr sources = NULL;
  ValNodePtr item;

  if (match_list == NULL || field == NULL) {
    return NULL;
  }

  switch (match_type) {
    /* all three of these match types produce lists of features */
    case eTableMatchFeatureID:
    case eTableMatchGeneLocusTag:
    case eTableMatchDbxref:
      for (item = match_list; item != NULL; item = item->next) {
        if (item->choice != OBJ_SEQFEAT || item->data.ptrvalue == NULL) {
          continue;
        }
        AddBioSourcesForFeature (item->data.ptrvalue, &sources);
      }
      break;

    case eTableMatchProteinID:
    case eTableMatchNucID:
      for (item = match_list; item != NULL; item = item->next) {
        if (item->choice != OBJ_BIOSEQ) {
          continue;
        }
        AddBioSourcesForBioseq (item->data.ptrvalue, &sources);
      }
      break;

    case eTableMatchBioSource:
      sources = ValNodeCopyPtr (match_list);
      break;

    default:
      break;
  }
  return sources;
}
10303
10304
10305
/* Chooses the objects a column's edit applies to, based on the kind of
 * field the column holds: biosources for a source qualifier, features
 * for a feature field, and features of the equivalent feature field for
 * a CDS-gene-prot field.
 *
 * Returns NULL when field is NULL or its choice is not one of those.
 */
static ValNodePtr GetTargetListForRowAndColumn (Uint1 match_type, ValNodePtr match_list, FieldTypePtr field)
{
  ValNodePtr      targets;
  FeatureFieldPtr equivalent;

  if (field == NULL) {
    return NULL;
  }

  switch (field->choice) {
    case FieldType_source_qual:
      return GetBioSourceListForRowAndColumn (match_type, match_list, field->data.ptrvalue);

    case FieldType_feature_field:
      return GetFeatureListForRowAndColumn (match_type, match_list, field->data.ptrvalue);

    case FieldType_cds_gene_prot:
      /* the temporary feature field is only needed for the lookup */
      equivalent = FeatureFieldFromCDSGeneProtField (field->data.intvalue);
      targets = GetFeatureListForRowAndColumn (match_type, match_list, equivalent);
      equivalent = FeatureFieldFree (equivalent);
      return targets;

    default:
      break;
  }
  return NULL;
}
10326
10327
10328
static void ReportMissingTargets (ValNodePtr PNTR perr_list, FieldTypePtr ft, CharPtr match_val, Int4 col_num, Int4 line_num)
10329
{
10330
  CharPtr            feat_name;
10331
  FeatureFieldPtr    field;
10332
  CharPtr            no_feat_fmt = "No %s feature for %s (column %d, line %d)";
10333
  CharPtr            no_src_fmt = "No biosource for %s (column %d, line %d)";
10334
  CharPtr            err_msg;
10335
10336
  if (perr_list == NULL || ft == NULL || match_val == NULL) return;
10337
10338
  switch (ft->choice) {
10339
    case FieldType_source_qual:
10340
      err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_feat_fmt) 
10341
                                                    + StringLen (match_val)
10342
                                                    + 30));
10343
      sprintf (err_msg, no_src_fmt, match_val, col_num, line_num);
10344
      ValNodeAddPointer (perr_list, 0, err_msg);
10345
      break;
10346
    case FieldType_feature_field:
10347
      field = (FeatureFieldPtr) ft->data.ptrvalue;
10348
      if (field != NULL) {
10349
        feat_name = GetFeatureNameFromFeatureType (field->type);
10350
        err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_feat_fmt) 
10351
                                                      + StringLen (feat_name)
10352
                                                      + StringLen (match_val)
10353
                                                      + 30));
10354
        sprintf (err_msg, no_feat_fmt, feat_name, match_val, col_num, line_num);
10355
        ValNodeAddPointer (perr_list, 0, err_msg);
10356
      }
10357
      break;
10358
    case FieldType_cds_gene_prot:
10359
      field = FeatureFieldFromCDSGeneProtField (ft->data.intvalue);
10360
      if (field != NULL) {
10361
        feat_name = GetFeatureNameFromFeatureType (field->type);
10362
        err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_feat_fmt) 
10363
                                                      + StringLen (feat_name)
10364
                                                      + StringLen (match_val)
10365
                                                      + 30));
10366
        sprintf (err_msg, no_feat_fmt, feat_name, match_val, col_num, line_num);
10367
        ValNodeAddPointer (perr_list, 0, err_msg);
10368
      }
10369
      field = FeatureFieldFree (field);
10370
      break;
10371
  }
10372
}
10373
10374
10375
/* Adds a "No ID for line <n>" message to the error list.
 *
 * perr_list: address of the list head handed to ValNodeAddPointer
 * line_num:  table line number written into the message
 *
 * The message buffer is allocated with MemNew and passed to
 * ValNodeAddPointer with choice 0.
 */
static void ReportEmptyIDColumn (ValNodePtr PNTR perr_list, Int4 line_num)
{
  CharPtr fmt = "No ID for line %d";
  CharPtr msg;

  /* 15 spare characters leave room for the formatted number and terminator */
  msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + 15));
  sprintf (msg, fmt, line_num);
  ValNodeAddPointer (perr_list, 0, msg);
}
10384
10385
/* Walks a table line and the column configuration list in parallel and
 * returns the first value node whose column has match_type > 0.
 *
 * Side effect: the returned node's choice is overwritten with the column's
 * match_type (cast to Uint1).
 *
 * Returns NULL when either list ends before such a column is found.
 */
static ValNodePtr FindMatchChoiceInLine (ValNodePtr val_vnp, ValNodePtr col_vnp)
{
  TabColumnConfigPtr cfg;

  for (; val_vnp != NULL && col_vnp != NULL;
       val_vnp = val_vnp->next, col_vnp = col_vnp->next) {
    cfg = (TabColumnConfigPtr) col_vnp->data.ptrvalue;
    if (cfg == NULL || !(cfg->match_type > 0)) {
      continue;
    }
    val_vnp->choice = (Uint1) cfg->match_type;
    return val_vnp;
  }
  return NULL;
}
10400
10401
10402
/* Looks up the mRNA feature that overlaps the given feature.
 *
 * For a protein feature (data.choice == SEQFEAT_PROT) the lookup is done on
 * the feature returned by SeqMgrGetCDSgivenProduct for the Bioseq found from
 * the protein feature's location; otherwise the feature's own location is
 * used.
 *
 * Returns the result of SeqMgrGetOverlappingmRNA, or NULL when sfp is NULL
 * or no feature is found for the protein.
 */
NLM_EXTERN SeqFeatPtr GetmRNAForFeature (SeqFeatPtr sfp)
{
  SeqMgrFeatContext context;
  BioseqPtr         prot_bsp;
  SeqFeatPtr        anchor = sfp;

  if (anchor == NULL) {
    return NULL;
  }

  if (anchor->data.choice == SEQFEAT_PROT) {
    prot_bsp = BioseqFindFromSeqLoc (anchor->location);
    anchor = SeqMgrGetCDSgivenProduct (prot_bsp, NULL);
    if (anchor == NULL) {
      return NULL;
    }
  }

  return SeqMgrGetOverlappingmRNA (anchor->location, &context);
}
10416
10417
10418
/* Copies the first name of a protein feature onto the ext field of the mRNA
 * found by GetmRNAForFeature.
 *
 * sfp must be a protein feature (data.choice == SEQFEAT_PROT).
 *
 * Returns FALSE when sfp is NULL, is not a protein feature, or no mRNA is
 * found; TRUE otherwise.  When the protein has no name text, the mRNA's
 * ext value is freed and ext.choice is set to 0.
 */
NLM_EXTERN Boolean AdjustmRNAProductToMatchProteinProduct (SeqFeatPtr sfp)
{
  SeqFeatPtr mrna;
  ProtRefPtr prot_ref;
  RnaRefPtr  rna_ref;
  Boolean    has_name;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT) {
    return FALSE;
  }

  prot_ref = (ProtRefPtr) sfp->data.value.ptrvalue;

  mrna = GetmRNAForFeature (sfp);
  if (mrna == NULL) {
    return FALSE;
  }

  /* create the RnaRef on demand so there is somewhere to store the name */
  rna_ref = (RnaRefPtr) mrna->data.value.ptrvalue;
  if (rna_ref == NULL) {
    rna_ref = RnaRefNew ();
    mrna->data.value.ptrvalue = rna_ref;
  }

  /* the previous ext value is always discarded before deciding what to store */
  rna_ref->ext.value.ptrvalue = MemFree (rna_ref->ext.value.ptrvalue);

  has_name = (Boolean) (prot_ref != NULL
                        && prot_ref->name != NULL
                        && !StringHasNoText (prot_ref->name->data.ptrvalue));
  if (has_name) {
    rna_ref->ext.choice = 1;
    rna_ref->ext.value.ptrvalue = StringSave (prot_ref->name->data.ptrvalue);
  } else {
    rna_ref->ext.choice = 0;
  }
  return TRUE;
}
10450
10451
10452
/* Reports whether a field type designates the coding region product.
 *
 * TRUE for:
 *   - a feature field whose type is Feature_type_cds and whose qualifier is
 *     the legal qual Feat_qual_legal_product, or
 *   - a cds-gene-prot field equal to CDSGeneProt_field_prot_name.
 * FALSE for everything else, including a NULL ft.
 */
NLM_EXTERN Boolean IsFieldTypeCDSProduct (FieldTypePtr ft)
{
  FeatureFieldPtr ff;

  if (ft == NULL) {
    return FALSE;
  }

  if (ft->choice == FieldType_cds_gene_prot) {
    return (Boolean) (ft->data.intvalue == CDSGeneProt_field_prot_name);
  }

  if (ft->choice != FieldType_feature_field) {
    return FALSE;
  }

  ff = (FeatureFieldPtr) ft->data.ptrvalue;
  if (ff == NULL || ff->type != Feature_type_cds || ff->field == NULL) {
    return FALSE;
  }
  return (Boolean) (ff->field->choice == FeatQualChoice_legal_qual
                    && ff->field->data.intvalue == Feat_qual_legal_product);
}
10473
10474
10475
/* Reports whether a field type designates the gene locus tag.
 *
 * TRUE for:
 *   - a feature field whose type is Feature_type_gene and whose qualifier is
 *     the legal qual Feat_qual_legal_locus_tag, or
 *   - a cds-gene-prot field equal to CDSGeneProt_field_gene_locus_tag.
 * FALSE for everything else, including a NULL ft.
 */
static Boolean IsFieldTypeGeneLocusTag (FieldTypePtr ft)
{
  FeatureFieldPtr ff;

  if (ft == NULL) {
    return FALSE;
  }

  if (ft->choice == FieldType_cds_gene_prot) {
    return (Boolean) (ft->data.intvalue == CDSGeneProt_field_gene_locus_tag);
  }

  if (ft->choice != FieldType_feature_field) {
    return FALSE;
  }

  ff = (FeatureFieldPtr) ft->data.ptrvalue;
  if (ff == NULL || ff->type != Feature_type_gene || ff->field == NULL) {
    return FALSE;
  }
  return (Boolean) (ff->field->choice == FeatQualChoice_legal_qual
                    && ff->field->data.intvalue == Feat_qual_legal_locus_tag);
}
10496
10497
10498
10499
/* Checks the locus tag values found in a tab table.
 *
 * Every non-blank cell that sits in a non-match column whose field is the
 * gene locus tag is collected (by pointer, not copied) and passed to
 * FindBadLocusTagsInList.  Each entry that comes back with a recognised
 * error choice becomes one MemNew-allocated message in the returned list.
 *
 * Returns NULL when table or columns is NULL, or when nothing was reported.
 */
NLM_EXTERN ValNodePtr ValidateTabTableValues (ValNodePtr table, ValNodePtr columns)
{
  ValNodePtr         err_list = NULL;
  ValNodePtr         row, cell, col, bad;
  ValNodePtr         locus_tag_values = NULL, bad_locus_tags;
  TabColumnConfigPtr t;
  CharPtr            bad_format_fmt = "Locus tag %s has incorrect format";
  CharPtr            dup_fmt = "Locus tag %s appears in the table more than once";
  CharPtr            inconsistent_fmt = "Locus tag prefix for %s is inconsistent";
  CharPtr            fmt, err_msg;

  if (table == NULL || columns == NULL) {
    return NULL;
  }

  /* gather the locus tag cells from every line */
  for (row = table; row != NULL; row = row->next) {
    cell = row->data.ptrvalue;
    col = columns;
    while (cell != NULL && col != NULL) {
      t = (TabColumnConfigPtr) col->data.ptrvalue;
      if (t != NULL
          && !(t->match_type > 0)
          && !StringHasNoText (cell->data.ptrvalue)
          && IsFieldTypeGeneLocusTag (t->field)) {
        ValNodeAddPointer (&locus_tag_values, 0, cell->data.ptrvalue);
      }
      cell = cell->next;
      col = col->next;
    }
  }

  /* NOTE(review): bad_locus_tags is not released in this function; confirm
   * whether the caller of FindBadLocusTagsInList is expected to free it. */
  bad_locus_tags = FindBadLocusTagsInList (locus_tag_values);
  for (bad = bad_locus_tags; bad != NULL; bad = bad->next) {
    /* pick the message template; unrecognised choices produce no message */
    fmt = NULL;
    switch (bad->choice) {
      case eLocusTagErrorBadFormat:
        fmt = bad_format_fmt;
        break;
      case eLocusTagErrorDuplicate:
        fmt = dup_fmt;
        break;
      case eLocusTagErrorInconsistentPrefix:
        fmt = inconsistent_fmt;
        break;
    }
    if (fmt == NULL) {
      continue;
    }
    /* the two characters of "%s" in the template leave room for the terminator */
    err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (bad->data.ptrvalue)));
    sprintf (err_msg, fmt, bad->data.ptrvalue);
    ValNodeAddPointer (&err_list, 0, err_msg);
  }

  /* only the nodes are freed: the strings still belong to the table */
  locus_tag_values = ValNodeFree (locus_tag_values);
  return err_list;
}
10554
10555
10556
/* Builds, for every table line, the list of target objects for each column.
 *
 * sep:        SeqEntry searched for matches
 * table:      list of lines; each line's data.ptrvalue is a list of cells
 * columns:    list of TabColumnConfigPtr, parallel to the cells of a line
 * p_err_list: optional; receives the error messages when any were recorded.
 *             It is not written when there are no errors.  When NULL, the
 *             messages are freed instead.
 *
 * Returns a list with one node per table line.  Each node's data.ptrvalue is
 * a row list with one node per column whose data.ptrvalue is the target list
 * for that column (NULL when the column is skipped or has no targets).  The
 * row is NULL for lines without an ID or without a match.  Returns NULL when
 * sep, table or columns is NULL.
 */
NLM_EXTERN ValNodePtr GetObjectTableForTabTable (SeqEntryPtr sep, ValNodePtr table, ValNodePtr columns, ValNodePtr PNTR p_err_list)
{
  ValNodePtr err_list = NULL;
  ValNodePtr line_vnp, val_vnp, col_vnp;
  ValNodePtr obj_table = NULL, obj_row;
  Int4       line_num, col_num;
  Uint2      entityID;
  ValNodePtr match_list, match_choice, target_list;
  TabColumnConfigPtr t;
  CharPtr            err_msg;
  CharPtr            no_match_fmt = "No match for %s, line %d";

  if (sep == NULL) {
    ValNodeAddPointer (&err_list, 0, StringSave ("No SeqEntry"));
  }
  if (table == NULL) {
    ValNodeAddPointer (&err_list, 0, StringSave ("No table"));
  }
  if (columns == NULL) {
    ValNodeAddPointer (&err_list, 0, StringSave ("No column information"));
  }
  if (err_list != NULL) {
    if (p_err_list == NULL) {
      err_list = ValNodeFreeData (err_list);
    } else {
      *p_err_list = err_list;
    }
    return NULL;
  }

  entityID = SeqMgrGetEntityIDForSeqEntry (sep);

  for (line_vnp = table, line_num = 1; line_vnp != NULL; line_vnp = line_vnp->next, line_num++) {
    obj_row = NULL;
    match_choice = FindMatchChoiceInLine (line_vnp->data.ptrvalue, columns);
    if (match_choice == NULL || StringHasNoText (match_choice->data.ptrvalue)) {
      ReportEmptyIDColumn (&err_list, line_num);
    } else {
      match_list = FindMatchForRow (match_choice, entityID, sep);
      if (match_list == NULL) {
        err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_match_fmt) + StringLen (match_choice->data.ptrvalue) + 15));
        sprintf (err_msg, no_match_fmt, match_choice->data.ptrvalue, line_num);
        ValNodeAddPointer (&err_list, 0, err_msg);
      } else {
        for (val_vnp = line_vnp->data.ptrvalue, col_vnp = columns, col_num = 1;
             col_vnp != NULL;
             col_vnp = col_vnp->next, col_num++) {
          target_list = NULL;
          t = (TabColumnConfigPtr) col_vnp->data.ptrvalue;
          if (t == NULL || t->match_type > 0 
              || (t->skip_blank && (val_vnp == NULL || StringHasNoText (val_vnp->data.ptrvalue)))) {
            /* no targets */
          } else {         
            target_list = GetTargetListForRowAndColumn (match_choice->choice, match_list, t->field);
            if (target_list == NULL) {
              ReportMissingTargets (&err_list, t->field, match_choice->data.ptrvalue, col_num, line_num); 
            }
          }
          /* a node is added for every column, even with a NULL target list,
           * so that row positions stay aligned with the column list */
          ValNodeAddPointer (&obj_row, 0, target_list);
          if (val_vnp != NULL) {
            val_vnp = val_vnp->next;
          }
        }
      }
      /* release the per-row match list, as ApplyTableToFeatures does; the
       * target lists stored in obj_row are freed separately there as well */
      match_list = ValNodeFree (match_list);
    }
    ValNodeAddPointer (&obj_table, 0, obj_row);
  }

  if (err_list != NULL) {
    if (p_err_list == NULL) {
      err_list = ValNodeFreeData (err_list);
    } else {
      *p_err_list = err_list;
    }
  }  
  return obj_table;
}
10636
10637
10638
/* Releases an object table: for every row node, each cell's list is freed
 * with ValNodeFree, then the cell node, then the row node itself.
 *
 * Nodes are detached (next set to NULL) before being passed to ValNodeFree
 * so that only the intended node is released by each call.
 *
 * Always returns NULL.
 */
NLM_EXTERN ValNodePtr FreeObjectTableForTabTable (ValNodePtr table)
{
  ValNodePtr row_node, next_row;
  ValNodePtr cell, next_cell;

  for (row_node = table; row_node != NULL; row_node = next_row) {
    next_row = row_node->next;
    row_node->next = NULL;

    for (cell = row_node->data.ptrvalue; cell != NULL; cell = next_cell) {
      next_cell = cell->next;
      cell->next = NULL;
      cell->data.ptrvalue = ValNodeFree (cell->data.ptrvalue);
      ValNodeFree (cell);
    }

    ValNodeFree (row_node);
  }
  return NULL;
}
10658
10659
10660
typedef struct countfeat {
10661
  Uint1 featdef;
10662
  Int4 num;
10663
} CountFeatData, PNTR CountFeatPtr;
10664
10665
10666
/* Feature callback: bumps the count in the CountFeatData passed as userdata
 * when the feature's idx.subtype equals the requested featdef. */
static void CountFeaturesCallback (SeqFeatPtr sfp, Pointer userdata)
{
  CountFeatPtr tally = (CountFeatPtr) userdata;

  if (sfp != NULL && tally != NULL && sfp->idx.subtype == tally->featdef) {
    tally->num++;
  }
}
10677
10678
/* Descriptor callback: increments the Int4 that userdata points to for each
 * descriptor whose choice is Seq_descr_source. */
static void CountBioSourceDescriptorsCallback (SeqDescrPtr sdp, Pointer userdata)
{
  Int4Ptr counter = (Int4Ptr) userdata;

  if (sdp == NULL || counter == NULL) {
    return;
  }
  if (sdp->choice != Seq_descr_source) {
    return;
  }
  (*counter)++;
}
10687
10688
10689
/* For every column, counts how many objects in sep the column's field could
 * apply to, and returns the counts as an integer list parallel to columns.
 *
 *   - source qualifier columns: BIOSRC features plus source descriptors
 *   - feature field / cds-gene-prot columns: features whose subtype matches
 *     the featdef derived from the field's feature type
 *   - match columns, NULL configs and columns without a field: 0
 *
 * The most recent count is kept in d and reused when the next column asks
 * for the same thing.  d_is_source_count records whether d.num currently
 * includes source descriptors, so that a feature-only BIOSRC count and a
 * source (feature + descriptor) count are never mistaken for one another.
 */
static ValNodePtr CountObjectsForColumnFields (SeqEntryPtr sep, ValNodePtr columns)
{
  ValNodePtr count_list = NULL, vnp;
  TabColumnConfigPtr t;
  CountFeatData d;
  FeatureFieldPtr f;
  Int4 num;
  Uint1 featdef;
  Boolean d_is_source_count = FALSE;

  d.featdef = 0;
  d.num = 0;
  for (vnp = columns; vnp != NULL; vnp = vnp->next) {
    num = 0;
    t = (TabColumnConfigPtr) vnp->data.ptrvalue;
    if (t != NULL && t->match_type == 0 && t->field != NULL) {
      switch (t->field->choice) {
        case FieldType_source_qual:
          if (!d_is_source_count) {
            d.featdef = FEATDEF_BIOSRC;
            d.num = 0;
            VisitFeaturesInSep (sep, &d, CountFeaturesCallback);
            VisitDescriptorsInSep (sep, &(d.num), CountBioSourceDescriptorsCallback);
            d_is_source_count = TRUE;
          }
          num = d.num;
          break;
        case FieldType_feature_field:
          f = (FeatureFieldPtr) t->field->data.ptrvalue;
          if (f != NULL) {
            featdef = GetFeatdefFromFeatureType(f->type);
            if (featdef != d.featdef || d_is_source_count) {
              d.featdef = featdef;
              d.num = 0;
              VisitFeaturesInSep (sep, &d, CountFeaturesCallback);
              d_is_source_count = FALSE;
            }
            num = d.num;
          }
          break;
        case FieldType_cds_gene_prot:
          f = FeatureFieldFromCDSGeneProtField (t->field->data.intvalue);
          if (f != NULL) {
            featdef = GetFeatdefFromFeatureType(f->type);
            if (featdef != d.featdef || d_is_source_count) {
              d.featdef = featdef;
              d.num = 0;
              VisitFeaturesInSep (sep, &d, CountFeaturesCallback);
              d_is_source_count = FALSE;
            }
            num = d.num;
          }
          /* this FeatureField was created for the lookup only */
          f = FeatureFieldFree (f);
          break;
      }
    }
    ValNodeAddInt (&count_list, 0, num);
  }
  return count_list;
}
10745
10746
10747
/* Applies the cell values of a tab table to the objects in obj_table.
 *
 * sep:       SeqEntry used to count the total objects per column
 * table:     list of lines; each line's data.ptrvalue is a list of cells
 * columns:   list of TabColumnConfigPtr, parallel to the cells of a line
 * obj_table: rows of per-column target lists, parallel to table
 *
 * For each target, an empty value removes the field and a non-empty value
 * sets it.  When the column has match_mrna set and the field is the CDS
 * product, the overlapping mRNA product is adjusted as well.
 *
 * Returns a list of MemNew-allocated messages: first "<n> fields affected",
 * then (when n > 0) one line per non-match column giving the number of
 * items affected out of the total, followed by one message for each value
 * that could not be set.
 */
NLM_EXTERN ValNodePtr ApplyTableValuesToObjectTable (SeqEntryPtr sep, ValNodePtr table, ValNodePtr columns, ValNodePtr obj_table)
{
  ValNodePtr val_line_vnp, obj_line_vnp;
  ValNodePtr val_vnp, obj_vnp, col_vnp;
  ValNodePtr target_vnp;
  TabColumnConfigPtr t;
  CharPtr val, qual_name;
  ValNodePtr         err_list = NULL, count_list, count_affected_list = NULL, count_vnp, count_tot_vnp;
  CharPtr            err_msg;
  CharPtr            bad_col_val_fmt = "Did not set value for column %d, line %d";
  CharPtr            num_affected_fmt = "%d fields affected";
  CharPtr            col_num_affected_fmt = "For %s (column %d), %d items were affected out of %d total";
  Int4 num_fields_affected = 0, col_num, line_num, num_this_column;
  Boolean success;
  ValNodePtr count_msg = NULL;

  count_list = CountObjectsForColumnFields (sep, columns);

  for (val_line_vnp = table, obj_line_vnp = obj_table, line_num = 1;
       val_line_vnp != NULL && obj_line_vnp != NULL;
       val_line_vnp = val_line_vnp->next, obj_line_vnp = obj_line_vnp->next, line_num++) {
    val_vnp = val_line_vnp->data.ptrvalue;
    obj_vnp = obj_line_vnp->data.ptrvalue;
    col_vnp = columns;
    col_num = 1;
    count_vnp = count_affected_list;
    while (obj_vnp != NULL && col_vnp != NULL) {
      num_this_column = 0;
      if (obj_vnp->data.ptrvalue != NULL) {
        t = (TabColumnConfigPtr) col_vnp->data.ptrvalue;
        if (t == NULL || t->match_type > 0 
            || (t->skip_blank && (val_vnp == NULL || StringHasNoText (val_vnp->data.ptrvalue)))) {
          /* ignore column or skip blank value */
        } else {
          if (val_vnp == NULL || val_vnp->data.ptrvalue == NULL) {
            val = "";
          } else {
            val = val_vnp->data.ptrvalue;
          }
          for (target_vnp = obj_vnp->data.ptrvalue; target_vnp != NULL; target_vnp = target_vnp->next) {
            if (val[0] == 0) {
              success = RemoveFieldValueForObject (target_vnp->choice, target_vnp->data.ptrvalue, t->field, NULL);
            } else {
              /* val is the cell's own string here (non-NULL and non-empty) */
              success = SetFieldValueForObject (target_vnp->choice, target_vnp->data.ptrvalue, t->field, NULL,
                                                val, t->existing_text);
            }
            if (success) {
              num_fields_affected++;
              num_this_column++;
              if (t->match_mrna && IsFieldTypeCDSProduct (t->field)
                  && target_vnp->choice == OBJ_SEQFEAT) {
                if (AdjustmRNAProductToMatchProteinProduct (target_vnp->data.ptrvalue)) {
                  num_fields_affected++;
                }
              }
            } else {
              err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_col_val_fmt) + 30));
              sprintf (err_msg, bad_col_val_fmt, col_num, line_num);
              ValNodeAddPointer (&err_list, 0, err_msg);
            }
          }
        }
      }
      if (val_vnp != NULL) {
        val_vnp = val_vnp->next;
      }
      if (count_vnp == NULL) {
        /* no tally node for this column yet: create it */
        ValNodeAddInt (&count_affected_list, 0, num_this_column);
      } else {
        /* accumulate what this row actually changed in this column */
        count_vnp->data.intvalue += num_this_column;
        count_vnp = count_vnp->next;
      }
      obj_vnp = obj_vnp->next;
      col_vnp = col_vnp->next;
      col_num++;
    }
  }

  /* put message at top of list for number of fields affected */
  err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (num_affected_fmt) + 15));
  sprintf (err_msg, num_affected_fmt, num_fields_affected);
  ValNodeAddPointer (&count_msg, 0, err_msg);

  /* if any affected, list number of fields per column, and the total in the record */
  if (num_fields_affected > 0) {
    for (count_vnp = count_affected_list, count_tot_vnp = count_list, col_vnp = columns, col_num = 1;
         count_vnp != NULL && count_tot_vnp != NULL && col_vnp != NULL;
         count_vnp = count_vnp->next, count_tot_vnp = count_tot_vnp->next, col_vnp = col_vnp->next, col_num++) {
      t = (TabColumnConfigPtr) col_vnp->data.ptrvalue;
      if (t != NULL && t->match_type == 0) {
        qual_name = SummarizeFieldType (t->field);
        err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (col_num_affected_fmt) + StringLen (qual_name) + 45));
        sprintf (err_msg, col_num_affected_fmt, qual_name, col_num, count_vnp->data.intvalue, count_tot_vnp->data.intvalue);      
        ValNodeAddPointer (&count_msg, 0, err_msg);
        qual_name = MemFree (qual_name);
      }
    }
  }

  /* summary lines first, then the individual failures */
  ValNodeLink (&count_msg, err_list);

  count_list = ValNodeFree (count_list);
  count_affected_list = ValNodeFree (count_affected_list);

  return count_msg;
}
10853
10854
10855
/* Sort comparator for ValNodes: orders first by choice, then by the address
 * held in data.ptrvalue.
 *
 * ptr1, ptr2: pointers to ValNodePtr, as supplied by ValNodeSort.
 *
 * Returns -1, 0 or 1.  NULL arguments or NULL nodes compare as equal.
 */
static int LIBCALLBACK SortVnpByChoiceAndPtrvalue (VoidPtr ptr1, VoidPtr ptr2)

{
  ValNodePtr  vnp1;
  ValNodePtr  vnp2;

  if (ptr1 == NULL || ptr2 == NULL) {
    return 0;
  }
  vnp1 = *((ValNodePtr PNTR) ptr1);
  vnp2 = *((ValNodePtr PNTR) ptr2);
  if (vnp1 == NULL || vnp2 == NULL) {
    return 0;
  }

  if (vnp1->choice > vnp2->choice) {
    return 1;
  }
  if (vnp1->choice < vnp2->choice) {
    return -1;
  }
  if (vnp1->data.ptrvalue > vnp2->data.ptrvalue) {
    return 1;
  }
  /* compare vnp1 against vnp2 so the ordering is antisymmetric */
  if (vnp1->data.ptrvalue < vnp2->data.ptrvalue) {
    return -1;
  }
  return 0;
}
10880
10881
10882
/* Detects columns in which more than one row targets the same object.
 *
 * obj_table: rows of per-column target lists.
 *
 * The table is walked column by column.  For each column the target lists of
 * all rows are copied into one list, sorted with SortVnpByChoiceAndPtrvalue,
 * and scanned for two adjacent nodes with the same choice and pointer.
 *
 * Returns one MemNew-allocated message per offending column; the node's
 * choice is the 1-based column number.  Returns NULL when none are found.
 */
NLM_EXTERN ValNodePtr CheckObjTableForRowsThatApplyToTheSameDestination (ValNodePtr obj_table)
{
  Int4       col_num = 1;
  ValNodePtr row_vnp, cursor, cell, targets, scan;
  ValNodePtr cursors = NULL, column_objs;
  ValNodePtr err_list = NULL;
  Boolean    more_columns, duplicate;
  CharPtr    multi_fmt = "Multiple rows apply to the same object for column %d";
  CharPtr    err_msg;

  /* one cursor per row, each starting at that row's first column */
  for (row_vnp = obj_table; row_vnp != NULL; row_vnp = row_vnp->next) {
    ValNodeAddPointer (&cursors, 0, row_vnp->data.ptrvalue);
  }

  do {
    more_columns = FALSE;
    column_objs = NULL;

    /* collect this column's targets from every row and advance the cursors */
    for (cursor = cursors; cursor != NULL; cursor = cursor->next) {
      cell = cursor->data.ptrvalue;
      if (cell == NULL) {
        continue;
      }
      targets = cell->data.ptrvalue;
      ValNodeLink (&column_objs, ValNodeCopyPtr (targets));
      cursor->data.ptrvalue = cell->next;
      more_columns = TRUE;
    }

    if (column_objs != NULL) {
      /* after sorting, identical targets end up next to each other */
      column_objs = ValNodeSort (column_objs, SortVnpByChoiceAndPtrvalue);
      duplicate = FALSE;
      scan = column_objs;
      while (scan != NULL && scan->next != NULL && !duplicate) {
        if (scan->choice == scan->next->choice
            && scan->data.ptrvalue == scan->next->data.ptrvalue) {
          duplicate = TRUE;
        }
        scan = scan->next;
      }
      if (duplicate) {
        err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (multi_fmt) + 30));
        sprintf (err_msg, multi_fmt, col_num);
        ValNodeAddPointer (&err_list, col_num, err_msg);
      }
      column_objs = ValNodeFree (column_objs);
    }
    col_num++;
  } while (more_columns);

  cursors = ValNodeFree (cursors);
  return err_list;
}
10935
10936
10937
/* Reports which target fields already hold text before a table is applied.
 *
 * sep:       SeqEntry the table applies to
 * table:     list of lines; each line's data.ptrvalue is a list of cells
 * columns:   list of TabColumnConfigPtr, parallel to the cells of a line
 * obj_table: rows of per-column target lists, parallel to table
 *
 * Returns a list of MemNew-allocated messages:
 *   - when sep, table or columns is NULL: one message per missing argument
 *     (choice 1), and nothing else;
 *   - otherwise one message per target whose field already has text (choice
 *     is the column number), preceded by a "<n> fields already have text."
 *     summary (choice 0) and, when CDS product targets are a mix of features
 *     with and without an mRNA, by an mRNA warning (choice 0).
 * Returns NULL when there is nothing to report.
 */
NLM_EXTERN ValNodePtr CheckObjTableForExistingText (SeqEntryPtr sep, ValNodePtr table, ValNodePtr columns, ValNodePtr obj_table)
{
  ValNodePtr err_list = NULL, vnp;
  ValNodePtr val_line_vnp, obj_line_vnp;
  ValNodePtr val_vnp, obj_vnp, col_vnp;
  Int4       line_num = 1, col_num, num_existing_text = 0;
  Uint2      entityID;
  TabColumnConfigPtr t;
  CharPtr            err_msg, str, qual_name, val;
  CharPtr            already_has_val_fmt = "%s already has value '%s' (column %d), line %d.  Replacement is '%s'";
  CharPtr            num_existing_text_fmt = "%d fields already have text.";
  CharPtr            mrna_warn_fmt = "%d coding region features have mRNAs, but %d do not.";
  ValNodePtr         target_list, feat_vnp;
  Int4               num_with_mrna = 0, num_without_mrna = 0;

  if (sep == NULL) {
    ValNodeAddPointer (&err_list, 1, StringSave ("No SeqEntry"));
  }
  if (table == NULL) {
    ValNodeAddPointer (&err_list, 1, StringSave ("No table"));
  }
  if (columns == NULL) {
    ValNodeAddPointer (&err_list, 1, StringSave ("No column information"));
  }
  if (err_list != NULL) {
    return err_list;
  }

  /* NOTE(review): the value is not used below; the call is kept in case the
   * lookup has side effects - confirm before removing */
  entityID = SeqMgrGetEntityIDForSeqEntry (sep);

  for (val_line_vnp = table, obj_line_vnp = obj_table, line_num = 1;
       val_line_vnp != NULL && obj_line_vnp != NULL;
       val_line_vnp = val_line_vnp->next, obj_line_vnp = obj_line_vnp->next, line_num++) {
    val_vnp = val_line_vnp->data.ptrvalue;
    obj_vnp = obj_line_vnp->data.ptrvalue;
    col_vnp = columns;
    if (val_vnp == NULL || obj_vnp == NULL) continue;
    col_num = 1;
    while (obj_vnp != NULL && col_vnp != NULL) {
      if (obj_vnp->data.ptrvalue != NULL) {
        t = (TabColumnConfigPtr) col_vnp->data.ptrvalue;
        if (t == NULL || t->match_type > 0 
            || (t->skip_blank && (val_vnp == NULL || StringHasNoText (val_vnp->data.ptrvalue)))) {
          /* ignore column or skip blank value */
        } else {
          target_list = obj_vnp->data.ptrvalue;
          if (val_vnp == NULL || val_vnp->data.ptrvalue == NULL) {
            val = "";
          } else {
            val = val_vnp->data.ptrvalue;
          }
          for (feat_vnp = target_list; feat_vnp != NULL; feat_vnp = feat_vnp->next) {
            /* check for existing text */
            str = GetFieldValueForObject (feat_vnp->choice, feat_vnp->data.ptrvalue, t->field, NULL);
            if (!StringHasNoText (str)) {
              qual_name = SummarizeFieldType (t->field);
              err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (already_has_val_fmt)
                                                          + StringLen (qual_name) + StringLen (str)  
                                                          + StringLen (val)
                                                          + 30));
              sprintf (err_msg, already_has_val_fmt, qual_name, str, col_num, line_num, val);
              ValNodeAddPointer (&err_list, col_num, err_msg);
              /* the summary string has been copied into the message */
              qual_name = MemFree (qual_name);
              num_existing_text ++;
            }
            str = MemFree (str);
            /* check for mrna if changing CDS product */
            if (IsFieldTypeCDSProduct (t->field) && feat_vnp->choice == OBJ_SEQFEAT) {
              if (GetmRNAForFeature (feat_vnp->data.ptrvalue) != NULL) {
                num_with_mrna++;
              } else {
                num_without_mrna++;
              }
            }
          }
        }
      }
      if (val_vnp != NULL) {
        val_vnp = val_vnp->next;
      }
      obj_vnp = obj_vnp->next;
      col_vnp = col_vnp->next;
      col_num++;
    }
  }          
  if (num_existing_text > 0) {
    /* summary goes at the head of the list */
    err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (num_existing_text_fmt)
                                                + 15));
    sprintf (err_msg, num_existing_text_fmt, num_existing_text);
    vnp = ValNodeNew (NULL);
    vnp->choice = 0;
    vnp->data.ptrvalue = err_msg;
    vnp->next = err_list;
    err_list = vnp;
  }
  if (num_with_mrna > 0 && num_without_mrna > 0) {
    /* warn only when the targets are a mix of with and without mRNA */
    err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (mrna_warn_fmt)
                                                + 30));
    sprintf (err_msg, mrna_warn_fmt, num_with_mrna, num_without_mrna);
    vnp = ValNodeNew (NULL);
    vnp->choice = 0;
    vnp->data.ptrvalue = err_msg;
    vnp->next = err_list;
    err_list = vnp;
  }    

  return err_list;
}
11044
11045
11046
/* Applies a tab table directly to the objects in sep, matching each line to
 * its objects and setting (or, for empty values, removing) the field named
 * by every non-match column.
 *
 * sep:     SeqEntry to modify
 * table:   list of lines; each line's data.ptrvalue is a list of cells
 * columns: list of TabColumnConfigPtr, parallel to the cells of a line
 *
 * Returns a list of MemNew-allocated messages.  When sep, table or columns
 * is NULL the list holds only the corresponding complaints.  Otherwise the
 * first node is "<n> fields affected", followed by any per-line problems
 * (missing ID, no match, missing targets, value not set).
 */
NLM_EXTERN ValNodePtr ApplyTableToFeatures (SeqEntryPtr sep, ValNodePtr table, ValNodePtr columns)
{
  ValNodePtr         err_list = NULL, summary;
  ValNodePtr         row, cell, col;
  ValNodePtr         id_cell, matches, targets, target;
  Int4               line_num, col_num;
  Int4               num_fields_affected = 0;
  Uint2              entityID;
  TabColumnConfigPtr t;
  CharPtr            err_msg, val;
  CharPtr            no_match_fmt = "No match for %s, line %d";
  CharPtr            bad_col_val_fmt = "Did not set value for column %d, line %d";
  CharPtr            num_affected_fmt = "%d fields affected";
  Boolean            skip_column, success;

  if (sep == NULL) {
    ValNodeAddPointer (&err_list, 0, StringSave ("No SeqEntry"));
  }
  if (table == NULL) {
    ValNodeAddPointer (&err_list, 0, StringSave ("No table"));
  }
  if (columns == NULL) {
    ValNodeAddPointer (&err_list, 0, StringSave ("No column information"));
  }
  if (err_list != NULL) {
    return err_list;
  }

  entityID = SeqMgrGetEntityIDForSeqEntry (sep);

  for (row = table, line_num = 1; row != NULL; row = row->next, line_num++) {
    id_cell = FindMatchChoiceInLine (row->data.ptrvalue, columns);
    if (id_cell == NULL || StringHasNoText (id_cell->data.ptrvalue)) {
      ReportEmptyIDColumn (&err_list, line_num);
      continue;
    }

    matches = FindMatchForRow (id_cell, entityID, sep);
    if (matches == NULL) {
      err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_match_fmt) + StringLen (id_cell->data.ptrvalue) + 15));
      sprintf (err_msg, no_match_fmt, id_cell->data.ptrvalue, line_num);
      ValNodeAddPointer (&err_list, 0, err_msg);
    } else {
      cell = row->data.ptrvalue;
      col_num = 1;
      for (col = columns; col != NULL; col = col->next, col_num++) {
        t = (TabColumnConfigPtr) col->data.ptrvalue;
        /* match columns, unconfigured columns and skipped blanks are ignored */
        skip_column = (Boolean) (t == NULL || t->match_type > 0
                                 || (t->skip_blank && (cell == NULL || StringHasNoText (cell->data.ptrvalue))));
        if (!skip_column) {
          targets = GetTargetListForRowAndColumn (id_cell->choice, matches, t->field);
          if (targets == NULL) {
            ReportMissingTargets (&err_list, t->field, id_cell->data.ptrvalue, col_num, line_num);
          } else {
            /* a missing cell is treated as an empty value */
            if (cell != NULL && cell->data.ptrvalue != NULL) {
              val = cell->data.ptrvalue;
            } else {
              val = "";
            }
            for (target = targets; target != NULL; target = target->next) {
              if (val[0] == 0) {
                success = RemoveFieldValueForObject (target->choice, target->data.ptrvalue, t->field, NULL);
              } else {
                success = SetFieldValueForObject (target->choice, target->data.ptrvalue, t->field, NULL,
                                                  val, t->existing_text);
              }
              if (!success) {
                err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_col_val_fmt) + 30));
                sprintf (err_msg, bad_col_val_fmt, col_num, line_num);
                ValNodeAddPointer (&err_list, 0, err_msg);
                continue;
              }
              num_fields_affected++;
              /* keep the mRNA product in step with a changed CDS product */
              if (t->match_mrna && IsFieldTypeCDSProduct (t->field)
                  && target->choice == OBJ_SEQFEAT
                  && AdjustmRNAProductToMatchProteinProduct (target->data.ptrvalue)) {
                num_fields_affected++;
              }
            }
          }
          targets = ValNodeFree (targets);
        }
        /* cells advance in step with columns, whether or not one was used */
        if (cell != NULL) {
          cell = cell->next;
        }
      }
    }
    matches = ValNodeFree (matches);
  }

  /* the summary message goes at the head of the returned list */
  err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (num_affected_fmt) + 15));
  sprintf (err_msg, num_affected_fmt, num_fields_affected);
  summary = ValNodeNew (NULL);
  summary->data.ptrvalue = err_msg;
  summary->next = err_list;
  err_list = summary;

  return err_list;
}
11150
11151
11152
/*
 * CheckTableForExistingText
 *
 * Read-only pre-check for applying a table of values: walks every row of
 * 'table', locates the objects the row refers to, and reports each target
 * field that already contains text.  Nothing in the record is modified.
 *
 * Parameters:
 *   sep     - SeqEntry to search; its entityID is looked up with
 *             SeqMgrGetEntityIDForSeqEntry.
 *   table   - list of rows; each row's data.ptrvalue is itself a list of
 *             cells whose data.ptrvalue is the cell text.
 *   columns - list whose data.ptrvalue entries are TabColumnConfigPtr, one
 *             per table column, walked in parallel with the cells of a row.
 *
 * Returns a ValNode list of messages (NULL when there is nothing to report).
 * Each node's data.ptrvalue is a heap-allocated string.
 *   choice 1       - a required argument was NULL (returned immediately)
 *   choice 0       - "No match" messages and the leading summary line
 *   choice col_num - "already has value" messages
 * Entries added by ReportEmptyIDColumn and ReportMissingTargets carry
 * whatever choice those helpers assign.  When at least one field already has
 * text, a summary node is placed at the head of the list.
 * NOTE(review): the caller presumably owns and frees the returned list and
 * its strings - confirm against callers.
 */
NLM_EXTERN ValNodePtr CheckTableForExistingText (SeqEntryPtr sep, ValNodePtr table, ValNodePtr columns)
{
  ValNodePtr         err_list = NULL, vnp;
  ValNodePtr         line_vnp, val_vnp, col_vnp;
  ValNodePtr         match_choice, match_list;
  ValNodePtr         target_list, feat_vnp;
  Int4               line_num, col_num, num_existing_text = 0;
  Uint2              entityID;
  TabColumnConfigPtr t;
  CharPtr            err_msg, str, qual_name, val;
  CharPtr            no_match_fmt = "No match for %s, line %d";
  CharPtr            already_has_val_fmt = "%s already has value '%s' (column %d), line %d.  Replacement is '%s'";
  CharPtr            num_existing_text_fmt = "%d fields already have text.";

  /* Report every missing argument, not just the first one. */
  if (sep == NULL) {
    ValNodeAddPointer (&err_list, 1, StringSave ("No SeqEntry"));
  }
  if (table == NULL) {
    ValNodeAddPointer (&err_list, 1, StringSave ("No table"));
  }
  if (columns == NULL) {
    ValNodeAddPointer (&err_list, 1, StringSave ("No column information"));
  }
  if (err_list != NULL) {
    return err_list;
  }

  entityID = SeqMgrGetEntityIDForSeqEntry (sep);

  for (line_vnp = table, line_num = 1; line_vnp != NULL; line_vnp = line_vnp->next, line_num++) {
    match_choice = FindMatchChoiceInLine (line_vnp->data.ptrvalue, columns);
    if (match_choice == NULL || StringHasNoText (match_choice->data.ptrvalue)) {
      ReportEmptyIDColumn (&err_list, line_num);
      continue;
    }

    match_list = FindMatchForRow (match_choice, entityID, sep);
    if (match_list == NULL) {
      /* +15 leaves room for the decimal line number and the terminator. */
      err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_match_fmt)
                                                  + StringLen (match_choice->data.ptrvalue)
                                                  + 15));
      if (err_msg != NULL) {
        sprintf (err_msg, no_match_fmt, match_choice->data.ptrvalue, line_num);
        ValNodeAddPointer (&err_list, 0, err_msg);
      }
    } else {
      /*
       * Walk the row's cells in step with the column configuration.  The
       * cell cursor is advanced in the loop increment so that every path
       * through the body (including 'continue') keeps the two in sync; a
       * row with fewer cells than columns simply leaves val_vnp at NULL.
       */
      for (val_vnp = line_vnp->data.ptrvalue, col_vnp = columns, col_num = 1;
           col_vnp != NULL;
           col_vnp = col_vnp->next, col_num++,
             val_vnp = ((val_vnp == NULL) ? NULL : val_vnp->next)) {
        t = (TabColumnConfigPtr) col_vnp->data.ptrvalue;

        /* Skip unconfigured columns, match (ID) columns, and blank cells
         * in columns configured with skip_blank. */
        if (t == NULL || t->match_type > 0
            || (t->skip_blank && (val_vnp == NULL || StringHasNoText (val_vnp->data.ptrvalue)))) {
          continue;
        }

        target_list = GetTargetListForRowAndColumn (match_choice->choice, match_list, t->field);
        if (target_list == NULL) {
          ReportMissingTargets (&err_list, t->field, match_choice->data.ptrvalue, col_num, line_num);
        } else {
          /* Text that would replace the existing value; shown in the message. */
          if (val_vnp == NULL || val_vnp->data.ptrvalue == NULL) {
            val = "";
          } else {
            val = val_vnp->data.ptrvalue;
          }

          for (feat_vnp = target_list; feat_vnp != NULL; feat_vnp = feat_vnp->next) {
            str = GetFieldValueForObject (feat_vnp->choice, feat_vnp->data.ptrvalue, t->field, NULL);
            if (!StringHasNoText (str)) {
              /* NOTE(review): qual_name is not released here; if
               * SummarizeFieldType returns newly allocated memory this
               * leaks once per report - confirm against its definition. */
              qual_name = SummarizeFieldType (t->field);
              /* +30 covers two decimal integers and the terminator. */
              err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (already_has_val_fmt)
                                                          + StringLen (qual_name) + StringLen (str)
                                                          + StringLen (val)
                                                          + 30));
              if (err_msg != NULL) {
                sprintf (err_msg, already_has_val_fmt, qual_name, str, col_num, line_num, val);
                ValNodeAddPointer (&err_list, col_num, err_msg);
              }
              /* Count the field even if the message could not be allocated. */
              num_existing_text++;
            }
            str = MemFree (str);
          }
        }
        target_list = ValNodeFree (target_list);
      }
    }
    match_list = ValNodeFree (match_list);
  }

  if (num_existing_text > 0) {
    /* Put a one-line summary in front of the detailed messages. */
    err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (num_existing_text_fmt)
                                                + 15));
    vnp = ValNodeNew (NULL);
    if (err_msg != NULL && vnp != NULL) {
      sprintf (err_msg, num_existing_text_fmt, num_existing_text);
      vnp->choice = 0;
      vnp->data.ptrvalue = err_msg;
      vnp->next = err_list;
      err_list = vnp;
    } else {
      /* Allocation failed: drop the summary but keep the detailed list. */
      err_msg = MemFree (err_msg);
      vnp = ValNodeFree (vnp);
    }
  }

  return err_list;
}
11250
11251
11252
11253
11254