1.1.8
by Aaron M. Ucko
Import upstream version 6.1.20080302 |
1 |
/* macro.c
|
2 |
* ===========================================================================
|
|
3 |
*
|
|
4 |
* PUBLIC DOMAIN NOTICE
|
|
5 |
* National Center for Biotechnology Information (NCBI)
|
|
6 |
*
|
|
7 |
* This software/database is a "United States Government Work" under the
|
|
8 |
* terms of the United States Copyright Act. It was written as part of
|
|
9 |
* the author's official duties as a United States Government employee and
|
|
10 |
* thus cannot be copyrighted. This software/database is freely available
|
|
11 |
* to the public for use. The National Library of Medicine and the U.S.
|
|
12 |
* Government do not place any restriction on its use or reproduction.
|
|
13 |
* We would, however, appreciate having the NCBI and the author cited in
|
|
14 |
* any work or product based on this material
|
|
15 |
*
|
|
16 |
* Although all reasonable efforts have been taken to ensure the accuracy
|
|
17 |
* and reliability of the software and data, the NLM and the U.S.
|
|
18 |
* Government do not and cannot warrant the performance or results that
|
|
19 |
* may be obtained by using this software or data. The NLM and the U.S.
|
|
20 |
* Government disclaim all warranties, express or implied, including
|
|
21 |
* warranties of performance, merchantability or fitness for any particular
|
|
22 |
* purpose.
|
|
23 |
*
|
|
24 |
* ===========================================================================
|
|
25 |
*
|
|
26 |
* File Name: macro.c
|
|
27 |
*
|
|
28 |
* Author: Colleen Bollin
|
|
29 |
*
|
|
30 |
* Version Creation Date: 11/8/2007
|
|
31 |
*
|
|
32 |
* $Revision: 1.58 $
|
|
33 |
*
|
|
34 |
* File Description:
|
|
35 |
*
|
|
36 |
* Modifications:
|
|
37 |
* --------------------------------------------------------------------------
|
|
38 |
* Date Name Description of modification
|
|
39 |
* ------- ---------- -----------------------------------------------------
|
|
40 |
*
|
|
41 |
*
|
|
42 |
* ==========================================================================
|
|
43 |
*/
|
|
44 |
||
45 |
#include <asn.h> |
|
46 |
#include <objmacro.h> |
|
47 |
#include <objfeat.h> |
|
48 |
#include <subutil.h> |
|
49 |
#include <objmgr.h> |
|
50 |
#include <objfdef.h> |
|
51 |
#include <gbftdef.h> |
|
52 |
#include <sqnutils.h> |
|
53 |
#include <edutil.h> |
|
54 |
#include <gather.h> |
|
55 |
#include <asn2gnbi.h> |
|
56 |
#define NLM_GENERATED_CODE_PROTO
|
|
57 |
#include <macroapi.h> |
|
58 |
#include <seqport.h> |
|
59 |
||
60 |
/* structure and create/free functions for CGPSet, used for handling CDS-Gene-Prot sets */
|
|
61 |
typedef struct cgpset |
|
62 |
{
|
|
63 |
ValNodePtr cds_list; |
|
64 |
ValNodePtr gene_list; |
|
65 |
ValNodePtr prot_list; |
|
66 |
ValNodePtr mrna_list; |
|
67 |
} CGPSetData, PNTR CGPSetPtr; |
|
68 |
||
69 |
||
70 |
||
71 |
static CGPSetPtr CGPSetNew (void) |
|
72 |
{
|
|
73 |
CGPSetPtr c; |
|
74 |
||
75 |
c = (CGPSetPtr) MemNew (sizeof (CGPSetData)); |
|
76 |
c->cds_list = NULL; |
|
77 |
c->gene_list = NULL; |
|
78 |
c->prot_list = NULL; |
|
79 |
c->mrna_list = NULL; |
|
80 |
return c; |
|
81 |
}
|
|
82 |
||
83 |
||
84 |
/* Releases a CGPSet: each of the four lists is handed to ValNodeFree and
 * then the structure itself is handed to MemFree.
 *
 * A NULL argument is returned unchanged; otherwise the value returned by
 * MemFree is returned.
 */
static CGPSetPtr CGPSetFree (CGPSetPtr c)
{
  if (c == NULL) {
    return c;
  }

  c->cds_list = ValNodeFree (c->cds_list);
  c->gene_list = ValNodeFree (c->gene_list);
  c->prot_list = ValNodeFree (c->prot_list);
  c->mrna_list = ValNodeFree (c->mrna_list);
  return MemFree (c);
}
|
|
95 |
||
96 |
||
97 |
/* Frees a ValNode list whose data.ptrvalue fields hold CGPSet pointers.
 * Each node is detached from its successor before being freed, so
 * ValNodeFree only ever sees a single node; the CGPSet payload is released
 * with CGPSetFree first.
 *
 * Always returns NULL.
 */
static ValNodePtr FreeCGPSetList (ValNodePtr vnp)
{
  ValNodePtr node;
  ValNodePtr following;

  for (node = vnp; node != NULL; node = following) {
    following = node->next;
    node->next = NULL;
    node->data.ptrvalue = CGPSetFree (node->data.ptrvalue);
    ValNodeFree (node);
  }
  return NULL;
}
|
|
110 |
||
111 |
||
112 |
/* generic functions for mapping constraints */
|
|
113 |
||
114 |
typedef struct feattypefeatdef { |
|
115 |
Int4 feattype; |
|
116 |
Int4 featdef; |
|
117 |
CharPtr featname; |
|
118 |
} FeatTypeFeatDefData, PNTR FeatTypeFeatDefPtr; |
|
119 |
||
120 |
/* Mapping between Feature_type values, FEATDEF values and feature names.
 * The lookup functions in this file search it linearly from the first row,
 * so when two rows share a key the earlier row wins.
 *
 * Row 0 is the "any" entry; the choice-list builders in this file start
 * their loops at index 1 and therefore do not offer it.
 *
 * NOTE(review): Feature_type_otherRNA and Feature_type_misc_RNA both carry
 * the name "misc_RNA", so a lookup by that name yields Feature_type_otherRNA
 * -- confirm this is intended.
 */
static FeatTypeFeatDefData feattype_featdef[] = {
  { Feature_type_any , FEATDEF_ANY , "any" } ,
  { Feature_type_gene , FEATDEF_GENE , "gene" } ,
  { Feature_type_org , FEATDEF_ORG , "org" } ,
  { Feature_type_cds , FEATDEF_CDS , "CDS" } ,
  { Feature_type_prot , FEATDEF_PROT , "Protein" } ,
  { Feature_type_preRNA , FEATDEF_preRNA , "preRNA" } ,
  { Feature_type_mRNA , FEATDEF_mRNA , "mRNA" } ,
  { Feature_type_tRNA , FEATDEF_tRNA , "tRNA" } ,
  { Feature_type_rRNA , FEATDEF_rRNA , "rRNA" } ,
  { Feature_type_snRNA , FEATDEF_snRNA , "snRNA" } ,
  { Feature_type_scRNA , FEATDEF_scRNA , "scRNA" } ,
  { Feature_type_otherRNA , FEATDEF_otherRNA , "misc_RNA" } ,
  { Feature_type_pub , FEATDEF_PUB , "pub" } ,
  { Feature_type_seq , FEATDEF_SEQ , "seq" } ,
  { Feature_type_imp , FEATDEF_IMP , "imp" } ,
  { Feature_type_allele , FEATDEF_allele , "allele" } ,
  { Feature_type_attenuator , FEATDEF_attenuator , "attenuator" } ,
  { Feature_type_c_region , FEATDEF_C_region , "c_region" } ,
  { Feature_type_caat_signal , FEATDEF_CAAT_signal , "caat_signal" } ,
  { Feature_type_imp_CDS , FEATDEF_Imp_CDS , "imp_CDS" } ,
  { Feature_type_conflict , FEATDEF_conflict , "conflict" } ,
  { Feature_type_d_loop , FEATDEF_D_loop , "d_loop" } ,
  { Feature_type_d_segment , FEATDEF_D_segment , "d_segment" } ,
  { Feature_type_enhancer , FEATDEF_enhancer , "enhancer" } ,
  { Feature_type_exon , FEATDEF_exon , "exon" } ,
  { Feature_type_gC_signal , FEATDEF_GC_signal , "gC_signal" } ,
  { Feature_type_iDNA , FEATDEF_iDNA , "iDNA" } ,
  { Feature_type_intron , FEATDEF_intron , "intron" } ,
  { Feature_type_j_segment , FEATDEF_J_segment , "j_segment" } ,
  { Feature_type_ltr , FEATDEF_LTR , "ltr" } ,
  { Feature_type_mat_peptide , FEATDEF_mat_peptide , "mat_peptide" } ,
  { Feature_type_misc_binding , FEATDEF_misc_binding , "misc_binding" } ,
  { Feature_type_misc_difference , FEATDEF_misc_difference , "misc_difference" } ,
  { Feature_type_misc_feature , FEATDEF_misc_feature , "misc_feature" } ,
  { Feature_type_misc_recomb , FEATDEF_misc_recomb , "misc_recomb" } ,
  { Feature_type_misc_RNA , FEATDEF_misc_RNA , "misc_RNA" } ,
  { Feature_type_misc_signal , FEATDEF_misc_signal , "misc_signal" } ,
  { Feature_type_misc_structure , FEATDEF_misc_structure , "misc_structure" } ,
  { Feature_type_modified_base , FEATDEF_modified_base , "modified_base" } ,
  { Feature_type_mutation , FEATDEF_mutation , "mutation" } ,
  { Feature_type_n_region , FEATDEF_N_region , "n_region" } ,
  { Feature_type_old_sequence , FEATDEF_old_sequence , "old_sequence" } ,
  { Feature_type_polyA_signal , FEATDEF_polyA_signal , "polyA_signal" } ,
  { Feature_type_polyA_site , FEATDEF_polyA_site , "polyA_site" } ,
  { Feature_type_precursor_RNA , FEATDEF_precursor_RNA , "precursor_RNA" } ,
  { Feature_type_prim_transcript , FEATDEF_prim_transcript , "prim_transcript" } ,
  { Feature_type_primer_bind , FEATDEF_primer_bind , "primer_bind" } ,
  { Feature_type_promoter , FEATDEF_promoter , "promoter" } ,
  { Feature_type_protein_bind , FEATDEF_protein_bind , "protein_bind" } ,
  { Feature_type_rbs , FEATDEF_RBS , "rbs" } ,
  { Feature_type_repeat_region , FEATDEF_repeat_region , "repeat_region" } ,
  { Feature_type_repeat_unit , FEATDEF_repeat_unit , "repeat_unit" } ,
  { Feature_type_rep_origin , FEATDEF_rep_origin , "rep_origin" } ,
  { Feature_type_s_region , FEATDEF_S_region , "s_region" } ,
  { Feature_type_satellite , FEATDEF_satellite , "satellite" } ,
  { Feature_type_sig_peptide , FEATDEF_sig_peptide , "sig_peptide" } ,
  { Feature_type_source , FEATDEF_source , "source" } ,
  { Feature_type_stem_loop , FEATDEF_stem_loop , "stem_loop" } ,
  { Feature_type_sts , FEATDEF_STS , "sts" } ,
  { Feature_type_tata_signal , FEATDEF_TATA_signal , "tata_signal" } ,
  { Feature_type_terminator , FEATDEF_terminator , "terminator" } ,
  { Feature_type_transit_peptide , FEATDEF_transit_peptide , "transit_peptide" } ,
  { Feature_type_unsure , FEATDEF_unsure , "unsure" } ,
  { Feature_type_v_region , FEATDEF_V_region , "v_region" } ,
  { Feature_type_v_segment , FEATDEF_V_segment , "v_segment" } ,
  { Feature_type_variation , FEATDEF_variation , "variation" } ,
  { Feature_type_virion , FEATDEF_virion , "virion" } ,
  { Feature_type_n3clip , FEATDEF_3clip , "3clip" } ,
  { Feature_type_n3UTR , FEATDEF_3UTR , "3UTR" } ,
  { Feature_type_n5clip , FEATDEF_5clip , "5clip" } ,
  { Feature_type_n5UTR , FEATDEF_5UTR , "5UTR" } ,
  { Feature_type_n10_signal , FEATDEF_10_signal , "10_signal" } ,
  { Feature_type_n35_signal , FEATDEF_35_signal , "35_signal" } ,
  { Feature_type_site_ref , FEATDEF_site_ref , "site_ref" } ,
  { Feature_type_region , FEATDEF_REGION , "region" } ,
  { Feature_type_comment , FEATDEF_COMMENT , "comment" } ,
  { Feature_type_bond , FEATDEF_BOND , "bond" } ,
  { Feature_type_site , FEATDEF_SITE , "site" } ,
  { Feature_type_rsite , FEATDEF_RSITE , "rsite" } ,
  { Feature_type_user , FEATDEF_USER , "user" } ,
  { Feature_type_txinit , FEATDEF_TXINIT , "txinit" } ,
  { Feature_type_num , FEATDEF_NUM , "num" } ,
  { Feature_type_psec_str , FEATDEF_PSEC_STR , "psec_str" } ,
  { Feature_type_non_std_residue , FEATDEF_NON_STD_RESIDUE , "non_std_residue" } ,
  { Feature_type_het , FEATDEF_HET , "het" } ,
  { Feature_type_biosrc , FEATDEF_BIOSRC , "biosrc" } ,
  { Feature_type_preprotein , FEATDEF_preprotein , "preprotein" } ,
  { Feature_type_mat_peptide_aa , FEATDEF_mat_peptide_aa , "mat_peptide_aa" } ,
  { Feature_type_sig_peptide_aa , FEATDEF_sig_peptide_aa , "sig_peptide_aa" } ,
  { Feature_type_transit_peptide_aa , FEATDEF_transit_peptide_aa , "transit_peptide_aa" } ,
  { Feature_type_snoRNA , FEATDEF_snoRNA , "snoRNA" } ,
  { Feature_type_gap , FEATDEF_gap , "gap" } ,
  { Feature_type_operon , FEATDEF_operon , "operon" } ,
  { Feature_type_oriT , FEATDEF_oriT , "oriT" } ,
  { Feature_type_ncRNA , FEATDEF_ncRNA , "ncRNA" } ,
  { Feature_type_tmRNA , FEATDEF_tmRNA , "tmRNA" }};
|
217 |
||
218 |
/* Number of rows in feattype_featdef.  The expansion is parenthesized so it
 * stays a single operand wherever the macro appears in a larger expression.
 */
#define NUM_feattype_featdef (sizeof (feattype_featdef) / sizeof (feattype_featdef[0]))
|
|
219 |
||
220 |
/* Translates a Feature_type value into its FEATDEF value by scanning
 * feattype_featdef from the first row.
 *
 * Returns the featdef of the first matching row, or FEATDEF_BAD when no row
 * has the requested feature type.
 */
NLM_EXTERN Int4 GetFeatdefFromFeatureType (Int4 feature_type)
{
  Int4 idx = 0;
  Int4 result = FEATDEF_BAD;

  while (idx < NUM_feattype_featdef) {
    if (feattype_featdef[idx].feattype == feature_type) {
      result = feattype_featdef[idx].featdef;
      break;
    }
    idx++;
  }
  return result;
}
|
|
231 |
||
232 |
||
233 |
/* Returns the name stored in feattype_featdef for a Feature_type value.
 *
 * The table is searched for the row whose feattype equals feature_type and
 * that row's name is returned.  The row index is used for the lookup, not the
 * feature type value itself, so the result is right even when feature type
 * values do not coincide with table positions and no out-of-range element
 * is read.
 *
 * Returns a pointer to a string constant (not to be freed by the caller);
 * "Unknown feature type" when no row matches.
 */
NLM_EXTERN CharPtr GetFeatureNameFromFeatureType (Int4 feature_type)
{
  Int4 i;

  for (i = 0; i < NUM_feattype_featdef; i++) {
    if (feature_type == feattype_featdef[i].feattype) {
      /* index with the matching row i, never with feature_type */
      return feattype_featdef[i].featname;
    }
  }
  return "Unknown feature type";
}
|
|
248 |
||
249 |
||
250 |
/* Compares two names while ignoring the separator characters ' ', '-' and
 * '_' wherever they occur in either string (leading, interior or trailing).
 * All other characters must match exactly; the comparison is case-sensitive.
 *
 * Two NULL pointers match each other; a NULL never matches a non-NULL string.
 *
 * Separators are skipped before every comparison, and the end-of-string test
 * is made before the pointers are advanced, so neither pointer can ever be
 * moved past its terminating NUL.
 */
static Boolean Matchnamestring (CharPtr name1, CharPtr name2)
{
  if (name1 == NULL && name2 == NULL) {
    return TRUE;
  }
  if (name1 == NULL || name2 == NULL) {
    return FALSE;
  }

  for (;;) {
    while (*name1 == ' ' || *name1 == '-' || *name1 == '_') {
      name1++;
    }
    while (*name2 == ' ' || *name2 == '-' || *name2 == '_') {
      name2++;
    }
    if (*name1 != *name2) {
      return FALSE;
    }
    if (*name1 == 0) {
      /* both strings ended at the same point */
      return TRUE;
    }
    name1++;
    name2++;
  }
}
|
|
277 |
||
278 |
||
279 |
/* Finds the Feature_type value for a feature name.  Names are compared with
 * Matchnamestring against each row of feattype_featdef in table order.
 *
 * Returns the feattype of the first row whose name matches, or -1 when no
 * row matches.
 */
NLM_EXTERN Int4 GetFeatureTypeByName (CharPtr feat_name)
{
  Int4 row = 0;

  while (row < NUM_feattype_featdef) {
    if (Matchnamestring (feattype_featdef[row].featname, feat_name)) {
      return feattype_featdef[row].feattype;
    }
    row++;
  }
  return -1;
}
|
|
290 |
||
291 |
||
292 |
/* Collects the table rows whose FEATDEF maps (via FindFeatFromFeatDefType)
 * to SEQFEAT_IMP, sorts the collected nodes with SortVnpByString and links
 * them onto *feature_type_list.
 *
 * Each collected node is created with ValNodeAddPointer, passing the
 * Feature_type value as the choice and a StringSave copy of the feature name
 * as the pointer.  The scan starts at table index 1 (index 0 is not
 * offered) and the Feature_type_gap row is skipped.
 */
NLM_EXTERN void AddImportFeaturesToChoiceList (ValNodePtr PNTR feature_type_list)
{
  ValNodePtr collected = NULL;
  CharPtr    label;
  Int4       row;
  Int4       ftype;

  for (row = 1; row < NUM_feattype_featdef; row++) {
    ftype = feattype_featdef[row].feattype;
    if (ftype != Feature_type_gap
        && FindFeatFromFeatDefType (feattype_featdef[row].featdef) == SEQFEAT_IMP) {
      label = GetFeatureNameFromFeatureType (ftype);
      if (label != NULL) {
        ValNodeAddPointer (&collected, ftype, StringSave (label));
      }
    }
  }

  collected = ValNodeSort (collected, SortVnpByString);
  ValNodeLink (feature_type_list, collected);
}
|
|
311 |
||
312 |
||
313 |
||
314 |
/* Reports whether a Feature_type value is one of the eight types this file
 * treats as "most used": gene, cds, prot, exon, intron, mRNA, rRNA, otherRNA.
 *
 * Returns TRUE for those values and FALSE for every other value.
 */
static Boolean IsMostUsedFeature (Uint1 val)
{
  switch (val) {
    case Feature_type_gene:
    case Feature_type_cds:
    case Feature_type_prot:
    case Feature_type_exon:
    case Feature_type_intron:
    case Feature_type_mRNA:
    case Feature_type_rRNA:
    case Feature_type_otherRNA:
      return TRUE;
    default:
      return FALSE;
  }
}
|
|
329 |
||
330 |
||
331 |
/* Comparison callback for ValNodeSort over feature choice nodes.
 *
 * ptr1 and ptr2 each point at a ValNodePtr.  Nodes whose choice is a
 * "most used" feature type (see IsMostUsedFeature) order before all others;
 * within the same group nodes are ordered by StringICmp on the strings in
 * data.ptrvalue.
 *
 * Returns 0 whenever an argument, a node or a name string is NULL.
 */
static int LIBCALLBACK SortVnpByFeatureName (VoidPtr ptr1, VoidPtr ptr2)
{
  ValNodePtr left;
  ValNodePtr right;
  CharPtr    left_name;
  CharPtr    right_name;
  Boolean    left_common;
  Boolean    right_common;

  if (ptr1 == NULL || ptr2 == NULL) {
    return 0;
  }

  left = *((ValNodePtr PNTR) ptr1);
  right = *((ValNodePtr PNTR) ptr2);
  if (left == NULL || right == NULL) {
    return 0;
  }

  left_common = IsMostUsedFeature (left->choice);
  right_common = IsMostUsedFeature (right->choice);
  if (left_common && !right_common) {
    return -1;
  }
  if (!left_common && right_common) {
    return 1;
  }

  /* same group: fall back to the names */
  left_name = (CharPtr) left->data.ptrvalue;
  right_name = (CharPtr) right->data.ptrvalue;
  if (left_name == NULL || right_name == NULL) {
    return 0;
  }
  return StringICmp (left_name, right_name);
}
|
|
361 |
||
362 |
||
363 |
/* Builds a choice node for every row of feattype_featdef from index 1
 * onward, except the Feature_type_gap row, sorts the nodes with
 * SortVnpByFeatureName and links them onto *feature_type_list.
 *
 * Each node is created with ValNodeAddPointer, passing the Feature_type
 * value as the choice and a StringSave copy of the feature name as the
 * pointer.
 */
NLM_EXTERN void AddAllFeaturesToChoiceList (ValNodePtr PNTR feature_type_list)
{
  ValNodePtr collected = NULL;
  CharPtr    label;
  Int4       row;
  Int4       ftype;

  for (row = 1; row < NUM_feattype_featdef; row++) {
    ftype = feattype_featdef[row].feattype;
    if (ftype != Feature_type_gap) {
      label = GetFeatureNameFromFeatureType (ftype);
      if (label != NULL) {
        ValNodeAddPointer (&collected, ftype, StringSave (label));
      }
    }
  }

  collected = ValNodeSort (collected, SortVnpByFeatureName);
  ValNodeLink (feature_type_list, collected);
}
|
|
379 |
||
380 |
||
381 |
typedef struct featqualgbqual { |
|
382 |
Int4 featqual; |
|
383 |
Int4 gbqual; |
|
384 |
CharPtr qualname; |
|
385 |
} FeatQualGBQualData, PNTR FeatQualGBQualPtr; |
|
386 |
||
387 |
/* Mapping between Feat_qual_legal values, GBQUAL values and qualifier names.
 * Searched linearly by the lookup functions in this file.  Names are matched
 * with Matchnamestring, which ignores ' ', '-' and '_'.
 */
static FeatQualGBQualData featqual_gbqual[] = {
  { Feat_qual_legal_allele , GBQUAL_allele , "allele" } ,
  { Feat_qual_legal_anticodon , GBQUAL_anticodon , "anticodon" } ,
  { Feat_qual_legal_bound_moiety , GBQUAL_bound_moiety , "bound-moiety" } ,
  { Feat_qual_legal_chromosome , GBQUAL_chromosome , "chromosome" } ,
  { Feat_qual_legal_citation , GBQUAL_citation , "citation" } ,
  { Feat_qual_legal_codon , GBQUAL_codon , "codon" } ,
  { Feat_qual_legal_codon_start , GBQUAL_codon_start , "codon-start" } ,
  { Feat_qual_legal_compare , GBQUAL_compare , "compare" } ,
  { Feat_qual_legal_cons_splice , GBQUAL_cons_splice , "cons-splice" } ,
  { Feat_qual_legal_db_xref , GBQUAL_db_xref , "db-xref" } ,
  { Feat_qual_legal_direction , GBQUAL_direction , "direction" } ,
  { Feat_qual_legal_environmental_sample , GBQUAL_environmental_sample , "environmental-sample" } ,
  { Feat_qual_legal_evidence , GBQUAL_evidence , "evidence" } ,
  { Feat_qual_legal_exception , GBQUAL_exception , "exception" } ,
  { Feat_qual_legal_experiment , GBQUAL_experiment , "experiment" } ,
  { Feat_qual_legal_focus , GBQUAL_focus , "focus" } ,
  { Feat_qual_legal_frequency , GBQUAL_frequency , "frequency" } ,
  { Feat_qual_legal_function , GBQUAL_function , "function" } ,
  /* the gene qualifier is named "locus" in this table */
  { Feat_qual_legal_gene , GBQUAL_gene , "locus" } ,
  { Feat_qual_legal_inference , GBQUAL_inference , "inference" } ,
  { Feat_qual_legal_label , GBQUAL_label , "label" } ,
  { Feat_qual_legal_locus_tag , GBQUAL_locus_tag , "locus-tag" } ,
  { Feat_qual_legal_map , GBQUAL_map , "map" } ,
  { Feat_qual_legal_mobile_element , GBQUAL_mobile_element , "mobile-element" } ,
  { Feat_qual_legal_mod_base , GBQUAL_mod_base , "mod-base" } ,
  { Feat_qual_legal_mol_type , GBQUAL_mol_type , "mol-type" } ,
  { Feat_qual_legal_ncRNA_class , GBQUAL_ncRNA_class , "ncRNA-class" } ,
  { Feat_qual_legal_note , GBQUAL_note , "note" } ,
  { Feat_qual_legal_number , GBQUAL_number , "number" } ,
  { Feat_qual_legal_old_locus_tag , GBQUAL_old_locus_tag , "old-locus-tag" } ,
  { Feat_qual_legal_operon , GBQUAL_operon , "operon" } ,
  { Feat_qual_legal_organism , GBQUAL_organism , "organism" } ,
  { Feat_qual_legal_organelle , GBQUAL_organelle , "organelle" } ,
  { Feat_qual_legal_partial , GBQUAL_partial , "partial" } ,
  { Feat_qual_legal_phenotype , GBQUAL_phenotype , "phenotype" } ,
  { Feat_qual_legal_plasmid , GBQUAL_plasmid , "plasmid" } ,
  { Feat_qual_legal_product , GBQUAL_product , "product" } ,
  { Feat_qual_legal_protein_id , GBQUAL_protein_id , "protein-id" } ,
  { Feat_qual_legal_pseudo , GBQUAL_pseudo , "pseudo" } ,
  { Feat_qual_legal_rearranged , GBQUAL_rearranged , "rearranged" } ,
  { Feat_qual_legal_replace , GBQUAL_replace , "replace" } ,
  { Feat_qual_legal_rpt_family , GBQUAL_rpt_family , "rpt-family" } ,
  { Feat_qual_legal_rpt_type , GBQUAL_rpt_type , "rpt-type" } ,
  { Feat_qual_legal_rpt_unit , GBQUAL_rpt_unit , "rpt-unit" } ,
  { Feat_qual_legal_rpt_unit_seq , GBQUAL_rpt_unit_seq , "rpt-unit-seq" } ,
  { Feat_qual_legal_rpt_unit_range , GBQUAL_rpt_unit_range , "rpt-unit-range" } ,
  { Feat_qual_legal_segment , GBQUAL_segment , "segment" } ,
  { Feat_qual_legal_sequenced_mol , GBQUAL_sequenced_mol , "sequenced-mol" } ,
  { Feat_qual_legal_standard_name , GBQUAL_standard_name , "standard-name" } ,
  { Feat_qual_legal_transcript_id , GBQUAL_transcript_id , "transcript-id" } ,
  { Feat_qual_legal_transgenic , GBQUAL_transgenic , "transgenic" } ,
  { Feat_qual_legal_translation , GBQUAL_translation , "translation" } ,
  { Feat_qual_legal_transl_except , GBQUAL_transl_except , "transl-except" } ,
  { Feat_qual_legal_transl_table , GBQUAL_transl_table , "transl-table" } ,
  { Feat_qual_legal_usedin , GBQUAL_usedin , "usedin" } };
|
443 |
||
444 |
/* Number of rows in featqual_gbqual.  The expansion is parenthesized so it
 * stays a single operand wherever the macro appears in a larger expression.
 */
#define NUM_featqual_gbqual (sizeof (featqual_gbqual) / sizeof (featqual_gbqual[0]))
|
|
445 |
||
446 |
||
447 |
/* Returns the number of rows in the featqual_gbqual table. */
NLM_EXTERN Int4 GetNumFeatQual (void)
{
  Int4 count = NUM_featqual_gbqual;

  return count;
}
|
|
451 |
||
452 |
||
453 |
/* Translates a Feat_qual_legal value into its GBQUAL value by scanning
 * featqual_gbqual from the first row.
 *
 * Returns the gbqual of the first matching row, or -1 when no row matches.
 */
static Int4 GetGBQualFromFeatQual (Int4 featqual)
{
  Int4 row = 0;
  Int4 result = -1;

  while (row < NUM_featqual_gbqual) {
    if (featqual_gbqual[row].featqual == featqual) {
      result = featqual_gbqual[row].gbqual;
      break;
    }
    row++;
  }
  return result;
}
|
|
464 |
||
465 |
||
466 |
/* Returns the qualifier name stored in featqual_gbqual for a Feat_qual_legal
 * value.  The result points at the table's string constant.
 *
 * Returns NULL when no row has the requested value.
 */
NLM_EXTERN CharPtr GetFeatQualName (Int4 featqual)
{
  CharPtr name = NULL;
  Int4    row = 0;

  while (row < NUM_featqual_gbqual) {
    if (featqual_gbqual[row].featqual == featqual) {
      name = featqual_gbqual[row].qualname;
      break;
    }
    row++;
  }
  return name;
}
|
|
477 |
||
478 |
||
479 |
/* Finds the Feat_qual_legal value for a qualifier name.  Names are compared
 * with Matchnamestring against each row of featqual_gbqual in table order.
 *
 * Returns the featqual of the first row whose name matches, or -1 when no
 * row matches.
 */
NLM_EXTERN Int4 GetFeatQualByName (CharPtr qualname)
{
  Int4 row = 0;

  while (row < NUM_featqual_gbqual) {
    if (Matchnamestring (featqual_gbqual[row].qualname, qualname)) {
      return featqual_gbqual[row].featqual;
    }
    row++;
  }
  return -1;
}
|
|
490 |
||
491 |
||
492 |
/* Adds one choice node per row of featqual_gbqual to *field_list.
 *
 * Each node is created with ValNodeAddPointer, passing the Feat_qual_legal
 * value as the choice and a StringSave copy of the qualifier name as the
 * pointer.
 *
 * The loop starts at row 0: this table has no leading placeholder row, so
 * starting at 1 would silently leave out its first qualifier.
 */
NLM_EXTERN void AddAllFeatureFieldsToChoiceList (ValNodePtr PNTR field_list)
{
  Int4 i;

  for (i = 0; i < NUM_featqual_gbqual; i++) {
    ValNodeAddPointer (field_list, featqual_gbqual[i].featqual, StringSave (featqual_gbqual[i].qualname));
  }
}
|
|
500 |
||
501 |
||
502 |
/* Values for the typeflag field of SrcQualSCQualData: they say which kind of
 * subtype constant a table row carries.
 */
#define IS_ORGMOD 1 /* subtype holds an ORGMOD_* value */
#define IS_SUBSRC 2 /* subtype holds a SUBSRC_* value */
#define IS_OTHER 3 /* neither; such rows store 0 as subtype in the table */
|
|
505 |
||
506 |
typedef struct srcqualscqual { |
|
507 |
Int4 srcqual; |
|
508 |
Int4 subtype; |
|
509 |
Int4 typeflag; |
|
510 |
CharPtr qualname; |
|
511 |
} SrcQualSCQualData, PNTR SrcQualSCQualPtr; |
|
512 |
||
513 |
static SrcQualSCQualData srcqual_scqual[] = { |
|
514 |
{ Source_qual_acronym , ORGMOD_acronym , IS_ORGMOD , "acronym" } , |
|
515 |
{ Source_qual_anamorph , ORGMOD_anamorph , IS_ORGMOD , "anamorph" } , |
|
516 |
{ Source_qual_authority , ORGMOD_authority , IS_ORGMOD , "authority" } , |
|
517 |
{ Source_qual_bio_material , ORGMOD_bio_material , IS_ORGMOD , "bio-material" } , |
|
518 |
{ Source_qual_biotype , ORGMOD_biotype , IS_ORGMOD , "biotype" } , |
|
519 |
{ Source_qual_biovar , ORGMOD_biovar , IS_ORGMOD , "biovar" } , |
|
520 |
{ Source_qual_breed , ORGMOD_breed , IS_ORGMOD , "breed" } , |
|
521 |
{ Source_qual_cell_line , SUBSRC_cell_line , IS_SUBSRC , "cell-line" } , |
|
522 |
{ Source_qual_cell_type , SUBSRC_cell_type , IS_SUBSRC , "cell-type" } , |
|
523 |
{ Source_qual_chemovar , ORGMOD_chemovar , IS_ORGMOD , "chemovar" } , |
|
524 |
{ Source_qual_chromosome , SUBSRC_chromosome , IS_SUBSRC , "chromosome" } , |
|
525 |
{ Source_qual_clone , SUBSRC_clone , IS_SUBSRC , "clone" } , |
|
526 |
{ Source_qual_clone_lib , SUBSRC_clone_lib , IS_SUBSRC , "clone-lib" } , |
|
527 |
{ Source_qual_collected_by , SUBSRC_collected_by , IS_SUBSRC , "collected-by" } , |
|
528 |
{ Source_qual_collection_date , SUBSRC_collection_date , IS_SUBSRC , "collection-date" } , |
|
529 |
{ Source_qual_common , ORGMOD_common , IS_ORGMOD , "common" } , |
|
530 |
{ Source_qual_common_name , 0 , IS_OTHER , "common name" } , |
|
531 |
{ Source_qual_country , SUBSRC_country , IS_SUBSRC , "country" } , |
|
532 |
{ Source_qual_cultivar , ORGMOD_cultivar , IS_ORGMOD , "cultivar" } , |
|
533 |
{ Source_qual_culture_collection , ORGMOD_culture_collection , IS_ORGMOD , "culture-collection" } , |
|
534 |
{ Source_qual_dev_stage , SUBSRC_dev_stage , IS_SUBSRC , "dev-stage" } , |
|
535 |
{ Source_qual_division , 0 , IS_OTHER, "divistion" } , |
|
536 |
{ Source_qual_dosage , ORGMOD_dosage , IS_ORGMOD , "dosage" } , |
|
537 |
{ Source_qual_ecotype , ORGMOD_ecotype , IS_ORGMOD , "ecotype" } , |
|
538 |
{ Source_qual_endogenous_virus_name , SUBSRC_endogenous_virus_name , IS_SUBSRC , "endogenous-virus-name" } , |
|
539 |
{ Source_qual_environmental_sample , SUBSRC_environmental_sample , IS_SUBSRC , "environmental-sample" } , |
|
540 |
{ Source_qual_forma , ORGMOD_forma , IS_ORGMOD , "forma" } , |
|
541 |
{ Source_qual_forma_specialis , ORGMOD_forma_specialis , IS_ORGMOD , "forma-specialis" } , |
|
542 |
{ Source_qual_frequency , SUBSRC_frequency , IS_SUBSRC , "frequency" } , |
|
543 |
{ Source_qual_fwd_primer_name , SUBSRC_fwd_primer_name , IS_SUBSRC , "fwd-primer-name" } , |
|
544 |
{ Source_qual_fwd_primer_seq , SUBSRC_fwd_primer_seq , IS_SUBSRC , "fwd-primer-seq" } , |
|
545 |
{ Source_qual_gb_acronym , ORGMOD_gb_acronym , IS_ORGMOD , "gb-acronym" } , |
|
546 |
{ Source_qual_gb_anamorph , ORGMOD_gb_anamorph , IS_ORGMOD , "gb-anamorph" } , |
|
547 |
{ Source_qual_gb_synonym , ORGMOD_gb_synonym , IS_ORGMOD , "gb-synonym" } , |
|
548 |
{ Source_qual_genotype , SUBSRC_genotype , IS_SUBSRC , "genotype" } , |
|
549 |
{ Source_qual_germline , SUBSRC_germline , IS_SUBSRC , "germline" } , |
|
550 |
{ Source_qual_group , ORGMOD_group , IS_ORGMOD , "group" } , |
|
551 |
{ Source_qual_haplotype , SUBSRC_haplotype , IS_SUBSRC , "haplotype" } , |
|
552 |
{ Source_qual_identified_by , SUBSRC_identified_by , IS_SUBSRC , "identified-by" } , |
|
553 |
{ Source_qual_insertion_seq_name , SUBSRC_insertion_seq_name , IS_SUBSRC , "insertion-seq-name" } , |
|
554 |
{ Source_qual_isolate , ORGMOD_isolate , IS_ORGMOD , "isolate" } , |
|
555 |
{ Source_qual_isolation_source , SUBSRC_isolation_source , IS_SUBSRC , "isolation-source" } , |
|
556 |
{ Source_qual_lab_host , SUBSRC_lab_host , IS_SUBSRC , "lab-host" } , |
|
557 |
{ Source_qual_lat_lon , SUBSRC_lat_lon , IS_SUBSRC , "lat-lon" } , |
|
558 |
{ Source_qual_lineage , 0, IS_OTHER, "lineage" } , |
|
559 |
{ Source_qual_map , SUBSRC_map , IS_SUBSRC , "map" } , |
|
560 |
{ Source_qual_metagenome_source , ORGMOD_metagenome_source , IS_ORGMOD , "metagenome-source" } , |
|
561 |
{ Source_qual_metagenomic , SUBSRC_metagenomic , IS_SUBSRC , "metagenomic" } , |
|
562 |
{ Source_qual_old_lineage , ORGMOD_old_lineage , IS_ORGMOD , "old-lineage" } , |
|
563 |
{ Source_qual_old_name , ORGMOD_old_name , IS_ORGMOD , "old-name" } , |
|
564 |
{ Source_qual_orgmod_note , ORGMOD_other, IS_ORGMOD, "orgmod note" } , |
|
565 |
{ Source_qual_nat_host , ORGMOD_nat_host , IS_ORGMOD , "nat-host" } , |
|
566 |
{ Source_qual_pathovar , ORGMOD_pathovar , IS_ORGMOD , "pathovar" } , |
|
567 |
{ Source_qual_plasmid_name , SUBSRC_plasmid_name , IS_SUBSRC , "plasmid-name" } , |
|
568 |
{ Source_qual_plastid_name , SUBSRC_plastid_name , IS_SUBSRC , "plastid-name" } , |
|
569 |
{ Source_qual_pop_variant , SUBSRC_pop_variant , IS_SUBSRC , "pop-variant" } , |
|
570 |
{ Source_qual_rearranged , SUBSRC_rearranged , IS_SUBSRC , "rearranged" } , |
|
571 |
{ Source_qual_rev_primer_name , SUBSRC_rev_primer_name , IS_SUBSRC , "rev-primer-name" } , |
|
572 |
{ Source_qual_rev_primer_seq , SUBSRC_rev_primer_seq , IS_SUBSRC , "rev-primer-seq" } , |
|
573 |
{ Source_qual_segment , SUBSRC_segment , IS_SUBSRC , "segment" } , |
|
574 |
{ Source_qual_serogroup , ORGMOD_serogroup , IS_ORGMOD , "serogroup" } , |
|
575 |
{ Source_qual_serotype , ORGMOD_serotype , IS_ORGMOD , "serotype" } , |
|
576 |
{ Source_qual_serovar , ORGMOD_serovar , IS_ORGMOD , "serovar" } , |
|
577 |
{ Source_qual_sex , SUBSRC_sex , IS_SUBSRC , "sex" } , |
|
578 |
{ Source_qual_specimen_voucher , ORGMOD_specimen_voucher , IS_ORGMOD , "specimen-voucher" } , |
|
579 |
{ Source_qual_strain , ORGMOD_strain , IS_ORGMOD , "strain" } , |
|
580 |
{ Source_qual_subclone , SUBSRC_subclone , IS_SUBSRC , "subclone" } , |
|
581 |
{ Source_qual_subgroup , ORGMOD_subgroup , IS_ORGMOD , "subgroup" } , |
|
582 |
{ Source_qual_subsource_note , SUBSRC_other , IS_SUBSRC , "subsource note" } , |
|
583 |
{ Source_qual_sub_species , ORGMOD_sub_species , IS_ORGMOD , "sub-species" } , |
|
584 |
{ Source_qual_substrain , ORGMOD_substrain , IS_ORGMOD , "substrain" } , |
|
585 |
{ Source_qual_subtype , ORGMOD_subtype , IS_ORGMOD , "subtype" } , |
|
586 |
{ Source_qual_synonym , ORGMOD_synonym , IS_ORGMOD , "synonym" } , |
|
587 |
{ Source_qual_taxname , 0 , IS_OTHER , "taxname" } , |
|
588 |
{ Source_qual_teleomorph , ORGMOD_teleomorph , IS_ORGMOD , "teleomorph" } , |
|
589 |
{ Source_qual_tissue_lib , SUBSRC_tissue_lib , IS_SUBSRC , "tissue-lib" } , |
|
590 |
{ Source_qual_tissue_type , SUBSRC_tissue_type , IS_SUBSRC , "tissue-type" } , |
|
591 |
{ Source_qual_transgenic , SUBSRC_transgenic , IS_SUBSRC , "transgenic" } , |
|
592 |
{ Source_qual_transposon_name , SUBSRC_transposon_name , IS_SUBSRC , "transposon-name" } , |
|
593 |
{ Source_qual_type , ORGMOD_type , IS_ORGMOD , "type" } , |
|
594 |
{ Source_qual_variety , ORGMOD_variety , IS_ORGMOD , "variety" } }; |
|
595 |
||
596 |
/* Number of rows in srcqual_scqual.  The expansion is parenthesized so it
 * stays a single operand wherever the macro appears in a larger expression.
 */
#define NUM_srcqual_scqual (sizeof (srcqual_scqual) / sizeof (srcqual_scqual[0]))
|
|
597 |
||
598 |
/* Translates a Source_qual value into its SUBSRC_* subtype.
 *
 * Only the first row of srcqual_scqual with the requested value is
 * considered.  Returns that row's subtype when the row is flagged IS_SUBSRC;
 * returns -1 when the row has another flag or no row matches.
 */
static Int4 GetSubSrcQualFromSrcQual (Int4 srcqual)
{
  Int4 row;

  for (row = 0; row < NUM_srcqual_scqual; row++) {
    if (srcqual_scqual[row].srcqual != srcqual) {
      continue;
    }
    return (srcqual_scqual[row].typeflag == IS_SUBSRC)
             ? srcqual_scqual[row].subtype
             : -1;
  }
  return -1;
}
|
|
613 |
||
614 |
||
615 |
/* Translates a Source_qual value into its ORGMOD_* subtype.
 *
 * Only the first row of srcqual_scqual with the requested value is
 * considered.  Returns that row's subtype when the row is flagged IS_ORGMOD;
 * returns -1 when the row has another flag or no row matches.
 */
static Int4 GetOrgModQualFromSrcQual (Int4 srcqual)
{
  Int4 row;

  for (row = 0; row < NUM_srcqual_scqual; row++) {
    if (srcqual_scqual[row].srcqual != srcqual) {
      continue;
    }
    return (srcqual_scqual[row].typeflag == IS_ORGMOD)
             ? srcqual_scqual[row].subtype
             : -1;
  }
  return -1;
}
|
|
630 |
||
631 |
||
632 |
/* Reports whether a Source_qual value is one of the five qualifiers this
 * file classifies as non-text: transgenic, germline, metagenomic,
 * environmental_sample, rearranged.
 *
 * Returns TRUE for those values and FALSE for every other value.
 */
NLM_EXTERN Boolean IsNonTextSourceQual (Int4 srcqual)
{
  switch (srcqual) {
    case Source_qual_transgenic:
    case Source_qual_germline:
    case Source_qual_metagenomic:
    case Source_qual_environmental_sample:
    case Source_qual_rearranged:
      return TRUE;
    default:
      return FALSE;
  }
}
|
|
647 |
||
648 |
||
649 |
/* Returns the name stored in srcqual_scqual for a Source_qual value.
 * The result points at a string constant.
 *
 * Returns "Unknown source qualifier" when no row has the requested value.
 */
NLM_EXTERN CharPtr GetSourceQualName (Int4 srcqual)
{
  Int4 row;

  for (row = 0; row < NUM_srcqual_scqual; row++) {
    if (srcqual_scqual[row].srcqual == srcqual
        && srcqual_scqual[row].qualname != NULL) {
      return srcqual_scqual[row].qualname;
    }
  }
  return "Unknown source qualifier";
}
|
|
664 |
||
665 |
||
666 |
/* Finds the Source_qual value for a qualifier name.  Names are compared with
 * Matchnamestring against each row of srcqual_scqual in table order.
 *
 * Returns the srcqual of the first row whose name matches, or -1 when no
 * row matches.
 */
NLM_EXTERN Int4 GetSourceQualTypeByName (CharPtr qualname)
{
  Int4 row = 0;

  while (row < NUM_srcqual_scqual) {
    if (Matchnamestring (srcqual_scqual[row].qualname, qualname)) {
      return srcqual_scqual[row].srcqual;
    }
    row++;
  }
  return -1;
}
|
|
677 |
||
678 |
||
679 |
/* Builds a ValNode list with one node per row of srcqual_scqual, in table
 * order.  Each node is created with ValNodeAddPointer, passing 0 as the
 * choice and a StringSave copy of the qualifier name as the pointer.
 *
 * Returns the head of the new list.
 */
NLM_EXTERN ValNodePtr GetSourceQualList (void)
{
  ValNodePtr head = NULL;
  Int4       row = 0;

  while (row < NUM_srcqual_scqual) {
    ValNodeAddPointer (&head, 0, StringSave (srcqual_scqual[row].qualname));
    row++;
  }
  return head;
}
|
|
689 |
||
690 |
typedef struct srclocgenome { |
|
691 |
Int4 srcloc; |
|
692 |
Int4 genome; |
|
693 |
CharPtr name; |
|
694 |
} SrcLocGenomeData, PNTR SrcLocGenomePtr; |
|
695 |
||
696 |
static SrcLocGenomeData srcloc_genome[] = { |
|
697 |
{ Source_location_unknown , GENOME_unknown , "unknown" } , |
|
698 |
{ Source_location_genomic , GENOME_genomic , "genomic" } , |
|
699 |
{ Source_location_chloroplast , GENOME_chloroplast , "chloroplast" } , |
|
700 |
{ Source_location_chromoplast , GENOME_chromoplast , "chromoplast" } , |
|
701 |
{ Source_location_kinetoplast , GENOME_kinetoplast , "kinetoplast" } , |
|
702 |
{ Source_location_mitochondrion , GENOME_mitochondrion , "mitochondrion" } , |
|
703 |
{ Source_location_plastid , GENOME_plastid , "plastid" } , |
|
704 |
{ Source_location_macronuclear , GENOME_macronuclear , "macronuclear" } , |
|
705 |
{ Source_location_extrachrom , GENOME_extrachrom , "extrachrom" } , |
|
706 |
{ Source_location_plasmid , GENOME_plasmid , "plasmid" } , |
|
707 |
{ Source_location_transposon , GENOME_transposon , "transposon" } , |
|
708 |
{ Source_location_insertion_seq , GENOME_insertion_seq , "insertion-seq" } , |
|
709 |
{ Source_location_cyanelle , GENOME_cyanelle , "cyanelle" } , |
|
710 |
{ Source_location_proviral , GENOME_proviral , "proviral" } , |
|
711 |
{ Source_location_virion , GENOME_virion , "virion" } , |
|
712 |
{ Source_location_nucleomorph , GENOME_nucleomorph , "nucleomorph" } , |
|
713 |
{ Source_location_apicoplast , GENOME_apicoplast , "apicoplast" } , |
|
714 |
{ Source_location_leucoplast , GENOME_leucoplast , "leucoplast" } , |
|
715 |
{ Source_location_proplastid , GENOME_proplastid , "proplastid" } , |
|
716 |
{ Source_location_endogenous_virus , GENOME_endogenous_virus , "endogenous-virus" } , |
|
717 |
{ Source_location_hydrogenosome , GENOME_hydrogenosome , "hydrogenosome" } , |
|
718 |
{ Source_location_chromosome , 21 , "chromosome" } , |
|
719 |
{ Source_location_chromatophore , 22 , "chromatophore" } }; |
|
720 |
||
721 |
#define NUM_srcloc_genome sizeof (srcloc_genome) / sizeof (SrcLocGenomeData)
|
|
722 |
||
723 |
/* Translates a Source_location_* value into the genome value stored in the
 * same srcloc_genome row.  Returns -1 when srcloc is not in the table.
 * (The stray trailing backslash that followed the parameter list has been
 * removed; it spliced the signature onto the next line for no reason.)
 */
NLM_EXTERN Int4 GenomeFromSrcLoc (Int4 srcloc)
{
  Int4 i;

  for (i = 0; i < NUM_srcloc_genome; i++) {
    if (srcloc_genome[i].srcloc == srcloc) {
      return srcloc_genome[i].genome;
    }
  }
  return -1;
}
|
|
734 |
||
735 |
||
736 |
/* Returns the name stored in the first srcloc_genome row whose genome value
 * equals genome, or NULL when there is none.  The pointer refers to the
 * table's own string, not a copy.
 */
NLM_EXTERN CharPtr LocNameFromGenome (Int4 genome)
{
  Int4 row = 0;

  while (row < NUM_srcloc_genome && srcloc_genome[row].genome != genome) {
    row++;
  }
  if (row < NUM_srcloc_genome) {
    return srcloc_genome[row].name;
  }
  return NULL;
}
|
|
747 |
||
748 |
||
749 |
/* Returns the genome value of the first srcloc_genome row whose name
 * compares equal to loc_name under StringICmp, or -1 when no row matches.
 * Note this lookup uses StringICmp whereas the sibling name lookups in this
 * file use StringCmp.
 */
static Int4 GenomeFromLocName (CharPtr loc_name)
{
  Int4 row = 0;

  while (row < NUM_srcloc_genome
         && StringICmp (srcloc_genome[row].name, loc_name) != 0) {
    row++;
  }
  if (row < NUM_srcloc_genome) {
    return srcloc_genome[row].genome;
  }
  return -1;
}
|
|
760 |
||
761 |
||
762 |
/* Builds a ValNode list with one entry per srcloc_genome row, in table
 * order.  Each entry is added with the row's srcloc value and a StringSave'd
 * copy of its label.  When for_remove is set, the Source_location_unknown
 * row is labelled "any" instead of its table name.
 */
NLM_EXTERN ValNodePtr GetLocationList (Boolean for_remove)
{
  ValNodePtr head = NULL;
  CharPtr    label;
  Int4       row = 0;

  while (row < NUM_srcloc_genome) {
    label = srcloc_genome[row].name;
    if (for_remove && srcloc_genome[row].srcloc == Source_location_unknown) {
      label = "any";
    }
    ValNodeAddPointer (&head, srcloc_genome[row].srcloc, StringSave (label));
    row++;
  }
  return head;
}
|
|
776 |
||
777 |
||
778 |
typedef struct srcorigorigin { |
|
779 |
Int4 srcorig; |
|
780 |
Int4 origin; |
|
781 |
CharPtr name; |
|
782 |
} SrcOrigOriginData, PNTR SrcrigOriginPtr; |
|
783 |
||
784 |
static SrcOrigOriginData srcorig_origin[] = { |
|
785 |
{ Source_origin_unknown , 0 , "unknown" } , |
|
786 |
{ Source_origin_natural , 1 , "natural" } , |
|
787 |
{ Source_origin_natmut , 2 , "natmut" } , |
|
788 |
{ Source_origin_mut , 3 , "mut" } , |
|
789 |
{ Source_origin_artificial , 4 , "artificial" } , |
|
790 |
{ Source_origin_synthetic , 5 , "synthetic" } , |
|
791 |
{ Source_origin_other , 255 , "other" } }; |
|
792 |
||
793 |
#define NUM_srcorig_origin sizeof (srcorig_origin) / sizeof (SrcOrigOriginData)
|
|
794 |
||
795 |
/* Translates a Source_origin_* value into the origin value stored in the
 * same srcorig_origin row.  Returns -1 when srcorig is not in the table.
 */
NLM_EXTERN Int4 OriginFromSrcOrig (Int4 srcorig)
{
  Int4 row = 0;

  while (row < NUM_srcorig_origin && srcorig_origin[row].srcorig != srcorig) {
    row++;
  }
  if (row < NUM_srcorig_origin) {
    return srcorig_origin[row].origin;
  }
  return -1;
}
|
|
806 |
||
807 |
||
808 |
/* Returns the name stored in the first srcorig_origin row whose origin value
 * equals origin, or NULL when there is none.  The pointer refers to the
 * table's own string, not a copy.
 */
NLM_EXTERN CharPtr OriginNameFromOrigin (Int4 origin)
{
  Int4 row = 0;

  while (row < NUM_srcorig_origin && srcorig_origin[row].origin != origin) {
    row++;
  }
  if (row < NUM_srcorig_origin) {
    return srcorig_origin[row].name;
  }
  return NULL;
}
|
|
819 |
||
820 |
||
821 |
/* Returns the origin value of the first srcorig_origin row whose name
 * compares equal to origin_name under StringCmp, or -1 when no row matches.
 */
static Int4 OriginFromOriginName (CharPtr origin_name)
{
  Int4 row = 0;

  while (row < NUM_srcorig_origin
         && StringCmp (srcorig_origin[row].name, origin_name) != 0) {
    row++;
  }
  if (row < NUM_srcorig_origin) {
    return srcorig_origin[row].origin;
  }
  return -1;
}
|
|
832 |
||
833 |
||
834 |
/* Builds a ValNode list with one entry per srcorig_origin row, in table
 * order.  Each entry is added with the row's srcorig value and a
 * StringSave'd copy of its label.  When for_remove is set, the
 * Source_origin_unknown row is labelled "any" instead of its table name.
 */
NLM_EXTERN ValNodePtr GetOriginList (Boolean for_remove)
{
  ValNodePtr head = NULL;
  CharPtr    label;
  Int4       row = 0;

  while (row < NUM_srcorig_origin) {
    label = srcorig_origin[row].name;
    if (for_remove && srcorig_origin[row].srcorig == Source_origin_unknown) {
      label = "any";
    }
    ValNodeAddPointer (&head, srcorig_origin[row].srcorig, StringSave (label));
    row++;
  }
  return head;
}
|
|
848 |
||
849 |
||
850 |
typedef struct cdsgeneprotfieldname { |
|
851 |
Int4 field; |
|
852 |
CharPtr name; |
|
853 |
} CDSGeneProtFieldNameData, PNTR CDSGeneProtFieldNamePtr; |
|
854 |
||
855 |
static CDSGeneProtFieldNameData cdsgeneprotfield_name[] = { |
|
856 |
{ CDSGeneProt_field_cds_comment , "CDS comment" } , |
|
857 |
{ CDSGeneProt_field_gene_locus , "gene locus" } , |
|
858 |
{ CDSGeneProt_field_gene_description , "gene description" } , |
|
859 |
{ CDSGeneProt_field_gene_comment , "gene comment" } , |
|
860 |
{ CDSGeneProt_field_gene_allele , "allele" } , |
|
861 |
{ CDSGeneProt_field_gene_maploc , "maploc" } , |
|
862 |
{ CDSGeneProt_field_gene_locus_tag , "locus tag" } , |
|
863 |
{ CDSGeneProt_field_gene_synonym , "synonym" } , |
|
864 |
{ CDSGeneProt_field_gene_old_locus_tag , "old locus tag" } , |
|
865 |
{ CDSGeneProt_field_mrna_product , "mRNA product" } , |
|
866 |
{ CDSGeneProt_field_mrna_comment , "mRNA comment" } , |
|
867 |
{ CDSGeneProt_field_prot_name , "protein name" } , |
|
868 |
{ CDSGeneProt_field_prot_description , "protein description" } , |
|
869 |
{ CDSGeneProt_field_prot_ec_number , "protein EC number" } , |
|
870 |
{ CDSGeneProt_field_prot_activity , "protein activity" } , |
|
871 |
{ CDSGeneProt_field_prot_comment , "protein comment" } , |
|
872 |
{ CDSGeneProt_field_mat_peptide_name , "mat-peptide name" } , |
|
873 |
{ CDSGeneProt_field_mat_peptide_description , "mat-peptide description" } , |
|
874 |
{ CDSGeneProt_field_mat_peptide_ec_number , "mat-peptide EC number" } , |
|
875 |
{ CDSGeneProt_field_mat_peptide_activity , "mat-peptide activity" } , |
|
876 |
{ CDSGeneProt_field_mat_peptide_comment , "mat-peptide comment" } }; |
|
877 |
||
878 |
#define NUM_cdsgeneprotfield_name sizeof (cdsgeneprotfield_name) / sizeof (CDSGeneProtFieldNameData)
|
|
879 |
||
880 |
/* Returns the display name stored for field in cdsgeneprotfield_name, or
 * NULL when field is not in the table.  The pointer refers to the table's
 * own string, not a copy.
 */
NLM_EXTERN CharPtr CDSGeneProtNameFromField (Int4 field)
{
  Int4 row = 0;

  while (row < NUM_cdsgeneprotfield_name
         && cdsgeneprotfield_name[row].field != field) {
    row++;
  }
  if (row < NUM_cdsgeneprotfield_name) {
    return cdsgeneprotfield_name[row].name;
  }
  return NULL;
}
|
|
891 |
||
892 |
||
893 |
/* Adds one entry per cdsgeneprotfield_name row to *field_list, in table
 * order, via ValNodeAddPointer.  Each entry carries the row's field value
 * and a StringSave'd copy of its name.
 */
NLM_EXTERN void AddAllCDSGeneProtFieldsToChoiceList (ValNodePtr PNTR field_list)
{
  Int4 row = 0;

  while (row < NUM_cdsgeneprotfield_name) {
    ValNodeAddPointer (field_list,
                       cdsgeneprotfield_name[row].field,
                       StringSave (cdsgeneprotfield_name[row].name));
    row++;
  }
}
|
|
901 |
||
902 |
||
903 |
typedef struct cdsgeneprotfeatname { |
|
904 |
Int4 feature_type; |
|
905 |
CharPtr name; |
|
906 |
} CDSGeneProtFeatNameData, PNTR CDSGeneProtFeatNamePtr; |
|
907 |
||
908 |
static CDSGeneProtFeatNameData cdsgeneprotfeat_name[] = { |
|
909 |
{ CDSGeneProt_feature_type_constraint_gene , "gene" } , |
|
910 |
{ CDSGeneProt_feature_type_constraint_mRNA , "mRNA" } , |
|
911 |
{ CDSGeneProt_feature_type_constraint_cds , "CDS" } , |
|
912 |
{ CDSGeneProt_feature_type_constraint_prot , "protein" } , |
|
913 |
{ CDSGeneProt_feature_type_constraint_mat_peptide , "mat-peptide" }}; |
|
914 |
||
915 |
#define NUM_cdsgeneprotfeat_name sizeof (cdsgeneprotfeat_name) / sizeof (CDSGeneProtFeatNameData)
|
|
916 |
||
917 |
/* Returns the display name stored for feature_type in cdsgeneprotfeat_name,
 * or NULL when feature_type is not in the table.  The pointer refers to the
 * table's own string, not a copy.
 */
NLM_EXTERN CharPtr CDSGeneProtFeatureNameFromFeatureType (Int4 feature_type)
{
  Int4 row = 0;

  while (row < NUM_cdsgeneprotfeat_name
         && cdsgeneprotfeat_name[row].feature_type != feature_type) {
    row++;
  }
  if (row < NUM_cdsgeneprotfeat_name) {
    return cdsgeneprotfeat_name[row].name;
  }
  return NULL;
}
|
|
928 |
||
929 |
||
930 |
/* Adds one entry per cdsgeneprotfeat_name row to *field_list, in table
 * order, via ValNodeAddPointer.  Each entry carries the row's feature_type
 * value and a StringSave'd copy of its name.
 */
NLM_EXTERN void AddAllCDSGeneProtFeaturesToChoiceList (ValNodePtr PNTR field_list)
{
  Int4 row = 0;

  while (row < NUM_cdsgeneprotfeat_name) {
    ValNodeAddPointer (field_list,
                       cdsgeneprotfeat_name[row].feature_type,
                       StringSave (cdsgeneprotfeat_name[row].name));
    row++;
  }
}
|
|
938 |
||
939 |
||
940 |
NLM_EXTERN FeatureFieldPtr FeatureFieldFromCDSGeneProtField (Uint2 cds_gene_prot_field) |
|
941 |
{
|
|
942 |
FeatureFieldPtr f = NULL; |
|
943 |
||
944 |
switch (cds_gene_prot_field) { |
|
945 |
case CDSGeneProt_field_cds_comment: |
|
946 |
f = FeatureFieldNew (); |
|
947 |
f->type = Feature_type_cds; |
|
948 |
f->field = ValNodeNew (NULL); |
|
949 |
f->field->choice = FeatQualChoice_legal_qual; |
|
950 |
f->field->data.intvalue = Feat_qual_legal_note; |
|
951 |
break; |
|
952 |
case CDSGeneProt_field_gene_locus: |
|
953 |
f = FeatureFieldNew (); |
|
954 |
f->type = Feature_type_gene; |
|
955 |
f->field = ValNodeNew (NULL); |
|
956 |
f->field->choice = FeatQualChoice_legal_qual; |
|
957 |
f->field->data.intvalue = Feat_qual_legal_gene; |
|
958 |
break; |
|
959 |
case CDSGeneProt_field_gene_description: |
|
960 |
f = FeatureFieldNew (); |
|
961 |
f->type = Feature_type_gene; |
|
962 |
f->field = ValNodeNew (NULL); |
|
963 |
f->field->choice = FeatQualChoice_legal_qual; |
|
964 |
f->field->data.intvalue = Feat_qual_legal_gene_description; |
|
965 |
break; |
|
966 |
case CDSGeneProt_field_gene_comment: |
|
967 |
f = FeatureFieldNew (); |
|
968 |
f->type = Feature_type_gene; |
|
969 |
f->field = ValNodeNew (NULL); |
|
970 |
f->field->choice = FeatQualChoice_legal_qual; |
|
971 |
f->field->data.intvalue = Feat_qual_legal_note; |
|
972 |
break; |
|
973 |
case CDSGeneProt_field_gene_allele: |
|
974 |
f = FeatureFieldNew (); |
|
975 |
f->type = Feature_type_gene; |
|
976 |
f->field = ValNodeNew (NULL); |
|
977 |
f->field->choice = FeatQualChoice_legal_qual; |
|
978 |
f->field->data.intvalue = Feat_qual_legal_allele; |
|
979 |
break; |
|
980 |
case CDSGeneProt_field_gene_maploc: |
|
981 |
f = FeatureFieldNew (); |
|
982 |
f->type = Feature_type_gene; |
|
983 |
f->field = ValNodeNew (NULL); |
|
984 |
f->field->choice = FeatQualChoice_legal_qual; |
|
985 |
f->field->data.intvalue = Feat_qual_legal_map; |
|
986 |
break; |
|
987 |
case CDSGeneProt_field_gene_locus_tag: |
|
988 |
f = FeatureFieldNew (); |
|
989 |
f->type = Feature_type_gene; |
|
990 |
f->field = ValNodeNew (NULL); |
|
991 |
f->field->choice = FeatQualChoice_legal_qual; |
|
992 |
f->field->data.intvalue = Feat_qual_legal_locus_tag; |
|
993 |
break; |
|
994 |
case CDSGeneProt_field_gene_synonym: |
|
995 |
f = FeatureFieldNew (); |
|
996 |
f->type = Feature_type_gene; |
|
997 |
f->field = ValNodeNew (NULL); |
|
998 |
f->field->choice = FeatQualChoice_legal_qual; |
|
999 |
f->field->data.intvalue = Feat_qual_legal_synonym; |
|
1000 |
break; |
|
1001 |
case CDSGeneProt_field_gene_old_locus_tag: |
|
1002 |
f = FeatureFieldNew (); |
|
1003 |
f->type = Feature_type_gene; |
|
1004 |
f->field = ValNodeNew (NULL); |
|
1005 |
f->field->choice = FeatQualChoice_legal_qual; |
|
1006 |
f->field->data.intvalue = Feat_qual_legal_old_locus_tag; |
|
1007 |
break; |
|
1008 |
case CDSGeneProt_field_mrna_product: |
|
1009 |
f = FeatureFieldNew (); |
|
1010 |
f->type = Feature_type_mRNA; |
|
1011 |
f->field = ValNodeNew (NULL); |
|
1012 |
f->field->choice = FeatQualChoice_legal_qual; |
|
1013 |
f->field->data.intvalue = Feat_qual_legal_product; |
|
1014 |
break; |
|
1015 |
case CDSGeneProt_field_mrna_comment: |
|
1016 |
f = FeatureFieldNew (); |
|
1017 |
f->type = Feature_type_mRNA; |
|
1018 |
f->field = ValNodeNew (NULL); |
|
1019 |
f->field->choice = FeatQualChoice_legal_qual; |
|
1020 |
f->field->data.intvalue = Feat_qual_legal_note; |
|
1021 |
break; |
|
1022 |
case CDSGeneProt_field_prot_name: |
|
1023 |
f = FeatureFieldNew (); |
|
1024 |
f->type = Feature_type_prot; |
|
1025 |
f->field = ValNodeNew (NULL); |
|
1026 |
f->field->choice = FeatQualChoice_legal_qual; |
|
1027 |
f->field->data.intvalue = Feat_qual_legal_product; |
|
1028 |
break; |
|
1029 |
case CDSGeneProt_field_prot_description: |
|
1030 |
f = FeatureFieldNew (); |
|
1031 |
f->type = Feature_type_prot; |
|
1032 |
f->field = ValNodeNew (NULL); |
|
1033 |
f->field->choice = FeatQualChoice_legal_qual; |
|
1034 |
f->field->data.intvalue = Feat_qual_legal_description; |
|
1035 |
break; |
|
1036 |
case CDSGeneProt_field_prot_ec_number: |
|
1037 |
f = FeatureFieldNew (); |
|
1038 |
f->type = Feature_type_prot; |
|
1039 |
f->field = ValNodeNew (NULL); |
|
1040 |
f->field->choice = FeatQualChoice_legal_qual; |
|
1041 |
f->field->data.intvalue = Feat_qual_legal_ec_number; |
|
1042 |
break; |
|
1043 |
case CDSGeneProt_field_prot_activity: |
|
1044 |
f = FeatureFieldNew (); |
|
1045 |
f->type = Feature_type_prot; |
|
1046 |
f->field = ValNodeNew (NULL); |
|
1047 |
f->field->choice = FeatQualChoice_legal_qual; |
|
1048 |
f->field->data.intvalue = Feat_qual_legal_activity; |
|
1049 |
break; |
|
1050 |
case CDSGeneProt_field_prot_comment: |
|
1051 |
f = FeatureFieldNew (); |
|
1052 |
f->type = Feature_type_prot; |
|
1053 |
f->field = ValNodeNew (NULL); |
|
1054 |
f->field->choice = FeatQualChoice_legal_qual; |
|
1055 |
f->field->data.intvalue = Feat_qual_legal_note; |
|
1056 |
break; |
|
1057 |
case CDSGeneProt_field_mat_peptide_name: |
|
1058 |
f = FeatureFieldNew (); |
|
1059 |
f->type = Feature_type_mat_peptide; |
|
1060 |
f->field = ValNodeNew (NULL); |
|
1061 |
f->field->choice = FeatQualChoice_legal_qual; |
|
1062 |
f->field->data.intvalue = Feat_qual_legal_product; |
|
1063 |
break; |
|
1064 |
case CDSGeneProt_field_mat_peptide_description: |
|
1065 |
f = FeatureFieldNew (); |
|
1066 |
f->type = Feature_type_mat_peptide; |
|
1067 |
f->field = ValNodeNew (NULL); |
|
1068 |
f->field->choice = FeatQualChoice_legal_qual; |
|
1069 |
f->field->data.intvalue = Feat_qual_legal_description; |
|
1070 |
break; |
|
1071 |
case CDSGeneProt_field_mat_peptide_ec_number: |
|
1072 |
f = FeatureFieldNew (); |
|
1073 |
f->type = Feature_type_mat_peptide; |
|
1074 |
f->field = ValNodeNew (NULL); |
|
1075 |
f->field->choice = FeatQualChoice_legal_qual; |
|
1076 |
f->field->data.intvalue = Feat_qual_legal_ec_number; |
|
1077 |
break; |
|
1078 |
case CDSGeneProt_field_mat_peptide_activity: |
|
1079 |
f = FeatureFieldNew (); |
|
1080 |
f->type = Feature_type_mat_peptide; |
|
1081 |
f->field = ValNodeNew (NULL); |
|
1082 |
f->field->choice = FeatQualChoice_legal_qual; |
|
1083 |
f->field->data.intvalue = Feat_qual_legal_activity; |
|
1084 |
break; |
|
1085 |
case CDSGeneProt_field_mat_peptide_comment: |
|
1086 |
f = FeatureFieldNew (); |
|
1087 |
f->type = Feature_type_mat_peptide; |
|
1088 |
f->field = ValNodeNew (NULL); |
|
1089 |
f->field->choice = FeatQualChoice_legal_qual; |
|
1090 |
f->field->data.intvalue = Feat_qual_legal_note; |
|
1091 |
break; |
|
1092 |
}
|
|
1093 |
return f; |
|
1094 |
}
|
|
1095 |
||
1096 |
||
1097 |
/* Molinfo fields */
|
|
1098 |
typedef struct moleculetypebiomol { |
|
1099 |
Int4 molecule_type; |
|
1100 |
Int4 biomol; |
|
1101 |
CharPtr name; |
|
1102 |
} MoleculeTypeBiomolData, PNTR MoleculeTypeBiomolPtr; |
|
1103 |
||
1104 |
static MoleculeTypeBiomolData moleculetype_biomol[] = { |
|
1105 |
{ Molecule_type_unknown , 0, "unknown" } , |
|
1106 |
{ Molecule_type_genomic , MOLECULE_TYPE_GENOMIC , "genomic" } , |
|
1107 |
{ Molecule_type_precursor_RNA , MOLECULE_TYPE_PRE_MRNA , "precursor RNA" } , |
|
1108 |
{ Molecule_type_mRNA , MOLECULE_TYPE_MRNA , "mRNA" } , |
|
1109 |
{ Molecule_type_rRNA , MOLECULE_TYPE_RRNA , "rRNA" } , |
|
1110 |
{ Molecule_type_tRNA , MOLECULE_TYPE_TRNA , "tRNA" } , |
|
1111 |
{ Molecule_type_snRNA , MOLECULE_TYPE_SNRNA , "snRNA" } , |
|
1112 |
{ Molecule_type_scRNA , MOLECULE_TYPE_SCRNA , "scRNA" } , |
|
1113 |
{ Molecule_type_genomic_mRNA , MOLECULE_TYPE_GENOMIC_MRNA_MIX , "genomic mRNA" } , |
|
1114 |
{ Molecule_type_cRNA , MOLECULE_TYPE_CRNA , "cRNA" } , |
|
1115 |
{ Molecule_type_scRNA , MOLECULE_TYPE_SCRNA , "scRNA" } , |
|
1116 |
{ Molecule_type_scRNA , MOLECULE_TYPE_SCRNA , "scRNA" } , |
|
1117 |
{ Molecule_type_scRNA , MOLECULE_TYPE_SCRNA , "scRNA" } , |
|
1118 |
{ Molecule_type_snoRNA , MOLECULE_TYPE_SNORNA, "snoRNA" } , |
|
1119 |
{ Molecule_type_transcribed_RNA, MOLECULE_TYPE_TRANSCRIBED_RNA, "transcribed RNA" } , |
|
1120 |
{ Molecule_type_ncRNA, MOLECULE_TYPE_NCRNA, "ncRNA" } , |
|
1121 |
{ Molecule_type_transfer_messenger_RNA, MOLECULE_TYPE_TMRNA, "tmRNA" } , |
|
1122 |
{ Molecule_type_other, MOLECULE_TYPE_OTHER_GENETIC_MATERIAL, "other" } |
|
1123 |
};
|
|
1124 |
||
1125 |
||
1126 |
#define NUM_moleculetype_biomol sizeof (moleculetype_biomol) / sizeof (MoleculeTypeBiomolData)
|
|
1127 |
||
1128 |
/* Translates a Molecule_type_* value into the biomol value stored in the
 * same moleculetype_biomol row.  Returns -1 when molecule_type is not in
 * the table.
 */
NLM_EXTERN Int4 BiomolFromMoleculeType (Int4 molecule_type)
{
  Int4 row = 0;

  while (row < NUM_moleculetype_biomol
         && moleculetype_biomol[row].molecule_type != molecule_type) {
    row++;
  }
  if (row < NUM_moleculetype_biomol) {
    return moleculetype_biomol[row].biomol;
  }
  return -1;
}
|
|
1139 |
||
1140 |
||
1141 |
/* Returns the name stored in the first moleculetype_biomol row whose biomol
 * value equals biomol, or NULL when there is none.  The pointer refers to
 * the table's own string, not a copy.
 */
NLM_EXTERN CharPtr BiomolNameFromBiomol (Int4 biomol)
{
  Int4 row = 0;

  while (row < NUM_moleculetype_biomol
         && moleculetype_biomol[row].biomol != biomol) {
    row++;
  }
  if (row < NUM_moleculetype_biomol) {
    return moleculetype_biomol[row].name;
  }
  return NULL;
}
|
|
1152 |
||
1153 |
||
1154 |
/* Returns the biomol value of the first moleculetype_biomol row whose name
 * compares equal to biomol_name under StringCmp, or -1 when no row matches.
 */
static Int4 BiomolFromBiomolName (CharPtr biomol_name)
{
  Int4 row = 0;

  while (row < NUM_moleculetype_biomol
         && StringCmp (moleculetype_biomol[row].name, biomol_name) != 0) {
    row++;
  }
  if (row < NUM_moleculetype_biomol) {
    return moleculetype_biomol[row].biomol;
  }
  return -1;
}
|
|
1165 |
||
1166 |
||
1167 |
/* Builds a ValNode list with one entry per moleculetype_biomol row, in
 * table order.  Each entry is added with the row's molecule_type value and
 * a StringSave'd copy of its name.
 */
NLM_EXTERN ValNodePtr GetMoleculeTypeList (void)
{
  ValNodePtr head = NULL;
  Int4       row = 0;

  while (row < NUM_moleculetype_biomol) {
    ValNodeAddPointer (&head,
                       moleculetype_biomol[row].molecule_type,
                       StringSave (moleculetype_biomol[row].name));
    row++;
  }
  return head;
}
|
|
1177 |
||
1178 |
||
1179 |
/* Technique fields */
|
|
1180 |
typedef struct techniquetypetech { |
|
1181 |
Int4 technique_type; |
|
1182 |
Int4 tech; |
|
1183 |
CharPtr name; |
|
1184 |
} TechniqueTypeTechData, PNTR TechniqueTypeTechPtr; |
|
1185 |
||
1186 |
static TechniqueTypeTechData techniquetype_tech[] = { |
|
1187 |
{ Technique_type_unknown , MI_TECH_unknown , "unknown" } , |
|
1188 |
{ Technique_type_standard , MI_TECH_standard , "standard" } , |
|
1189 |
{ Technique_type_est , MI_TECH_est , "EST" } , |
|
1190 |
{ Technique_type_sts , MI_TECH_sts , "STS" } , |
|
1191 |
{ Technique_type_survey , MI_TECH_survey , "survey" } , |
|
1192 |
{ Technique_type_genetic_map , MI_TECH_genemap , "genetic map" } , |
|
1193 |
{ Technique_type_physical_map , MI_TECH_physmap , "physical map" } , |
|
1194 |
{ Technique_type_derived , MI_TECH_derived , "derived" } , |
|
1195 |
{ Technique_type_concept_trans , MI_TECH_concept_trans , "concept-trans" } , |
|
1196 |
{ Technique_type_seq_pept , MI_TECH_seq_pept , "seq-pept" } , |
|
1197 |
{ Technique_type_both , MI_TECH_both , "both" } , |
|
1198 |
{ Technique_type_seq_pept_overlap , MI_TECH_seq_pept_overlap , "seq-pept-overlap" } , |
|
1199 |
{ Technique_type_seq_pept_homol , MI_TECH_seq_pept_homol, "seq-pept-homol" } , |
|
1200 |
{ Technique_type_concept_trans_a, MI_TECH_concept_trans_a, "concept-trans-a" } , |
|
1201 |
{ Technique_type_htgs_1, MI_TECH_htgs_1, "HTGS-1" } , |
|
1202 |
{ Technique_type_htgs_2, MI_TECH_htgs_2, "HTGS-2" } , |
|
1203 |
{ Technique_type_htgs_3, MI_TECH_htgs_3, "HTGS-3" } , |
|
1204 |
{ Technique_type_fli_cDNA, MI_TECH_fli_cdna, "fli-cDNA" } , |
|
1205 |
{ Technique_type_htgs_0, MI_TECH_htgs_0, "HTGS-0" } , |
|
1206 |
{ Technique_type_htc, MI_TECH_htc, "HTC" } , |
|
1207 |
{ Technique_type_wgs, MI_TECH_wgs, "WGS" } , |
|
1208 |
{ Technique_type_barcode, MI_TECH_barcode, "BARCODE" } , |
|
1209 |
{ Technique_type_composite_wgs_htgs, MI_TECH_composite_wgs_htgs, "composite WGS-HTGS" } , |
|
1210 |
{ Technique_type_tsa, MI_TECH_tsa, "TSA" } , |
|
1211 |
{ Technique_type_other, MI_TECH_other, "other" } |
|
1212 |
};
|
|
1213 |
||
1214 |
||
1215 |
#define NUM_techniquetype_tech sizeof (techniquetype_tech) / sizeof (TechniqueTypeTechData)
|
|
1216 |
||
1217 |
/* Translates a Technique_type_* value into the tech value stored in the
 * same techniquetype_tech row.  Returns -1 when technique_type is not in
 * the table.
 */
NLM_EXTERN Int4 TechFromTechniqueType (Int4 technique_type)
{
  Int4 row = 0;

  while (row < NUM_techniquetype_tech
         && techniquetype_tech[row].technique_type != technique_type) {
    row++;
  }
  if (row < NUM_techniquetype_tech) {
    return techniquetype_tech[row].tech;
  }
  return -1;
}
|
|
1228 |
||
1229 |
||
1230 |
/* Returns the name stored in the first techniquetype_tech row whose tech
 * value equals tech, or NULL when there is none.  The pointer refers to the
 * table's own string, not a copy.
 */
NLM_EXTERN CharPtr TechNameFromTech (Int4 tech)
{
  Int4 row = 0;

  while (row < NUM_techniquetype_tech && techniquetype_tech[row].tech != tech) {
    row++;
  }
  if (row < NUM_techniquetype_tech) {
    return techniquetype_tech[row].name;
  }
  return NULL;
}
|
|
1241 |
||
1242 |
||
1243 |
/* Returns the tech value of the first techniquetype_tech row whose name
 * compares equal to tech_name under StringCmp, or -1 when no row matches.
 */
static Int4 TechFromTechName (CharPtr tech_name)
{
  Int4 row = 0;

  while (row < NUM_techniquetype_tech
         && StringCmp (techniquetype_tech[row].name, tech_name) != 0) {
    row++;
  }
  if (row < NUM_techniquetype_tech) {
    return techniquetype_tech[row].tech;
  }
  return -1;
}
|
|
1254 |
||
1255 |
||
1256 |
/* Builds a ValNode list with one entry per techniquetype_tech row, in table
 * order.  Each entry is added with the row's technique_type value and a
 * StringSave'd copy of its name.
 */
NLM_EXTERN ValNodePtr GetTechniqueTypeList (void)
{
  ValNodePtr head = NULL;
  Int4       row = 0;

  while (row < NUM_techniquetype_tech) {
    ValNodeAddPointer (&head,
                       techniquetype_tech[row].technique_type,
                       StringSave (techniquetype_tech[row].name));
    row++;
  }
  return head;
}
|
|
1266 |
||
1267 |
||
1268 |
/* Completedness fields */
|
|
1269 |
typedef struct completednesstypecompleteness { |
|
1270 |
Int4 completedness_type; |
|
1271 |
Int4 completeness; |
|
1272 |
CharPtr name; |
|
1273 |
} CompletednessTypeCompletenessData, PNTR CompletednessTypeCompletenessPtr; |
|
1274 |
||
1275 |
static CompletednessTypeCompletenessData completednesstype_completeness[] = { |
|
1276 |
{ Completedness_type_unknown, 0, "unknown" } , |
|
1277 |
{ Completedness_type_complete, 1, "complete" } , |
|
1278 |
{ Completedness_type_partial, 2, "partial" } , |
|
1279 |
{ Completedness_type_no_left, 3, "no left" } , |
|
1280 |
{ Completedness_type_no_right, 4, "no right" } , |
|
1281 |
{ Completedness_type_no_ends, 5, "no ends" } , |
|
1282 |
{ Completedness_type_has_left, 6, "has left" } , |
|
1283 |
{ Completedness_type_has_right, 7, "has right" } , |
|
1284 |
{ Completedness_type_other, 255, "other" } |
|
1285 |
};
|
|
1286 |
||
1287 |
#define NUM_completednesstype_completeness sizeof (completednesstype_completeness) / sizeof (CompletednessTypeCompletenessData)
|
|
1288 |
||
1289 |
/* Translates a Completedness_type_* value into the completeness value
 * stored in the same completednesstype_completeness row.  Returns -1 when
 * completedness_type is not in the table.
 */
NLM_EXTERN Int4 CompletenessFromCompletednessType (Int4 completedness_type)
{
  Int4 row = 0;

  while (row < NUM_completednesstype_completeness
         && completednesstype_completeness[row].completedness_type != completedness_type) {
    row++;
  }
  if (row < NUM_completednesstype_completeness) {
    return completednesstype_completeness[row].completeness;
  }
  return -1;
}
|
|
1300 |
||
1301 |
||
1302 |
/* Returns the name stored in the first completednesstype_completeness row
 * whose completeness value equals completeness, or NULL when there is none.
 * The pointer refers to the table's own string, not a copy.
 */
NLM_EXTERN CharPtr CompletenessNameFromCompleteness (Int4 completeness)
{
  Int4 row = 0;

  while (row < NUM_completednesstype_completeness
         && completednesstype_completeness[row].completeness != completeness) {
    row++;
  }
  if (row < NUM_completednesstype_completeness) {
    return completednesstype_completeness[row].name;
  }
  return NULL;
}
|
|
1313 |
||
1314 |
||
1315 |
/* Returns the completeness value of the first
 * completednesstype_completeness row whose name compares equal to
 * completeness_name under StringCmp, or -1 when no row matches.
 */
static Int4 CompletenessFromCompletenessName (CharPtr completeness_name)
{
  Int4 row = 0;

  while (row < NUM_completednesstype_completeness
         && StringCmp (completednesstype_completeness[row].name, completeness_name) != 0) {
    row++;
  }
  if (row < NUM_completednesstype_completeness) {
    return completednesstype_completeness[row].completeness;
  }
  return -1;
}
|
|
1326 |
||
1327 |
||
1328 |
/* Builds a ValNode list with one entry per completednesstype_completeness
 * row, in table order.  Each entry is added with the row's
 * completedness_type value and a StringSave'd copy of its name.
 */
NLM_EXTERN ValNodePtr GetCompletednessTypeList (void)
{
  ValNodePtr head = NULL;
  Int4       row = 0;

  while (row < NUM_completednesstype_completeness) {
    ValNodeAddPointer (&head,
                       completednesstype_completeness[row].completedness_type,
                       StringSave (completednesstype_completeness[row].name));
    row++;
  }
  return head;
}
|
|
1338 |
||
1339 |
||
1340 |
/* Molecule class fields */
|
|
1341 |
typedef struct moleculeclasstypemol { |
|
1342 |
Int4 moleculeclass_type; |
|
1343 |
Int4 mol; |
|
1344 |
CharPtr name; |
|
1345 |
} MoleculeClassTypeMolData, PNTR MoleculeClassTypeMolPtr; |
|
1346 |
||
1347 |
static MoleculeClassTypeMolData moleculeclasstype_mol[] = { |
|
1348 |
{ Molecule_class_type_unknown, 0, "unknown" } , |
|
1349 |
{ Molecule_class_type_dna, MOLECULE_CLASS_DNA, "DNA" } , |
|
1350 |
{ Molecule_class_type_rna, MOLECULE_CLASS_RNA, "RNA" } , |
|
1351 |
{ Molecule_class_type_protein, MOLECULE_CLASS_PROTEIN, "protein" } , |
|
1352 |
{ Molecule_class_type_nucleotide, MOLECULE_CLASS_NUC, "nucleotide" } , |
|
1353 |
{ Molecule_class_type_other, 255, "other" } |
|
1354 |
};
|
|
1355 |
||
1356 |
||
1357 |
#define NUM_moleculeclasstype_mol sizeof (moleculeclasstype_mol) / sizeof (MoleculeClassTypeMolData)
|
|
1358 |
||
1359 |
/* Translates a Molecule_class_type_* value into the mol value stored in the
 * same moleculeclasstype_mol row.  Returns -1 when moleculeclass_type is
 * not in the table.
 */
NLM_EXTERN Int4 MolFromMoleculeClassType (Int4 moleculeclass_type)
{
  Int4 row = 0;

  while (row < NUM_moleculeclasstype_mol
         && moleculeclasstype_mol[row].moleculeclass_type != moleculeclass_type) {
    row++;
  }
  if (row < NUM_moleculeclasstype_mol) {
    return moleculeclasstype_mol[row].mol;
  }
  return -1;
}
|
|
1370 |
||
1371 |
||
1372 |
/* Returns the name stored in the first moleculeclasstype_mol row whose mol
 * value equals mol, or NULL when there is none.  The pointer refers to the
 * table's own string, not a copy.
 */
NLM_EXTERN CharPtr MolNameFromMol (Int4 mol)
{
  Int4 row = 0;

  while (row < NUM_moleculeclasstype_mol && moleculeclasstype_mol[row].mol != mol) {
    row++;
  }
  if (row < NUM_moleculeclasstype_mol) {
    return moleculeclasstype_mol[row].name;
  }
  return NULL;
}
|
|
1383 |
||
1384 |
||
1385 |
/* Returns the mol value of the first moleculeclasstype_mol row whose name
 * compares equal to mol_name under StringCmp, or -1 when no row matches.
 */
static Int4 MolFromMolName (CharPtr mol_name)
{
  Int4 row = 0;

  while (row < NUM_moleculeclasstype_mol
         && StringCmp (moleculeclasstype_mol[row].name, mol_name) != 0) {
    row++;
  }
  if (row < NUM_moleculeclasstype_mol) {
    return moleculeclasstype_mol[row].mol;
  }
  return -1;
}
|
|
1396 |
||
1397 |
||
1398 |
/* Builds a ValNode list with one entry per moleculeclasstype_mol row, in
 * table order.  Each entry is added with the row's moleculeclass_type value
 * and a StringSave'd copy of its name.
 */
NLM_EXTERN ValNodePtr GetMoleculeClassTypeList (void)
{
  ValNodePtr head = NULL;
  Int4       row = 0;

  while (row < NUM_moleculeclasstype_mol) {
    ValNodeAddPointer (&head,
                       moleculeclasstype_mol[row].moleculeclass_type,
                       StringSave (moleculeclasstype_mol[row].name));
    row++;
  }
  return head;
}
|
|
1408 |
||
1409 |
||
1410 |
/* Topology fields */
|
|
1411 |
typedef struct topologytypetopology { |
|
1412 |
Int4 topology_type; |
|
1413 |
Int4 topology; |
|
1414 |
CharPtr name; |
|
1415 |
} TopologyTypeTopologyData, PNTR TopologyTypeTopologyPtr; |
|
1416 |
||
1417 |
static TopologyTypeTopologyData topologytype_topology[] = { |
|
1418 |
{ Topology_type_unknown, 0, "unknown" } , |
|
1419 |
{ Topology_type_linear, TOPOLOGY_LINEAR, "linear" } , |
|
1420 |
{ Topology_type_circular, TOPOLOGY_CIRCULAR, "circular" } , |
|
1421 |
{ Topology_type_tandem, TOPOLOGY_TANDEM, "tandem" } , |
|
1422 |
{ Topology_type_other, 255, "other" } |
|
1423 |
};
|
|
1424 |
||
1425 |
#define NUM_topologytype_topology sizeof (topologytype_topology) / sizeof (TopologyTypeTopologyData)
|
|
1426 |
||
1427 |
/* Translates a Topology_type_* value into the topology value stored in the
 * same topologytype_topology row.  Returns -1 when topology_type is not in
 * the table.
 */
NLM_EXTERN Int4 TopologyFromTopologyType (Int4 topology_type)
{
  Int4 row = 0;

  while (row < NUM_topologytype_topology
         && topologytype_topology[row].topology_type != topology_type) {
    row++;
  }
  if (row < NUM_topologytype_topology) {
    return topologytype_topology[row].topology;
  }
  return -1;
}
|
|
1438 |
||
1439 |
||
1440 |
/* Returns the name stored in the first topologytype_topology row whose
 * topology value equals topology, or NULL when there is none.  The pointer
 * refers to the table's own string, not a copy.
 */
NLM_EXTERN CharPtr TopologyNameFromTopology (Int4 topology)
{
  Int4 row = 0;

  while (row < NUM_topologytype_topology
         && topologytype_topology[row].topology != topology) {
    row++;
  }
  if (row < NUM_topologytype_topology) {
    return topologytype_topology[row].name;
  }
  return NULL;
}
|
|
1451 |
||
1452 |
||
1453 |
/* Returns the topology value of the first topologytype_topology row whose
 * name compares equal to topology_name under StringCmp, or -1 when no row
 * matches.
 */
static Int4 TopologyFromTopologyName (CharPtr topology_name)
{
  Int4 row = 0;

  while (row < NUM_topologytype_topology
         && StringCmp (topologytype_topology[row].name, topology_name) != 0) {
    row++;
  }
  if (row < NUM_topologytype_topology) {
    return topologytype_topology[row].topology;
  }
  return -1;
}
|
|
1464 |
||
1465 |
||
1466 |
/* Builds a ValNode list with one entry per topologytype_topology row, in
 * table order.  Each entry is added with the row's topology_type value and
 * a StringSave'd copy of its name.
 */
NLM_EXTERN ValNodePtr GetTopologyTypeList (void)
{
  ValNodePtr head = NULL;
  Int4       row = 0;

  while (row < NUM_topologytype_topology) {
    ValNodeAddPointer (&head,
                       topologytype_topology[row].topology_type,
                       StringSave (topologytype_topology[row].name));
    row++;
  }
  return head;
}
|
|
1476 |
||
1477 |
||
1478 |
/* strand fields */
|
|
1479 |
typedef struct strandtypestrand { |
|
1480 |
Int4 strand_type; |
|
1481 |
Int4 strand; |
|
1482 |
CharPtr name; |
|
1483 |
} StrandTypeStrandData, PNTR StrandTypeStrandPtr; |
|
1484 |
||
1485 |
static StrandTypeStrandData strandtype_strand[] = { |
|
1486 |
{ Strand_type_unknown, 0, "unknown" } , |
|
1487 |
{ Strand_type_single, STRANDEDNESS_SINGLE, "single" } , |
|
1488 |
{ Strand_type_double__, STRANDEDNESS_DOUBLE, "double" } , |
|
1489 |
{ Strand_type_mixed, 3, "mixed" } , |
|
1490 |
{ Strand_type_mixed_rev, 4, "mixed-rev" } , |
|
1491 |
{ Strand_type_other, 255, "other" } |
|
1492 |
};
|
|
1493 |
||
1494 |
#define NUM_strandtype_strand sizeof (strandtype_strand) / sizeof (StrandTypeStrandData)
|
|
1495 |
||
1496 |
/* Translates a Strand_type_* value into the strand value stored in the same
 * strandtype_strand row.  Returns -1 when strand_type is not in the table.
 */
NLM_EXTERN Int4 StrandFromStrandType (Int4 strand_type)
{
  Int4 row = 0;

  while (row < NUM_strandtype_strand
         && strandtype_strand[row].strand_type != strand_type) {
    row++;
  }
  if (row < NUM_strandtype_strand) {
    return strandtype_strand[row].strand;
  }
  return -1;
}
|
|
1507 |
||
1508 |
||
1509 |
/* Returns the name stored in the first strandtype_strand row whose strand
 * value equals strand, or NULL when there is none.  The pointer refers to
 * the table's own string, not a copy.
 */
NLM_EXTERN CharPtr StrandNameFromStrand (Int4 strand)
{
  Int4 row = 0;

  while (row < NUM_strandtype_strand && strandtype_strand[row].strand != strand) {
    row++;
  }
  if (row < NUM_strandtype_strand) {
    return strandtype_strand[row].name;
  }
  return NULL;
}
|
|
1520 |
||
1521 |
||
1522 |
/* Returns the strand code of the row whose name equals strand_name
 * (compared with StringCmp, so case matters), or -1 when none matches.
 */
static Int4 StrandFromStrandName (CharPtr strand_name)
{
  StrandTypeStrandPtr row = strandtype_strand;
  StrandTypeStrandPtr row_end = strandtype_strand + (NUM_strandtype_strand);

  for (; row < row_end; row++) {
    if (StringCmp (row->name, strand_name) == 0) {
      return row->strand;
    }
  }
  return -1;
}
|
|
1533 |
||
1534 |
||
1535 |
/* Builds a ValNode list with one node per row of strandtype_strand.
 * Each node's choice is the row's strand_type and its pointer value is a
 * newly saved copy of the row's name.
 */
NLM_EXTERN ValNodePtr GetStrandTypeList (void)
{
  ValNodePtr          head = NULL;
  StrandTypeStrandPtr row = strandtype_strand;
  StrandTypeStrandPtr row_end = strandtype_strand + (NUM_strandtype_strand);

  for (; row < row_end; row++) {
    ValNodeAddPointer (&head, row->strand_type, StringSave (row->name));
  }
  return head;
}
|
|
1545 |
||
1546 |
||
1547 |
/* Returns the name of the value carried by a molinfo field choice.
 * The field's intvalue is first converted to the corresponding code and
 * then to its name, using the converter pair that matches field->choice.
 * Returns NULL when field is NULL or its choice is not one handled here.
 */
static CharPtr GetSequenceQualValName (ValNodePtr field)
{
  if (field == NULL) {
    return NULL;
  }

  switch (field->choice) {
    case MolinfoField_molecule:
      return BiomolNameFromBiomol (BiomolFromMoleculeType (field->data.intvalue));
    case MolinfoField_technique:
      return TechNameFromTech (TechFromTechniqueType (field->data.intvalue));
    case MolinfoField_completedness:
      return CompletenessNameFromCompleteness (CompletenessFromCompletednessType (field->data.intvalue));
    case MolinfoField_mol_class:
      return MolNameFromMol (MolFromMoleculeClassType (field->data.intvalue));
    case MolinfoField_topology:
      return TopologyNameFromTopology (TopologyFromTopologyType (field->data.intvalue));
    case MolinfoField_strand:
      return StrandNameFromStrand (StrandFromStrandType (field->data.intvalue));
    default:
      break;
  }
  return NULL;
}
|
|
1574 |
||
1575 |
||
1576 |
/* Returns a newly allocated "<field name> <value name>" description of a
 * molinfo field choice, or NULL when field is NULL.
 * An unrecognized choice yields "invalid field invalid value"; a recognized
 * choice whose value has no name yields "<field name> Invalid value".
 */
static CharPtr GetSequenceQualName (ValNodePtr field)
{
  CharPtr label = "invalid field";
  CharPtr value = "invalid value";
  CharPtr fmt = "%s %s";
  CharPtr result;
  Boolean recognized = TRUE;

  if (field == NULL) {
    return NULL;
  }

  switch (field->choice) {
    case MolinfoField_molecule:
      label = "molecule";
      break;
    case MolinfoField_technique:
      label = "technique";
      break;
    case MolinfoField_completedness:
      label = "completeness";
      break;
    case MolinfoField_mol_class:
      label = "class";
      break;
    case MolinfoField_topology:
      label = "topology";
      break;
    case MolinfoField_strand:
      label = "strand";
      break;
    default:
      recognized = FALSE;
      break;
  }

  if (recognized) {
    /* same per-choice conversion as the value-only lookup */
    value = GetSequenceQualValName (field);
    if (value == NULL) {
      value = "Invalid value";
    }
  }

  /* the format string is longer than the separator plus terminator it
   * produces, so this size is sufficient for the formatted text
   */
  result = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (label) + StringLen (value)));
  sprintf (result, fmt, label, value);
  return result;
}
|
|
1615 |
||
1616 |
||
1617 |
/* Simple constraints */
|
|
1618 |
/* Reports whether the match_len characters at found, inside the string that
 * begins at start, are delimited like a whole word: the character before
 * the match (if the match is not at start) and the character after it (if
 * not the terminator) must not be a letter or digit.
 *
 * Returns TRUE when match_len is 0, and FALSE when start or found is NULL.
 *
 * Characters are converted to unsigned char before classification because
 * passing a negative char value to the <ctype.h> functions is undefined.
 */
static Boolean IsWholeWordMatch (CharPtr start, CharPtr found, Int4 match_len)
{
  unsigned char neighbor;

  if (match_len == 0) {
    return TRUE;
  }
  if (start == NULL || found == NULL) {
    return FALSE;
  }

  if (found != start) {
    neighbor = (unsigned char) *(found - 1);
    if (isalnum (neighbor)) {
      return FALSE;
    }
  }

  neighbor = (unsigned char) *(found + match_len);
  if (neighbor != 0 && isalnum (neighbor)) {
    return FALSE;
  }
  return TRUE;
}
|
|
1650 |
||
1651 |
||
1652 |
/* A string constraint is empty when it is NULL or its match_text has no
 * text; returns TRUE in those cases and FALSE otherwise.
 */
NLM_EXTERN Boolean IsStringConstraintEmpty (StringConstraintPtr scp)
{
  if (scp != NULL && !StringHasNoText (scp->match_text)) {
    return FALSE;
  }
  return TRUE;
}
|
|
1657 |
||
1658 |
||
1659 |
NLM_EXTERN Boolean DoesSingleStringMatchConstraint (CharPtr str, StringConstraintPtr scp) |
|
1660 |
{
|
|
1661 |
CharPtr pFound; |
|
1662 |
Boolean rval = FALSE; |
|
1663 |
Char char_after = 0; |
|
1664 |
||
1665 |
if (IsStringConstraintEmpty (scp)) return TRUE; |
|
1666 |
if (StringHasNoText (str)) return FALSE; |
|
1667 |
||
1668 |
switch (scp->match_location) |
|
1669 |
{
|
|
1670 |
case String_location_contains: |
|
1671 |
if (scp->case_sensitive) |
|
1672 |
{
|
|
1673 |
pFound = StringSearch (str, scp->match_text); |
|
1674 |
}
|
|
1675 |
else
|
|
1676 |
{
|
|
1677 |
pFound = StringISearch (str, scp->match_text); |
|
1678 |
}
|
|
1679 |
if (pFound == NULL) |
|
1680 |
{
|
|
1681 |
rval = FALSE; |
|
1682 |
}
|
|
1683 |
else if (scp->whole_word) |
|
1684 |
{
|
|
1685 |
rval = IsWholeWordMatch (str, pFound, StringLen (scp->match_text)); |
|
1686 |
while (!rval && pFound != NULL) |
|
1687 |
{
|
|
1688 |
if (scp->case_sensitive) |
|
1689 |
{
|
|
1690 |
pFound = StringSearch (pFound + 1, scp->match_text); |
|
1691 |
}
|
|
1692 |
else
|
|
1693 |
{
|
|
1694 |
pFound = StringISearch (pFound + 1, scp->match_text); |
|
1695 |
}
|
|
1696 |
if (pFound != NULL) |
|
1697 |
{
|
|
1698 |
rval = IsWholeWordMatch (str, pFound, StringLen (scp->match_text)); |
|
1699 |
}
|
|
1700 |
}
|
|
1701 |
}
|
|
1702 |
else
|
|
1703 |
{
|
|
1704 |
rval = TRUE; |
|
1705 |
}
|
|
1706 |
break; |
|
1707 |
case String_location_starts: |
|
1708 |
if (scp->case_sensitive) |
|
1709 |
{
|
|
1710 |
pFound = StringSearch (str, scp->match_text); |
|
1711 |
}
|
|
1712 |
else
|
|
1713 |
{
|
|
1714 |
pFound = StringISearch (str, scp->match_text); |
|
1715 |
}
|
|
1716 |
if (pFound == str) |
|
1717 |
{
|
|
1718 |
if (scp->whole_word) |
|
1719 |
{
|
|
1720 |
rval = IsWholeWordMatch (str, pFound, StringLen (scp->match_text)); |
|
1721 |
}
|
|
1722 |
else
|
|
1723 |
{
|
|
1724 |
rval = TRUE; |
|
1725 |
}
|
|
1726 |
}
|
|
1727 |
break; |
|
1728 |
case String_location_ends: |
|
1729 |
if (scp->case_sensitive) |
|
1730 |
{
|
|
1731 |
pFound = StringSearch (str, scp->match_text); |
|
1732 |
}
|
|
1733 |
else
|
|
1734 |
{
|
|
1735 |
pFound = StringISearch (str, scp->match_text); |
|
1736 |
}
|
|
1737 |
while (pFound != NULL && !rval) { |
|
1738 |
char_after = *(pFound + StringLen (scp->match_text)); |
|
1739 |
if (char_after == 0) |
|
1740 |
{
|
|
1741 |
if (scp->whole_word) |
|
1742 |
{
|
|
1743 |
rval = IsWholeWordMatch (str, pFound, StringLen (scp->match_text)); |
|
1744 |
}
|
|
1745 |
else
|
|
1746 |
{
|
|
1747 |
rval = TRUE; |
|
1748 |
}
|
|
1749 |
/* stop the search, we're at the end of the string */
|
|
1750 |
pFound = NULL; |
|
1751 |
}
|
|
1752 |
else
|
|
1753 |
{
|
|
1754 |
if (scp->case_sensitive) |
|
1755 |
{
|
|
1756 |
pFound = StringSearch (pFound + 1, scp->match_text); |
|
1757 |
}
|
|
1758 |
else
|
|
1759 |
{
|
|
1760 |
pFound = StringISearch (pFound + 1, scp->match_text); |
|
1761 |
}
|
|
1762 |
}
|
|
1763 |
}
|
|
1764 |
break; |
|
1765 |
case String_location_equals: |
|
1766 |
if (scp->case_sensitive) |
|
1767 |
{
|
|
1768 |
if (StringCmp (str, scp->match_text) == 0) |
|
1769 |
{
|
|
1770 |
rval = TRUE; |
|
1771 |
}
|
|
1772 |
}
|
|
1773 |
else
|
|
1774 |
{
|
|
1775 |
if (StringICmp (str, scp->match_text) == 0) |
|
1776 |
{
|
|
1777 |
rval = TRUE; |
|
1778 |
}
|
|
1779 |
}
|
|
1780 |
break; |
|
1781 |
case String_location_inlist: |
|
1782 |
if (scp->case_sensitive) |
|
1783 |
{
|
|
1784 |
pFound = StringSearch (scp->match_text, str); |
|
1785 |
}
|
|
1786 |
else
|
|
1787 |
{
|
|
1788 |
pFound = StringISearch (scp->match_text, str); |
|
1789 |
}
|
|
1790 |
if (pFound == NULL) |
|
1791 |
{
|
|
1792 |
rval = FALSE; |
|
1793 |
}
|
|
1794 |
else
|
|
1795 |
{
|
|
1796 |
rval = IsWholeWordMatch (scp->match_text, pFound, StringLen (str)); |
|
1797 |
while (!rval && pFound != NULL) |
|
1798 |
{
|
|
1799 |
if (scp->case_sensitive) |
|
1800 |
{
|
|
1801 |
pFound = StringSearch (pFound + 1, str); |
|
1802 |
}
|
|
1803 |
else
|
|
1804 |
{
|
|
1805 |
pFound = StringISearch (pFound + 1, str); |
|
1806 |
}
|
|
1807 |
if (pFound != NULL) |
|
1808 |
{
|
|
1809 |
rval = IsWholeWordMatch (scp->match_text, pFound, StringLen (str)); |
|
1810 |
}
|
|
1811 |
}
|
|
1812 |
}
|
|
1813 |
if (!rval) { |
|
1814 |
/* look for spans */
|
|
1815 |
rval = IsStringInSpanInList (str, scp->match_text); |
|
1816 |
}
|
|
1817 |
break; |
|
1818 |
}
|
|
1819 |
return rval; |
|
1820 |
}
|
|
1821 |
||
1822 |
||
1823 |
/* Tests str against a string constraint, including its not_present flag:
 * the result of DoesSingleStringMatchConstraint is inverted when scp is
 * non-NULL and scp->not_present is set.
 */
NLM_EXTERN Boolean DoesStringMatchConstraint (CharPtr str, StringConstraintPtr scp)
{
  Boolean matched = DoesSingleStringMatchConstraint (str, scp);

  if (scp == NULL || !scp->not_present) {
    return matched;
  }
  return (Boolean) !matched;
}
|
|
1833 |
||
1834 |
||
1835 |
/* Tests a ValNode list of strings against a string constraint by joining
 * the strings with "; " and testing the joined text once.
 * An empty constraint matches any list (including NULL); otherwise a NULL
 * list does not match.
 */
static Boolean DoesStringListMatchConstraint (ValNodePtr list, StringConstraintPtr scp)
{
  ValNodePtr node;
  CharPtr    joined;
  Int4       total = 1;    /* room for the terminator */
  Boolean    matched;

  if (IsStringConstraintEmpty (scp)) {
    return TRUE;
  }
  if (list == NULL) {
    return FALSE;
  }

  /* each item is budgeted its own length plus a two-character separator */
  for (node = list; node != NULL; node = node->next) {
    total += StringLen (node->data.ptrvalue) + 2;
  }

  joined = (CharPtr) MemNew (sizeof (Char) * total);
  for (node = list; node != NULL; node = node->next) {
    if (node != list) {
      StringCat (joined, "; ");
    }
    StringCat (joined, node->data.ptrvalue);
  }

  matched = DoesStringMatchConstraint (joined, scp);
  joined = MemFree (joined);
  return matched;
}
|
|
1863 |
||
1864 |
||
1865 |
/* Tests the strand of a location against the strand part of a location
 * constraint.
 * A NULL location never matches.  A NULL constraint or a constraint strand
 * of "any" always matches.  Otherwise a minus-strand location matches only
 * the minus constraint, and every other location matches only the plus
 * constraint.
 */
static Boolean DoesStrandMatchConstraint (SeqLocPtr slp, LocationConstraintPtr lcp)
{
  Uint2 loc_strand;
  Uint2 required;

  if (slp == NULL) {
    return FALSE;
  }
  if (lcp == NULL || lcp->strand == Strand_constraint_any) {
    return TRUE;
  }

  loc_strand = SeqLocStrand (slp);
  required = (loc_strand == Seq_strand_minus) ? Strand_constraint_minus
                                              : Strand_constraint_plus;
  return (Boolean) (lcp->strand == required);
}
|
|
1906 |
||
1907 |
||
1908 |
/* Tests a Bioseq against a sequence-type constraint value.
 * A NULL Bioseq never matches, even for "any".  Otherwise "any" always
 * matches, "nuc" matches when ISA_na accepts bsp->mol, "prot" matches when
 * ISA_aa accepts it, and every other value does not match.
 */
static Boolean DoesBioseqMatchSequenceType (BioseqPtr bsp, Uint2 seq_type)
{
  if (bsp == NULL) {
    return FALSE;
  }
  if (seq_type == Seqtype_constraint_any) {
    return TRUE;
  }
  if (seq_type == Seqtype_constraint_nuc && ISA_na (bsp->mol)) {
    return TRUE;
  }
  if (seq_type == Seqtype_constraint_prot && ISA_aa (bsp->mol)) {
    return TRUE;
  }
  return FALSE;
}
|
|
1925 |
||
1926 |
||
1927 |
/* Tests the sequence a location is on against the sequence-type part of a
 * location constraint.
 * A NULL location never matches.  A NULL constraint or a seq_type of "any"
 * always matches.  Otherwise the Bioseq found for the location is checked
 * with DoesBioseqMatchSequenceType.
 */
static Boolean DoesSequenceTypeMatchContraint (SeqLocPtr slp, LocationConstraintPtr lcp)
{
  if (slp == NULL) {
    return FALSE;
  }
  if (lcp == NULL || lcp->seq_type == Seqtype_constraint_any) {
    return TRUE;
  }
  return DoesBioseqMatchSequenceType (BioseqFindFromSeqLoc (slp), lcp->seq_type);
}
|
|
1947 |
||
1948 |
/* Tests a location against a whole location constraint.
 * A NULL location never matches; a NULL constraint always matches;
 * otherwise both the strand test and the sequence-type test must pass.
 */
static Boolean DoesLocationMatchConstraint (SeqLocPtr slp, LocationConstraintPtr lcp)
{
  if (slp == NULL) {
    return FALSE;
  }
  if (lcp == NULL) {
    return TRUE;
  }
  if (!DoesStrandMatchConstraint (slp, lcp)) {
    return FALSE;
  }
  return DoesSequenceTypeMatchContraint (slp, lcp);
}
|
|
1963 |
||
1964 |
||
1965 |
/* Tests an object against a location constraint.
 *
 * choice selects how data is interpreted:
 *   OBJ_SEQFEAT - data is a SeqFeatPtr; its location is tested
 *   OBJ_SEQDESC - data is a SeqDescrPtr; the Bioseq is taken from the
 *                 descriptor's parent (extended descriptors only)
 *   0           - data is a CGPSetPtr (CDS-Gene-Prot group)
 *
 * Returns FALSE for NULL data.  Returns TRUE when the constraint is NULL or
 * places no restriction on strand or sequence type.  Otherwise the object
 * must be associated with a Bioseq of the requested sequence type and, if
 * a strand is requested, satisfy the strand test:
 *   - for a protein Bioseq, the strand of the coding region that produces
 *     it is used; when no such coding region is found the strand cannot be
 *     determined and the object does not match
 *   - for a CDS-Gene-Prot group, every CDS, gene and mRNA must match
 *   - for a feature, its own location must match
 *   - descriptors cannot meet strand constraints
 */
static Boolean DoesObjectMatchLocationConstraint (Uint1 choice, Pointer data, LocationConstraintPtr constraint)
{
  SeqFeatPtr        sfp;
  SeqDescrPtr       sdp;
  CGPSetPtr         cgp;
  BioseqPtr         bsp = NULL;
  BioseqSetPtr      bssp;
  ValNodePtr        vnp;
  ObjValNodePtr     ovp;
  SeqMgrFeatContext context;

  if (data == NULL) return FALSE;

  if (constraint == NULL
      || (constraint->strand == Strand_constraint_any
          && constraint->seq_type == Seqtype_constraint_any)) {
    return TRUE;
  }

  /* locate the Bioseq the object belongs to */
  if (choice == OBJ_SEQFEAT) {
    sfp = (SeqFeatPtr) data;
    bsp = BioseqFindFromSeqLoc (sfp->location);
  } else if (choice == OBJ_SEQDESC) {
    sdp = (SeqDescrPtr) data;
    if (sdp->extended != 0) {
      ovp = (ObjValNodePtr) sdp;
      if (ovp->idx.parenttype == OBJ_BIOSEQSET) {
        bssp = (BioseqSetPtr) ovp->idx.parentptr;
        /* only use the first entry of the set when it really is a Bioseq;
         * treating a nested set as a Bioseq would misread its fields
         */
        if (bssp != NULL && bssp->seq_set != NULL && IS_Bioseq (bssp->seq_set)) {
          bsp = (BioseqPtr) bssp->seq_set->data.ptrvalue;
        }
      } else if (ovp->idx.parenttype == OBJ_BIOSEQ) {
        bsp = (BioseqPtr) ovp->idx.parentptr;
      }
    }
  } else if (choice == 0) {
    if (constraint->seq_type != Seqtype_constraint_any) {
      return FALSE;
    }
    /* use the first available feature of the group, in the order
     * CDS, gene, mRNA, protein
     */
    cgp = (CGPSetPtr) data;
    if (cgp->cds_list != NULL && cgp->cds_list->data.ptrvalue != NULL) {
      sfp = (SeqFeatPtr) cgp->cds_list->data.ptrvalue;
      bsp = BioseqFindFromSeqLoc (sfp->location);
    } else if (cgp->gene_list != NULL && cgp->gene_list->data.ptrvalue != NULL) {
      sfp = (SeqFeatPtr) cgp->gene_list->data.ptrvalue;
      bsp = BioseqFindFromSeqLoc (sfp->location);
    } else if (cgp->mrna_list != NULL && cgp->mrna_list->data.ptrvalue != NULL) {
      sfp = (SeqFeatPtr) cgp->mrna_list->data.ptrvalue;
      bsp = BioseqFindFromSeqLoc (sfp->location);
    } else if (cgp->prot_list != NULL && cgp->prot_list->data.ptrvalue != NULL) {
      sfp = (SeqFeatPtr) cgp->prot_list->data.ptrvalue;
      bsp = BioseqFindFromSeqLoc (sfp->location);
    }
  }

  /* this also rejects a NULL bsp, so bsp is valid from here on */
  if (!DoesBioseqMatchSequenceType (bsp, constraint->seq_type)) {
    return FALSE;
  }

  if (constraint->strand != Strand_constraint_any && ISA_aa (bsp->mol)) {
    sfp = SeqMgrGetCDSgivenProduct (bsp, &context);
    if (sfp == NULL) {
      /* no coding region: context was not filled in, strand is unknown */
      return FALSE;
    }
    if (constraint->strand == Strand_constraint_minus && context.strand != Seq_strand_minus) {
      return FALSE;
    }
    if (constraint->strand == Strand_constraint_plus && context.strand == Seq_strand_minus) {
      return FALSE;
    }
  } else if (constraint->strand != Strand_constraint_any) {
    if (choice == 0) {
      /* strand for CDS-Gene-Prot group */
      cgp = (CGPSetPtr) data;
      for (vnp = cgp->cds_list; vnp != NULL; vnp = vnp->next) {
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
        if (sfp != NULL && !DoesStrandMatchConstraint (sfp->location, constraint)) {
          return FALSE;
        }
      }
      for (vnp = cgp->gene_list; vnp != NULL; vnp = vnp->next) {
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
        if (sfp != NULL && !DoesStrandMatchConstraint (sfp->location, constraint)) {
          return FALSE;
        }
      }
      for (vnp = cgp->mrna_list; vnp != NULL; vnp = vnp->next) {
        sfp = (SeqFeatPtr) vnp->data.ptrvalue;
        if (sfp != NULL && !DoesStrandMatchConstraint (sfp->location, constraint)) {
          return FALSE;
        }
      }
    } else if (choice == OBJ_SEQFEAT) {
      sfp = (SeqFeatPtr) data;
      if (!DoesStrandMatchConstraint (sfp->location, constraint)) {
        return FALSE;
      }
    } else {
      /* descriptors can't meet strand constraints */
      return FALSE;
    }
  }
  return TRUE;
}
|
|
2064 |
||
2065 |
||
2066 |
/* for parsing and editing */
|
|
2067 |
/* Extracts the part of str selected by text_portion and returns it as a
 * newly allocated string.
 *
 * The portion begins at the first occurrence of left_text (or at the start
 * of str when left_text is NULL or empty) and ends at the first occurrence
 * of right_text after that point (or at the end of str when right_text is
 * NULL or empty).  include_left / include_right decide whether the
 * delimiters themselves are part of the result; case_sensitive selects the
 * search function; whole_word additionally requires the located delimiter
 * to pass IsWholeWordMatch.
 *
 * Returns NULL when str has no text, when a required delimiter is not
 * found, or when the selected portion is empty.  With a NULL text_portion
 * a copy of the whole string is returned.
 */
static CharPtr GetTextPortionFromString (CharPtr str, TextPortionPtr text_portion)
{
  CharPtr begin;
  CharPtr end;
  CharPtr result;
  Int4    span;

  if (StringHasNoText (str)) {
    return NULL;
  }
  if (text_portion == NULL) {
    return StringSave (str);
  }

  /* locate the left edge */
  begin = str;
  if (text_portion->left_text != NULL && text_portion->left_text [0] != 0) {
    begin = text_portion->case_sensitive
              ? StringSearch (str, text_portion->left_text)
              : StringISearch (str, text_portion->left_text);
    if (text_portion->whole_word
        && ! IsWholeWordMatch (str, begin, StringLen (text_portion->left_text))) {
      begin = NULL;
    }
    if (begin == NULL) {
      return NULL;
    }
  }
  if (!text_portion->include_left) {
    begin += StringLen (text_portion->left_text);
  }

  /* determine how many characters to take */
  if (text_portion->right_text == NULL || text_portion->right_text [0] == 0) {
    span = StringLen (begin);
  } else {
    end = text_portion->case_sensitive
            ? StringSearch (begin, text_portion->right_text)
            : StringISearch (begin, text_portion->right_text);
    if (text_portion->whole_word
        && ! IsWholeWordMatch (str, end, StringLen (text_portion->right_text))) {
      end = NULL;
    }
    if (end == NULL) {
      span = 0;
    } else {
      span = (Int4) (end - begin);
      if (text_portion->include_right) {
        span += StringLen (text_portion->right_text);
      }
    }
  }

  if (span <= 0) {
    return NULL;
  }

  result = (CharPtr) MemNew (sizeof (Char) * (span + 1));
  StringNCpy (result, begin, span);
  result[span] = 0;
  return result;
}
|
|
2152 |
||
2153 |
||
2154 |
||
2155 |
/* Returns a pointer into str at which the portion described by
 * text_portion begins, or NULL when the portion is not present.
 *
 * The portion begins at the first (case-sensitive) occurrence of left_text,
 * or just after it when include_left is not set; with a NULL left_text it
 * begins at str.  When right_text is non-NULL it must also occur at or
 * after that point.  Returns NULL when str or text_portion is NULL.
 */
static CharPtr FindTextPortionLocationInString (CharPtr str, TextPortionPtr text_portion)
{
  CharPtr start;

  if (str == NULL || text_portion == NULL) {
    return NULL;
  }

  start = str;
  if (text_portion->left_text != NULL) {
    start = StringSearch (str, text_portion->left_text);
    if (start == NULL) {
      return NULL;
    }
    if (!text_portion->include_left) {
      start += StringLen (text_portion->left_text);
    }
  }

  if (text_portion->right_text != NULL
      && StringSearch (start, text_portion->right_text) == NULL) {
    return NULL;
  }
  return start;
}
|
|
2181 |
||
2182 |
||
2183 |
/* Removes, in place, the portion of src_text described by text_portion.
 *
 * The start of the portion is found with FindTextPortionLocationInString;
 * nothing is changed when it is not found.  With a NULL right_text the
 * string is truncated at the start of the portion.  Otherwise the text
 * that follows the portion (beginning at right_text, or just after it when
 * include_right is set) is moved down over the portion.
 *
 * The right delimiter is searched for from the start of the portion, not
 * from the start of the string: an earlier occurrence would put the copy
 * source before the destination, and the forward copy would then overwrite
 * the terminator and run past the end of the buffer.
 */
static void ReplaceStringForParse(CharPtr src_text, TextPortionPtr text_portion)
{
  CharPtr src, dst;

  if (src_text == NULL || text_portion == NULL) {
    return;
  }

  dst = FindTextPortionLocationInString (src_text, text_portion);
  if (dst == NULL) {
    return;
  }

  if (text_portion->right_text == NULL) {
    *dst = 0;
    return;
  }

  src = StringSearch (dst, text_portion->right_text);
  if (src == NULL) {
    return;
  }
  if (text_portion->include_right) {
    src += StringLen (text_portion->right_text);
  }

  /* src >= dst here, so a forward copy is safe for the overlapping range */
  while (*src != 0) {
    *dst = *src;
    dst++;
    src++;
  }
  *dst = 0;
}
|
|
2210 |
||
2211 |
||
2212 |
/* generic functions for getting string values */
|
|
2213 |
/* Returns the buffer size needed to format db_tag as "db:tag", including
 * the terminator, or 0 when db_tag is NULL.
 *
 * When the tag has no string its integer id is printed with "%d"; 11
 * characters are reserved for that, enough for a sign plus ten digits.
 */
static Int4 GetDbtagStringLen (DbtagPtr db_tag)
{
  Int4 len;

  if (db_tag == NULL)
  {
    return 0;
  }

  /* database name, ':' separator and terminator */
  len = StringLen (db_tag->db) + 2;
  if (db_tag->tag != NULL)
  {
    if (db_tag->tag->str != NULL)
    {
      len += StringLen (db_tag->tag->str);
    }
    else
    {
      len += 11;
    }
  }
  return len;
}
|
|
2236 |
||
2237 |
||
2238 |
/* Formats db_tag as a newly allocated "db:tag" string, where the tag part
 * is the tag's string if it has one and its integer id otherwise (and is
 * empty when there is no tag).  Returns NULL when db_tag is NULL, when the
 * computed length is 0, or when allocation fails.
 */
static CharPtr GetDbtagString (DbtagPtr db_tag)
{
  CharPtr text;
  Int4    size;

  if (db_tag == NULL) {
    return NULL;
  }

  size = GetDbtagStringLen (db_tag);
  if (size == 0) {
    return NULL;
  }

  text = (CharPtr) MemNew (size * sizeof (Char));
  if (text == NULL) {
    return NULL;
  }

  StringCpy (text, db_tag->db);
  StringCat (text, ":");
  if (db_tag->tag == NULL) {
    return text;
  }

  if (db_tag->tag->str != NULL) {
    StringCat (text, db_tag->tag->str);
  } else {
    sprintf (text + StringLen (text), "%d", db_tag->tag->id);
  }
  return text;
}
|
|
2266 |
||
2267 |
||
2268 |
/* generic functions for setting field values */
|
|
2269 |
/* Stores new_val into *existing_val according to existing_text.
 *
 * When *existing_val has no text, or the option is "replace old", the old
 * string is freed and replaced by a copy of new_val.  The append_* options
 * produce "<old><separator><new>" and the prefix_* options produce
 * "<new><separator><old>", with separator "; ", " ", ": " or nothing.
 * "leave old" (and any unrecognized option) changes nothing.
 *
 * Returns TRUE when *existing_val was changed, FALSE otherwise (including
 * when existing_val is NULL or allocation of the combined string fails).
 */
static Boolean SetStringValue (CharPtr PNTR existing_val, CharPtr new_val, Uint2 existing_text)
{
  CharPtr fmt = NULL;
  CharPtr first;
  CharPtr second;
  CharPtr combined;
  Boolean new_goes_first = FALSE;
  Int4    len;

  if (existing_val == NULL) {
    return FALSE;
  }

  if (StringHasNoText (*existing_val)
      || existing_text == ExistingTextOption_replace_old) {
    *existing_val = MemFree (*existing_val);
    *existing_val = StringSave (new_val);
    return TRUE;
  }

  /* pick the layout of the combined string */
  switch (existing_text) {
    case ExistingTextOption_append_semi :
      fmt = "%s; %s";
      break;
    case ExistingTextOption_append_space :
      fmt = "%s %s";
      break;
    case ExistingTextOption_append_colon :
      fmt = "%s: %s";
      break;
    case ExistingTextOption_append_none :
      fmt = "%s%s";
      break;
    case ExistingTextOption_prefix_semi :
      fmt = "%s; %s";
      new_goes_first = TRUE;
      break;
    case ExistingTextOption_prefix_space :
      fmt = "%s %s";
      new_goes_first = TRUE;
      break;
    case ExistingTextOption_prefix_colon :
      fmt = "%s: %s";
      new_goes_first = TRUE;
      break;
    case ExistingTextOption_prefix_none :
      fmt = "%s%s";
      new_goes_first = TRUE;
      break;
    case ExistingTextOption_leave_old :
    default :
      return FALSE;
  }

  if (new_goes_first) {
    first = new_val;
    second = *existing_val;
  } else {
    first = *existing_val;
    second = new_val;
  }

  /* every format holds two "%s" (four characters that are replaced) plus
   * the separator; one extra character is needed for the terminator, so
   * the separator-plus-terminator size is the format length minus three
   */
  len = StringLen (new_val) + StringLen (*existing_val) + StringLen (fmt) - 3;
  combined = (CharPtr) MemNew (sizeof (Char) * len);
  if (combined == NULL) {
    return FALSE;
  }

  sprintf (combined, fmt, first, second);
  MemFree (*existing_val);
  *existing_val = combined;
  return TRUE;
}
|
|
2376 |
||
2377 |
||
2378 |
/* Applies new_val to a ValNode list of strings.
 *
 * - An empty list with an empty constraint simply receives new_val.
 * - "append semi" adds new_val as a new last item, "prefix semi" as a new
 *   first item, and "replace old" replaces the whole list with the single
 *   item new_val; each of these happens only when the list as a whole
 *   matches the constraint.
 * - "leave old" changes nothing.
 * - Every other option is applied with SetStringValue to each individual
 *   item that matches the constraint.
 *
 * Returns TRUE when the list was changed, FALSE otherwise (including when
 * list is NULL).
 */
static Boolean SetStringsInValNodeStringList (ValNodePtr PNTR list, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text)
{
  ValNodePtr node;
  CharPtr    item;
  Boolean    changed = FALSE;

  if (list == NULL) {
    return FALSE;
  }

  if (*list == NULL && IsStringConstraintEmpty (scp)) {
    ValNodeAddPointer (list, 0, StringSave (new_val));
    return TRUE;
  }

  switch (existing_text) {
    case ExistingTextOption_append_semi :
      if (!DoesStringListMatchConstraint (*list, scp)) {
        return FALSE;
      }
      ValNodeAddPointer (list, 0, StringSave (new_val));
      return TRUE;

    case ExistingTextOption_prefix_semi :
      if (!DoesStringListMatchConstraint (*list, scp)) {
        return FALSE;
      }
      node = ValNodeNew (NULL);
      node->data.ptrvalue = StringSave (new_val);
      node->next = *list;
      *list = node;
      return TRUE;

    case ExistingTextOption_replace_old :
      if (!DoesStringListMatchConstraint (*list, scp)) {
        return FALSE;
      }
      *list = ValNodeFreeData (*list);
      node = ValNodeNew (NULL);
      node->data.ptrvalue = StringSave (new_val);
      *list = node;
      return TRUE;

    case ExistingTextOption_leave_old :
      return FALSE;

    default :
      break;
  }

  /* remaining options edit the matching items one at a time */
  for (node = *list; node != NULL; node = node->next) {
    item = (CharPtr) node->data.ptrvalue;
    if (!DoesStringMatchConstraint (item, scp)) {
      continue;
    }
    if (SetStringValue (&item, new_val, existing_text)) {
      changed = TRUE;
    }
    node->data.ptrvalue = item;
  }
  return changed;
}
|
|
2427 |
||
2428 |
||
2429 |
/* Applies new_val to the values of qualifiers in a GBQual list.
 *
 * For a "legal qual" field the qualifier name is looked up from the
 * field's intvalue; every qualifier with that name whose value matches scp
 * is updated with SetStringValue.  If nothing was updated and scp is NULL
 * or has a NULL match_text, a new qualifier with that name and new_val is
 * appended to the list.
 *
 * For an "illegal qual" field, field->data.ptrvalue is used as a string
 * constraint on the qualifier name; every qualifier whose name matches it
 * and whose value matches scp is updated.
 *
 * Returns TRUE when any value was set, FALSE otherwise (including when
 * list or field is NULL, or the legal qualifier has no GBQual equivalent).
 */
static Boolean SetStringInGBQualList (GBQualPtr PNTR list, ValNodePtr field, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text)
{
  Boolean   rval = FALSE;
  Int4      gbqual;
  CharPtr   qual_name;
  GBQualPtr gbq, last_gbq = NULL;

  /* list is dereferenced below, so it must be checked like field */
  if (list == NULL || field == NULL) {
    return FALSE;
  }

  if (field->choice == FeatQualChoice_legal_qual) {
    gbqual = GetGBQualFromFeatQual (field->data.intvalue);
    if (gbqual < 0) {
      return FALSE;
    }
    qual_name = ParFlat_GBQual_names [gbqual].name;

    for (gbq = *list; gbq != NULL; gbq = gbq->next) {
      if (StringCmp (gbq->qual, qual_name) == 0
          && DoesStringMatchConstraint (gbq->val, scp)) {
        rval |= SetStringValue (&(gbq->val), new_val, existing_text);
      }
      last_gbq = gbq;   /* remembered so a new qualifier can be appended */
    }

    if (!rval && (scp == NULL || scp->match_text == NULL)) {
      gbq = GBQualNew ();
      gbq->qual = StringSave (qual_name);
      gbq->val = StringSave (new_val);
      if (last_gbq == NULL) {
        *list = gbq;
      } else {
        last_gbq->next = gbq;
      }
      rval = TRUE;
    }
  } else if (field->choice == FeatQualChoice_illegal_qual) {
    for (gbq = *list; gbq != NULL; gbq = gbq->next) {
      if (DoesStringMatchConstraint (gbq->qual, field->data.ptrvalue)
          && DoesStringMatchConstraint (gbq->val, scp)) {
        rval |= SetStringValue (&(gbq->val), new_val, existing_text);
      }
    }
  }

  return rval;
}
|
|
2473 |
||
2474 |
||
2475 |
/* Returns TRUE when str has text and consists only of decimal digits,
 * FALSE otherwise.
 *
 * Each character is converted to unsigned char before classification
 * because passing a negative char value to isdigit is undefined.
 */
static Boolean IsAllDigits (CharPtr str)
{
  CharPtr cp;

  if (StringHasNoText (str)) {
    return FALSE;
  }

  for (cp = str; *cp != 0; cp++) {
    if (!isdigit ((unsigned char) *cp)) {
      return FALSE;
    }
  }
  return TRUE;
}
|
|
2491 |
||
2492 |
||
2493 |
/* Applies value to an ObjectId according to existing_text.
 *
 * The current content (the id printed as text when it is positive, the
 * string otherwise) is combined with value by SetStringValue.  When that
 * changes the text, the ObjectId is rewritten: an all-digit result that
 * fits in a signed 32-bit integer is stored as the integer id, anything
 * else as the string.
 *
 * All-digit results too large for 32 bits are kept as a string because
 * converting them with atoi would overflow, which is undefined.
 *
 * Returns TRUE when the ObjectId was changed, FALSE otherwise (including
 * when oip is NULL).
 */
static Boolean SetObjectIdString (ObjectIdPtr oip, CharPtr value, Uint2 existing_text)
{
  Boolean rval = FALSE;
  Char    num[15];
  CharPtr tmp = NULL;
  Int4    num_digits;
  Boolean fits_in_id;

  if (oip == NULL) {
    return FALSE;
  }

  if (oip->id > 0) {
    sprintf (num, "%d", oip->id);
    tmp = StringSave (num);
  } else {
    tmp = StringSaveNoNull (oip->str);
  }

  if (SetStringValue (&tmp, value, existing_text)) {
    oip->str = MemFree (oip->str);
    oip->id = 0;

    fits_in_id = FALSE;
    if (IsAllDigits (tmp)) {
      /* up to nine digits always fit; ten digits fit only up to the
       * largest 32-bit value (equal-length digit strings compare in
       * numeric order)
       */
      num_digits = StringLen (tmp);
      if (num_digits < 10
          || (num_digits == 10 && StringCmp (tmp, "2147483647") <= 0)) {
        fits_in_id = TRUE;
      }
    }

    if (fits_in_id) {
      oip->id = atoi (tmp);
    } else {
      /* ownership of tmp moves to the ObjectId */
      oip->str = tmp;
      tmp = NULL;
    }
    rval = TRUE;
  }
  tmp = MemFree (tmp);
  return rval;
}
|
|
2523 |
||
2524 |
||
2525 |
/* SetDbtagString
 * Applies value to db_tag, honoring existing_text.
 *
 *   value contains ':'  - the text before the first colon is applied to
 *                         db_tag->db and the text after it to db_tag->tag.
 *   value has no colon  - the value is first applied to a copy of
 *                         db_tag->db; the copy replaces db_tag->db only when
 *                         IsDbxrefValid returns non-zero for it.  Otherwise
 *                         the value is applied to db_tag->tag instead.
 *
 * db_tag->tag is created with ObjectIdNew when it is needed and missing.
 * Returns the combined result of the setter calls; FALSE when db_tag is NULL
 * or value has no text.
 */
static Boolean SetDbtagString (DbtagPtr db_tag, CharPtr value, Uint2 existing_text)
{
  Boolean changed = FALSE;
  CharPtr colon;
  CharPtr candidate_db;
  CharPtr pieces;

  if (db_tag == NULL || StringHasNoText (value)) {
    return FALSE;
  }

  if (StringChr (value, ':') != NULL) {
    /* split a private copy in place: "db" '\0' "tag" */
    pieces = StringSave (value);
    colon = StringChr (pieces, ':');
    *colon = 0;
    colon++;

    changed = SetStringValue (&(db_tag->db), pieces, existing_text);
    if (db_tag->tag == NULL) {
      db_tag->tag = ObjectIdNew ();
    }
    changed |= SetObjectIdString (db_tag->tag, colon, existing_text);

    pieces = MemFree (pieces);
    return changed;
  }

  /* no colon: try the value as a database name first */
  candidate_db = StringSave (db_tag->db);
  if (SetStringValue (&candidate_db, value, existing_text)
      && IsDbxrefValid (candidate_db, NULL, NULL, TRUE, NULL) != 0) {
    db_tag->db = MemFree (db_tag->db);
    db_tag->db = candidate_db;
    candidate_db = NULL;
    changed = TRUE;
  } else {
    /* not accepted as a database name, so treat it as the tag */
    if (db_tag->tag == NULL) {
      db_tag->tag = ObjectIdNew ();
    }
    changed = SetObjectIdString (db_tag->tag, value, existing_text);
  }
  candidate_db = MemFree (candidate_db);

  return changed;
}
|
|
2570 |
||
2571 |
||
2572 |
/* SetDbxrefString
 * Applies value to the db_xref list of sfp.
 *
 * A new Dbtag is appended when there is no usable string constraint
 * (scp is NULL or its match_text has no text) and either the feature has no
 * db_xrefs yet or existing_text is ExistingTextOption_append_semi.
 * Otherwise the value is applied to every existing db_xref whose string form
 * (GetDbtagString) satisfies scp; with scp == NULL every entry is updated.
 *
 * Returns TRUE when at least one SetDbtagString call returned TRUE.
 */
static Boolean SetDbxrefString (SeqFeatPtr sfp, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
{
  ValNodePtr entry;
  Boolean    changed = FALSE;
  Boolean    no_constraint;
  Boolean    matches;
  DbtagPtr   new_tag;
  CharPtr    text;

  if (sfp == NULL) {
    return FALSE;
  }

  no_constraint = (Boolean) (scp == NULL || StringHasNoText (scp->match_text));

  if (no_constraint
      && (sfp->dbxref == NULL || existing_text == ExistingTextOption_append_semi)) {
    new_tag = DbtagNew ();
    changed = SetDbtagString (new_tag, value, existing_text);
    if (changed) {
      ValNodeAddPointer (&(sfp->dbxref), 0, new_tag);
    } else {
      new_tag = DbtagFree (new_tag);
    }
    return changed;
  }

  for (entry = sfp->dbxref; entry != NULL; entry = entry->next) {
    if (scp != NULL) {
      text = GetDbtagString (entry->data.ptrvalue);
      matches = DoesStringMatchConstraint (text, scp);
      text = MemFree (text);
      if (!matches) {
        continue;
      }
    }
    changed |= SetDbtagString (entry->data.ptrvalue, value, existing_text);
  }

  return changed;
}
|
|
2608 |
||
2609 |
||
2610 |
||
2611 |
/* GetFirstValNodeStringMatch
 * Walks the list starting at vnp and returns a StringSave copy of the first
 * entry whose string has text and satisfies scp.  Returns NULL when no entry
 * yields a copy.
 */
static CharPtr GetFirstValNodeStringMatch (ValNodePtr vnp, StringConstraintPtr scp)
{
  CharPtr    found = NULL;
  ValNodePtr node;

  for (node = vnp; node != NULL && found == NULL; node = node->next) {
    if (StringHasNoText (node->data.ptrvalue)) {
      continue;
    }
    if (DoesStringMatchConstraint (node->data.ptrvalue, scp)) {
      found = StringSave (node->data.ptrvalue);
    }
  }
  return found;
}
|
|
2623 |
||
2624 |
||
2625 |
/* RemoveValNodeStringMatch
 * Unlinks from *list every node whose string has text and satisfies scp, and
 * releases each unlinked node with ValNodeFreeData.
 * Returns TRUE when at least one node was removed; FALSE when list is NULL
 * or nothing matched.
 */
static Boolean RemoveValNodeStringMatch (ValNodePtr PNTR list, StringConstraintPtr scp)
{
  ValNodePtr PNTR link;   /* the pointer that currently refers to node */
  ValNodePtr      node;
  Boolean         removed_any = FALSE;

  if (list == NULL) return FALSE;

  link = list;
  while ((node = *link) != NULL) {
    if (!StringHasNoText (node->data.ptrvalue)
        && DoesStringMatchConstraint (node->data.ptrvalue, scp)) {
      /* splice the node out, then detach it so only this node is freed */
      *link = node->next;
      node->next = NULL;
      node = ValNodeFreeData (node);
      removed_any = TRUE;
    } else {
      link = &(node->next);
    }
  }
  return removed_any;
}
|
|
2651 |
||
2652 |
||
2653 |
/* GetFirstGBQualMatch
 * Returns a StringSave copy of the value of the first qualifier in the list
 * whose name equals qual_name (case-insensitive, via StringICmp), whose
 * value has text, and whose value satisfies scp.  Returns NULL when no
 * qualifier yields a copy.
 */
static CharPtr GetFirstGBQualMatch (GBQualPtr qual, CharPtr qual_name, StringConstraintPtr scp)
{
  CharPtr   found = NULL;
  GBQualPtr gbq;

  for (gbq = qual; gbq != NULL && found == NULL; gbq = gbq->next) {
    if (StringICmp (gbq->qual, qual_name) != 0) {
      continue;
    }
    if (StringHasNoText (gbq->val)) {
      continue;
    }
    if (DoesStringMatchConstraint (gbq->val, scp)) {
      found = StringSave (gbq->val);
    }
  }
  return found;
}
|
|
2666 |
||
2667 |
||
2668 |
/* GetFirstGBQualMatchConstraintName
 * Like GetFirstGBQualMatch, except that the qualifier name is selected with
 * a string constraint (qual_name) rather than an exact name.  Returns a
 * StringSave copy of the first non-blank value that satisfies scp, or NULL.
 */
static CharPtr GetFirstGBQualMatchConstraintName (GBQualPtr qual, StringConstraintPtr qual_name, StringConstraintPtr scp)
{
  CharPtr   found = NULL;
  GBQualPtr gbq;

  for (gbq = qual; gbq != NULL && found == NULL; gbq = gbq->next) {
    if (!DoesStringMatchConstraint (gbq->qual, qual_name)) {
      continue;
    }
    if (StringHasNoText (gbq->val)) {
      continue;
    }
    if (DoesStringMatchConstraint (gbq->val, scp)) {
      found = StringSave (gbq->val);
    }
  }
  return found;
}
|
|
2681 |
||
2682 |
||
2683 |
/* RemoveGBQualMatch
 * Unlinks from *list every qualifier whose name equals qual_name
 * (case-insensitive), whose value has text, and whose value satisfies scp;
 * each unlinked qualifier is released with GBQualFree.
 * Returns TRUE when at least one qualifier was removed; FALSE when list is
 * NULL or nothing matched.
 */
static Boolean RemoveGBQualMatch (GBQualPtr PNTR list, CharPtr qual_name, StringConstraintPtr scp)
{
  GBQualPtr PNTR link;   /* the pointer that currently refers to gbq */
  GBQualPtr      gbq;
  Boolean        removed_any = FALSE;

  if (list == NULL) return FALSE;

  link = list;
  while ((gbq = *link) != NULL) {
    if (StringICmp (gbq->qual, qual_name) == 0
        && !StringHasNoText (gbq->val)
        && DoesStringMatchConstraint (gbq->val, scp)) {
      *link = gbq->next;
      gbq->next = NULL;
      gbq = GBQualFree (gbq);
      removed_any = TRUE;
    } else {
      link = &(gbq->next);
    }
  }
  return removed_any;
}
|
|
2710 |
||
2711 |
||
2712 |
/* RemoveGBQualMatchConstraintName
 * Like RemoveGBQualMatch, except that the qualifier name is selected with a
 * string constraint (qual_name).  Every qualifier whose name satisfies
 * qual_name and whose non-blank value satisfies scp is unlinked from *list
 * and released with GBQualFree.
 * Returns TRUE when at least one qualifier was removed.
 */
static Boolean RemoveGBQualMatchConstraintName (GBQualPtr PNTR list, StringConstraintPtr qual_name, StringConstraintPtr scp)
{
  GBQualPtr PNTR link;   /* the pointer that currently refers to gbq */
  GBQualPtr      gbq;
  Boolean        removed_any = FALSE;

  if (list == NULL) return FALSE;

  link = list;
  while ((gbq = *link) != NULL) {
    if (DoesStringMatchConstraint (gbq->qual, qual_name)
        && !StringHasNoText (gbq->val)
        && DoesStringMatchConstraint (gbq->val, scp)) {
      *link = gbq->next;
      gbq->next = NULL;
      gbq = GBQualFree (gbq);
      removed_any = TRUE;
    } else {
      link = &(gbq->next);
    }
  }
  return removed_any;
}
|
|
2739 |
||
2740 |
||
2741 |
/* GetDbxrefString
 * Builds a single string from the db_xrefs of sfp: the string form
 * (GetDbtagString) of every db_xref that satisfies scp, joined with ';'.
 *
 * Returns a newly allocated string that the caller must free, or NULL when
 * sfp is NULL, the feature has no db_xrefs, nothing matches, or the result
 * buffer cannot be allocated.
 */
static CharPtr GetDbxrefString (SeqFeatPtr sfp, StringConstraintPtr scp)
{
  ValNodePtr vnp;
  Int4       len = 0;
  Int4       str_len;
  CharPtr    str = NULL, cp;

  if (sfp == NULL || sfp->dbxref == NULL) {
    return NULL;
  }

  /* first pass: add up the space needed, one extra character per entry
   * for its ';' separator
   */
  for (vnp = sfp->dbxref; vnp != NULL; vnp = vnp->next) {
    cp = GetDbtagString (vnp->data.ptrvalue);
    if (cp != NULL && DoesStringMatchConstraint(cp, scp)) {
      len += StringLen (cp) + 1;
    }
    cp = MemFree (cp);
  }

  if (len == 0) {
    return NULL;
  }

  str = (CharPtr) MemNew ((len + 1) * sizeof (Char));
  if (str == NULL) {
    return NULL;
  }

  /* second pass: append "<dbtag>;" for every matching entry */
  for (vnp = sfp->dbxref; vnp != NULL; vnp = vnp->next) {
    cp = GetDbtagString (vnp->data.ptrvalue);
    if (cp != NULL && DoesStringMatchConstraint(cp, scp)) {
      StringCat (str, cp);
      StringCat (str, ";");
    }
    cp = MemFree (cp);
  }

  str_len = StringLen (str);
  if (str_len > 1) {
    /* remove final semicolon: it is the LAST character (index str_len - 1).
     * Terminating at str_len - 2 would also cut off the last character of
     * the final db_xref, e.g. "GI:123;" would become "GI:12".
     */
    str [str_len - 1] = 0;
  }
  return str;
}
|
|
2780 |
||
2781 |
||
2782 |
/* RemoveDbxrefString
 * Removes from sfp->dbxref every db_xref whose string form (GetDbtagString)
 * satisfies scp, freeing both the Dbtag and its list node.
 *
 * Returns TRUE when at least one db_xref was removed; FALSE when sfp is
 * NULL, the feature has no db_xrefs, or nothing matched.
 */
static Boolean RemoveDbxrefString (SeqFeatPtr sfp, StringConstraintPtr scp)
{
  ValNodePtr vnp, vnp_prev = NULL, vnp_next;
  CharPtr    cp;
  Boolean    rval = FALSE;

  if (sfp == NULL || sfp->dbxref == NULL) {
    return FALSE;
  }

  vnp = sfp->dbxref;
  while (vnp != NULL) {
    /* remember the successor before the node is possibly freed */
    vnp_next = vnp->next;
    cp = GetDbtagString (vnp->data.ptrvalue);
    if (DoesStringMatchConstraint(cp, scp)) {
      if (vnp_prev == NULL) {
        sfp->dbxref = vnp->next;
      } else {
        vnp_prev->next = vnp->next;
      }
      /* detach the node so that only this one is released */
      vnp->next = NULL;
      vnp->data.ptrvalue = DbtagFree (vnp->data.ptrvalue);
      vnp = ValNodeFree (vnp);
      rval = TRUE;
    } else {
      vnp_prev = vnp;
    }
    /* the string form is a temporary copy owned by this loop */
    cp = MemFree (cp);
    /* advance; without this step the loop would never leave a
     * non-matching node and would stop after the first removal
     */
    vnp = vnp_next;
  }
  return rval;
}
|
|
2812 |
||
2813 |
||
2814 |
/* GetRNAProductString
 * Returns a newly allocated copy of the product name of an RNA feature, or
 * NULL when sfp is not an RNA feature with data or no product satisfies scp.
 *
 * Lookup order:
 *   1. When the RnaRef has no extension, or its name extension
 *      (ext.choice == 1) is blank or one of "ncRNA" / "tmRNA" / "misc_RNA",
 *      the first matching "product" GenBank qualifier is used.
 *   2. Otherwise, or when step 1 found nothing:
 *        - ext.choice == 1 with any other non-blank name: that name;
 *        - ext.choice == 2 with data: "tRNA-" followed by the feature
 *          context label, unless the label is blank or exactly "tRNA".
 *      The candidate from step 2 is discarded when it does not satisfy scp.
 */
static CharPtr GetRNAProductString (SeqFeatPtr sfp, StringConstraintPtr scp)
{
  RnaRefPtr         rrp;
  SeqMgrFeatContext context;
  CharPtr           str = NULL;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL) {
    return NULL;
  }

  rrp = sfp->data.value.ptrvalue;
  if (rrp->ext.choice == 0
      || (rrp->ext.choice == 1 && StringHasNoText (rrp->ext.value.ptrvalue))
      || (rrp->ext.choice == 1
          && (StringCmp (rrp->ext.value.ptrvalue, "ncRNA") == 0
              || StringCmp (rrp->ext.value.ptrvalue, "tmRNA") == 0
              || StringCmp (rrp->ext.value.ptrvalue, "misc_RNA") == 0))) {
    str = GetFirstGBQualMatch (sfp->qual, "product", scp);
  }

  if (str == NULL) {
    if (rrp->ext.choice == 1 && !StringHasNoText (rrp->ext.value.ptrvalue)
        && StringCmp (rrp->ext.value.ptrvalue, "ncRNA") != 0
        && StringCmp (rrp->ext.value.ptrvalue, "tmRNA") != 0
        && StringCmp (rrp->ext.value.ptrvalue, "misc_RNA") != 0) {
      str = StringSave (rrp->ext.value.ptrvalue);
    } else if (rrp->ext.choice == 2 && rrp->ext.value.ptrvalue != NULL) {
      if (SeqMgrGetDesiredFeature (sfp->idx.entityID, NULL, 0, 0, sfp, &context) != NULL
          && !StringHasNoText (context.label)
          && StringCmp (context.label, "tRNA") != 0) {
        /* room for "tRNA-" (5), the label, and the terminating NUL */
        str = (CharPtr) MemNew (sizeof (Char) * (StringLen (context.label) + 6));
        if (str != NULL) {
          sprintf (str, "tRNA-%s", context.label);
        }
      }
    }
    if (!DoesStringMatchConstraint(str, scp)) {
      str = MemFree (str);
    }
  }
  return str;
}
|
|
2854 |
||
2855 |
||
2856 |
/* IsParseabletRNAName
 * Decides whether name_string can be stored as a parsed tRNA name.
 * A blank name is accepted.  Any other name must start with "trna-"
 * (case-insensitive), be exactly 8 characters long, and be given a non-zero
 * result by ParseTRnaString.
 */
static Boolean IsParseabletRNAName (CharPtr name_string)
{
  if (StringHasNoText(name_string)) {
    return TRUE;
  }
  if (StringNICmp (name_string, "trna-", 5) != 0) {
    return FALSE;
  }
  if (StringLen (name_string) != 8) {
    return FALSE;
  }
  if (ParseTRnaString (name_string, NULL, NULL, TRUE) == 0) {
    return FALSE;
  }
  return TRUE;
}
|
|
2879 |
||
2880 |
||
2881 |
/* SetRNAProductString
 * Applies new_val as the product of an RNA feature, honoring scp and
 * existing_text.  Returns TRUE when a product was set, FALSE otherwise
 * (including when sfp is not an RNA feature with data).
 *
 * Order of attempts:
 *   1. When the RnaRef has no extension, or its name extension
 *      (ext.choice == 1) is blank or one of "ncRNA" / "tmRNA" / "misc_RNA",
 *      the "product" GenBank qualifier is set through SetStringInGBQualList.
 *   2. When step 1 was skipped or returned FALSE:
 *        - no extension / blank name and no constraint text: new_val
 *          becomes the name extension;
 *        - another name extension that satisfies scp: the name is edited
 *          in place;
 *        - tRNA extension (ext.choice == 2): the current product string is
 *          edited.  A parseable result is stored as the amino acid; an
 *          unparseable one either replaces the tRNA with a plain name (when
 *          the tRNA carries no anticodon and no codons) or is stored in the
 *          "product" qualifier.
 */
static Boolean SetRNAProductString (SeqFeatPtr sfp, StringConstraintPtr scp, CharPtr new_val, Uint2 existing_text)
{
  RnaRefPtr rrp;
  Boolean   rval = FALSE;
  ValNode   vn;
  CharPtr   cp, tmp;
  tRNAPtr   trp;
  Boolean   justTrnaText = FALSE;
  Uint1     codon [6];

  if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL) {
    return FALSE;
  }

  /* vn is a stack-allocated stand-in for a "legal product" field choice;
   * clear it so that no member is left indeterminate
   */
  MemSet (&vn, 0, sizeof (vn));
  vn.choice = FeatQualChoice_legal_qual;
  vn.data.intvalue = Feat_qual_legal_product;

  rrp = sfp->data.value.ptrvalue;
  if (rrp->ext.choice == 0
      || (rrp->ext.choice == 1 && StringHasNoText (rrp->ext.value.ptrvalue))
      || (rrp->ext.choice == 1
          && (StringCmp (rrp->ext.value.ptrvalue, "ncRNA") == 0
              || StringCmp (rrp->ext.value.ptrvalue, "tmRNA") == 0
              || StringCmp (rrp->ext.value.ptrvalue, "misc_RNA") == 0))) {
    rval = SetStringInGBQualList (&(sfp->qual), &vn, scp, new_val, existing_text);
  }

  if (!rval) {
    if ((rrp->ext.choice == 0 || (rrp->ext.choice == 1 && StringHasNoText (rrp->ext.value.ptrvalue)))
        && (scp == NULL || scp->match_text == NULL)) {
      rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
      rrp->ext.value.ptrvalue = StringSave (new_val);
      rrp->ext.choice = 1;
      rval = TRUE;
    } else if (rrp->ext.choice == 1
               && StringCmp (rrp->ext.value.ptrvalue, "ncRNA") != 0
               && StringCmp (rrp->ext.value.ptrvalue, "tmRNA") != 0
               && StringCmp (rrp->ext.value.ptrvalue, "misc_RNA") != 0
               && DoesStringMatchConstraint (rrp->ext.value.ptrvalue, scp)) {
      cp = rrp->ext.value.ptrvalue;
      /* NOTE(review): the result of SetStringValue is not propagated; this
       * branch has always reported TRUE regardless.  Confirm whether that
       * is intended before changing it.
       */
      SetStringValue (&cp, new_val, existing_text);
      rrp->ext.value.ptrvalue = cp;
      rval = TRUE;
    } else if (rrp->ext.choice == 2) {
      tmp = GetRNAProductString (sfp, NULL);
      if (DoesStringMatchConstraint (tmp, scp)
          && SetStringValue (&tmp, new_val, existing_text)) {
        trp = (tRNAPtr) rrp->ext.value.ptrvalue;
        if (trp == NULL) {
          trp = MemNew (sizeof (tRNA));
          if (trp != NULL) {
            trp->aatype = 0;
            MemSet (trp->codon, 255, sizeof (trp->codon));
            trp->anticodon = NULL;
            rrp->ext.value.ptrvalue = trp;
          }
        }

        if (trp == NULL) {
          /* allocation failed: nothing can be recorded, report no change */
        } else if (!IsParseabletRNAName(tmp)) {
          if (trp->anticodon == NULL
              && trp->codon[0] == 255
              && trp->codon[1] == 255
              && trp->codon[2] == 255
              && trp->codon[3] == 255
              && trp->codon[4] == 255
              && trp->codon[5] == 255) {
            /* the tRNA holds nothing besides the name, so replace it with
             * a plain name extension that takes ownership of tmp
             */
            trp = MemFree (trp);
            rrp->ext.choice = 1;
            rrp->ext.value.ptrvalue = tmp;
            tmp = NULL;
            rval = TRUE;
          } else {
            /* keep the codon/anticodon data; the name goes to the qualifier */
            if (SetStringInGBQualList (&(sfp->qual), &vn, scp, new_val, existing_text)) {
              trp->aa = 0;
              rval = TRUE;
            }
          }
        } else {
          trp->aa = ParseTRnaString (tmp, &justTrnaText, codon, TRUE);
          trp->aatype = 2;
          rval = TRUE;
        }
      }
      /* tmp is owned here on every path, including the ones where the
       * constraint did not match or the value was not changed
       */
      tmp = MemFree (tmp);
    }
  }
  return rval;
}
|
|
2975 |
||
2976 |
||
2977 |
/* RemoveRNAProductString
 * Removes the product of an RNA feature when it satisfies scp.
 *
 * When the RnaRef has no extension, or its name extension (ext.choice == 1)
 * is blank or one of "ncRNA" / "tmRNA" / "misc_RNA", matching "product"
 * GenBank qualifiers are removed.  When the name extension holds any other
 * name and that name satisfies scp, the name is freed and the extension is
 * cleared.  Other extension kinds are left untouched.
 *
 * Returns TRUE when something was removed.
 */
static Boolean RemoveRNAProductString (SeqFeatPtr sfp, StringConstraintPtr scp)
{
  RnaRefPtr rrp;
  CharPtr   name = NULL;
  Boolean   has_name_ext;
  Boolean   placeholder_name = FALSE;
  Boolean   removed = FALSE;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_RNA || sfp->data.value.ptrvalue == NULL) {
    return FALSE;
  }

  rrp = sfp->data.value.ptrvalue;
  has_name_ext = (Boolean) (rrp->ext.choice == 1);
  if (has_name_ext) {
    name = (CharPtr) rrp->ext.value.ptrvalue;
    /* blank or generic RNA-class names do not carry the product themselves */
    placeholder_name = (Boolean) (StringHasNoText (name)
                                  || StringCmp (name, "ncRNA") == 0
                                  || StringCmp (name, "tmRNA") == 0
                                  || StringCmp (name, "misc_RNA") == 0);
  }

  if (rrp->ext.choice == 0 || placeholder_name) {
    removed = RemoveGBQualMatch (&(sfp->qual), "product", scp);
  } else if (has_name_ext && DoesStringMatchConstraint(name, scp)) {
    rrp->ext.value.ptrvalue = MemFree (rrp->ext.value.ptrvalue);
    rrp->ext.choice = 0;
    removed = TRUE;
  }
  return removed;
}
|
|
3008 |
||
3009 |
||
3010 |
/* GetProtFeature
 * Finds a protein feature for protbsp.  The first FEATDEF_PROT feature
 * reported by SeqMgrGetNextFeature is preferred; when there is none, the
 * annotations of type 1 attached to the Bioseq are scanned for the first
 * SEQFEAT_PROT feature that has a ProtRef whose processed field is 0.
 * Returns NULL when protbsp is NULL or no such feature exists.
 */
static SeqFeatPtr GetProtFeature (BioseqPtr protbsp)
{
  SeqMgrFeatContext fcontext;
  SeqAnnotPtr       annot;
  SeqFeatPtr        candidate;
  ProtRefPtr        prp;

  if (protbsp == NULL) return NULL;

  candidate = SeqMgrGetNextFeature (protbsp, NULL, 0, FEATDEF_PROT, &fcontext);

  /* fall back to a direct scan only while nothing has been found */
  for (annot = protbsp->annot; annot != NULL && candidate == NULL; annot = annot->next) {
    if (annot->type != 1) {
      continue;
    }
    for (candidate = annot->data; candidate != NULL; candidate = candidate->next) {
      if (candidate->data.choice == SEQFEAT_PROT
          && (prp = candidate->data.value.ptrvalue) != NULL
          && prp->processed == 0) {
        break;
      }
    }
  }
  return candidate;
}
|
|
3037 |
||
3038 |
||
3039 |
/* GetProtRefForFeature
 * Returns the ProtRef associated with sfp, or NULL when there is none.
 *   - protein feature: the feature's own data;
 *   - coding region: the data of the first SEQFEAT_PROT xref; when that
 *     yields nothing and the feature has a product, the ProtRef of the
 *     protein feature found on the product Bioseq (GetProtFeature);
 *   - any other feature: NULL.
 */
static ProtRefPtr GetProtRefForFeature (SeqFeatPtr sfp)
{
  SeqFeatPtr     product_feat;
  ProtRefPtr     result = NULL;
  SeqFeatXrefPtr xr;

  if (sfp == NULL) return NULL;

  if (sfp->data.choice == SEQFEAT_PROT) {
    return (ProtRefPtr) sfp->data.value.ptrvalue;
  }
  if (sfp->data.choice != SEQFEAT_CDREGION) {
    return NULL;
  }

  for (xr = sfp->xref; xr != NULL; xr = xr->next) {
    if (xr->data.choice == SEQFEAT_PROT) {
      break;
    }
  }
  if (xr != NULL) {
    result = xr->data.value.ptrvalue;
  }

  if (result == NULL && sfp->product != NULL) {
    product_feat = GetProtFeature (BioseqFindFromSeqLoc (sfp->product));
    if (product_feat != NULL) {
      result = product_feat->data.value.ptrvalue;
    }
  }
  return result;
}
|
|
3068 |
||
3069 |
||
3070 |
NLM_EXTERN CharPtr GetQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp) |
|
3071 |
{
|
|
3072 |
CharPtr str = NULL; |
|
3073 |
GeneRefPtr grp = NULL; |
|
3074 |
ProtRefPtr prp = NULL; |
|
3075 |
Int4 gbqual; |
|
3076 |
||
3077 |
if (sfp == NULL || field == NULL || field->field == NULL) |
|
3078 |
{
|
|
3079 |
return NULL; |
|
3080 |
}
|
|
3081 |
if (field->type != Feature_type_any && sfp->idx.subtype != GetFeatdefFromFeatureType (field->type)) |
|
3082 |
{
|
|
3083 |
return NULL; |
|
3084 |
}
|
|
3085 |
||
3086 |
// for gene fields
|
|
3087 |
if (sfp->idx.subtype == FEATDEF_GENE) { |
|
3088 |
grp = sfp->data.value.ptrvalue; |
|
3089 |
} else { |
|
3090 |
grp = SeqMgrGetGeneXref (sfp); |
|
3091 |
}
|
|
3092 |
||
3093 |
// for protein fields
|
|
3094 |
prp = GetProtRefForFeature (sfp); |
|
3095 |
||
3096 |
/* fields common to all features */
|
|
3097 |
/* note, also known as comment */
|
|
3098 |
if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_note) |
|
3099 |
|| (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("note", field->field->data.ptrvalue))) |
|
3100 |
{
|
|
3101 |
if (!StringHasNoText (sfp->comment) && DoesStringMatchConstraint(sfp->comment, scp)) |
|
3102 |
{
|
|
3103 |
str = StringSave (sfp->comment); |
|
3104 |
}
|
|
3105 |
}
|
|
3106 |
/* db-xref */
|
|
3107 |
if (str == NULL |
|
3108 |
&& ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_db_xref) |
|
3109 |
|| (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("db_xref", field->field->data.ptrvalue)))) |
|
3110 |
{
|
|
3111 |
str = GetDbxrefString (sfp, scp); |
|
3112 |
}
|
|
3113 |
/* exception */
|
|
3114 |
if (str == NULL |
|
3115 |
&& ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_exception) |
|
3116 |
|| (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("exception", field->field->data.ptrvalue)))) |
|
3117 |
{
|
|
3118 |
if (!StringHasNoText (sfp->except_text) && DoesStringMatchConstraint(sfp->except_text, scp)) |
|
3119 |
{
|
|
3120 |
str = StringSave (sfp->except_text); |
|
3121 |
}
|
|
3122 |
}
|
|
3123 |
/* evidence */
|
|
3124 |
if (str == NULL |
|
3125 |
&& ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_evidence) |
|
3126 |
|| (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("evidence", field->field->data.ptrvalue)))) |
|
3127 |
{
|
|
3128 |
if (sfp->exp_ev == 1) |
|
3129 |
{
|
|
3130 |
str = StringSave ("experimental"); |
|
3131 |
}
|
|
3132 |
else if (sfp->exp_ev == 2) |
|
3133 |
{
|
|
3134 |
str = StringSave ("non-experimental"); |
|
3135 |
}
|
|
3136 |
if (!DoesStringMatchConstraint(str, scp)) { |
|
3137 |
str = MemFree (str); |
|
3138 |
}
|
|
3139 |
}
|
|
3140 |
||
3141 |
/* fields common to some features */
|
|
3142 |
/* product */
|
|
3143 |
if (str == NULL |
|
3144 |
&& ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_product) |
|
3145 |
|| (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("product", field->field->data.ptrvalue)))) |
|
3146 |
{
|
|
3147 |
if (prp != NULL) { |
|
3148 |
str = GetFirstValNodeStringMatch (prp->name, scp); |
|
3149 |
} else if (sfp->data.choice == SEQFEAT_RNA) { |
|
3150 |
str = GetRNAProductString (sfp, scp); |
|
3151 |
}
|
|
3152 |
}
|
|
3153 |
||
3154 |
/* Gene fields */
|
|
3155 |
/* locus */
|
|
3156 |
if (str == NULL |
|
3157 |
&& ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_gene) |
|
3158 |
|| (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus", field->field->data.ptrvalue))) |
|
3159 |
&& grp != NULL) |
|
3160 |
{
|
|
3161 |
if (!StringHasNoText (grp->locus) && DoesStringMatchConstraint(grp->locus, scp)) |
|
3162 |
{
|
|
3163 |
str = StringSave (grp->locus); |
|
3164 |
}
|
|
3165 |
}
|
|
3166 |
/* description */
|
|
3167 |
if (str == NULL |
|
3168 |
&& ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_gene_description) |
|
3169 |
|| (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->field->data.ptrvalue))) |
|
3170 |
&& grp != NULL) |
|
3171 |
{
|
|
3172 |
if (!StringHasNoText (grp->desc) && DoesStringMatchConstraint(grp->desc, scp)) |
|
3173 |
{
|
|
3174 |
str = StringSave (grp->desc); |
|
3175 |
}
|
|
3176 |
}
|
|
3177 |
/* maploc */
|
|
3178 |
if (str == NULL |
|
3179 |
&& ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_map) |
|
3180 |
|| (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("map", field->field->data.ptrvalue))) |
|
3181 |
&& grp != NULL) |
|
3182 |
{
|
|
3183 |
if (!StringHasNoText (grp->maploc) && DoesStringMatchConstraint(grp->maploc, scp)) |
|
3184 |
{
|
|
3185 |
str = StringSave (grp->maploc); |
|
3186 |
}
|
|
3187 |
}
|
|
3188 |
/* allele */
|
|
3189 |
if (str == NULL |
|
3190 |
&& ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_allele) |
|
3191 |
|| (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("allele", field->field->data.ptrvalue))) |
|
3192 |
&& grp != NULL) |
|
3193 |
{
|
|
3194 |
if (!StringHasNoText (grp->allele) && DoesStringMatchConstraint(grp->allele, scp)) |
|
3195 |
{
|
|
3196 |
str = StringSave (grp->allele); |
|
3197 |
}
|
|
3198 |
}
|
|
3199 |
/* locus_tag */
|
|
3200 |
if (str == NULL |
|
3201 |
&& ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_locus_tag) |
|
3202 |
|| (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus_tag", field->field->data.ptrvalue))) |
|
3203 |
&& grp != NULL) |
|
3204 |
{
|
|
3205 |
if (!StringHasNoText (grp->locus_tag) && DoesStringMatchConstraint(grp->locus_tag, scp)) |
|
3206 |
{
|
|
3207 |
str = StringSave (grp->locus_tag); |
|
3208 |
}
|
|
3209 |
}
|
|
3210 |
/* synonym */
|
|
3211 |
if (str == NULL |
|
3212 |
&& ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_synonym) |
|
3213 |
|| (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("synonym", field->field->data.ptrvalue))) |
|
3214 |
&& grp != NULL) |
|
3215 |
{
|
|
3216 |
str = GetFirstValNodeStringMatch (grp->syn, scp); |
|
3217 |
}
|
|
3218 |
||
3219 |
||
3220 |
/* protein fields */
|
|
3221 |
/* note - product handled above */
|
|
3222 |
/* description */
|
|
3223 |
if (str == NULL |
|
3224 |
&& ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_description) |
|
3225 |
|| (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->field->data.ptrvalue))) |
|
3226 |
&& prp != NULL) |
|
3227 |
{
|
|
3228 |
if (!StringHasNoText (prp->desc) && DoesStringMatchConstraint(prp->desc, scp)) { |
|
3229 |
str = StringSave (prp->desc); |
|
3230 |
}
|
|
3231 |
}
|
|
3232 |
/* ec_number */
|
|
3233 |
if (str == NULL |
|
3234 |
&& ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_ec_number) |
|
3235 |
|| (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ec_number", field->field->data.ptrvalue))) |
|
3236 |
&& prp != NULL) |
|
3237 |
{
|
|
3238 |
str = GetFirstValNodeStringMatch (prp->ec, scp); |
|
3239 |
}
|
|
3240 |
/* activity */
|
|
3241 |
if (str == NULL |
|
3242 |
&& ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_activity) |
|
3243 |
|| (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("activity", field->field->data.ptrvalue))) |
|
3244 |
&& prp != NULL) |
|
3245 |
{
|
|
3246 |
str = GetFirstValNodeStringMatch (prp->activity, scp); |
|
3247 |
}
|
|
3248 |
||
3249 |
||
3250 |
/* actual GenBank qualifiers */
|
|
3251 |
if (str == NULL) |
|
3252 |
{
|
|
3253 |
if (field->field->choice == FeatQualChoice_legal_qual) |
|
3254 |
{
|
|
3255 |
gbqual = GetGBQualFromFeatQual (field->field->data.intvalue); |
|
3256 |
if (gbqual > -1) { |
|
3257 |
str = GetFirstGBQualMatch (sfp->qual, ParFlat_GBQual_names [gbqual].name, scp); |
|
3258 |
} else { |
|
3259 |
/* need to do something with non-qualifier qualifiers */
|
|
3260 |
}
|
|
3261 |
} else { |
|
3262 |
str = GetFirstGBQualMatchConstraintName (sfp->qual, field->field->data.ptrvalue, scp); |
|
3263 |
}
|
|
3264 |
}
|
|
3265 |
return str; |
|
3266 |
}
|
|
3267 |
||
3268 |
||
3269 |
/* RemoveQualFromFeature
 *
 * Removes the qualifier selected by "field" from the feature "sfp" when the
 * qualifier's current value satisfies the string constraint "scp".
 *
 * The qualifier is selected either by a legal-qualifier code
 * (FeatQualChoice_legal_qual, compared against Feat_qual_legal_* values) or
 * by a name constraint (FeatQualChoice_illegal_qual, matched against the
 * qualifier's textual name).  Every section below is tested independently,
 * so more than one section can run for a single call when a name constraint
 * matches several names.
 *
 * Parameters:
 *   sfp   - feature to edit; FALSE is returned if NULL.
 *   field - feature type plus qualifier selector; FALSE is returned if it or
 *           its qualifier selector is NULL, or if field->type is not
 *           Feature_type_any and does not correspond to sfp->idx.subtype.
 *   scp   - constraint handed to DoesStringMatchConstraint and to the
 *           removal helpers.
 *
 * Returns TRUE when a value was removed by the last section that applied
 * (sections that delegate to a helper assign the helper's result to the
 * return value); otherwise falls back to removing a matching GBQual from
 * sfp->qual and returns that result.
 */
static Boolean RemoveQualFromFeature (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp)
{
  Boolean    rval = FALSE;
  GeneRefPtr grp = NULL;
  ProtRefPtr prp = NULL;
  /* must start out NULL: it is only assigned for RNA features but is tested
   * against NULL for every feature in the RNA sections below */
  RnaRefPtr  rrp = NULL;
  tRNAPtr    trp;
  Int4       gbqual;

  if (sfp == NULL || field == NULL || field->field == NULL)
  {
    return FALSE;
  }
  if (field->type != Feature_type_any && sfp->idx.subtype != GetFeatdefFromFeatureType (field->type))
  {
    return FALSE;
  }

  /* for gene fields */
  if (sfp->idx.subtype == FEATDEF_GENE) {
    grp = sfp->data.value.ptrvalue;
  } else {
    grp = SeqMgrGetGeneXref (sfp);
  }

  /* for protein fields */
  prp = GetProtRefForFeature (sfp);

  /* for RNA fields */
  if (sfp->data.choice == SEQFEAT_RNA) {
    rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
  }

  /* fields common to all features */
  /* note, also known as comment */
  if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_note)
      || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("note", field->field->data.ptrvalue)))
  {
    if (!StringHasNoText (sfp->comment) && DoesStringMatchConstraint (sfp->comment, scp))
    {
      sfp->comment = MemFree (sfp->comment);
      rval = TRUE;
    }
  }
  /* db-xref */
  if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_db_xref)
      || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("db_xref", field->field->data.ptrvalue)))
  {
    rval = RemoveDbxrefString (sfp, scp);
  }
  /* exception */
  if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_exception)
      || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("exception", field->field->data.ptrvalue)))
  {
    if (!StringHasNoText (sfp->except_text) && DoesStringMatchConstraint (sfp->except_text, scp))
    {
      sfp->except_text = MemFree (sfp->except_text);
      rval = TRUE;
    }
  }
  /* evidence: exp_ev 1 is matched as "experimental", 2 as "non-experimental" */
  if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_evidence)
      || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("evidence", field->field->data.ptrvalue)))
  {
    if ((sfp->exp_ev == 1 && DoesStringMatchConstraint("experimental", scp))
        || (sfp->exp_ev == 2 && DoesStringMatchConstraint("non-experimental", scp))) {
      sfp->exp_ev = 0;
      rval = TRUE;
    }
  }

  /* fields common to some features */
  /* product: protein name list when a ProtRef is available, otherwise the RNA product */
  if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_product)
      || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("product", field->field->data.ptrvalue)))
  {
    if (prp != NULL) {
      rval = RemoveValNodeStringMatch (&(prp->name), scp);
    } else if (sfp->data.choice == SEQFEAT_RNA) {
      rval = RemoveRNAProductString (sfp, scp);
    }
  }

  /* Gene fields */
  /* locus */
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_gene)
       || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus", field->field->data.ptrvalue)))
      && grp != NULL)
  {
    if (!StringHasNoText (grp->locus) && DoesStringMatchConstraint (grp->locus, scp)) {
      grp->locus = MemFree (grp->locus);
      rval = TRUE;
    }
  }
  /* description */
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_gene_description)
       || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->field->data.ptrvalue)))
      && grp != NULL)
  {
    if (!StringHasNoText (grp->desc) && DoesStringMatchConstraint(grp->desc, scp))
    {
      grp->desc = MemFree (grp->desc);
      rval = TRUE;
    }
  }
  /* maploc */
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_map)
       || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("map", field->field->data.ptrvalue)))
      && grp != NULL)
  {
    if (!StringHasNoText (grp->maploc) && DoesStringMatchConstraint(grp->maploc, scp))
    {
      grp->maploc = MemFree (grp->maploc);
      rval = TRUE;
    }
  }
  /* allele */
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_allele)
       || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("allele", field->field->data.ptrvalue)))
      && grp != NULL)
  {
    if (!StringHasNoText (grp->allele) && DoesStringMatchConstraint(grp->allele, scp))
    {
      grp->allele = MemFree (grp->allele);
      rval = TRUE;
    }
  }
  /* locus_tag */
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_locus_tag)
       || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus_tag", field->field->data.ptrvalue)))
      && grp != NULL)
  {
    if (!StringHasNoText (grp->locus_tag) && DoesStringMatchConstraint(grp->locus_tag, scp))
    {
      grp->locus_tag = MemFree (grp->locus_tag);
      rval = TRUE;
    }
  }
  /* synonym */
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_synonym)
       || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("synonym", field->field->data.ptrvalue)))
      && grp != NULL)
  {
    rval = RemoveValNodeStringMatch (&(grp->syn), scp);
  }

  /* protein fields */
  /* note - product handled above */
  /* description */
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_description)
       || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->field->data.ptrvalue)))
      && prp != NULL)
  {
    if (!StringHasNoText (prp->desc) && DoesStringMatchConstraint(prp->desc, scp)) {
      prp->desc = MemFree (prp->desc);
      rval = TRUE;
    }
  }
  /* ec_number */
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_ec_number)
       || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ec_number", field->field->data.ptrvalue)))
      && prp != NULL)
  {
    rval = RemoveValNodeStringMatch (&(prp->ec), scp);
  }
  /* activity */
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_activity)
       || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("activity", field->field->data.ptrvalue)))
      && prp != NULL)
  {
    rval = RemoveValNodeStringMatch (&(prp->activity), scp);
  }

  /* RNA fields (only when the RNA extension choice is 2, whose value is cast to tRNAPtr) */
  /* anticodon - removed without consulting scp */
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_anticodon)
       || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("anticodon", field->field->data.ptrvalue)))
      && rrp != NULL && rrp->ext.choice == 2)
  {
    trp = (tRNAPtr) rrp->ext.value.ptrvalue;
    if (trp != NULL && trp->anticodon != NULL) {
      trp->anticodon = SeqLocFree (trp->anticodon);
      rval = TRUE;
    }
  }
  /* codons recognized - all six codon slots are reset to 255 */
  /* NOTE(review): this section is keyed on the anticodon qualifier, exactly
   * like the section above; it presumably should test a codons-recognized
   * qualifier instead - confirm against the qualifier enumeration. */
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_anticodon)
       || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("anticodon", field->field->data.ptrvalue)))
      && rrp != NULL && rrp->ext.choice == 2)
  {
    trp = (tRNAPtr) rrp->ext.value.ptrvalue;
    if (trp != NULL && (trp->codon[0] != 255 || trp->codon[1] != 255 || trp->codon[2] != 255
                        || trp->codon[3] != 255 || trp->codon[4] != 255 || trp->codon[5] != 255)) {
      trp->codon [0] = 255;
      trp->codon [1] = 255;
      trp->codon [2] = 255;
      trp->codon [3] = 255;
      trp->codon [4] = 255;
      trp->codon [5] = 255;
      rval = TRUE;
    }
  }

  if (!rval) {
    /* actual GenBank qualifiers */
    if (field->field->choice == FeatQualChoice_legal_qual)
    {
      gbqual = GetGBQualFromFeatQual (field->field->data.intvalue);
      if (gbqual > -1) {
        rval = RemoveGBQualMatch (&(sfp->qual), ParFlat_GBQual_names [gbqual].name, scp);
      } else {
        /* need to do something with non-qualifier qualifiers */
      }
    } else {
      rval = RemoveGBQualMatchConstraintName (&(sfp->qual), field->field->data.ptrvalue, scp);
    }
  }

  return rval;
}
|
|
3489 |
||
3490 |
||
3491 |
/* ChooseBestFrame
 *
 * Tries reading frames 1, 2 and 3 on a coding-region feature and keeps the
 * one for which ProteinFromCdRegionEx returns the longest byte store.
 *
 * Returns FALSE (leaving the original frame in place) when sfp is NULL, is
 * not a coding region, has no CdRegion data, or when the longest length is
 * shared by more than one frame.  Returns TRUE after storing the single
 * best frame in the CdRegion.
 */
static Boolean ChooseBestFrame (SeqFeatPtr sfp)
{
  CdRegionPtr  cds;
  ByteStorePtr product;
  Int4         frame_len [3];
  Int4         longest = 0;
  Uint1        frame;
  Uint1        best_frame = 0;
  Uint1        saved_frame;
  Boolean      unique = TRUE;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) {
    return FALSE;
  }

  cds = sfp->data.value.ptrvalue;
  if (cds == NULL) {
    return FALSE;
  }
  saved_frame = cds->frame;

  /* measure each candidate frame; the first frame to reach the maximum wins */
  for (frame = 1; frame <= 3; frame++) {
    cds->frame = frame;
    product = ProteinFromCdRegionEx (sfp, FALSE, FALSE);
    frame_len [frame - 1] = BSLen (product);
    BSFree (product);
    if (frame_len [frame - 1] > longest) {
      longest = frame_len [frame - 1];
      best_frame = frame;
    }
  }

  /* any other frame reaching the same length makes the choice ambiguous */
  for (frame = 1; frame <= 3; frame++) {
    if (frame != best_frame && frame_len [frame - 1] == longest) {
      unique = FALSE;
    }
  }

  cds->frame = unique ? best_frame : saved_frame;
  return unique;
}
|
|
3529 |
||
3530 |
||
3531 |
/* SetQualOnFeature
 *
 * Sets the qualifier selected by "field" on the feature "sfp" to "value",
 * for qualifiers whose current value satisfies the string constraint "scp".
 *
 * The qualifier is selected either by a legal-qualifier code
 * (FeatQualChoice_legal_qual) or by a name constraint
 * (FeatQualChoice_illegal_qual).  Every section below is tested
 * independently.  If no section reports success, the value is applied to
 * the GBQual list of the feature via SetStringInGBQualList.
 *
 * Parameters:
 *   sfp           - feature to edit; FALSE is returned if NULL.
 *   field         - feature type plus qualifier selector; FALSE is returned
 *                   if it or its selector is NULL, or if field->type is not
 *                   Feature_type_any and does not correspond to
 *                   sfp->idx.subtype.
 *   scp           - constraint the existing value must satisfy.
 *   value         - new text.  For codon_start on a coding region only
 *                   "best" (case-insensitive), "1", "2" and "3" are acted on.
 *   existing_text - passed through unchanged to the string-setting helpers
 *                   (presumably selects how existing text is combined with
 *                   the new value - confirm against SetStringValue).
 *
 * Returns the result of the last section that applied, or of the GBQual
 * fallback.
 */
static Boolean SetQualOnFeature (SeqFeatPtr sfp, FeatureFieldPtr field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
{
  Boolean     rval = FALSE;
  GeneRefPtr  grp = NULL;
  ProtRefPtr  prp = NULL;
  CharPtr     tmp;
  CdRegionPtr crp;

  if (sfp == NULL || field == NULL || field->field == NULL)
  {
    return FALSE;
  }
  if (field->type != Feature_type_any && sfp->idx.subtype != GetFeatdefFromFeatureType (field->type))
  {
    return FALSE;
  }

  /* for gene fields */
  if (sfp->idx.subtype == FEATDEF_GENE) {
    grp = sfp->data.value.ptrvalue;
  } else {
    grp = SeqMgrGetGeneXref (sfp);
  }

  /* for protein fields */
  prp = GetProtRefForFeature (sfp);

  /* fields common to all features */
  /* note, also known as comment */
  if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_note)
      || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("note", field->field->data.ptrvalue)))
  {
    if (DoesStringMatchConstraint(sfp->comment, scp))
    {
      rval = SetStringValue ( &(sfp->comment), value, existing_text);
    }
  }
  /* db-xref */
  if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_db_xref)
      || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("db_xref", field->field->data.ptrvalue)))
  {
    rval = SetDbxrefString (sfp, scp, value, existing_text);
  }
  /* exception */
  if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_exception)
      || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("exception", field->field->data.ptrvalue)))
  {
    if (DoesStringMatchConstraint(sfp->except_text, scp))
    {
      rval = SetStringValue ( &(sfp->except_text), value, existing_text);
    }
  }
  /* evidence: the numeric exp_ev is edited through a temporary string copy
   * and only written back when the edited text is a recognized value or
   * has no text */
  if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_evidence)
      || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("evidence", field->field->data.ptrvalue)))
  {
    tmp = NULL;
    if (sfp->exp_ev == 1)
    {
      tmp = StringSave ("experimental");
    }
    else if (sfp->exp_ev == 2)
    {
      tmp = StringSave ("non-experimental");
    }
    if (DoesStringMatchConstraint(tmp, scp)) {
      rval = SetStringValue (&tmp, value, existing_text);
      if (rval) {
        rval = FALSE;
        if (StringICmp (tmp, "experimental") == 0) {
          sfp->exp_ev = 1;
          rval = TRUE;
        } else if (StringICmp (tmp, "non-experimental") == 0) {
          sfp->exp_ev = 2;
          rval = TRUE;
        } else if (StringHasNoText (tmp)) {
          sfp->exp_ev = 0;
          rval = TRUE;
        }
      }
    }
    tmp = MemFree (tmp);
  }

  /* fields common to some features */
  /* product: protein name list when a ProtRef is available, otherwise the RNA product */
  if ((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_product)
      || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("product", field->field->data.ptrvalue)))
  {
    if (prp != NULL) {
      rval = SetStringsInValNodeStringList (&(prp->name), scp, value, existing_text);
    } else if (sfp->data.choice == SEQFEAT_RNA) {
      rval = SetRNAProductString (sfp, scp, value, existing_text);
    }
  }

  /* Gene fields */
  /* locus */
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_gene)
       || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus", field->field->data.ptrvalue)))
      && grp != NULL)
  {
    if (DoesStringMatchConstraint(grp->locus, scp))
    {
      rval = SetStringValue (&(grp->locus), value, existing_text);
    }
  }
  /* description */
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_gene_description)
       || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->field->data.ptrvalue)))
      && grp != NULL)
  {
    if (DoesStringMatchConstraint(grp->desc, scp))
    {
      rval = SetStringValue (&(grp->desc), value, existing_text);
    }
  }
  /* maploc */
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_map)
       || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("map", field->field->data.ptrvalue)))
      && grp != NULL)
  {
    if (DoesStringMatchConstraint(grp->maploc, scp))
    {
      rval = SetStringValue (&(grp->maploc), value, existing_text);
    }
  }
  /* allele */
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_allele)
       || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("allele", field->field->data.ptrvalue)))
      && grp != NULL)
  {
    if (DoesStringMatchConstraint(grp->allele, scp))
    {
      rval = SetStringValue (&(grp->allele), value, existing_text);
    }
  }
  /* locus_tag */
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_locus_tag)
       || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("locus_tag", field->field->data.ptrvalue)))
      && grp != NULL)
  {
    if (DoesStringMatchConstraint(grp->locus_tag, scp))
    {
      rval = SetStringValue (&(grp->locus_tag), value, existing_text);
    }
  }
  /* synonym */
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_synonym)
       || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("synonym", field->field->data.ptrvalue)))
      && grp != NULL)
  {
    rval = SetStringsInValNodeStringList (&(grp->syn), scp, value, existing_text);
  }

  /* protein fields */
  /* note - product handled above */
  /* description */
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_description)
       || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("description", field->field->data.ptrvalue)))
      && prp != NULL)
  {
    if (DoesStringMatchConstraint(prp->desc, scp)) {
      rval = SetStringValue (&(prp->desc), value, existing_text);
    }
  }
  /* ec_number */
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_ec_number)
       || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("ec_number", field->field->data.ptrvalue)))
      && prp != NULL)
  {
    rval = SetStringsInValNodeStringList (&(prp->ec), scp, value, existing_text);
  }
  /* activity */
  if (((field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_activity)
       || (field->field->choice == FeatQualChoice_illegal_qual && DoesStringMatchConstraint ("activity", field->field->data.ptrvalue)))
      && prp != NULL)
  {
    rval = SetStringsInValNodeStringList (&(prp->activity), scp, value, existing_text);
  }

  /* codon_start on coding regions: the value selects the reading frame */
  if (field->field->choice == FeatQualChoice_legal_qual && field->field->data.intvalue == Feat_qual_legal_codon_start
      && sfp->data.choice == SEQFEAT_CDREGION)
  {
    crp = (CdRegionPtr) sfp->data.value.ptrvalue;
    if (StringICmp (value, "best") == 0)
    {
      /* ChooseBestFrame performs its own NULL check on the CdRegion */
      rval = ChooseBestFrame (sfp);
    }
    else if (crp != NULL)
    {
      /* guard: a coding-region feature without CdRegion data cannot take a frame */
      if (StringCmp (value, "1") == 0)
      {
        crp->frame = 1;
        rval = TRUE;
      }
      else if (StringCmp (value, "2") == 0)
      {
        crp->frame = 2;
        rval = TRUE;
      }
      else if (StringCmp (value, "3") == 0)
      {
        crp->frame = 3;
        rval = TRUE;
      }
    }
  }

  /* actual GenBank qualifiers */
  if (!rval)
  {
    rval = SetStringInGBQualList (&(sfp->qual), field->field, scp, value, existing_text);
  }
  return rval;
}
|
|
3746 |
||
3747 |
||
3748 |
/* GetSourceQualFromBioSource
 *
 * Returns a newly saved copy (StringSave) of the source qualifier selected
 * by "scp" from "biop", provided the value satisfies "constraint"; returns
 * NULL when biop or scp is NULL, when the qualifier is absent, or when the
 * value does not match.
 *
 * Text qualifiers: taxname, common name, lineage and division are read from
 * the OrgRef/OrgName; any other text qualifier is looked up first as an
 * OrgMod subtype and, if GetOrgModQualFromSrcQual returns -1, as a SubSource
 * subtype.  The first matching modifier wins.  A modifier without text
 * yields "TRUE" when IsNonTextSourceQual accepts the qualifier and "TRUE"
 * satisfies the constraint.
 *
 * Location and origin: the name produced by LocNameFromGenome /
 * OriginNameFromOrigin is copied when it satisfies the constraint.
 */
NLM_EXTERN CharPtr GetSourceQualFromBioSource (BioSourcePtr biop, SourceQualChoicePtr scp, StringConstraintPtr constraint)
{
  CharPtr      result = NULL;
  CharPtr      text = NULL;        /* borrowed pointer into biop or a name table */
  Boolean      have_text = FALSE;  /* TRUE once one of the fixed text fields was selected */
  SubSourcePtr ssp;
  OrgModPtr    mod;
  Int4         orgmod_subtype = -1, subsrc_subtype = -1;

  if (biop == NULL || scp == NULL) {
    return NULL;
  }

  switch (scp->choice)
  {
    case SourceQualChoice_textqual:
      if (scp->data.intvalue == Source_qual_taxname) {
        if (biop->org != NULL) {
          text = biop->org->taxname;
          have_text = TRUE;
        }
      } else if (scp->data.intvalue == Source_qual_common_name) {
        if (biop->org != NULL) {
          text = biop->org->common;
          have_text = TRUE;
        }
      } else if (scp->data.intvalue == Source_qual_lineage) {
        if (biop->org != NULL && biop->org->orgname != NULL) {
          text = biop->org->orgname->lineage;
          have_text = TRUE;
        }
      } else if (scp->data.intvalue == Source_qual_division) {
        if (biop->org != NULL && biop->org->orgname != NULL) {
          text = biop->org->orgname->div;
          have_text = TRUE;
        }
      } else {
        orgmod_subtype = GetOrgModQualFromSrcQual (scp->data.intvalue);
        if (orgmod_subtype == -1) {
          /* not an OrgMod qualifier: scan the SubSource list */
          subsrc_subtype = GetSubSrcQualFromSrcQual (scp->data.intvalue);
          ssp = biop->subtype;
          while (ssp != NULL && result == NULL) {
            if (ssp->subtype == subsrc_subtype) {
              if (!StringHasNoText (ssp->name)) {
                if (DoesStringMatchConstraint (ssp->name, constraint)) {
                  result = StringSave (ssp->name);
                }
              } else if (IsNonTextSourceQual (scp->data.intvalue)
                         && DoesStringMatchConstraint ("TRUE", constraint)) {
                result = StringSave ("TRUE");
              }
            }
            ssp = ssp->next;
          }
        } else if (biop->org != NULL && biop->org->orgname != NULL) {
          mod = biop->org->orgname->mod;
          while (mod != NULL && result == NULL) {
            if (mod->subtype == orgmod_subtype) {
              if (!StringHasNoText (mod->subname)) {
                if (DoesStringMatchConstraint (mod->subname, constraint)) {
                  result = StringSave (mod->subname);
                }
              } else if (IsNonTextSourceQual (scp->data.intvalue)
                         && DoesStringMatchConstraint ("TRUE", constraint)) {
                result = StringSave ("TRUE");
              }
            }
            mod = mod->next;
          }
        }
      }
      /* shared test for the four fixed text fields */
      if (have_text && !StringHasNoText (text)
          && DoesStringMatchConstraint (text, constraint)) {
        result = StringSave (text);
      }
      break;
    case SourceQualChoice_location:
      text = LocNameFromGenome (biop->genome);
      if (DoesStringMatchConstraint (text, constraint)) {
        result = StringSave (text);
      }
      break;
    case SourceQualChoice_origin:
      text = OriginNameFromOrigin (biop->origin);
      if (DoesStringMatchConstraint (text, constraint)) {
        result = StringSave (text);
      }
      break;
  }
  return result;
}
|
|
3837 |
||
3838 |
||
3839 |
/* RemoveSourceQualFromBioSource
 *
 * Removes the source qualifier selected by "scp" from "biop" when its value
 * satisfies "constraint".  Returns TRUE if anything was removed, FALSE
 * otherwise (including when biop or scp is NULL).
 *
 * Text qualifiers: taxname, common name, lineage and division are freed in
 * place; any other text qualifier removes every matching OrgMod, or, when
 * GetOrgModQualFromSrcQual returns -1, every matching SubSource.
 *
 * Location and origin: the field is reset to 0 when its name satisfies the
 * constraint and scp->data.intvalue is either 0 or maps to the current value.
 */
static Boolean RemoveSourceQualFromBioSource (BioSourcePtr biop, SourceQualChoicePtr scp, StringConstraintPtr constraint)
{
  SubSourcePtr  ssp;
  SubSourcePtr *ssp_link;          /* slot that currently points at ssp */
  OrgModPtr     mod;
  OrgModPtr    *mod_link;          /* slot that currently points at mod */
  CharPtr      *text_slot = NULL;  /* fixed text field selected for removal, if any */
  Int4          orgmod_subtype = -1, subsrc_subtype = -1;
  CharPtr       label;
  Boolean       removed = FALSE;

  if (biop == NULL || scp == NULL) {
    return FALSE;
  }

  switch (scp->choice)
  {
    case SourceQualChoice_textqual:
      if (scp->data.intvalue == Source_qual_taxname) {
        if (biop->org != NULL) {
          text_slot = &(biop->org->taxname);
        }
      } else if (scp->data.intvalue == Source_qual_common_name) {
        if (biop->org != NULL) {
          text_slot = &(biop->org->common);
        }
      } else if (scp->data.intvalue == Source_qual_lineage) {
        if (biop->org != NULL && biop->org->orgname != NULL) {
          text_slot = &(biop->org->orgname->lineage);
        }
      } else if (scp->data.intvalue == Source_qual_division) {
        if (biop->org != NULL && biop->org->orgname != NULL) {
          text_slot = &(biop->org->orgname->div);
        }
      } else {
        orgmod_subtype = GetOrgModQualFromSrcQual (scp->data.intvalue);
        if (orgmod_subtype == -1) {
          /* not an OrgMod qualifier: unlink matching SubSource entries */
          subsrc_subtype = GetSubSrcQualFromSrcQual (scp->data.intvalue);
          ssp_link = &(biop->subtype);
          while ((ssp = *ssp_link) != NULL) {
            if (ssp->subtype == subsrc_subtype
                && DoesStringMatchConstraint (ssp->name, constraint)) {
              *ssp_link = ssp->next;
              ssp->next = NULL;
              SubSourceFree (ssp);
              removed = TRUE;
            } else {
              ssp_link = &(ssp->next);
            }
          }
        } else if (biop->org != NULL && biop->org->orgname != NULL) {
          mod_link = &(biop->org->orgname->mod);
          while ((mod = *mod_link) != NULL) {
            if (mod->subtype == orgmod_subtype
                && DoesStringMatchConstraint (mod->subname, constraint)) {
              *mod_link = mod->next;
              mod->next = NULL;
              OrgModFree (mod);
              removed = TRUE;
            } else {
              mod_link = &(mod->next);
            }
          }
        }
      }
      /* shared removal for the four fixed text fields */
      if (text_slot != NULL && !StringHasNoText (*text_slot)
          && DoesStringMatchConstraint (*text_slot, constraint)) {
        *text_slot = MemFree (*text_slot);
        removed = TRUE;
      }
      break;
    case SourceQualChoice_location:
      label = LocNameFromGenome (biop->genome);
      if (DoesStringMatchConstraint (label, constraint)
          && (scp->data.intvalue == 0 || biop->genome == GenomeFromSrcLoc (scp->data.intvalue))) {
        biop->genome = 0;
        removed = TRUE;
      }
      break;
    case SourceQualChoice_origin:
      label = OriginNameFromOrigin (biop->origin);
      if (DoesStringMatchConstraint (label, constraint)
          && (scp->data.intvalue == 0 || biop->origin == OriginFromSrcOrig (scp->data.intvalue))) {
        biop->origin = 0;
        removed = TRUE;
      }
      break;
  }
  return removed;
}
|
|
3943 |
||
3944 |
||
3945 |
/* SetSourceQualInBioSource
 *
 * Applies "value" to the source qualifier selected by "scp" on "biop", for
 * values that satisfy "constraint".  Returns FALSE when biop or scp is NULL;
 * otherwise returns the result of the last SetStringValue call made, or TRUE
 * when a location/origin was assigned.
 *
 * Text qualifiers: taxname, common name, lineage and division are edited in
 * place.  Any other text qualifier edits every matching OrgMod, or, when
 * GetOrgModQualFromSrcQual returns -1, every matching SubSource.  A modifier
 * left without text by a successful edit is removed unless
 * IsNonTextSourceQual accepts the qualifier.  When no modifier matched and
 * the constraint is empty, a new modifier is created and appended (creating
 * the OrgRef/OrgName first if needed for an OrgMod).
 *
 * Location and origin: when the current name satisfies the constraint, the
 * field is set from scp->data.intvalue; "value" is not consulted.
 *
 * existing_text is passed through unchanged to SetStringValue.
 */
NLM_EXTERN Boolean SetSourceQualInBioSource (BioSourcePtr biop, SourceQualChoicePtr scp, StringConstraintPtr constraint, CharPtr value, Uint2 existing_text)
{
  SubSourcePtr  ssp;
  SubSourcePtr *ssp_link;          /* slot that currently points at ssp */
  OrgModPtr     mod;
  OrgModPtr    *mod_link = NULL;   /* slot that currently points at mod */
  CharPtr      *text_slot = NULL;  /* fixed text field selected for editing, if any */
  Int4          orgmod_subtype = -1, subsrc_subtype = -1;
  CharPtr       label;
  Boolean       rval = FALSE, found = FALSE, discard;

  if (biop == NULL || scp == NULL) {
    return FALSE;
  }

  switch (scp->choice)
  {
    case SourceQualChoice_textqual:
      if (scp->data.intvalue == Source_qual_taxname) {
        if (biop->org != NULL) {
          text_slot = &(biop->org->taxname);
        }
      } else if (scp->data.intvalue == Source_qual_common_name) {
        if (biop->org != NULL) {
          text_slot = &(biop->org->common);
        }
      } else if (scp->data.intvalue == Source_qual_lineage) {
        if (biop->org != NULL && biop->org->orgname != NULL) {
          text_slot = &(biop->org->orgname->lineage);
        }
      } else if (scp->data.intvalue == Source_qual_division) {
        if (biop->org != NULL && biop->org->orgname != NULL) {
          text_slot = &(biop->org->orgname->div);
        }
      } else {
        orgmod_subtype = GetOrgModQualFromSrcQual (scp->data.intvalue);
        if (orgmod_subtype == -1) {
          subsrc_subtype = GetSubSrcQualFromSrcQual (scp->data.intvalue);
          if (subsrc_subtype > -1) {
            /* edit matching SubSource entries; ssp_link ends at the tail slot */
            ssp_link = &(biop->subtype);
            while ((ssp = *ssp_link) != NULL) {
              discard = FALSE;
              if (ssp->subtype == subsrc_subtype
                  && DoesStringMatchConstraint (ssp->name, constraint)) {
                rval = SetStringValue (&(ssp->name), value, existing_text);
                found = TRUE;
                discard = (Boolean) (rval && StringHasNoText (ssp->name)
                                     && !IsNonTextSourceQual(scp->data.intvalue));
              }
              if (discard) {
                *ssp_link = ssp->next;
                ssp->next = NULL;
                SubSourceFree (ssp);
              } else {
                ssp_link = &(ssp->next);
              }
            }
            if (!found && IsStringConstraintEmpty (constraint)) {
              /* nothing to edit and no constraint: append a new SubSource */
              ssp = SubSourceNew ();
              ssp->subtype = subsrc_subtype;
              rval = SetStringValue (&(ssp->name), value, existing_text);
              *ssp_link = ssp;
            }
          }
        } else {
          if (biop->org != NULL && biop->org->orgname != NULL) {
            /* edit matching OrgMod entries; mod_link ends at the tail slot */
            mod_link = &(biop->org->orgname->mod);
            while ((mod = *mod_link) != NULL) {
              discard = FALSE;
              if (mod->subtype == orgmod_subtype
                  && DoesStringMatchConstraint (mod->subname, constraint)) {
                rval = SetStringValue (&(mod->subname), value, existing_text);
                found = TRUE;
                discard = (Boolean) (rval && StringHasNoText (mod->subname)
                                     && !IsNonTextSourceQual(scp->data.intvalue));
              }
              if (discard) {
                *mod_link = mod->next;
                mod->next = NULL;
                OrgModFree (mod);
              } else {
                mod_link = &(mod->next);
              }
            }
          }
          if (!found && IsStringConstraintEmpty (constraint)) {
            /* nothing to edit and no constraint: append a new OrgMod */
            if (biop->org == NULL) {
              biop->org = OrgRefNew();
            }
            if (biop->org->orgname == NULL) {
              biop->org->orgname = OrgNameNew();
            }
            if (mod_link == NULL) {
              /* the list was never walked, so the new entry becomes its head */
              mod_link = &(biop->org->orgname->mod);
            }
            mod = OrgModNew ();
            mod->subtype = orgmod_subtype;
            rval = SetStringValue (&(mod->subname), value, existing_text);
            *mod_link = mod;
          }
        }
      }
      /* shared edit for the four fixed text fields */
      if (text_slot != NULL && DoesStringMatchConstraint (*text_slot, constraint)) {
        rval = SetStringValue (text_slot, value, existing_text);
      }
      break;
    case SourceQualChoice_location:
      label = LocNameFromGenome (biop->genome);
      if (DoesStringMatchConstraint (label, constraint)) {
        biop->genome = GenomeFromSrcLoc (scp->data.intvalue);
        rval = TRUE;
      }
      break;
    case SourceQualChoice_origin:
      label = OriginNameFromOrigin (biop->origin);
      if (DoesStringMatchConstraint (label, constraint)) {
        biop->origin = OriginFromSrcOrig(scp->data.intvalue);
        rval = TRUE;
      }
      break;
  }
  return rval;
}
|
|
4079 |
||
4080 |
||
4081 |
/* GetSequenceForObject
 *
 * Finds the Bioseq that an object belongs to.
 *
 *   choice  tells how to interpret data:
 *             OBJ_BIOSEQ   data is the Bioseq itself
 *             OBJ_SEQFEAT  data is a feature; the Bioseq is looked up from
 *                          the feature location
 *             OBJ_SEQDESC  data is a descriptor; only an extended descriptor
 *                          whose recorded parent is a Bioseq yields a result
 *             0            data is a CDS-Gene-Prot set (CGPSetPtr)
 *   data    the object; NULL yields NULL
 *
 * For a CDS-Gene-Prot set the coding regions are tried first, then the
 * mRNAs, then the genes; the first feature whose location resolves to a
 * Bioseq wins.
 *
 * Returns the Bioseq, or NULL when none could be found or choice is not
 * one of the values above.
 */
static BioseqPtr GetSequenceForObject (Uint1 choice, Pointer data)
{
  BioseqPtr     bsp = NULL;
  SeqFeatPtr    sfp;
  SeqDescrPtr   sdp;
  ObjValNodePtr ovp;
  CGPSetPtr     cgp;
  ValNodePtr    vnp;

  if (data == NULL) return NULL;

  switch (choice) {
    case OBJ_BIOSEQ:
      bsp = (BioseqPtr) data;
      break;
    case OBJ_SEQFEAT:
      sfp = (SeqFeatPtr) data;
      bsp = BioseqFindFromSeqLoc (sfp->location);
      break;
    case OBJ_SEQDESC:
      sdp = (SeqDescrPtr) data;
      /* the parent information is only present on extended descriptors */
      if (sdp->extended) {
        ovp = (ObjValNodePtr) sdp;
        if (ovp->idx.parenttype == OBJ_BIOSEQ && ovp->idx.parentptr != NULL) {
          bsp = ovp->idx.parentptr;
        }
      }
      break;
    case 0:
      cgp = (CGPSetPtr) data;
      for (vnp = cgp->cds_list; vnp != NULL && bsp == NULL; vnp = vnp->next) {
        sfp = vnp->data.ptrvalue;
        if (sfp != NULL) {
          bsp = BioseqFindFromSeqLoc (sfp->location);
        }
      }
      for (vnp = cgp->mrna_list; vnp != NULL && bsp == NULL; vnp = vnp->next) {
        sfp = vnp->data.ptrvalue;
        if (sfp != NULL) {
          bsp = BioseqFindFromSeqLoc (sfp->location);
        }
      }
      /* a break used to sit here, which made the gene list scan below
       * unreachable; sets that only hold genes now resolve as well
       */
      for (vnp = cgp->gene_list; vnp != NULL && bsp == NULL; vnp = vnp->next) {
        sfp = vnp->data.ptrvalue;
        if (sfp != NULL) {
          bsp = BioseqFindFromSeqLoc (sfp->location);
        }
      }
      break;
  }
  return bsp;
}
|
|
4134 |
||
4135 |
||
4136 |
/* GetBioSourceFromObject
 *
 * Returns the BioSource that applies to an object.
 *
 *   choice  object type; OBJ_SEQDESC and OBJ_SEQFEAT are inspected directly,
 *           every value is also passed on to GetSequenceForObject
 *   data    the object; NULL yields NULL
 *
 * A source descriptor or a biosource feature supplies its own BioSource.
 * When that gives nothing, the Bioseq for the object is located and the
 * first source descriptor reported for it by SeqMgrGetNextDescriptor is
 * used instead.
 *
 * Returns the BioSource, or NULL when none was found.
 */
NLM_EXTERN BioSourcePtr GetBioSourceFromObject (Uint1 choice, Pointer data)
{
  BioSourcePtr      source = NULL;
  SeqDescrPtr       desc;
  SeqFeatPtr        feat;
  SeqMgrDescContext desc_context;

  if (data == NULL) {
    return NULL;
  }

  /* the object may carry the BioSource itself */
  if (choice == OBJ_SEQDESC) {
    desc = (SeqDescrPtr) data;
    if (desc->choice == Seq_descr_source) {
      source = desc->data.ptrvalue;
    }
  } else if (choice == OBJ_SEQFEAT) {
    feat = (SeqFeatPtr) data;
    if (feat->data.choice == SEQFEAT_BIOSRC) {
      source = feat->data.value.ptrvalue;
    }
  }
  if (source != NULL) {
    return source;
  }

  /* otherwise fall back to the source descriptor of the object's sequence */
  desc = SeqMgrGetNextDescriptor (GetSequenceForObject (choice, data),
                                  NULL, Seq_descr_source, &desc_context);
  if (desc == NULL || desc->choice != Seq_descr_source) {
    return NULL;
  }
  return desc->data.ptrvalue;
}
|
|
4170 |
||
4171 |
||
4172 |
/* functions for dealing with CDS-Gene-Prot sets */
|
|
4173 |
/* GetFieldValueFromCGPSet
 *
 * Fetches the first value of a CDS-Gene-Prot field that has text and
 * satisfies a string constraint.
 *
 *   c      the CDS-Gene-Prot set; NULL yields NULL
 *   field  one of the CDSGeneProt_field_* values; any other value yields NULL
 *   scp    constraint handed to DoesStringMatchConstraint and to the
 *          list/qualifier lookup helpers
 *
 * The field decides which feature list of the set is scanned (cds, gene,
 * mrna or prot).  Protein fields only look at features of subtype
 * FEATDEF_PROT, mature peptide fields only at FEATDEF_mat_peptide_aa.
 * Scanning stops at the first feature that produces a value.
 *
 * Returns a copy made with StringSave for single-string fields.  For
 * synonym, name, EC number, activity and old-locus-tag the result is
 * whatever GetFirstValNodeStringMatch / GetFirstGBQualMatch return
 * (presumably also an allocated copy - confirm before freeing).
 * Returns NULL when nothing matched.
 */
static CharPtr GetFieldValueFromCGPSet (CGPSetPtr c, Uint2 field, StringConstraintPtr scp)
{
  CharPtr    result = NULL;
  CharPtr    candidate;
  ValNodePtr feat_list;
  ValNodePtr node;
  SeqFeatPtr feat;
  GeneRefPtr gene;
  RnaRefPtr  rna;
  ProtRefPtr prot;
  int        wanted_subtype = 0;

  if (c == NULL) {
    return NULL;
  }

  /* step 1: which feature list does this field live on? */
  switch (field) {
    case CDSGeneProt_field_cds_comment:
      feat_list = c->cds_list;
      break;
    case CDSGeneProt_field_gene_locus:
    case CDSGeneProt_field_gene_description:
    case CDSGeneProt_field_gene_comment:
    case CDSGeneProt_field_gene_allele:
    case CDSGeneProt_field_gene_maploc:
    case CDSGeneProt_field_gene_locus_tag:
    case CDSGeneProt_field_gene_synonym:
    case CDSGeneProt_field_gene_old_locus_tag:
      feat_list = c->gene_list;
      break;
    case CDSGeneProt_field_mrna_product:
    case CDSGeneProt_field_mrna_comment:
      feat_list = c->mrna_list;
      break;
    case CDSGeneProt_field_prot_name:
    case CDSGeneProt_field_prot_description:
    case CDSGeneProt_field_prot_ec_number:
    case CDSGeneProt_field_prot_activity:
    case CDSGeneProt_field_prot_comment:
      feat_list = c->prot_list;
      wanted_subtype = FEATDEF_PROT;
      break;
    case CDSGeneProt_field_mat_peptide_name:
    case CDSGeneProt_field_mat_peptide_description:
    case CDSGeneProt_field_mat_peptide_ec_number:
    case CDSGeneProt_field_mat_peptide_activity:
    case CDSGeneProt_field_mat_peptide_comment:
      feat_list = c->prot_list;
      wanted_subtype = FEATDEF_mat_peptide_aa;
      break;
    default:
      return NULL;
  }

  /* step 2: walk the list until one feature yields a value */
  for (node = feat_list; node != NULL && result == NULL; node = node->next) {
    feat = (SeqFeatPtr) node->data.ptrvalue;
    if (feat == NULL) {
      continue;
    }

    /* candidate is a single string that still has to pass the text and
     * constraint checks below; list-style fields set result directly
     */
    candidate = NULL;

    switch (field) {
      case CDSGeneProt_field_cds_comment:
      case CDSGeneProt_field_gene_comment:
      case CDSGeneProt_field_mrna_comment:
        candidate = feat->comment;
        break;

      case CDSGeneProt_field_prot_comment:
      case CDSGeneProt_field_mat_peptide_comment:
        if (feat->idx.subtype == wanted_subtype) {
          candidate = feat->comment;
        }
        break;

      case CDSGeneProt_field_gene_old_locus_tag:
        result = GetFirstGBQualMatch (feat->qual, "old-locus-tag", scp);
        break;

      case CDSGeneProt_field_mrna_product:
        if (feat->data.choice == SEQFEAT_RNA) {
          rna = feat->data.value.ptrvalue;
          /* only ext.choice 1 is read as a product string here */
          if (rna != NULL && rna->ext.choice == 1) {
            candidate = rna->ext.value.ptrvalue;
          }
        }
        break;

      case CDSGeneProt_field_gene_locus:
      case CDSGeneProt_field_gene_description:
      case CDSGeneProt_field_gene_allele:
      case CDSGeneProt_field_gene_maploc:
      case CDSGeneProt_field_gene_locus_tag:
      case CDSGeneProt_field_gene_synonym:
        if (feat->data.choice != SEQFEAT_GENE) {
          break;
        }
        gene = feat->data.value.ptrvalue;
        if (gene == NULL) {
          break;
        }
        if (field == CDSGeneProt_field_gene_locus) {
          candidate = gene->locus;
        } else if (field == CDSGeneProt_field_gene_description) {
          candidate = gene->desc;
        } else if (field == CDSGeneProt_field_gene_allele) {
          candidate = gene->allele;
        } else if (field == CDSGeneProt_field_gene_maploc) {
          candidate = gene->maploc;
        } else if (field == CDSGeneProt_field_gene_locus_tag) {
          candidate = gene->locus_tag;
        } else {
          result = GetFirstValNodeStringMatch (gene->syn, scp);
        }
        break;

      case CDSGeneProt_field_prot_name:
      case CDSGeneProt_field_prot_description:
      case CDSGeneProt_field_prot_ec_number:
      case CDSGeneProt_field_prot_activity:
      case CDSGeneProt_field_mat_peptide_name:
      case CDSGeneProt_field_mat_peptide_description:
      case CDSGeneProt_field_mat_peptide_ec_number:
      case CDSGeneProt_field_mat_peptide_activity:
        if (feat->data.choice != SEQFEAT_PROT || feat->idx.subtype != wanted_subtype) {
          break;
        }
        prot = feat->data.value.ptrvalue;
        if (prot == NULL) {
          break;
        }
        if (field == CDSGeneProt_field_prot_description
            || field == CDSGeneProt_field_mat_peptide_description) {
          candidate = prot->desc;
        } else if (field == CDSGeneProt_field_prot_name
                   || field == CDSGeneProt_field_mat_peptide_name) {
          result = GetFirstValNodeStringMatch (prot->name, scp);
        } else if (field == CDSGeneProt_field_prot_ec_number
                   || field == CDSGeneProt_field_mat_peptide_ec_number) {
          result = GetFirstValNodeStringMatch (prot->ec, scp);
        } else {
          result = GetFirstValNodeStringMatch (prot->activity, scp);
        }
        break;

      default:
        break;
    }

    if (candidate != NULL
        && !StringHasNoText (candidate)
        && DoesStringMatchConstraint (candidate, scp)) {
      result = StringSave (candidate);
    }
  }

  return result;
}
|
|
4413 |
||
4414 |
||
4415 |
/* RemoveFieldValueFromCGPSet
 *
 * Deletes the values of one CDS-Gene-Prot field that satisfy a string
 * constraint, on every feature of the relevant list.
 *
 *   c      the CDS-Gene-Prot set; NULL yields FALSE
 *   field  one of the CDSGeneProt_field_* values; any other value yields FALSE
 *   scp    constraint handed to DoesStringMatchConstraint and to the
 *          list/qualifier removal helpers
 *
 * Single-string fields that have text and match are freed with MemFree and
 * left NULL.  Removing an mRNA product also resets the RnaRef ext.choice
 * to 0.  Synonym, name, EC number and activity go through
 * RemoveValNodeStringMatch; old-locus-tag goes through RemoveGBQualMatch.
 * Protein fields only touch features of subtype FEATDEF_PROT, mature
 * peptide fields only FEATDEF_mat_peptide_aa.
 *
 * Returns TRUE when at least one value was removed.
 */
static Boolean RemoveFieldValueFromCGPSet (CGPSetPtr c, Uint2 field, StringConstraintPtr scp)
{
  Boolean    removed = FALSE;
  CharPtr    *text_slot;
  ValNodePtr feat_list;
  ValNodePtr node;
  SeqFeatPtr feat;
  GeneRefPtr gene;
  RnaRefPtr  rna;
  ProtRefPtr prot;
  int        wanted_subtype = 0;

  if (c == NULL) {
    return FALSE;
  }

  /* step 1: which feature list does this field live on? */
  switch (field) {
    case CDSGeneProt_field_cds_comment:
      feat_list = c->cds_list;
      break;
    case CDSGeneProt_field_gene_locus:
    case CDSGeneProt_field_gene_description:
    case CDSGeneProt_field_gene_comment:
    case CDSGeneProt_field_gene_allele:
    case CDSGeneProt_field_gene_maploc:
    case CDSGeneProt_field_gene_locus_tag:
    case CDSGeneProt_field_gene_synonym:
    case CDSGeneProt_field_gene_old_locus_tag:
      feat_list = c->gene_list;
      break;
    case CDSGeneProt_field_mrna_product:
    case CDSGeneProt_field_mrna_comment:
      feat_list = c->mrna_list;
      break;
    case CDSGeneProt_field_prot_name:
    case CDSGeneProt_field_prot_description:
    case CDSGeneProt_field_prot_ec_number:
    case CDSGeneProt_field_prot_activity:
    case CDSGeneProt_field_prot_comment:
      feat_list = c->prot_list;
      wanted_subtype = FEATDEF_PROT;
      break;
    case CDSGeneProt_field_mat_peptide_name:
    case CDSGeneProt_field_mat_peptide_description:
    case CDSGeneProt_field_mat_peptide_ec_number:
    case CDSGeneProt_field_mat_peptide_activity:
    case CDSGeneProt_field_mat_peptide_comment:
      feat_list = c->prot_list;
      wanted_subtype = FEATDEF_mat_peptide_aa;
      break;
    default:
      return FALSE;
  }

  /* step 2: visit every feature on the list */
  for (node = feat_list; node != NULL; node = node->next) {
    feat = (SeqFeatPtr) node->data.ptrvalue;
    if (feat == NULL) {
      continue;
    }

    /* text_slot points at a single string member that is cleared below
     * if it has text and matches; other field kinds act inside the switch
     */
    text_slot = NULL;

    switch (field) {
      case CDSGeneProt_field_cds_comment:
      case CDSGeneProt_field_gene_comment:
      case CDSGeneProt_field_mrna_comment:
        text_slot = &(feat->comment);
        break;

      case CDSGeneProt_field_prot_comment:
      case CDSGeneProt_field_mat_peptide_comment:
        if (feat->idx.subtype == wanted_subtype) {
          text_slot = &(feat->comment);
        }
        break;

      case CDSGeneProt_field_gene_old_locus_tag:
        removed |= RemoveGBQualMatch (&(feat->qual), "old-locus-tag", scp);
        break;

      case CDSGeneProt_field_mrna_product:
        if (feat->data.choice == SEQFEAT_RNA) {
          rna = feat->data.value.ptrvalue;
          if (rna != NULL
              && rna->ext.choice == 1
              && !StringHasNoText (rna->ext.value.ptrvalue)
              && DoesStringMatchConstraint (rna->ext.value.ptrvalue, scp)) {
            rna->ext.value.ptrvalue = MemFree (rna->ext.value.ptrvalue);
            /* no product string left, so the ext is marked as unset */
            rna->ext.choice = 0;
            removed = TRUE;
          }
        }
        break;

      case CDSGeneProt_field_gene_locus:
      case CDSGeneProt_field_gene_description:
      case CDSGeneProt_field_gene_allele:
      case CDSGeneProt_field_gene_maploc:
      case CDSGeneProt_field_gene_locus_tag:
      case CDSGeneProt_field_gene_synonym:
        if (feat->data.choice != SEQFEAT_GENE) {
          break;
        }
        gene = feat->data.value.ptrvalue;
        if (gene == NULL) {
          break;
        }
        if (field == CDSGeneProt_field_gene_locus) {
          text_slot = &(gene->locus);
        } else if (field == CDSGeneProt_field_gene_description) {
          text_slot = &(gene->desc);
        } else if (field == CDSGeneProt_field_gene_allele) {
          text_slot = &(gene->allele);
        } else if (field == CDSGeneProt_field_gene_maploc) {
          text_slot = &(gene->maploc);
        } else if (field == CDSGeneProt_field_gene_locus_tag) {
          text_slot = &(gene->locus_tag);
        } else {
          removed |= RemoveValNodeStringMatch (&(gene->syn), scp);
        }
        break;

      case CDSGeneProt_field_prot_name:
      case CDSGeneProt_field_prot_description:
      case CDSGeneProt_field_prot_ec_number:
      case CDSGeneProt_field_prot_activity:
      case CDSGeneProt_field_mat_peptide_name:
      case CDSGeneProt_field_mat_peptide_description:
      case CDSGeneProt_field_mat_peptide_ec_number:
      case CDSGeneProt_field_mat_peptide_activity:
        if (feat->data.choice != SEQFEAT_PROT || feat->idx.subtype != wanted_subtype) {
          break;
        }
        prot = feat->data.value.ptrvalue;
        if (prot == NULL) {
          break;
        }
        if (field == CDSGeneProt_field_prot_description
            || field == CDSGeneProt_field_mat_peptide_description) {
          text_slot = &(prot->desc);
        } else if (field == CDSGeneProt_field_prot_name
                   || field == CDSGeneProt_field_mat_peptide_name) {
          removed |= RemoveValNodeStringMatch (&(prot->name), scp);
        } else if (field == CDSGeneProt_field_prot_ec_number
                   || field == CDSGeneProt_field_mat_peptide_ec_number) {
          removed |= RemoveValNodeStringMatch (&(prot->ec), scp);
        } else {
          removed |= RemoveValNodeStringMatch (&(prot->activity), scp);
        }
        break;

      default:
        break;
    }

    if (text_slot != NULL
        && !StringHasNoText (*text_slot)
        && DoesStringMatchConstraint (*text_slot, scp)) {
      *text_slot = MemFree (*text_slot);
      removed = TRUE;
    }
  }

  return removed;
}
|
|
4669 |
||
4670 |
||
4671 |
/* CreateGeneForCGPSet
 *
 * Builds a new gene feature for a CDS-Gene-Prot set.
 *
 *   c  the CDS-Gene-Prot set; NULL yields NULL
 *
 * The first non-NULL coding region of the set, or failing that the first
 * non-NULL mRNA, supplies the location.  The gene is created with
 * CreateNewFeatureOnBioseq on the Bioseq found for that location and is
 * given an empty GeneRef.  The new feature is not added to c->gene_list
 * here.
 *
 * Returns the new gene feature, or NULL when the set has no CDS/mRNA, the
 * location does not resolve to a Bioseq, or the feature could not be
 * created.
 */
static SeqFeatPtr CreateGeneForCGPSet (CGPSetPtr c)
{
  SeqFeatPtr model = NULL;
  SeqFeatPtr new_gene;
  BioseqPtr  target;
  ValNodePtr node;

  if (c == NULL) {
    return NULL;
  }

  /* prefer a coding region as the model for the location ... */
  node = c->cds_list;
  while (node != NULL && model == NULL) {
    model = node->data.ptrvalue;
    node = node->next;
  }
  /* ... and fall back to an mRNA */
  node = c->mrna_list;
  while (node != NULL && model == NULL) {
    model = node->data.ptrvalue;
    node = node->next;
  }
  if (model == NULL) {
    return NULL;
  }

  target = BioseqFindFromSeqLoc (model->location);
  if (target == NULL) {
    return NULL;
  }

  new_gene = CreateNewFeatureOnBioseq (target, SEQFEAT_GENE, model->location);
  if (new_gene != NULL) {
    new_gene->data.value.ptrvalue = GeneRefNew ();
  }
  return new_gene;
}
|
|
4696 |
||
4697 |
||
4698 |
static Boolean SetFieldValueInCGPSet (CGPSetPtr c, Uint2 field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text) |
|
4699 |
{
|
|
4700 |
Boolean rval = FALSE; |
|
4701 |
ValNodePtr vnp; |
|
4702 |
SeqFeatPtr sfp; |
|
4703 |
GeneRefPtr grp; |
|
4704 |
ProtRefPtr prp; |
|
4705 |
||
4706 |
if (c == NULL) return FALSE; |
|
4707 |
switch (field) { |
|
4708 |
case CDSGeneProt_field_cds_comment: |
|
4709 |
for (vnp = c->cds_list; vnp != NULL; vnp = vnp->next) { |
|
4710 |
sfp = (SeqFeatPtr) vnp->data.ptrvalue; |
|
4711 |
if (sfp != NULL && DoesStringMatchConstraint(sfp->comment, scp)) |
|
4712 |
{
|
|
4713 |
rval |= SetStringValue ( &(sfp->comment), value, existing_text); |
|
4714 |
}
|
|
4715 |
}
|
|
4716 |
break; |
|
4717 |
case CDSGeneProt_field_gene_locus: |
|
4718 |
if (c->gene_list == NULL && scp == NULL) { |
|
4719 |
sfp = CreateGeneForCGPSet (c); |
|
4720 |
if (sfp != NULL) { |
|
4721 |
ValNodeAddPointer (&(c->gene_list), 0, sfp); |
|
4722 |
}
|
|
4723 |
}
|
|
4724 |
for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { |
|
4725 |
sfp = (SeqFeatPtr) vnp->data.ptrvalue; |
|
4726 |
if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE |
|
4727 |
&& (grp = sfp->data.value.ptrvalue) != NULL |
|
4728 |
&& DoesStringMatchConstraint(grp->locus, scp)) |
|
4729 |
{
|
|
4730 |
rval |= SetStringValue ( &(grp->locus), value, existing_text); |
|
4731 |
}
|
|
4732 |
}
|
|
4733 |
break; |
|
4734 |
case CDSGeneProt_field_gene_description: |
|
4735 |
for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { |
|
4736 |
sfp = (SeqFeatPtr) vnp->data.ptrvalue; |
|
4737 |
if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE |
|
4738 |
&& (grp = sfp->data.value.ptrvalue) != NULL |
|
4739 |
&& DoesStringMatchConstraint(grp->desc, scp)) |
|
4740 |
{
|
|
4741 |
rval |= SetStringValue ( &(grp->desc), value, existing_text); |
|
4742 |
}
|
|
4743 |
}
|
|
4744 |
break; |
|
4745 |
case CDSGeneProt_field_gene_comment: |
|
4746 |
for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { |
|
4747 |
sfp = (SeqFeatPtr) vnp->data.ptrvalue; |
|
4748 |
if (sfp != NULL && DoesStringMatchConstraint(sfp->comment, scp)) |
|
4749 |
{
|
|
4750 |
rval |= SetStringValue ( &(sfp->comment), value, existing_text); |
|
4751 |
}
|
|
4752 |
}
|
|
4753 |
break; |
|
4754 |
case CDSGeneProt_field_gene_allele: |
|
4755 |
for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { |
|
4756 |
sfp = (SeqFeatPtr) vnp->data.ptrvalue; |
|
4757 |
if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE |
|
4758 |
&& (grp = sfp->data.value.ptrvalue) != NULL |
|
4759 |
&& DoesStringMatchConstraint(grp->allele, scp)) |
|
4760 |
{
|
|
4761 |
rval |= SetStringValue (&(grp->allele), value, existing_text); |
|
4762 |
}
|
|
4763 |
}
|
|
4764 |
break; |
|
4765 |
case CDSGeneProt_field_gene_maploc: |
|
4766 |
for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { |
|
4767 |
sfp = (SeqFeatPtr) vnp->data.ptrvalue; |
|
4768 |
if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE |
|
4769 |
&& (grp = sfp->data.value.ptrvalue) != NULL |
|
4770 |
&& DoesStringMatchConstraint(grp->maploc, scp)) |
|
4771 |
{
|
|
4772 |
rval |= SetStringValue ( &(grp->maploc), value, existing_text); |
|
4773 |
}
|
|
4774 |
}
|
|
4775 |
break; |
|
4776 |
case CDSGeneProt_field_gene_locus_tag: |
|
4777 |
for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { |
|
4778 |
sfp = (SeqFeatPtr) vnp->data.ptrvalue; |
|
4779 |
if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE |
|
4780 |
&& (grp = sfp->data.value.ptrvalue) != NULL |
|
4781 |
&& DoesStringMatchConstraint(grp->locus_tag, scp)) |
|
4782 |
{
|
|
4783 |
rval |= SetStringValue ( &(grp->locus_tag), value, existing_text); |
|
4784 |
}
|
|
4785 |
}
|
|
4786 |
break; |
|
4787 |
case CDSGeneProt_field_gene_synonym: |
|
4788 |
for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { |
|
4789 |
sfp = (SeqFeatPtr) vnp->data.ptrvalue; |
|
4790 |
if (sfp != NULL && sfp->data.choice == SEQFEAT_GENE |
|
4791 |
&& (grp = sfp->data.value.ptrvalue) != NULL) |
|
4792 |
{
|
|
4793 |
rval |= SetStringsInValNodeStringList (&(grp->syn), scp, value, existing_text); |
|
4794 |
}
|
|
4795 |
}
|
|
4796 |
break; |
|
4797 |
case CDSGeneProt_field_gene_old_locus_tag: |
|
4798 |
for (vnp = c->gene_list; vnp != NULL; vnp = vnp->next) { |
|
4799 |
sfp = (SeqFeatPtr) vnp->data.ptrvalue; |
|
4800 |
if (sfp != NULL) { |
|
4801 |
rval |= RemoveGBQualMatch (&(sfp->qual), "old-locus-tag", scp); |
|
4802 |
}
|
|
4803 |
}
|
|
4804 |
break; |
|
4805 |
case CDSGeneProt_field_mrna_product: |
|
4806 |
for (vnp = c->mrna_list; vnp != NULL; vnp = vnp->next) { |
|
4807 |
sfp = (SeqFeatPtr) vnp->data.ptrvalue; |
|
4808 |
rval |= SetRNAProductString (sfp, scp, value, existing_text); |
|
4809 |
}
|
|
4810 |
break; |
|
4811 |
case CDSGeneProt_field_mrna_comment: |
|
4812 |
for (vnp = c->mrna_list; vnp != NULL; vnp = vnp->next) { |
|
4813 |
sfp = (SeqFeatPtr) vnp->data.ptrvalue; |
|
4814 |
if (sfp != NULL&& DoesStringMatchConstraint(sfp->comment, scp)) |
|
4815 |
{
|
|
4816 |
rval |= SetStringValue ( &(sfp->comment), value, existing_text); |
|
4817 |
}
|
|
4818 |
}
|
|
4819 |
break; |
|
4820 |
case CDSGeneProt_field_prot_name: |
|
4821 |
for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { |
|
4822 |
sfp = (SeqFeatPtr) vnp->data.ptrvalue; |
|
4823 |
if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT |
|
4824 |
&& sfp->idx.subtype == FEATDEF_PROT |
|
4825 |
&& (prp = sfp->data.value.ptrvalue) != NULL) |
|
4826 |
{
|
|
4827 |
rval |= SetStringsInValNodeStringList (&(prp->name), scp, value, existing_text); |
|
4828 |
}
|
|
4829 |
}
|
|
4830 |
break; |
|
4831 |
case CDSGeneProt_field_prot_description: |
|
4832 |
for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { |
|
4833 |
sfp = (SeqFeatPtr) vnp->data.ptrvalue; |
|
4834 |
if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT |
|
4835 |
&& sfp->idx.subtype == FEATDEF_PROT |
|
4836 |
&& (prp = sfp->data.value.ptrvalue) != NULL |
|
4837 |
&& DoesStringMatchConstraint(prp->desc, scp)) { |
|
4838 |
rval |= SetStringValue ( &(prp->desc), value, existing_text); |
|
4839 |
}
|
|
4840 |
}
|
|
4841 |
break; |
|
4842 |
case CDSGeneProt_field_prot_ec_number: |
|
4843 |
for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { |
|
4844 |
sfp = (SeqFeatPtr) vnp->data.ptrvalue; |
|
4845 |
if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT |
|
4846 |
&& sfp->idx.subtype == FEATDEF_PROT |
|
4847 |
&& (prp = sfp->data.value.ptrvalue) != NULL) |
|
4848 |
{
|
|
4849 |
rval |= SetStringsInValNodeStringList (&(prp->ec), scp, value, existing_text); |
|
4850 |
}
|
|
4851 |
}
|
|
4852 |
break; |
|
4853 |
case CDSGeneProt_field_prot_activity: |
|
4854 |
for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { |
|
4855 |
sfp = (SeqFeatPtr) vnp->data.ptrvalue; |
|
4856 |
if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT |
|
4857 |
&& sfp->idx.subtype == FEATDEF_PROT |
|
4858 |
&& (prp = sfp->data.value.ptrvalue) != NULL) |
|
4859 |
{
|
|
4860 |
rval |= SetStringsInValNodeStringList (&(prp->activity), scp, value, existing_text); |
|
4861 |
}
|
|
4862 |
}
|
|
4863 |
break; |
|
4864 |
case CDSGeneProt_field_prot_comment: |
|
4865 |
for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { |
|
4866 |
sfp = (SeqFeatPtr) vnp->data.ptrvalue; |
|
4867 |
if (sfp != NULL && sfp->idx.subtype == FEATDEF_PROT |
|
4868 |
&& DoesStringMatchConstraint(sfp->comment, scp)) |
|
4869 |
{
|
|
4870 |
rval |= SetStringValue ( &(sfp->comment), value, existing_text); |
|
4871 |
}
|
|
4872 |
}
|
|
4873 |
break; |
|
4874 |
case CDSGeneProt_field_mat_peptide_name: |
|
4875 |
for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { |
|
4876 |
sfp = (SeqFeatPtr) vnp->data.ptrvalue; |
|
4877 |
if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT |
|
4878 |
&& sfp->idx.subtype == FEATDEF_mat_peptide_aa |
|
4879 |
&& (prp = sfp->data.value.ptrvalue) != NULL) |
|
4880 |
{
|
|
4881 |
rval |= SetStringsInValNodeStringList (&(prp->name), scp, value, existing_text); |
|
4882 |
}
|
|
4883 |
}
|
|
4884 |
break; |
|
4885 |
case CDSGeneProt_field_mat_peptide_description: |
|
4886 |
for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { |
|
4887 |
sfp = (SeqFeatPtr) vnp->data.ptrvalue; |
|
4888 |
if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT |
|
4889 |
&& sfp->idx.subtype == FEATDEF_mat_peptide_aa |
|
4890 |
&& (prp = sfp->data.value.ptrvalue) != NULL |
|
4891 |
&& DoesStringMatchConstraint(prp->desc, scp)) { |
|
4892 |
rval |= SetStringValue ( &(prp->desc), value, existing_text); |
|
4893 |
}
|
|
4894 |
}
|
|
4895 |
break; |
|
4896 |
case CDSGeneProt_field_mat_peptide_ec_number: |
|
4897 |
for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { |
|
4898 |
sfp = (SeqFeatPtr) vnp->data.ptrvalue; |
|
4899 |
if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT |
|
4900 |
&& sfp->idx.subtype == FEATDEF_mat_peptide_aa |
|
4901 |
&& (prp = sfp->data.value.ptrvalue) != NULL) |
|
4902 |
{
|
|
4903 |
rval |= SetStringsInValNodeStringList (&(prp->ec), scp, value, existing_text); |
|
4904 |
}
|
|
4905 |
}
|
|
4906 |
break; |
|
4907 |
case CDSGeneProt_field_mat_peptide_activity: |
|
4908 |
for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { |
|
4909 |
sfp = (SeqFeatPtr) vnp->data.ptrvalue; |
|
4910 |
if (sfp != NULL && sfp->data.choice == SEQFEAT_PROT |
|
4911 |
&& sfp->idx.subtype == FEATDEF_mat_peptide_aa |
|
4912 |
&& (prp = sfp->data.value.ptrvalue) != NULL) |
|
4913 |
{
|
|
4914 |
rval |= SetStringsInValNodeStringList (&(prp->activity), scp, value, existing_text); |
|
4915 |
}
|
|
4916 |
}
|
|
4917 |
break; |
|
4918 |
case CDSGeneProt_field_mat_peptide_comment: |
|
4919 |
for (vnp = c->prot_list; vnp != NULL; vnp = vnp->next) { |
|
4920 |
sfp = (SeqFeatPtr) vnp->data.ptrvalue; |
|
4921 |
if (sfp != NULL && sfp->idx.subtype == FEATDEF_mat_peptide_aa |
|
4922 |
&& DoesStringMatchConstraint(sfp->comment, scp)) |
|
4923 |
{
|
|
4924 |
rval |= SetStringValue ( &(sfp->comment), value, existing_text); |
|
4925 |
}
|
|
4926 |
}
|
|
4927 |
break; |
|
4928 |
}
|
|
4929 |
return rval; |
|
4930 |
}
|
|
4931 |
||
4932 |
||
4933 |
/* Returns the data pointer of the first Seq_descr_molinfo descriptor found
 * in bsp->descr, or NULL when bsp is NULL or the list holds no such
 * descriptor.  The pointer is the descriptor's own payload, not a copy.
 */
static MolInfoPtr GetMolInfoForBioseq (BioseqPtr bsp)
{
  SeqDescrPtr desc;

  if (bsp == NULL) {
    return NULL;
  }

  /* walk the descriptor chain and stop at the first molinfo entry */
  for (desc = bsp->descr; desc != NULL; desc = desc->next) {
    if (desc->choice == Seq_descr_molinfo) {
      return (MolInfoPtr) desc->data.ptrvalue;
    }
  }
  return NULL;
}
|
|
4948 |
||
4949 |
||
4950 |
/* Produces a newly allocated (StringSave) name for the sequence qualifier
 * selected by field->choice, or NULL when bsp/field is NULL, the choice is
 * not one of the MolinfoField_* values handled here, the needed MolInfo
 * descriptor is missing, or the name lookup itself yields NULL.
 *
 * molecule / technique / completedness are read from the bioseq's MolInfo
 * descriptor; mol_class / topology / strand are read from the bioseq itself.
 */
static CharPtr GetSequenceQualFromBioseq (BioseqPtr bsp, ValNodePtr field)
{
  CharPtr    label = NULL;
  MolInfoPtr molinfo;

  if (bsp == NULL || field == NULL) {
    return NULL;
  }

  switch (field->choice) {
    case MolinfoField_molecule:
    case MolinfoField_technique:
    case MolinfoField_completedness:
      /* these three all live on the MolInfo descriptor */
      molinfo = GetMolInfoForBioseq (bsp);
      if (molinfo == NULL) {
        break;
      }
      if (field->choice == MolinfoField_molecule) {
        label = BiomolNameFromBiomol (molinfo->biomol);
      } else if (field->choice == MolinfoField_technique) {
        label = TechNameFromTech (molinfo->tech);
      } else {
        label = CompletenessNameFromCompleteness (molinfo->completeness);
      }
      break;
    case MolinfoField_mol_class:
      label = MolNameFromMol (bsp->mol);
      break;
    case MolinfoField_topology:
      label = TopologyNameFromTopology (bsp->topology);
      break;
    case MolinfoField_strand:
      label = StrandNameFromStrand (bsp->strand);
      break;
  }

  /* hand the caller its own copy of whatever name was found */
  return (label == NULL) ? NULL : StringSave (label);
}
|
|
4989 |
||
4990 |
||
4991 |
/* Clears (sets to 0) the sequence qualifier selected by field->choice.
 *
 * molecule / technique / completedness are cleared on the bioseq's MolInfo
 * descriptor and report FALSE when no such descriptor exists; mol_class /
 * topology / strand are cleared directly on the bioseq and always report
 * TRUE.  Returns FALSE for NULL arguments or an unhandled choice.
 */
static Boolean RemoveSequenceQualFromBioseq (BioseqPtr bsp, ValNodePtr field)
{
  MolInfoPtr molinfo;

  if (bsp == NULL || field == NULL) {
    return FALSE;
  }

  switch (field->choice) {
    case MolinfoField_molecule:
      molinfo = GetMolInfoForBioseq (bsp);
      if (molinfo == NULL) {
        return FALSE;
      }
      molinfo->biomol = 0;
      return TRUE;

    case MolinfoField_technique:
      molinfo = GetMolInfoForBioseq (bsp);
      if (molinfo == NULL) {
        return FALSE;
      }
      molinfo->tech = 0;
      return TRUE;

    case MolinfoField_completedness:
      molinfo = GetMolInfoForBioseq (bsp);
      if (molinfo == NULL) {
        return FALSE;
      }
      molinfo->completeness = 0;
      return TRUE;

    case MolinfoField_mol_class:
      bsp->mol = 0;
      return TRUE;

    case MolinfoField_topology:
      bsp->topology = 0;
      return TRUE;

    case MolinfoField_strand:
      bsp->strand = 0;
      return TRUE;

    default:
      break;
  }
  return FALSE;
}
|
|
5035 |
||
5036 |
||
5037 |
/* Creates a new Seq_descr_molinfo descriptor on bsp, attaches a freshly
 * allocated MolInfo to it, and returns that MolInfo.
 *
 * Returns NULL when bsp is NULL, when the descriptor could not be created,
 * or when MolInfoNew itself returns NULL (in which case the new descriptor
 * is left with a NULL payload, as before).
 */
static MolInfoPtr AddMolInfoToBioseq (BioseqPtr bsp)
{
  SeqDescrPtr sdp;
  MolInfoPtr  m;

  if (bsp == NULL) {
    return NULL;
  }

  sdp = CreateNewDescriptorOnBioseq (bsp, Seq_descr_molinfo);
  if (sdp == NULL) {
    /* no descriptor to own the MolInfo: do not allocate (and leak) one */
    return NULL;
  }

  m = MolInfoNew ();
  sdp->data.ptrvalue = m;
  return m;
}
|
|
5047 |
||
5048 |
||
5049 |
/* Applies the sequence qualifier described by field to bsp.
 *
 * field->choice selects the qualifier (MolinfoField_*), and
 * field->data.intvalue carries the value, which is translated with the
 * matching *From*Type helper before being stored.
 *
 * molecule / technique / completedness are stored on the bioseq's MolInfo
 * descriptor; one is added via AddMolInfoToBioseq when the bioseq has none.
 * mol_class / topology / strand are stored directly on the bioseq.
 *
 * Returns TRUE when a value was stored; FALSE for NULL arguments, an
 * unhandled choice, or when no MolInfo descriptor could be obtained.
 */
static Boolean SetSequenceQualOnBioseq (BioseqPtr bsp, ValNodePtr field)
{
  MolInfoPtr m;
  Boolean    rval = FALSE;

  if (bsp == NULL || field == NULL) return FALSE;

  switch (field->choice) {
    case MolinfoField_molecule:
    case MolinfoField_technique:
    case MolinfoField_completedness:
      m = GetMolInfoForBioseq (bsp);
      if (m == NULL) {
        m = AddMolInfoToBioseq (bsp);
      }
      if (m == NULL) {
        /* descriptor creation failed: nothing to write to */
        break;
      }
      if (field->choice == MolinfoField_molecule) {
        m->biomol = BiomolFromMoleculeType (field->data.intvalue);
      } else if (field->choice == MolinfoField_technique) {
        m->tech = TechFromTechniqueType (field->data.intvalue);
      } else {
        m->completeness = CompletenessFromCompletednessType (field->data.intvalue);
      }
      rval = TRUE;
      break;
    case MolinfoField_mol_class:
      bsp->mol = MolFromMoleculeClassType (field->data.intvalue);
      rval = TRUE;
      break;
    case MolinfoField_topology:
      bsp->topology = TopologyFromTopologyType (field->data.intvalue);
      rval = TRUE;
      break;
    case MolinfoField_strand:
      bsp->strand = StrandFromStrandType (field->data.intvalue);
      rval = TRUE;
      break;
  }
  return rval;
}
|
|
5096 |
||
5097 |
||
5098 |
/* Builds a new FieldType describing the "from" side of fieldpair.
 *
 * The result is freshly allocated for every handled pair kind:
 *   source_qual   - a SourceQualChoice_textqual node holding field_from
 *   feature_field - a FeatureField with the pair's type and an
 *                   AsnIoMemCopy of field_from
 *   cds_gene_prot - the field_from value stored as an integer
 *   molinfo_field - a MolinfoField_* node holding the pair's "from" value
 *
 * Returns NULL when fieldpair is NULL, its payload is NULL, or the pair
 * kind (or molinfo sub-kind) is not one handled here.
 */
NLM_EXTERN FieldTypePtr GetFromFieldFromFieldPair (FieldPairTypePtr fieldpair)
{
  FieldTypePtr            result = NULL;
  SourceQualPairPtr       src_pair;
  SourceQualChoicePtr     src_choice;
  FeatureFieldPairPtr     feat_pair;
  FeatureFieldPtr         feat_field;
  CDSGeneProtFieldPairPtr cgp_pair;
  MolinfoFieldPairPtr     mol_pair;
  ValNodePtr              mol_field;
  Boolean                 recognized;
  Uint1                   mol_choice = 0;
  Int4                    mol_value = 0;

  if (fieldpair == NULL) {
    return NULL;
  }

  switch (fieldpair->choice) {
    case FieldPairType_source_qual:
      src_pair = (SourceQualPairPtr) fieldpair->data.ptrvalue;
      if (src_pair == NULL) {
        break;
      }
      src_choice = ValNodeNew (NULL);
      src_choice->choice = SourceQualChoice_textqual;
      src_choice->data.intvalue = src_pair->field_from;
      result = ValNodeNew (NULL);
      result->choice = FieldType_source_qual;
      result->data.ptrvalue = src_choice;
      break;

    case FieldPairType_feature_field:
      feat_pair = (FeatureFieldPairPtr) fieldpair->data.ptrvalue;
      if (feat_pair == NULL) {
        break;
      }
      feat_field = FeatureFieldNew ();
      feat_field->type = feat_pair->type;
      feat_field->field = (FeatQualChoicePtr) AsnIoMemCopy (feat_pair->field_from,
                                                            (AsnReadFunc) FeatQualChoiceAsnRead,
                                                            (AsnWriteFunc) FeatQualChoiceAsnWrite);
      result = ValNodeNew (NULL);
      result->choice = FieldType_feature_field;
      result->data.ptrvalue = feat_field;
      break;

    case FieldPairType_cds_gene_prot:
      cgp_pair = (CDSGeneProtFieldPairPtr) fieldpair->data.ptrvalue;
      if (cgp_pair == NULL) {
        break;
      }
      result = ValNodeNew (NULL);
      result->choice = FieldType_cds_gene_prot;
      result->data.intvalue = cgp_pair->field_from;
      break;

    case FieldPairType_molinfo_field:
      mol_pair = (MolinfoFieldPairPtr) fieldpair->data.ptrvalue;
      if (mol_pair == NULL || mol_pair->data.ptrvalue == NULL) {
        break;
      }
      /* work out which molinfo field this is and its "from" value first,
       * then build the nodes once below */
      recognized = TRUE;
      switch (mol_pair->choice) {
        case MolinfoFieldPair_molecule:
          mol_choice = MolinfoField_molecule;
          mol_value = ((MolinfoMoleculePairPtr) mol_pair->data.ptrvalue)->from;
          break;
        case MolinfoFieldPair_technique:
          mol_choice = MolinfoField_technique;
          mol_value = ((MolinfoTechniquePairPtr) mol_pair->data.ptrvalue)->from;
          break;
        case MolinfoFieldPair_completedness:
          mol_choice = MolinfoField_completedness;
          mol_value = ((MolinfoCompletednessPairPtr) mol_pair->data.ptrvalue)->from;
          break;
        case MolinfoFieldPair_mol_class:
          mol_choice = MolinfoField_mol_class;
          mol_value = ((MolinfoMolClassPairPtr) mol_pair->data.ptrvalue)->from;
          break;
        case MolinfoFieldPair_topology:
          mol_choice = MolinfoField_topology;
          mol_value = ((MolinfoTopologyPairPtr) mol_pair->data.ptrvalue)->from;
          break;
        case MolinfoFieldPair_strand:
          mol_choice = MolinfoField_strand;
          mol_value = ((MolinfoStrandPairPtr) mol_pair->data.ptrvalue)->from;
          break;
        default:
          recognized = FALSE;
          break;
      }
      if (recognized) {
        mol_field = ValNodeNew (NULL);
        mol_field->choice = mol_choice;
        mol_field->data.intvalue = mol_value;
        result = ValNodeNew (NULL);
        result->choice = FieldType_molinfo_field;
        result->data.ptrvalue = mol_field;
      }
      break;
  }
  return result;
}
|
|
5187 |
||
5188 |
||
5189 |
/* Builds a new FieldType describing the "to" side of fieldpair.
 *
 * The result is freshly allocated for every handled pair kind:
 *   source_qual   - a SourceQualChoice_textqual node holding field_to
 *   feature_field - a FeatureField with the pair's type and an
 *                   AsnIoMemCopy of field_to
 *   cds_gene_prot - the field_to value stored as an integer
 *   molinfo_field - a MolinfoField_* node holding the pair's "to" value
 *
 * Returns NULL when fieldpair is NULL, its payload is NULL, or the pair
 * kind (or molinfo sub-kind) is not one handled here.
 */
NLM_EXTERN FieldTypePtr GetToFieldFromFieldPair (FieldPairTypePtr fieldpair)
{
  FieldTypePtr            result = NULL;
  SourceQualPairPtr       src_pair;
  SourceQualChoicePtr     src_choice;
  FeatureFieldPairPtr     feat_pair;
  FeatureFieldPtr         feat_field;
  CDSGeneProtFieldPairPtr cgp_pair;
  MolinfoFieldPairPtr     mol_pair;
  ValNodePtr              mol_field;
  Boolean                 recognized;
  Uint1                   mol_choice = 0;
  Int4                    mol_value = 0;

  if (fieldpair == NULL) {
    return NULL;
  }

  switch (fieldpair->choice) {
    case FieldPairType_source_qual:
      src_pair = (SourceQualPairPtr) fieldpair->data.ptrvalue;
      if (src_pair == NULL) {
        break;
      }
      src_choice = ValNodeNew (NULL);
      src_choice->choice = SourceQualChoice_textqual;
      src_choice->data.intvalue = src_pair->field_to;
      result = ValNodeNew (NULL);
      result->choice = FieldType_source_qual;
      result->data.ptrvalue = src_choice;
      break;

    case FieldPairType_feature_field:
      feat_pair = (FeatureFieldPairPtr) fieldpair->data.ptrvalue;
      if (feat_pair == NULL) {
        break;
      }
      feat_field = FeatureFieldNew ();
      feat_field->type = feat_pair->type;
      feat_field->field = (FeatQualChoicePtr) AsnIoMemCopy (feat_pair->field_to,
                                                            (AsnReadFunc) FeatQualChoiceAsnRead,
                                                            (AsnWriteFunc) FeatQualChoiceAsnWrite);
      result = ValNodeNew (NULL);
      result->choice = FieldType_feature_field;
      result->data.ptrvalue = feat_field;
      break;

    case FieldPairType_cds_gene_prot:
      cgp_pair = (CDSGeneProtFieldPairPtr) fieldpair->data.ptrvalue;
      if (cgp_pair == NULL) {
        break;
      }
      result = ValNodeNew (NULL);
      result->choice = FieldType_cds_gene_prot;
      result->data.intvalue = cgp_pair->field_to;
      break;

    case FieldPairType_molinfo_field:
      mol_pair = (MolinfoFieldPairPtr) fieldpair->data.ptrvalue;
      if (mol_pair == NULL || mol_pair->data.ptrvalue == NULL) {
        break;
      }
      /* work out which molinfo field this is and its "to" value first,
       * then build the nodes once below */
      recognized = TRUE;
      switch (mol_pair->choice) {
        case MolinfoFieldPair_molecule:
          mol_choice = MolinfoField_molecule;
          mol_value = ((MolinfoMoleculePairPtr) mol_pair->data.ptrvalue)->to;
          break;
        case MolinfoFieldPair_technique:
          mol_choice = MolinfoField_technique;
          mol_value = ((MolinfoTechniquePairPtr) mol_pair->data.ptrvalue)->to;
          break;
        case MolinfoFieldPair_completedness:
          mol_choice = MolinfoField_completedness;
          mol_value = ((MolinfoCompletednessPairPtr) mol_pair->data.ptrvalue)->to;
          break;
        case MolinfoFieldPair_mol_class:
          mol_choice = MolinfoField_mol_class;
          mol_value = ((MolinfoMolClassPairPtr) mol_pair->data.ptrvalue)->to;
          break;
        case MolinfoFieldPair_topology:
          mol_choice = MolinfoField_topology;
          mol_value = ((MolinfoTopologyPairPtr) mol_pair->data.ptrvalue)->to;
          break;
        case MolinfoFieldPair_strand:
          mol_choice = MolinfoField_strand;
          mol_value = ((MolinfoStrandPairPtr) mol_pair->data.ptrvalue)->to;
          break;
        default:
          recognized = FALSE;
          break;
      }
      if (recognized) {
        mol_field = ValNodeNew (NULL);
        mol_field->choice = mol_choice;
        mol_field->data.intvalue = mol_value;
        result = ValNodeNew (NULL);
        result->choice = FieldType_molinfo_field;
        result->data.ptrvalue = mol_field;
      }
      break;
  }
  return result;
}
|
|
5278 |
||
5279 |
||
5280 |
/* Maps a FieldPairType_* choice value to the corresponding FieldType_*
 * choice value.  Any value not listed below maps to 0.
 */
static Uint1 FieldTypeChoiceFromFieldPairTypeChoice (Uint1 field_pair_choice)
{
  switch (field_pair_choice) {
    case FieldPairType_source_qual:
      return FieldType_source_qual;
    case FieldPairType_feature_field:
      return FieldType_feature_field;
    case FieldPairType_cds_gene_prot:
      return FieldType_cds_gene_prot;
    case FieldPairType_molinfo_field:
      return FieldType_molinfo_field;
    default:
      break;
  }
  return 0;
}
|
|
5301 |
||
5302 |
||
5303 |
/* Reports which FieldType_* choice an AECR action operates on.
 *
 * For apply / edit / remove actions this is the choice of the action's
 * single field.  For convert / copy / swap / parse actions it is derived
 * from the choice of the action's field pair via
 * FieldTypeChoiceFromFieldPairTypeChoice.
 *
 * Returns 0 when action, its inner action, or that action's payload is
 * NULL, when the relevant field / field pair is NULL, or when the action
 * kind is not one handled here.
 */
NLM_EXTERN Uint1 FieldTypeFromAECRAction (AECRActionPtr action)
{
  Uint1              field_type = 0;
  ApplyActionPtr     a;
  EditActionPtr      e;
  ConvertActionPtr   v;
  CopyActionPtr      c;
  SwapActionPtr      s;
  RemoveActionPtr    r;
  AECRParseActionPtr p;

  if (action == NULL || action->action == NULL || action->action->data.ptrvalue == NULL) {
    return 0;
  }
  switch (action->action->choice) {
    case ActionChoice_apply:
      a = (ApplyActionPtr) action->action->data.ptrvalue;
      if (a->field != NULL) {
        field_type = a->field->choice;
      }
      break;
    case ActionChoice_edit:
      e = (EditActionPtr) action->action->data.ptrvalue;
      if (e->field != NULL) {
        field_type = e->field->choice;
      }
      break;
    case ActionChoice_convert:
      v = (ConvertActionPtr) action->action->data.ptrvalue;
      /* guard the pair pointer the same way the single-field cases do */
      if (v->fields != NULL) {
        field_type = FieldTypeChoiceFromFieldPairTypeChoice (v->fields->choice);
      }
      break;
    case ActionChoice_copy:
      c = (CopyActionPtr) action->action->data.ptrvalue;
      if (c->fields != NULL) {
        field_type = FieldTypeChoiceFromFieldPairTypeChoice (c->fields->choice);
      }
      break;
    case ActionChoice_swap:
      s = (SwapActionPtr) action->action->data.ptrvalue;
      if (s->fields != NULL) {
        field_type = FieldTypeChoiceFromFieldPairTypeChoice (s->fields->choice);
      }
      break;
    case ActionChoice_remove:
      r = (RemoveActionPtr) action->action->data.ptrvalue;
      if (r->field != NULL) {
        field_type = r->field->choice;
      }
      break;
    case ActionChoice_parse:
      p = (AECRParseActionPtr) action->action->data.ptrvalue;
      if (p->fields != NULL) {
        field_type = FieldTypeChoiceFromFieldPairTypeChoice (p->fields->choice);
      }
      break;
  }
  return field_type;
}
|
|
5355 |
||
5356 |
||
5357 |
/* Fetches the text value of field from the object (choice, data), filtered
 * by the optional string constraint scp.
 *
 * choice identifies what data points to: an OBJ_* constant, or 0 for a
 * CDS-gene-prot set (CGPSetPtr).  Dispatch is on field->choice:
 *   source_qual   - read from the BioSource obtained via GetBioSourceFromObject
 *   feature_field - read from the feature (OBJ_SEQFEAT only)
 *   cds_gene_prot - read from the CGP set (choice 0), or from a single
 *                   feature through a temporary FeatureField (OBJ_SEQFEAT)
 *   molinfo_field - read from the bioseq (OBJ_BIOSEQ only)
 *
 * Returns NULL when data or field is NULL, field carries no payload, the
 * object kind does not suit the field kind, or the underlying getter
 * returns NULL.
 * NOTE(review): the payload guard tests field->data.ptrvalue even for
 * cds_gene_prot, whose payload is read as data.intvalue - confirm that a
 * zero CDS-gene-prot field value is never meaningful.
 * NOTE(review): result ownership is decided by the underlying getters;
 * presumably the caller frees it - verify against callers.
 */
static CharPtr GetFieldValueForObject (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp)
{
  CharPtr         str = NULL;
  FeatureFieldPtr feature_field;

  /* this function returns a string pointer, so bail out with NULL */
  if (data == NULL || field == NULL || field->data.ptrvalue == NULL) return NULL;

  switch (field->choice) {
    case FieldType_source_qual :
      str = GetSourceQualFromBioSource (GetBioSourceFromObject (choice, data), (SourceQualChoicePtr) field->data.ptrvalue, scp);
      break;
    case FieldType_feature_field :
      if (choice == OBJ_SEQFEAT) {
        str = GetQualFromFeature ((SeqFeatPtr) data, (FeatureFieldPtr) field->data.ptrvalue, scp);
      }
      break;
    case FieldType_cds_gene_prot :
      if (choice == 0) {
        str = GetFieldValueFromCGPSet ((CGPSetPtr) data, field->data.intvalue, scp);
      } else if (choice == OBJ_SEQFEAT) {
        /* translate the CGP field into a feature field just for this lookup */
        feature_field = FeatureFieldFromCDSGeneProtField (field->data.intvalue);
        str = GetQualFromFeature ((SeqFeatPtr) data, feature_field, scp);
        feature_field = FeatureFieldFree (feature_field);
      }
      break;
    case FieldType_molinfo_field :
      if (choice == OBJ_BIOSEQ) {
        str = GetSequenceQualFromBioseq ((BioseqPtr) data, field->data.ptrvalue);
      }
      break;
  }
  return str;
}
|
|
5390 |
||
5391 |
||
5392 |
/* Removes the value of field from the object (choice, data), subject to the
 * optional string constraint scp.
 *
 * choice identifies what data points to: an OBJ_* constant, or 0 for a
 * CDS-gene-prot set.  Each field kind is only applied to the object kinds
 * tested below; any other combination leaves the object untouched.
 *
 * Returns whatever the underlying remover reports, or FALSE when data or
 * field is NULL, field carries no payload, or the combination is unhandled.
 */
static Boolean RemoveFieldValueForObject (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp)
{
  Boolean         removed = FALSE;
  FeatureFieldPtr temp_field;

  if (data == NULL || field == NULL || field->data.ptrvalue == NULL) {
    return FALSE;
  }

  if (field->choice == FieldType_source_qual) {
    removed = RemoveSourceQualFromBioSource (GetBioSourceFromObject (choice, data),
                                             (SourceQualChoicePtr) field->data.ptrvalue,
                                             scp);
  } else if (field->choice == FieldType_feature_field) {
    if (choice == OBJ_SEQFEAT) {
      removed = RemoveQualFromFeature ((SeqFeatPtr) data,
                                       (FeatureFieldPtr) field->data.ptrvalue,
                                       scp);
    }
  } else if (field->choice == FieldType_cds_gene_prot) {
    if (choice == 0) {
      removed = RemoveFieldValueFromCGPSet ((CGPSetPtr) data, field->data.intvalue, scp);
    } else if (choice == OBJ_SEQFEAT) {
      /* a lone feature: express the CGP field as a feature field for the call */
      temp_field = FeatureFieldFromCDSGeneProtField (field->data.intvalue);
      removed = RemoveQualFromFeature ((SeqFeatPtr) data, temp_field, scp);
      FeatureFieldFree (temp_field);
    }
  } else if (field->choice == FieldType_molinfo_field) {
    if (choice == OBJ_BIOSEQ) {
      removed = RemoveSequenceQualFromBioseq ((BioseqPtr) data, field->data.ptrvalue);
    }
  }
  return removed;
}
|
|
5425 |
||
5426 |
||
5427 |
/* Writes value into field on the object (choice, data).
 *
 * choice identifies what data points to: an OBJ_* constant, or 0 for a
 * CDS-gene-prot set.  scp and existing_text are passed through to the
 * underlying setter for source, feature and CDS-gene-prot fields.  For
 * molinfo fields the value stored is the one carried inside the field
 * itself; value, scp and existing_text are not consulted.
 *
 * Returns whatever the underlying setter reports, or FALSE when data or
 * field is NULL, field carries no payload, or the combination is unhandled.
 */
static Boolean SetFieldValueForObject (Uint1 choice, Pointer data, FieldTypePtr field, StringConstraintPtr scp, CharPtr value, Uint2 existing_text)
{
  Boolean         changed = FALSE;
  FeatureFieldPtr temp_field;

  if (data == NULL || field == NULL || field->data.ptrvalue == NULL) {
    return FALSE;
  }

  if (field->choice == FieldType_source_qual) {
    changed = SetSourceQualInBioSource (GetBioSourceFromObject (choice, data),
                                        (SourceQualChoicePtr) field->data.ptrvalue,
                                        scp, value, existing_text);
  } else if (field->choice == FieldType_feature_field) {
    if (choice == OBJ_SEQFEAT) {
      changed = SetQualOnFeature ((SeqFeatPtr) data,
                                  (FeatureFieldPtr) field->data.ptrvalue,
                                  scp, value, existing_text);
    }
  } else if (field->choice == FieldType_cds_gene_prot) {
    if (choice == 0) {
      changed = SetFieldValueInCGPSet ((CGPSetPtr) data, field->data.intvalue,
                                       scp, value, existing_text);
    } else if (choice == OBJ_SEQFEAT) {
      /* a lone feature: express the CGP field as a feature field for the call */
      temp_field = FeatureFieldFromCDSGeneProtField (field->data.intvalue);
      changed = SetQualOnFeature ((SeqFeatPtr) data, temp_field, scp, value, existing_text);
      FeatureFieldFree (temp_field);
    }
  } else if (field->choice == FieldType_molinfo_field) {
    if (choice == OBJ_BIOSEQ) {
      changed = SetSequenceQualOnBioseq ((BioseqPtr) data, field->data.ptrvalue);
    }
  }
  return changed;
}
|
|
5460 |
||
5461 |
||
5462 |
/* Decides whether the object (choice, data) is of a kind that can carry
 * the given field:
 *   source_qual   - a BioSource feature, or a source descriptor
 *   feature_field - a feature whose subtype matches the field's feature
 *                   type (or any feature when the type is Feature_type_any)
 *   cds_gene_prot - only a CDS-gene-prot set (choice == 0)
 *   molinfo_field - only a bioseq
 * Returns FALSE for NULL data/field and for every other combination.
 */
static Boolean IsObjectAppropriateForFieldValue (Uint1 choice, Pointer data, FieldTypePtr field)
{
  SeqFeatPtr      feat;
  FeatureFieldPtr wanted;

  if (data == NULL || field == NULL) {
    return FALSE;
  }

  switch (field->choice) {
    case FieldType_source_qual :
      if (choice == OBJ_SEQFEAT) {
        return (Boolean) (((SeqFeatPtr) data)->data.choice == SEQFEAT_BIOSRC);
      }
      if (choice == OBJ_SEQDESC) {
        return (Boolean) (((SeqDescrPtr) data)->choice == Seq_descr_source);
      }
      return FALSE;

    case FieldType_feature_field :
      if (choice != OBJ_SEQFEAT) {
        return FALSE;
      }
      feat = (SeqFeatPtr) data;
      wanted = (FeatureFieldPtr) field->data.ptrvalue;
      if (wanted == NULL) {
        return FALSE;
      }
      if (wanted->type == Feature_type_any) {
        return TRUE;
      }
      return (Boolean) (GetFeatdefFromFeatureType (wanted->type) == feat->idx.subtype);

    case FieldType_cds_gene_prot :
      return (Boolean) (choice == 0);

    case FieldType_molinfo_field :
      return (Boolean) (choice == OBJ_BIOSEQ);

    default:
      break;
  }
  return FALSE;
}
|
|
5507 |
||
5508 |
||
5509 |
/* Tests whether the object (choice, data) suits a field pair by checking
 * it against the pair's "from" field.  The temporary "from" field built
 * for the check is released before returning.
 */
static Boolean IsObjectAppropriateForFieldPair (Uint1 choice, Pointer data, FieldPairTypePtr fieldpair)
{
  FieldTypePtr from_field = GetFromFieldFromFieldPair (fieldpair);
  Boolean      suitable = IsObjectAppropriateForFieldValue (choice, data, from_field);

  FieldTypeFree (from_field);
  return suitable;
}
|
|
5519 |
||
5520 |
||
5521 |
/* Reports whether two field descriptions refer to the same field.
 *
 * Both must be non-NULL and of the same kind.  Beyond that:
 *   source_qual   - same sub-choice; text qualifiers must also carry the
 *                   same qualifier value, while location and origin match
 *                   on sub-choice alone
 *   feature_field - feature types equal (or either is Feature_type_any),
 *                   and both fields are legal qualifiers with equal values
 *   cds_gene_prot - equal field values
 * Every other kind (including molinfo fields, which have no case here)
 * yields FALSE.
 */
static Boolean DoFieldTypesMatch (FieldTypePtr field1, FieldTypePtr field2)
{
  SourceQualChoicePtr src_a, src_b;
  FeatureFieldPtr     feat_a, feat_b;

  if (field1 == NULL || field2 == NULL || field1->choice != field2->choice) {
    return FALSE;
  }

  switch (field1->choice) {
    case FieldType_source_qual :
      src_a = (SourceQualChoicePtr) field1->data.ptrvalue;
      src_b = (SourceQualChoicePtr) field2->data.ptrvalue;
      if (src_a == NULL || src_b == NULL || src_a->choice != src_b->choice) {
        return FALSE;
      }
      if (src_a->choice == SourceQualChoice_textqual) {
        return (Boolean) (src_a->data.intvalue == src_b->data.intvalue);
      }
      if (src_a->choice == SourceQualChoice_location
          || src_a->choice == SourceQualChoice_origin) {
        return TRUE;
      }
      return FALSE;

    case FieldType_feature_field :
      feat_a = (FeatureFieldPtr) field1->data.ptrvalue;
      feat_b = (FeatureFieldPtr) field2->data.ptrvalue;
      if (feat_a == NULL || feat_b == NULL) {
        return FALSE;
      }
      /* feature types must agree unless one side is the wildcard */
      if (feat_a->type != feat_b->type
          && feat_a->type != Feature_type_any
          && feat_b->type != Feature_type_any) {
        return FALSE;
      }
      if (feat_a->field == NULL || feat_b->field == NULL) {
        return FALSE;
      }
      if (feat_a->field->choice != FeatQualChoice_legal_qual
          || feat_b->field->choice != FeatQualChoice_legal_qual) {
        return FALSE;
      }
      return (Boolean) (feat_a->field->data.intvalue == feat_b->field->data.intvalue);

    case FieldType_cds_gene_prot :
      return (Boolean) (field1->data.intvalue == field2->data.intvalue);

    default:
      break;
  }
  return FALSE;
}
|
|
5567 |
||
5568 |
||
5569 |
/* Checks whether biop carries a modifier of the kind named by srcqual.
 *
 * srcqual is first translated to an OrgMod subtype; if that translation
 * yields -1 it is translated to a SubSource subtype instead and the
 * biosource's SubSource list is searched.  Otherwise the OrgMod list under
 * biop->org->orgname is searched (FALSE if org or orgname is missing).
 * Only the subtype is compared; modifier text is not examined.
 */
static Boolean IsNonTextSourceQualPresent (BioSourcePtr biop, Int4 srcqual)
{
  Int4         orgmod_subtype, subsrc_subtype;
  OrgModPtr    mod;
  SubSourcePtr ssp;

  if (biop == NULL) {
    return FALSE;
  }

  orgmod_subtype = GetOrgModQualFromSrcQual (srcqual);

  if (orgmod_subtype != -1) {
    if (biop->org == NULL || biop->org->orgname == NULL) {
      return FALSE;
    }
    for (mod = biop->org->orgname->mod; mod != NULL; mod = mod->next) {
      if (mod->subtype == orgmod_subtype) {
        return TRUE;
      }
    }
    return FALSE;
  }

  /* not an OrgMod qualifier: look among the SubSource modifiers */
  subsrc_subtype = GetSubSrcQualFromSrcQual (srcqual);
  for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next) {
    if (ssp->subtype == subsrc_subtype) {
      return TRUE;
    }
  }
  return FALSE;
}
|
|
5597 |
||
5598 |
||
5599 |
/* Reports whether the source qualifier selected by scp is present on biop.
 *
 *   textqual - non-text qualifiers are looked up by subtype alone; text
 *              qualifiers count as present when their value has text
 *   location - present when biop->genome is non-zero
 *   origin   - present when biop->origin is non-zero
 *
 * A NULL biop is never a match; a NULL scp always is.  Unhandled choices
 * yield FALSE.
 */
static Boolean IsSourceQualPresent (BioSourcePtr biop, SourceQualChoicePtr scp)
{
  CharPtr text;
  Boolean has_text;

  if (biop == NULL) {
    return FALSE;
  }
  if (scp == NULL) {
    return TRUE;
  }

  if (scp->choice == SourceQualChoice_location) {
    return (Boolean) (biop->genome != 0);
  }
  if (scp->choice == SourceQualChoice_origin) {
    return (Boolean) (biop->origin != 0);
  }
  if (scp->choice != SourceQualChoice_textqual) {
    return FALSE;
  }

  if (IsNonTextSourceQual (scp->data.intvalue)) {
    return IsNonTextSourceQualPresent (biop, scp->data.intvalue);
  }

  /* text qualifier: fetch the value, test it, and release the copy */
  text = GetSourceQualFromBioSource (biop, scp, NULL);
  has_text = StringHasNoText (text) ? FALSE : TRUE;
  MemFree (text);
  return has_text;
}
|
|
5632 |
||
5633 |
||
5634 |
typedef struct objecthasstring |
|
5635 |
{
|
|
5636 |
StringConstraintPtr scp; |
|
5637 |
Boolean found; |
|
5638 |
} ObjectHasStringData, PNTR ObjectHasStringPtr; |
|
5639 |
||
5640 |
||
5641 |
/* ASN.1 export callback: for every value whose base type is a string type,
 * tests the string against the constraint held in the ObjectHasStringData
 * passed through pAEOS->data and ORs the outcome into its found flag.
 * Values of any other type are ignored.
 */
static void LIBCALLBACK AsnWriteConstraintCallBack (AsnExpOptStructPtr pAEOS)
{
  ObjectHasStringPtr state;
  CharPtr            text;

  state = (ObjectHasStringPtr) pAEOS->data;

  if (!ISA_STRINGTYPE (AsnFindBaseIsa (pAEOS->atp))) {
    return;
  }

  text = (CharPtr) pAEOS->dvp->ptrvalue;
  state->found |= DoesSingleStringMatchConstraint (text, state->scp);
}
|
|
5654 |
||
5655 |
||
5656 |
/* Tests whether any string inside an object satisfies a string constraint.
 *
 * choice identifies what data points to: an ObjMgr type (OBJ_*), or 0 for a
 * CDS-gene-prot set (CGPSetPtr).
 *
 * CDS-gene-prot set: every feature in the gene, cds, mrna and prot lists is
 * tested recursively as OBJ_SEQFEAT.  With scp->not_present the set matches
 * only if every tested feature matched; otherwise it matches if any did.
 *
 * Any other object: the object is written through a null ASN.1 stream with
 * AsnWriteConstraintCallBack attached, so each string value in it is
 * tested.  If nothing matched and the object is a feature, two further
 * sources are consulted:
 *   - for a coding region, the first FEATDEF_PROT feature on the product
 *     bioseq is streamed through the same writer;
 *   - for a tRNA, the indexed feature label, and then "tRNA-" + label.
 * The outcome is inverted when scp->not_present is set.
 *
 * Returns FALSE when data is NULL or the object type is unknown to the
 * object manager; TRUE when scp is NULL.
 */
static Boolean DoesObjectMatchStringConstraint (Uint1 choice, Pointer data, StringConstraintPtr scp)

{
  ObjMgrPtr         omp;
  ObjMgrTypePtr     omtp;
  AsnIoPtr          aip;
  AsnExpOptPtr      aeop;
  ObjectHasStringData ohsd;
  SeqFeatPtr        sfp, prot;
  SeqMgrFeatContext fcontext;
  CharPtr           search_txt;
  CGPSetPtr         c;
  ValNodePtr        vnp;
  Boolean           all_match = TRUE, any_match = FALSE, rval;
  BioseqPtr         protbsp;

  if (data == NULL) return FALSE;
  if (scp == NULL) return TRUE;

  if (choice == 0) {
    /* CDS-Gene-Prot set */
    c = (CGPSetPtr) data;
    /* each loop stops early once the answer can no longer change:
     * something matched and something else did not */
    for (vnp = c->gene_list; vnp != NULL && (!any_match || all_match); vnp = vnp->next) {
      if (DoesObjectMatchStringConstraint (OBJ_SEQFEAT, vnp->data.ptrvalue, scp)) {
        any_match = TRUE;
      } else {
        all_match = FALSE;
      }
    }
    for (vnp = c->cds_list; vnp != NULL && (!any_match || all_match); vnp = vnp->next) {
      if (DoesObjectMatchStringConstraint (OBJ_SEQFEAT, vnp->data.ptrvalue, scp)) {
        any_match = TRUE;
      } else {
        all_match = FALSE;
      }
    }
    for (vnp = c->mrna_list; vnp != NULL && (!any_match || all_match); vnp = vnp->next) {
      if (DoesObjectMatchStringConstraint (OBJ_SEQFEAT, vnp->data.ptrvalue, scp)) {
        any_match = TRUE;
      } else {
        all_match = FALSE;
      }
    }
    for (vnp = c->prot_list; vnp != NULL && (!any_match || all_match); vnp = vnp->next) {
      if (DoesObjectMatchStringConstraint (OBJ_SEQFEAT, vnp->data.ptrvalue, scp)) {
        any_match = TRUE;
      } else {
        all_match = FALSE;
      }
    }
    if (scp->not_present) {
      rval = all_match;
    } else {
      rval = any_match;
    }
  } else {
    omp = ObjMgrGet ();
    omtp = ObjMgrTypeFind (omp, choice, NULL, NULL);
    if (omtp == NULL) return FALSE;
    aip = AsnIoNullOpen ();
    aeop = AsnExpOptNew (aip, NULL, NULL, AsnWriteConstraintCallBack);
    ohsd.found = FALSE;
    ohsd.scp = scp;
    if (aeop != NULL) {
      aeop->user_data = (Pointer) &ohsd;
    }

    (omtp->asnwrite) (data, aip, NULL);

    if (!ohsd.found && omtp->datatype == OBJ_SEQFEAT)
    {
      sfp = (SeqFeatPtr) data;
      if (sfp->data.choice == SEQFEAT_CDREGION) {
        /* also search the protein feature on the CDS product */
        protbsp = BioseqFindFromSeqLoc (sfp->product);
        prot = SeqMgrGetNextFeature (protbsp, NULL, 0, FEATDEF_PROT, &fcontext);
        if (prot != NULL) {
          (omtp->asnwrite) (prot, aip, NULL);
        }
      } else {
        if (SeqMgrFeaturesAreIndexed(sfp->idx.entityID) == 0) {
          SeqMgrIndexFeatures (sfp->idx.entityID, NULL);
        }
        if (sfp->idx.subtype == FEATDEF_tRNA) {
          sfp = SeqMgrGetDesiredFeature (sfp->idx.entityID, NULL, sfp->idx.itemID, 0, sfp, &fcontext);
          /* fcontext is only meaningful when the lookup succeeded, so do
           * not read its label otherwise */
          if (sfp != NULL) {
            ohsd.found = DoesSingleStringMatchConstraint (fcontext.label, ohsd.scp);
            if (!ohsd.found && sfp->idx.subtype == FEATDEF_tRNA)
            {
              /* 5 characters for "tRNA-" plus the terminator */
              search_txt = (CharPtr) MemNew ((StringLen (fcontext.label) + 6) * sizeof (Char));
              if (search_txt != NULL)
              {
                sprintf (search_txt, "tRNA-%s", fcontext.label);
                ohsd.found = DoesSingleStringMatchConstraint (search_txt, ohsd.scp);
                search_txt = MemFree (search_txt);
              }
            }
          }
        }
      }
    }
    AsnIoClose (aip);
    if (scp->not_present) {
      rval = !ohsd.found;
    } else {
      rval = ohsd.found;
    }
  }
  return rval;
}
|
|
5763 |
||
5764 |
||
5765 |
/* A source constraint is empty when it is NULL, or when it names neither
 * field1 nor field2 and its string constraint is itself empty.
 */
NLM_EXTERN Boolean IsSourceConstraintEmpty (SourceConstraintPtr scp)
{
  if (scp == NULL) {
    return TRUE;
  }
  if (scp->field1 != NULL || scp->field2 != NULL) {
    return FALSE;
  }
  return IsStringConstraintEmpty (scp->constraint) ? TRUE : FALSE;
}
|
|
5777 |
||
5778 |
/* Tests a BioSource against a source constraint.
 *
 * A NULL biop never matches; a NULL scp always matches.  Otherwise the
 * outcome depends on which of field1 / field2 are given and on whether the
 * string constraint is empty:
 *
 *   both fields - the two qualifier values (fetched through the string
 *                 constraint when it is non-empty) must compare equal
 *   one field   - empty constraint: the qualifier must be present;
 *                 non-empty constraint: the constrained value must have
 *                 text, or, for a not_present constraint, the qualifier
 *                 must be absent altogether
 *   no fields   - empty constraint: automatic match;
 *                 non-empty constraint: the biosource is wrapped in a
 *                 temporary source descriptor and searched as a whole
 */
NLM_EXTERN Boolean DoesBiosourceMatchConstraint (BioSourcePtr biop, SourceConstraintPtr scp)
{
  Boolean             matched = FALSE;
  Boolean             no_text_constraint;
  StringConstraintPtr filter;
  SourceQualChoicePtr lone_field;
  CharPtr             val_a, val_b;
  ValNode             wrapper;

  if (biop == NULL) {
    return FALSE;
  }
  if (scp == NULL) {
    return TRUE;
  }

  no_text_constraint = IsStringConstraintEmpty (scp->constraint);

  if (scp->field1 != NULL && scp->field2 != NULL) {
    /* looking for quals to match */
    filter = no_text_constraint ? NULL : scp->constraint;
    val_a = GetSourceQualFromBioSource (biop, scp->field1, filter);
    val_b = GetSourceQualFromBioSource (biop, scp->field2, filter);
    if (StringCmp (val_a, val_b) == 0) {
      matched = TRUE;
    }
    MemFree (val_a);
    MemFree (val_b);
    return matched;
  }

  if (scp->field1 == NULL && scp->field2 == NULL) {
    if (no_text_constraint) {
      /* nothing specified, automatic match */
      return TRUE;
    }
    /* generic string constraint */
    wrapper.choice = Seq_descr_source;
    wrapper.next = NULL;
    wrapper.extended = 0;
    wrapper.data.ptrvalue = biop;
    return DoesObjectMatchStringConstraint (OBJ_SEQDESC, &wrapper, scp->constraint);
  }

  /* exactly one of the two fields was supplied */
  lone_field = (scp->field1 != NULL) ? scp->field1 : scp->field2;

  if (no_text_constraint) {
    /* looking for qual present */
    return IsSourceQualPresent (biop, lone_field);
  }

  val_a = GetSourceQualFromBioSource (biop, lone_field, scp->constraint);
  if (val_a != NULL) {
    if (!StringHasNoText (val_a)) {
      matched = TRUE;
    }
  } else if (scp->constraint->not_present) {
    /* nothing passed the constraint: match only if the qual is truly absent */
    val_a = GetSourceQualFromBioSource (biop, lone_field, NULL);
    if (val_a == NULL) {
      matched = TRUE;
    }
  }
  MemFree (val_a);
  return matched;
}
|
|
5852 |
||
5853 |
||
5854 |
static Boolean DoesCGPSetMatchPseudoConstraint (CGPSetPtr c, CDSGeneProtPseudoConstraintPtr constraint) |
|
5855 |
{
|
|
5856 |
Boolean any_pseudo = FALSE; |
|
5857 |
ValNodePtr vnp; |
|
5858 |
SeqFeatPtr sfp; |
|
5859 |
Boolean rval = FALSE; |
|
5860 |
||
5861 |
if (c == NULL) return FALSE; |
|
5862 |
if (constraint == NULL) return TRUE; |
|
5863 |
||
5864 |
switch (constraint->feature) { |
|
5865 |
case CDSGeneProt_feature_type_constraint_gene : |
|
5866 |
for (vnp = c->gene_list; vnp != NULL && !any_pseudo; vnp = vnp->next) { |
|
5867 |
sfp = (SeqFeatPtr) vnp->data.ptrvalue; |
|
5868 |
if (sfp != NULL && sfp->pseudo) { |
|
5869 |
any_pseudo = TRUE; |
|
5870 |
}
|
|
5871 |
}
|
|
5872 |
break; |
|
5873 |
case CDSGeneProt_feature_type_constraint_mRNA : |
|
5874 |
for (vnp = c->mrna_list; vnp != NULL && !any_pseudo; vnp = vnp->next) { |
|
5875 |
sfp = (SeqFeatPtr) vnp->data.ptrvalue; |
|
5876 |
if (sfp != NULL && sfp->pseudo) { |
|
5877 |
any_pseudo = TRUE; |
|
5878 |
}
|
|
5879 |
}
|
|
5880 |
break; |
|
5881 |
case CDSGeneProt_feature_type_constraint_cds : |
|
5882 |
for (vnp = c->mrna_list; vnp != NULL && !any_pseudo; vnp = vnp->next) { |
|
5883 |
sfp = (SeqFeatPtr) vnp->data.ptrvalue; |
|
5884 |
if (sfp != NULL && sfp->pseudo) { |
|
5885 |
any_pseudo = TRUE; |
|
5886 |
}
|
|
5887 |
}
|
|
5888 |
break; |
|
5889 |
case CDSGeneProt_feature_type_constraint_prot : |
|
5890 |
for (vnp = c->mrna_list; vnp != NULL && !any_pseudo; vnp = vnp->next) { |
|
5891 |
sfp = (SeqFeatPtr) vnp->data.ptrvalue; |
|
5892 |
if (sfp != NULL && sfp->pseudo && sfp->idx.subtype == FEATDEF_PROT) { |
|
5893 |
any_pseudo = TRUE; |
|
5894 |
}
|
|
5895 |
}
|
|
5896 |
break; |
|
5897 |
case CDSGeneProt_feature_type_constraint_mat_peptide : |
|
5898 |
for (vnp = c->mrna_list; vnp != NULL && !any_pseudo; vnp = vnp->next) { |
|
5899 |
sfp = (SeqFeatPtr) vnp->data.ptrvalue; |
|
5900 |
if (sfp != NULL && sfp->pseudo && sfp->idx.subtype == FEATDEF_mat_peptide_aa) { |
|
5901 |
any_pseudo = TRUE; |
|
5902 |
}
|
|
5903 |
}
|
|
5904 |
break; |
|
5905 |
}
|
|
5906 |
||
5907 |
if ((any_pseudo && constraint->is_pseudo) |
|
5908 |
|| (!any_pseudo && !constraint->is_pseudo)) { |
|
5909 |
rval = TRUE; |
|
5910 |
}
|
|
5911 |
return rval; |
|
5912 |
}
|
|
5913 |
||
5914 |
||
5915 |
/* A CDS-gene-prot qualifier constraint is "empty" when it is NULL, or when
 * it names neither field and its string constraint is itself empty.
 */
NLM_EXTERN Boolean IsCDSGeneProtQualConstraintEmpty (CDSGeneProtQualConstraintPtr constraint)
{
  if (constraint == NULL) {
    return TRUE;
  }
  if (constraint->field1 != NULL || constraint->field2 != NULL) {
    /* at least one field is named, so there is something to test */
    return FALSE;
  }
  return IsStringConstraintEmpty (constraint->constraint) ? TRUE : FALSE;
}
|
|
5924 |
||
5925 |
||
5926 |
/* Tests a CDS-gene-protein set against a qualifier constraint.
 *
 * c          - the set to examine; NULL never matches.
 * constraint - up to two field selectors plus a string constraint; NULL
 *              always matches.
 *
 * With an empty string constraint:
 *   - one field named : match when that field has a value
 *   - both named      : match when the two values compare equal
 *   - none named      : automatic match
 * With a non-empty string constraint:
 *   - one field named : match when the field yields non-blank text under the
 *                       constraint, or, for a not_present constraint, when
 *                       the field has no value at all
 *   - both named      : match when the two constrained values compare equal
 *   - none named      : the whole set is tested against the string constraint
 *
 * The field2-only "value present" branch previously never set rval to TRUE,
 * so a constraint that named only field2 could not match; it now mirrors the
 * field1-only branch.
 */
static Boolean DoesCGPSetMatchQualConstraint (CGPSetPtr c, CDSGeneProtQualConstraintPtr constraint)
{
  Boolean rval = FALSE;
  CharPtr str, str1, str2;

  if (c == NULL) return FALSE;
  if (constraint == NULL) return TRUE;

  if (IsStringConstraintEmpty (constraint->constraint)) {
    /* looking for qual present */
    if (constraint->field1 != NULL && constraint->field2 == NULL) {
      str = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, NULL);
      if (str != NULL) {
        rval = TRUE;
        str = MemFree (str);
      }
    } else if (constraint->field2 != NULL && constraint->field1 == NULL) {
      str = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, NULL);
      if (str != NULL) {
        rval = TRUE;
        str = MemFree (str);
      }
    /* looking for quals to match */
    } else if (constraint->field1 != NULL && constraint->field2 != NULL) {
      str1 = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, NULL);
      str2 = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, NULL);
      if (StringCmp (str1, str2) == 0) {
        rval = TRUE;
      }
      str1 = MemFree (str1);
      str2 = MemFree (str2);
    } else {
      /* nothing specified, automatic match */
      rval = TRUE;
    }
  } else {
    if (constraint->field1 != NULL && constraint->field2 == NULL) {
      str1 = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, constraint->constraint);
      if (str1 == NULL) {
        if (constraint->constraint->not_present) {
          /* only a match if the field is truly absent, not just filtered out */
          str1 = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, NULL);
          if (str1 == NULL) {
            rval = TRUE;
          }
        }
      } else if (!StringHasNoText (str1)) {
        rval = TRUE;
      }
      str1 = MemFree (str1);
    } else if (constraint->field2 != NULL && constraint->field1 == NULL) {
      str2 = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, constraint->constraint);
      if (str2 == NULL) {
        if (constraint->constraint->not_present) {
          /* only a match if the field is truly absent, not just filtered out */
          str2 = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, NULL);
          if (str2 == NULL) {
            rval = TRUE;
          }
        }
      } else if (!StringHasNoText (str2)) {
        rval = TRUE;
      }
      str2 = MemFree (str2);
    } else if (constraint->field1 != NULL && constraint->field2 != NULL) {
      str1 = GetFieldValueFromCGPSet (c, constraint->field1->data.intvalue, constraint->constraint);
      str2 = GetFieldValueFromCGPSet (c, constraint->field2->data.intvalue, constraint->constraint);
      if (StringCmp (str1, str2) == 0) {
        rval = TRUE;
      }
      str1 = MemFree (str1);
      str2 = MemFree (str2);
    } else {
      /* generic string constraint */
      rval = DoesObjectMatchStringConstraint (0, c, constraint->constraint);
    }
  }
  return rval;
}
|
|
6004 |
||
6005 |
||
6006 |
/* A sequence constraint is "empty" when it is NULL or when none of its three
 * parts restricts anything: the molecule type is unset or "any", the feature
 * type is "any", and the ID string constraint is empty.
 */
NLM_EXTERN Boolean IsSequenceConstraintEmpty (SequenceConstraintPtr constraint)
{
  if (constraint == NULL) {
    return TRUE;
  }
  if (constraint->seqtype != NULL
      && constraint->seqtype->choice != SequenceConstraintMolTypeConstraint_any) {
    return FALSE;
  }
  if (constraint->feature != Feature_type_any
      || !IsStringConstraintEmpty (constraint->id)) {
    return FALSE;
  }
  return TRUE;
}
|
|
6014 |
||
6015 |
||
6016 |
/* Tests every ID in a SeqId chain against a string constraint.
 *
 * sip               - head of the ID list; NULL never matches.
 * string_constraint - constraint to apply; NULL always matches.
 *
 * Each ID is written in FASTA-long form and tried as-is; if that fails it is
 * retried with a trailing '|' dropped and the text up to the first '|'
 * stripped, and finally (when the search text contains no '.') with
 * everything from the first '.' onward removed.
 *
 * Returns TRUE when some ID matches and the constraint is not "not_present",
 * or when no ID matches and the constraint is "not_present".
 *
 * The trailing-pipe check is guarded against an empty buffer; it previously
 * read id[-1] whenever SeqIdWrite produced no text.
 */
extern Boolean DoesSeqIDListMeetStringConstraint (SeqIdPtr sip, StringConstraintPtr string_constraint)
{
  Char     id [41];
  CharPtr  cp, cp_dst;
  SeqIdPtr tmp;
  Boolean  match, changed;
  Int4     len;

  if (sip == NULL)
  {
    return FALSE;
  }
  if (string_constraint == NULL)
  {
    return TRUE;
  }

  while (sip != NULL)
  {
    /* temporarily disconnect ID from list so that only this ID is written */
    tmp = sip->next;
    sip->next = NULL;
    id [0] = '\0';
    SeqIdWrite (sip, id, PRINTID_FASTA_LONG, sizeof (id) - 1);
    sip->next = tmp;

    match = DoesSingleStringMatchConstraint (id, string_constraint);
    if (!match)
    {
      changed = FALSE;
      /* remove terminating pipe character */
      len = (Int4) StringLen (id);
      if (len > 0 && id [len - 1] == '|')
      {
        id [len - 1] = 0;
        changed = TRUE;
      }
      /* remove leading identifier up to and including the first pipe */
      cp = StringChr (id, '|');
      if (cp != NULL)
      {
        changed = TRUE;
        cp++;
        cp_dst = id;
        while (*cp != 0)
        {
          *cp_dst = *cp;
          cp_dst++;
          cp++;
        }
        *cp_dst = 0;
      }
      if (changed)
      {
        match = DoesSingleStringMatchConstraint (id, string_constraint);
      }

      /* if search text doesn't have ., try ID without version */
      if (!match && StringChr (string_constraint->match_text, '.') == NULL)
      {
        cp = StringChr (id, '.');
        if (cp != NULL)
        {
          *cp = 0;
          match = DoesSingleStringMatchConstraint (id, string_constraint);
        }
      }
    }

    if (match)
    {
      return string_constraint->not_present ? FALSE : TRUE;
    }
    sip = sip->next;
  }

  /* no ID matched */
  return string_constraint->not_present ? TRUE : FALSE;
}
|
|
6104 |
||
6105 |
||
6106 |
typedef struct rnatypebiomol { |
|
6107 |
Int4 rnatype; |
|
6108 |
Uint1 biomol; |
|
6109 |
CharPtr rnamolname; |
|
6110 |
} RnaTypeBiomolData, PNTR RnaTypeBiomolPtr; |
|
6111 |
||
6112 |
static RnaTypeBiomolData rna_type_biomol[] = { |
|
6113 |
{ Sequence_constraint_rnamol_genomic , MOLECULE_TYPE_GENOMIC, "Genomic RNA" } , |
|
6114 |
{ Sequence_constraint_rnamol_precursor_RNA , MOLECULE_TYPE_PRE_MRNA , "Precursor RNA" } , |
|
6115 |
{ Sequence_constraint_rnamol_mRNA , MOLECULE_TYPE_MRNA , "mRNA [cDNA]" } , |
|
6116 |
{ Sequence_constraint_rnamol_rRNA , MOLECULE_TYPE_RRNA , "Ribosomal RNA" } , |
|
6117 |
{ Sequence_constraint_rnamol_tRNA , MOLECULE_TYPE_TRNA , "Transfer RNA" } , |
|
6118 |
{ Sequence_constraint_rnamol_snRNA , MOLECULE_TYPE_SNRNA , "Small nuclear RNA" } , |
|
6119 |
{ Sequence_constraint_rnamol_scRNA , MOLECULE_TYPE_SCRNA , "Small cytoplasmic RNA" } , |
|
6120 |
{ Sequence_constraint_rnamol_genomic_mRNA , MOLECULE_TYPE_GENOMIC_MRNA_MIX , "Genomic-mRNA" } , |
|
6121 |
{ Sequence_constraint_rnamol_cRNA , MOLECULE_TYPE_CRNA , "cRNA" } , |
|
6122 |
{ Sequence_constraint_rnamol_snoRNA , MOLECULE_TYPE_SNORNA , "Small nucleolar RNA" } , |
|
6123 |
{ Sequence_constraint_rnamol_transcribed_RNA , MOLECULE_TYPE_TRANSCRIBED_RNA , "Transcribed RNA" } , |
|
6124 |
{ Sequence_constraint_rnamol_ncRNA , MOLECULE_TYPE_NCRNA , "Non-coding RNA" } , |
|
6125 |
{ Sequence_constraint_rnamol_transfer_messenger_RNA , MOLECULE_TYPE_TMRNA , "Transfer-messenger RNA" } } ; |
|
6126 |
||
6127 |
#define NUM_rna_type_biomol sizeof (rna_type_biomol) / sizeof (RnaTypeBiomolData)
|
|
6128 |
||
6129 |
||
6130 |
/* Looks up the biomol code paired with rnatype in rna_type_biomol.
 * Returns 0 when rnatype is not in the table.
 */
NLM_EXTERN Uint1 GetBiomolForRnaType (Int4 rnatype)
{
  Int4 row = 0;

  while (row < NUM_rna_type_biomol) {
    if (rna_type_biomol[row].rnatype == rnatype) {
      return rna_type_biomol[row].biomol;
    }
    row++;
  }
  return 0;
}
|
|
6141 |
||
6142 |
||
6143 |
/* Looks up the label paired with rnatype in rna_type_biomol.
 * The returned pointer refers to a string literal (table entry or the
 * fallback text) and must not be freed by the caller.
 */
NLM_EXTERN CharPtr GetBiomolNameForRnaType (Int4 rnatype)
{
  Int4 row = 0;

  while (row < NUM_rna_type_biomol) {
    if (rna_type_biomol[row].rnatype == rnatype) {
      return rna_type_biomol[row].rnamolname;
    }
    row++;
  }
  return "invalid RNA type";
}
|
|
6154 |
||
6155 |
/* Appends one node per RNA type to *field_list: first an "Any RNA" entry,
 * then every row of rna_type_biomol.  Each node's choice is the RNA type
 * value and its pointer is a freshly saved copy of the label.
 * Does nothing when field_list is NULL.
 */
NLM_EXTERN void AddAllRNASubtypesToChoiceList (ValNodePtr PNTR field_list)
{
  Int4 row;

  if (field_list != NULL) {
    ValNodeAddPointer (field_list, Sequence_constraint_rnamol_any, StringSave ("Any RNA"));
    row = 0;
    while (row < NUM_rna_type_biomol) {
      ValNodeAddPointer (field_list, rna_type_biomol[row].rnatype, StringSave (rna_type_biomol[row].rnamolname));
      row++;
    }
  }
}
|
|
6166 |
||
6167 |
||
6168 |
/* Tests one Bioseq against a sequence constraint.
 *
 * The sequence must pass, in order: the molecule-type filter (with an
 * optional RNA subtype check against the sequence's MolInfo descriptor),
 * the "has a feature of this type" filter, and the ID string constraint.
 * A NULL bsp never matches; an empty constraint always matches.
 */
static Boolean DoesSequenceMatchSequenceConstraint (BioseqPtr bsp, SequenceConstraintPtr constraint)
{
  SeqMgrFeatContext feat_context;
  SeqMgrDescContext desc_context;
  SeqDescrPtr       molinfo_desc;
  MolInfoPtr        molinfo;

  if (bsp == NULL) {
    return FALSE;
  }
  if (IsSequenceConstraintEmpty (constraint)) {
    return TRUE;
  }

  /* molecule type filter */
  if (constraint->seqtype != NULL
      && constraint->seqtype->choice != SequenceConstraintMolTypeConstraint_any) {
    switch (constraint->seqtype->choice) {
      case SequenceConstraintMolTypeConstraint_nucleotide :
        if (ISA_aa (bsp->mol)) {
          return FALSE;
        }
        break;
      case SequenceConstraintMolTypeConstraint_dna :
        if (bsp->mol != Seq_mol_dna) {
          return FALSE;
        }
        break;
      case SequenceConstraintMolTypeConstraint_rna :
        if (bsp->mol != Seq_mol_rna) {
          return FALSE;
        }
        if (constraint->seqtype->data.intvalue != Sequence_constraint_rnamol_any) {
          /* a specific RNA subtype is wanted: compare with MolInfo biomol */
          molinfo_desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &desc_context);
          if (molinfo_desc == NULL
              || molinfo_desc->data.ptrvalue == NULL
              || molinfo_desc->choice != Seq_descr_molinfo) {
            return FALSE;
          }
          molinfo = (MolInfoPtr) molinfo_desc->data.ptrvalue;
          if (GetBiomolForRnaType (constraint->seqtype->data.intvalue) != molinfo->biomol) {
            return FALSE;
          }
        }
        break;
      case SequenceConstraintMolTypeConstraint_protein :
        if (!ISA_aa (bsp->mol)) {
          return FALSE;
        }
        break;
      default :
        break;
    }
  }

  /* feature filter: the sequence must carry at least one such feature */
  if (constraint->feature != Feature_type_any
      && SeqMgrGetNextFeature (bsp, NULL, 0, GetFeatdefFromFeatureType (constraint->feature), &feat_context) == NULL) {
    return FALSE;
  }

  /* ID filter */
  if (IsStringConstraintEmpty (constraint->id)) {
    return TRUE;
  }
  if (DoesSeqIDListMeetStringConstraint (bsp->id, constraint->id)) {
    return TRUE;
  }
  return FALSE;
}
|
|
6226 |
||
6227 |
/* Reports whether any sequence inside a BioseqSet (searching nested sets
 * recursively) satisfies the sequence constraint.  A NULL set never
 * matches; an empty constraint always matches.
 */
static Boolean DoesSequenceInSetMatchSequenceConstraint (BioseqSetPtr bssp, SequenceConstraintPtr constraint)
{
  SeqEntryPtr entry;
  Boolean     found = FALSE;

  if (bssp == NULL) {
    return FALSE;
  }
  if (IsSequenceConstraintEmpty (constraint)) {
    return TRUE;
  }

  entry = bssp->seq_set;
  while (entry != NULL) {
    if (IS_Bioseq (entry)) {
      found = DoesSequenceMatchSequenceConstraint ((BioseqPtr) entry->data.ptrvalue, constraint);
    } else if (IS_Bioseq_set (entry)) {
      found = DoesSequenceInSetMatchSequenceConstraint ((BioseqSetPtr) entry->data.ptrvalue, constraint);
    }
    if (found) {
      /* one matching member is enough */
      break;
    }
    entry = entry->next;
  }
  return found;
}
|
|
6244 |
||
6245 |
||
6246 |
/* Applies a sequence constraint to an arbitrary object.
 *
 * The object's sequence is obtained with GetSequenceForObject.  If there is
 * none and the object is an extended descriptor whose parent is a
 * BioseqSet, the constraint is satisfied when any sequence in that set
 * satisfies it.  NULL data never matches; an empty constraint always does.
 */
static Boolean DoesObjectMatchSequenceConstraint (Uint1 choice, Pointer data, SequenceConstraintPtr constraint)
{
  BioseqPtr     seq;
  SeqDescrPtr   descr;
  ObjValNodePtr obj_node;

  if (data == NULL) {
    return FALSE;
  }
  if (IsSequenceConstraintEmpty (constraint)) {
    return TRUE;
  }

  seq = GetSequenceForObject (choice, data);
  if (seq != NULL) {
    return DoesSequenceMatchSequenceConstraint (seq, constraint);
  }

  /* no sequence: only a set-level descriptor can still match */
  if (choice != OBJ_SEQDESC) {
    return FALSE;
  }
  descr = (SeqDescrPtr) data;
  if (!descr->extended) {
    return FALSE;
  }
  obj_node = (ObjValNodePtr) descr;
  if (obj_node->idx.parenttype != OBJ_BIOSEQSET || obj_node->idx.parentptr == NULL) {
    return FALSE;
  }
  return DoesSequenceInSetMatchSequenceConstraint ((BioseqSetPtr) obj_node->idx.parentptr, constraint);
}
|
|
6272 |
||
6273 |
||
6274 |
/* Dispatches a single constraint to the matcher for its kind.
 *
 * choice/data identify the object; for the two CDS-gene-prot constraint
 * kinds the object must be a CGP set (choice == 0), otherwise the result is
 * FALSE.  NULL data never matches, a NULL constraint always matches, and a
 * constraint of an unrecognised kind is treated as a match.
 */
static Boolean DoesObjectMatchConstraint (Uint1 choice, Pointer data, ConstraintChoicePtr constraint)
{
  if (data == NULL) {
    return FALSE;
  }
  if (constraint == NULL) {
    return TRUE;
  }

  switch (constraint->choice) {
    case ConstraintChoice_string :
      return DoesObjectMatchStringConstraint (choice, data, constraint->data.ptrvalue);
    case ConstraintChoice_location :
      return DoesObjectMatchLocationConstraint (choice, data, constraint->data.ptrvalue);
    case ConstraintChoice_source :
      return DoesBiosourceMatchConstraint (GetBioSourceFromObject (choice, data), constraint->data.ptrvalue);
    case ConstraintChoice_cdsgeneprot_qual :
      if (choice != 0) {
        return FALSE;
      }
      return DoesCGPSetMatchQualConstraint (data, constraint->data.ptrvalue);
    case ConstraintChoice_cdsgeneprot_pseudo :
      if (choice != 0) {
        return FALSE;
      }
      return DoesCGPSetMatchPseudoConstraint (data, constraint->data.ptrvalue);
    case ConstraintChoice_sequence :
      return DoesObjectMatchSequenceConstraint (choice, data, constraint->data.ptrvalue);
    default :
      break;
  }
  return TRUE;
}
|
|
6311 |
||
6312 |
||
6313 |
/* An object satisfies a constraint set when it satisfies every constraint
 * in the list; evaluation stops at the first failure.  An empty (NULL) set
 * is satisfied by any non-NULL object.
 */
static Boolean DoesObjectMatchConstraintChoiceSet (Uint1 choice, Pointer data, ConstraintChoiceSetPtr csp)
{
  ConstraintChoiceSetPtr node;
  Boolean                still_matching = TRUE;

  if (data == NULL) {
    return FALSE;
  }

  for (node = csp; node != NULL && still_matching; node = node->next) {
    /* each list node is itself the constraint choice */
    still_matching = DoesObjectMatchConstraint (choice, data, node);
  }
  return still_matching;
}
|
|
6325 |
||
6326 |
||
6327 |
/* Finds the string constraint in a constraint set that applies to a field.
 *
 * field - the field of interest.
 * csp   - head of the constraint list; each node is itself a constraint
 *         choice (node->choice selects the kind, node->data.ptrvalue holds
 *         the constraint), the same layout DoesObjectMatchConstraintChoiceSet
 *         relies on.
 *
 * A plain string constraint always applies.  A source or CDS-gene-prot
 * qualifier constraint applies when one of its two fields matches field.
 * When several constraints apply, the last one in the list wins.
 * The returned pointer is owned by the constraint set; do not free it.
 * Returns NULL when nothing applies.
 *
 * Two defects are corrected here: the list node was previously treated as a
 * wrapper around the choice (csp->data.ptrvalue was cast to a choice node),
 * and the CDS-gene-prot constraint was read from field->data.ptrvalue rather
 * than from the constraint node.
 */
NLM_EXTERN StringConstraintPtr FindStringConstraintInConstraintSetForField (FieldTypePtr field, ConstraintChoiceSetPtr csp)
{
  StringConstraintPtr          scp = NULL;
  SourceConstraintPtr          source_constraint;
  CDSGeneProtQualConstraintPtr cgp_constraint;

  while (csp != NULL) {
    switch (csp->choice) {
      case ConstraintChoice_string :
        scp = csp->data.ptrvalue;
        break;
      case ConstraintChoice_source :
        source_constraint = (SourceConstraintPtr) csp->data.ptrvalue;
        if (source_constraint != NULL && source_constraint->constraint != NULL
            && ((source_constraint->field1 != NULL
                 && DoFieldTypesMatch (field, source_constraint->field1))
                || (source_constraint->field2 != NULL
                    && DoFieldTypesMatch (field, source_constraint->field2)))) {
          scp = source_constraint->constraint;
        }
        break;
      case ConstraintChoice_cdsgeneprot_qual :
        cgp_constraint = (CDSGeneProtQualConstraintPtr) csp->data.ptrvalue;
        if (field != NULL
            && field->choice == FieldType_cds_gene_prot
            && cgp_constraint != NULL && cgp_constraint->constraint != NULL
            && ((cgp_constraint->field1 != NULL && cgp_constraint->field1->data.intvalue == field->data.intvalue)
                || (cgp_constraint->field2 != NULL && cgp_constraint->field2->data.intvalue == field->data.intvalue))) {
          scp = cgp_constraint->constraint;
        }
        break;
      default :
        break;
    }
    csp = csp->next;
  }
  return scp;
}
|
|
6364 |
||
6365 |
||
6366 |
/* Convenience wrapper: extracts the "from" field of a field pair and looks
 * up the string constraint that applies to it.  The temporary field object
 * is released before returning; the result is whatever
 * FindStringConstraintInConstraintSetForField returned.
 */
NLM_EXTERN StringConstraintPtr FindStringConstraintInConstraintSetForFieldPair (FieldPairTypePtr fieldpair, ConstraintChoiceSetPtr csp)
{
  FieldTypePtr        from_field;
  StringConstraintPtr found;

  from_field = GetFromFieldFromFieldPair (fieldpair);
  found = FindStringConstraintInConstraintSetForField (from_field, csp);
  FieldTypeFree (from_field);
  return found;
}
|
|
6376 |
||
6377 |
||
6378 |
/* Builds a new string constraint that selects the text a field edit would
 * act on: match text is a copy of edit->find_txt and the match location
 * follows edit->location.  The constraint is case sensitive, not
 * whole-word and not negated.
 * Returns NULL when edit or its find text is NULL; otherwise the caller
 * receives a newly created constraint.
 */
NLM_EXTERN StringConstraintPtr StringConstraintFromFieldEdit (FieldEditPtr edit)
{
  StringConstraintPtr result;

  if (edit == NULL) {
    return NULL;
  }
  if (edit->find_txt == NULL) {
    return NULL;
  }

  result = StringConstraintNew ();
  result->match_text = StringSave (edit->find_txt);

  /* translate the edit location into the equivalent match location */
  if (edit->location == Field_edit_location_anywhere) {
    result->match_location = String_location_contains;
  } else if (edit->location == Field_edit_location_beginning) {
    result->match_location = String_location_starts;
  } else if (edit->location == Field_edit_location_end) {
    result->match_location = String_location_ends;
  }

  result->not_present = FALSE;
  result->whole_word = FALSE;
  result->case_sensitive = TRUE;

  return result;
}
|
|
6404 |
||
6405 |
||
6406 |
/* Returns a newly allocated copy of str with a find/replace edit applied.
 *
 * str  - input text; it is not modified.
 * edit - find text, replacement text and location; NULL means "no edit".
 *
 * The search is case-insensitive (StringISearch).  By location:
 *   anywhere  - every occurrence is replaced, scanning left to right and
 *               resuming after each inserted replacement
 *   beginning - only an occurrence at the very start is replaced
 *   end       - only an occurrence that ends exactly at the end is replaced
 *
 * The caller owns the result.
 *
 * Fixes relative to the previous implementation:
 *   - a "beginning" edit stopped being anchored after its first replacement
 *     and went on to replace later occurrences as well;
 *   - when allocation of the new buffer failed, the same occurrence was
 *     found again forever; the loop now stops and returns what it has;
 *   - the "end" scan skipped a whole match length after a non-terminal hit
 *     and could step over an overlapping occurrence that does end the
 *     string; it now advances one character at a time;
 *   - an empty find text returns the copy unchanged without searching.
 */
static CharPtr ApplyEditToString (CharPtr str, FieldEditPtr edit)
{
  CharPtr cp_found, new_str;
  Int4    found_len, replace_len, new_len, prefix_len;

  if (edit == NULL) return StringSave (str);

  str = StringSave (str);
  found_len = StringLen (edit->find_txt);
  replace_len = StringLen (edit->repl_txt);
  if (str == NULL || found_len == 0) {
    return str;
  }

  cp_found = StringISearch (str, edit->find_txt);
  if (edit->location == Field_edit_location_beginning
      && cp_found != str) {
    cp_found = NULL;
  }

  while (cp_found != NULL)
  {
    if (edit->location == Field_edit_location_end
        && cp_found != str + StringLen (str) - found_len) {
      /* this occurrence does not end the string; try the next candidate */
      cp_found = StringISearch (cp_found + 1, edit->find_txt);
    } else {
      prefix_len = (Int4) (cp_found - str);
      new_len = StringLen (str) + 1 - found_len + replace_len;
      /* MemNew zero-fills, so the pieces below can simply be appended */
      new_str = (CharPtr) MemNew (new_len * sizeof (Char));
      if (new_str == NULL)
      {
        break;
      }
      if (prefix_len > 0)
      {
        StringNCpy (new_str, str, prefix_len);
      }
      StringCat (new_str, edit->repl_txt);
      StringCat (new_str, cp_found + found_len);
      str = MemFree (str);
      str = new_str;
      if (edit->location == Field_edit_location_beginning)
      {
        /* only the leading occurrence is eligible */
        break;
      }
      /* resume after the inserted text so the replacement is not rescanned */
      cp_found = StringISearch (str + prefix_len + replace_len, edit->find_txt);
    }
  }
  return str;
}
|
|
6447 |
||
6448 |
||
6449 |
typedef struct objectcollection { |
|
6450 |
AECRActionPtr action; |
|
6451 |
ValNodePtr object_list; |
|
6452 |
} ObjectCollectionData, PNTR ObjectCollectionPtr; |
|
6453 |
||
6454 |
||
6455 |
/* Decides whether one object is a target of the action in o and, if so,
 * appends it (choice = objecttype, pointer = objectdata) to o->object_list.
 *
 * For every action kind the object must be appropriate for the field(s)
 * involved and must satisfy the action's constraint set.  In addition:
 *   edit    - the field must yield non-blank text under the constraint
 *             derived from the edit's find text
 *   convert - the "from" field must yield non-blank text
 *   parse   - the requested text portion of the "from" value must be
 *             non-blank
 *
 * Fixes relative to the previous implementation:
 *   - the string constraint created for edit actions was never freed;
 *   - copy, swap and parse fetched the "from" field twice, so the "to"
 *     field was never checked; they now use GetToFieldFromFieldPair;
 *   - o->action and o->action->action are checked before being dereferenced.
 *
 * String constraints obtained from FindStringConstraintInConstraintSetForField
 * belong to the constraint set and are deliberately not freed here.
 */
static void AECRActionObjectCollectionItemCallback (Uint1 objecttype, Pointer objectdata, ObjectCollectionPtr o)
{
  ApplyActionPtr      a;
  EditActionPtr       e;
  ConvertActionPtr    v;
  CopyActionPtr       c;
  SwapActionPtr       s;
  RemoveActionPtr     r;
  AECRParseActionPtr  p;
  CharPtr             str, portion;
  StringConstraintPtr scp;
  FieldTypePtr        field_from = NULL, field_to = NULL;

  if (objectdata == NULL || o == NULL) return;
  if (o->action == NULL || o->action->action == NULL) return;

  /* check to make sure object is appropriate for field and meets filter */
  switch (o->action->action->choice) {
    case ActionChoice_apply :
      a = (ApplyActionPtr) o->action->action->data.ptrvalue;
      if (a != NULL
          && IsObjectAppropriateForFieldValue (objecttype, objectdata, a->field)
          && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) {
        ValNodeAddPointer (&(o->object_list), objecttype, objectdata);
      }
      break;
    case ActionChoice_edit :
      e = (EditActionPtr) o->action->action->data.ptrvalue;
      if (e != NULL
          && IsObjectAppropriateForFieldValue (objecttype, objectdata, e->field)
          && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) {
        scp = StringConstraintFromFieldEdit (e->edit);
        str = GetFieldValueForObject (objecttype, objectdata, e->field, scp);
        if (!StringHasNoText (str)) {
          ValNodeAddPointer (&(o->object_list), objecttype, objectdata);
        }
        str = MemFree (str);
        /* this constraint was created above, so it is ours to release */
        scp = StringConstraintFree (scp);
      }
      break;
    case ActionChoice_convert :
      v = (ConvertActionPtr) o->action->action->data.ptrvalue;
      if (v != NULL) {
        field_from = GetFromFieldFromFieldPair (v->fields);
        if (field_from != NULL
            && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_from)
            && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) {
          scp = FindStringConstraintInConstraintSetForField (field_from, o->action->constraint);
          str = GetFieldValueForObject (objecttype, objectdata, field_from, scp);
          if (!StringHasNoText (str)) {
            ValNodeAddPointer (&(o->object_list), objecttype, objectdata);
          }
          str = MemFree (str);
        }
      }
      field_from = FieldTypeFree (field_from);
      break;
    case ActionChoice_copy :
      c = (CopyActionPtr) o->action->action->data.ptrvalue;
      if (c != NULL) {
        field_from = GetFromFieldFromFieldPair (c->fields);
        field_to = GetToFieldFromFieldPair (c->fields);
        if (field_from != NULL && field_to != NULL
            && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_from)
            && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_to)
            && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) {
          ValNodeAddPointer (&(o->object_list), objecttype, objectdata);
        }
      }
      field_from = FieldTypeFree (field_from);
      field_to = FieldTypeFree (field_to);
      break;
    case ActionChoice_swap :
      s = (SwapActionPtr) o->action->action->data.ptrvalue;
      if (s != NULL) {
        field_from = GetFromFieldFromFieldPair (s->fields);
        field_to = GetToFieldFromFieldPair (s->fields);
        if (field_from != NULL && field_to != NULL
            && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_from)
            && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_to)
            && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) {
          ValNodeAddPointer (&(o->object_list), objecttype, objectdata);
        }
      }
      field_from = FieldTypeFree (field_from);
      field_to = FieldTypeFree (field_to);
      break;
    case ActionChoice_remove :
      r = (RemoveActionPtr) o->action->action->data.ptrvalue;
      if (r != NULL
          && IsObjectAppropriateForFieldValue (objecttype, objectdata, r->field)
          && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) {
        ValNodeAddPointer (&(o->object_list), objecttype, objectdata);
      }
      break;
    case ActionChoice_parse :
      p = (AECRParseActionPtr) o->action->action->data.ptrvalue;
      if (p != NULL) {
        field_from = GetFromFieldFromFieldPair (p->fields);
        field_to = GetToFieldFromFieldPair (p->fields);
        if (field_from != NULL && field_to != NULL
            && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_from)
            && IsObjectAppropriateForFieldValue (objecttype, objectdata, field_to)
            && DoesObjectMatchConstraintChoiceSet (objecttype, objectdata, o->action->constraint)) {
          scp = FindStringConstraintInConstraintSetForField (field_from, o->action->constraint);
          str = GetFieldValueForObject (objecttype, objectdata, field_from, scp);
          portion = GetTextPortionFromString (str, p->portion);
          if (!StringHasNoText (portion)) {
            ValNodeAddPointer (&(o->object_list), objecttype, objectdata);
          }
          portion = MemFree (portion);
          str = MemFree (str);
        }
      }
      field_from = FieldTypeFree (field_from);
      field_to = FieldTypeFree (field_to);
      break;
    default :
      break;
  }
}
|
|
6565 |
||
6566 |
||
6567 |
/* Feature visitor: forwards each feature to the item callback as an
 * OBJ_SEQFEAT object.  data is the ObjectCollectionPtr being filled.
 */
static void AECRActionObjectCollectionFeatureCallback (SeqFeatPtr sfp, Pointer data)
{
  if (sfp != NULL && data != NULL) {
    AECRActionObjectCollectionItemCallback (OBJ_SEQFEAT, sfp, (ObjectCollectionPtr) data);
  }
}
|
|
6576 |
||
6577 |
||
6578 |
/* Descriptor visitor: forwards each descriptor to the item callback as an
 * OBJ_SEQDESC object.  data is the ObjectCollectionPtr being filled.
 */
static void AECRActionObjectCollectionDescriptorCallback (SeqDescrPtr sdp, Pointer data)
{
  if (sdp != NULL && data != NULL) {
    AECRActionObjectCollectionItemCallback (OBJ_SEQDESC, sdp, (ObjectCollectionPtr) data);
  }
}
|
|
6587 |
||
6588 |
||
6589 |
/* Bioseq visitor: forwards each sequence to the item callback as an
 * OBJ_BIOSEQ object.  data is the ObjectCollectionPtr being filled.
 */
static void AECRObjectCollectionBioseqCallback (BioseqPtr bsp, Pointer data)
{
  if (bsp != NULL && data != NULL) {
    AECRActionObjectCollectionItemCallback (OBJ_BIOSEQ, bsp, (ObjectCollectionPtr) data);
  }
}
|
|
6598 |
||
6599 |
||
6600 |
/* Collects the objects under sep that an AECR action applies to.
 *
 * For actions on a MolInfo field the sequences are visited; for every other
 * field type both features and descriptors are visited.  Each visited
 * object is screened by AECRActionObjectCollectionItemCallback.
 *
 * Returns a ValNode list (choice = object type, ptrvalue = object), or NULL
 * when action is NULL or nothing qualifies.
 */
NLM_EXTERN ValNodePtr GetObjectListForAECRAction (SeqEntryPtr sep, AECRActionPtr action)
{
  ObjectCollectionData collector;

  if (action == NULL) {
    return NULL;
  }

  collector.object_list = NULL;
  collector.action = action;

  if (FieldTypeFromAECRAction (action) != FieldType_molinfo_field) {
    VisitFeaturesInSep (sep, &collector, AECRActionObjectCollectionFeatureCallback);
    VisitDescriptorsInSep (sep, &collector, AECRActionObjectCollectionDescriptorCallback);
  } else {
    VisitBioseqsInSep (sep, &collector, AECRObjectCollectionBioseqCallback);
  }
  return collector.object_list;
}
|
|
6616 |
||
6617 |
||
6618 |
typedef struct buildcgpset |
|
6619 |
{
|
|
6620 |
ValNodePtr cds_list; |
|
6621 |
ValNodePtr mrna_list; |
|
6622 |
ValNodePtr gene_list; |
|
6623 |
} BuildCGPSetData, PNTR BuildCGPSetPtr; |
|
6624 |
||
6625 |
/* Feature visitor that files each feature into the matching list of the
 * BuildCGPSetData passed as userdata.  Features flagged idx.deleteme are
 * skipped.  Coding regions go to cds_list, genes to gene_list and mRNAs to
 * mrna_list; any other feature that has a gene xref is also added to
 * gene_list.  Everything else is ignored.
 */
static void BuildCGPSetCallback (SeqFeatPtr sfp, Pointer userdata)
{
  BuildCGPSetPtr  lists;
  ValNodePtr PNTR target = NULL;

  if (sfp == NULL || sfp->idx.deleteme || userdata == NULL) {
    return;
  }
  lists = (BuildCGPSetPtr) userdata;

  /* pick the destination list; the tests are ordered, first hit wins */
  if (sfp->data.choice == SEQFEAT_CDREGION) {
    target = &(lists->cds_list);
  } else if (sfp->data.choice == SEQFEAT_GENE) {
    target = &(lists->gene_list);
  } else if (sfp->idx.subtype == FEATDEF_mRNA) {
    target = &(lists->mrna_list);
  } else if (SeqMgrGetGeneXref (sfp) != NULL) {
    target = &(lists->gene_list);
  }

  if (target != NULL) {
    ValNodeAddPointer (target, OBJ_SEQFEAT, sfp);
  }
}
|
|
6648 |
||
6649 |
||
6650 |
/* Builds a CGP set (coding region / gene / mRNA / protein) centred on cds.
 *
 * cds              must be a coding-region feature, otherwise NULL is returned.
 * indexing_needed  optional out-flag; set to TRUE when a new protein feature
 *                  had to be created on the product Bioseq (never reset here).
 *
 * The gene returned by GetGeneForFeature and the mRNA returned by
 * SeqMgrGetOverlappingmRNA are added to the set and flagged idx.deleteme so
 * that the caller can tell they already belong to a set; the caller is
 * expected to clear those flags afterwards.
 *
 * If the product Bioseq has no FEATDEF_PROT feature, one is created.  All
 * FEATDEF_mat_peptide_aa features on the product are added to prot_list too.
 *
 * Returns the new set, or NULL if cds is unsuitable or allocation failed.
 */
static CGPSetPtr BuildCGPSetFromCodingRegion (SeqFeatPtr cds, BoolPtr indexing_needed)
{
  SeqMgrFeatContext fcontext;
  SeqFeatPtr        gene = NULL, mrna, prot;
  BioseqPtr         protbsp;
  CGPSetPtr         cdsp;

  if (cds == NULL || cds->data.choice != SEQFEAT_CDREGION) return NULL;

  cdsp = (CGPSetPtr) MemNew (sizeof (CGPSetData));
  if (cdsp == NULL) return NULL;
  ValNodeAddPointer (&(cdsp->cds_list), 0, cds);

  gene = GetGeneForFeature (cds);
  if (gene != NULL)
  {
    ValNodeAddPointer (&(cdsp->gene_list), 0, gene);
    /* mark gene, so that we'll know it isn't lonely */
    gene->idx.deleteme = TRUE;
  }

  mrna = SeqMgrGetOverlappingmRNA (cds->location, &fcontext);
  if (mrna != NULL)
  {
    ValNodeAddPointer (&(cdsp->mrna_list), 0, mrna);
    /* mark mrna, so that we'll know it's already in a set */
    mrna->idx.deleteme = TRUE;
  }

  if (cds->product != NULL)
  {
    protbsp = BioseqFindFromSeqLoc (cds->product);
    if (protbsp != NULL)
    {
      prot = SeqMgrGetNextFeature (protbsp, NULL, SEQFEAT_PROT, FEATDEF_PROT, &fcontext);
      /* if there is no full-length protein feature, make one */
      if (prot == NULL)
      {
        prot = CreateNewFeatureOnBioseq (protbsp, SEQFEAT_PROT, NULL);
        if (prot != NULL)
        {
          /* allocate the ProtRef only once the feature exists, so that it
           * cannot be leaked when feature creation fails */
          prot->data.value.ptrvalue = ProtRefNew ();
          if (indexing_needed != NULL)
          {
            *indexing_needed = TRUE;
          }
        }
      }
      if (prot != NULL)
      {
        ValNodeAddPointer (&(cdsp->prot_list), 0, prot);
      }

      /* also add in mat_peptides from protein feature */
      prot = SeqMgrGetNextFeature (protbsp, NULL, SEQFEAT_PROT, FEATDEF_mat_peptide_aa, &fcontext);
      while (prot != NULL)
      {
        ValNodeAddPointer (&(cdsp->prot_list), 0, prot);
        prot = SeqMgrGetNextFeature (protbsp, prot, SEQFEAT_PROT, FEATDEF_mat_peptide_aa, &fcontext);
      }
    }
  }
  return cdsp;
}
|
|
6715 |
||
6716 |
||
6717 |
/* Builds a CGP set for an mRNA feature that is not yet part of a set.
 *
 * Returns NULL when mrna is NULL, is flagged idx.deleteme, or is not of
 * subtype FEATDEF_mRNA.  Otherwise the new set contains the mRNA and, if
 * GetGeneForFeature finds one, its gene; that gene is flagged
 * idx.deleteme to record that it already belongs to a set.
 */
static CGPSetPtr BuildCGPSetFrommRNA (SeqFeatPtr mrna)
{
  CGPSetPtr  new_set;
  SeqFeatPtr parent_gene;

  if (mrna == NULL) {
    return NULL;
  }
  if (mrna->idx.deleteme || mrna->idx.subtype != FEATDEF_mRNA) {
    return NULL;
  }

  new_set = (CGPSetPtr) MemNew (sizeof (CGPSetData));
  ValNodeAddPointer (&(new_set->mrna_list), 0, mrna);

  parent_gene = GetGeneForFeature (mrna);
  if (parent_gene != NULL) {
    ValNodeAddPointer (&(new_set->gene_list), 0, parent_gene);
    /* flag the gene so it is not later treated as a lonely gene */
    parent_gene->idx.deleteme = TRUE;
  }

  return new_set;
}
|
|
6737 |
||
6738 |
||
6739 |
/* Clears the idx.deleteme flag on every non-NULL feature stored in list. */
static void UnmarkFeatureList (ValNodePtr list)
{
  ValNodePtr node;
  SeqFeatPtr feat;

  for (node = list; node != NULL; node = node->next) {
    feat = node->data.ptrvalue;
    if (feat == NULL) {
      continue;
    }
    feat->idx.deleteme = FALSE;
  }
}
|
|
6753 |
||
6754 |
||
6755 |
/* Builds the list of CGP sets for the entity identified by entityID and
 * drops the sets that do not satisfy constraint.
 *
 * Steps, in order:
 *   1. collect coding regions, mRNAs and genes (or gene-xref carriers)
 *      with BuildCGPSetCallback;
 *   2. make one set per coding region; this flags the associated gene and
 *      mRNA with idx.deleteme and may create protein features, in which
 *      case the features are re-indexed;
 *   3. make one set per mRNA that was not flagged in step 2;
 *   4. make one set per entry of the gene list that is still unflagged;
 *   5. clear the idx.deleteme flags set above and free the work lists;
 *   6. remove every set for which DoesObjectMatchConstraintChoiceSet
 *      returns FALSE.
 *
 * Returns a ValNode list whose data.ptrvalue entries are CGPSetPtr.
 */
static ValNodePtr BuildCGPSetList (Uint2 entityID, ValNodePtr constraint)
{
  SeqEntryPtr     top_sep;
  BuildCGPSetData collected;
  CGPSetPtr       set;
  ValNodePtr      node, following, last_kept;
  ValNodePtr      result = NULL;
  SeqFeatPtr      feat;
  Boolean         reindex = FALSE;

  collected.cds_list = NULL;
  collected.gene_list = NULL;
  collected.mrna_list = NULL;

  top_sep = GetTopSeqEntryForEntityID (entityID);
  VisitFeaturesInSep (top_sep, &collected, BuildCGPSetCallback);

  /* sets that are anchored on a coding region */
  node = collected.cds_list;
  while (node != NULL) {
    feat = (SeqFeatPtr) node->data.ptrvalue;
    if (feat != NULL) {
      set = BuildCGPSetFromCodingRegion (feat, &reindex);
      if (set != NULL) {
        ValNodeAddPointer (&result, 0, set);
      }
    }
    node = node->next;
  }
  if (reindex) {
    /* full-length protein features were created, so the index is stale */
    SeqMgrIndexFeatures (entityID, NULL);
  }

  /* sets for mRNAs that were not claimed by a coding region */
  node = collected.mrna_list;
  while (node != NULL) {
    feat = (SeqFeatPtr) node->data.ptrvalue;
    if (feat != NULL && !feat->idx.deleteme) {
      set = BuildCGPSetFrommRNA (feat);
      if (set != NULL) {
        ValNodeAddPointer (&result, 0, set);
      }
    }
    node = node->next;
  }

  /* sets for lonely genes and for non-CDS/non-mRNA features with gene xrefs */
  node = collected.gene_list;
  while (node != NULL) {
    feat = (SeqFeatPtr) node->data.ptrvalue;
    if (feat != NULL && !feat->idx.deleteme) {
      set = CGPSetNew ();
      ValNodeAddPointer (&(set->gene_list), 0, feat);
      ValNodeAddPointer (&result, 0, set);
    }
    node = node->next;
  }

  /* undo the temporary marking, then release the work lists */
  UnmarkFeatureList (collected.cds_list);
  UnmarkFeatureList (collected.mrna_list);
  UnmarkFeatureList (collected.gene_list);

  collected.cds_list = ValNodeFree (collected.cds_list);
  collected.mrna_list = ValNodeFree (collected.mrna_list);
  collected.gene_list = ValNodeFree (collected.gene_list);

  /* unlink and free every set that fails the constraint */
  last_kept = NULL;
  node = result;
  while (node != NULL) {
    following = node->next;
    if (DoesObjectMatchConstraintChoiceSet (0, node->data.ptrvalue, constraint)) {
      last_kept = node;
    } else {
      if (last_kept != NULL) {
        last_kept->next = following;
      } else {
        result = following;
      }
      node->next = NULL;
      FreeCGPSetList (node);
    }
    node = following;
  }

  return result;
}
|
|
6847 |
||
6848 |
||
6849 |
/* Applies action->value to action->field on every object in object_list.
 *
 * scp is passed through to SetFieldValueForObject for each object.
 * Returns the number of objects for which SetFieldValueForObject reported
 * success; 0 when action or object_list is NULL.
 */
NLM_EXTERN Int4 DoApplyActionToObjectList (ApplyActionPtr action, ValNodePtr object_list, StringConstraintPtr scp)
{
  ValNodePtr item;
  Int4       applied = 0;

  if (action == NULL || object_list == NULL) {
    return 0;
  }

  item = object_list;
  while (item != NULL) {
    if (SetFieldValueForObject (item->choice, item->data.ptrvalue, action->field, scp, action->value, action->existing_text)) {
      applied++;
    }
    item = item->next;
  }
  return applied;
}
|
|
6865 |
||
6866 |
||
6867 |
/* Applies the text edit in action->edit to action->field on every object
 * in object_list.
 *
 * For each object the current field value is fetched, run through
 * ApplyEditToString, and written back (replacing the old value) only when
 * the edited text differs from the original.
 *
 * Returns the number of objects whose value was changed and stored
 * successfully; 0 when action or object_list is NULL.
 */
NLM_EXTERN Int4 DoEditActionToObjectList (EditActionPtr action, ValNodePtr object_list)
{
  ValNodePtr          item;
  Int4                edited = 0;
  StringConstraintPtr edit_scp;
  CharPtr             before, after;

  if (action == NULL || object_list == NULL) {
    return 0;
  }

  /* NOTE(review): edit_scp is never released in this function; confirm
   * whether StringConstraintFromFieldEdit returns an owned object. */
  edit_scp = StringConstraintFromFieldEdit (action->edit);

  item = object_list;
  while (item != NULL) {
    before = GetFieldValueForObject (item->choice, item->data.ptrvalue, action->field, edit_scp);
    after = ApplyEditToString (before, action->edit);
    if (StringCmp (before, after) != 0) {
      if (SetFieldValueForObject (item->choice, item->data.ptrvalue, action->field, edit_scp, after, ExistingTextOption_replace_old)) {
        edited++;
      }
    }
    after = MemFree (after);
    before = MemFree (before);
    item = item->next;
  }
  return edited;
}
|
|
6891 |
||
6892 |
||
6893 |
/* Converts the "from" field of action->fields into the "to" field on every
 * object in object_list.
 *
 * For a molinfo field pair, the object's current "from" value is compared
 * with the name returned by GetSequenceQualValName for the "from" field;
 * only on a match is the "to" field set (replacing the old value).
 *
 * For every other field pair, the "from" value (selected with scp) is
 * written to the "to" field using action->existing_text and, if that
 * succeeds, removed from the "from" field.
 *
 * Returns the number of objects converted; 0 when action, object_list or
 * action->fields is NULL.
 */
NLM_EXTERN Int4 DoConvertActionToObjectList (ConvertActionPtr action, ValNodePtr object_list, StringConstraintPtr scp)
{
  ValNodePtr   item;
  Int4         converted = 0;
  CharPtr      current, expected;
  FieldTypePtr src_field, dst_field;
  Boolean      molinfo_pair;

  if (action == NULL || object_list == NULL || action->fields == NULL) {
    return 0;
  }

  src_field = GetFromFieldFromFieldPair (action->fields);
  dst_field = GetToFieldFromFieldPair (action->fields);
  molinfo_pair = (Boolean) (action->fields->choice == FieldPairType_molinfo_field);

  for (item = object_list; item != NULL; item = item->next) {
    if (molinfo_pair) {
      current = GetFieldValueForObject (item->choice, item->data.ptrvalue, src_field, NULL);
      expected = GetSequenceQualValName (src_field->data.ptrvalue);
      if (StringCmp (current, expected) == 0) {
        if (SetFieldValueForObject (item->choice, item->data.ptrvalue, dst_field, NULL, current, ExistingTextOption_replace_old)) {
          converted++;
        }
      }
    } else {
      current = GetFieldValueForObject (item->choice, item->data.ptrvalue, src_field, scp);
      /* the source value is removed only after the destination was set */
      if (SetFieldValueForObject (item->choice, item->data.ptrvalue, dst_field, NULL, current, action->existing_text)) {
        if (RemoveFieldValueForObject (item->choice, item->data.ptrvalue, src_field, scp)) {
          converted++;
        }
      }
    }
    current = MemFree (current);
  }

  src_field = FieldTypeFree (src_field);
  dst_field = FieldTypeFree (dst_field);

  return converted;
}
|
|
6933 |
||
6934 |
||
6935 |
/* Copies the "from" field of action->fields into the "to" field on every
 * object in object_list.
 *
 * The source value is fetched with scp; the destination is written with
 * action->existing_text.  The source field is left untouched.
 *
 * Returns the number of objects whose destination was set successfully;
 * 0 when action or object_list is NULL.
 */
NLM_EXTERN Int4 DoCopyActionToObjectList (CopyActionPtr action, ValNodePtr object_list, StringConstraintPtr scp)
{
  ValNodePtr   item;
  Int4         copied = 0;
  CharPtr      text;
  FieldTypePtr src_field, dst_field;

  if (action == NULL || object_list == NULL) {
    return 0;
  }

  src_field = GetFromFieldFromFieldPair (action->fields);
  dst_field = GetToFieldFromFieldPair (action->fields);

  item = object_list;
  while (item != NULL) {
    text = GetFieldValueForObject (item->choice, item->data.ptrvalue, src_field, scp);
    if (SetFieldValueForObject (item->choice, item->data.ptrvalue, dst_field, NULL, text, action->existing_text)) {
      copied++;
    }
    text = MemFree (text);
    item = item->next;
  }

  src_field = FieldTypeFree (src_field);
  dst_field = FieldTypeFree (dst_field);
  return copied;
}
|
|
6960 |
||
6961 |
||
6962 |
/* Exchanges the values of the "from" and "to" fields of action->fields on
 * every object in object_list.
 *
 * Both values are read first; then the "to" field receives the old "from"
 * value and, only if that succeeded, the "from" field receives the old
 * "to" value.  Both writes replace the existing text.
 *
 * Returns the number of objects for which both writes succeeded; 0 when
 * action or object_list is NULL.
 */
NLM_EXTERN Int4 DoSwapActionToObjectList (SwapActionPtr action, ValNodePtr object_list, StringConstraintPtr scp)
{
  ValNodePtr   item;
  Int4         swapped_count = 0;
  CharPtr      from_text, to_text;
  FieldTypePtr src_field, dst_field;
  Boolean      both_set;

  if (action == NULL || object_list == NULL) {
    return 0;
  }

  src_field = GetFromFieldFromFieldPair (action->fields);
  dst_field = GetToFieldFromFieldPair (action->fields);

  item = object_list;
  while (item != NULL) {
    from_text = GetFieldValueForObject (item->choice, item->data.ptrvalue, src_field, scp);
    to_text = GetFieldValueForObject (item->choice, item->data.ptrvalue, dst_field, NULL);

    both_set = SetFieldValueForObject (item->choice, item->data.ptrvalue, dst_field, NULL, from_text, ExistingTextOption_replace_old)
               && SetFieldValueForObject (item->choice, item->data.ptrvalue, src_field, scp, to_text, ExistingTextOption_replace_old);
    if (both_set) {
      swapped_count++;
    }

    from_text = MemFree (from_text);
    to_text = MemFree (to_text);
    item = item->next;
  }

  src_field = FieldTypeFree (src_field);
  dst_field = FieldTypeFree (dst_field);
  return swapped_count;
}
|
|
6989 |
||
6990 |
||
6991 |
/* Removes action->field (as selected by scp) from every object in
 * object_list.
 *
 * Returns the number of objects for which RemoveFieldValueForObject
 * reported success; 0 when action or object_list is NULL.
 */
NLM_EXTERN Int4 DoRemoveActionToObjectList (RemoveActionPtr action, ValNodePtr object_list, StringConstraintPtr scp)
{
  ValNodePtr item;
  Int4       removed = 0;

  if (action == NULL || object_list == NULL) {
    return 0;
  }

  item = object_list;
  while (item != NULL) {
    if (RemoveFieldValueForObject (item->choice, item->data.ptrvalue, action->field, scp)) {
      removed++;
    }
    item = item->next;
  }
  return removed;
}
|
|
7007 |
||
7008 |
||
7009 |
/* Parses a portion of the "from" field of action->fields into the "to"
 * field on every object in object_list.
 *
 * For each object the "from" value is fetched (selected with scp) and the
 * portion described by action->portion is extracted from it.  When a
 * portion is found:
 *   - if action->remove_from_parsed is set, the first occurrence of the
 *     portion is cut out of the source text and the shortened text is
 *     written back to the "from" field;
 *   - the extracted portion is written to the "to" field using
 *     action->existing_text.
 *
 * Returns the number of objects whose "to" field was set successfully;
 * 0 when action or object_list is NULL.
 */
NLM_EXTERN Int4 DoParseActionToObjectList (AECRParseActionPtr action, ValNodePtr object_list, StringConstraintPtr scp)
{
  ValNodePtr   vnp;
  CharPtr      str1, str2, cp, tail;
  Int4         len, num_succeed = 0;
  FieldTypePtr field_from, field_to;

  if (action == NULL || object_list == NULL) return 0;
  field_from = GetFromFieldFromFieldPair (action->fields);
  field_to = GetToFieldFromFieldPair (action->fields);

  for (vnp = object_list; vnp != NULL; vnp = vnp->next) {
    str1 = GetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, field_from, scp);
    str2 = GetTextPortionFromString (str1, action->portion);
    if (str2 != NULL) {
      if (action->remove_from_parsed) {
        cp = StringSearch (str1, str2);
        /* the portion might not be found verbatim; leave the source alone then */
        if (cp != NULL) {
          len = StringLen (str2);
          /* close the gap by shifting the tail left one character at a
           * time; source and destination overlap, so a string copy
           * routine must not be used here */
          tail = cp + len;
          while ((*cp = *tail) != '\0') {
            cp++;
            tail++;
          }
          SetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, field_from, scp, str1, ExistingTextOption_replace_old);
        }
      }
      /* the destination receives the parsed portion, not the source text */
      if (SetFieldValueForObject (vnp->choice, vnp->data.ptrvalue, field_to, NULL, str2, action->existing_text)) {
        num_succeed++;
      }
    }
    str1 = MemFree (str1);
    str2 = MemFree (str2);
  }
  field_from = FieldTypeFree (field_from);
  field_to = FieldTypeFree (field_to);
  return num_succeed;
}
|
|
7041 |
||
7042 |
||
7043 |
/* Runs one AECR (apply/edit/convert/swap/copy/remove/parse) action on sep.
 *
 * The target objects are CGP sets built by BuildCGPSetList when the
 * action's field type is FieldType_cds_gene_prot, and the list returned by
 * GetObjectListForAECRAction otherwise.  The action is then dispatched on
 * act->action->choice to the matching Do...ActionToObjectList routine.
 *
 * Returns the success count reported by that routine; 0 when act or
 * act->action is NULL or the action choice is not handled.
 */
static Int4 ApplyAECRActionToSeqEntry (AECRActionPtr act, SeqEntryPtr sep)
{
  StringConstraintPtr scp;
  ApplyActionPtr      a;
  ValNodePtr          object_list = NULL;
  Uint1               field_type;
  Uint2               entityID;
  Int4                num_succeed = 0;

  if (act == NULL || act->action == NULL) return 0;
  field_type = FieldTypeFromAECRAction (act);
  if (field_type == FieldType_cds_gene_prot) {
    entityID = ObjMgrGetEntityIDForChoice(sep);
    object_list = BuildCGPSetList (entityID, act->constraint);
  } else {
    object_list = GetObjectListForAECRAction (sep, act);
  }

  switch (act->action->choice) {
    case ActionChoice_apply:
      a = (ApplyActionPtr) act->action->data.ptrvalue;
      /* an apply action without payload has nothing to apply */
      if (a != NULL) {
        scp = FindStringConstraintInConstraintSetForField (a->field, act->constraint);
        num_succeed = DoApplyActionToObjectList (a, object_list, scp);
        scp = StringConstraintFree (scp);
      }
      break;
    case ActionChoice_edit:
      num_succeed = DoEditActionToObjectList (act->action->data.ptrvalue, object_list);
      break;
    case ActionChoice_convert:
      num_succeed = DoConvertActionToObjectList (act->action->data.ptrvalue, object_list, NULL);
      break;
    case ActionChoice_swap:
      num_succeed = DoSwapActionToObjectList (act->action->data.ptrvalue, object_list, NULL);
      break;
    case ActionChoice_copy:
      num_succeed = DoCopyActionToObjectList (act->action->data.ptrvalue, object_list, NULL);
      break;
    case ActionChoice_remove:
      num_succeed = DoRemoveActionToObjectList (act->action->data.ptrvalue, object_list, NULL);
      break;
    case ActionChoice_parse:
      /* parse actions carry an AECR parse payload and must go to the parse
       * routine, not the remove routine */
      num_succeed = DoParseActionToObjectList (act->action->data.ptrvalue, object_list, NULL);
      break;
    default:
      break;
  }

  if (field_type == FieldType_cds_gene_prot) {
    /* the nodes hold CGP sets allocated by BuildCGPSetList; release them
     * together with the list instead of leaking them */
    FreeCGPSetList (object_list);
    object_list = NULL;
  } else {
    object_list = ValNodeFree (object_list);
  }
  return num_succeed;
}
|
|
7090 |
||
7091 |
||
7092 |
/* This section handles parsing where the source field and destination field may not be on the same
|
|
7093 |
* group of objects. */
|
|
7094 |
typedef struct parsesourceinfo |
|
7095 |
{
|
|
7096 |
BioseqPtr bsp; |
|
7097 |
SeqFeatPtr sfp; |
|
7098 |
SeqDescrPtr sdp; |
|
7099 |
SeqIdPtr sip; |
|
7100 |
ValNodePtr dest_list; |
|
7101 |
CharPtr parse_src_txt; |
|
7102 |
} ParseSourceInfoData, PNTR ParseSourceInfoPtr; |
|
7103 |
||
7104 |
/* Allocates a ParseSourceInfoData and fills it with the given values.
 *
 * parse_src_txt is stored as-is (not copied); ParseSourceInfoFree will
 * later free it.  dest_list starts out empty.
 * Returns NULL if the allocation fails.
 */
static ParseSourceInfoPtr ParseSourceInfoNew (BioseqPtr bsp, SeqFeatPtr sfp, SeqDescrPtr sdp, SeqIdPtr sip, CharPtr parse_src_txt)
{
  ParseSourceInfoPtr info;

  info = (ParseSourceInfoPtr) MemNew (sizeof (ParseSourceInfoData));
  if (info == NULL) {
    return NULL;
  }

  info->bsp = bsp;
  info->sfp = sfp;
  info->sdp = sdp;
  info->sip = sip;
  info->parse_src_txt = parse_src_txt;
  info->dest_list = NULL;
  return info;
}
|
|
7119 |
||
7120 |
||
7121 |
/* Releases a ParseSourceInfoData: its dest_list nodes, its parse_src_txt
 * string and the structure itself.  The referenced Bioseq, feature,
 * descriptor and ID are not freed.
 *
 * Returns the result of freeing the structure, so callers can assign it
 * back to their pointer; returns NULL when psip is NULL.
 */
static ParseSourceInfoPtr ParseSourceInfoFree (ParseSourceInfoPtr psip)
{
  if (psip == NULL) {
    return NULL;
  }

  psip->dest_list = ValNodeFree (psip->dest_list);
  psip->parse_src_txt = MemFree (psip->parse_src_txt);
  psip = MemFree (psip);
  return psip;
}
|
|
7131 |
||
7132 |
/* Creates a new ParseSourceInfoData that refers to the same Bioseq,
 * feature, descriptor and ID as psip.  The destination list and the parsed
 * text are NOT copied; both are NULL in the result.
 *
 * Returns NULL when psip is NULL or the allocation fails.
 */
static ParseSourceInfoPtr ParseSourceInfoCopy (ParseSourceInfoPtr psip)
{
  ParseSourceInfoPtr duplicate;

  if (psip == NULL) {
    return NULL;
  }

  duplicate = (ParseSourceInfoPtr) MemNew (sizeof (ParseSourceInfoData));
  if (duplicate == NULL) {
    return NULL;
  }

  duplicate->bsp = psip->bsp;
  duplicate->sfp = psip->sfp;
  duplicate->sdp = psip->sdp;
  duplicate->sip = psip->sip;
  duplicate->dest_list = NULL;
  duplicate->parse_src_txt = NULL;
  return duplicate;
}
|
|
7150 |
||
7151 |
/* Frees a ValNode list whose data.ptrvalue entries are ParseSourceInfoPtr,
 * releasing each entry with ParseSourceInfoFree.  Always returns NULL.
 */
static ValNodePtr ParseSourceListFree (ValNodePtr vnp)
{
  ValNodePtr upcoming;

  for (; vnp != NULL; vnp = upcoming) {
    upcoming = vnp->next;
    /* detach the node so that only this single node is freed */
    vnp->next = NULL;
    vnp->data.ptrvalue = ParseSourceInfoFree (vnp->data.ptrvalue);
    vnp = ValNodeFree (vnp);
  }
  return vnp;
}
|
|
7163 |
||
7164 |
||
7165 |
static void |
|
7166 |
GetDeflineSourcesForBioseq
|
|
7167 |
(BioseqPtr bsp, |
|
7168 |
TextPortionPtr portion, |
|
7169 |
ValNodePtr PNTR source_list) |
|
7170 |
{
|
|
7171 |
SeqDescrPtr sdp; |
|
7172 |
SeqMgrDescContext dcontext; |
|
7173 |
CharPtr str; |
|
7174 |
ParseSourceInfoPtr psip; |
|
7175 |
||
7176 |
if (bsp == NULL || source_list == NULL) |
|
7177 |
{
|
|
7178 |
return; |
|
7179 |
}
|
|
7180 |
||
7181 |
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_title, &dcontext); |
|
7182 |
while (sdp != NULL) |
|
7183 |
{
|
|
7184 |
str = GetTextPortionFromString (sdp->data.ptrvalue, portion); |
|
7185 |
if (str != NULL) { |
|
7186 |
psip = ParseSourceInfoNew (bsp, NULL, sdp, NULL, str); |
|
7187 |
if (psip != NULL) { |
|
7188 |
ValNodeAddPointer (source_list, 0, psip); |
|
7189 |
} else { |
|
7190 |
str = MemFree (str); |
|
7191 |
}
|
|
7192 |
}
|
|
7193 |
sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_title, &dcontext); |
|
7194 |
}
|
|
7195 |
}
|
|
7196 |
||
7197 |
||
7198 |
/* Returns a newly allocated string form of sip, or NULL when sip is NULL
 * or its choice differs from id_type.
 *
 * For SEQID_GENERAL the Dbtag must exist and, when tag is non-NULL, its db
 * must equal tag; otherwise NULL is returned.  For general and local IDs
 * the object ID is rendered as its string if present, else as its integer
 * id.  When no object ID is available (any other ID type, or a general ID
 * with no tag object), the text produced by SeqIdWrite with
 * PRINTID_REPORT is used instead.
 */
static CharPtr GetIDSrc (SeqIdPtr sip, Uint1 id_type, CharPtr tag)
{
  DbtagPtr    general_tag;
  ObjectIdPtr obj_id = NULL;
  Char        buf[128];

  if (sip == NULL || sip->choice != id_type) {
    return NULL;
  }

  if (id_type == SEQID_GENERAL) {
    general_tag = (DbtagPtr) sip->data.ptrvalue;
    if (general_tag == NULL) {
      return NULL;
    }
    if (tag != NULL && StringCmp (general_tag->db, tag) != 0) {
      return NULL;
    }
    obj_id = general_tag->tag;
  } else if (id_type == SEQID_LOCAL) {
    obj_id = sip->data.ptrvalue;
  }

  if (obj_id != NULL && obj_id->str != NULL) {
    return StringSave (obj_id->str);
  }

  if (obj_id != NULL) {
    sprintf (buf, "%d", obj_id->id);
  } else {
    SeqIdWrite (sip, buf, PRINTID_REPORT, sizeof (buf));
  }
  return StringSave (buf);
}
|
|
7237 |
||
7238 |
||
7239 |
static void |
|
7240 |
GetIDSourcesForBioseq
|
|
7241 |
(BioseqPtr bsp, |
|
7242 |
TextPortionPtr portion, |
|
7243 |
Uint1 id_type, |
|
7244 |
CharPtr tag, |
|
7245 |
ValNodePtr PNTR source_list) |
|
7246 |
{
|
|
7247 |
SeqIdPtr sip; |
|
7248 |
ParseSourceInfoPtr psip; |
|
7249 |
CharPtr src_str = NULL, str; |
|
7250 |
||
7251 |
if (bsp == NULL || source_list == NULL) |
|
7252 |
{
|
|
7253 |
return; |
|
7254 |
}
|
|
7255 |
||
7256 |
sip = bsp->id; |
|
7257 |
while (sip != NULL) |
|
7258 |
{
|
|
7259 |
if ((src_str = GetIDSrc (sip, id_type, tag)) != NULL) { |
|
7260 |
str = GetTextPortionFromString (src_str, portion); |
|
7261 |
if (str != NULL) { |
|
7262 |
psip = ParseSourceInfoNew (bsp, NULL, NULL, sip, str); |
|
7263 |
if (psip != NULL) { |
|
7264 |
ValNodeAddPointer (source_list, 0, psip); |
|
7265 |
} else { |
|
7266 |
str = MemFree (str); |
|
7267 |
}
|
|
7268 |
}
|
|
7269 |
src_str = MemFree (src_str); |
|
7270 |
}
|
|
7271 |
sip = sip->next; |
|
7272 |
}
|
|
7273 |
}
|
|
7274 |
||
7275 |
||
7276 |
static void |
|
7277 |
GetLocalIDSourcesForBioseq
|
|
7278 |
(BioseqPtr bsp, |
|
7279 |
TextPortionPtr tp, |
|
7280 |
ValNodePtr PNTR source_list) |
|
7281 |
{
|
|
7282 |
GetIDSourcesForBioseq (bsp, tp, SEQID_LOCAL, NULL, source_list); |
|
7283 |
}
|
|
7284 |
||
7285 |
||
7286 |
static void GetNcbiFileSourceForBioseq |
|
7287 |
(BioseqPtr bsp, |
|
7288 |
TextPortionPtr tp, |
|
7289 |
ValNodePtr PNTR source_list) |
|
7290 |
{
|
|
7291 |
GetIDSourcesForBioseq (bsp, tp, SEQID_GENERAL, "NCBIFILE", source_list); |
|
7292 |
}
|
|
7293 |
||
7294 |
||
7295 |
/* Calls ReplaceStringForParse on every "AdditionalComment" field of a
 * Bankit comment descriptor.
 *
 * A descriptor qualifies when it is a user descriptor whose user object
 * is non-NULL, whose class is not "SMART_V1.0", and whose type string is
 * "Submission".  Does nothing when sdp or tp is NULL or the descriptor
 * does not qualify.
 */
static void StripBankitCommentForParse (SeqDescrPtr sdp, TextPortionPtr tp)
{
  UserObjectPtr user_obj;
  ObjectIdPtr   label;
  UserFieldPtr  field;

  if (sdp == NULL || sdp->choice != Seq_descr_user || tp == NULL) {
    return;
  }

  /* Bankit Comments */
  user_obj = (UserObjectPtr) sdp->data.ptrvalue;
  if (user_obj == NULL || StringCmp (user_obj->_class, "SMART_V1.0") == 0) {
    return;
  }
  label = user_obj->type;
  if (label == NULL || StringCmp (label->str, "Submission") != 0) {
    return;
  }

  for (field = user_obj->data; field != NULL; field = field->next) {
    label = field->label;
    if (label == NULL || StringCmp (label->str, "AdditionalComment") != 0) {
      continue;
    }
    ReplaceStringForParse (field->data.ptrvalue, tp);
  }
}
|
|
7319 |
||
7320 |
||
7321 |
/* Calls ReplaceStringForParse on every field named comment_field of a
 * structured comment descriptor.
 *
 * A descriptor qualifies when it is a user descriptor with a non-NULL user
 * object whose type string is "StructuredComment".  Does nothing when sdp
 * or tp is NULL, comment_field has no text, or the descriptor does not
 * qualify.
 */
static void StripStructuredCommentForParse (SeqDescrPtr sdp, CharPtr comment_field, TextPortionPtr tp)
{
  UserObjectPtr uop;
  ObjectIdPtr   oip;
  UserFieldPtr  ufp;

  if (sdp == NULL || sdp->choice != Seq_descr_user || tp == NULL || StringHasNoText (comment_field)) {
    return;
  }

  uop = (UserObjectPtr) sdp->data.ptrvalue;
  /* a user descriptor may carry no user object; do not dereference NULL */
  if (uop == NULL) {
    return;
  }
  oip = uop->type;
  if (oip != NULL && StringCmp (oip->str, "StructuredComment") == 0) {
    for (ufp = uop->data; ufp != NULL; ufp = ufp->next) {
      oip = ufp->label;
      if (oip != NULL && StringCmp (oip->str, comment_field) == 0) {
        ReplaceStringForParse (ufp->data.ptrvalue, tp);
      }
    }
  }
}
|
|
7342 |
||
7343 |
||
7344 |
static void |
|
7345 |
GetBankitCommentSourcesForBioseq
|
|
7346 |
(BioseqPtr bsp, |
|
7347 |
TextPortionPtr tp, |
|
7348 |
ValNodePtr PNTR source_list) |
|
7349 |
{
|
|
7350 |
SeqDescrPtr sdp; |
|
7351 |
SeqMgrDescContext dcontext; |
|
7352 |
ParseSourceInfoPtr psip; |
|
7353 |
UserObjectPtr uop; |
|
7354 |
ObjectIdPtr oip; |
|
7355 |
UserFieldPtr ufp; |
|
7356 |
CharPtr str = NULL; |
|
7357 |
||
7358 |
if (bsp == NULL || source_list == NULL) { |
|
7359 |
return; |
|
7360 |
}
|
|
7361 |
||
7362 |
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext); |
|
7363 |
while (sdp != NULL) { |
|
7364 |
if (sdp->extended != 0) { |
|
7365 |
/* Bankit Comments */
|
|
7366 |
uop = (UserObjectPtr) sdp->data.ptrvalue; |
|
7367 |
if (uop != NULL && StringCmp (uop->_class, "SMART_V1.0") != 0) { |
|
7368 |
oip = uop->type; |
|
7369 |
if (oip != NULL && StringCmp (oip->str, "Submission") == 0) { |
|
7370 |
for (ufp = uop->data; ufp != NULL; ufp = ufp->next) { |
|
7371 |
oip = ufp->label; |
|
7372 |
if (oip != NULL && StringCmp (oip->str, "AdditionalComment") == 0) { |
|
7373 |
str = GetTextPortionFromString (ufp->data.ptrvalue, tp); |
|
7374 |
if (str != NULL) { |
|
7375 |
psip = ParseSourceInfoNew (bsp, NULL, sdp, NULL, str); |
|
7376 |
if (psip == NULL) { |
|
7377 |
str = MemFree (str); |
|
7378 |
} else { |
|
7379 |
ValNodeAddPointer (source_list, 0, psip); |
|
7380 |
}
|
|
7381 |
}
|
|
7382 |
}
|
|
7383 |
}
|
|
7384 |
}
|
|
7385 |
}
|
|
7386 |
}
|
|
7387 |
sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext); |
|
7388 |
}
|
|
7389 |
}
|
|
7390 |
||
7391 |
||
7392 |
static void |
|
7393 |
GetCommentSourcesForBioseq
|
|
7394 |
(BioseqPtr bsp, |
|
7395 |
TextPortionPtr tp, |
|
7396 |
ValNodePtr PNTR source_list) |
|
7397 |
{
|
|
7398 |
SeqDescrPtr sdp; |
|
7399 |
SeqFeatPtr sfp; |
|
7400 |
SeqMgrFeatContext fcontext; |
|
7401 |
SeqMgrDescContext dcontext; |
|
7402 |
ParseSourceInfoPtr psip; |
|
7403 |
CharPtr str; |
|
7404 |
||
7405 |
if (bsp == NULL || source_list == NULL) { |
|
7406 |
return; |
|
7407 |
}
|
|
7408 |
||
7409 |
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_comment, &dcontext); |
|
7410 |
while (sdp != NULL) { |
|
7411 |
str = GetTextPortionFromString (sdp->data.ptrvalue, tp); |
|
7412 |
if (str != NULL) { |
|
7413 |
psip = ParseSourceInfoNew (bsp, NULL, sdp, NULL, str); |
|
7414 |
if (psip == NULL) { |
|
7415 |
str = MemFree (str); |
|
7416 |
} else { |
|
7417 |
ValNodeAddPointer (source_list, 0, psip); |
|
7418 |
}
|
|
7419 |
}
|
|
7420 |
sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_comment, &dcontext); |
|
7421 |
}
|
|
7422 |
||
7423 |
sfp = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_COMMENT, 0, &fcontext); |
|
7424 |
while (sfp != NULL) { |
|
7425 |
str = GetTextPortionFromString (sfp->data.value.ptrvalue, tp); |
|
7426 |
if (str != NULL) { |
|
7427 |
psip = ParseSourceInfoNew (bsp, sfp, NULL, NULL, str); |
|
7428 |
if (psip == NULL) { |
|
7429 |
str = MemFree (str); |
|
7430 |
} else { |
|
7431 |
ValNodeAddPointer (source_list, 0, psip); |
|
7432 |
}
|
|
7433 |
}
|
|
7434 |
sfp = SeqMgrGetNextFeature (bsp, sfp, SEQFEAT_COMMENT, 0, &fcontext); |
|
7435 |
}
|
|
7436 |
GetBankitCommentSourcesForBioseq (bsp, tp, source_list); |
|
7437 |
}
|
|
7438 |
||
7439 |
||
7440 |
static void |
|
7441 |
GetStructuredCommentSourcesForBioseq
|
|
7442 |
(BioseqPtr bsp, |
|
7443 |
TextPortionPtr tp, |
|
7444 |
CharPtr comment_field, |
|
7445 |
ValNodePtr PNTR source_list) |
|
7446 |
{
|
|
7447 |
SeqDescrPtr sdp; |
|
7448 |
UserObjectPtr uop; |
|
7449 |
ObjectIdPtr oip; |
|
7450 |
UserFieldPtr ufp; |
|
7451 |
SeqMgrDescContext dcontext; |
|
7452 |
CharPtr str; |
|
7453 |
ParseSourceInfoPtr psip; |
|
7454 |
||
7455 |
if (bsp == NULL || source_list == NULL) |
|
7456 |
{
|
|
7457 |
return; |
|
7458 |
}
|
|
7459 |
||
7460 |
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_user, &dcontext); |
|
7461 |
while (sdp != NULL) { |
|
7462 |
if (sdp->extended != 0 |
|
7463 |
&& sdp->data.ptrvalue != NULL) { |
|
7464 |
uop = (UserObjectPtr) sdp->data.ptrvalue; |
|
7465 |
oip = uop->type; |
|
7466 |
if (oip != NULL && StringCmp (oip->str, "StructuredComment") == 0) { |
|
7467 |
for (ufp = uop->data; ufp != NULL; ufp = ufp->next) { |
|
7468 |
oip = ufp->label; |
|
7469 |
if (oip != NULL && StringCmp (oip->str, comment_field) == 0) { |
|
7470 |
str = GetTextPortionFromString (ufp->data.ptrvalue, tp); |
|
7471 |
if (str != NULL) { |
|
7472 |
psip = ParseSourceInfoNew (bsp, NULL, sdp, NULL, str); |
|
7473 |
if (psip == NULL) { |
|
7474 |
str = MemFree (str); |
|
7475 |
} else { |
|
7476 |
ValNodeAddPointer (source_list, 0, psip); |
|
7477 |
}
|
|
7478 |
}
|
|
7479 |
}
|
|
7480 |
}
|
|
7481 |
}
|
|
7482 |
}
|
|
7483 |
sdp = SeqMgrGetNextDescriptor (bsp, sdp, Seq_descr_user, &dcontext); |
|
7484 |
}
|
|
7485 |
}
|
|
7486 |
||
7487 |
||
7488 |
const CharPtr nomial_keywords[] = { |
|
7489 |
"f. sp. ", |
|
7490 |
"var.", |
|
7491 |
"pv.", |
|
7492 |
"bv.", |
|
7493 |
"serovar", |
|
7494 |
"subsp." }; |
|
7495 |
||
7496 |
const Int4 num_nomial_keywords = sizeof(nomial_keywords) / sizeof (CharPtr); |
|
7497 |
||
7498 |
/* Returns a pointer into taxname just past the "nomial" part of the name.
 *
 * The nomial part is the first two blank-separated words, extended by any
 * run of "<keyword> <word>" pairs that follows, where <keyword> is one of
 * nomial_keywords.  The returned pointer addresses the blank that ends the
 * nomial part (so the remaining text still carries its leading blanks), or
 * the terminating NUL when a keyword's word is the last word of the name.
 *
 * Returns NULL when no blank is found after the first or the second word,
 * i.e. when the name has fewer than three words.  The result is not a
 * copy; it shares storage with taxname.
 */
static CharPtr GetTextAfterNomial (CharPtr taxname)
{
  CharPtr cursor, end_of_nomial;
  Int4    k, kw_len;
  Boolean again;

  /* step over the first word and the blanks that follow it */
  cursor = StringChr (taxname, ' ');
  if (cursor == NULL) {
    return NULL;
  }
  while (*cursor == ' ') {
    cursor++;
  }

  /* the second word must also be followed by a blank, else give up */
  cursor = StringChr (cursor, ' ');
  if (cursor == NULL) {
    return NULL;
  }
  end_of_nomial = cursor;
  while (*cursor == ' ') {
    cursor++;
  }

  /* keep scanning the keyword table for as long as a pass found a keyword
   * that was followed by a word and more text */
  do {
    again = FALSE;
    for (k = 0; k < num_nomial_keywords && *end_of_nomial != 0; k++) {
      kw_len = StringLen (nomial_keywords[k]);
      if (StringNCmp (cursor, nomial_keywords[k], kw_len) != 0) {
        continue;
      }
      /* skip the keyword, the blanks after it and the word it introduces */
      cursor += kw_len;
      while (*cursor == ' ') {
        cursor++;
      }
      end_of_nomial = StringChr (cursor, ' ');
      if (end_of_nomial != NULL) {
        cursor = end_of_nomial;
        while (*cursor == ' ') {
          cursor++;
        }
        again = TRUE;
      } else {
        /* the keyword's word is the last one: nothing remains after it */
        end_of_nomial = cursor + StringLen (cursor);
      }
    }
  } while (again);

  return end_of_nomial;
}
|
|
7545 |
||
7546 |
||
7547 |
static void |
|
7548 |
GetOrgParseSourcesForBioSource
|
|
7549 |
(BioSourcePtr biop, |
|
7550 |
BioseqPtr bsp, |
|
7551 |
SeqDescrPtr sdp, |
|
7552 |
SeqFeatPtr sfp, |
|
7553 |
ParseSrcOrgPtr o, |
|
7554 |
TextPortionPtr tp, |
|
7555 |
ValNodePtr PNTR source_list) |
|
7556 |
{
|
|
7557 |
CharPtr str = NULL, portion, tmp; |
|
7558 |
ValNode vn; |
|
7559 |
ParseSourceInfoPtr psip; |
|
7560 |
||
7561 |
if (biop == NULL || o == NULL || o->field == NULL || source_list == NULL) return; |
|
7562 |
||
7563 |
switch (o->field->choice) { |
|
7564 |
case ParseSrcOrgChoice_source_qual : |
|
7565 |
vn.choice = SourceQualChoice_textqual; |
|
7566 |
vn.data.intvalue = o->field->data.intvalue; |
|
7567 |
vn.next = NULL; |
|
7568 |
str = GetSourceQualFromBioSource (biop, &vn, NULL); |
|
7569 |
break; |
|
7570 |
case ParseSrcOrgChoice_taxname_after_binomial : |
|
7571 |
vn.choice = SourceQualChoice_textqual; |
|
7572 |
vn.data.intvalue = Source_qual_taxname; |
|
7573 |
vn.next = NULL; |
|
7574 |
str = GetSourceQualFromBioSource (biop, &vn, NULL); |
|
7575 |
tmp = GetTextAfterNomial (str); |
|
7576 |
tmp = StringSave (tmp); |
|
7577 |
str = MemFree (str); |
|
7578 |
str = tmp; |
|
7579 |
break; |
|
7580 |
}
|
|
7581 |
portion = GetTextPortionFromString (str, tp); |
|
7582 |
if (portion != NULL) { |
|
7583 |
psip = ParseSourceInfoNew (bsp, sfp, sdp, NULL, portion); |
|
7584 |
if (psip == NULL) { |
|
7585 |
portion = MemFree (portion); |
|
7586 |
} else { |
|
7587 |
ValNodeAddPointer (source_list, 0, psip); |
|
7588 |
}
|
|
7589 |
}
|
|
7590 |
str = MemFree (str); |
|
7591 |
}
|
|
7592 |
||
7593 |
||
7594 |
/* GetOrgParseSourcesForBioseq
 * Visits the source descriptors and/or BioSource features found on bsp
 * (o->type decides which: any, descriptor only, or feature only) and passes
 * each BioSource to GetOrgParseSourcesForBioSource, which appends any
 * parse sources it finds to source_list.
 */
static void GetOrgParseSourcesForBioseq (BioseqPtr bsp, ParseSrcOrgPtr o, TextPortionPtr tp, ValNodePtr PNTR source_list)
{
  SeqMgrDescContext desc_ctx;
  SeqMgrFeatContext feat_ctx;
  SeqDescrPtr       desc;
  SeqFeatPtr        feat;
  Boolean           want_descriptors, want_features;

  if (bsp == NULL || o == NULL || source_list == NULL) {
    return;
  }

  want_descriptors = (Boolean) (o->type == Object_type_constraint_any
                                || o->type == Object_type_constraint_descriptor);
  want_features = (Boolean) (o->type == Object_type_constraint_any
                             || o->type == Object_type_constraint_feature);

  if (want_descriptors) {
    desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &desc_ctx);
    while (desc != NULL) {
      GetOrgParseSourcesForBioSource (desc->data.ptrvalue, bsp, desc, NULL, o, tp, source_list);
      desc = SeqMgrGetNextDescriptor (bsp, desc, Seq_descr_source, &desc_ctx);
    }
  }

  if (want_features) {
    feat = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &feat_ctx);
    while (feat != NULL) {
      GetOrgParseSourcesForBioSource (feat->data.value.ptrvalue, bsp, NULL, feat, o, tp, source_list);
      feat = SeqMgrGetNextFeature (bsp, feat, SEQFEAT_BIOSRC, 0, &feat_ctx);
    }
  }
}
|
|
7619 |
||
7620 |
||
7621 |
typedef struct parsesrccollection { |
|
7622 |
ParseSrcPtr src; |
|
7623 |
TextPortionPtr portion; |
|
7624 |
ValNodePtr src_list; |
|
7625 |
} ParseSrcCollectionData, PNTR ParseSrcCollectionPtr; |
|
7626 |
||
7627 |
||
7628 |
/* FindParseSourceBioseqCallback
 * Per-Bioseq callback: userdata is a ParseSrcCollectionPtr.  Depending on
 * the requested source kind (src->choice), the matching Get...ForBioseq
 * collector is run and its results accumulate in the collection's src_list.
 * Defline, local-id and BankIt-comment sources are not collected from
 * protein Bioseqs; local-id sources are also not collected from segmented
 * Bioseqs.
 */
static void FindParseSourceBioseqCallback (BioseqPtr bsp, Pointer userdata)
{
  ParseSrcCollectionPtr coll;
  ValNodePtr PNTR       found;
  Boolean               is_protein;

  coll = (ParseSrcCollectionPtr) userdata;
  if (bsp == NULL || coll == NULL || coll->src == NULL) {
    return;
  }

  found = &(coll->src_list);
  is_protein = ISA_aa (bsp->mol) ? TRUE : FALSE;

  switch (coll->src->choice)
  {
    case ParseSrc_defline:
      if (!is_protein) {
        GetDeflineSourcesForBioseq (bsp, coll->portion, found);
      }
      break;
    case ParseSrc_local_id:
      if (!is_protein && bsp->repr != Seq_repr_seg) {
        GetLocalIDSourcesForBioseq (bsp, coll->portion, found);
      }
      break;
    case ParseSrc_file_id:
      GetNcbiFileSourceForBioseq (bsp, coll->portion, found);
      break;
    case ParseSrc_org:
      GetOrgParseSourcesForBioseq (bsp, coll->src->data.ptrvalue, coll->portion, found);
      break;
    case ParseSrc_comment:
      GetCommentSourcesForBioseq (bsp, coll->portion, found);
      break;
    case ParseSrc_structured_comment:
      GetStructuredCommentSourcesForBioseq(bsp, coll->portion, coll->src->data.ptrvalue, found);
      break;
    case ParseSrc_bankit_comment:
      if (!is_protein) {
        GetBankitCommentSourcesForBioseq (bsp, coll->portion, found);
      }
      break;
  }
}
|
|
7671 |
||
7672 |
||
7673 |
/* GetOrgNamesInRecordCallback
 * Per-BioSource callback: userdata is a ValNodePtr PNTR list head.
 * Appends the BioSource's taxname pointer (not a copy) to that list when
 * the taxname has text.
 */
static void GetOrgNamesInRecordCallback (BioSourcePtr biop, Pointer userdata)
{
  ValNodePtr PNTR name_list = (ValNodePtr PNTR) userdata;

  if (name_list != NULL
      && biop != NULL
      && biop->org != NULL
      && !StringHasNoText (biop->org->taxname))
  {
    ValNodeAddPointer (name_list, 0, biop->org->taxname);
  }
}
|
|
7687 |
||
7688 |
||
7689 |
/* SetToUpper
 * Upper-cases every alphabetic character of the NUL-terminated string cp
 * in place.  A NULL cp is ignored.
 *
 * The character is converted to unsigned char before it is handed to
 * isalpha/toupper: passing a negative plain-char value (any byte >= 0x80
 * where char is signed) to the <ctype.h> functions is undefined behaviour.
 */
static void SetToUpper (CharPtr cp)
{
  unsigned char ch;

  if (cp == NULL) return;

  for (; *cp != 0; cp++) {
    ch = (unsigned char) *cp;
    if (isalpha (ch)) {
      *cp = (char) toupper (ch);
    }
  }
}
|
|
7699 |
||
7700 |
||
7701 |
static void |
|
7702 |
FixCapitalizationInString
|
|
7703 |
(CharPtr PNTR pTitle, |
|
7704 |
Uint2 capitalization, |
|
7705 |
ValNodePtr org_names) |
|
7706 |
{
|
|
7707 |
if (pTitle == NULL || capitalization == Cap_change_none) return; |
|
7708 |
||
7709 |
switch (capitalization) { |
|
7710 |
case Cap_change_tolower: |
|
7711 |
ResetCapitalization (FALSE, *pTitle); |
|
7712 |
FixAbbreviationsInElement (pTitle); |
|
7713 |
FixOrgNamesInString (*pTitle, org_names); |
|
7714 |
break; |
|
7715 |
case Cap_change_toupper: |
|
7716 |
SetToUpper (*pTitle); |
|
7717 |
FixAbbreviationsInElement (pTitle); |
|
7718 |
FixOrgNamesInString (*pTitle, org_names); |
|
7719 |
break; |
|
7720 |
case Cap_change_firstcap: |
|
7721 |
ResetCapitalization (TRUE, *pTitle); |
|
7722 |
FixAbbreviationsInElement (pTitle); |
|
7723 |
FixOrgNamesInString (*pTitle, org_names); |
|
7724 |
break; |
|
7725 |
}
|
|
7726 |
}
|
|
7727 |
||
7728 |
||
7729 |
/* AddDeflineDestinationsForBioseq
 * Appends every title descriptor found for bsp to dest_list, each tagged
 * with choice OBJ_SEQDESC.
 */
static void AddDeflineDestinationsForBioseq (BioseqPtr bsp, ValNodePtr PNTR dest_list)
{
  SeqMgrDescContext ctx;
  SeqDescrPtr       title;

  if (bsp == NULL || dest_list == NULL) return;

  for (title = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_title, &ctx);
       title != NULL;
       title = SeqMgrGetNextDescriptor (bsp, title, Seq_descr_title, &ctx)) {
    ValNodeAddPointer (dest_list, OBJ_SEQDESC, title);
  }
}
|
|
7744 |
||
7745 |
||
7746 |
/* AddFeatureDestinationsForBioseq
 * Appends every feature on bsp whose featdef matches featfield->type to
 * dest_list, each tagged with choice OBJ_SEQFEAT.
 */
static void AddFeatureDestinationsForBioseq (BioseqPtr bsp, FeatureFieldLegalPtr featfield, ValNodePtr PNTR dest_list)
{
  SeqMgrFeatContext ctx;
  SeqFeatPtr        feat;
  Int4              wanted_featdef;

  if (bsp == NULL || featfield == NULL || dest_list == NULL) {
    return;
  }

  wanted_featdef = GetFeatdefFromFeatureType (featfield->type);

  feat = SeqMgrGetNextFeature (bsp, NULL, 0, wanted_featdef, &ctx);
  while (feat != NULL) {
    ValNodeAddPointer (dest_list, OBJ_SEQFEAT, feat);
    feat = SeqMgrGetNextFeature (bsp, feat, 0, wanted_featdef, &ctx);
  }
}
|
|
7761 |
||
7762 |
||
7763 |
/* GetBioSourceDestinationsForBioseq
 * Appends the BioSource holders found for bsp to dest_list:
 * source descriptors (tagged OBJ_SEQDESC) when object_type is "any" or
 * "descriptor", and BioSource features (tagged OBJ_SEQFEAT) when
 * object_type is "any" or "feature".
 */
static void GetBioSourceDestinationsForBioseq (BioseqPtr bsp, Uint2 object_type, ValNodePtr PNTR dest_list)
{
  SeqMgrDescContext desc_ctx;
  SeqMgrFeatContext feat_ctx;
  SeqDescrPtr       desc;
  SeqFeatPtr        feat;

  if (bsp == NULL || dest_list == NULL) return;

  if (object_type == Object_type_constraint_any || object_type == Object_type_constraint_descriptor) {
    for (desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &desc_ctx);
         desc != NULL;
         desc = SeqMgrGetNextDescriptor (bsp, desc, Seq_descr_source, &desc_ctx)) {
      ValNodeAddPointer (dest_list, OBJ_SEQDESC, desc);
    }
  }

  if (object_type == Object_type_constraint_any || object_type == Object_type_constraint_feature) {
    for (feat = SeqMgrGetNextFeature (bsp, NULL, SEQFEAT_BIOSRC, 0, &feat_ctx);
         feat != NULL;
         feat = SeqMgrGetNextFeature (bsp, feat, SEQFEAT_BIOSRC, 0, &feat_ctx)) {
      ValNodeAddPointer (dest_list, OBJ_SEQFEAT, feat);
    }
  }
}
|
|
7795 |
||
7796 |
||
7797 |
/* AddParseDestinations
 * Fills psip->dest_list with the objects that should receive the parsed
 * text, according to the destination kind in dst->choice:
 *   ParseDest_defline  - title descriptors of psip->bsp
 *   ParseDest_org      - the source's own BioSource descriptor or feature
 *                        when it has one that the object-type constraint
 *                        allows; otherwise the BioSources found for
 *                        psip->bsp under that constraint
 *   ParseDest_featqual - features of the requested type on psip->bsp
 *   ParseDest_dbxref   - all BioSource descriptors and features on psip->bsp
 * Nothing is added when psip or dst is NULL, or when a ParseDest_org
 * destination carries no ParseDstOrg data.
 */
static void AddParseDestinations (ParseSourceInfoPtr psip, ParseDestPtr dst)
{
  ParseDstOrgPtr o;

  if (psip == NULL || dst == NULL) return;

  switch (dst->choice) {
    case ParseDest_defline :
      AddDeflineDestinationsForBioseq (psip->bsp, &(psip->dest_list));
      break;
    case ParseDest_org :
      o = (ParseDstOrgPtr) dst->data.ptrvalue;
      if (o == NULL) {
        /* same guard SetFieldForDestList applies before using the org data */
        break;
      }
      if ((o->type == Object_type_constraint_any || o->type == Object_type_constraint_descriptor)
          && psip->sdp != NULL && psip->sdp->choice == Seq_descr_source) {
        ValNodeAddPointer (&(psip->dest_list), OBJ_SEQDESC, psip->sdp);
      } else if ((o->type == Object_type_constraint_any || o->type == Object_type_constraint_feature)
                 && psip->sfp != NULL && psip->sfp->data.choice == SEQFEAT_BIOSRC) {
        ValNodeAddPointer (&(psip->dest_list), OBJ_SEQFEAT, psip->sfp);
      } else {
        GetBioSourceDestinationsForBioseq (psip->bsp, o->type, &(psip->dest_list));
      }
      break;
    case ParseDest_featqual :
      AddFeatureDestinationsForBioseq (psip->bsp, dst->data.ptrvalue, &(psip->dest_list));
      break;
    case ParseDest_dbxref :
      GetBioSourceDestinationsForBioseq (psip->bsp, Object_type_constraint_any, &(psip->dest_list));
      break;
  }
}
|
|
7827 |
||
7828 |
||
7829 |
/* SourceHasOneUndeletedDestination
 * Returns TRUE when exactly one entry of source->dest_list has a choice
 * greater than 1, FALSE otherwise (including a NULL source or empty list).
 * CombineSourcesForDestinations marks removed destinations by setting
 * their choice to 0, so those entries are not counted.
 */
static Boolean SourceHasOneUndeletedDestination (ParseSourceInfoPtr source)
{
  ValNodePtr dst;
  Int4       live = 0;

  if (source == NULL) {
    return FALSE;
  }

  /* counting can stop as soon as a second live destination is seen */
  for (dst = source->dest_list; dst != NULL && live < 2; dst = dst->next) {
    if (dst->choice > 1) {
      live++;
    }
  }

  return (Boolean) (live == 1);
}
|
|
7858 |
||
7859 |
||
7860 |
/* CombineSourcesForDestinations
 * Makes sure no destination object is targeted by two different sources.
 * Whenever two sources in *source_list share a destination (same choice
 * and same pointer), their texts are joined as "text1;text2" and the
 * destination is reassigned so that only one source keeps it:
 *   - first source has a single live destination: it takes the joined
 *     text and the destination is dropped from the second source;
 *   - otherwise, second source has a single live destination: it takes
 *     the joined text and the destination is dropped from the first;
 *   - otherwise a new source holding the joined text and only the shared
 *     destination is inserted right after the first source, and the
 *     destination is dropped from both.
 * Dropped destinations are flagged with choice 0 and then extracted;
 * sources left with no destinations are flagged with choice 1 and removed
 * from *source_list at the end.
 */
static void CombineSourcesForDestinations (ValNodePtr PNTR source_list)
{
  ValNodePtr         src_a, src_b, dst_a, dst_b;
  ValNodePtr         spliced, dead;
  ParseSourceInfoPtr info_a, info_b, shared;
  CharPtr            joined;

  for (src_a = *source_list; src_a != NULL; src_a = src_a->next)
  {
    info_a = (ParseSourceInfoPtr) src_a->data.ptrvalue;
    if (info_a == NULL || info_a->dest_list == NULL) continue;

    for (src_b = src_a->next; src_b != NULL; src_b = src_b->next)
    {
      /* a positive choice means this source is already flagged for removal */
      if (src_b->choice > 0) continue;

      info_b = (ParseSourceInfoPtr) src_b->data.ptrvalue;
      if (info_b == NULL || info_b->dest_list == NULL) continue;

      for (dst_a = info_a->dest_list; dst_a != NULL; dst_a = dst_a->next)
      {
        /* choice 0 marks a destination that was already dropped */
        if (dst_a->choice == 0) continue;

        for (dst_b = info_b->dest_list; dst_b != NULL; dst_b = dst_b->next)
        {
          if (dst_b->choice == 0) continue;
          if (dst_a->choice != dst_b->choice
              || dst_a->data.ptrvalue != dst_b->data.ptrvalue)
          {
            continue;
          }

          /* room for both texts, the ';' separator and the terminator */
          joined = (CharPtr) MemNew (sizeof (Char)
                                     * (StringLen (info_a->parse_src_txt)
                                        + StringLen (info_b->parse_src_txt)
                                        + 2));
          StringCpy (joined, info_a->parse_src_txt);
          StringCat (joined, ";");
          StringCat (joined, info_b->parse_src_txt);

          if (SourceHasOneUndeletedDestination (info_a))
          {
            /* first source owns only this destination: it absorbs the text */
            info_a->parse_src_txt = MemFree (info_a->parse_src_txt);
            info_a->parse_src_txt = joined;
            dst_b->choice = 0;
          }
          else if (SourceHasOneUndeletedDestination (info_b))
          {
            /* second source owns only this destination: it absorbs the text */
            info_b->parse_src_txt = MemFree (info_b->parse_src_txt);
            info_b->parse_src_txt = joined;
            dst_a->choice = 0;
          }
          else
          {
            /* both have other destinations: split the shared one off into
             * a new source placed directly after the first source */
            shared = ParseSourceInfoNew (NULL, NULL, NULL, NULL, joined);
            ValNodeAddPointer (&(shared->dest_list),
                               dst_a->choice,
                               dst_a->data.ptrvalue);
            dst_a->choice = 0;
            dst_b->choice = 0;
            spliced = ValNodeNew (NULL);
            spliced->choice = 0;
            spliced->data.ptrvalue = shared;
            spliced->next = src_a->next;
            src_a->next = spliced;
          }
        }
      }

      /* physically remove dropped destinations; flag emptied sources */
      dead = ValNodeExtractList (&(info_a->dest_list), 0);
      dead = ValNodeFree (dead);
      if (info_a->dest_list == NULL)
      {
        src_a->choice = 1;
      }
      dead = ValNodeExtractList (&(info_b->dest_list), 0);
      dead = ValNodeFree (dead);
      if (info_b->dest_list == NULL)
      {
        src_b->choice = 1;
      }
    }
  }

  /* now remove sources deleted */
  dead = ValNodeExtractList (source_list, 1);
  dead = ParseSourceListFree (dead);
}
|
|
7983 |
||
7984 |
||
7985 |
/* GetPartsForSourceDescriptorOnSegSet
 * Given a descriptor whose parent is a BioseqSet, returns the "parts" set
 * of the segmented set it belongs to, or NULL.
 * The parent is taken from the descriptor's ObjValNode index (only
 * available when sdp->extended is 1).  If the parent is a nuc-prot set
 * whose first member is a set, that member is examined instead.  The set
 * must then be of class segset; its members are scanned for the first
 * set of class parts.
 */
static BioseqSetPtr GetPartsForSourceDescriptorOnSegSet (SeqDescrPtr sdp)
{
  ObjValNodePtr ovp;
  BioseqSetPtr  container, member;
  SeqEntryPtr   entry;

  if (sdp == NULL || sdp->extended != 1) return NULL;

  ovp = (ObjValNodePtr) sdp;
  if (ovp->idx.parenttype != OBJ_BIOSEQSET || ovp->idx.parentptr == NULL) return NULL;

  container = (BioseqSetPtr) ovp->idx.parentptr;

  /* step from a nuc-prot wrapper into its leading set, if it has one */
  if (container->_class == BioseqseqSet_class_nuc_prot
      && IS_Bioseq_set (container->seq_set)
      && container->seq_set->data.ptrvalue != NULL) {
    container = (BioseqSetPtr) container->seq_set->data.ptrvalue;
  }

  if (container->_class != BioseqseqSet_class_segset) return NULL;

  for (entry = container->seq_set; entry != NULL; entry = entry->next) {
    if (!IS_Bioseq_set (entry) || entry->data.ptrvalue == NULL) continue;
    member = (BioseqSetPtr) entry->data.ptrvalue;
    if (member->_class == BioseqseqSet_class_parts) {
      return member;
    }
  }

  return NULL;
}
|
|
8021 |
||
8022 |
||
8023 |
/* FindSourceDescriptorInSeqEntry
 * Returns the first source descriptor in the descriptor chain of the
 * Bioseq or BioseqSet held directly by sep, or NULL when there is none
 * (or sep is NULL / empty).  Only that one chain is searched.
 */
static SeqDescrPtr FindSourceDescriptorInSeqEntry (SeqEntryPtr sep)
{
  SeqDescrPtr cur = NULL;

  if (sep == NULL || sep->data.ptrvalue == NULL) {
    return NULL;
  }

  if (IS_Bioseq (sep)) {
    cur = ((BioseqPtr) sep->data.ptrvalue)->descr;
  } else if (IS_Bioseq_set (sep)) {
    cur = ((BioseqSetPtr) sep->data.ptrvalue)->descr;
  }

  for (; cur != NULL; cur = cur->next) {
    if (cur->choice == Seq_descr_source) {
      break;
    }
  }
  return cur;
}
|
|
8044 |
||
8045 |
||
8046 |
/* PropagateToSeqEntry
 * Makes a copy of sdp with AsnIoMemCopy and links it onto the end of the
 * descriptor chain of the Bioseq or BioseqSet held by sep.
 * Returns the copy, or NULL when sep is NULL/empty or is neither a Bioseq
 * nor a BioseqSet (the result of AsnIoMemCopy is returned unchecked).
 */
static SeqDescrPtr PropagateToSeqEntry (SeqEntryPtr sep, SeqDescrPtr sdp)
{
  SeqDescrPtr PNTR chain = NULL;
  SeqDescrPtr      copy;

  if (sep == NULL || sep->data.ptrvalue == NULL) {
    return NULL;
  }

  /* pick the descriptor chain that belongs to this entry */
  if (IS_Bioseq (sep)) {
    chain = &(((BioseqPtr) sep->data.ptrvalue)->descr);
  } else if (IS_Bioseq_set (sep)) {
    chain = &(((BioseqSetPtr) sep->data.ptrvalue)->descr);
  }
  if (chain == NULL) {
    return NULL;
  }

  copy = AsnIoMemCopy ((Pointer) sdp,
                       (AsnReadFunc) SeqDescrAsnRead,
                       (AsnWriteFunc) SeqDescrAsnWrite);
  ValNodeLink (chain, copy);
  return copy;
}
|
|
8069 |
||
8070 |
||
8071 |
/* PropagateSourceOnSegSetForParse
 * For every source in parse_source_list, looks at each destination that
 * is a source descriptor sitting on a segmented set (one for which
 * GetPartsForSourceDescriptorOnSegSet finds a parts set).  If the
 * source's own Bioseq is a member of that parts set and has no source
 * descriptor of its own, the set-level descriptor is copied onto it and
 * the copy becomes an extra destination.  The set-level descriptor is
 * then flagged with choice 0 so it is no longer a destination.
 */
static void PropagateSourceOnSegSetForParse (ValNodePtr parse_source_list)
{
  ParseSourceInfoPtr info;
  ValNodePtr         src_node, dst_node;
  ValNodePtr         added = NULL;
  SeqDescrPtr        master, copied;
  SeqEntryPtr        part;
  BioseqSetPtr       parts;

  for (src_node = parse_source_list; src_node != NULL; src_node = src_node->next) {
    info = (ParseSourceInfoPtr) src_node->data.ptrvalue;
    if (info == NULL) continue;

    for (dst_node = info->dest_list; dst_node != NULL; dst_node = dst_node->next) {
      if (dst_node->choice != OBJ_SEQDESC) continue;

      master = (SeqDescrPtr) dst_node->data.ptrvalue;
      if (master == NULL || master->choice != Seq_descr_source) continue;

      parts = GetPartsForSourceDescriptorOnSegSet (master);
      if (parts == NULL) continue;

      for (part = parts->seq_set; part != NULL; part = part->next) {
        /* only the part that is this source's own Bioseq is of interest */
        if (!IS_Bioseq(part) || part->data.ptrvalue != info->bsp) continue;
        if (FindSourceDescriptorInSeqEntry (part) != NULL) continue;

        copied = PropagateToSeqEntry (part, master);
        ValNodeAddPointer (&added, OBJ_SEQDESC, copied);
      }

      /* set choice to 0 so master won't be a destination */
      dst_node->choice = 0;
    }

    /* add extra destinations to list */
    ValNodeLink (&info->dest_list, added);
    added = NULL;
  }
}
|
|
8113 |
||
8114 |
||
8115 |
/* SetDBxrefForBioSource
 * Sets the value of the db_xref named db_name on biop's OrgRef to str,
 * combining with any existing value as directed by existing_text
 * (handled by SetStringValue).
 *
 * An OrgRef is created if biop has none.  The first db entry that has a
 * tag and whose db name equals db_name is reused; otherwise a new Dbtag
 * is appended.  If the tag currently holds a positive integer id and no
 * string, the id is first converted to its decimal string form so the
 * new text can be combined with it.
 *
 * Returns the result of SetStringValue, or FALSE when biop is NULL,
 * db_name or str has no text, or a needed object could not be allocated.
 */
static Boolean SetDBxrefForBioSource (BioSourcePtr biop, CharPtr db_name, CharPtr str, Uint2 existing_text)
{
  ValNodePtr  dbx;
  DbtagPtr    dbtag = NULL;
  Boolean     found = FALSE;
  Char        buf[20];

  if (biop == NULL || StringHasNoText (db_name) || StringHasNoText (str)) {
    return FALSE;
  }

  if (biop->org == NULL)
  {
    biop->org = OrgRefNew();
    if (biop->org == NULL)
    {
      return FALSE;
    }
  }

  /* look for an existing db_xref with this database name */
  for (dbx = biop->org->db; dbx != NULL; dbx = dbx->next)
  {
    dbtag = (DbtagPtr) dbx->data.ptrvalue;
    if (dbtag != NULL && dbtag->tag != NULL
        && StringCmp (dbtag->db, db_name) == 0)
    {
      found = TRUE;
      break;
    }
  }

  if (!found)
  {
    dbtag = DbtagNew();
    if (dbtag == NULL)
    {
      return FALSE;
    }
    dbtag->db = StringSave (db_name);
    ValNodeAddPointer (&(biop->org->db), 0, dbtag);
  }

  if (dbtag->tag == NULL)
  {
    dbtag->tag = ObjectIdNew();
    if (dbtag->tag == NULL)
    {
      return FALSE;
    }
  }

  /* if it was a number before, make it a string now */
  if (dbtag->tag->id > 0 && dbtag->tag->str == NULL)
  {
    /* the id is an integer: it must be formatted with an integer
     * conversion, not "%s" */
    sprintf (buf, "%ld", (long) dbtag->tag->id);
    dbtag->tag->id = 0;
    dbtag->tag->str = StringSave (buf);
  }

  return SetStringValue (&(dbtag->tag->str), str, existing_text);
}
|
|
8165 |
||
8166 |
||
8167 |
/* SetFieldForDestList
 * Writes str into the field described by `field` on every object in
 * dest_list, combining with existing text as directed by existing_text.
 *   ParseDest_defline  - title descriptors (OBJ_SEQDESC entries only)
 *   ParseDest_org      - the source qualifier o->field of each BioSource
 *   ParseDest_featqual - the legal qualifier fl->field of each feature
 *   ParseDest_dbxref   - the db_xref named by field->data.ptrvalue on each
 *                        BioSource
 * Returns the number of destinations for which the set operation
 * reported success; 0 when dest_list or field is NULL.
 */
static Int4 SetFieldForDestList (ValNodePtr dest_list, ParseDestPtr field, CharPtr str, Uint2 existing_text)
{
  ValNodePtr           dst;
  SeqDescrPtr          title;
  CharPtr              text;
  BioSourcePtr         biop;
  ParseDstOrgPtr       org_dst;
  FeatureFieldLegalPtr legal;
  FeatureField         feat_field;
  Int4                 ok_count = 0;

  if (dest_list == NULL || field == NULL) {
    return 0;
  }

  switch (field->choice) {
    case ParseDest_defline :
      for (dst = dest_list; dst != NULL; dst = dst->next) {
        if (dst->choice != OBJ_SEQDESC || dst->data.ptrvalue == NULL) continue;
        title = (SeqDescrPtr) dst->data.ptrvalue;
        if (title->choice != Seq_descr_title) continue;

        /* SetStringValue may replace the string, so write it back */
        text = title->data.ptrvalue;
        if (SetStringValue (&text, str, existing_text)) {
          ok_count++;
        }
        title->data.ptrvalue = text;
      }
      break;

    case ParseDest_org :
      org_dst = (ParseDstOrgPtr) field->data.ptrvalue;
      if (org_dst == NULL) break;
      for (dst = dest_list; dst != NULL; dst = dst->next) {
        biop = GetBioSourceFromObject (dst->choice, dst->data.ptrvalue);
        if (SetSourceQualInBioSource (biop, org_dst->field, NULL, str, existing_text)) {
          ok_count++;
        }
      }
      break;

    case ParseDest_featqual:
      legal = (FeatureFieldLegalPtr) field->data.ptrvalue;
      if (legal == NULL) break;

      /* build a temporary FeatureField describing the legal qualifier */
      feat_field.type = legal->type;
      feat_field.field = ValNodeNew(NULL);
      feat_field.field->next = NULL;
      feat_field.field->choice = FeatQualChoice_legal_qual;
      feat_field.field->data.intvalue = legal->field;

      for (dst = dest_list; dst != NULL; dst = dst->next) {
        if (SetQualOnFeature (dst->data.ptrvalue, &feat_field, NULL, str, existing_text)) {
          ok_count++;
        }
      }
      feat_field.field = ValNodeFree (feat_field.field);
      break;

    case ParseDest_dbxref:
      if (StringHasNoText (field->data.ptrvalue)) break;
      for (dst = dest_list; dst != NULL; dst = dst->next) {
        biop = GetBioSourceFromObject (dst->choice, dst->data.ptrvalue);
        if (SetDBxrefForBioSource (biop, field->data.ptrvalue, str, existing_text)) {
          ok_count++;
        }
      }
      break;
  }

  return ok_count;
}
|
|
8235 |
||
8236 |
||
8237 |
/* StripFieldForSrcList
 * Removes the parsed text portion from the object the source text was
 * taken from, according to the source kind in field->choice:
 *   ParseSrc_defline            - the title descriptor in psip->sdp
 *   ParseSrc_org                - the source qualifier o->field of the
 *                                 BioSource in psip->sdp, or else psip->sfp
 *   ParseSrc_comment            - a user-object or comment descriptor in
 *                                 psip->sdp, and/or a comment feature in
 *                                 psip->sfp
 *   ParseSrc_bankit_comment     - the user-object descriptor in psip->sdp
 *   ParseSrc_structured_comment - the user-object descriptor in psip->sdp
 * Other source kinds are left untouched.
 */
static void StripFieldForSrcList (ParseSourceInfoPtr psip, ParseSrcPtr field, TextPortionPtr text_portion)
{
  ParseSrcOrgPtr org_src;
  BioSourcePtr   biop;
  CharPtr        qual_text;

  if (psip == NULL || field == NULL || text_portion == NULL) {
    return;
  }

  switch (field->choice) {
    case ParseSrc_defline :
      if (psip->sdp != NULL && psip->sdp->choice == Seq_descr_title) {
        ReplaceStringForParse (psip->sdp->data.ptrvalue, text_portion);
      }
      break;

    case ParseSrc_org :
      org_src = (ParseSrcOrgPtr) field->data.ptrvalue;
      if (org_src == NULL) break;

      /* the descriptor takes precedence over the feature */
      if (psip->sdp != NULL && psip->sdp->choice == Seq_descr_source) {
        biop = (BioSourcePtr) psip->sdp->data.ptrvalue;
      } else if (psip->sfp != NULL && psip->sfp->data.choice == SEQFEAT_BIOSRC) {
        biop = (BioSourcePtr) psip->sfp->data.value.ptrvalue;
      } else {
        break;
      }

      /* fetch the qualifier, cut the portion out, and store it back */
      qual_text = GetSourceQualFromBioSource (biop, org_src->field, NULL);
      ReplaceStringForParse (qual_text, text_portion);
      SetSourceQualInBioSource (biop, org_src->field, NULL, qual_text, ExistingTextOption_replace_old);
      qual_text = MemFree (qual_text);
      break;

    case ParseSrc_comment:
      if (psip->sdp != NULL) {
        switch (psip->sdp->choice) {
          case Seq_descr_user:
            StripBankitCommentForParse (psip->sdp, text_portion);
            break;
          case Seq_descr_comment:
            ReplaceStringForParse (psip->sdp->data.ptrvalue, text_portion);
            break;
        }
      }
      if (psip->sfp != NULL && psip->sfp->data.choice == SEQFEAT_COMMENT) {
        ReplaceStringForParse (psip->sfp->data.value.ptrvalue, text_portion);
      }
      break;

    case ParseSrc_bankit_comment:
      if (psip->sdp != NULL && psip->sdp->choice == Seq_descr_user) {
        StripBankitCommentForParse (psip->sdp, text_portion);
      }
      break;

    case ParseSrc_structured_comment:
      if (psip->sdp != NULL && psip->sdp->choice == Seq_descr_user) {
        StripStructuredCommentForParse (psip->sdp, field->data.ptrvalue, text_portion);
      }
      break;
  }
}
|
|
8293 |
||
8294 |
||
8295 |
/* ApplyParseActionToSeqEntry
 * Runs one parse action over sep:
 *   1. collects the parse sources (action->src / action->portion) from
 *      every Bioseq;
 *   2. optionally fixes capitalization of each source text and finds the
 *      destinations for it (action->dest);
 *   3. merges sources that share a destination, and for BioSource
 *      destinations propagates segmented-set sources to the parts;
 *   4. writes each source text to its destinations;
 *   5. if action->remove_from_parsed is set, strips the parsed portion
 *      from the original source objects.
 * Returns the number of destination fields that were set; 0 when action
 * or sep is NULL.
 */
static Int4 ApplyParseActionToSeqEntry (ParseActionPtr action, SeqEntryPtr sep)
{
  ParseSrcCollectionData psd;
  ParseSourceInfoPtr     psip;
  ValNodePtr             orgnames = NULL, source_list_for_removal = NULL, vnp;
  Int4                   num_succeeded = 0;

  if (action == NULL || sep == NULL) return 0;

  psd.src = action->src;
  psd.portion = action->portion;
  psd.src_list = NULL;

  /* first, we need to get a list of the parse sources */
  VisitBioseqsInSep (sep, &psd, FindParseSourceBioseqCallback);

  if (action->capitalization != Cap_change_none) {
    /* if we will be fixing capitalization, get org names to use in fixes */
    VisitBioSourcesInSep (sep, &orgnames, GetOrgNamesInRecordCallback);
  }

  /* for each parse source, we need to get a list of the destinations */
  for (vnp = psd.src_list; vnp != NULL; vnp = vnp->next)
  {
    if (vnp->data.ptrvalue == NULL) continue;
    psip = (ParseSourceInfoPtr) vnp->data.ptrvalue;
    if (action->remove_from_parsed) {
      /* remember the untouched source so the text can be stripped later */
      ValNodeAddPointer (&source_list_for_removal, 0, ParseSourceInfoCopy (psip));
    }
    /* fix source text */
    FixCapitalizationInString (&(psip->parse_src_txt), action->capitalization, orgnames);

    /* find destinations */
    AddParseDestinations (psip, action->dest);
  }

  /* free orgname list if we created it */
  orgnames = ValNodeFree (orgnames);

  CombineSourcesForDestinations (&(psd.src_list));

  /* action->dest may be NULL (AddParseDestinations tolerates that), so it
   * must be checked before it is dereferenced here */
  if (action->dest != NULL && action->dest->choice == ParseDest_org) {
    PropagateSourceOnSegSetForParse (psd.src_list);
  }

  /* now do the parsing */
  for (vnp = psd.src_list; vnp != NULL; vnp = vnp->next) {
    psip = (ParseSourceInfoPtr) vnp->data.ptrvalue;
    if (psip == NULL) continue;
    num_succeeded += SetFieldForDestList (psip->dest_list, action->dest, psip->parse_src_txt, action->existing_text);
  }

  /* now remove strings from sources */
  for (vnp = source_list_for_removal; vnp != NULL; vnp = vnp->next)
  {
    if (vnp->data.ptrvalue == NULL) continue;
    psip = (ParseSourceInfoPtr) vnp->data.ptrvalue;
    StripFieldForSrcList (psip, action->src, action->portion);
  }

  /* the collected sources are owned by this function; release them the
   * same way CombineSourcesForDestinations releases the ones it removes */
  psd.src_list = ParseSourceListFree (psd.src_list);

  /* NOTE(review): source_list_for_removal is also never released.  Whether
   * ParseSourceListFree is safe for it depends on what ParseSourceInfoCopy
   * duplicates, which is not visible here - confirm before freeing. */

  return num_succeeded;
}
|
|
8356 |
||
8357 |
||
8358 |
/* SetCdRegionGeneticCode
 * Stores on the coding region `cds` the genetic code that
 * SeqEntryToGeneticCode reports for the top parent entry of the Bioseq
 * the feature is located on.  A CdRegion is created if the feature has
 * none.  Nothing is changed when cds is NULL, is not a coding region, or
 * its location's Bioseq cannot be found.
 * Any genetic_code pointer already on the CdRegion is overwritten.
 */
static void SetCdRegionGeneticCode (SeqFeatPtr cds)
{
  CdRegionPtr crp;
  SeqEntryPtr top_sep;
  BioseqPtr   target;
  Int4        code_id;
  ValNodePtr  outer, inner;

  if (cds == NULL || cds->data.choice != SEQFEAT_CDREGION) {
    return;
  }

  crp = (CdRegionPtr) cds->data.value.ptrvalue;
  if (crp == NULL) {
    crp = CdRegionNew();
    cds->data.value.ptrvalue = crp;
  }

  target = BioseqFindFromSeqLoc (cds->location);
  if (target == NULL) {
    return;
  }
  top_sep = GetBestTopParentForData (target->idx.entityID, target);
  code_id = SeqEntryToGeneticCode (top_sep, NULL, NULL, 0);

  /* the code is stored as a two-level ValNode: an outer node (choice 254)
   * whose data points at an inner node (choice 2) carrying the number */
  outer = ValNodeNew (NULL);
  if (outer != NULL) {
    outer->choice = 254;
    inner = ValNodeNew (NULL);
    outer->data.ptrvalue = inner;
    if (inner != NULL) {
      inner->choice = 2;
      inner->data.intvalue = code_id;
    }
  }
  crp->genetic_code = outer;
}
|
|
8388 |
||
8389 |
||
8390 |
/* CreateDataForFeature
 * Gives a newly created feature the data object that matches
 * feature_type and records the featdef in sfp->idx.subtype:
 *   gene          - empty GeneRef
 *   coding region - CdRegion with the genetic code filled in
 *   RNA           - RnaRef with the type set from the featdef; snRNA,
 *                   scRNA, tmRNA and ncRNA are stored as RNA_TYPE_other
 *                   with a name string, and snRNA/scRNA additionally get
 *                   an "ncRNA_class" qualifier on the feature
 *   import        - ImpFeat keyed by the feature type's name
 * Other feature kinds get no data object.  sfp->data.choice is not set
 * here.  A NULL sfp is ignored.
 */
static void CreateDataForFeature (SeqFeatPtr sfp, Int4 feature_type)
{
  Int4       featdef, seqfeattype;
  CharPtr    label = NULL;
  CharPtr    ncrna_class = NULL;
  RnaRefPtr  rrp;
  GBQualPtr  gbq;
  ImpFeatPtr ifp;

  if (sfp == NULL) return;

  featdef = GetFeatdefFromFeatureType (feature_type);
  sfp->idx.subtype = featdef;
  seqfeattype = FindFeatFromFeatDefType (featdef);
  switch (seqfeattype) {
    case SEQFEAT_GENE:
      sfp->data.value.ptrvalue = GeneRefNew();
      break;
    case SEQFEAT_CDREGION:
      sfp->data.value.ptrvalue = CdRegionNew();
      SetCdRegionGeneticCode (sfp);
      break;
    case SEQFEAT_RNA:
      rrp = RnaRefNew();
      sfp->data.value.ptrvalue = rrp;
      if (rrp == NULL) break;
      rrp->ext.choice = 0;
      switch (featdef) {
        case FEATDEF_preRNA:
          rrp->type = RNA_TYPE_premsg;
          break;
        case FEATDEF_mRNA:
          rrp->type = RNA_TYPE_mRNA;
          break;
        case FEATDEF_tRNA:
          rrp->type = RNA_TYPE_tRNA;
          break;
        case FEATDEF_rRNA:
          rrp->type = RNA_TYPE_rRNA;
          break;
        case FEATDEF_snRNA:
          rrp->type = RNA_TYPE_other;
          rrp->ext.choice = 1;
          rrp->ext.value.ptrvalue = StringSave ("ncRNA");
          ncrna_class = "snRNA";
          break;
        case FEATDEF_scRNA:
          rrp->type = RNA_TYPE_other;
          rrp->ext.choice = 1;
          rrp->ext.value.ptrvalue = StringSave ("ncRNA");
          ncrna_class = "scRNA";
          break;
        case FEATDEF_tmRNA:
          rrp->type = RNA_TYPE_other;
          rrp->ext.choice = 1;
          rrp->ext.value.ptrvalue = StringSave ("tmRNA");
          break;
        case FEATDEF_ncRNA:
          rrp->type = RNA_TYPE_other;
          rrp->ext.choice = 1;
          rrp->ext.value.ptrvalue = StringSave ("ncRNA");
          break;
      }
      if (ncrna_class != NULL) {
        /* attach the class qualifier to the feature; previously the
         * qualifier was built but never linked anywhere, so it leaked and
         * the class was lost */
        gbq = GBQualNew ();
        if (gbq != NULL) {
          gbq->qual = StringSave ("ncRNA_class");
          gbq->val = StringSave (ncrna_class);
          gbq->next = sfp->qual;
          sfp->qual = gbq;
        }
      }
      break;
    case SEQFEAT_IMP:
      ifp = ImpFeatNew();
      sfp->data.value.ptrvalue = ifp;
      if (ifp == NULL) break;
      label = GetFeatureNameFromFeatureType (feature_type);
      ifp->key = StringSave (label);
      break;
  }
}
|
|
8462 |
||
8463 |
||
8464 |
/*
 * ExtraCDSCreationActions
 *
 * Creates the protein product for a coding region: translates the CDS,
 * stores the translation in a new raw amino-acid Bioseq, wraps that Bioseq
 * in a new SeqEntry carrying a MolInfo descriptor, adds the entry to
 * parent_sep, sets the CDS product to the new Bioseq and creates a protein
 * feature on it.
 *
 *   cds         coding region to translate; nothing is done when NULL
 *   parent_sep  SeqEntry that receives the new protein SeqEntry
 *
 * The function returns silently as soon as any translation or allocation
 * step fails.
 */
static void ExtraCDSCreationActions (SeqFeatPtr cds, SeqEntryPtr parent_sep)
{
  ByteStorePtr  bs;
  CharPtr       prot, ptr;
  BioseqPtr     bsp;
  Char          ch;
  Int4          i;
  SeqEntryPtr   psep, nsep;
  MolInfoPtr    mip;
  ValNodePtr    vnp, descr;
  SeqFeatPtr    prot_sfp;
  ProtRefPtr    prp;
  Boolean       partial5, partial3;

  if (cds == NULL) return;

  CheckSeqLocForPartial (cds->location, &partial5, &partial3);

  /* Create corresponding protein sequence data for the CDS */

  bs = ProteinFromCdRegionEx (cds, TRUE, FALSE);
  if (NULL == bs)
    return;

  prot = BSMerge (bs, NULL);
  bs = BSFree (bs);
  if (NULL == prot)
    return;

  /* upper-case every residue of the translation */
  ptr = prot;
  ch = *ptr;
  while (ch != '\0') {
    *ptr = TO_UPPER (ch);
    ptr++;
    ch = *ptr;
  }

  /* drop a trailing stop symbol */
  i = StringLen (prot);
  if (i > 0 && prot [i - 1] == '*') {
    prot [i - 1] = '\0';
  }

  bs = BSNew (1000);
  if (bs != NULL) {
    ptr = prot;
    BSWrite (bs, (VoidPtr) ptr, (Int4) StringLen (ptr));
  }

  /* prot is not referenced again below; release the merged buffer
   * (it was previously leaked on every path) */
  prot = MemFree (prot);

  /* Create the product protein Bioseq */

  bsp = BioseqNew ();
  if (NULL == bsp) {
    /* nothing took ownership of the ByteStore, so release it here */
    if (bs != NULL) {
      bs = BSFree (bs);
    }
    return;
  }

  bsp->repr = Seq_repr_raw;
  bsp->mol = Seq_mol_aa;
  bsp->seq_data_type = Seq_code_ncbieaa;
  bsp->seq_data = (SeqDataPtr) bs;
  bsp->length = BSLen (bs);
  bs = NULL;
  bsp->id = MakeNewProteinSeqId (cds->location, NULL);
  SeqMgrAddToBioseqIndex (bsp);

  /* Create a new SeqEntry for the Prot Bioseq */

  psep = SeqEntryNew ();
  if (NULL == psep)
    return;

  psep->choice = 1;
  psep->data.ptrvalue = (Pointer) bsp;
  SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) bsp, psep);

  /* Add a descriptor to the protein Bioseq */

  mip = MolInfoNew ();
  if (NULL == mip)
    return;

  /* NOTE(review): numeric MolInfo codes; presumably biomol 8 = peptide,
   * tech 8 = concept-trans, completeness 3/4/5 = no-left/no-right/no-ends.
   * Confirm against the MolInfo definitions. */
  mip->biomol = 8;
  mip->tech = 8;
  if (partial5 && partial3) {
    mip->completeness = 5;
  } else if (partial5) {
    mip->completeness = 3;
  } else if (partial3) {
    mip->completeness = 4;
  }
  vnp = CreateNewDescriptor (psep, Seq_descr_molinfo);
  if (NULL == vnp)
    return;

  vnp->data.ptrvalue = (Pointer) mip;

  /* pull source/pub descriptors off the parent while the protein entry is
   * added, then put them back */

  descr = ExtractBioSourceAndPubs (parent_sep);

  AddSeqEntryToSeqEntry (parent_sep, psep, TRUE);
  nsep = FindNucSeqEntry (parent_sep);
  ReplaceBioSourceAndPubs (parent_sep, descr);
  SetSeqFeatProduct (cds, bsp);

  prp = ProtRefNew ();

  if (prp != NULL) {
    prot_sfp = CreateNewFeature (psep, NULL, SEQFEAT_PROT, NULL);
    if (prot_sfp != NULL) {
      prot_sfp->data.value.ptrvalue = (Pointer) prp;
      /* the protein feature inherits the partialness of the CDS location */
      SetSeqLocPartial (prot_sfp->location, partial5, partial3);
      prot_sfp->partial = (partial5 || partial3);
    }
  }
}
|
|
8576 |
||
8577 |
||
8578 |
/*
 * LocationFromApplyFeatureAction
 *
 * Builds the location requested by an apply-feature action on one Bioseq.
 * An interval choice yields an interval from the smaller to the larger of
 * the two stored coordinates; a whole-sequence choice yields 0..length-1.
 * The strand is plus when action->plus_strand is set, minus otherwise, and
 * the action's partial5/partial3 flags are applied to the result.
 *
 * Returns NULL when bsp, action or action->location is NULL; the result is
 * also NULL when no location could be built for the choice.
 */
static SeqLocPtr LocationFromApplyFeatureAction (BioseqPtr bsp, ApplyFeatureActionPtr action)
{
  LocationIntervalPtr interval;
  SeqLocPtr           new_loc = NULL;
  Uint1               strand;
  Int4                left, right;

  if (bsp == NULL || action == NULL || action->location == NULL) {
    return NULL;
  }

  strand = action->plus_strand ? Seq_strand_plus : Seq_strand_minus;

  if (action->location->choice == LocationChoice_interval) {
    interval = (LocationIntervalPtr) action->location->data.ptrvalue;
    if (interval != NULL) {
      /* the endpoints may be stored in either order */
      left = MIN (interval->from, interval->to);
      right = MAX (interval->from, interval->to);
      new_loc = SeqLocIntNew (left, right, strand, SeqIdFindWorst (bsp->id));
    }
  } else if (action->location->choice == LocationChoice_whole_sequence) {
    new_loc = SeqLocIntNew (0, bsp->length - 1, strand, SeqIdFindWorst (bsp->id));
  }

  SetSeqLocPartial (new_loc, action->partial5, action->partial3);
  return new_loc;
}
|
|
8603 |
||
8604 |
||
8605 |
/*
 * OkToApplyToBioseq
 *
 * Decides whether the action's feature may be added to bsp.  When the
 * action allows redundant features the answer is always TRUE; otherwise it
 * is TRUE only if SeqMgrGetNextFeature finds no feature of the action's
 * featdef on the Bioseq.  Returns FALSE if either argument is NULL.
 */
static Boolean OkToApplyToBioseq (ApplyFeatureActionPtr action, BioseqPtr bsp)
{
  SeqMgrFeatContext fcontext;
  Int4              subtype;

  if (action == NULL || bsp == NULL) {
    return FALSE;
  }

  if (action->add_redundant) {
    return TRUE;
  }

  subtype = GetFeatdefFromFeatureType (action->type);
  if (SeqMgrGetNextFeature (bsp, NULL, 0, subtype, &fcontext) != NULL) {
    /* a feature of this kind is already present */
    return FALSE;
  }
  return TRUE;
}
|
|
8623 |
||
8624 |
/*
 * AddParts
 *
 * Appends Bioseqs from a parts set to *bsp_list.  Does nothing unless the
 * action has apply_to_parts set, parts is a set of class parts and bsp_list
 * is non-NULL.  When action->only_seg_num is greater than -1 only the member
 * at that zero-based position is considered; otherwise every member is.
 * A member is added only if it is a Bioseq that passes OkToApplyToBioseq.
 */
static void AddParts (ApplyFeatureActionPtr action, BioseqSetPtr parts, ValNodePtr PNTR bsp_list)
{
  SeqEntryPtr part;
  Int4        idx;

  if (action == NULL || !action->apply_to_parts) {
    return;
  }
  if (parts == NULL || parts->_class != BioseqseqSet_class_parts) {
    return;
  }
  if (bsp_list == NULL) {
    return;
  }

  if (action->only_seg_num > -1) {
    /* step forward to the requested segment, if the set is long enough */
    part = parts->seq_set;
    for (idx = 0; idx < action->only_seg_num && part != NULL; idx++) {
      part = part->next;
    }
    if (part != NULL && IS_Bioseq (part) && OkToApplyToBioseq (action, part->data.ptrvalue)) {
      ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, part->data.ptrvalue);
    }
    return;
  }

  part = parts->seq_set;
  while (part != NULL) {
    if (IS_Bioseq (part) && OkToApplyToBioseq (action, part->data.ptrvalue)) {
      ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, part->data.ptrvalue);
    }
    part = part->next;
  }
}
|
|
8653 |
||
8654 |
||
8655 |
/*
 * AddSequenceOrParts
 *
 * Appends to *bsp_list the Bioseq(s) on which the action's feature should
 * be created, starting from bsp:
 *   - bsp directly inside a segset: its parts when apply_to_parts is set
 *     (via AddParts on the first set member of the segset), otherwise bsp;
 *   - bsp inside a parts set: the parts when apply_to_parts is set,
 *     otherwise the first member of the set enclosing the parts set, if
 *     that member is a Bioseq;
 *   - anything else: bsp itself.
 * Every Bioseq is added only if it passes OkToApplyToBioseq.
 */
static void AddSequenceOrParts (ApplyFeatureActionPtr action, BioseqPtr bsp, ValNodePtr PNTR bsp_list)
{
  BioseqSetPtr bssp, parts;
  SeqEntryPtr  sep;

  if (action == NULL || bsp == NULL || bsp_list == NULL) return;

  if (bsp->idx.parenttype == OBJ_BIOSEQSET && bsp->idx.parentptr != NULL) {
    bssp = (BioseqSetPtr) bsp->idx.parentptr;
    if (bssp->_class == BioseqseqSet_class_segset) {
      if (action->apply_to_parts) {
        /* locate the nested set inside the segset */
        sep = bssp->seq_set;
        while (sep != NULL && !IS_Bioseq_set (sep)) {
          sep = sep->next;
        }
        if (sep != NULL) {
          AddParts (action, sep->data.ptrvalue, bsp_list);
        }
      } else {
        if (OkToApplyToBioseq (action, bsp)) {
          ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, bsp);
        }
      }
    } else if (bssp->_class == BioseqseqSet_class_parts) {
      if (action->apply_to_parts) {
        AddParts (action, bssp, bsp_list);
      } else {
        parts = bssp;
        if (parts->idx.parenttype == OBJ_BIOSEQSET && parts->idx.parentptr != NULL) {
          bssp = (BioseqSetPtr) parts->idx.parentptr;
          /* guard the empty set: IS_Bioseq dereferences its argument */
          if (bssp->seq_set != NULL
              && IS_Bioseq (bssp->seq_set)
              && OkToApplyToBioseq (action, bssp->seq_set->data.ptrvalue)) {
            /* add the Bioseq that was just checked; the previous code
             * stored bsp_list itself, which consumers of the list would
             * then treat as a BioseqPtr */
            ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, bssp->seq_set->data.ptrvalue);
          }
        }
      }
    } else {
      if (OkToApplyToBioseq (action, bsp)) {
        ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, bsp);
      }
    }
  } else {
    if (OkToApplyToBioseq (action, bsp)) {
      ValNodeAddPointer (bsp_list, OBJ_BIOSEQ, bsp);
    }
  }
}
|
|
8701 |
||
8702 |
/*
 * AddSequenceOrPartsFromSeqEntry
 *
 * Walks sep and its ->next siblings and hands the relevant Bioseq of each
 * entry to AddSequenceOrParts:
 *   - a Bioseq entry is passed directly;
 *   - a segset contributes its first Bioseq member;
 *   - a nuc-prot set contributes its first member, or, when that member is
 *     itself a segset, the first member of that segset if it is a Bioseq;
 *   - any other set is processed recursively through its members.
 */
static void AddSequenceOrPartsFromSeqEntry (ApplyFeatureActionPtr action, SeqEntryPtr sep, ValNodePtr PNTR bsp_list)
{
  BioseqSetPtr set;
  SeqEntryPtr  member;

  if (action == NULL || sep == NULL) return;

  for (; sep != NULL; sep = sep->next) {
    if (IS_Bioseq (sep)) {
      AddSequenceOrParts (action, sep->data.ptrvalue, bsp_list);
      continue;
    }
    if (!IS_Bioseq_set (sep)) {
      continue;
    }

    set = (BioseqSetPtr) sep->data.ptrvalue;
    if (set->_class == BioseqseqSet_class_segset) {
      /* find master segment */
      member = set->seq_set;
      while (member != NULL && !IS_Bioseq (member)) {
        member = member->next;
      }
      if (member != NULL) {
        AddSequenceOrParts (action, member->data.ptrvalue, bsp_list);
      }
    } else if (set->_class == BioseqseqSet_class_nuc_prot) {
      /* find nucleotide sequence */
      member = set->seq_set;
      if (member == NULL) {
        continue;
      }
      if (IS_Bioseq_set (member)) {
        /* nucleotide is segmented set */
        set = (BioseqSetPtr) member->data.ptrvalue;
        if (set != NULL
            && set->_class == BioseqseqSet_class_segset
            && set->seq_set != NULL
            && IS_Bioseq (set->seq_set)) {
          AddSequenceOrParts (action, set->seq_set->data.ptrvalue, bsp_list);
        }
      } else if (IS_Bioseq (member)) {
        AddSequenceOrParts (action, member->data.ptrvalue, bsp_list);
      }
    } else {
      /* add from set members */
      AddSequenceOrPartsFromSeqEntry (action, set->seq_set, bsp_list);
    }
  }
}
|
|
8746 |
||
8747 |
||
8748 |
/*
 * AdjustProteinSequenceForReadingFrame
 *
 * Brings the protein product of a coding region in line with its current
 * translation.  Does nothing unless cds is a CDREGION feature.
 *
 * If the product Bioseq cannot be found, a product is created through
 * ExtraCDSCreationActions (when the Bioseq of the CDS location is found).
 * Otherwise the product's sequence data and length are replaced with a
 * fresh translation, and its protein feature is either created (copying the
 * CDS partial flags) or, when its length no longer matches, relocated to
 * cover the whole protein.
 */
static void AdjustProteinSequenceForReadingFrame (SeqFeatPtr cds)
{
  BioseqPtr    protbsp, bsp;
  ByteStorePtr bs;
  SeqFeatPtr   prot_sfp;
  Boolean      partial5, partial3;

  if (cds == NULL || cds->data.choice != SEQFEAT_CDREGION) return;

  protbsp = BioseqFindFromSeqLoc (cds->product);

  if (protbsp == NULL) {
    bsp = BioseqFindFromSeqLoc (cds->location);
    if (bsp != NULL) {
      ExtraCDSCreationActions (cds, GetBestTopParentForData (bsp->idx.entityID, bsp));
    }
  } else {
    bs = ProteinFromCdRegionExWithTrailingCodonHandling (cds,
                                                         TRUE,
                                                         FALSE,
                                                         FALSE);
    /* swap the old residues for the new translation */
    protbsp->seq_data = (SeqDataPtr) BSFree ((ByteStorePtr)(protbsp->seq_data));
    protbsp->seq_data = (SeqDataPtr) bs;
    protbsp->length = BSLen (bs);
    prot_sfp = GetProtFeature (protbsp);
    if (prot_sfp == NULL) {
      CheckSeqLocForPartial (cds->location, &partial5, &partial3);
      prot_sfp = CreateNewFeatureOnBioseq (protbsp, SEQFEAT_PROT, NULL);
      /* feature creation can fail (other callers in this file check for
       * NULL); previously the result was dereferenced unconditionally */
      if (prot_sfp != NULL) {
        prot_sfp->data.value.ptrvalue = ProtRefNew ();
        SetSeqLocPartial (prot_sfp->location, partial5, partial3);
        prot_sfp->partial = (partial5 || partial3);
      }
    } else {
      if (SeqLocLen (prot_sfp->location) != protbsp->length) {
        prot_sfp->location = SeqLocFree (prot_sfp->location);
        prot_sfp->location = SeqLocIntNew (0, protbsp->length - 1, Seq_strand_plus, SeqIdFindWorst (protbsp->id));
      }
    }
  }
}
|
|
8787 |
||
8788 |
||
8789 |
/*
 * ApplyApplyFeatureActionToSeqEntry
 *
 * Creates the feature described by action on every qualifying Bioseq in
 * sep and sets the action's qualifier values on it.
 *
 * Target Bioseqs come from action->seq_list when it is an explicit list of
 * identifiers, otherwise from a walk of sep.  For each target a feature of
 * the action's type is created at the action's location.  Gene / gene
 * description qualifiers on a non-gene feature are placed on a gene feature
 * created at the same location (once per target).  Coding regions also get
 * a protein product, are retranslated after the qualifiers are applied, and
 * are then adjusted for unknown gaps.
 *
 * Returns the number of features created (the extra gene features are not
 * counted); 0 if sep or action is NULL.
 */
static Int4 ApplyApplyFeatureActionToSeqEntry (ApplyFeatureActionPtr action, SeqEntryPtr sep)
{
  ValNodePtr          bsp_list = NULL, vnp, field_vnp;
  Int4                featdef, seqfeattype;
  BioseqPtr           bsp;
  SeqFeatPtr          sfp;
  SeqLocPtr           slp;
  FeatQualLegalValPtr q;
  FeatureField        f;
  SeqIdPtr            sip;
  SeqFeatPtr          gene;
  Int4                num_created = 0;

  if (sep == NULL || action == NULL) return 0;

  /* first, get list of Bioseqs to apply features to */
  /* relevant values : seq_list, add_redundant, apply_to_parts, only_seg_num */
  if (action->seq_list != NULL && action->seq_list->choice == SequenceListChoice_list) {
    for (vnp = action->seq_list->data.ptrvalue; vnp != NULL; vnp = vnp->next) {
      /* NOTE(review): sip is never released here; confirm whether
       * CreateSeqIdFromText returns storage the caller must free */
      sip = CreateSeqIdFromText (vnp->data.ptrvalue, sep);
      bsp = BioseqFind (sip);
      if (bsp != NULL) {
        AddSequenceOrParts (action, bsp, &bsp_list);
      }
    }
  } else {
    AddSequenceOrPartsFromSeqEntry (action, sep, &bsp_list);
  }

  /* now add feature to each bioseq in list */
  for (vnp = bsp_list; vnp != NULL; vnp = vnp->next) {
    bsp = vnp->data.ptrvalue;
    if (bsp == NULL) continue;
    featdef = GetFeatdefFromFeatureType (action->type);
    seqfeattype = FindFeatFromFeatDefType (featdef);
    /* NOTE(review): slp is handed to both the new feature and the optional
     * gene and never freed here; confirm whether CreateNewFeatureOnBioseq
     * copies or adopts the location */
    slp = LocationFromApplyFeatureAction (bsp, action);
    sfp = CreateNewFeatureOnBioseq (bsp, seqfeattype, slp);
    if (sfp == NULL) continue;
    CreateDataForFeature (sfp, action->type);
    /* any extra actions */
    switch (action->type) {
      case (Feature_type_cds) :
        ExtraCDSCreationActions (sfp, GetBestTopParentForData (bsp->idx.entityID, bsp));
        break;
    }
    gene = NULL;
    for (field_vnp = action->fields; field_vnp != NULL; field_vnp = field_vnp->next) {
      q = (FeatQualLegalValPtr) field_vnp->data.ptrvalue;
      if (q != NULL) {
        /* temporary field descriptor naming the qualifier to set */
        f.field = ValNodeNew(NULL);
        if (f.field == NULL) continue;   /* out of memory: skip this qualifier */
        f.field->next = NULL;
        f.field->choice = FeatQualChoice_legal_qual;
        f.field->data.intvalue = q->qual;
        if (sfp->data.choice != SEQFEAT_GENE
            && (q->qual == Feat_qual_legal_gene || q->qual == Feat_qual_legal_gene_description)) {
          /* gene qualifiers on a non-gene feature go onto a companion gene */
          if (gene == NULL) {
            gene = CreateNewFeatureOnBioseq (bsp, SEQFEAT_GENE, slp);
            CreateDataForFeature (gene, Feature_type_gene);
          }
          f.type = Feature_type_gene;
          SetQualOnFeature (gene, &f, NULL, q->val, ExistingTextOption_replace_old);
        } else {
          f.type = action->type;
          SetQualOnFeature (sfp, &f, NULL, q->val, ExistingTextOption_replace_old);
        }
        /* the descriptor was only needed for the call above; it used to be
         * leaked once per qualifier per sequence */
        f.field = ValNodeFree (f.field);
      }
    }
    if (action->type == Feature_type_cds) {
      /* retranslate, to account for change in reading frame */
      AdjustProteinSequenceForReadingFrame (sfp);
      /* after the feature has been created, then adjust it for gaps */
      /* Note - this step may result in multiple coding regions being created. */
      AdjustCDSLocationsForUnknownGapsCallback (sfp, NULL);
    }
    num_created++;
  }

  /* release the list nodes; the Bioseqs they point at are not owned here */
  bsp_list = ValNodeFree (bsp_list);
  return num_created;
}
|
|
8867 |
||
8868 |
||
8869 |
typedef struct convertandremovefeaturecollection { |
|
8870 |
Uint1 featdef; |
|
8871 |
ValNodePtr constraint_set; |
|
8872 |
ValNodePtr feature_list; |
|
8873 |
} ConvertAndRemoveFeatureCollectionData, PNTR ConvertAndRemoveFeatureCollectionPtr; |
|
8874 |
||
8875 |
/*
 * ConvertAndRemoveFeatureCollectionCallback
 *
 * Feature-visit callback.  data points at a
 * ConvertAndRemoveFeatureCollectionData; sfp is appended to its
 * feature_list when the feature's subtype equals featdef and the feature
 * satisfies constraint_set.  NULL arguments are ignored.
 */
static void ConvertAndRemoveFeatureCollectionCallback (SeqFeatPtr sfp, Pointer data)
{
  ConvertAndRemoveFeatureCollectionPtr collection;

  if (sfp == NULL || data == NULL) {
    return;
  }

  collection = (ConvertAndRemoveFeatureCollectionPtr) data;
  if (sfp->idx.subtype != collection->featdef) {
    return;
  }
  if (!DoesObjectMatchConstraintChoiceSet (OBJ_SEQFEAT, sfp, collection->constraint_set)) {
    return;
  }
  ValNodeAddPointer (&(collection->feature_list), OBJ_SEQFEAT, sfp);
}
|
|
8886 |
||
8887 |
||
8888 |
/*
 * ApplyRemoveFeatureActionToSeqEntry
 *
 * Collects every feature in sep whose subtype matches the action's type and
 * which satisfies the action's constraint, marks each one for deletion and
 * then deletes the marked objects of sep's entity.
 *
 * Returns the number of features marked; 0 if action is NULL.
 */
static Int4 ApplyRemoveFeatureActionToSeqEntry (RemoveFeatureActionPtr action, SeqEntryPtr sep)
{
  ConvertAndRemoveFeatureCollectionData d;
  ValNodePtr vnp;
  SeqFeatPtr sfp;
  Int4       num_deleted = 0;

  if (action == NULL) return 0;

  d.featdef = GetFeatdefFromFeatureType (action->type);
  d.constraint_set = action->constraint;
  d.feature_list = NULL;

  VisitFeaturesInSep (sep, &d, ConvertAndRemoveFeatureCollectionCallback);
  for (vnp = d.feature_list; vnp != NULL; vnp = vnp->next) {
    sfp = vnp->data.ptrvalue;
    if (sfp != NULL) {
      sfp->idx.deleteme = TRUE;
      num_deleted ++;
    }
  }

  /* drop the collection nodes before the features they point at are
   * deleted; the list was previously leaked */
  d.feature_list = ValNodeFree (d.feature_list);

  DeleteMarkedObjects (ObjMgrGetEntityIDForChoice(sep), 0, NULL);
  return num_deleted;
}
|
|
8912 |
||
8913 |
||
8914 |
/*
 * DoesStrandMatch
 *
 * Tests a location strand value against a "from" strand selector:
 *   from_any      matches everything
 *   from_unknown  matches Seq_strand_unknown
 *   from_plus     matches every value except Seq_strand_minus
 *   from_minus    matches Seq_strand_minus
 *   from_both     matches Seq_strand_both
 * Any other selector matches nothing.
 */
static Boolean DoesStrandMatch (Int4 strand_choice, Uint1 strand_val)
{
  if (strand_choice == Feature_location_strand_from_any) {
    return TRUE;
  }
  if (strand_choice == Feature_location_strand_from_unknown) {
    return (strand_val == Seq_strand_unknown) ? TRUE : FALSE;
  }
  if (strand_choice == Feature_location_strand_from_plus) {
    /* anything that is not explicitly minus counts as plus */
    return (strand_val != Seq_strand_minus) ? TRUE : FALSE;
  }
  if (strand_choice == Feature_location_strand_from_minus) {
    return (strand_val == Seq_strand_minus) ? TRUE : FALSE;
  }
  if (strand_choice == Feature_location_strand_from_both) {
    return (strand_val == Seq_strand_both) ? TRUE : FALSE;
  }
  return FALSE;
}
|
|
8950 |
||
8951 |
||
8952 |
/*
 * GetNewStrandValue
 *
 * Maps a "to" strand selector onto the strand value to store.
 * to_reverse turns plus or unknown into minus, minus into plus, and leaves
 * every other value as it is; the remaining selectors return their fixed
 * strand.  An unrecognised selector yields Seq_strand_unknown.
 */
static Uint1 GetNewStrandValue (Int4 strand_choice, Uint1 strand_val)
{
  if (strand_choice == Feature_location_strand_to_reverse) {
    if (strand_val == Seq_strand_plus || strand_val == Seq_strand_unknown) {
      return Seq_strand_minus;
    }
    if (strand_val == Seq_strand_minus) {
      return Seq_strand_plus;
    }
    return strand_val;
  }

  switch (strand_choice)
  {
    case Feature_location_strand_to_plus:
      return Seq_strand_plus;
    case Feature_location_strand_to_minus:
      return Seq_strand_minus;
    case Feature_location_strand_to_both:
      return Seq_strand_both;
    case Feature_location_strand_to_unknown:
    default:
      break;
  }
  return Seq_strand_unknown;
}
|
|
8988 |
||
8989 |
||
8990 |
/*
 * ConvertLocationStrand
 *
 * Walks slp and every location chained after it through ->next.  For each
 * interval, point, packed point and bond end whose strand matches
 * fromStrand (see DoesStrandMatch), the strand is replaced by the value
 * GetNewStrandValue returns for toStrand.  Packed-int, mix and equiv
 * locations are processed through their member chain.
 *
 * Returns TRUE if at least one stored strand value changed.
 */
static Boolean ConvertLocationStrand (SeqLocPtr slp, Int4 fromStrand, Int4 toStrand)
{
  SeqLocPtr      loc;
  PackSeqPntPtr  psp;
  SeqBondPtr     sbp;
  SeqIntPtr      sinp;
  SeqPntPtr      spp;
  Boolean        rval = FALSE;
  Uint1          strand_orig;

  while (slp != NULL) {
    switch (slp->choice) {
      case SEQLOC_NULL :
        break;
      case SEQLOC_EMPTY :
      case SEQLOC_WHOLE :
        break;
      case SEQLOC_INT :
        sinp = (SeqIntPtr) slp->data.ptrvalue;
        if (sinp != NULL && DoesStrandMatch (fromStrand, sinp->strand))
        {
          strand_orig = sinp->strand;
          sinp->strand = GetNewStrandValue (toStrand, sinp->strand);
          if (strand_orig != sinp->strand) {
            rval = TRUE;
          }
        }
        break;
      case SEQLOC_PNT :
        spp = (SeqPntPtr) slp->data.ptrvalue;
        if (spp != NULL && DoesStrandMatch (fromStrand, spp->strand))
        {
          strand_orig = spp->strand;
          spp->strand = GetNewStrandValue (toStrand, spp->strand);
          if (strand_orig != spp->strand) {
            rval = TRUE;
          }
        }
        break;
      case SEQLOC_PACKED_PNT :
        psp = (PackSeqPntPtr) slp->data.ptrvalue;
        if (psp != NULL && DoesStrandMatch (fromStrand, psp->strand))
        {
          strand_orig = psp->strand;
          psp->strand = GetNewStrandValue (toStrand, psp->strand);
          if (strand_orig != psp->strand) {
            rval = TRUE;
          }
        }
        break;
      case SEQLOC_PACKED_INT :
      case SEQLOC_MIX :
      case SEQLOC_EQUIV :
        /* One recursive call on the head covers the whole member chain,
         * because this function follows ->next itself.  Calling it once per
         * member (as before) converted the n-th member n times, so a
         * from-any / to-reverse edit flipped every second member back. */
        loc = (SeqLocPtr) slp->data.ptrvalue;
        if (loc != NULL) {
          rval |= ConvertLocationStrand (loc, fromStrand, toStrand);
        }
        break;
      case SEQLOC_BOND :
        sbp = (SeqBondPtr) slp->data.ptrvalue;
        if (sbp != NULL) {
          /* both ends of the bond are handled independently */
          spp = (SeqPntPtr) sbp->a;
          if (spp != NULL && DoesStrandMatch (fromStrand, spp->strand))
          {
            strand_orig = spp->strand;
            spp->strand = GetNewStrandValue (toStrand, spp->strand);
            if (strand_orig != spp->strand) {
              rval = TRUE;
            }
          }
          spp = (SeqPntPtr) sbp->b;
          if (spp != NULL && DoesStrandMatch (fromStrand, spp->strand))
          {
            strand_orig = spp->strand;
            spp->strand = GetNewStrandValue (toStrand, spp->strand);
            if (strand_orig != spp->strand) {
              rval = TRUE;
            }
          }
        }
        break;
      case SEQLOC_FEAT :
        break;
      default :
        break;
    }
    slp = slp->next;
  }
  return rval;
}
|
|
9081 |
||
9082 |
||
9083 |
/*
 * ApplyEditLocationStrandToSeqFeat
 *
 * Applies a strand edit (strand_from -> strand_to) to the location of sfp.
 * Returns the result of ConvertLocationStrand, or FALSE when either
 * argument is NULL.
 */
static Boolean ApplyEditLocationStrandToSeqFeat (EditLocationStrandPtr edit, SeqFeatPtr sfp)
{
  if (edit == NULL || sfp == NULL) {
    return FALSE;
  }
  return ConvertLocationStrand (sfp->location, edit->strand_from, edit->strand_to);
}
|
|
9094 |
||
9095 |
||
9096 |
/*
 * At5EndOfSequence
 *
 * Reports whether the 5' end of slp lies on the end of bsp: for a
 * minus-strand location the stop must equal length - 1, for any other
 * strand the start must be 0.  Returns FALSE if either argument is NULL.
 */
static Boolean At5EndOfSequence (SeqLocPtr slp, BioseqPtr bsp)
{
  Int4 five_prime_pos;

  if (slp == NULL || bsp == NULL) {
    return FALSE;
  }

  if (SeqLocStrand (slp) == Seq_strand_minus) {
    /* on the minus strand the 5' end is the highest coordinate */
    five_prime_pos = SeqLocStop (slp);
    return (five_prime_pos == bsp->length - 1) ? TRUE : FALSE;
  }

  five_prime_pos = SeqLocStart (slp);
  return (five_prime_pos == 0) ? TRUE : FALSE;
}
|
|
9119 |
||
9120 |
||
9121 |
/*
 * HasGoodStartCodon
 *
 * Translates a coding region and reports whether the first residue of the
 * translation is 'M'.  Returns FALSE for a NULL or non-CDREGION feature and
 * when no translation is produced.
 */
static Boolean HasGoodStartCodon (SeqFeatPtr sfp)
{
  ByteStorePtr translation;
  CharPtr      residues;
  Boolean      starts_with_met = FALSE;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_CDREGION) {
    return FALSE;
  }

  translation = ProteinFromCdRegionEx (sfp, TRUE, FALSE);
  if (translation == NULL) {
    return FALSE;
  }

  residues = BSMerge (translation, NULL);
  translation = BSFree (translation);
  if (residues != NULL && *residues == 'M') {
    starts_with_met = TRUE;
  }
  residues = MemFree (residues);

  return starts_with_met;
}
|
|
9140 |
||
9141 |
||
9142 |
/*
 * ApplyPartial5SetActionToSeqFeat
 *
 * Makes the location of sfp 5' partial when the action's constraint holds:
 *   all            always
 *   at_end         the 5' end of the location is at the end of the sequence
 *   bad_start      sfp is a coding region whose translation does not start
 *                  with 'M'
 *   frame_not_one  sfp is a coding region whose frame is neither 0 nor 1
 * When action->extend is set and the Bioseq is found, the location is also
 * extended to the sequence end.
 *
 * Returns TRUE only if the location was not already 5' partial and has
 * been changed.
 */
static Boolean ApplyPartial5SetActionToSeqFeat (Partial5SetActionPtr action, SeqFeatPtr sfp)
{
  Boolean      rval = FALSE;
  Boolean      make_partial = FALSE;
  BioseqPtr    bsp;
  CdRegionPtr  crp;
  Boolean      partial5, partial3;

  if (action == NULL || sfp == NULL) return FALSE;
  bsp = BioseqFindFromSeqLoc (sfp->location);

  switch (action->constraint) {
    case Partial_5_set_constraint_all:
      make_partial = TRUE;
      break;
    case Partial_5_set_constraint_at_end:
      make_partial = At5EndOfSequence (sfp->location, bsp);
      break;
    case Partial_5_set_constraint_bad_start:
      /* The constraint asks for a BAD start; the test used to be inverted
       * and fired on coding regions with a good start codon.  The CDREGION
       * check keeps other feature types unaffected, as before. */
      if (sfp->data.choice == SEQFEAT_CDREGION && !HasGoodStartCodon (sfp)) {
        make_partial = TRUE;
      }
      break;
    case Partial_5_set_constraint_frame_not_one:
      if (sfp->data.choice == SEQFEAT_CDREGION
          && (crp = sfp->data.value.ptrvalue) != NULL
          && crp->frame != 0 && crp->frame != 1) {
        make_partial = TRUE;
      }
      break;
  }

  if (make_partial) {
    CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
    if (!partial5) {
      /* keep the existing 3' state, switch the 5' state on */
      SetSeqLocPartial (sfp->location, TRUE, partial3);
      if (action->extend && bsp != NULL) {
        ExtendSeqLocToEnd (sfp->location, bsp, TRUE);
      }
      rval = TRUE;
    }
  }
  return rval;
}
|
|
9186 |
||
9187 |
||
9188 |
/*
 * ApplyClear5PartialToSeqFeat
 *
 * Removes the 5' partial state from the location of sfp when the
 * constraint given by action holds:
 *   all          always
 *   not_at_end   the 5' end of the location is not at the end of the
 *                sequence
 *   good_start   sfp is a coding region whose translation starts with 'M'
 *
 * Returns TRUE only if the location was 5' partial and has been changed.
 */
static Boolean ApplyClear5PartialToSeqFeat (Int4 action, SeqFeatPtr sfp)
{
  Boolean rval = FALSE, clear_partial = FALSE;
  Boolean partial5, partial3;

  if (sfp == NULL) return FALSE;

  switch (action) {
    case Partial_5_clear_constraint_all:
      clear_partial = TRUE;
      break;
    case Partial_5_clear_constraint_not_at_end:
      clear_partial = !At5EndOfSequence(sfp->location, BioseqFindFromSeqLoc (sfp->location));
      break;
    case Partial_5_clear_constraint_good_start:
      /* The constraint asks for a GOOD start; the test used to be negated,
       * which cleared the partial exactly when the start codon was bad (or
       * the feature was not a coding region at all). */
      clear_partial = HasGoodStartCodon(sfp);
      break;
  }
  if (clear_partial) {
    CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
    if (partial5) {
      /* keep the existing 3' state, switch the 5' state off */
      SetSeqLocPartial (sfp->location, FALSE, partial3);
      rval = TRUE;
    }
  }
  return rval;
}
|
|
9215 |
||
9216 |
||
9217 |
/*
 * At3EndOfSequence
 *
 * Reports whether the 3' end of slp lies on the end of bsp: for a
 * minus-strand location the start must be 0, for any other strand the stop
 * must equal length - 1.  Returns FALSE if either argument is NULL.
 */
static Boolean At3EndOfSequence (SeqLocPtr slp, BioseqPtr bsp)
{
  Int4 three_prime_pos;

  if (slp == NULL || bsp == NULL) {
    return FALSE;
  }

  if (SeqLocStrand (slp) == Seq_strand_minus) {
    /* on the minus strand the 3' end is the lowest coordinate */
    three_prime_pos = SeqLocStart (slp);
    return (three_prime_pos == 0) ? TRUE : FALSE;
  }

  three_prime_pos = SeqLocStop (slp);
  return (three_prime_pos == bsp->length - 1) ? TRUE : FALSE;
}
|
|
9240 |
||
9241 |
||
9242 |
/*
 * HasGoodStopCodon
 *
 * Translates a coding region and reports whether the last character of the
 * translation is '*'.  Returns FALSE for a NULL or non-CDREGION feature,
 * when no translation is produced, and when the translation is empty.
 */
static Boolean HasGoodStopCodon (SeqFeatPtr sfp)
{
  ByteStorePtr bs;
  CharPtr      prot;
  Int4         len;
  Boolean      has_stop = FALSE;

  if (sfp != NULL && sfp->data.choice == SEQFEAT_CDREGION) {
    bs = ProteinFromCdRegionEx (sfp, TRUE, FALSE);
    if (bs != NULL) {
      prot = BSMerge (bs, NULL);
      bs = BSFree (bs);
      if (prot != NULL) {
        /* an empty translation has no last character; indexing with
         * length - 1 would read before the start of the buffer */
        len = (Int4) StringLen (prot);
        if (len > 0 && prot[len - 1] == '*') {
          has_stop = TRUE;
        }
      }
      prot = MemFree (prot);
    }
  }
  return has_stop;
}
|
|
9261 |
||
9262 |
||
9263 |
/*
 * ApplyPartial3SetActionToSeqFeat
 *
 * Makes the location of sfp 3' partial when the action's constraint holds:
 *   all       always
 *   at_end    the 3' end of the location is at the end of the sequence
 *   bad_end   sfp is a coding region whose translation does not end in '*'
 * When action->extend is set and the Bioseq is found, the location is also
 * extended to the sequence end.
 *
 * Returns TRUE only if the location was not already 3' partial and has
 * been changed.
 */
static Boolean ApplyPartial3SetActionToSeqFeat (Partial3SetActionPtr action, SeqFeatPtr sfp)
{
  Boolean      rval = FALSE;
  Boolean      make_partial = FALSE;
  BioseqPtr    bsp;
  Boolean      partial5, partial3;

  if (action == NULL || sfp == NULL) return FALSE;
  bsp = BioseqFindFromSeqLoc (sfp->location);

  switch (action->constraint) {
    case Partial_3_set_constraint_all:
      make_partial = TRUE;
      break;
    case Partial_3_set_constraint_at_end:
      make_partial = At3EndOfSequence (sfp->location, bsp);
      break;
    case Partial_3_set_constraint_bad_end:
      /* The constraint asks for a BAD end; the test used to be inverted
       * and fired on coding regions that do end in a stop codon.  The
       * CDREGION check keeps other feature types unaffected, as before. */
      if (sfp->data.choice == SEQFEAT_CDREGION && !HasGoodStopCodon (sfp)) {
        make_partial = TRUE;
      }
      break;
  }

  if (make_partial) {
    CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
    if (!partial3) {
      /* keep the existing 5' state, switch the 3' state on */
      SetSeqLocPartial (sfp->location, partial5, TRUE);
      if (action->extend && bsp != NULL) {
        ExtendSeqLocToEnd (sfp->location, bsp, FALSE);
      }
      rval = TRUE;
    }
  }
  return rval;
}
|
|
9299 |
||
9300 |
||
9301 |
/*
 * ApplyClear3PartialToSeqFeat
 *
 * Removes the 3' partial state from the location of sfp when the
 * constraint given by action holds:
 *   all          always
 *   not_at_end   the 3' end of the location is not at the end of the
 *                sequence
 *   good_end     sfp is a coding region whose translation ends in '*'
 *
 * Returns TRUE only if the location was 3' partial and has been changed.
 */
static Boolean ApplyClear3PartialToSeqFeat (Int4 action, SeqFeatPtr sfp)
{
  Boolean rval = FALSE, clear_partial = FALSE;
  Boolean partial5, partial3;

  if (sfp == NULL) return FALSE;

  switch (action) {
    case Partial_3_clear_constraint_all:
      clear_partial = TRUE;
      break;
    case Partial_3_clear_constraint_not_at_end:
      clear_partial = !At3EndOfSequence(sfp->location, BioseqFindFromSeqLoc (sfp->location));
      break;
    case Partial_3_clear_constraint_good_end:
      /* The constraint asks for a GOOD end; the test used to be negated,
       * which cleared the partial exactly when the stop codon was missing
       * (or the feature was not a coding region at all). */
      clear_partial = HasGoodStopCodon(sfp);
      break;
  }
  if (clear_partial) {
    CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
    if (partial3) {
      /* keep the existing 5' state, switch the 3' state off */
      SetSeqLocPartial (sfp->location, partial5, FALSE);
      rval = TRUE;
    }
  }
  return rval;
}
|
|
9328 |
||
9329 |
||
9330 |
/*
 * ApplyConvertLocationToSeqFeat
 *
 * Rebuilds the location of sfp according to convert_location:
 *   join   only when the location has NULLs between its parts
 *   order  only when the location has no NULLs between its parts
 *   merge  only when the location is not already a single interval
 * The replacement comes from SeqLocMerge (and, for join/order on a
 * segmented Bioseq, SegLocToPartsEx); the original 5'/3' partial state is
 * re-applied afterwards.
 *
 * Returns TRUE if the location was replaced, FALSE otherwise (including
 * when sfp is NULL or its Bioseq cannot be found).
 */
static Boolean ApplyConvertLocationToSeqFeat (Int4 convert_location, SeqFeatPtr sfp)
{
  Boolean   has_nulls, add_null;
  SeqLocPtr rebuilt;
  BioseqPtr bsp;
  Boolean   partial5, partial3;

  if (sfp == NULL) {
    return FALSE;
  }
  bsp = BioseqFindFromSeqLoc (sfp->location);
  if (bsp == NULL) {
    return FALSE;
  }

  CheckSeqLocForPartial (sfp->location, &partial5, &partial3);
  has_nulls = LocationHasNullsBetween (sfp->location);

  switch (convert_location)
  {
    case Convert_location_type_join :
    case Convert_location_type_order :
      /* join and order run the same steps; they differ only in the flag
       * handed to SeqLocMerge / SegLocToPartsEx and in the precondition */
      add_null = (convert_location == Convert_location_type_order) ? TRUE : FALSE;
      if (add_null ? has_nulls : !has_nulls) {
        /* already in the requested form */
        break;
      }

      rebuilt = SeqLocMerge (bsp, sfp->location, NULL, FALSE, FALSE, add_null);
      SeqLocFree (sfp->location);
      sfp->location = rebuilt;

      if (bsp->repr == Seq_repr_seg)
      {
        rebuilt = SegLocToPartsEx (bsp, sfp->location, add_null);
        SeqLocFree (sfp->location);
        sfp->location = rebuilt;
        has_nulls = LocationHasNullsBetween (sfp->location);
        sfp->partial = (sfp->partial || has_nulls);
      }

      FreeAllFuzz (sfp->location);
      SetSeqLocPartial (sfp->location, partial5, partial3);
      return TRUE;

    case Convert_location_type_merge :
      if (sfp->location->choice == SEQLOC_INT) {
        break;
      }
      rebuilt = SeqLocMerge (bsp, sfp->location, NULL, TRUE, FALSE, FALSE);
      SeqLocFree (sfp->location);
      sfp->location = rebuilt;
      SetSeqLocPartial (sfp->location, partial5, partial3);
      return TRUE;

    default:
      break;
  }
  return FALSE;
}
|
|
9396 |
||
9397 |
||
9398 |
/* Routes a single location-edit action to the handler selected by
 * action->choice and reports whether that handler changed the feature.
 *
 * action : choice node; strand and set-partial actions pass data.ptrvalue
 *          to their handler, clear-partial and convert actions pass
 *          data.intvalue.
 * sfp    : feature whose location is to be edited.
 *
 * Returns the handler's result; FALSE when either argument is NULL or the
 * choice is not one of the handled edit types.
 */
static Boolean ApplyLocationEditTypeToSeqFeat (ValNodePtr action, SeqFeatPtr sfp)
{
  if (action == NULL || sfp == NULL) {
    return FALSE;
  }

  switch (action->choice) {
    case LocationEditType_strand:
      return ApplyEditLocationStrandToSeqFeat (action->data.ptrvalue, sfp);
    case LocationEditType_set_5_partial:
      return ApplyPartial5SetActionToSeqFeat (action->data.ptrvalue, sfp);
    case LocationEditType_clear_5_partial:
      return ApplyClear5PartialToSeqFeat (action->data.intvalue, sfp);
    case LocationEditType_set_3_partial:
      return ApplyPartial3SetActionToSeqFeat (action->data.ptrvalue, sfp);
    case LocationEditType_clear_3_partial:
      return ApplyClear3PartialToSeqFeat (action->data.intvalue, sfp);
    case LocationEditType_convert:
      return ApplyConvertLocationToSeqFeat (action->data.intvalue, sfp);
    default:
      /* unrecognized edit type: nothing is touched */
      return FALSE;
  }
}
|
|
9428 |
||
9429 |
||
9430 |
/* Applies one edit-feature-location action to the features of a SeqEntry.
 *
 * The candidate features are gathered by visiting sep with
 * ConvertAndRemoveFeatureCollectionCallback, configured with the featdef
 * derived from action->type and with action->constraint; the edit in
 * action->action is then applied to each collected feature.
 *
 * Returns the number of features for which the edit reported a change,
 * or 0 when action is NULL.
 */
static Int4 ApplyEditFeatureLocationActionToSeqEntry (EditFeatureLocationActionPtr action, SeqEntryPtr sep)
{
  ConvertAndRemoveFeatureCollectionData d;
  ValNodePtr vnp;
  SeqFeatPtr sfp;
  Int4       num_affected = 0;

  if (action == NULL) return 0;

  d.featdef = GetFeatdefFromFeatureType (action->type);
  d.constraint_set = action->constraint;
  d.feature_list = NULL;

  VisitFeaturesInSep (sep, &d, ConvertAndRemoveFeatureCollectionCallback);
  for (vnp = d.feature_list; vnp != NULL; vnp = vnp->next) {
    sfp = vnp->data.ptrvalue;
    if (sfp != NULL && ApplyLocationEditTypeToSeqFeat (action->action, sfp)) {
      num_affected++;
    }
  }

  /* Release the temporary list nodes.  The nodes only reference features
   * that live in the SeqEntry, so the node chain is freed but not the
   * data it points to.
   * NOTE(review): assumes the callback builds feature_list from ordinary
   * ValNodes (e.g. ValNodeAddPointer) - confirm against the callback.
   */
  d.feature_list = ValNodeFree (d.feature_list);
  return num_affected;
}
|
|
9452 |
||
9453 |
||
9454 |
/* Runs every action of a macro, in list order, against a SeqEntry.
 *
 * sep        : entry the actions operate on.
 * macro      : chain of action nodes; each node's choice selects the handler
 *              and data.ptrvalue carries the action object.
 * pNumFields : optional; receives the combined count returned by the AECR,
 *              parse and edit-location handlers.
 * pNumFeat   : optional; receives the combined count returned by the
 *              add-feature and remove-feature handlers.
 *
 * After each add-feature action the entity's features are re-indexed.
 * Nodes with any other choice are skipped.
 */
NLM_EXTERN void ApplyMacroToSeqEntry (SeqEntryPtr sep, ValNodePtr macro, Int4Ptr pNumFields, Int4Ptr pNumFeat)
{
  ValNodePtr step;
  Int4       field_total = 0;
  Int4       feature_total = 0;

  for (step = macro; step != NULL; step = step->next) {
    switch (step->choice) {
      case MacroActionChoice_aecr:
        field_total += ApplyAECRActionToSeqEntry ((AECRActionPtr) step->data.ptrvalue, sep);
        break;
      case MacroActionChoice_parse:
        field_total += ApplyParseActionToSeqEntry ((ParseActionPtr) step->data.ptrvalue, sep);
        break;
      case MacroActionChoice_edit_location:
        field_total += ApplyEditFeatureLocationActionToSeqEntry ((EditFeatureLocationActionPtr) step->data.ptrvalue, sep);
        break;
      case MacroActionChoice_add_feature:
        feature_total += ApplyApplyFeatureActionToSeqEntry ((ApplyFeatureActionPtr) step->data.ptrvalue, sep);
        /* new features were created, so refresh the feature index */
        SeqMgrIndexFeatures (ObjMgrGetEntityIDForChoice(sep), NULL);
        break;
      case MacroActionChoice_remove_feature:
        feature_total += ApplyRemoveFeatureActionToSeqEntry ((RemoveFeatureActionPtr) step->data.ptrvalue, sep);
        break;
      default:
        break;
    }
  }

  if (pNumFields != NULL) {
    *pNumFields = field_total;
  }
  if (pNumFeat != NULL) {
    *pNumFeat = feature_total;
  }
}
|
|
9486 |
||
9487 |
||
9488 |
/* for generating text descriptions of macro objects */
|
|
9489 |
NLM_EXTERN CharPtr SummarizeSourceQual (ValNodePtr field) |
|
9490 |
{
|
|
9491 |
CharPtr summ = NULL, locname, origname; |
|
9492 |
Int4 genome, origin; |
|
9493 |
CharPtr loc_fmt = "location %s"; |
|
9494 |
CharPtr orig_fmt = "origin %s"; |
|
9495 |
||
9496 |
if (field == NULL) return NULL; |
|
9497 |
switch (field->choice) { |
|
9498 |
case SourceQualChoice_textqual: |
|
9499 |
summ = StringSave (GetSourceQualName (field->data.intvalue)); |
|
9500 |
break; |
|
9501 |
case SourceQualChoice_location: |
|
9502 |
genome = GenomeFromSrcLoc (field->data.intvalue); |
|
9503 |
locname = LocNameFromGenome (genome); |
|
9504 |
summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (loc_fmt) + StringLen (locname))); |
|
9505 |
sprintf (summ, loc_fmt, locname); |
|
9506 |
break; |
|
9507 |
case SourceQualChoice_origin: |
|
9508 |
origin = OriginFromSrcOrig (field->data.intvalue); |
|
9509 |
origname = OriginNameFromOrigin (origin); |
|
9510 |
summ = (CharPtr) MemNew (sizeof (Char) * (StringLen (orig_fmt) + StringLen (origname))); |
|
9511 |
sprintf (summ, orig_fmt, origname); |
|
9512 |
break; |
|
9513 |
}
|
|
9514 |
return summ; |
|
9515 |
}
|
|
9516 |
||
9517 |
||
9518 |
/* Builds a display label for a feature field.
 *
 * feature_name : name of the feature type; "Unknown feature" is used when
 *                this is NULL.
 * field        : feature qualifier choice node.
 *
 * Returns a newly allocated string:
 *   field == NULL        -> "missing field"
 *   legal qualifier      -> "<feature_name> <qualifier name>", with
 *                           "Unknown field type" standing in for a
 *                           qualifier that has no name
 *   illegal qualifier    -> "constrained field on <feature_name>"
 *   any other choice     -> "illegal field value"
 */
NLM_EXTERN CharPtr FeatureFieldLabel (CharPtr feature_name, ValNodePtr field)
{
  CharPtr qual_fmt = "%s %s";
  CharPtr constrained_fmt = "constrained field on %s";
  CharPtr qual_name;
  CharPtr result;

  if (field == NULL) {
    return StringSave ("missing field");
  }
  if (feature_name == NULL) {
    feature_name = "Unknown feature";
  }

  switch (field->choice) {
    case FeatQualChoice_legal_qual:
      qual_name = GetFeatQualName (field->data.intvalue);
      if (qual_name == NULL) {
        qual_name = "Unknown field type";
      }
      /* the conversion specifiers in the format leave room for the terminator */
      result = (CharPtr) MemNew (sizeof (Char) * (StringLen (qual_fmt) + StringLen (feature_name) + StringLen (qual_name)));
      sprintf (result, qual_fmt, feature_name, qual_name);
      break;
    case FeatQualChoice_illegal_qual:
      result = (CharPtr) MemNew (sizeof (Char) * (StringLen (constrained_fmt) + StringLen (feature_name)));
      sprintf (result, constrained_fmt, feature_name);
      break;
    default:
      result = StringSave ("illegal field value");
      break;
  }
  return result;
}
|
|
9544 |
||
9545 |
||
9546 |
/* Reports whether a feature field carries no qualifier choice:
 * TRUE when field is NULL or its field member is NULL, FALSE otherwise.
 */
NLM_EXTERN Boolean IsFeatureFieldEmpty (FeatureFieldPtr field)
{
  if (field != NULL && field->field != NULL) {
    return FALSE;
  }
  return TRUE;
}
|
|
9552 |
||
9553 |
||
9554 |
/* Reports whether a field-type choice node is empty.
 *
 *   NULL node            -> TRUE
 *   source qualifier     -> TRUE only when data.ptrvalue is NULL
 *   feature field        -> result of IsFeatureFieldEmpty on data.ptrvalue
 *   CDS-gene-prot field  -> always FALSE
 *   any other choice     -> TRUE
 *
 * NOTE(review): FieldType_molinfo_field is not handled here and therefore
 * counts as empty, although SummarizeFieldType does handle it - confirm
 * whether that is intended.
 */
NLM_EXTERN Boolean IsFieldTypeEmpty (FieldTypePtr field)
{
  if (field == NULL) {
    return TRUE;
  }

  switch (field->choice) {
    case FieldType_source_qual:
      return (field->data.ptrvalue == NULL) ? TRUE : FALSE;
    case FieldType_feature_field:
      return IsFeatureFieldEmpty (field->data.ptrvalue) ? TRUE : FALSE;
    case FieldType_cds_gene_prot:
      return FALSE;
    default:
      return TRUE;
  }
}
|
|
9576 |
||
9577 |
||
9578 |
/* Produces a text description of a field-type choice node.
 *
 *   NULL node           -> copy of "missing field"
 *   source qualifier    -> result of SummarizeSourceQual
 *   feature field       -> FeatureFieldLabel for the feature type name and
 *                          qualifier, or "missing field" when the feature
 *                          field or its qualifier is NULL
 *   CDS-gene-prot field -> copy of the field name, or
 *                          "Invalid CDS-Gene-Prot Field" when there is none
 *   molinfo field       -> result of GetSequenceQualName, or
 *                          "Invalid Sequence Qual Field" when that is NULL
 *   anything else       -> "Invalid field type"
 */
NLM_EXTERN CharPtr SummarizeFieldType (ValNodePtr vnp)
{
  FeatureFieldPtr feature_field;
  CharPtr         summary = NULL;

  if (vnp == NULL) {
    return StringSave ("missing field");
  }

  switch (vnp->choice) {
    case FieldType_source_qual:
      summary = SummarizeSourceQual (vnp->data.ptrvalue);
      break;
    case FieldType_feature_field:
      feature_field = (FeatureFieldPtr) vnp->data.ptrvalue;
      if (feature_field != NULL && feature_field->field != NULL) {
        summary = FeatureFieldLabel (GetFeatureNameFromFeatureType (feature_field->type),
                                     feature_field->field);
      } else {
        summary = StringSave ("missing field");
      }
      break;
    case FieldType_cds_gene_prot:
      summary = StringSaveNoNull (CDSGeneProtNameFromField (vnp->data.intvalue));
      if (summary == NULL) {
        summary = StringSave ("Invalid CDS-Gene-Prot Field");
      }
      break;
    case FieldType_molinfo_field:
      summary = GetSequenceQualName (vnp->data.ptrvalue);
      if (summary == NULL) {
        summary = StringSave ("Invalid Sequence Qual Field");
      }
      break;
    default:
      summary = StringSave ("Invalid field type");
      break;
  }
  return summary;
}
|
|
9619 |
||
9620 |
||
9621 |
/* for table readers that use the macro language functions */
|
|
9622 |
/* Allocates a table column configuration with no field assigned,
 * existing text set to be replaced, and blank values skipped.
 * Other members are not assigned here.
 */
NLM_EXTERN TabColumnConfigPtr TabColumnConfigNew (void)
{
  TabColumnConfigPtr config = (TabColumnConfigPtr) MemNew (sizeof (TabColumnConfigData));

  config->skip_blank = TRUE;
  config->existing_text = ExistingTextOption_replace_old;
  config->field = NULL;
  return config;
}
|
|
9632 |
||
9633 |
||
9634 |
||
9635 |
/* Frees a column configuration together with the field it holds.
 * A NULL argument is accepted.  Returns the value handed back by MemFree
 * (or NULL for a NULL argument) so callers can overwrite their pointer.
 */
NLM_EXTERN TabColumnConfigPtr TabColumnConfigFree (TabColumnConfigPtr t)
{
  if (t == NULL) {
    return NULL;
  }
  t->field = FieldTypeFree (t->field);
  return (TabColumnConfigPtr) MemFree (t);
}
|
|
9643 |
||
9644 |
||
9645 |
/* Duplicates a column configuration.  The match_type, existing_text,
 * skip_blank and match_mrna members are copied by value; the field is
 * duplicated through AsnIoMemCopy using the FieldType ASN.1 read/write
 * functions.  Returns NULL when orig is NULL.
 */
NLM_EXTERN TabColumnConfigPtr TabColumnConfigCopy (TabColumnConfigPtr orig)
{
  TabColumnConfigPtr dup;

  if (orig == NULL) {
    return NULL;
  }

  dup = TabColumnConfigNew ();
  dup->match_type = orig->match_type;
  dup->existing_text = orig->existing_text;
  dup->skip_blank = orig->skip_blank;
  dup->match_mrna = orig->match_mrna;
  dup->field = AsnIoMemCopy (orig->field, (AsnReadFunc) FieldTypeAsnRead, (AsnWriteFunc) FieldTypeAsnWrite);
  return dup;
}
|
|
9659 |
||
9660 |
||
9661 |
/* Frees a list of column configurations: each node's configuration is
 * released with TabColumnConfigFree and the node itself with ValNodeFree.
 * Always returns NULL.
 */
NLM_EXTERN ValNodePtr TabColumnConfigListFree (ValNodePtr columns)
{
  ValNodePtr node, following;

  for (node = columns; node != NULL; node = following) {
    following = node->next;
    node->data.ptrvalue = TabColumnConfigFree (node->data.ptrvalue);
    /* detach first so ValNodeFree releases this node only */
    node->next = NULL;
    ValNodeFree (node);
  }
  return NULL;
}
|
|
9674 |
||
9675 |
||
9676 |
/* Returns a new list holding a TabColumnConfigCopy of every entry of orig,
 * in the same order; each new node is added with choice 0.
 * Returns NULL for an empty list.
 */
NLM_EXTERN ValNodePtr TabColumnConfigListCopy (ValNodePtr orig)
{
  ValNodePtr copies = NULL;
  ValNodePtr src;

  for (src = orig; src != NULL; src = src->next) {
    ValNodeAddPointer (&copies, 0, TabColumnConfigCopy (src->data.ptrvalue));
  }
  return copies;
}
|
|
9688 |
||
9689 |
||
9690 |
||
9691 |
/* This checks the column names and returns a list of the feature fields */
|
|
9692 |
NLM_EXTERN ValNodePtr ValidateFeatureFieldColumnNames (ValNodePtr header_line, ValNodePtr PNTR perr_list) |
|
9693 |
{
|
|
9694 |
ValNodePtr header_vnp; |
|
9695 |
ValNodePtr err_list = NULL, col_list = NULL; |
|
9696 |
Boolean rval = TRUE; |
|
9697 |
TabColumnConfigPtr t; |
|
9698 |
FeatureFieldPtr field; |
|
9699 |
Int4 featqual, feat_type; |
|
9700 |
CharPtr first_space; |
|
9701 |
||
9702 |
if (header_line == NULL) |
|
9703 |
{
|
|
9704 |
return FALSE; |
|
9705 |
}
|
|
9706 |
||
9707 |
header_vnp = header_line->data.ptrvalue; |
|
9708 |
if (header_vnp == NULL || header_vnp->next == NULL) |
|
9709 |
{
|
|
9710 |
return FALSE; |
|
9711 |
}
|
|
9712 |
||
9713 |
/* skip ID column */
|
|
9714 |
header_vnp = header_vnp->next; |
|
9715 |
while (header_vnp != NULL && rval) |
|
9716 |
{
|
|
9717 |
first_space = StringChr (header_vnp->data.ptrvalue, ' '); |
|
9718 |
if (first_space != NULL) { |
|
9719 |
*first_space = 0; |
|
9720 |
feat_type = GetFeatureTypeByName (header_vnp->data.ptrvalue); |
|
9721 |
featqual = GetFeatQualByName (first_space + 1); |
|
9722 |
*first_space = ' '; |
|
9723 |
if (feat_type < 0 || featqual < 0) { |
|
9724 |
/* unable to recognize column name */
|
|
9725 |
ValNodeAddPointer (&err_list, 0, StringSave (header_vnp->data.ptrvalue)); |
|
9726 |
/* if we're not able to send back a list of errors, just quit now */
|
|
9727 |
if (perr_list == NULL) { |
|
9728 |
rval = FALSE; |
|
9729 |
}
|
|
9730 |
} else if (err_list == NULL) { |
|
9731 |
/* if we've already found errors, don't bother collecting more fields */
|
|
9732 |
field = FeatureFieldNew (); |
|
9733 |
field->type = feat_type; |
|
9734 |
field->field = ValNodeNew (NULL); |
|
9735 |
field->field->choice = FeatQualChoice_legal_qual; |
|
9736 |
field->field->data.intvalue = featqual; |
|
9737 |
t = TabColumnConfigNew (); |
|
9738 |
t->field = ValNodeNew (NULL); |
|
9739 |
t->field->choice = FieldType_feature_field; |
|
9740 |
t->field->data.ptrvalue = field; |
|
9741 |
ValNodeAddPointer (&col_list, 0, t); |
|
9742 |
}
|
|
9743 |
} else { |
|
9744 |
featqual = GetFeatQualByName (header_vnp->data.ptrvalue); |
|
9745 |
if (featqual < 0) { |
|
9746 |
/* unable to recognize column name */
|
|
9747 |
ValNodeAddPointer (&err_list, 0, StringSave (header_vnp->data.ptrvalue)); |
|
9748 |
/* if we're not able to send back a list of errors, just quit now */
|
|
9749 |
if (perr_list == NULL) { |
|
9750 |
rval = FALSE; |
|
9751 |
}
|
|
9752 |
} else if (err_list == NULL) { |
|
9753 |
/* if we've already found errors, don't bother collecting more fields */
|
|
9754 |
field = FeatureFieldNew (); |
|
9755 |
field->type = Feature_type_any; |
|
9756 |
field->field = ValNodeNew (NULL); |
|
9757 |
field->field->choice = FeatQualChoice_legal_qual; |
|
9758 |
field->field->data.intvalue = featqual; |
|
9759 |
t = TabColumnConfigNew (); |
|
9760 |
t->field = ValNodeNew (NULL); |
|
9761 |
t->field->choice = FieldType_feature_field; |
|
9762 |
t->field->data.ptrvalue = field; |
|
9763 |
ValNodeAddPointer (&col_list, 0, t); |
|
9764 |
}
|
|
9765 |
}
|
|
9766 |
header_vnp = header_vnp->next; |
|
9767 |
}
|
|
9768 |
if (err_list != NULL) { |
|
9769 |
col_list = TabColumnConfigListFree (col_list); |
|
9770 |
if (perr_list != NULL) { |
|
9771 |
*perr_list = err_list; |
|
9772 |
} else { |
|
9773 |
err_list = ValNodeFreeData (err_list); |
|
9774 |
}
|
|
9775 |
}
|
|
9776 |
return col_list; |
|
9777 |
}
|
|
9778 |
||
9779 |
typedef struct findgenelocustag { |
|
9780 |
CharPtr locus_tag; |
|
9781 |
ValNodePtr gene_list; |
|
9782 |
} FindGeneLocusTagData, PNTR FindGeneLocusTagPtr; |
|
9783 |
||
9784 |
/* Bioseq visitor: for a bioseq that satisfies ISA_na, looks up the gene
 * whose locus tag equals the one in userdata (a FindGeneLocusTagPtr) and,
 * if one is found, appends it to the gene list as an OBJ_SEQFEAT node.
 * Does nothing when bsp or userdata is NULL.
 */
static void FindGeneByLocusTagBioseqCallback (BioseqPtr bsp, Pointer userdata)
{
  FindGeneLocusTagPtr search = (FindGeneLocusTagPtr) userdata;
  SeqMgrFeatContext   context;
  SeqFeatPtr          match;

  if (bsp == NULL || search == NULL) {
    return;
  }
  if (!ISA_na (bsp->mol)) {
    return;
  }

  match = SeqMgrGetGeneByLocusTag (bsp, search->locus_tag, &context);
  if (match == NULL) {
    return;
  }
  ValNodeAddPointer (&search->gene_list, OBJ_SEQFEAT, match);
}
|
|
9801 |
||
9802 |
||
9803 |
typedef struct objbystr { |
|
9804 |
ValNodePtr obj_list; |
|
9805 |
CharPtr str; |
|
9806 |
} ObjByStrData, PNTR ObjByStrPtr; |
|
9807 |
||
9808 |
/* Feature visitor: appends sfp (as an OBJ_SEQFEAT node) to the object list
 * in userdata (an ObjByStrPtr) when one of the feature's dbxrefs has a tag
 * equal to the search string.
 *
 * A tag with a positive numeric id is compared through its decimal text;
 * otherwise the tag's string value is compared.  Only the tag is examined,
 * not the database name.  Nothing happens when sfp has no dbxrefs, when
 * userdata is NULL, or when the search string has no text.
 */
static void GetFeaturesByDbxrefCallback (SeqFeatPtr sfp, Pointer userdata)
{
  ObjByStrPtr p;
  ValNodePtr  vnp;
  DbtagPtr    dbt;
  Char        buf[20];
  Boolean     found = FALSE;

  if (sfp == NULL || sfp->dbxref == NULL || userdata == NULL) return;
  p = (ObjByStrPtr) userdata;

  if (StringHasNoText (p->str)) return;

  for (vnp = sfp->dbxref; vnp != NULL; vnp = vnp->next) {
    dbt = (DbtagPtr) vnp->data.ptrvalue;
    if (dbt == NULL || dbt->tag == NULL) {
      continue;
    }
    if (dbt->tag->id > 0) {
      /* cast so the argument matches the conversion whatever Int4 maps to */
      sprintf (buf, "%ld", (long) dbt->tag->id);
      found = (Boolean) (StringCmp (buf, p->str) == 0);
    } else {
      found = (Boolean) (StringCmp (dbt->tag->str, p->str) == 0);
    }
    if (found) {
      /* one matching dbxref is enough */
      break;
    }
  }

  if (found) {
    ValNodeAddPointer (&(p->obj_list), OBJ_SEQFEAT, sfp);
  }
}
|
|
9839 |
||
9840 |
||
9841 |
/* Visits every feature in sep with GetFeaturesByDbxrefCallback, using
 * dbxref as the search string, and returns the list the callback built
 * (NULL when nothing was collected).
 */
static ValNodePtr GetFeaturesByDbxref (SeqEntryPtr sep, CharPtr dbxref)
{
  ObjByStrData search;

  search.obj_list = NULL;
  search.str = dbxref;

  VisitFeaturesInSep (sep, &search, GetFeaturesByDbxrefCallback);
  return search.obj_list;
}
|
|
9850 |
||
9851 |
||
9852 |
/* Descriptor visitor: when sdp is a source descriptor whose organism
 * taxname equals the search string in userdata (an ObjByStrPtr), appends
 * sdp to the object list as an OBJ_SEQDESC node.  Nothing happens when the
 * search string has no text.
 */
static void GetBioSourcesByTaxNameDescriptorCallback (SeqDescrPtr sdp, Pointer userdata)
{
  ObjByStrPtr  search;
  BioSourcePtr source;

  if (sdp == NULL || userdata == NULL) {
    return;
  }
  if (sdp->choice != Seq_descr_source) {
    return;
  }

  search = (ObjByStrPtr) userdata;
  if (StringHasNoText (search->str)) {
    return;
  }

  source = (BioSourcePtr) sdp->data.ptrvalue;
  if (source == NULL || source->org == NULL) {
    return;
  }
  if (StringCmp (source->org->taxname, search->str) == 0) {
    ValNodeAddPointer (&(search->obj_list), OBJ_SEQDESC, sdp);
  }
}
|
|
9868 |
||
9869 |
||
9870 |
/* Feature visitor: when sfp is a biosource feature whose organism taxname
 * equals the search string in userdata (an ObjByStrPtr), appends sfp to the
 * object list as an OBJ_SEQFEAT node.  Nothing happens when the search
 * string has no text.
 */
static void GetBioSourcesByTaxNameFeatureCallback (SeqFeatPtr sfp, Pointer userdata)
{
  ObjByStrPtr  search;
  BioSourcePtr source;

  if (sfp == NULL || userdata == NULL) {
    return;
  }
  if (sfp->data.choice != SEQFEAT_BIOSRC) {
    return;
  }

  search = (ObjByStrPtr) userdata;
  if (StringHasNoText (search->str)) {
    return;
  }

  source = (BioSourcePtr) sfp->data.value.ptrvalue;
  if (source == NULL || source->org == NULL) {
    return;
  }
  if (StringCmp (source->org->taxname, search->str) == 0) {
    ValNodeAddPointer (&(search->obj_list), OBJ_SEQFEAT, sfp);
  }
}
|
|
9886 |
||
9887 |
||
9888 |
/* Collects the biosources in sep whose taxname equals taxname: source
 * descriptors are visited first, then biosource features, both adding to
 * the same list.  Returns that list (NULL when nothing was collected).
 */
static ValNodePtr GetBioSourcesByTaxName (SeqEntryPtr sep, CharPtr taxname)
{
  ObjByStrData search;

  search.obj_list = NULL;
  search.str = taxname;

  VisitDescriptorsInSep (sep, &search, GetBioSourcesByTaxNameDescriptorCallback);
  VisitFeaturesInSep (sep, &search, GetBioSourcesByTaxNameFeatureCallback);

  return search.obj_list;
}
|
|
9899 |
||
9900 |
||
9901 |
||
9902 |
static ValNodePtr |
|
9903 |
FindMatchForRow
|
|
9904 |
(ValNodePtr match_type, |
|
9905 |
Uint2 entityID, |
|
9906 |
SeqEntryPtr sep) |
|
9907 |
{
|
|
9908 |
ValNodePtr match_list = NULL; |
|
9909 |
SeqIdPtr sip; |
|
9910 |
BioseqPtr bsp, nbsp = NULL; |
|
9911 |
FindGeneLocusTagData fd; |
|
9912 |
SeqFeatPtr sfp; |
|
9913 |
SeqMgrFeatContext fcontext; |
|
9914 |
||
9915 |
if (match_type == NULL || sep == NULL) return NULL; |
|
9916 |
||
9917 |
switch (match_type->choice) { |
|
9918 |
case eTableMatchFeatureID: |
|
9919 |
sfp = SeqMgrGetFeatureByFeatID (entityID, NULL, match_type->data.ptrvalue, NULL, &fcontext); |
|
9920 |
if (sfp != NULL) { |
|
9921 |
ValNodeAddPointer (&match_list, OBJ_SEQFEAT, sfp); |
|
9922 |
}
|
|
9923 |
break; |
|
9924 |
case eTableMatchGeneLocusTag: |
|
9925 |
fd.locus_tag = match_type->data.ptrvalue; |
|
9926 |
fd.gene_list = NULL; |
|
9927 |
VisitBioseqsInSep (sep, &fd, FindGeneByLocusTagBioseqCallback); |
|
9928 |
ValNodeLink (&match_list, fd.gene_list); |
|
9929 |
break; |
|
9930 |
case eTableMatchProteinID: |
|
9931 |
case eTableMatchNucID: |
|
9932 |
sip = CreateSeqIdFromText (match_type->data.ptrvalue, sep); |
|
9933 |
bsp = BioseqFind (sip); |
|
9934 |
sip = SeqIdFree (sip); |
|
9935 |
if (bsp != NULL) |
|
9936 |
{
|
|
9937 |
ValNodeAddPointer (&match_list, OBJ_BIOSEQ, bsp); |
|
9938 |
}
|
|
9939 |
break; |
|
9940 |
case eTableMatchDbxref: |
|
9941 |
match_list = GetFeaturesByDbxref (sep, match_type->data.ptrvalue); |
|
9942 |
break; |
|
9943 |
case eTableMatchBioSource: |
|
9944 |
match_list = GetBioSourcesByTaxName (sep, match_type->data.ptrvalue); |
|
9945 |
break; |
|
9946 |
}
|
|
9947 |
return match_list; |
|
9948 |
}
|
|
9949 |
||
9950 |
||
9951 |
/* Lists the features of kind featdef that belong to a protein bioseq.
 *
 * featdef : feature definition to look for.
 * bsp     : bioseq; must satisfy ISA_aa, otherwise NULL is returned.
 *
 * When featdef maps to SEQFEAT_PROT, every such feature indexed on bsp is
 * returned.  Otherwise the CDS whose product is bsp is located and the
 * result is that CDS (FEATDEF_CDS), the gene for it (FEATDEF_GENE) or the
 * mRNA overlapping its location (FEATDEF_mRNA); any other featdef yields
 * an empty list.
 *
 * Returns a list of OBJ_SEQFEAT nodes, or NULL when nothing was found.
 */
static ValNodePtr GetFeatureListForProteinBioseq (Uint1 featdef, BioseqPtr bsp)
{
  ValNodePtr        feat_list = NULL;
  SeqFeatPtr        sfp, cds;
  SeqMgrFeatContext fcontext;
  Int4              seqfeattype;

  if (bsp == NULL || !ISA_aa (bsp->mol))
  {
    return NULL;
  }

  seqfeattype = FindFeatFromFeatDefType (featdef);
  if (seqfeattype == SEQFEAT_PROT)
  {
    for (sfp = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &fcontext);
         sfp != NULL;
         sfp = SeqMgrGetNextFeature (bsp, sfp, 0, featdef, &fcontext))
    {
      ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, sfp);
    }
  }
  else
  {
    cds = SeqMgrGetCDSgivenProduct (bsp, NULL);
    if (cds != NULL)
    {
      /* start from "no match": a featdef other than CDS, gene or mRNA
       * must add nothing rather than test an unset pointer
       */
      sfp = NULL;
      if (featdef == FEATDEF_CDS)
      {
        sfp = cds;
      }
      else if (featdef == FEATDEF_GENE)
      {
        sfp = GetGeneForFeature (cds);
      }
      else if (featdef == FEATDEF_mRNA)
      {
        sfp = SeqMgrGetOverlappingmRNA (cds->location, &fcontext);
      }
      if (sfp != NULL)
      {
        ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, sfp);
      }
    }
  }
  return feat_list;
}
|
|
9998 |
||
9999 |
||
10000 |
/* Lists the features of kind featdef associated with a non-protein bioseq.
 *
 * When featdef maps to SEQFEAT_PROT, each CDS indexed on bsp is followed to
 * its product bioseq and the protein features found there (through
 * GetFeatureListForProteinBioseq) are gathered.  Otherwise the features of
 * kind featdef indexed on bsp itself are returned as OBJ_SEQFEAT nodes.
 *
 * Returns NULL when bsp is NULL, when bsp satisfies ISA_aa, or when nothing
 * was found.
 */
static ValNodePtr GetFeatureListForNucleotideBioseq (Uint1 featdef, BioseqPtr bsp)
{
  ValNodePtr        results = NULL;
  SeqFeatPtr        feat;
  SeqMgrFeatContext context;
  BioseqPtr         product_bsp;

  if (bsp == NULL || ISA_aa (bsp->mol))
  {
    return NULL;
  }

  if (FindFeatFromFeatDefType (featdef) != SEQFEAT_PROT)
  {
    /* the requested features sit directly on this bioseq */
    feat = SeqMgrGetNextFeature (bsp, NULL, 0, featdef, &context);
    while (feat != NULL)
    {
      ValNodeAddPointer (&results, OBJ_SEQFEAT, feat);
      feat = SeqMgrGetNextFeature (bsp, feat, 0, featdef, &context);
    }
    return results;
  }

  /* protein features: reach them through each coding region's product */
  feat = SeqMgrGetNextFeature (bsp, NULL, 0, FEATDEF_CDS, &context);
  while (feat != NULL)
  {
    product_bsp = BioseqFindFromSeqLoc (feat->product);
    ValNodeLink (&results, GetFeatureListForProteinBioseq (featdef, product_bsp));
    feat = SeqMgrGetNextFeature (bsp, feat, 0, FEATDEF_CDS, &context);
  }
  return results;
}
|
|
10035 |
||
10036 |
||
10037 |
/* Lists the features of kind featdef, on the bioseq where gene is located,
 * whose gene (per GetGeneForFeature) is gene.
 *
 * The scan walks the indexed features in order and stops at the first one
 * whose left position is not below the gene's upper coordinate; features
 * whose right position is below the gene's lower coordinate are skipped.
 *
 * Returns a list of OBJ_SEQFEAT nodes, or NULL when gene is NULL or nothing
 * qualifies.
 */
static ValNodePtr GetFeaturesForGene (SeqFeatPtr gene, Uint1 featdef)
{
  ValNodePtr        results = NULL;
  SeqMgrFeatContext context;
  SeqFeatPtr        feat;
  BioseqPtr         gene_bsp;
  Int4              low, high, tmp;

  if (gene == NULL) {
    return NULL;
  }

  gene_bsp = BioseqFindFromSeqLoc (gene->location);
  low = SeqLocStart (gene->location);
  high = SeqLocStop (gene->location);
  if (high < low)
  {
    /* make sure low/high are in ascending order */
    tmp = high;
    high = low;
    low = tmp;
  }

  feat = SeqMgrGetNextFeature (gene_bsp, NULL, 0, featdef, &context);
  while (feat != NULL && context.left < high)
  {
    if (context.right >= low && GetGeneForFeature (feat) == gene)
    {
      ValNodeAddPointer (&results, OBJ_SEQFEAT, feat);
    }
    feat = SeqMgrGetNextFeature (gene_bsp, feat, 0, featdef, &context);
  }
  return results;
}
|
|
10067 |
||
10068 |
||
10069 |
/* Lists the features of kind featdef that belong to a gene.
 *
 *   FEATDEF_GENE              -> the gene itself
 *   featdef of a protein kind -> for each CDS of the gene, the features of
 *                                kind featdef indexed on the CDS product
 *   anything else             -> features of kind featdef that
 *                                GetFeaturesForGene attributes to the gene
 *
 * Returns a list of OBJ_SEQFEAT nodes, or NULL when gene is NULL or nothing
 * was found.
 */
static ValNodePtr GetFeatureListForGene (Uint1 featdef, SeqFeatPtr gene)
{
  ValNodePtr        feat_list = NULL, cds_list, vnp;
  SeqFeatPtr        sfp, cds;
  SeqMgrFeatContext fcontext;
  BioseqPtr         protbsp;

  if (gene == NULL)
  {
    return NULL;
  }

  if (featdef == FEATDEF_GENE)
  {
    ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, gene);
  }
  /* compare the mapped feature type, not the featdef itself, so that every
   * protein-level featdef takes this branch
   */
  else if (FindFeatFromFeatDefType (featdef) == SEQFEAT_PROT)
  {
    cds_list = GetFeaturesForGene (gene, FEATDEF_CDS);
    for (vnp = cds_list; vnp != NULL; vnp = vnp->next)
    {
      cds = vnp->data.ptrvalue;
      if (cds != NULL)
      {
        protbsp = BioseqFindFromSeqLoc (cds->product);
        for (sfp = SeqMgrGetNextFeature (protbsp, NULL, 0, featdef, &fcontext);
             sfp != NULL;
             sfp = SeqMgrGetNextFeature (protbsp, sfp, 0, featdef, &fcontext))
        {
          ValNodeAddPointer (&feat_list, OBJ_SEQFEAT, sfp);
        }
      }
    }
    /* only the temporary nodes are released; the CDS features remain */
    cds_list = ValNodeFree (cds_list);
  }
  else
  {
    feat_list = GetFeaturesForGene (gene, featdef);
  }

  return feat_list;
}
|
|
10111 |
||
10112 |
||
10113 |
/* Gathers the features of kind featdef from every bioseq in a set,
 * descending into nested sets.
 *
 * When featdef maps to SEQFEAT_PROT only bioseqs satisfying ISA_aa are
 * consulted (via GetFeatureListForProteinBioseq); otherwise only the other
 * bioseqs are consulted (via GetFeatureListForNucleotideBioseq).  Entries
 * with no data are skipped.
 *
 * Returns the combined list, or NULL when bssp is NULL or nothing is found.
 */
static ValNodePtr AddFeaturesFromBioseqSet (BioseqSetPtr bssp, Uint1 featdef)
{
  ValNodePtr  collected = NULL;
  SeqEntryPtr entry;
  BioseqPtr   member;
  Boolean     want_protein;
  Boolean     is_protein;

  if (bssp == NULL) {
    return NULL;
  }

  want_protein = (Boolean) (FindFeatFromFeatDefType (featdef) == SEQFEAT_PROT);

  for (entry = bssp->seq_set; entry != NULL; entry = entry->next) {
    if (entry->data.ptrvalue == NULL) {
      continue;
    }
    if (IS_Bioseq (entry)) {
      member = entry->data.ptrvalue;
      is_protein = (Boolean) (ISA_aa (member->mol) ? TRUE : FALSE);
      if (want_protein && is_protein) {
        ValNodeLink (&collected, GetFeatureListForProteinBioseq (featdef, member));
      } else if (!want_protein && !is_protein) {
        ValNodeLink (&collected, GetFeatureListForNucleotideBioseq (featdef, member));
      }
    } else if (IS_Bioseq_set (entry)) {
      ValNodeLink (&collected, AddFeaturesFromBioseqSet (entry->data.ptrvalue, featdef));
    }
  }
  return collected;
}
|
|
10140 |
||
10141 |
||
10142 |
/* Expands a list of biosource objects into the features of the type named
 * by field->type that are associated with them.
 *
 *   OBJ_SEQFEAT item : features on the bioseq where the item is located.
 *   OBJ_SEQDESC item : only descriptors flagged as extended are used; the
 *                      features come from the descriptor's parent, which is
 *                      either a bioseq set (searched recursively) or a
 *                      single bioseq.
 * Items of any other kind, and NULL items, are ignored.
 *
 * Returns the combined list, or NULL when item_list or field is NULL or
 * nothing is found.
 */
static ValNodePtr GetFeatureListForBioSourceObjects (ValNodePtr item_list, FeatureFieldPtr field)
{
  ValNodePtr    collected = NULL;
  ValNodePtr    item;
  SeqFeatPtr    src_feat;
  SeqDescrPtr   src_desc;
  ObjValNodePtr indexed;
  BioseqPtr     owner;

  if (item_list == NULL || field == NULL) {
    return NULL;
  }

  for (item = item_list; item != NULL; item = item->next) {
    switch (item->choice) {
      case OBJ_SEQFEAT:
        src_feat = item->data.ptrvalue;
        if (src_feat == NULL) {
          break;
        }
        owner = BioseqFindFromSeqLoc (src_feat->location);
        ValNodeLink (&collected, GetFeatureListForNucleotideBioseq (GetFeatdefFromFeatureType(field->type), owner));
        break;

      case OBJ_SEQDESC:
        src_desc = item->data.ptrvalue;
        if (src_desc == NULL || src_desc->extended == 0) {
          break;
        }
        /* an extended descriptor can be viewed as an ObjValNode with parent info */
        indexed = (ObjValNodePtr) src_desc;
        if (indexed->idx.parenttype == OBJ_BIOSEQSET) {
          ValNodeLink (&collected, AddFeaturesFromBioseqSet (indexed->idx.parentptr, GetFeatdefFromFeatureType(field->type)));
        } else if (indexed->idx.parenttype == OBJ_BIOSEQ) {
          owner = (BioseqPtr) indexed->idx.parentptr;
          ValNodeLink (&collected, GetFeatureListForNucleotideBioseq (GetFeatdefFromFeatureType(field->type), owner));
        }
        break;

      default:
        break;
    }
  }
  return collected;
}
|
|
10175 |
||
10176 |
||
10177 |
/* Makes a shallow copy of a ValNode list: every new node receives the
 * choice and data.ptrvalue of the corresponding original node, so both
 * lists refer to the same objects.  Order is preserved.
 * Returns NULL for an empty list.
 */
static ValNodePtr ValNodeCopyPtr (ValNodePtr orig)
{
  ValNodePtr      head = NULL;
  ValNodePtr PNTR link = &head;   /* slot where the next copy gets attached */
  ValNodePtr      node;

  for (; orig != NULL; orig = orig->next) {
    node = ValNodeNew (NULL);
    node->choice = orig->choice;
    node->data.ptrvalue = orig->data.ptrvalue;
    *link = node;
    link = &(node->next);
  }
  return head;
}
|
|
10195 |
||
10196 |
||
10197 |
/* Turns the objects matched for a table row into the list of features a
 * feature-field column applies to.
 *
 *   feature ID, dbxref : the matched features themselves (shallow copy)
 *   gene locus tag     : features of the column's type for each matched gene
 *   protein ID         : features of the column's type for each protein bioseq
 *   nucleotide ID      : features of the column's type for each nucleotide bioseq
 *   biosource          : features associated with the matched biosources
 *
 * Returns NULL when match_list or field is NULL, for any other match type,
 * or when nothing is found.
 */
static ValNodePtr GetFeatureListForRowAndColumn (Uint1 match_type, ValNodePtr match_list, FeatureFieldPtr field)
{
  ValNodePtr targets = NULL;
  ValNodePtr item;

  if (match_list == NULL || field == NULL) {
    return NULL;
  }

  switch (match_type) {
    case eTableMatchFeatureID:
    case eTableMatchDbxref:
      targets = ValNodeCopyPtr (match_list);
      break;

    case eTableMatchGeneLocusTag:
      item = match_list;
      while (item != NULL) {
        ValNodeLink (&targets, GetFeatureListForGene (GetFeatdefFromFeatureType(field->type), item->data.ptrvalue));
        item = item->next;
      }
      break;

    case eTableMatchProteinID:
      item = match_list;
      while (item != NULL) {
        ValNodeLink (&targets, GetFeatureListForProteinBioseq (GetFeatdefFromFeatureType(field->type), item->data.ptrvalue));
        item = item->next;
      }
      break;

    case eTableMatchNucID:
      item = match_list;
      while (item != NULL) {
        ValNodeLink (&targets, GetFeatureListForNucleotideBioseq (GetFeatdefFromFeatureType(field->type), item->data.ptrvalue));
        item = item->next;
      }
      break;

    case eTableMatchBioSource:
      ValNodeLink (&targets, GetFeatureListForBioSourceObjects (match_list, field));
      break;

    default:
      break;
  }
  return targets;
}
|
|
10231 |
||
10232 |
||
10233 |
/* Appends every source descriptor that SeqMgrGetNextDescriptor reports for
 * bsp to *feature_list as an OBJ_SEQDESC node.
 * Does nothing when feature_list is NULL.
 */
static void AddBioSourcesForBioseq (BioseqPtr bsp, ValNodePtr PNTR feature_list)
{
  SeqMgrDescContext desc_context;
  SeqDescrPtr       source_desc;

  if (feature_list == NULL) {
    return;
  }

  source_desc = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_source, &desc_context);
  while (source_desc != NULL) {
    ValNodeAddPointer (feature_list, OBJ_SEQDESC, source_desc);
    source_desc = SeqMgrGetNextDescriptor (bsp, source_desc, Seq_descr_source, &desc_context);
  }
}
|
|
10245 |
||
10246 |
/* Adds the biosource(s) relevant to a feature to *feature_list.
 * A biosource feature is added itself as an OBJ_SEQFEAT node; for any other
 * feature, the source descriptors of the bioseq where the feature is
 * located are added.  Does nothing when sfp or feature_list is NULL.
 */
static void AddBioSourcesForFeature (SeqFeatPtr sfp, ValNodePtr PNTR feature_list)
{
  if (sfp == NULL || feature_list == NULL) {
    return;
  }

  if (sfp->data.choice != SEQFEAT_BIOSRC) {
    AddBioSourcesForBioseq (BioseqFindFromSeqLoc (sfp->location), feature_list);
    return;
  }
  ValNodeAddPointer (feature_list, OBJ_SEQFEAT, sfp);
}
|
|
10259 |
||
10260 |
||
10261 |
/* Turns the objects matched for a table row into the list of biosources a
 * source-qualifier column applies to.
 *
 *   feature ID, gene locus tag, dbxref : biosources for each matched
 *                                        non-NULL OBJ_SEQFEAT item
 *   protein ID, nucleotide ID          : source descriptors for each
 *                                        OBJ_BIOSEQ item
 *   biosource                          : the matched items themselves
 *                                        (shallow copy)
 *
 * field is only checked for NULL here.  Returns NULL when match_list or
 * field is NULL, for any other match type, or when nothing is found.
 */
static ValNodePtr GetBioSourceListForRowAndColumn (Uint1 match_type, ValNodePtr match_list, FeatureFieldPtr field)
{
  ValNodePtr sources = NULL;
  ValNodePtr item;

  if (match_list == NULL || field == NULL) {
    return NULL;
  }

  switch (match_type) {
    case eTableMatchFeatureID:
    case eTableMatchGeneLocusTag:
    case eTableMatchDbxref:
      /* all three kinds of match yield features */
      for (item = match_list; item != NULL; item = item->next) {
        if (item->choice != OBJ_SEQFEAT || item->data.ptrvalue == NULL) {
          continue;
        }
        AddBioSourcesForFeature (item->data.ptrvalue, &sources);
      }
      break;

    case eTableMatchProteinID:
    case eTableMatchNucID:
      for (item = match_list; item != NULL; item = item->next) {
        if (item->choice != OBJ_BIOSEQ) {
          continue;
        }
        AddBioSourcesForBioseq (item->data.ptrvalue, &sources);
      }
      break;

    case eTableMatchBioSource:
      sources = ValNodeCopyPtr (match_list);
      break;

    default:
      break;
  }
  return sources;
}
|
|
10303 |
||
10304 |
||
10305 |
/* Dispatches on the column's field type to build the list of objects that a
 * table cell should be applied to.
 *
 * Returns NULL when field is NULL or its choice is not one of the three
 * handled FieldType values; otherwise returns whatever the per-type list
 * builder produced.
 */
static ValNodePtr GetTargetListForRowAndColumn (Uint1 match_type, ValNodePtr match_list, FieldTypePtr field)
{
  FeatureFieldPtr tmp_field;
  ValNodePtr      targets;

  if (field == NULL) {
    return NULL;
  }

  if (field->choice == FieldType_source_qual) {
    return GetBioSourceListForRowAndColumn (match_type, match_list, field->data.ptrvalue);
  }

  if (field->choice == FieldType_feature_field) {
    return GetFeatureListForRowAndColumn (match_type, match_list, field->data.ptrvalue);
  }

  if (field->choice == FieldType_cds_gene_prot) {
    /* translate to a temporary feature field, use it, then release it */
    tmp_field = FeatureFieldFromCDSGeneProtField (field->data.intvalue);
    targets = GetFeatureListForRowAndColumn (match_type, match_list, tmp_field);
    tmp_field = FeatureFieldFree (tmp_field);
    return targets;
  }

  return NULL;
}
|
|
10326 |
||
10327 |
||
10328 |
/* Appends a "no target found" message for one table cell to *perr_list.
 *
 * perr_list  list that receives the newly allocated message string
 * ft         field type of the column; selects the wording of the message
 * match_val  text of the row's ID cell, echoed in the message
 * col_num    column number reported in the message
 * line_num   line number reported in the message
 *
 * Nothing is added when perr_list, ft or match_val is NULL, when ft->choice
 * is not a handled value, or when no feature field is available to name the
 * feature type.
 */
static void ReportMissingTargets (ValNodePtr PNTR perr_list, FieldTypePtr ft, CharPtr match_val, Int4 col_num, Int4 line_num)
{
  CharPtr         no_feat_fmt = "No %s feature for %s (column %d, line %d)";
  CharPtr         no_src_fmt = "No biosource for %s (column %d, line %d)";
  CharPtr         feat_name;
  CharPtr         err_msg;
  FeatureFieldPtr field = NULL;
  Boolean         owns_field = FALSE;

  if (perr_list == NULL || ft == NULL || match_val == NULL) return;

  switch (ft->choice) {
    case FieldType_source_qual:
      /* size the buffer from the format that is actually printed;
       * the extra 30 leaves room for the two integers and the terminator */
      err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_src_fmt)
                                                   + StringLen (match_val)
                                                   + 30));
      sprintf (err_msg, no_src_fmt, match_val, col_num, line_num);
      ValNodeAddPointer (perr_list, 0, err_msg);
      return;
    case FieldType_feature_field:
      /* borrowed from the column configuration; must not be freed here */
      field = (FeatureFieldPtr) ft->data.ptrvalue;
      break;
    case FieldType_cds_gene_prot:
      /* temporary object created for this call; released below */
      field = FeatureFieldFromCDSGeneProtField (ft->data.intvalue);
      owns_field = TRUE;
      break;
    default:
      return;
  }

  if (field != NULL) {
    feat_name = GetFeatureNameFromFeatureType (field->type);
    err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_feat_fmt)
                                                 + StringLen (feat_name)
                                                 + StringLen (match_val)
                                                 + 30));
    sprintf (err_msg, no_feat_fmt, feat_name, match_val, col_num, line_num);
    ValNodeAddPointer (perr_list, 0, err_msg);
  }

  if (owns_field) {
    field = FeatureFieldFree (field);
  }
}
|
|
10373 |
||
10374 |
||
10375 |
/* Appends a newly allocated "No ID for line N" message to *perr_list. */
static void ReportEmptyIDColumn (ValNodePtr PNTR perr_list, Int4 line_num)
{
  CharPtr fmt = "No ID for line %d";
  CharPtr msg;

  /* 15 extra characters cover the printed line number and the terminator */
  msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + 15));
  sprintf (msg, fmt, line_num);
  ValNodeAddPointer (perr_list, 0, msg);
}
|
|
10384 |
||
10385 |
/* Walks a table line and the column configuration in parallel and returns the
 * first cell whose column has match_type > 0.
 *
 * Side effect: the returned cell's choice is overwritten with that column's
 * match_type. Returns NULL when either list runs out first.
 */
static ValNodePtr FindMatchChoiceInLine (ValNodePtr val_vnp, ValNodePtr col_vnp)
{
  TabColumnConfigPtr cfg;
  ValNodePtr         cell;
  ValNodePtr         col;

  for (cell = val_vnp, col = col_vnp;
       cell != NULL && col != NULL;
       cell = cell->next, col = col->next) {
    cfg = (TabColumnConfigPtr) col->data.ptrvalue;
    if (cfg == NULL || !(cfg->match_type > 0)) {
      continue;
    }
    cell->choice = (Uint1) cfg->match_type;
    return cell;
  }

  return NULL;
}
|
|
10400 |
||
10401 |
||
10402 |
/* Finds the mRNA feature overlapping the given feature's location.
 *
 * For a protein feature, the lookup first hops to the coding region whose
 * product is the Bioseq at the protein feature's location, and uses that
 * coding region's location instead. Returns NULL when sfp is NULL, when that
 * coding region cannot be found, or when the overlap search finds nothing.
 */
NLM_EXTERN SeqFeatPtr GetmRNAForFeature (SeqFeatPtr sfp)
{
  SeqMgrFeatContext context;
  SeqFeatPtr        anchor = sfp;
  BioseqPtr         prot_bsp;

  if (anchor == NULL) {
    return NULL;
  }

  if (anchor->data.choice == SEQFEAT_PROT) {
    prot_bsp = BioseqFindFromSeqLoc (anchor->location);
    anchor = SeqMgrGetCDSgivenProduct (prot_bsp, NULL);
    if (anchor == NULL) {
      return NULL;
    }
  }

  return SeqMgrGetOverlappingmRNA (anchor->location, &context);
}
|
|
10416 |
||
10417 |
||
10418 |
/* Copies the first name of a protein feature into the ext value of the mRNA
 * located through GetmRNAForFeature.
 *
 * Returns FALSE when sfp is NULL, is not a protein feature, or no mRNA is
 * found; TRUE otherwise. When the protein has no usable name the mRNA's ext
 * value is cleared and ext.choice is set to 0; otherwise ext.choice is set to
 * 1 and the value becomes a saved copy of the name.
 */
NLM_EXTERN Boolean AdjustmRNAProductToMatchProteinProduct (SeqFeatPtr sfp)
{
  SeqFeatPtr mrna_feat;
  ProtRefPtr prot;
  RnaRefPtr  rna;
  Boolean    has_name;

  if (sfp == NULL || sfp->data.choice != SEQFEAT_PROT) {
    return FALSE;
  }

  prot = (ProtRefPtr) sfp->data.value.ptrvalue;

  mrna_feat = GetmRNAForFeature (sfp);
  if (mrna_feat == NULL) {
    return FALSE;
  }

  /* make sure the mRNA has an RnaRef to write into */
  rna = (RnaRefPtr) mrna_feat->data.value.ptrvalue;
  if (rna == NULL) {
    rna = RnaRefNew ();
    mrna_feat->data.value.ptrvalue = rna;
  }

  /* discard whatever ext value was there before */
  rna->ext.value.ptrvalue = MemFree (rna->ext.value.ptrvalue);

  has_name = (Boolean) (prot != NULL
                        && prot->name != NULL
                        && !StringHasNoText (prot->name->data.ptrvalue));
  if (has_name) {
    rna->ext.choice = 1;
    rna->ext.value.ptrvalue = StringSave (prot->name->data.ptrvalue);
  } else {
    rna->ext.choice = 0;
  }

  return TRUE;
}
|
|
10450 |
||
10451 |
||
10452 |
/* Returns TRUE when ft designates the coding-region product: either a
 * feature field of type Feature_type_cds whose qualifier is the legal
 * qualifier Feat_qual_legal_product, or the CDS-gene-prot field
 * CDSGeneProt_field_prot_name. Returns FALSE for anything else, including NULL.
 */
NLM_EXTERN Boolean IsFieldTypeCDSProduct (FieldTypePtr ft)
{
  FeatureFieldPtr ff;

  if (ft == NULL) {
    return FALSE;
  }

  if (ft->choice == FieldType_cds_gene_prot) {
    return (Boolean) (ft->data.intvalue == CDSGeneProt_field_prot_name);
  }

  if (ft->choice != FieldType_feature_field) {
    return FALSE;
  }

  ff = (FeatureFieldPtr) ft->data.ptrvalue;
  if (ff == NULL || ff->type != Feature_type_cds || ff->field == NULL) {
    return FALSE;
  }

  return (Boolean) (ff->field->choice == FeatQualChoice_legal_qual
                    && ff->field->data.intvalue == Feat_qual_legal_product);
}
|
|
10473 |
||
10474 |
||
10475 |
/* Returns TRUE when ft designates the gene locus tag: either a feature field
 * of type Feature_type_gene whose qualifier is the legal qualifier
 * Feat_qual_legal_locus_tag, or the CDS-gene-prot field
 * CDSGeneProt_field_gene_locus_tag. Returns FALSE otherwise, including NULL.
 */
static Boolean IsFieldTypeGeneLocusTag (FieldTypePtr ft)
{
  FeatureFieldPtr ff;

  if (ft == NULL) {
    return FALSE;
  }

  if (ft->choice == FieldType_cds_gene_prot) {
    return (Boolean) (ft->data.intvalue == CDSGeneProt_field_gene_locus_tag);
  }

  if (ft->choice != FieldType_feature_field) {
    return FALSE;
  }

  ff = (FeatureFieldPtr) ft->data.ptrvalue;
  if (ff == NULL || ff->type != Feature_type_gene || ff->field == NULL) {
    return FALSE;
  }

  return (Boolean) (ff->field->choice == FeatQualChoice_legal_qual
                    && ff->field->data.intvalue == Feat_qual_legal_locus_tag);
}
|
|
10496 |
||
10497 |
||
10498 |
||
10499 |
/* Checks the locus-tag values found in a tab table.
 *
 * Every non-blank cell that sits in a non-match column whose field is the
 * gene locus tag is gathered and passed to FindBadLocusTagsInList; one
 * message is produced per entry that comes back with a recognised error code.
 *
 * Returns a list of newly allocated message strings, or NULL when table or
 * columns is NULL or nothing was reported.
 */
NLM_EXTERN ValNodePtr ValidateTabTableValues (ValNodePtr table, ValNodePtr columns)
{
  CharPtr            bad_format_fmt = "Locus tag %s has incorrect format";
  CharPtr            dup_fmt = "Locus tag %s appears in the table more than once";
  CharPtr            inconsistent_fmt = "Locus tag prefix for %s is inconsistent";
  ValNodePtr         problems = NULL;
  ValNodePtr         tags = NULL;
  ValNodePtr         flagged;
  ValNodePtr         row, cell, col, item;
  TabColumnConfigPtr cfg;
  CharPtr            fmt;
  CharPtr            msg;

  if (table == NULL || columns == NULL) {
    return NULL;
  }

  /* gather the locus-tag cells; the strings are borrowed from the table */
  for (row = table; row != NULL; row = row->next) {
    cell = row->data.ptrvalue;
    col = columns;
    while (cell != NULL && col != NULL) {
      cfg = (TabColumnConfigPtr) col->data.ptrvalue;
      if (cfg != NULL
          && !(cfg->match_type > 0)
          && !StringHasNoText (cell->data.ptrvalue)
          && IsFieldTypeGeneLocusTag (cfg->field)) {
        ValNodeAddPointer (&tags, 0, cell->data.ptrvalue);
      }
      cell = cell->next;
      col = col->next;
    }
  }

  /* NOTE(review): the list returned here is never released in this function;
   * confirm who owns it before adding a free. */
  flagged = FindBadLocusTagsInList (tags);
  for (item = flagged; item != NULL; item = item->next) {
    switch (item->choice) {
      case eLocusTagErrorBadFormat:
        fmt = bad_format_fmt;
        break;
      case eLocusTagErrorDuplicate:
        fmt = dup_fmt;
        break;
      case eLocusTagErrorInconsistentPrefix:
        fmt = inconsistent_fmt;
        break;
      default:
        fmt = NULL;
        break;
    }
    if (fmt == NULL) {
      continue;
    }
    /* the two characters of "%s" in the format leave room for the terminator */
    msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (fmt) + StringLen (item->data.ptrvalue)));
    sprintf (msg, fmt, item->data.ptrvalue);
    ValNodeAddPointer (&problems, 0, msg);
  }

  /* only the nodes are freed; the tag strings still belong to the table */
  tags = ValNodeFree (tags);
  return problems;
}
|
|
10554 |
||
10555 |
||
10556 |
/* Builds, for every table line, a row holding one target list per column.
 *
 * sep         entry searched for matches
 * table       list of lines; each line's data.ptrvalue is a list of cells
 * columns     list of TabColumnConfigPtr, parallel to the cells of a line
 * p_err_list  optional; receives the list of message strings when any were
 *             produced (it is left untouched when there are none)
 *
 * Returns a list with one node per table line. A line with no ID or no match
 * contributes a node whose data.ptrvalue is NULL; otherwise the node holds a
 * list with one entry per column (NULL entries for columns that are skipped
 * or had no targets). Returns NULL when sep, table or columns is NULL.
 */
NLM_EXTERN ValNodePtr GetObjectTableForTabTable (SeqEntryPtr sep, ValNodePtr table, ValNodePtr columns, ValNodePtr PNTR p_err_list)
{
  CharPtr            no_match_fmt = "No match for %s, line %d";
  ValNodePtr         problems = NULL;
  ValNodePtr         result = NULL;
  ValNodePtr         row, cell, col;
  ValNodePtr         row_targets, targets;
  ValNodePtr         id_cell, matches;
  TabColumnConfigPtr cfg;
  CharPtr            msg;
  Int4               line_no, col_no;
  Uint2              entityID;
  Boolean            wanted;

  if (sep == NULL) {
    ValNodeAddPointer (&problems, 0, StringSave ("No SeqEntry"));
  }
  if (table == NULL) {
    ValNodeAddPointer (&problems, 0, StringSave ("No table"));
  }
  if (columns == NULL) {
    ValNodeAddPointer (&problems, 0, StringSave ("No column information"));
  }
  if (problems != NULL) {
    if (p_err_list != NULL) {
      *p_err_list = problems;
    } else {
      problems = ValNodeFreeData (problems);
    }
    return NULL;
  }

  entityID = SeqMgrGetEntityIDForSeqEntry (sep);

  line_no = 0;
  for (row = table; row != NULL; row = row->next) {
    line_no++;
    row_targets = NULL;
    matches = NULL;

    id_cell = FindMatchChoiceInLine (row->data.ptrvalue, columns);
    if (id_cell == NULL || StringHasNoText (id_cell->data.ptrvalue)) {
      ReportEmptyIDColumn (&problems, line_no);
    } else {
      /* NOTE(review): the match list is not released in this function;
       * confirm ownership before adding a free. */
      matches = FindMatchForRow (id_cell, entityID, sep);
      if (matches == NULL) {
        msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_match_fmt) + StringLen (id_cell->data.ptrvalue) + 15));
        sprintf (msg, no_match_fmt, id_cell->data.ptrvalue, line_no);
        ValNodeAddPointer (&problems, 0, msg);
      }
    }

    if (matches != NULL) {
      cell = row->data.ptrvalue;
      col_no = 1;
      for (col = columns; col != NULL; col = col->next, col_no++) {
        cfg = (TabColumnConfigPtr) col->data.ptrvalue;
        targets = NULL;

        /* a column yields targets unless it is unconfigured, is the match
         * column, or asks to skip a blank cell */
        wanted = (Boolean) (cfg != NULL && !(cfg->match_type > 0));
        if (wanted && cfg->skip_blank
            && (cell == NULL || StringHasNoText (cell->data.ptrvalue))) {
          wanted = FALSE;
        }

        if (wanted) {
          targets = GetTargetListForRowAndColumn (id_cell->choice, matches, cfg->field);
          if (targets == NULL) {
            ReportMissingTargets (&problems, cfg->field, id_cell->data.ptrvalue, col_no, line_no);
          }
        }

        /* always add an entry so row entries stay aligned with the columns */
        ValNodeAddPointer (&row_targets, 0, targets);
        if (cell != NULL) {
          cell = cell->next;
        }
      }
    }

    ValNodeAddPointer (&result, 0, row_targets);
  }

  if (problems != NULL) {
    if (p_err_list != NULL) {
      *p_err_list = problems;
    } else {
      problems = ValNodeFreeData (problems);
    }
  }

  return result;
}
|
|
10636 |
||
10637 |
||
10638 |
/* Releases an object table: for every row node, every column node and the
 * target list hanging off it are freed with ValNodeFree (nodes only, not the
 * objects the target nodes point at), then the row node itself.
 * Always returns NULL.
 */
NLM_EXTERN ValNodePtr FreeObjectTableForTabTable (ValNodePtr table)
{
  ValNodePtr row, next_row;
  ValNodePtr cell, next_cell;

  for (row = table; row != NULL; row = next_row) {
    /* detach each node before freeing so only that node is released */
    next_row = row->next;
    row->next = NULL;

    for (cell = row->data.ptrvalue; cell != NULL; cell = next_cell) {
      next_cell = cell->next;
      cell->next = NULL;
      cell->data.ptrvalue = ValNodeFree (cell->data.ptrvalue);
      ValNodeFree (cell);
    }

    ValNodeFree (row);
  }

  return NULL;
}
|
|
10658 |
||
10659 |
||
10660 |
typedef struct countfeat { |
|
10661 |
Uint1 featdef; |
|
10662 |
Int4 num; |
|
10663 |
} CountFeatData, PNTR CountFeatPtr; |
|
10664 |
||
10665 |
||
10666 |
/* Feature-visit callback: userdata is a CountFeatPtr; its num is incremented
 * when the feature's idx.subtype equals its featdef. NULL arguments are ignored.
 */
static void CountFeaturesCallback (SeqFeatPtr sfp, Pointer userdata)
{
  CountFeatPtr tally = (CountFeatPtr) userdata;

  if (sfp != NULL && tally != NULL && sfp->idx.subtype == tally->featdef) {
    tally->num++;
  }
}
|
|
10677 |
||
10678 |
/* Descriptor-visit callback: userdata is an Int4Ptr that is incremented for
 * every descriptor whose choice is Seq_descr_source. NULL arguments are ignored.
 */
static void CountBioSourceDescriptorsCallback (SeqDescrPtr sdp, Pointer userdata)
{
  Int4Ptr tally = (Int4Ptr) userdata;

  if (sdp == NULL || tally == NULL) {
    return;
  }
  if (sdp->choice != Seq_descr_source) {
    return;
  }
  (*tally)++;
}
|
|
10687 |
||
10688 |
||
10689 |
/* For every column, counts how many objects in sep could carry that column's
 * field, and returns the counts as a list of integers parallel to columns.
 *
 * - source-qualifier columns: biosource features plus source descriptors
 * - feature-field / CDS-gene-prot columns: features whose subtype matches the
 *   featdef derived from the column's feature type
 * - match columns, unconfigured columns and unknown field types: 0
 *
 * The source total and the most recent feature total are cached separately.
 * Sharing one cache let a biosource *feature* column and a source-qualifier
 * column pick up each other's total (features only vs. features plus
 * descriptors), depending on column order.
 */
static ValNodePtr CountObjectsForColumnFields (SeqEntryPtr sep, ValNodePtr columns)
{
  ValNodePtr         count_list = NULL, vnp;
  TabColumnConfigPtr t;
  CountFeatData      feat_count;              /* last feature subtype counted */
  CountFeatData      src_count;               /* biosource features + descriptors */
  Boolean            have_feat_count = FALSE;
  Boolean            have_src_count = FALSE;
  FeatureFieldPtr    f;
  Int4               num;
  Uint1              featdef;

  feat_count.featdef = 0;
  feat_count.num = 0;
  src_count.featdef = FEATDEF_BIOSRC;
  src_count.num = 0;

  for (vnp = columns; vnp != NULL; vnp = vnp->next) {
    num = 0;
    t = (TabColumnConfigPtr) vnp->data.ptrvalue;
    if (t != NULL && t->match_type == 0 && t->field != NULL) {
      switch (t->field->choice) {
        case FieldType_source_qual:
          if (!have_src_count) {
            VisitFeaturesInSep (sep, &src_count, CountFeaturesCallback);
            VisitDescriptorsInSep (sep, &(src_count.num), CountBioSourceDescriptorsCallback);
            have_src_count = TRUE;
          }
          num = src_count.num;
          break;
        case FieldType_feature_field:
          f = (FeatureFieldPtr) t->field->data.ptrvalue;
          if (f != NULL) {
            featdef = GetFeatdefFromFeatureType (f->type);
            if (!have_feat_count || featdef != feat_count.featdef) {
              feat_count.featdef = featdef;
              feat_count.num = 0;
              VisitFeaturesInSep (sep, &feat_count, CountFeaturesCallback);
              have_feat_count = TRUE;
            }
            num = feat_count.num;
          }
          break;
        case FieldType_cds_gene_prot:
          /* temporary feature field; released after its type is read */
          f = FeatureFieldFromCDSGeneProtField (t->field->data.intvalue);
          if (f != NULL) {
            featdef = GetFeatdefFromFeatureType (f->type);
            if (!have_feat_count || featdef != feat_count.featdef) {
              feat_count.featdef = featdef;
              feat_count.num = 0;
              VisitFeaturesInSep (sep, &feat_count, CountFeaturesCallback);
              have_feat_count = TRUE;
            }
            num = feat_count.num;
          }
          f = FeatureFieldFree (f);
          break;
      }
    }
    /* one entry per column, even when nothing was counted */
    ValNodeAddInt (&count_list, 0, num);
  }
  return count_list;
}
|
|
10745 |
||
10746 |
||
10747 |
/* Applies the values of a tab table to the objects collected in obj_table.
 *
 * sep        entry the objects live in; used to count candidate objects
 * table      list of lines; each line's data.ptrvalue is a list of cells
 * columns    list of TabColumnConfigPtr, parallel to the cells of a line
 * obj_table  parallel to table; each row holds one target list per column
 *
 * An empty cell value removes the field from each target, any other value
 * sets it. When a column has match_mrna set and its field is the CDS product,
 * the overlapping mRNA product is adjusted as well and counted as an
 * additional affected field.
 *
 * Returns a list of newly allocated message strings: first the total number
 * of fields affected, then (when that total is non-zero) one line per
 * non-match column with affected/total counts, then one message per target
 * whose value could not be set or removed.
 */
NLM_EXTERN ValNodePtr ApplyTableValuesToObjectTable (SeqEntryPtr sep, ValNodePtr table, ValNodePtr columns, ValNodePtr obj_table)
{
  ValNodePtr         val_line_vnp, obj_line_vnp;
  ValNodePtr         val_vnp, obj_vnp, col_vnp;
  ValNodePtr         target_vnp;
  TabColumnConfigPtr t;
  CharPtr            val, qual_name;
  ValNodePtr         err_list = NULL, count_list, count_affected_list = NULL, count_vnp, count_tot_vnp;
  CharPtr            err_msg;
  CharPtr            bad_col_val_fmt = "Did not set value for column %d, line %d";
  CharPtr            num_affected_fmt = "%d fields affected";
  CharPtr            col_num_affected_fmt = "For %s (column %d), %d items were affected out of %d total";
  Int4               num_fields_affected = 0, col_num, line_num, num_this_column;
  Boolean            success;
  ValNodePtr         count_msg = NULL;

  count_list = CountObjectsForColumnFields (sep, columns);

  for (val_line_vnp = table, obj_line_vnp = obj_table, line_num = 1;
       val_line_vnp != NULL && obj_line_vnp != NULL;
       val_line_vnp = val_line_vnp->next, obj_line_vnp = obj_line_vnp->next, line_num++) {
    val_vnp = val_line_vnp->data.ptrvalue;
    obj_vnp = obj_line_vnp->data.ptrvalue;
    col_vnp = columns;
    col_num = 1;
    /* walk the per-column running totals alongside the columns */
    count_vnp = count_affected_list;
    while (obj_vnp != NULL && col_vnp != NULL) {
      num_this_column = 0;
      if (obj_vnp->data.ptrvalue != NULL) {
        t = (TabColumnConfigPtr) col_vnp->data.ptrvalue;
        if (t == NULL || t->match_type > 0
            || (t->skip_blank && (val_vnp == NULL || StringHasNoText (val_vnp->data.ptrvalue)))) {
          /* ignore column or skip blank value */
        } else {
          if (val_vnp == NULL || val_vnp->data.ptrvalue == NULL) {
            val = "";
          } else {
            val = val_vnp->data.ptrvalue;
          }
          for (target_vnp = obj_vnp->data.ptrvalue; target_vnp != NULL; target_vnp = target_vnp->next) {
            if (val[0] == 0) {
              success = RemoveFieldValueForObject (target_vnp->choice, target_vnp->data.ptrvalue, t->field, NULL);
            } else {
              /* val is the cell's own string whenever it is non-empty */
              success = SetFieldValueForObject (target_vnp->choice, target_vnp->data.ptrvalue, t->field, NULL,
                                                val, t->existing_text);
            }
            if (success) {
              num_fields_affected++;
              num_this_column++;
              if (t->match_mrna && IsFieldTypeCDSProduct (t->field)
                  && target_vnp->choice == OBJ_SEQFEAT) {
                if (AdjustmRNAProductToMatchProteinProduct (target_vnp->data.ptrvalue)) {
                  num_fields_affected++;
                }
              }
            } else {
              err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_col_val_fmt) + 30));
              sprintf (err_msg, bad_col_val_fmt, col_num, line_num);
              ValNodeAddPointer (&err_list, 0, err_msg);
            }
          }
        }
      }
      if (val_vnp != NULL) {
        val_vnp = val_vnp->next;
      }
      if (count_vnp == NULL) {
        /* first row that reaches this column: start its total */
        ValNodeAddInt (&count_affected_list, 0, num_this_column);
      } else {
        /* later rows: add the items affected in this row, not a flat 1 */
        count_vnp->data.intvalue += num_this_column;
        count_vnp = count_vnp->next;
      }
      obj_vnp = obj_vnp->next;
      col_vnp = col_vnp->next;
      col_num++;
    }
  }

  /* put message at top of list for number of fields affected */
  err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (num_affected_fmt) + 15));
  sprintf (err_msg, num_affected_fmt, num_fields_affected);
  ValNodeAddPointer (&count_msg, 0, err_msg);

  /* if any affected, list number of fields per column, and the total in the record */
  if (num_fields_affected > 0) {
    for (count_vnp = count_affected_list, count_tot_vnp = count_list, col_vnp = columns, col_num = 1;
         count_vnp != NULL && count_tot_vnp != NULL && col_vnp != NULL;
         count_vnp = count_vnp->next, count_tot_vnp = count_tot_vnp->next, col_vnp = col_vnp->next, col_num++) {
      t = (TabColumnConfigPtr) col_vnp->data.ptrvalue;
      if (t != NULL && t->match_type == 0) {
        qual_name = SummarizeFieldType (t->field);
        err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (col_num_affected_fmt) + StringLen (qual_name) + 45));
        sprintf (err_msg, col_num_affected_fmt, qual_name, col_num, count_vnp->data.intvalue, count_tot_vnp->data.intvalue);
        ValNodeAddPointer (&count_msg, 0, err_msg);
        qual_name = MemFree (qual_name);
      }
    }
  }

  /* summary lines first, individual failures after them */
  ValNodeLink (&count_msg, err_list);

  count_list = ValNodeFree (count_list);
  count_affected_list = ValNodeFree (count_affected_list);

  return count_msg;
}
|
|
10853 |
||
10854 |
||
10855 |
/* Sort comparator for ValNode lists: orders nodes by choice first and then by
 * the address stored in data.ptrvalue, so that nodes referring to the same
 * object end up adjacent.
 *
 * ptr1 and ptr2 each point at a ValNodePtr. Returns -1, 0 or 1; 0 is also
 * returned when either argument, or either node, is NULL.
 */
static int LIBCALLBACK SortVnpByChoiceAndPtrvalue (VoidPtr ptr1, VoidPtr ptr2)

{
  ValNodePtr vnp1;
  ValNodePtr vnp2;

  if (ptr1 != NULL && ptr2 != NULL) {
    vnp1 = *((ValNodePtr PNTR) ptr1);
    vnp2 = *((ValNodePtr PNTR) ptr2);
    if (vnp1 != NULL && vnp2 != NULL) {
      if (vnp1->choice > vnp2->choice) {
        return 1;
      } else if (vnp1->choice < vnp2->choice) {
        return -1;
      } else if (vnp1->data.ptrvalue > vnp2->data.ptrvalue) {
        return 1;
      } else if (vnp1->data.ptrvalue < vnp2->data.ptrvalue) {
        /* compare node 1 against node 2; comparing node 2 with itself could
         * never be true, which made the ordering inconsistent */
        return -1;
      } else {
        return 0;
      }
    }
  }
  return 0;
}
|
|
10880 |
||
10881 |
||
10882 |
/* Reports columns in which more than one row targets the same object.
 *
 * obj_table holds one row per table line; each row is a list with one entry
 * per column whose data.ptrvalue is that cell's target list. The table is
 * walked column by column: all targets of a column are copied into one list,
 * sorted with SortVnpByChoiceAndPtrvalue, and adjacent entries with equal
 * choice and pointer are treated as duplicates.
 *
 * Returns a list with one newly allocated message per offending column; each
 * node's choice is set from the column number. The table itself is not modified.
 */
NLM_EXTERN ValNodePtr CheckObjTableForRowsThatApplyToTheSameDestination (ValNodePtr obj_table)
{
  CharPtr    multi_fmt = "Multiple rows apply to the same object for column %d";
  ValNodePtr cursors = NULL;     /* per row: the column node currently examined */
  ValNodePtr column_objs;
  ValNodePtr problems = NULL;
  ValNodePtr row, cur, cell, scan;
  CharPtr    msg;
  Int4       col_no = 0;
  Boolean    more;
  Boolean    dup;

  /* start every row's cursor at its first column */
  for (row = obj_table; row != NULL; row = row->next) {
    ValNodeAddPointer (&cursors, 0, row->data.ptrvalue);
  }

  do {
    col_no++;
    more = FALSE;
    column_objs = NULL;

    /* pool the targets of this column across all rows and advance cursors */
    for (cur = cursors; cur != NULL; cur = cur->next) {
      cell = cur->data.ptrvalue;
      if (cell == NULL) {
        continue;
      }
      ValNodeLink (&column_objs, ValNodeCopyPtr (cell->data.ptrvalue));
      cur->data.ptrvalue = cell->next;
      more = TRUE;
    }

    if (column_objs != NULL) {
      column_objs = ValNodeSort (column_objs, SortVnpByChoiceAndPtrvalue);

      dup = FALSE;
      for (scan = column_objs; scan != NULL && scan->next != NULL; scan = scan->next) {
        if (scan->choice == scan->next->choice
            && scan->data.ptrvalue == scan->next->data.ptrvalue) {
          dup = TRUE;
          break;
        }
      }

      if (dup) {
        msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (multi_fmt) + 30));
        sprintf (msg, multi_fmt, col_no);
        ValNodeAddPointer (&problems, col_no, msg);
      }

      column_objs = ValNodeFree (column_objs);
    }
  } while (more);

  cursors = ValNodeFree (cursors);
  return problems;
}
|
|
10935 |
||
10936 |
||
10937 |
/* Reports, before values are applied, which targets already hold text in the
 * field a column would write, and whether coding regions are inconsistent
 * about having an mRNA.
 *
 * sep        entry the objects live in
 * table      list of lines; each line's data.ptrvalue is a list of cells
 * columns    list of TabColumnConfigPtr, parallel to the cells of a line
 * obj_table  parallel to table; each row holds one target list per column
 *
 * Returns a list of newly allocated message strings. Per-target messages
 * carry the column number in the node's choice; the two summary messages
 * (count of fields with existing text, mRNA present/absent warning) are
 * placed at the head of the list with choice 0. When sep, table or columns is
 * NULL only the corresponding "No ..." messages are returned.
 */
NLM_EXTERN ValNodePtr CheckObjTableForExistingText (SeqEntryPtr sep, ValNodePtr table, ValNodePtr columns, ValNodePtr obj_table)
{
  ValNodePtr         err_list = NULL, vnp;
  ValNodePtr         val_line_vnp, obj_line_vnp;
  ValNodePtr         val_vnp, obj_vnp, col_vnp;
  Int4               line_num = 1, col_num, num_existing_text = 0;
  Uint2              entityID;
  TabColumnConfigPtr t;
  CharPtr            err_msg, str, qual_name, val;
  CharPtr            already_has_val_fmt = "%s already has value '%s' (column %d), line %d.  Replacement is '%s'";
  CharPtr            num_existing_text_fmt = "%d fields already have text.";
  CharPtr            mrna_warn_fmt = "%d coding region features have mRNAs, but %d do not.";
  ValNodePtr         target_list, feat_vnp;
  Int4               num_with_mrna = 0, num_without_mrna = 0;

  if (sep == NULL) {
    ValNodeAddPointer (&err_list, 1, StringSave ("No SeqEntry"));
  }
  if (table == NULL) {
    ValNodeAddPointer (&err_list, 1, StringSave ("No table"));
  }
  if (columns == NULL) {
    ValNodeAddPointer (&err_list, 1, StringSave ("No column information"));
  }
  if (err_list != NULL) {
    return err_list;
  }

  /* the result is not used below; the call is retained from the original */
  entityID = SeqMgrGetEntityIDForSeqEntry (sep);

  for (val_line_vnp = table, obj_line_vnp = obj_table, line_num = 1;
       val_line_vnp != NULL && obj_line_vnp != NULL;
       val_line_vnp = val_line_vnp->next, obj_line_vnp = obj_line_vnp->next, line_num++) {
    val_vnp = val_line_vnp->data.ptrvalue;
    obj_vnp = obj_line_vnp->data.ptrvalue;
    col_vnp = columns;
    if (val_vnp == NULL || obj_vnp == NULL) continue;
    col_num = 1;
    while (obj_vnp != NULL && col_vnp != NULL) {
      if (obj_vnp->data.ptrvalue != NULL) {
        t = (TabColumnConfigPtr) col_vnp->data.ptrvalue;
        if (t == NULL || t->match_type > 0
            || (t->skip_blank && (val_vnp == NULL || StringHasNoText (val_vnp->data.ptrvalue)))) {
          /* ignore column or skip blank value */
        } else {
          target_list = obj_vnp->data.ptrvalue;
          if (val_vnp == NULL || val_vnp->data.ptrvalue == NULL) {
            val = "";
          } else {
            val = val_vnp->data.ptrvalue;
          }
          for (feat_vnp = target_list; feat_vnp != NULL; feat_vnp = feat_vnp->next) {
            /* check for existing text */
            str = GetFieldValueForObject (feat_vnp->choice, feat_vnp->data.ptrvalue, t->field, NULL);
            if (!StringHasNoText (str)) {
              qual_name = SummarizeFieldType (t->field);
              err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (already_has_val_fmt)
                                                           + StringLen (qual_name) + StringLen (str)
                                                           + StringLen (val)
                                                           + 30));
              sprintf (err_msg, already_has_val_fmt, qual_name, str, col_num, line_num, val);
              ValNodeAddPointer (&err_list, col_num, err_msg);
              num_existing_text ++;
              /* the summary string is allocated for us; release it once used */
              qual_name = MemFree (qual_name);
            }
            str = MemFree (str);
            /* check for mrna if changing CDS product */
            if (IsFieldTypeCDSProduct (t->field) && feat_vnp->choice == OBJ_SEQFEAT) {
              if (GetmRNAForFeature (feat_vnp->data.ptrvalue) != NULL) {
                num_with_mrna++;
              } else {
                num_without_mrna++;
              }
            }
          }
        }
      }
      if (val_vnp != NULL) {
        val_vnp = val_vnp->next;
      }
      obj_vnp = obj_vnp->next;
      col_vnp = col_vnp->next;
      col_num++;
    }
  }

  /* summary messages are pushed onto the front of the list */
  if (num_existing_text > 0) {
    err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (num_existing_text_fmt)
                                                 + 15));
    sprintf (err_msg, num_existing_text_fmt, num_existing_text);
    vnp = ValNodeNew (NULL);
    vnp->choice = 0;
    vnp->data.ptrvalue = err_msg;
    vnp->next = err_list;
    err_list = vnp;
  }
  /* only warn when the coding regions disagree about having an mRNA */
  if (num_with_mrna > 0 && num_without_mrna > 0) {
    err_msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (mrna_warn_fmt)
                                                 + 30));
    sprintf (err_msg, mrna_warn_fmt, num_with_mrna, num_without_mrna);
    vnp = ValNodeNew (NULL);
    vnp->choice = 0;
    vnp->data.ptrvalue = err_msg;
    vnp->next = err_list;
    err_list = vnp;
  }

  return err_list;
}
|
|
11044 |
||
11045 |
||
11046 |
/* ApplyTableToFeatures
 *
 * Applies the values of a tab-delimited table to objects found in sep.
 *
 *   sep      SeqEntry to search for the objects named by each table row
 *   table    list of rows; each row's data.ptrvalue is the list of cells
 *   columns  list of TabColumnConfigPtr, walked in parallel with the cells
 *
 * For every row, the identifying cell is located with FindMatchChoiceInLine
 * and resolved with FindMatchForRow.  Each column is then either skipped
 * (no configuration, match_type > 0, or blank cell with skip_blank set) or
 * applied to every target returned by GetTargetListForRowAndColumn: a blank
 * cell removes the field value, a non-blank cell sets it.
 *
 * Returns a list of message nodes.  If any argument is NULL, only the
 * argument complaints are returned.  Otherwise the first node is always the
 * "%d fields affected" summary, followed by whatever problems were reported
 * while processing.
 */
NLM_EXTERN ValNodePtr ApplyTableToFeatures (SeqEntryPtr sep, ValNodePtr table, ValNodePtr columns)
{
  CharPtr            no_match_fmt = "No match for %s, line %d";
  CharPtr            bad_col_val_fmt = "Did not set value for column %d, line %d";
  CharPtr            num_affected_fmt = "%d fields affected";
  ValNodePtr         err_list = NULL;
  ValNodePtr         row, cell, col;
  ValNodePtr         id_cell, matches, targets, target, summary;
  TabColumnConfigPtr cfg;
  CharPtr            msg, val;
  Int4               line_num, col_num;
  Int4               num_fields_affected = 0;
  Uint2              entityID;
  Boolean            skip_column, success;

  /* collect every argument complaint before giving up */
  if (sep == NULL) {
    ValNodeAddPointer (&err_list, 0, StringSave ("No SeqEntry"));
  }
  if (table == NULL) {
    ValNodeAddPointer (&err_list, 0, StringSave ("No table"));
  }
  if (columns == NULL) {
    ValNodeAddPointer (&err_list, 0, StringSave ("No column information"));
  }
  if (err_list != NULL) {
    return err_list;
  }

  entityID = SeqMgrGetEntityIDForSeqEntry (sep);

  row = table;
  line_num = 1;
  while (row != NULL) {
    id_cell = FindMatchChoiceInLine (row->data.ptrvalue, columns);
    if (id_cell == NULL || StringHasNoText (id_cell->data.ptrvalue)) {
      ReportEmptyIDColumn (&err_list, line_num);
    } else {
      matches = FindMatchForRow (id_cell, entityID, sep);
      if (matches == NULL) {
        msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_match_fmt)
                                                 + StringLen (id_cell->data.ptrvalue)
                                                 + 15));
        sprintf (msg, no_match_fmt, id_cell->data.ptrvalue, line_num);
        ValNodeAddPointer (&err_list, 0, msg);
      } else {
        /* cells and column configurations advance together; the row may
         * run out of cells before the configuration list ends
         */
        cell = row->data.ptrvalue;
        col = columns;
        col_num = 1;
        while (col != NULL) {
          cfg = (TabColumnConfigPtr) col->data.ptrvalue;
          skip_column = (Boolean) (cfg == NULL
                                   || cfg->match_type > 0
                                   || (cfg->skip_blank
                                       && (cell == NULL
                                           || StringHasNoText (cell->data.ptrvalue))));

          if (!skip_column) {
            targets = GetTargetListForRowAndColumn (id_cell->choice, matches, cfg->field);
            if (targets == NULL) {
              ReportMissingTargets (&err_list, cfg->field, id_cell->data.ptrvalue,
                                    col_num, line_num);
            } else {
              /* a missing cell is treated like an empty one */
              val = "";
              if (cell != NULL && cell->data.ptrvalue != NULL) {
                val = cell->data.ptrvalue;
              }

              for (target = targets; target != NULL; target = target->next) {
                if (*val == '\0') {
                  /* empty value: clear the field instead of setting it */
                  success = RemoveFieldValueForObject (target->choice,
                                                       target->data.ptrvalue,
                                                       cfg->field, NULL);
                } else {
                  /* val is non-empty only when it came from this cell */
                  success = SetFieldValueForObject (target->choice,
                                                    target->data.ptrvalue,
                                                    cfg->field, NULL,
                                                    cell->data.ptrvalue,
                                                    cfg->existing_text);
                }

                if (!success) {
                  msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (bad_col_val_fmt) + 30));
                  sprintf (msg, bad_col_val_fmt, col_num, line_num);
                  ValNodeAddPointer (&err_list, 0, msg);
                  continue;
                }

                num_fields_affected++;
                /* a successful mRNA adjustment counts as one more field */
                if (cfg->match_mrna
                    && IsFieldTypeCDSProduct (cfg->field)
                    && target->choice == OBJ_SEQFEAT
                    && AdjustmRNAProductToMatchProteinProduct (target->data.ptrvalue)) {
                  num_fields_affected++;
                }
              }
            }
            targets = ValNodeFree (targets);
          }

          if (cell != NULL) {
            cell = cell->next;
          }
          col = col->next;
          col_num++;
        }
      }
      matches = ValNodeFree (matches);
    }

    row = row->next;
    line_num++;
  }

  /* the summary goes to the front of the list, ahead of any problems */
  msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (num_affected_fmt) + 15));
  sprintf (msg, num_affected_fmt, num_fields_affected);
  summary = ValNodeNew (NULL);
  summary->data.ptrvalue = msg;
  summary->next = err_list;
  err_list = summary;

  return err_list;
}
|
|
11150 |
||
11151 |
||
11152 |
/* CheckTableForExistingText
 *
 * Reports which fields addressed by a table already hold text, without
 * changing anything.
 *
 *   sep      SeqEntry to search for the objects named by each table row
 *   table    list of rows; each row's data.ptrvalue is the list of cells
 *   columns  list of TabColumnConfigPtr, walked in parallel with the cells
 *
 * Rows and columns are walked exactly as when the table is applied: columns
 * with no configuration, with match_type > 0, or with a blank cell while
 * skip_blank is set are skipped.  For every remaining target the current
 * field value is fetched with GetFieldValueForObject; if it has text, a
 * message naming the existing and the replacement value is added.
 *
 * Returns a list of message nodes (NULL if there is nothing to report).
 * Node choices as written here: 1 for NULL-argument complaints, 0 for
 * "no match" rows and for the summary, and the column number for
 * "already has value" messages.  When at least one field already has text,
 * the "%d fields already have text." summary is placed first in the list.
 */
NLM_EXTERN ValNodePtr CheckTableForExistingText (SeqEntryPtr sep, ValNodePtr table, ValNodePtr columns)
{
  CharPtr            no_match_fmt = "No match for %s, line %d";
  CharPtr            no_feat_fmt = "No %s feature for %s (column %d, line %d)";
  CharPtr            already_has_val_fmt = "%s already has value '%s' (column %d), line %d.  Replacement is '%s'";
  CharPtr            num_existing_text_fmt = "%d fields already have text.";
  ValNodePtr         err_list = NULL;
  ValNodePtr         row, cell, col;
  ValNodePtr         id_cell, matches, targets, target, summary;
  TabColumnConfigPtr cfg;
  CharPtr            msg, existing, qual_name, val;
  Int4               line_num, col_num;
  Int4               num_existing_text = 0;
  Uint2              entityID;
  Boolean            skip_column;

  /* collect every argument complaint before giving up */
  if (sep == NULL) {
    ValNodeAddPointer (&err_list, 1, StringSave ("No SeqEntry"));
  }
  if (table == NULL) {
    ValNodeAddPointer (&err_list, 1, StringSave ("No table"));
  }
  if (columns == NULL) {
    ValNodeAddPointer (&err_list, 1, StringSave ("No column information"));
  }
  if (err_list != NULL) {
    return err_list;
  }

  entityID = SeqMgrGetEntityIDForSeqEntry (sep);

  row = table;
  line_num = 1;
  while (row != NULL) {
    id_cell = FindMatchChoiceInLine (row->data.ptrvalue, columns);
    if (id_cell == NULL || StringHasNoText (id_cell->data.ptrvalue)) {
      ReportEmptyIDColumn (&err_list, line_num);
    } else {
      matches = FindMatchForRow (id_cell, entityID, sep);
      if (matches == NULL) {
        msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (no_match_fmt)
                                                 + StringLen (id_cell->data.ptrvalue)
                                                 + 15));
        sprintf (msg, no_match_fmt, id_cell->data.ptrvalue, line_num);
        ValNodeAddPointer (&err_list, 0, msg);
      } else {
        /* cells and column configurations advance together; the row may
         * run out of cells before the configuration list ends
         */
        cell = row->data.ptrvalue;
        col = columns;
        col_num = 1;
        while (col != NULL) {
          cfg = (TabColumnConfigPtr) col->data.ptrvalue;
          skip_column = (Boolean) (cfg == NULL
                                   || cfg->match_type > 0
                                   || (cfg->skip_blank
                                       && (cell == NULL
                                           || StringHasNoText (cell->data.ptrvalue))));

          if (!skip_column) {
            targets = GetTargetListForRowAndColumn (id_cell->choice, matches, cfg->field);
            if (targets == NULL) {
              ReportMissingTargets (&err_list, cfg->field, id_cell->data.ptrvalue,
                                    col_num, line_num);
            } else {
              /* a missing cell is reported as an empty replacement */
              val = "";
              if (cell != NULL && cell->data.ptrvalue != NULL) {
                val = cell->data.ptrvalue;
              }

              for (target = targets; target != NULL; target = target->next) {
                existing = GetFieldValueForObject (target->choice, target->data.ptrvalue,
                                                   cfg->field, NULL);
                if (!StringHasNoText (existing)) {
                  /* NOTE(review): qual_name is never released in this function;
                   * confirm whether SummarizeFieldType hands back storage the
                   * caller is expected to free.
                   */
                  qual_name = SummarizeFieldType (cfg->field);
                  msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (already_has_val_fmt)
                                                           + StringLen (qual_name)
                                                           + StringLen (existing)
                                                           + StringLen (val)
                                                           + 30));
                  sprintf (msg, already_has_val_fmt, qual_name, existing,
                           col_num, line_num, val);
                  /* the node choice carries the column number */
                  ValNodeAddPointer (&err_list, col_num, msg);
                  num_existing_text++;
                }
                existing = MemFree (existing);
              }
            }
            targets = ValNodeFree (targets);
          }

          if (cell != NULL) {
            cell = cell->next;
          }
          col = col->next;
          col_num++;
        }
      }
      matches = ValNodeFree (matches);
    }

    row = row->next;
    line_num++;
  }

  /* only add a summary when there is something to summarize */
  if (num_existing_text > 0) {
    msg = (CharPtr) MemNew (sizeof (Char) * (StringLen (num_existing_text_fmt) + 15));
    sprintf (msg, num_existing_text_fmt, num_existing_text);
    summary = ValNodeNew (NULL);
    summary->choice = 0;
    summary->data.ptrvalue = msg;
    summary->next = err_list;
    err_list = summary;
  }

  return err_list;
}
|
|
11250 |
||
11251 |
||
11252 |
||
11253 |
||
11254 |