2
* ===========================================================================
5
* National Center for Biotechnology Information (NCBI)
7
* This software/database is a "United States Government Work" under the
8
* terms of the United States Copyright Act. It was written as part of
9
* the author's official duties as a United States Government employee and
10
* thus cannot be copyrighted. This software/database is freely available
11
* to the public for use. The National Library of Medicine and the U.S.
12
* Government do not place any restriction on its use or reproduction.
13
* We would, however, appreciate having the NCBI and the author cited in
14
* any work or product based on this material
16
* Although all reasonable efforts have been taken to ensure the accuracy
17
* and reliability of the software and data, the NLM and the U.S.
18
* Government do not and cannot warrant the performance or results that
19
* may be obtained by using this software or data. The NLM and the U.S.
20
* Government disclaim all warranties, express or implied, including
21
* warranties of performance, merchantability or fitness for any particular
24
* ===========================================================================
26
* File Name: asn2gnbi.h
28
* Author: Karl Sirotkin, Tom Madden, Tatiana Tatusov, Jonathan Kans
30
* Version Creation Date: 12/30/03
34
* File Description: New GenBank flatfile generator, internal header
37
* --------------------------------------------------------------------------
38
* ==========================================================================
49
/* NLM_EXTERN is the linkage prefix written on every declaration in this header.
 * Two alternative expansions follow: NLM_IMPORT, or plain extern.
 * NOTE(review): the preprocessor conditional (and any #undef) that selects
 * between the two is expected around these lines - confirm it is present,
 * otherwise the second define redefines the first.
 */
#define NLM_EXTERN NLM_IMPORT
51
#define NLM_EXTERN extern
60
/* Width limits for formatted output.
 * NOTE(review): the names suggest the maximum line width for EMBL-format and
 * GenBank-format output respectively - confirm against the line-wrapping code.
 */
#define ASN2FF_EMBL_MAX 78
61
#define ASN2FF_GB_MAX 79
62
/* size, in Chars, of the basename buffer in Asn2gbWork */
#define SEQID_MAX_LEN 41
64
/* Tilde-handling modes.
 * NOTE(review): presumably the values accepted by the tildeAction argument of
 * FFProcessTildes; the names suggest "leave as is", "replace with spaces",
 * "expand" and "legacy expand" - confirm against the implementation.
 */
#define TILDE_IGNORE 0
65
#define TILDE_TO_SPACES 1
66
#define TILDE_EXPAND 2
67
#define TILDE_OLD_EXPAND 3
70
/* flags set by mode to customize behavior */
72
/* Asn2gbFlags collects Boolean switches, one per customizable behavior;
 * Asn2gbFlagsPtr is the corresponding pointer type.
 * NOTE(review): the per-flag hints below are read off the field names only -
 * confirm against the code that sets and tests each flag.
 */
typedef struct asn2gbflags {
73
Boolean suppressLocalID;
74
Boolean validateFeats;
75
Boolean ignorePatPubs;
77
Boolean avoidLocusColl;
79
/* NOTE(review): the drop / check / need prefixes presumably mean discard, */
/* validate and require respectively - confirm */
Boolean dropBadCitGens;
80
Boolean noAffilOnUnpub;
81
Boolean dropIllegalQuals;
82
Boolean checkQualSyntax;
83
Boolean needRequiredQuals;
84
Boolean needOrganismQual;
85
Boolean needAtLeastOneRef;
87
Boolean dropBadDbxref;
88
Boolean useEmblMolType;
89
Boolean hideBankItComment;
90
Boolean checkCDSproductID;
91
Boolean suppressSegLoc;
92
/* NOTE(review): the ...ToNote flags presumably redirect the named data into */
/* a note qualifier instead of its own qualifier - confirm */
Boolean srcQualsToNote;
93
Boolean hideEmptySource;
94
Boolean goQualsToNote;
95
Boolean geneSynsToNote;
96
Boolean selenocysteineToNote;
97
Boolean extraProductsToNote;
99
} Asn2gbFlags, PNTR Asn2gbFlagsPtr;
101
/* internal Asn2gbSect structure has fields on top of Asn2gbSect fields */
103
typedef struct int_Asn2gbSect {
105
} IntAsn2gbSect, PNTR IntAsn2gbSectPtr;
107
/* string structure */
109
/* capacity, in Chars, of the buffer embedded in each StringItem */
#define STRING_BUF_LEN 1024
111
/* StringItem: one fixed-size text buffer that can point at further
 * StringItems; StringItemPtr is the corresponding pointer type.
 * NOTE(review): the FF* functions take a StringItemPtr as the string being
 * built, so a chain of these presumably forms one growable string - confirm.
 */
typedef struct stringitem {
112
/* NOTE(review): presumably the item currently being appended to - confirm */
struct stringitem *curr;
113
/* NOTE(review): presumably the following item in the chain - confirm */
struct stringitem *next;
115
/* text storage for this item */
Char buf [STRING_BUF_LEN];
117
} StringItem, PNTR StringItemPtr;
119
/* internal asn2gbjob structure has fields on top of Asn2gbJob fields */
121
/* IntAsn2gbJob / IntAsn2gbJobPtr: job-wide state passed as "ajp" to the
 * functions declared in this header (InitWWW, FFGetString, ...).
 * NOTE(review): field hints below are inferred from names - confirm.
 */
typedef struct int_asn2gb_job {
126
/* NOTE(review): translation/transcript display switches - confirm semantics */
Boolean showFarTransl;
127
Boolean transIfNoProd;
128
Boolean alwaysTranslCds;
129
Boolean showTranscript;
132
Boolean newSourceOrg;
133
Boolean produceInsdSeq;
134
/* NOTE(review): presumably the Bioseqs locked for the duration of the job, */
/* to be unlocked at cleanup - confirm ownership */
ValNodePtr lockedBspList;
135
/* NOTE(review): presumably set when release mode detects a fatal problem - confirm */
Boolean relModeError;
144
} IntAsn2gbJob, PNTR IntAsn2gbJobPtr;
146
/* array for assigning biosource and feature data fields to qualifiers */
147
/* the array should be allocated with MAX (ASN2GNBK_TOTAL_SOURCE, ASN2GNBK_TOTAL_FEATUR) elements */
149
typedef union qualval {
163
} QualVal, PNTR QualValPtr;
165
/* structure passed to individual paragraph format functions */
167
/* Asn2gbFormat / Asn2gbFormatPtr: context handed (as "afp") to the Format*Block
 * and DoImmediate* functions declared in this header.
 */
typedef struct asn2gbformat {
172
/* NOTE(review): presumably the caller-supplied callback that receives formatted text - confirm */
Asn2gbWriteFunc ffwrite;
174
/* NOTE(review): presumably paired callbacks for locking and releasing remotely fetched data - confirm */
Asn2gbLockFunc remotelock;
175
Asn2gbFreeFunc remotefree;
180
} Asn2gbFormat, PNTR Asn2gbFormatPtr;
182
/* structure for storing working parameters while building asn2gb_job structure */
184
/* Asn2gbWork / Asn2gbWorkPtr: scratch state used while the job structure is
 * being built. It holds the collection lists, the section/block lists with
 * their tail pointers, and a set of Boolean display options.
 * NOTE(review): hints on the Boolean groups below are inferred from the field
 * names - confirm against the code that reads them.
 */
typedef struct asn2gbwork {
192
ValNodePtr pubhead; /* for collecting publications */
193
ValNodePtr srchead; /* for collecting biosources */
195
/* linked lists of paragraphs, sections, blocks */
197
ValNodePtr sectionList;
198
ValNodePtr blockList; /* reset for each new section */
200
/* most recent node of linked lists, for quickly adding next node */
202
ValNodePtr lastsection;
203
ValNodePtr lastblock; /* reset for each new section */
207
/* set if doing immediate write at time of creation for web speed */
211
/* section fields needed for populating blocks */
225
Boolean showAllFeats;
228
/* NOTE(review): "con" presumably abbreviates contig - confirm */
Boolean showconfeats;
229
Boolean showconsource;
230
Boolean smartconfeats;
232
/* NOTE(review): near/far presumably distinguish features on the record itself */
/* from features on remotely fetched records - confirm */
Boolean onlyNearFeats;
233
Boolean farFeatsSuppress;
234
Boolean nearFeatsSuppress;
236
Boolean citSubsFirst;
237
Boolean hideGeneFeats;
238
Boolean newLocusLine;
239
Boolean showBaseCount;
241
/* NOTE(review): hide*Feats presumably suppress the named feature category - confirm */
Boolean hideImpFeats;
242
Boolean hideRemImpFeats;
243
Boolean hideSnpFeats;
244
Boolean hideExonFeats;
245
Boolean hideIntronFeats;
246
Boolean hideMiscFeats;
247
Boolean hideCddFeats;
248
Boolean hideCdsProdFeats;
250
/* NOTE(review): GeneRIF display policy (hide all / show only / latest only) - confirm */
Boolean hideGeneRIFs;
251
Boolean onlyGeneRIFs;
252
Boolean latestGeneRIFs;
257
Boolean copyGpsCdsUp;
258
Boolean copyGpsGeneDown;
260
Boolean showContigAndSeq;
262
/* fixed-size buffer of SEQID_MAX_LEN Chars */
Char basename [SEQID_MAX_LEN];
276
} Asn2gbWork, PNTR Asn2gbWorkPtr;
279
/* Seq-hist replacedBy is preformatted into string field, */
280
/* then comment descriptors, Map location:, and Region:, */
281
/* then comment features, finally HTGS */
283
typedef struct comment_block {
286
} CommentBlock, PNTR CommentBlockPtr;
288
/* internal reference block has fields on top of RefBlock fields */
290
/* IntRefBlock / IntRefBlockPtr: per-reference working data.
 * As the field comments state, date is for internal sorting only, while fig,
 * maploc and poly_a are taken from an equivalent gibb pub.
 */
typedef struct int_ref_block {
292
DatePtr date; /* internal sorting use only */
293
SeqLocPtr loc; /* final location on target bioseq */
294
CharPtr authstr; /* author string */
295
Uint2 index; /* index if feature on target bioseq */
296
Boolean justuids; /* gibb pub with uids and Figure, etc. */
297
CharPtr fig; /* figure string from equivalent gibb pub */
298
CharPtr maploc; /* maploc string from equivalent gibb pub */
299
Boolean poly_a; /* poly_a field from equivalent gibb pub */
300
} IntRefBlock, PNTR IntRefBlockPtr;
302
/* internal source block has fields on top of BaseBlock fields */
304
/* IntSrcBlock / IntSrcBlockPtr: per-source working data. */
typedef struct int_src_block {
306
/* NOTE(review): presumably TRUE when the biosource came from a descriptor */
/* rather than a feature - confirm */
Boolean is_descriptor;
308
/* NOTE(review): presumably marks a synthetic/artificial source - confirm */
Boolean is_synthetic;
314
SeqLocPtr loc; /* final location on target bioseq */
322
} IntSrcBlock, PNTR IntSrcBlockPtr;
324
/* internal feature block has fields on top of FeatBlock fields */
326
/* IntFeatBlock / IntFeatBlockPtr: per-feature working data. The isCDS flag
 * tells readers whether the object is really the larger IntCdsBlock.
 */
typedef struct int_feat_block {
333
Boolean isCDS; /* set if using IntCdsBlock */
335
} IntFeatBlock, PNTR IntFeatBlockPtr;
337
/* internal cds block has fields on top of IntFeatBlock fields */
339
/* IntCdsBlock / IntCdsBlockPtr: CDS-specific extras, both strings taken from
 * a publication.
 * NOTE(review): string ownership (who frees fig and maploc) is not stated
 * here - confirm.
 */
typedef struct int_cds_block {
341
CharPtr fig; /* figure string from pub */
342
CharPtr maploc; /* maploc string from pub */
343
} IntCdsBlock, PNTR IntCdsBlockPtr;
346
/* enumerated qualifier category definitions */
349
Qual_class_ignore = 0,
357
Qual_class_EC_valnode,
368
Qual_class_consplice,
373
Qual_class_organelle,
375
Qual_class_subsource,
376
Qual_class_code_break,
377
Qual_class_anti_codon,
386
Qual_class_trna_codons,
387
Qual_class_translation,
388
Qual_class_transcription,
390
Qual_class_protnames,
398
Qual_class_locus_tag,
402
/* source 'feature' */
404
/* some qualifiers will require additional content verification not
405
explicitly indicated by the class type */
430
SCQUAL_endogenous_virus_name,
431
SCQUAL_environmental_sample,
435
SCQUAL_forma_specialis,
446
SCQUAL_isolation_source,
465
SCQUAL_sequenced_mol,
470
SCQUAL_spec_or_nat_host,
471
SCQUAL_specimen_voucher,
478
SCQUAL_subsource_note,
484
SCQUAL_transposon_name,
492
ASN2GNBK_TOTAL_SOURCE
495
/* 38-entry table of SourceType values.
 * NOTE(review): presumably indexed by OrgMod subtype to get the matching
 * SCQUAL_* index; the hard-coded 38 must stay in step with the definition and
 * with the highest subtype used as an index - confirm.
 */
NLM_EXTERN SourceType orgModToSourceIdx [38];
518
FTQUAL_exception_note,
519
FTQUAL_extra_products,
528
FTQUAL_gene_syn_refseq,
536
FTQUAL_insertion_seq,
545
FTQUAL_old_locus_tag,
549
FTQUAL_PCR_conditions,
553
FTQUAL_product_quals,
554
FTQUAL_prot_activity,
556
FTQUAL_prot_EC_number,
559
FTQUAL_prot_conflict,
574
FTQUAL_selenocysteine,
575
FTQUAL_selenocysteine_note,
579
FTQUAL_standard_name,
580
FTQUAL_transcription,
581
FTQUAL_transcript_id,
582
FTQUAL_transcript_id_note, /* !!! remove October 15, 2003 !!! */
583
FTQUAL_transl_except,
590
FTQUAL_xtra_prod_quals,
591
ASN2GNBK_TOTAL_FEATUR
594
/* size, in Chars, of every link buffer declared below */
#define MAX_WWWBUF 328
596
/* Link buffers, one per link kind, each MAX_WWWBUF Chars long.
 * NOTE(review): the names suggest these hold URL prefixes used when emitting
 * HTML hyperlinks (feature, sequence, WGS, OMIM, taxonomy, MUID, GO, ...) -
 * confirm where they are filled in and that writes are bounded by MAX_WWWBUF.
 */
NLM_EXTERN Char link_feat [MAX_WWWBUF];
597
NLM_EXTERN Char link_seq [MAX_WWWBUF];
598
NLM_EXTERN Char link_wgs [MAX_WWWBUF];
599
NLM_EXTERN Char link_omim [MAX_WWWBUF];
600
NLM_EXTERN Char ref_link [MAX_WWWBUF];
601
NLM_EXTERN Char nt_link [MAX_WWWBUF];
602
NLM_EXTERN Char doc_link [MAX_WWWBUF];
603
NLM_EXTERN Char ev_link [MAX_WWWBUF];
604
NLM_EXTERN Char ec_link [MAX_WWWBUF];
605
NLM_EXTERN Char link_tax [MAX_WWWBUF];
606
NLM_EXTERN Char link_muid [MAX_WWWBUF];
607
NLM_EXTERN Char link_code [MAX_WWWBUF];
608
NLM_EXTERN Char link_encode [MAX_WWWBUF];
609
NLM_EXTERN Char link_go [MAX_WWWBUF];
611
NLM_EXTERN void FF_www_db_xref(
613
StringItemPtr ffstring,
614
CharPtr db, CharPtr identifier
617
NLM_EXTERN Boolean StringIsJustQuotes (
622
typedef struct sourcequal {
625
} SourceQual, PNTR SourceQualPtr;
627
/* table with one SourceQual entry per source qualifier index */
/* (ASN2GNBK_TOTAL_SOURCE entries) */
NLM_EXTERN SourceQual asn2gnbk_source_quals [ASN2GNBK_TOTAL_SOURCE];
629
/* 30-entry table of SourceType values.
 * NOTE(review): presumably indexed by SubSource subtype to get the matching
 * SCQUAL_* index; the hard-coded 30 must stay in step with the definition -
 * confirm.
 */
NLM_EXTERN SourceType subSourceToSourceIdx [30];
631
NLM_EXTERN void DoOneSection (
641
Boolean onePartOfSeg,
645
NLM_EXTERN void DoOneBioseq (
650
NLM_EXTERN BaseBlockPtr Asn2gbAddBlock (
656
/* WWW support, keyed on the job ajp.
 * NOTE(review): InitWWW / FiniWWW presumably set up and tear down hyperlink
 * state, and GetWWW presumably reports whether HTML links are enabled for the
 * job - confirm in the implementation.
 */
NLM_EXTERN void InitWWW (IntAsn2gbJobPtr ajp);
657
NLM_EXTERN void FiniWWW (IntAsn2gbJobPtr ajp);
658
NLM_EXTERN Boolean GetWWW (IntAsn2gbJobPtr ajp);
660
/* FFGetString returns a StringItemPtr obtained through the job ajp;
 * FFRecycleString hands a StringItemPtr back to the job.
 * NOTE(review): presumably a per-job pool of StringItems - confirm whether a
 * recycled string may still be used by the caller.
 */
NLM_EXTERN StringItemPtr FFGetString (IntAsn2gbJobPtr ajp);
661
NLM_EXTERN void FFRecycleString (IntAsn2gbJobPtr ajp, StringItemPtr ffstring);
662
NLM_EXTERN void FFAddOneChar (
665
Boolean convertQuotes
667
/* NOTE(review): presumably appends a line break to ffstring - confirm */
NLM_EXTERN void FFAddNewLine(StringItemPtr ffstring);
668
NLM_EXTERN void FFAddNChar (
672
Boolean convertQuotes
674
/* Tilde processors: each takes the destination string and the address of a
 * source cursor (CharPtr PNTR cpp).
 * NOTE(review): presumably each consumes the tilde sequence at *cpp, appends
 * its replacement to the destination and advances *cpp; the three variants
 * presumably correspond to TILDE_EXPAND, TILDE_TO_SPACES and TILDE_OLD_EXPAND -
 * confirm.
 */
NLM_EXTERN void FFExpandTildes (StringItemPtr sip, CharPtr PNTR cpp);
675
NLM_EXTERN void FFReplaceTildesWithSpaces (StringItemPtr ffstring, CharPtr PNTR cpp);
676
NLM_EXTERN void FFOldExpand (StringItemPtr sip, CharPtr PNTR cpp);
677
/* NOTE(review): presumably appends string to ffstring applying comment-style tilde handling - confirm */
NLM_EXTERN void AddCommentStringWithTildes (StringItemPtr ffstring, CharPtr string);
678
NLM_EXTERN void AddCommentWithURLlinks (
680
StringItemPtr ffstring,
685
/* NOTE(review): presumably appends string to ffstring with tilde handling applied - confirm */
NLM_EXTERN void AddStringWithTildes (StringItemPtr ffstring, CharPtr string);
686
/* NOTE(review): tildeAction is presumably one of the TILDE_* constants and */
/* selects how the tilde at *cpp is handled - confirm */
NLM_EXTERN void FFProcessTildes (StringItemPtr sip, CharPtr PNTR cpp, Int2 tildeAction);
687
/* NOTE(review): presumably ensures the string ends with a period - confirm */
NLM_EXTERN void FFAddPeriod (StringItemPtr sip);
688
NLM_EXTERN void FFAddOneString (
692
Boolean convertQuotes,
695
NLM_EXTERN void FFCatenateSubString (
697
StringItemPtr start_sip, Int4 start_pos,
698
StringItemPtr end_sip, Int4 end_pos
700
/* Returns a CharPtr built from sip.
 * NOTE(review): presumably a newly allocated flat copy the caller must free -
 * confirm ownership.
 */
NLM_EXTERN CharPtr FFToCharPtr (StringItemPtr sip);
701
/* Positions inside a string are expressed as an (item, offset) pair.
 * NOTE(review): FFSkipLink presumably advances such a pair past an embedded
 * hyperlink, and FFIsStartOfLink presumably tests whether one starts at the
 * given position - confirm.
 */
NLM_EXTERN void FFSkipLink (StringItemPtr PNTR iterp, Int4Ptr ip);
702
NLM_EXTERN Boolean FFIsStartOfLink (StringItemPtr iter, Int4 pos);
703
/* NOTE(review): presumably stores the current end position of ffstring into */
/* *bufptr / *posptr for later use - confirm */
NLM_EXTERN void FFSavePosition(StringItemPtr ffstring, StringItemPtr PNTR bufptr, Int4 PNTR posptr);
704
NLM_EXTERN void FFTrim (
705
StringItemPtr ffstring,
706
StringItemPtr line_start,
709
);NLM_EXTERN void FFCalculateLineBreak (
710
StringItemPtr PNTR break_sip, Int4 PNTR break_pos,
711
Int4 init_indent, Int4 visible
713
NLM_EXTERN void FFLineWrap (
719
CharPtr eb_line_prefix
721
NLM_EXTERN void FFStartPrint (
730
CharPtr eb_line_prefix,
733
NLM_EXTERN void FFAddTextToString (
734
StringItemPtr ffstring,
739
Boolean convertQuotes,
742
NLM_EXTERN CharPtr FFEndPrint (
744
StringItemPtr ffstring,
750
CharPtr eb_line_prefix
752
/* NOTE(review): presumably the total number of characters held in ffstring - confirm */
NLM_EXTERN Uint4 FFLength(StringItemPtr ffstring);
753
/* NOTE(review): presumably the character at offset pos of ffstring; behavior */
/* for pos past the end is not documented here - confirm */
NLM_EXTERN Char FFCharAt(StringItemPtr ffstring, Uint4 pos);
754
NLM_EXTERN Char FFFindChar (
755
StringItemPtr ffstring,
756
StringItemPtr start_buf,
761
/* NOTE(review): presumably TRUE when ffstring holds no characters - confirm */
NLM_EXTERN Boolean FFEmpty(StringItemPtr ffstring);
762
NLM_EXTERN Int4 FFStringSearch (
764
const CharPtr pattern,
767
NLM_EXTERN Boolean IsWholeWordSubstr (
768
StringItemPtr searchStr,
773
/* NOTE(review): presumably prepends a node carrying a copy of str (tagged with */
/* choice) to the list at *head and returns the new node - confirm */
NLM_EXTERN ValNodePtr ValNodeCopyStrToHead (ValNodePtr PNTR head, Int2 choice, CharPtr str);
774
NLM_EXTERN CharPtr MergeFFValNodeStrs (
777
NLM_EXTERN void AddValNodeString (
778
ValNodePtr PNTR head,
783
NLM_EXTERN void FFAddString_NoRedund (
784
StringItemPtr unique,
789
/* Both functions receive only the string pointer, with no length argument.
 * NOTE(review): presumably they edit someString in place; if a period is
 * appended the caller's buffer needs room for it - confirm. The s_ prefix is
 * unusual for functions exported through a header - confirm it is intended.
 */
NLM_EXTERN void s_AddPeriodToEnd (CharPtr someString);
790
/* NOTE(review): the Boolean presumably reports whether a period was removed - confirm */
NLM_EXTERN Boolean s_RemovePeriodFromEnd (CharPtr someString);
791
NLM_EXTERN Boolean IsEllipsis (
794
NLM_EXTERN void A2GBSeqLocReplaceID (
798
NLM_EXTERN CharPtr asn2gb_PrintDate (
801
NLM_EXTERN CharPtr DateToFF (
807
NLM_EXTERN CharPtr FFFlatLoc (
814
/* NOTE(review): presumably appends the location text loc to ffstring, wrapped */
/* in a hyperlink when WWW output is active - confirm */
NLM_EXTERN void FF_www_featloc(StringItemPtr ffstring, CharPtr loc);
816
NLM_EXTERN CharPtr GetMolTypeQual (
820
NLM_EXTERN void AddFeatureToGbseq (
827
NLM_EXTERN SeqIdPtr SeqLocIdForProduct (
831
NLM_EXTERN CharPtr GetAuthorsString (
834
CharPtr PNTR consortP,
839
NLM_EXTERN AuthListPtr GetAuthListPtr (
844
NLM_EXTERN Int2 MatchRef (
846
RefBlockPtr PNTR rbpp,
850
NLM_EXTERN SeqLocPtr SeqLocReMapEx (
859
NLM_EXTERN CharPtr Get3LetterSymbol (
862
SeqCodeTablePtr table,
866
NLM_EXTERN CharPtr CleanQualValue (
869
/* NOTE(review): presumably collapses runs of spaces in str in place and */
/* returns str - confirm */
NLM_EXTERN CharPtr Asn2gnbkCompressSpaces (CharPtr str);
870
NLM_EXTERN CharPtr StripAllSpaces (
874
/* Looks up data for the identifier gi and reports success as a Boolean.
 * No buffer length is passed alongside buf, so the caller is responsible for
 * supplying a buffer that is large enough.
 * NOTE(review): presumably writes an accession.version string into buf -
 * confirm the required minimum size (SEQID_MAX_LEN?).
 */
NLM_EXTERN Boolean GetAccnVerFromServer (Int4 gi, CharPtr buf);
876
/* String tables declared with unspecified length; the element counts are fixed
 * by the definitions elsewhere.
 * NOTE(review): presumably display names for bond, site and secondary-structure
 * types, GO qualifier and field names, and the accepted db_xref database names
 * (general and RefSeq-only) - confirm, including whether each table is
 * NULL-terminated so it can be scanned safely.
 */
NLM_EXTERN CharPtr bondList [];
877
NLM_EXTERN CharPtr siteList [];
878
NLM_EXTERN CharPtr secStrText [];
880
NLM_EXTERN CharPtr goQualType [];
881
NLM_EXTERN CharPtr goFieldType [];
883
NLM_EXTERN CharPtr legalDbXrefs [];
884
NLM_EXTERN CharPtr legalRefSeqDbXrefs [];
887
NLM_EXTERN void AddFeatureBlock (
890
NLM_EXTERN Boolean AddReferenceBlock (
894
NLM_EXTERN void AddSourceFeatBlock (
897
NLM_EXTERN void AddFeatureBlock (
900
NLM_EXTERN void AddLocusBlock (
903
NLM_EXTERN void AddAccessionBlock (
906
NLM_EXTERN void AddVersionBlock (
909
NLM_EXTERN void AddDbsourceBlock (
912
NLM_EXTERN void AddDateBlock (
915
NLM_EXTERN void AddDeflineBlock (
918
NLM_EXTERN void AddKeywordsBlock (
921
NLM_EXTERN void AddSegmentBlock (
925
NLM_EXTERN void AddSourceBlock (
928
NLM_EXTERN void AddOrganismBlock (
931
NLM_EXTERN void AddCommentBlock (
934
NLM_EXTERN void AddPrimaryBlock (
937
NLM_EXTERN void AddFeatHeaderBlock (
940
NLM_EXTERN void AddSourceFeatBlock (
943
NLM_EXTERN void AddWGSBlock (
946
NLM_EXTERN void AddGenomeBlock (
949
NLM_EXTERN void AddContigBlock (
952
NLM_EXTERN void AddBasecountBlock (
955
NLM_EXTERN void AddOriginBlock (
958
NLM_EXTERN void AddSequenceBlock (
961
NLM_EXTERN void AddSlashBlock (
965
NLM_EXTERN CharPtr DefaultFormatBlock (
969
NLM_EXTERN CharPtr FormatSourceBlock (
973
NLM_EXTERN CharPtr FormatOrganismBlock (
977
NLM_EXTERN CharPtr FormatReferenceBlock (
981
NLM_EXTERN CharPtr FormatCommentBlock (
985
NLM_EXTERN CharPtr FormatFeatHeaderBlock (
989
NLM_EXTERN CharPtr FormatSourceFeatBlock (
993
NLM_EXTERN CharPtr FormatFeatureBlock (
997
NLM_EXTERN CharPtr FormatBasecountBlock (
1001
NLM_EXTERN CharPtr FormatSequenceBlock (
1002
Asn2gbFormatPtr afp,
1005
NLM_EXTERN CharPtr FormatContigBlock (
1006
Asn2gbFormatPtr afp,
1009
NLM_EXTERN CharPtr FormatSlashBlock (
1010
Asn2gbFormatPtr afp,
1014
NLM_EXTERN void PrintFtableIntervals (
1015
ValNodePtr PNTR head,
1020
NLM_EXTERN void PrintFtableLocAndQuals (
1021
IntAsn2gbJobPtr ajp,
1022
ValNodePtr PNTR head,
1025
SeqMgrFeatContextPtr context
1027
NLM_EXTERN CharPtr FormatFtableSourceFeatBlock (
1032
NLM_EXTERN void DoImmediateRemoteFeatureFormat (
1033
Asn2gbFormatPtr afp,
1038
NLM_EXTERN void DoImmediateFormat (
1039
Asn2gbFormatPtr afp,
1050
/* Leave NLM_EXTERN expanding to NLM_EXPORT for whatever is compiled after this
 * header.
 * NOTE(review): a preceding #undef NLM_EXTERN (and an #ifdef NLM_EXPORT guard)
 * is expected before this define - confirm it is present, otherwise this is a
 * macro redefinition.
 */
#define NLM_EXTERN NLM_EXPORT
1055
#endif /* ndef _ASN2NGNBI_ */