2
* ===========================================================================
5
* National Center for Biotechnology Information (NCBI)
7
* This software/database is a "United States Government Work" under the
8
* terms of the United States Copyright Act. It was written as part of
9
* the author's official duties as a United States Government employee and
10
* thus cannot be copyrighted. This software/database is freely available
11
* to the public for use. The National Library of Medicine and the U.S.
12
* Government do not place any restriction on its use or reproduction.
13
* We would, however, appreciate having the NCBI and the author cited in
14
* any work or product based on this material
16
* Although all reasonable efforts have been taken to ensure the accuracy
17
* and reliability of the software and data, the NLM and the U.S.
18
* Government do not and cannot warrant the performance or results that
19
* may be obtained by using this software or data. The NLM and the U.S.
20
* Government disclaim all warranties, express or implied, including
21
* warranties of performance, merchantability or fitness for any particular
24
* ===========================================================================
26
* File Name: asn2gnb1.c
28
* Author: Karl Sirotkin, Tom Madden, Tatiana Tatusov, Jonathan Kans,
31
* Version Creation Date: 10/21/98
35
* File Description: New GenBank flatfile generator - work in progress
38
* --------------------------------------------------------------------------
39
* ==========================================================================
57
#include <alignmgr2.h>
66
/* utility functions */
68
NLM_EXTERN ValNodePtr ValNodeCopyStrToHead (ValNodePtr PNTR head, Int2 choice, CharPtr str)
73
if (head == NULL || str == NULL) return NULL;
75
newnode = ValNodeNew (NULL);
76
if (newnode == NULL) return NULL;
78
newnode->choice = (Uint1) choice;
79
newnode->data.ptrvalue = StringSave (str);
81
newnode->next = *head;
87
/* the val node strings mechanism will be replaced by a more efficient method later */
89
NLM_EXTERN CharPtr MergeFFValNodeStrs (
101
if (list == NULL) return NULL;
103
for (vnp = list, len = 0; vnp != NULL; vnp = vnp->next) {
104
str = (CharPtr) vnp->data.ptrvalue;
105
len += StringLen (str);
107
if (len == 0) return NULL;
109
ptr = MemNew (sizeof (Char) * (len + 2));
110
if (ptr == NULL) return NULL;
112
for (vnp = list, tmp = ptr; vnp != NULL; vnp = vnp->next) {
113
str = (CharPtr) vnp->data.ptrvalue;
114
tmp = StringMove (tmp, str);
121
NLM_EXTERN void AddValNodeString (
122
ValNodePtr PNTR head,
130
CharPtr freeme = NULL;
135
len = StringLen (prefix) + StringLen (string) + StringLen (suffix);
136
if (len == 0) return;
138
if (len < sizeof (buf)) {
140
/* if new string fits in stack buffer, no need to allocate */
142
MemSet ((Pointer) buf, 0, sizeof (buf));
147
/* new string bigger than stack buffer, so allocate sufficient string */
149
newstr = (CharPtr) MemNew (sizeof (Char) * (len + 2));
150
if (newstr == NULL) return;
152
/* allocated string will be freed at end of function */
159
if (prefix != NULL) {
160
strptr = StringMove (strptr, prefix);
163
if (string != NULL) {
164
strptr = StringMove (strptr, string);
167
if (suffix != NULL) {
168
strptr = StringMove (strptr, suffix);
171
/* currently just makes a valnode list, to be enhanced later */
173
ValNodeCopyStr (head, 0, newstr);
175
/* if large string was allocated, free it now */
177
if (freeme != NULL) {
183
NLM_EXTERN void FFAddString_NoRedund (
184
StringItemPtr unique,
190
CharPtr str = string;
192
Boolean wholeWord = FALSE;
194
if ( StringHasNoText(prefix) &&
195
StringHasNoText(string) &&
196
StringHasNoText(suffix) ) return;
198
if (StringNICmp (string, "tRNA-", 5) == 0) {
202
while ( foundPos >= 0 && !wholeWord ) {
203
foundPos = FFStringSearch(unique, str, foundPos);
204
if ( foundPos >= 0 ) {
205
wholeWord = IsWholeWordSubstr(unique, foundPos, str);
206
foundPos += StringLen(str);
210
if ( foundPos < 0 || !wholeWord ) {
211
FFAddTextToString(unique, prefix, string, suffix, FALSE, FALSE, TILDE_IGNORE);
217
/* s_AddPeriodToEnd () -- Adds a '.' to the end of a given string if */
218
/* there is not already one there. */
220
/* Note that this adds one character to the */
221
/* length of the string, leading to a */
222
/* memory overrun if space was not previously */
223
/* allocated for this. */
225
NLM_EXTERN void s_AddPeriodToEnd (CharPtr someString)
229
if (StringHasNoText (someString)) return;
230
len = StringLen (someString);
232
if (someString[len-1] != '.')
234
someString[len] = '.';
235
someString[len+1] = '\0';
239
/* s_RemovePeriodFromEnd () -- If the last character in a given */
240
/* string is a '.', removes it. */
242
NLM_EXTERN Boolean s_RemovePeriodFromEnd (CharPtr someString)
246
if (StringHasNoText (someString)) return FALSE;
247
len = StringLen (someString);
248
if (len < 1) return FALSE;
249
if (someString[len-1] == '.') {
250
someString[len-1] = '\0';
257
/* IsEllipsis () - Determines if a string ends in an ellipsis ("...") */
260
NLM_EXTERN Boolean IsEllipsis (
268
if (StringHasNoText (str)) return FALSE;
269
len = StringLen (str);
270
if (len < 3) return FALSE;
272
return (Boolean) (ptr [0] == '.' && ptr [1] == '.' && ptr [2] == '.');
275
NLM_EXTERN void A2GBSeqLocReplaceID (
284
bsp = BioseqFindFromSeqLoc (ajpslp);
285
if (bsp == NULL) return;
286
sip = SeqIdFindBest (bsp->id, 0);
287
SeqLocReplaceID (newloc, sip);
290
/* asn2gb_PrintDate () -- Formats the date dp with DatePrint into the */
/*                        local buffer buf, strips a trailing newline */
/*                        (and a '.' directly before it), and hands   */
/*                        back a StringSave copy of buf.              */
/*                        Returns NULL when dp is NULL.               */
NLM_EXTERN CharPtr asn2gb_PrintDate (
298
if (dp == NULL) return NULL;
300
if (DatePrint (dp, buf)) {
301
/* the literal "Not given" (case-insensitive) is not post-processed here */
if (StringICmp (buf, "Not given") != 0) {
302
len = StringLen (buf);
304
/* NOTE(review): buf [len - 2] below is read whenever the last char is  */
/* '\n'; no check that len >= 2 is visible, so a buf holding only "\n"  */
/* would index buf [-1] -- confirm DatePrint can never produce that.    */
if (buf [len - 1] == '\n') {
305
if (buf [len - 2] == '.') {
306
/* drop both the period and the newline that follows it */
buf [len - 2] = '\0';
308
/* drop just the trailing newline */
buf [len - 1] = '\0';
312
/* caller receives a newly saved copy of buf (via StringSave) */
return StringSave (buf);
319
static CharPtr month_names [] = {
320
"JAN", "FEB", "MAR", "APR", "MAY", "JUN",
321
"JUL", "AUG", "SEP", "OCT", "NOV", "DEC",
325
NLM_EXTERN CharPtr DateToFF (
339
if (dp == NULL) return NULL;
341
if (dp->data [0] == 0) {
343
StringCpy (buf, dp->str);
345
} else if (dp->data [0] == 1) {
347
year = 1900 + (Int2) dp->data [1];
348
month = (Int2) dp->data [2];
349
day = (Int2) dp->data [3];
352
if (month < 1 || month > 12) {
355
if (day < 1 || day > 31) {
359
if (month < 1 || month > 12) {
362
if (day < 1 || day > 31) {
368
sprintf (buf, "??-%s-%ld",
369
month_names [month-1], (long) year);
370
} else if (day < 10) {
371
sprintf (buf, "0%ld-%s-%ld",
372
(long) day, month_names [month-1], (long) year);
374
sprintf(buf, "%ld-%s-%ld",
375
(long) day, month_names [month-1], (long) year);
383
NLM_EXTERN StringItemPtr FFGetString (IntAsn2gbJobPtr ajp)
388
if (ajp == NULL) return NULL;
389
if (ajp->pool != NULL) {
391
ajp->pool = sip->next;
393
MemSet ((Pointer) sip, 0, sizeof (StringItem));
395
sip = (StringItemPtr) MemNew (sizeof (StringItem));
396
if (sip == NULL) return NULL;
404
NLM_EXTERN void FFRecycleString (IntAsn2gbJobPtr ajp, StringItemPtr ffstring)
409
if (ajp == NULL || ffstring == NULL) return;
410
if ( ffstring->pos == -1 ) return;
414
while (nxt->next != NULL) {
418
nxt->next = ajp->pool;
419
ajp->pool = ffstring;
421
ffstring->curr = NULL;
424
NLM_EXTERN void FFAddOneChar (
427
Boolean convertQuotes
430
StringItemPtr current = sip->curr;
432
if ( current->pos == STRING_BUF_LEN ) {
433
current->next = FFGetString(sip->iajp);
434
current = current->next;
439
if ( convertQuotes && ch == '\"' ) {
442
current->buf[current->pos] = ch;
446
NLM_EXTERN void FFAddNewLine(StringItemPtr ffstring) {
447
FFAddOneChar(ffstring, '\n', FALSE);
450
NLM_EXTERN void FFAddNChar (
454
Boolean convertQuotes
459
for ( i = 0; i < n; ++i ) {
460
FFAddOneChar(sip, ch, convertQuotes);
465
NLM_EXTERN void FFExpandTildes (StringItemPtr sip, CharPtr PNTR cpp) {
466
Char replace = **cpp;
468
if ( **cpp == '~' ) {
469
if ( *((*cpp) + 1) == '~' ) { /* "~~" -> '~' */
477
FFAddOneChar(sip, replace, FALSE);
481
NLM_EXTERN void FFReplaceTildesWithSpaces (StringItemPtr ffstring, CharPtr PNTR cpp) {
482
Char replace = **cpp, lookahead;
485
if ( *cptr == '`' ) {
486
FFAddOneChar(ffstring, replace, FALSE);
491
lookahead = *(cptr + 1);
493
if ( IS_DIGIT(lookahead) ) {
497
if ( (lookahead == ' ') || (lookahead == '(') ) {
498
if ( IS_DIGIT(*(cptr + 2)) ) {
504
FFAddOneChar(ffstring, replace, FALSE);
507
NLM_EXTERN void FFOldExpand (StringItemPtr sip, CharPtr PNTR cpp) {
508
/* "~" -> "\n", "~~" or "~~ ~~" -> "\n\n" */
511
Char next = *(cp + 1);
514
if ( current == '`' ) {
516
FFAddOneChar(sip, current, FALSE);
518
FFAddOneChar(sip, '~', FALSE);
524
/* handle "~", "~~" or "~~ ~~" */
525
FFAddOneChar(sip, '\n', FALSE);
527
FFAddOneChar(sip, '\n', FALSE);
535
if ( *cp == '~' ) { /* saw "~~ ~~" */
543
NLM_EXTERN void AddCommentStringWithTildes (StringItemPtr ffstring, CharPtr string)
545
/* One "~" is a new line, "~~" or "~~ ~~" means 2 returns */
549
while (*string != '\0') {
550
if (*string == '`' && *(string+1) == '~') {
551
FFAddOneChar(ffstring, '~', FALSE);
553
} else if (*string == '~') {
554
FFAddOneChar(ffstring, '\n', FALSE);
556
if (*string == '~') {
558
for (i = 0; i < 12; i++) {
559
FFAddOneChar(ffstring, ' ', FALSE);
562
FFAddOneChar(ffstring, '\n', FALSE);
564
if (*string == ' ' && *(string+1) == '~' && *(string+2) == '~') {
568
} else if (*string == '\"') {
570
FFAddOneChar(ffstring, *string, FALSE);
573
FFAddOneChar(ffstring, *string, FALSE);
577
} /* AddCommentStringWithTildes */
580
NLM_EXTERN void AddStringWithTildes (StringItemPtr ffstring, CharPtr string)
582
/* One "~" is a new line, "~~" or "~~ ~~" means 2 returns */
584
while (*string != '\0') {
585
if (*string == '`' && *(string+1) == '~') {
586
FFAddOneChar(ffstring, '~', FALSE);
588
} else if (*string == '~') {
589
FFAddOneChar(ffstring, '\n', FALSE);
591
if (*string == '~') {
592
FFAddOneChar(ffstring, '\n', FALSE);
594
if (*string == ' ' && *(string+1) == '~' && *(string+2) == '~') {
598
} else if (*string == '\"') {
600
FFAddOneChar(ffstring, *string, FALSE);
603
FFAddOneChar(ffstring, *string, FALSE);
607
} /* AddStringWithTildes */
610
NLM_EXTERN void FFProcessTildes (StringItemPtr sip, CharPtr PNTR cpp, Int2 tildeAction) {
612
switch (tildeAction) {
615
FFExpandTildes(sip, cpp);
618
case TILDE_OLD_EXPAND :
619
FFOldExpand(sip, cpp);
622
case TILDE_TO_SPACES :
623
FFReplaceTildesWithSpaces (sip, cpp);
628
FFAddOneChar(sip, **cpp, FALSE);
633
/* FFAddPeriod () -- Scans backwards from the end of sip's current     */
/*                   buffer over ' ', '\t', '~', '.' and '\n', then    */
/*                   appends a single '.' with FFAddOneChar.           */
/*                   Returns without doing anything when sip or its    */
/*                   iajp job pointer is NULL.                         */
NLM_EXTERN void FFAddPeriod (StringItemPtr sip) {
636
/* NOTE(review): sip->curr is dereferenced in this initializer BEFORE the */
/* sip == NULL guard below, so the guard cannot protect against a NULL   */
/* sip; the initialization should move after the check.                  */
StringItemPtr riter = sip->curr, prev;
639
if ( sip == NULL ) return;
640
ajp = (IntAsn2gbJobPtr)sip->iajp;
641
if ( ajp == NULL ) return;
643
/* walk the current buffer from its last used position towards index 0 */
for ( i = riter->pos - 1; i >= 0; --i ) {
646
/* characters treated as trailing filler/punctuation by this scan */
if ( (ch == ' ') || (ch == '\t') || (ch == '~') || (ch == '.') || (ch == '\n')) {
649
/* the scan ran past the start of a buffer that is not the head item: */
/* locate the item preceding riter and recycle riter via the job pool  */
if ( i < 0 && riter != sip ) {
650
for ( prev = sip; prev->next != NULL; prev = prev->next ) {
651
if ( prev->next == riter ) {
653
FFRecycleString(ajp, riter);
668
/* finally terminate the text with exactly one period */
FFAddOneChar(sip, '.', FALSE);
672
NLM_EXTERN void FFAddOneString (
676
Boolean convertQuotes,
680
CharPtr strp = string;
682
if ( string == NULL ) return;
684
while ( *strp != '\0' ) {
685
if ( (*strp == '`') || (*strp == '~') ) {
686
FFProcessTildes(sip, &strp, tildeAction);
688
FFAddOneChar(sip, *strp, convertQuotes);
698
NLM_EXTERN void FFCatenateSubString (
700
StringItemPtr start_sip, Int4 start_pos,
701
StringItemPtr end_sip, Int4 end_pos
704
Int4 max_i, min_i, i;
705
StringItemPtr current;
706
Boolean in_url = FALSE;
707
IntAsn2gbJobPtr ajp = (IntAsn2gbJobPtr)dest->iajp;
710
for ( current = start_sip, i = start_pos;
712
current = current->next ) {
713
if ( current == start_sip ) {
719
if ( current == end_sip ) {
722
max_i = current->pos;
725
for ( i = min_i; i < max_i; ++i ) {
726
if ( current->buf[i] == '<' ) {
727
if ( !FFIsStartOfLink(current, i) ) {
728
FFAddOneString(dest, "<", FALSE, FALSE, TILDE_IGNORE);
734
if ( current->buf[i] == '>' ) {
736
FFAddOneString(dest, ">", FALSE, FALSE, TILDE_IGNORE);
743
FFAddOneChar(dest, current->buf[i], FALSE);
746
if ( current == end_sip ) break;
749
for ( current = start_sip, i = start_pos;
751
current = current->next ) {
752
if ( current == start_sip ) {
758
if ( current == end_sip ) {
761
max_i = current->pos;
764
for ( i = min_i; i < max_i; ++i ) {
765
FFAddOneChar(dest, current->buf[i], FALSE);
768
if ( current == end_sip ) break;
774
NLM_EXTERN CharPtr FFToCharPtr (StringItemPtr sip) {
777
CharPtr result, temp;
779
for ( iter = sip; iter != NULL; iter = iter->next ) {
783
result = (CharPtr)MemNew(size + 2);
786
for ( iter = sip; iter != NULL; iter = iter->next ) {
787
for ( i = 0; i < iter->pos; ++i ) {
788
*temp = iter->buf[i];
799
/* word wrap functions */
801
NLM_EXTERN void FFSkipLink (StringItemPtr PNTR iterp, Int4Ptr ip) {
802
StringItemPtr iter = *iterp;
805
while ( (iter != NULL) && (iter->buf[i] != '>') ) {
808
if ( i == iter->pos ) {
814
if ( i == iter->pos && iter->next != NULL ) {
823
NLM_EXTERN Boolean FFIsStartOfLink (StringItemPtr iter, Int4 pos) {
824
static CharPtr start_link = "<A HREF";
825
static CharPtr end_link = "</A>";
826
Int4 start_len = StringLen(start_link);
827
Int4 end_len = StringLen(end_link);
831
if ( iter == NULL || pos >= iter->pos ) return FALSE;
832
if ( iter->buf[pos] != '<' ) return FALSE;
834
MemSet(temp, 0, sizeof(temp));
835
for ( i = 0; i < start_len && iter != NULL; ++i ) {
836
if ( pos + i < iter->pos ) {
837
temp[i] = iter->buf[pos+i];
838
if ( i == end_len - 1 ) {
839
if ( StringNICmp(temp, end_link, end_len) == 0 ) {
850
if ( i == start_len ) {
851
if ( StringNICmp(temp, start_link, start_len) == 0 ) {
860
NLM_EXTERN void FFSavePosition(StringItemPtr ffstring, StringItemPtr PNTR bufptr, Int4 PNTR posptr) {
861
*bufptr = ffstring->curr;
862
*posptr = ffstring->curr->pos;
866
NLM_EXTERN void FFTrim (
867
StringItemPtr ffstring,
868
StringItemPtr line_start,
873
StringItemPtr riter, iter;
875
IntAsn2gbJobPtr ajp = (IntAsn2gbJobPtr)ffstring->iajp;
877
for ( i = 0; i < line_prefix_len; ++i ) {
879
if ( line_pos == STRING_BUF_LEN ) {
881
line_start= line_start->next;
885
riter = ffstring->curr;
886
while ( riter != NULL ) {
887
for ( i = riter->pos - 1;
888
(i >= 0) && !(riter == line_start && i <= line_pos);
890
if ( !IS_WHITESP(riter->buf[i]) || (riter->buf[i] == '\n') ) {
895
i = STRING_BUF_LEN - 1;
896
for ( iter = ffstring; iter != NULL; iter = iter->next ) {
897
if ( iter->next == riter ) {
907
ffstring->curr = riter;
911
FFRecycleString(ajp, riter->next);
920
/* A line is wrapped when the visible text in the line exceeds the line size. */
921
/* Visible text is text that is not an HTML hyper-link. */
922
/* A line may be broken in one of the following characters: */
923
/* space, comma and dash */
924
/* the order of search is first spaces, then commas and then dashes. */
925
/* We need to take into account the possibility that a 'new-line' character */
926
/* already exists in the line, in such case we break at the 'new-line' */
927
/* spaces, dashes and new-lines will be broken at that character whereas for */
928
/* commas we break at the character following the comma. */
930
NLM_EXTERN void FFCalculateLineBreak (
931
StringItemPtr PNTR break_sip, Int4 PNTR break_pos,
932
Int4 init_indent, Int4 visible
935
StringItemPtr iter, prev;
941
Boolean found_comma = FALSE, found_dash = FALSE;
942
/* each candidate is a pair of buffer and position within this buffer */
943
StringItemPtr candidate_sip_space = NULL,
944
candidate_sip_comma = NULL,
945
candidate_sip_dash = NULL;
946
Int4 candidate_int_space = -1,
947
candidate_int_comma = -1,
948
candidate_int_dash = -1;
954
/* skip the first 'init_indent' characters of the line */
955
while ( iter != NULL && !done ) {
956
for ( i = start; i < iter->pos && init_indent > 0; ++i ) {
957
if ( iter->buf[i] == '\n' ) {
958
candidate_sip_space = iter;
959
candidate_int_space = i;
963
if ( FFIsStartOfLink(iter, i) ) {
964
FFSkipLink(&iter, &i);
972
if ( init_indent > 0 ) {
981
while ( iter != NULL && !done ) {
982
for ( i = start; i < iter->pos; ++i ) {
984
candidate_sip_comma = iter;
985
candidate_int_comma = i;
989
candidate_sip_dash = iter;
990
candidate_int_dash = i;
996
candidate_sip_space = iter;
997
candidate_int_space = i;
1000
} else if ( ch == ' ' ) {
1001
candidate_sip_space = iter;
1002
candidate_int_space = i;
1003
} else if ( ch == ',' ) {
1005
} else if ( ch == '-' ) {
1007
/*candidate_sip_dash = iter;
1008
candidate_int_dash = i;*/
1011
if ( FFIsStartOfLink(iter, i) ) {
1012
FFSkipLink(&iter, &i);
1018
if ( copied >= visible ) {
1019
if ( (candidate_sip_space == NULL) && (candidate_int_space == -1) &&
1020
(candidate_sip_comma == NULL) && (candidate_int_comma == -1) &&
1021
(candidate_sip_dash == NULL) && (candidate_int_dash == -1) ) {
1022
candidate_sip_space = iter;
1023
candidate_int_space = i;
1036
/* the order in which we examine the various candidate breaks is important */
1037
if ( iter == NULL && !done) { /* reached the end */
1039
*break_pos = prev->pos;
1041
if( candidate_sip_space != NULL ) {
1042
*break_sip = candidate_sip_space;
1043
*break_pos = candidate_int_space;
1044
} else if( candidate_sip_comma != NULL ) {
1045
*break_sip = candidate_sip_comma;
1046
*break_pos = candidate_int_comma;
1047
} else if( candidate_sip_dash != NULL ) {
1048
*break_sip = candidate_sip_dash;
1049
*break_pos = candidate_int_dash;
1054
NLM_EXTERN void FFLineWrap (
1060
CharPtr eb_line_prefix
1063
/* line break candidate is a pair <StringItemPtr, position> */
1064
StringItemPtr break_sip = src;
1066
StringItemPtr line_start = NULL;
1068
Int4 i, line_prefix_len = 0;
1071
FFSavePosition(dest, &line_start, &line_pos);
1073
for ( iter = src; iter != NULL; iter = iter->next ) {
1074
for ( i = 0; i < iter->pos; ) {
1078
FFCalculateLineBreak(&break_sip, &break_pos, init_indent, line_max - line_prefix_len + 1);
1079
FFCatenateSubString(dest, iter, i, break_sip, break_pos);
1080
FFTrim(dest, line_start, line_pos, cont_indent);
1081
FFAddOneChar(dest, '\n', FALSE);
1083
FFSavePosition(dest, &line_start, &line_pos);
1088
if ( iter->buf[i-1] == 'X' && iter->buf[i-2] == 'X') {
1089
if ( (i == 2) || ((i > 2) && (iter->buf[i-3] == '\n')) ) {
1095
if ( IS_WHITESP(iter->buf[i]) ) {
1098
if ( iter != src->curr || i < iter->pos ) {
1099
if ( eb_line_prefix != NULL ) {
1100
FFAddOneString(dest, eb_line_prefix, FALSE, FALSE, TILDE_IGNORE);
1102
FFAddNChar(dest, ' ', cont_indent - StringLen(eb_line_prefix), FALSE);
1104
line_prefix_len = cont_indent;
1105
/*FFSkipGarbage(&iter, &i);*/
1113
NLM_EXTERN void FFStartPrint (
1116
Int4 gb_init_indent,
1117
Int4 gb_cont_indent,
1120
Int4 eb_init_indent,
1121
Int4 eb_cont_indent,
1122
CharPtr eb_line_prefix,
1127
if (format == GENBANK_FMT || format == GENPEPT_FMT) {
1128
FFAddNChar(sip, ' ', gb_init_indent, FALSE);
1129
FFAddOneString(sip, gb_label, FALSE, FALSE, TILDE_IGNORE);
1130
FFAddNChar(sip, ' ', gb_tab_to - gb_init_indent - StringLen(gb_label), FALSE);
1131
} else if (format == EMBL_FMT || format == EMBLPEPT_FMT) {
1132
if ( eb_print_xx ) {
1133
FFAddOneString(sip, "XX\n", FALSE, FALSE, TILDE_IGNORE);
1135
FFAddOneString(sip, eb_line_prefix, FALSE, FALSE, TILDE_IGNORE);
1136
FFAddNChar(sip, ' ', eb_init_indent - StringLen(eb_line_prefix), FALSE);
1140
NLM_EXTERN void FFAddTextToString (
1141
StringItemPtr ffstring,
1146
Boolean convertQuotes,
1151
FFAddOneString (ffstring, prefix, FALSE, FALSE, TILDE_IGNORE);
1152
FFAddOneString (ffstring, string, FALSE, convertQuotes, tildeAction);
1153
FFAddOneString (ffstring, suffix, FALSE, FALSE, TILDE_IGNORE);
1156
FFAddPeriod(ffstring);
1161
/* FFEndPrint () -- Line-wraps ffstring into a temporary StringItem     */
/*                  (GenBank/GenPept use the gb_* indents and           */
/*                  ASN2FF_GB_MAX; the other visible call uses the eb_* */
/*                  indents, ASN2FF_EMBL_MAX and eb_line_prefix),       */
/*                  converts the wrapped text with FFToCharPtr and      */
/*                  recycles the temporary item.                        */
/*                  Returns NULL when ffstring or ajp is NULL.          */
NLM_EXTERN CharPtr FFEndPrint (
1162
IntAsn2gbJobPtr ajp,
1163
StringItemPtr ffstring,
1165
Int2 gb_init_indent,
1166
Int2 gb_cont_indent,
1167
Int2 eb_init_indent,
1168
Int2 eb_cont_indent,
1169
CharPtr eb_line_prefix
1172
/* NOTE(review): temp is taken from the pool before the arguments are    */
/* validated.  FFGetString returns NULL for a NULL ajp, but when ajp is  */
/* valid and ffstring is NULL the early return below leaves temp without */
/* a matching FFRecycleString call.                                      */
StringItemPtr temp = FFGetString(ajp);
1175
if ( (ffstring == NULL) || (ajp == NULL) ) return NULL;
1177
if (format == GENBANK_FMT || format == GENPEPT_FMT) {
1178
/* GenBank-style wrapping: no per-line prefix */
FFLineWrap(temp, ffstring, gb_init_indent, gb_cont_indent, ASN2FF_GB_MAX, NULL);
1180
/* EMBL-style wrapping: every line carries eb_line_prefix */
FFLineWrap(temp, ffstring, eb_init_indent, eb_cont_indent, ASN2FF_EMBL_MAX, eb_line_prefix);
1183
/* flatten the wrapped buffers into one string, then return temp to the pool */
result = FFToCharPtr(temp);
1184
FFRecycleString(ajp, temp);
1188
NLM_EXTERN Uint4 FFLength(StringItemPtr ffstring) {
1190
StringItemPtr current;
1192
for ( current = ffstring; current != NULL; current = current->next ) {
1193
len += current->pos;
1200
/* FFCharAt () -- Returns the character at global offset pos of the   */
/*                buffer chain starting at ffstring.  The chain is    */
/*                walked, summing each item's pos field, until the    */
/*                running total exceeds the requested offset.         */
NLM_EXTERN Char FFCharAt(StringItemPtr ffstring, Uint4 pos) {
1201
Uint4 count = 0, inbufpos;
1202
StringItemPtr current = NULL;
1204
/* NOTE(review): the in-buffer offset is derived with a modulo, which is */
/* only the right index if every buffer before the selected one holds    */
/* exactly STRING_BUF_LEN characters -- confirm that invariant holds     */
/* after trimming/recycling of buffers.                                  */
inbufpos = pos % STRING_BUF_LEN;
1206
/* find the first buffer whose cumulative length covers pos */
for ( current = ffstring; current != NULL; current = current->next ) {
1207
count += current->pos;
1208
if ( count > pos ) break;
1211
/* current == NULL means pos lies beyond the stored text.               */
/* NOTE(review): inbufpos <= pos is always true for unsigned operands   */
/* (a remainder never exceeds its dividend), so only the NULL test      */
/* actually filters anything here.                                      */
if ( current != NULL && inbufpos <= pos ) {
1212
return current->buf[inbufpos];
1219
/* FFFindChar () -- Fetches the character at global offset new_pos,    */
/*                  using the previously resolved location             */
/*                  (start_buf, start_pos) of offset old_pos as a      */
/*                  starting point; one visible branch falls back to   */
/*                  FFCharAt on the whole chain.                       */
NLM_EXTERN Char FFFindChar (
1220
StringItemPtr ffstring, /* StringItem to search in */
1221
StringItemPtr start_buf, /* the position of the last char searched for (buffer) */
1222
Uint4 start_pos, /* the position of the last char searched for (pos) */
1223
Uint4 old_pos, /* the global position searched for */
1224
Uint4 new_pos /* new search position */
1229
StringItemPtr current = NULL;
1233
/* same offset as last time: reuse the cached location directly */
if ( new_pos == old_pos ) {
1234
result = start_buf->buf[start_pos];
1237
/* moving forward: count characters remaining after start_pos and walk on */
if ( new_pos > old_pos ) {
1238
delta = new_pos - old_pos;
1239
current = start_buf;
1240
count = current->pos - start_pos - 1;
1241
current = current->next;
1243
/* NOTE(review): inside this loop current is advanced and then           */
/* dereferenced (current->pos) without re-testing for NULL, so reaching  */
/* the end of the chain dereferences a NULL pointer.  It also looks as   */
/* if the buffer selected just before the loop is never added to count   */
/* -- verify the intended accounting.                                    */
while ( delta > count && current != NULL ) {
1244
current = current->next;
1245
count += current->pos;
1248
if ( current != NULL ) {
1249
/* index inside the located buffer; relies on full preceding buffers */
result = current->buf[new_pos % STRING_BUF_LEN];
1252
} else /* new_pos < old_pos */ {
1253
delta = old_pos - new_pos;
1254
/* target is still inside start_buf when the step back does not cross its start */
if ( old_pos % STRING_BUF_LEN >= delta ) {
1255
result = start_buf->buf[new_pos % STRING_BUF_LEN];
1257
/* otherwise resolve the offset from the head of the chain */
result = FFCharAt(ffstring, new_pos);
1264
NLM_EXTERN Boolean FFEmpty(StringItemPtr ffstring) {
1265
if ( ffstring != NULL && ffstring->pos != 0 ) {
1272
* Compute the right-most position in the pattern at which character a occurs,
1273
* for each character a in the alphabet (assumed ASCII-ISO 8859-1)
1275
* The result is returned in the supplied vector.
1277
/* ComputeLastOccurrence () -- fills last_occurrence[0..255] so that the  */
/* entry for each byte value holds the largest index at which that byte   */
/* appears in pattern.  Entries for bytes not in the pattern stay 0, the  */
/* same value stored for a byte whose last occurrence is index 0.         */
static void ComputeLastOccurrence(const CharPtr pattern, Uint4 last_occurrence[])
1282
/* Initialize vector */
1283
for ( i = 0; i < 256; ++i ) {
1284
last_occurrence[i] = 0;
1287
/* compute right-most occurrence */
1288
pat_len = StringLen(pattern);
1289
/* later (right-most) positions overwrite earlier ones; the Uint1 cast */
/* keeps the table index in 0..255 regardless of Char signedness       */
for ( i = 0; i < pat_len; ++i ) {
1290
last_occurrence[(Uint1)pattern[i]] = i;
1294
/* ComputePrefix () -- fills longest_prefix[0 .. StringLen(pattern)-1].  */
/* Looks like the usual Knuth-Morris-Pratt prefix (failure) function:    */
/* entry q is the value of k after falling back through earlier entries  */
/* while pattern[k] and pattern[q] differ.  The statement executed when  */
/* they match is not visible in this excerpt -- presumably it advances   */
/* k; confirm against the full source.                                   */
static void ComputePrefix(const CharPtr pattern, Uint4 longest_prefix[])
1296
Uint4 pat_len = StringLen(pattern);
1299
/* written unconditionally, so the caller's array needs at least one */
/* element even when pattern is the empty string                     */
longest_prefix[0] = 0;
1302
for ( q = 1; q < pat_len; ++q ) {
1303
/* on mismatch, fall back to the next shorter candidate border */
while ( k > 0 && pattern[k] != pattern[q] ) {
1304
k = longest_prefix[k - 1];
1306
if ( pattern[k] == pattern[q] ) {
1309
longest_prefix[q] = k;
1314
/*
 * ComputeGoodSuffix () -- builds the Boyer-Moore good-suffix shift table.
 *
 *   pattern      the search pattern (NUL-terminated)
 *   good_suffix  caller-supplied array with at least StringLen(pattern)
 *                entries; entry j receives the shift to apply after a
 *                mismatch at pattern index j
 *
 * The table is derived from the prefix function of the pattern and of the
 * reversed pattern.  If any of the scratch allocations fails the function
 * returns early and good_suffix is left untouched.
 *
 * Fix: the reversed pattern was previously written to index pat_len - i,
 * which stored pattern[0] in the terminator slot and never filled index 0.
 * The reversal therefore never held the reversed text, and the refinement
 * loop below worked from a wrong prefix table.  The reversal now uses
 * index pat_len - 1 - i and the terminator is written explicitly.
 */
static void ComputeGoodSuffix(const CharPtr pattern, Uint4 good_suffix[])
{
  Uint4     pat_len = StringLen(pattern);
  Uint4Ptr  longest_prefix, reverse_longest_prefix;
  CharPtr   reverse_pattern;
  Uint4     i, j;

  /* allocate memory */
  longest_prefix = MemNew(pat_len * sizeof(Uint4));
  reverse_longest_prefix = MemNew(pat_len * sizeof(Uint4));
  reverse_pattern = MemNew((pat_len + 1) * sizeof(Char));

  if ( longest_prefix == NULL ||
       reverse_longest_prefix == NULL ||
       reverse_pattern == NULL ) {
    MemFree(longest_prefix);
    MemFree(reverse_longest_prefix);
    MemFree(reverse_pattern);
    return;
  }

  /* compute reverse pattern: pattern[i] lands at mirrored index */
  for ( i = 0; i < pat_len; ++i ) {
    reverse_pattern[pat_len - 1 - i] = pattern[i];
  }
  reverse_pattern[pat_len] = '\0';

  ComputePrefix(pattern, longest_prefix);
  ComputePrefix(reverse_pattern, reverse_longest_prefix);

  /* default shift: pattern length minus its longest proper border */
  for ( j = 0; j < pat_len; ++j) {
    good_suffix[j] = pat_len - longest_prefix[pat_len-1];
  }

  /* tighten the shift wherever a suffix re-occurs inside the pattern;   */
  /* the prefix function is always <= i, so neither j nor the candidate  */
  /* shift can wrap below zero                                           */
  for ( i = 0; i < pat_len; ++i ) {
    j = pat_len - reverse_longest_prefix[i] - 1;
    if ( good_suffix[j] > i - reverse_longest_prefix[i] + 1) {
      good_suffix[j] = i - reverse_longest_prefix[i] + 1;
    }
  }

  MemFree(longest_prefix);
  MemFree(reverse_longest_prefix);
  MemFree(reverse_pattern);
}
1361
* searches for a pattern in a StringItem.
1362
* Using the Boyer-Moore algorithm for the search.
1364
/* FFStringSearch () -- looks for pattern in the StringItem chain text,  */
/* starting from position, using the last-occurrence and good-suffix     */
/* tables.  Visible returns: 0 for an empty pattern, -1 when the text is */
/* empty, the pattern cannot fit, or the table allocation fails.         */
NLM_EXTERN Int4 FFStringSearch (
1366
const CharPtr pattern,
1369
Uint4 text_len = FFLength(text);
1370
Uint4 pat_len = StringLen(pattern);
1371
Uint4 last_occurrence[256];
1372
Uint4Ptr good_suffix;
1376
if ( pat_len == 0 ) return 0;
1377
/* NOTE(review): text_len is unsigned; if position can exceed text_len the */
/* subtraction wraps to a huge value and this guard no longer rejects the  */
/* call -- confirm callers never pass position > text_len.                 */
if ( text_len == 0 || pat_len > text_len - position ) return -1;
1379
/* one shift entry per pattern character (sized with Int4, stored as Uint4) */
good_suffix = (Uint4Ptr)MemNew(pat_len * sizeof(Int4));
1380
if ( good_suffix == NULL ) return -1;
1382
ComputeLastOccurrence(pattern, last_occurrence);
1383
ComputeGoodSuffix(pattern, good_suffix);
1386
/* pat_len <= text_len is established by the guard above (given a sane */
/* position), so this unsigned difference does not wrap                 */
while ( shift <= text_len - pat_len ) {
1388
/* compare right-to-left while characters agree */
while( j >= 0 && pattern[j] == FFCharAt(text,shift + j) ) {
1392
MemFree (good_suffix);
1395
/* advance by the larger of the two heuristics.                          */
/* NOTE(review): last_occurrence is indexed with the raw Char returned   */
/* by FFCharAt; if Char is a signed type, bytes >= 0x80 give a negative  */
/* index (ComputeLastOccurrence casts to Uint1 when filling the table).  */
/* j - last_occurrence[...] is also evaluated in unsigned arithmetic     */
/* before the Int4 cast -- verify both.                                  */
shift += MAX( (Int4)good_suffix[j],
1396
(Int4)(j - last_occurrence[FFCharAt(text,shift + j)]));
1399
MemFree (good_suffix);
1406
/* IsWholeWordSubstr () -- Determines if a substring that is */
1407
/* contained in another string is a whole */
1408
/* word or phrase -- i.e. is it both */
1409
/* preceded and followed by white space. */
1412
/* Checks the characters adjacent to an occurrence of subStr at foundPos */
/* in searchStr: each neighbour counts as a boundary when it is white    */
/* space or punctuation.  Only the left-hand result is returned; the     */
/* right-hand result is computed but not part of the return value.       */
NLM_EXTERN Boolean IsWholeWordSubstr (
1413
StringItemPtr searchStr,
1418
Boolean left, right;
1422
/* check on the left only if there is a character there */
1424
ch = FFCharAt(searchStr, foundPos - 1);
1425
/* NOTE(review): ispunct is handed ch directly; if ch is a plain (signed) */
/* Char holding a byte >= 0x80 the argument is negative, which the C      */
/* standard does not permit for <ctype.h> functions -- consider casting   */
/* to unsigned char.  Same applies to the right-hand test below.          */
left = IS_WHITESP(ch) || ispunct(ch);
1430
/* step past the match to look at the character that follows it */
foundPos += StringLen(subStr);
1431
/* match ends exactly at the end of the text: nothing follows it */
if ( foundPos == FFLength(searchStr) ) {
1434
ch = FFCharAt(searchStr, foundPos);
1435
right = IS_WHITESP(ch) || ispunct(ch);
1438
return left; /* see comment above */
1439
/* return left && right; this is how it should be!*/
1443
/* functions to record sections or blocks in linked lists */
1445
NLM_EXTERN BaseBlockPtr Asn2gbAddBlock (
1447
BlockType blocktype,
1455
if (awp == NULL || size < 1) return NULL;
1457
bbp = (BaseBlockPtr) MemNew (size);
1458
if (bbp == NULL) return NULL;
1459
bbp->blocktype = blocktype;
1460
bbp->section = awp->currsection;
1462
vnp = ValNodeAddPointer (&(awp->lastblock), 0, bbp);
1463
if (vnp == NULL) return bbp;
1465
awp->lastblock = vnp;
1466
if (awp->blockList == NULL) {
1467
awp->blockList = vnp;
1474
/*--------------------------------------------------------*/
1476
/* s_LocusGetBaseName() - */
1478
/*--------------------------------------------------------*/
1480
static Boolean s_LocusGetBaseName (BioseqPtr parent, BioseqPtr segment, CharPtr baseName)
1482
Char parentName[SEQID_MAX_LEN];
1483
Char segName[SEQID_MAX_LEN];
1487
Char bufTmp[SEQID_MAX_LEN];
1493
/* Get the parent Sequence ID */
1495
parentName [0] = '\0';
1497
for (sip = parent->id; sip != NULL; sip = sip->next) {
1498
if (sip->choice == SEQID_GENBANK ||
1499
sip->choice == SEQID_EMBL ||
1500
sip->choice == SEQID_DDBJ) break;
1501
if (sip->choice == SEQID_TPG ||
1502
sip->choice == SEQID_TPE ||
1503
sip->choice == SEQID_TPD) break;
1507
tsip = (TextSeqIdPtr) sip->data.ptrvalue;
1508
if (tsip != NULL && (! StringHasNoText (tsip->name))) {
1509
StringNCpy_0 (parentName, tsip->name, sizeof (parentName));
1513
if (StringHasNoText (parentName)) {
1514
StringNCpy_0 (parentName, baseName, sizeof (parentName));
1517
/* Get segment id */
1522
for (sip = segment->id; sip != NULL; sip = sip->next) {
1523
if (sip->choice == SEQID_GENBANK ||
1524
sip->choice == SEQID_EMBL ||
1525
sip->choice == SEQID_DDBJ) break;
1526
if (sip->choice == SEQID_TPG ||
1527
sip->choice == SEQID_TPE ||
1528
sip->choice == SEQID_TPD) break;
1532
tsip = (TextSeqIdPtr) sip->data.ptrvalue;
1533
if (tsip != NULL && (! StringHasNoText (tsip->name))) {
1534
StringNCpy_0 (segName, tsip->name, sizeof (segName));
1535
segNameLen = StringLen(segName);
1539
/* If there's no "SEG_" prefix, then */
1540
/* just use the parent ID. */
1542
StringNCpy_0 (prefix,parentName,sizeof (prefix));
1544
if (StringCmp(prefix,"SEG_") != 0)
1546
StringCpy(baseName,parentName);
1550
/* Otherwise, eliminate the "SEG_" ... */
1552
StringCpy(bufTmp, &parentName[4]);
1553
StringCpy(parentName,bufTmp);
1555
/* ... And calculate a base name */
1557
if (segNameLen > 0 &&
1558
(segName[segNameLen-1] == '1') &&
1559
(StringLen(parentName) == segNameLen) &&
1560
(parentName[segNameLen-1] == segName[segNameLen-1]))
1563
for (i = segNameLen-2; i >= 0; i--)
1564
if (parentName[i] == '0')
1568
newLength = segNameLen - deleteChars;
1569
StringNCpy (parentName,segName,newLength); /* not StringNCpy_0 */
1570
parentName[newLength] = '\0';
1573
/* Return the base name in the basename parameter */
1575
StringCpy(baseName,parentName);
1579
/* ********************************************************************** */
1581
static Uint1 fasta_order [NUM_SEQID] = {
1582
33, /* 0 = not set */
1583
20, /* 1 = local Object-id */
1584
15, /* 2 = gibbsq */
1585
16, /* 3 = gibbmt */
1586
30, /* 4 = giim Giimport-id */
1587
10, /* 5 = genbank */
1590
10, /* 8 = swissprot */
1591
15, /* 9 = patent */
1592
20, /* 10 = other TextSeqId */
1593
20, /* 11 = general Dbtag */
1603
/* DoOneSection builds a single report for one bioseq or segment */
1605
static Asn2gbSectPtr Asn2gbAddSection (
1613
if (awp == NULL) return NULL;
1615
asp = (Asn2gbSectPtr) MemNew (sizeof (IntAsn2gbSect));
1616
if (asp == NULL) return NULL;
1618
vnp = ValNodeAddPointer (&(awp->lastsection), 0, asp);
1619
if (vnp == NULL) return asp;
1621
awp->lastsection = vnp;
1622
if (awp->sectionList == NULL) {
1623
awp->sectionList = vnp;
1629
static void LIBCALLBACK SaveGBSeqSequence (
1638
tmpp = (CharPtr PNTR) userdata;
1641
tmp = StringMove (tmp, sequence);
1646
static CharPtr CompressNonBases (CharPtr str)
1653
if (str == NULL || str [0] == '\0') return NULL;
1658
while (ch != '\0') {
1659
if (IS_ALPHA (ch)) {
1671
static CharPtr DoSeqPortStream (
1680
if (bsp == NULL) return NULL;
1682
str = MemNew (sizeof (Char) * (bsp->length + 10));
1683
if (str == NULL) return NULL;
1686
SeqPortStream (bsp, STREAM_EXPAND_GAPS, (Pointer) &tmp, SaveGBSeqSequence);
1689
if (tmp == NULL) return NULL;
1691
while (ch != '\0') {
1692
if (ch == '\n' || ch == '\r' || ch == '\t') {
1698
TrimSpacesAroundString (str);
1699
CompressNonBases (str);
1704
static Boolean DeltaLitOnly (
1711
if (bsp == NULL || bsp->repr != Seq_repr_delta) return FALSE;
1712
for (vnp = (ValNodePtr)(bsp->seq_ext); vnp != NULL; vnp = vnp->next) {
1713
if (vnp->choice == 1) return FALSE;
1718
/* Returns TRUE when a segmented bioseq is associated with a Bioseq-set of
   class "parts"; returns FALSE for NULL or non-segmented input, or when no
   such set is found. */
static Boolean SegHasParts (
1726
if (bsp == NULL || bsp->repr != Seq_repr_seg) return FALSE;
1727
sep = bsp->seqentry;
1728
if (sep == NULL) return FALSE;
1730
/* NOTE(review): sep is tested for NULL a second time here; presumably it is
   reassigned between the two checks in a statement not shown - confirm */
if (sep == NULL || (! IS_Bioseq_set (sep))) return FALSE;
1731
bssp = (BioseqSetPtr) sep->data.ptrvalue;
1732
if (bssp != NULL && bssp->_class == BioseqseqSet_class_parts) return TRUE;
1736
/* Builds one report section: validates its arguments, registers a new section
   with Asn2gbAddSection, records working state in awp, adds the blocks that
   apply to the current format, and finally copies the block list into the
   section's block array.  Returns without creating a section when any of
   target, parent, bsp, awp or ajp is NULL. */
NLM_EXTERN void DoOneSection (
1746
Boolean onePartOfSeg,
1752
Asn2gbFormatPtr afp;
1753
IntAsn2gbJobPtr ajp;
1755
SeqMgrBioseqContext bcontext;
1756
BaseBlockPtr PNTR blockArray;
1757
SeqMgrDescContext dcontext;
1760
IntAsn2gbSectPtr iasp;
1761
Boolean isRefSeq = FALSE;
1763
Boolean nsgenome = FALSE;
1770
Boolean wgsmaster = FALSE;
1771
Boolean wgstech = FALSE;
1773
if (target == NULL || parent == NULL || bsp == NULL || awp == NULL) return;
1775
if (ajp == NULL) return;
1777
/* release mode with contig style: segmented bioseqs and delta bioseqs that
   are not literal-only are singled out here */
if (awp->mode == RELEASE_MODE && awp->style == CONTIG_STYLE) {
1778
if (bsp->repr == Seq_repr_seg) {
1779
} else if (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) {
1783
/* when local IDs are suppressed, skip records whose selected id is missing
   or is a local id */
if (ajp->flags.suppressLocalID) {
1784
sip = SeqIdSelect (bsp->id, fasta_order, NUM_SEQID);
1785
if (sip == NULL || sip->choice == SEQID_LOCAL) return;
1789
awp->basename[0] = '\0';
1790
} else if (seg == 1) {
1791
s_LocusGetBaseName (parent, bsp, awp->basename);
1794
asp = Asn2gbAddSection (awp);
1795
if (asp == NULL) return;
1802
/* prefer the part count stored in awp; fall back to the indexed segment count
   of the parent when it is zero */
numsegs = awp->partcount;
1803
if (numsegs == 0 && SeqMgrGetBioseqContext (parent, &bcontext)) {
1804
numsegs = bcontext.numsegs;
1807
/* set working data fields: the per-section state in awp is reset for the
   section that is about to be built */
1811
awp->target = target;
1812
awp->parent = parent;
1817
awp->numsegs = numsegs;
1820
awp->contig = contig;
1822
awp->firstfeat = TRUE;
1823
awp->featseen = FALSE;
1825
/* initialize empty blockList for this section */
1827
awp->blockList = NULL;
1828
awp->lastblock = NULL;
1830
/* and store section data into section fields */
1832
asp->target = target;
1836
asp->numsegs = numsegs;
1840
/* the same record viewed through its internal (IntAsn2gbSect) type; it was
   allocated with that size in Asn2gbAddSection */
iasp = (IntAsn2gbSectPtr) asp;
1842
asp->blockArray = NULL;
1845
/* WGS master and NS_ virtual records treated differently */
1847
if (bsp->repr == Seq_repr_virtual) {
1849
/* check for certain ID types */
1851
for (sip = bsp->id; sip != NULL; sip = sip->next) {
1852
if (sip->choice == SEQID_GENBANK ||
1853
sip->choice == SEQID_EMBL ||
1854
sip->choice == SEQID_DDBJ) {
1855
tsip = (TextSeqIdPtr) sip->data.ptrvalue;
1856
if (tsip != NULL && tsip->accession != NULL) {
1857
acclen = StringLen (tsip->accession);
1859
/* skip the first six characters and test for an all-zero remainder */
if (StringCmp (tsip->accession + 6, "000000") == 0) {
1862
} else if (acclen == 13) {
1863
/* 13-character accessions carry seven trailing digits */
if (StringCmp (tsip->accession + 6, "0000000") == 0) {
1868
} else if (sip->choice == SEQID_OTHER) {
1869
tsip = (TextSeqIdPtr) sip->data.ptrvalue;
1870
if (tsip != NULL && tsip->accession != NULL) {
1871
/* accession prefixes are matched case-insensitively (StringNICmp) */
if (StringNICmp (tsip->accession, "NC_", 3) == 0) {
1873
} else if (StringNICmp (tsip->accession, "NS_", 3) == 0) {
1875
} else if (StringNICmp (tsip->accession, "NZ_", 3) == 0) {
1876
/* for a 15-character NZ_ accession the zero test starts at offset 9 */
if (StringLen (tsip->accession) == 15) {
1877
if (StringCmp (tsip->accession + 9, "000000") == 0) {
1886
sdp = SeqMgrGetNextDescriptor (bsp, NULL, Seq_descr_molinfo, &dcontext);
1888
/* NOTE(review): sdp is dereferenced here; no NULL guard is visible in this
   excerpt - confirm that one precedes this statement */
mip = (MolInfoPtr) sdp->data.ptrvalue;
1889
if (mip != NULL && mip->tech == MI_TECH_wgs) {
1895
for (sip = bsp->id; sip != NULL; sip = sip->next) {
1896
if (sip->choice == SEQID_OTHER) {
1901
/* start exploring and populating paragraphs */
1903
/* feature-table format gets a reduced set of blocks */
if (awp->format == FTABLE_FMT) {
1904
AddFeatHeaderBlock (awp);
1905
if (awp->showRefs) {
1906
AddReferenceBlock (awp, isRefSeq);
1908
AddSourceFeatBlock (awp);
1909
AddFeatureBlock (awp);
1913
AddLocusBlock (awp);
1915
/* GenBank/GenPept and EMBL/EMBLPept add their leading blocks in different
   orders */
if (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT) {
1917
AddDeflineBlock (awp);
1918
AddAccessionBlock (awp);
1920
if (ISA_aa (bsp->mol)) {
1926
AddVersionBlock (awp);
1928
if (ISA_aa (bsp->mol)) {
1929
AddDbsourceBlock (awp);
1932
} else if (awp->format == EMBL_FMT || awp->format == EMBLPEPT_FMT) {
1934
AddAccessionBlock (awp);
1936
if (ISA_na (bsp->mol)) {
1937
AddVersionBlock (awp);
1940
if (ISA_aa (bsp->mol)) {
1941
/* AddPidBlock (awp); */
1942
/* AddDbsourceBlock (awp); */
1947
AddDeflineBlock (awp);
1950
AddKeywordsBlock (awp);
1952
if (awp->format == GENBANK_FMT || awp->format == GENPEPT_FMT) {
1953
AddSegmentBlock (awp, onePartOfSeg);
1956
AddSourceBlock (awp);
1957
AddOrganismBlock (awp);
1959
/* !!! RELEASE_MODE should check return value of AddReferenceBlock !!! */
1961
hasRefs = AddReferenceBlock (awp, isRefSeq);
1963
if (ajp->flags.needAtLeastOneRef) {
1964
/* RefSeq does not require a publication */
1970
AddCommentBlock (awp);
1971
AddPrimaryBlock (awp);
1973
AddFeatHeaderBlock (awp);
1974
AddSourceFeatBlock (awp);
1976
/* the flags computed earlier (wgsmaster/wgstech, nsgenome, contig) select
   which kind of body follows */
if (wgsmaster && wgstech) {
1980
} else if (nsgenome) {
1982
AddGenomeBlock (awp);
1984
} else if (contig) {
1986
/* contig records show features when explicitly requested, or in "smart" mode
   when the sequence is at most 1,000,000 long */
if (awp->showconfeats) {
1987
AddFeatureBlock (awp);
1988
} else if (awp->smartconfeats && bsp->length <= 1000000) {
1989
AddFeatureBlock (awp);
1991
AddContigBlock (awp);
1993
if (awp->showContigAndSeq) {
1994
/* sequence blocks are added only for nucleotides and only when no gbseq
   structure is set on the job */
if (ISA_na (bsp->mol) && ajp->gbseq == NULL) {
1995
if (awp->showBaseCount) {
1996
AddBasecountBlock (awp);
1999
AddOriginBlock (awp);
2001
AddSequenceBlock (awp);
2006
AddFeatureBlock (awp);
2008
if (awp->showContigAndSeq) {
2009
/* add a contig block for segmented bioseqs without a parts set and for
   delta bioseqs that are not literal-only */
if (bsp->repr == Seq_repr_seg && (! SegHasParts (bsp))) {
2010
AddContigBlock (awp);
2011
} else if (bsp->repr == Seq_repr_delta && (! DeltaLitOnly (bsp))) {
2012
AddContigBlock (awp);
2016
if (ISA_na (bsp->mol) && ajp->gbseq == NULL) {
2017
if (awp->showBaseCount) {
2018
AddBasecountBlock (awp );
2021
AddOriginBlock (awp);
2023
AddSequenceBlock (awp);
2026
AddSlashBlock (awp);
2029
/* allocate block array for this section */
2031
numBlocks = ValNodeLen (awp->blockList);
2032
asp->numBlocks = numBlocks;
2034
if (numBlocks > 0) {
2035
/* one extra slot is allocated beyond numBlocks */
blockArray = (BaseBlockPtr PNTR) MemNew (sizeof (BaseBlockPtr) * (numBlocks + 1));
2036
asp->blockArray = blockArray;
2038
if (blockArray != NULL) {
2039
/* copy the block pointers, in list order, into the array */
for (vnp = awp->blockList, i = 0; vnp != NULL; vnp = vnp->next, i++) {
2040
blockArray [i] = (BaseBlockPtr) vnp->data.ptrvalue;
2045
/* free blockList, but leave data, now pointed to by blockArray elements */
2047
awp->blockList = ValNodeFree (awp->blockList);
2048
awp->lastblock = NULL;
2050
/* advance the running section counter kept in the work structure */
(awp->currsection)++;
2053
/* ********************************************************************** */
2056
the following functions handle various kinds of input, all calling
2057
DoOneSection once for each component that gets its own report
2060
/* Segment callback passed to SeqMgrExploreSegments by DoOneBioseq.  Resolves
   the segment's Seq-id to a bioseq and calls DoOneSection for it.  Returns
   FALSE when slp or context is NULL, and TRUE when the segment has no usable
   id or the bioseq cannot be locked. */
static Boolean LIBCALLBACK Asn2Seg (
2062
SeqMgrSegmentContextPtr context
2067
BioseqPtr bsp = NULL;
2075
if (slp == NULL || context == NULL) return FALSE;
2076
awp = (Asn2gbWorkPtr) context->userdata;
2078
parent = context->parent;
2080
/* range of this segment, offset by the context's cumulative offset */
from = context->cumOffset;
2081
to = from + context->to - context->from;
2083
sip = SeqLocId (slp);
2085
/* second attempt at obtaining an id, using the first sub-location */
loc = SeqLocFindNext (slp, NULL);
2087
sip = SeqLocId (loc);
2090
if (sip == NULL) return TRUE;
2092
/* may remote fetch genome component if not already in memory */
2094
bsp = BioseqLockById (sip);
2096
if (bsp == NULL) return TRUE;
2098
entityID = ObjMgrGetEntityIDForPointer (bsp);
2100
if (entityID != awp->entityID) {
2102
/* if segment not packaged in record, may need to feature index it */
2104
if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
2105
SeqMgrIndexFeatures (entityID, NULL);
2108
/* collect features indexed on the remote bioseq */
2112
to = bsp->length - 1;
2115
/* virtual segments do not get a section of their own */
if (bsp->repr != Seq_repr_virtual) {
2117
DoOneSection (bsp, parent, bsp, bsp, /* slp */ NULL, awp->seg, from, to, FALSE, FALSE, awp);
2125
/* Walks a chain of segment locations and counts its parts, skipping entries
   that have no Seq-id, GI entries whose bioseq cannot be found, and virtual
   parts that carry a gibbsq, gibbmt or giim id.
   NOTE(review): the increment and return are not visible in this excerpt. */
static Int4 CountRealParts (
2137
for (slp = (SeqLocPtr) slp_head; slp != NULL; slp = slp->next) {
2138
sip = SeqLocId (slp);
2139
if (sip == NULL) continue;
2140
if (sip->choice == SEQID_GI) {
2141
part = BioseqFind (sip);
2142
if (part == NULL) continue;
2143
/* leave the loop early, with id non-NULL, when one of these id types is
   present */
for (id = part->id; id != NULL; id = id->next) {
2144
if (id->choice == SEQID_GIBBSQ ||
2145
id->choice == SEQID_GIBBMT ||
2146
id->choice == SEQID_GIIM) break;
2148
if (id != NULL && part->repr == Seq_repr_virtual) continue;
2155
/* State shared with the FindSegForPart callback; its bsp and seg members are
   referenced in FindSegForPart and DoOneBioseq. */
typedef struct findseg {
2158
} FindSeg, PNTR FindSegPtr;
2160
/* Segment callback passed to SeqMgrExploreSegments by DoOneBioseq, with a
   FindSeg record as userdata.  Returns TRUE (keep exploring) for NULL
   arguments, for segments without an id or without a bioseq in memory, and
   for segments whose bioseq is not the one stored in fsp->bsp. */
static Boolean LIBCALLBACK FindSegForPart (
2162
SeqMgrSegmentContextPtr context
2167
BioseqPtr bsp = NULL;
2171
if (slp == NULL || context == NULL) return TRUE;
2172
fsp = (FindSegPtr) context->userdata;
2174
sip = SeqLocId (slp);
2176
/* second attempt at obtaining an id, using the first sub-location */
loc = SeqLocFindNext (slp, NULL);
2178
sip = SeqLocId (loc);
2181
if (sip == NULL) return TRUE;
2183
/* BioseqFind is used here, whereas Asn2Seg uses BioseqLockById */
bsp = BioseqFind (sip);
2184
if (bsp == NULL) return TRUE;
2186
if (bsp->repr != Seq_repr_virtual) {
2190
if (bsp != fsp->bsp) return TRUE;
2195
/* Per-bioseq entry point, used as the callback for VisitSequencesInSep and
   called directly from asn2gnbk_setup_ex.  userdata is the Asn2gbWork
   structure.  Filters out molecules that do not fit the requested format,
   then issues one or more DoOneSection calls for the bioseq. */
NLM_EXTERN void DoOneBioseq (
2201
IntAsn2gbJobPtr ajp;
2204
SeqMgrSegmentContext context;
2205
Boolean contig = FALSE;
2208
SeqEntryPtr oldscope;
2210
Boolean segmented = FALSE;
2214
if (bsp == NULL) return;
2215
awp = (Asn2gbWorkPtr) userdata;
2216
if (awp == NULL) return;
2218
if (ajp == NULL) return;
2220
/* return if molecule not right for format */
2222
if (ISA_na (bsp->mol)) {
2223
/* nucleotides are never reported in the protein formats */
if (ajp->format == GENPEPT_FMT || ajp->format == EMBLPEPT_FMT) return;
2225
/* only do mRNA feature tables in GPS if targeted to a specific mRNA */
2227
if (ajp->format == FTABLE_FMT && ajp->skipMrnas) {
2228
/* look for a nuc-prot parent set, then for a gen-prod-set */
if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
2229
bssp = (BioseqSetPtr) bsp->idx.parentptr;
2230
if (bssp != NULL && bssp->_class == BioseqseqSet_class_nuc_prot) {
2231
if (bsp->idx.parenttype == OBJ_BIOSEQSET) {
2232
bssp = (BioseqSetPtr) bsp->idx.parentptr;
2233
if (bssp != NULL && bssp->_class == BioseqseqSet_class_gen_prod_set) {
2241
} else if (ISA_aa (bsp->mol)) {
2242
/* proteins are never reported in the nucleotide formats */
if (ajp->format == GENBANK_FMT || ajp->format == EMBL_FMT) return;
2244
/* only do protein feature tables if targeted to a specific protein */
2246
if (ajp->format == FTABLE_FMT && ajp->skipProts) return;
2249
/* resolve the display style: explicit SEGMENT_STYLE and CONTIG_STYLE requests
   are handled first; NORMAL_STYLE is resolved further below from the
   bioseq's representation */
if (awp->style == SEGMENT_STYLE) {
2252
if (awp->style == CONTIG_STYLE) {
2255
// Never do segmented style in FTABLE format
2256
if (awp->format == FTABLE_FMT) {
2263
if (bsp->repr == Seq_repr_seg && awp->style == NORMAL_STYLE) {
2265
/* if bsp followed by parts set, then do not default to contig style */
2267
if (SegHasParts (bsp)) {
2271
if (bsp->seq_ext_type == 1) {
2273
/* count only non-virtual parts */
2275
/* scope is switched to the record's top entry while counting, then restored */
sep = GetTopSeqEntryForEntityID (awp->entityID);
2276
oldscope = SeqEntrySetScope (sep);
2277
awp->partcount = CountRealParts ((SeqLocPtr) bsp->seq_ext);
2278
SeqEntrySetScope (oldscope);
2285
if (bsp->repr == Seq_repr_delta && awp->style == NORMAL_STYLE) {
2286
if (! DeltaLitOnly (bsp)) {
2291
/* dispatch on representation: segmented bioseqs, parts of an indexed
   segmented bioseq, and ordinary bioseqs each reach DoOneSection by a
   different route */
if (bsp->repr == Seq_repr_seg) {
2293
/* this is a segmented bioseq */
2297
/* show all segments individually */
2300
SeqMgrExploreSegments (bsp, (Pointer) awp, Asn2Seg);
2304
/* show as single bioseq */
2308
to = bsp->length - 1;
2310
DoOneSection (parent, parent, bsp, parent, ajp->ajp.slp, 0, from, to, contig, FALSE, awp);
2313
} else if (bsp->repr == Seq_repr_raw ||
2314
bsp->repr == Seq_repr_const ||
2315
bsp->repr == Seq_repr_delta ||
2316
bsp->repr == Seq_repr_virtual) {
2318
parent = SeqMgrGetParentOfPart (bsp, &context);
2319
if (parent != NULL) {
2321
/* this is a part of an indexed segmented bioseq */
2323
from = context.cumOffset;
2324
to = from + context.to - context.from;
2326
s_LocusGetBaseName (parent, bsp, awp->basename);
2330
/* FindSegForPart receives fs as userdata; fs.seg is then passed on as the
   segment number */
SeqMgrExploreSegments (parent, (Pointer) &fs, FindSegForPart);
2331
awp->showAllFeats = TRUE;
2333
/* onePartOfSeg is TRUE only on this path */
DoOneSection (bsp, parent, bsp, parent, ajp->ajp.slp, fs.seg, from, to, contig, TRUE, awp);
2337
/* this is a regular non-segmented bioseq */
2341
to = bsp->length - 1;
2343
DoOneSection (bsp, parent, bsp, parent, ajp->ajp.slp, 0, from, to, contig, FALSE, awp);
2348
/* Processes a chain of Seq-entries.  Entries that are Bioseq-sets of one of
   the container classes listed below are descended into recursively; for the
   remaining entries DoOneBioseq is run through VisitSequencesInSep with
   VISIT_MAINS.  Does nothing when seq_set or awp is NULL. */
static void DoBioseqSetList (
2349
SeqEntryPtr seq_set,
2357
if (seq_set == NULL || awp == NULL) return;
2359
/* iterate rather than recurse unless multiple nested sets > nuc-prot */
2361
for (sep = seq_set; sep != NULL; sep = sep->next) {
2363
if (IS_Bioseq_set (sep)) {
2364
bssp = (BioseqSetPtr) sep->data.ptrvalue;
2365
if (bssp == NULL) continue;
2367
/* same class list as in DoOneBioseqSet */
if (bssp->_class == BioseqseqSet_class_genbank ||
2368
bssp->_class == BioseqseqSet_class_mut_set ||
2369
bssp->_class == BioseqseqSet_class_pop_set ||
2370
bssp->_class == BioseqseqSet_class_phy_set ||
2371
bssp->_class == BioseqseqSet_class_eco_set ||
2372
bssp->_class == BioseqseqSet_class_wgs_set ||
2373
bssp->_class == BioseqseqSet_class_gen_prod_set) {
2375
/* if popset within genbank set, for example, recurse */
2377
DoBioseqSetList (bssp->seq_set, awp);
2383
/* at most nuc-prot set, so do main bioseqs that fit the format */
2385
VisitSequencesInSep (sep, (Pointer) awp, VISIT_MAINS, DoOneBioseq);
2389
/* Entry point for a single Seq-entry.  A Bioseq-set of one of the container
   classes listed below has its members handed to DoBioseqSetList; otherwise
   DoOneBioseq is run through VisitSequencesInSep with VISIT_MAINS.  Does
   nothing when sep or awp is NULL, or when the set's data pointer is NULL. */
static void DoOneBioseqSet (
2397
if (sep == NULL || awp == NULL) return;
2399
if (IS_Bioseq_set (sep)) {
2400
bssp = (BioseqSetPtr) sep->data.ptrvalue;
2401
if (bssp == NULL) return;
2403
/* same class list as in DoBioseqSetList */
if (bssp->_class == BioseqseqSet_class_genbank ||
2404
bssp->_class == BioseqseqSet_class_mut_set ||
2405
bssp->_class == BioseqseqSet_class_pop_set ||
2406
bssp->_class == BioseqseqSet_class_phy_set ||
2407
bssp->_class == BioseqseqSet_class_eco_set ||
2408
bssp->_class == BioseqseqSet_class_wgs_set ||
2409
bssp->_class == BioseqseqSet_class_gen_prod_set) {
2411
/* this is a pop/phy/mut/eco set, catenate separate reports */
2413
DoBioseqSetList (bssp->seq_set, awp);
2419
/* at most nuc-prot set, so do main bioseqs that fit the format */
2421
VisitSequencesInSep (sep, (Pointer) awp, VISIT_MAINS, DoOneBioseq);
2427
/* ********************************************************************** */
2429
/* public functions */
2431
/* Comparison callback used with HeapSort on the paragraphByIDs array.  Each
   argument points to a BaseBlockPtr element.  Blocks are ordered by entityID,
   then itemtype, then itemID, then paragraph.  NULL arguments or NULL block
   pointers compare as equal (0). */
static int LIBCALLBACK SortParagraphByIDProc (
2437
BaseBlockPtr bbp1, bbp2;
2439
if (vp1 == NULL || vp2 == NULL) return 0;
2440
/* the array elements are themselves pointers, hence the extra dereference */
bbp1 = *((BaseBlockPtr PNTR) vp1);
2441
bbp2 = *((BaseBlockPtr PNTR) vp2);
2442
if (bbp1 == NULL || bbp2 == NULL) return 0;
2444
if (bbp1->entityID > bbp2->entityID) return 1;
2445
if (bbp1->entityID < bbp2->entityID) return -1;
2447
if (bbp1->itemtype > bbp2->itemtype) return 1;
2448
if (bbp1->itemtype < bbp2->itemtype) return -1;
2450
if (bbp1->itemID > bbp2->itemID) return 1;
2451
if (bbp1->itemID < bbp2->itemID) return -1;
2453
if (bbp1->paragraph > bbp2->paragraph) return 1;
2454
if (bbp1->paragraph < bbp2->paragraph) return -1;
2459
/* Bioseq visitor used by IsSepRefseq.  userdata points to a Boolean that is
   set to TRUE when the bioseq has a Seq-id of choice SEQID_OTHER; the Boolean
   is never cleared here.  Does nothing when bsp or userdata is NULL. */
static void IsBspRefseq (
2465
BoolPtr has_refseqP;
2468
if (bsp == NULL || userdata == NULL) return;
2469
has_refseqP = (BoolPtr) userdata;
2470
for (sip = bsp->id; sip != NULL; sip = sip->next) {
2471
if (sip->choice == SEQID_OTHER) {
2472
*has_refseqP = TRUE;
2477
/* Runs IsBspRefseq over every bioseq in sep, accumulating the result in a
   local flag that starts as FALSE.  Returns FALSE immediately for a NULL
   entry. */
static Boolean IsSepRefseq (
2482
Boolean is_refseq = FALSE;
2484
if (sep == NULL) return FALSE;
2485
VisitBioseqsInSep (sep, (Pointer) &is_refseq, IsBspRefseq);
2489
/* Element type of flagTable; SetFlagsFromMode reads its flags array in order. */
typedef struct modeflags {
2491
} ModeFlags, PNTR ModeFlagsPtr;
2493
/* One row of Boolean flags per mode.  SetFlagsFromMode selects the row with
   index (mode - 1) and assigns its values, in order, to the fields of
   ajp->flags, so the column order here must match the assignment order there. */
static ModeFlags flagTable [] = {
2496
{TRUE, TRUE, TRUE, TRUE, TRUE,
2497
TRUE, TRUE, TRUE, TRUE, TRUE,
2498
TRUE, TRUE, TRUE, TRUE, TRUE,
2499
TRUE, TRUE, TRUE, TRUE, TRUE,
2500
TRUE, TRUE, TRUE, TRUE, TRUE,
2504
{FALSE, TRUE, TRUE, TRUE, TRUE,
2505
FALSE, TRUE, TRUE, TRUE, TRUE,
2506
TRUE, TRUE, FALSE, TRUE, TRUE,
2507
TRUE, TRUE, FALSE, FALSE, TRUE,
2508
TRUE, TRUE, TRUE, TRUE, TRUE,
2512
{FALSE, FALSE, FALSE, FALSE, FALSE,
2513
FALSE, FALSE, TRUE, FALSE, FALSE,
2514
FALSE, FALSE, FALSE, FALSE, FALSE,
2515
FALSE, FALSE, FALSE, FALSE, FALSE,
2516
FALSE, FALSE, FALSE, FALSE, TRUE,
2520
{FALSE, FALSE, FALSE, FALSE, FALSE,
2521
FALSE, FALSE, FALSE, FALSE, FALSE,
2522
FALSE, FALSE, FALSE, FALSE, FALSE,
2523
FALSE, FALSE, FALSE, FALSE, FALSE,
2524
FALSE, FALSE, FALSE, FALSE, FALSE,
2528
/* Fills ajp->flags from the flagTable row for the given mode, then applies
   overrides that depend on whether a gbseq structure is set, on the
   "InternalNcbiSequin" application property, and on whether the record
   contains a RefSeq id (IsSepRefseq).  Does nothing when ajp is NULL. */
static void SetFlagsFromMode (
2529
IntAsn2gbJobPtr ajp,
2538
if (ajp == NULL) return;
2539
/* modes outside RELEASE_MODE..DUMP_MODE are handled separately */
if (! (mode >= RELEASE_MODE && mode <= DUMP_MODE)) {
2542
/* table rows are zero-based, hence mode - 1 */
mfp = &(flagTable [(int) (mode - 1)]);
2543
bp = &(mfp->flags [0]);
2545
/* bp walks the row; each assignment consumes the next value */
ajp->flags.suppressLocalID = *(bp++);
2546
ajp->flags.validateFeats = *(bp++);
2547
ajp->flags.ignorePatPubs = *(bp++);
2548
ajp->flags.dropShortAA = *(bp++);
2549
ajp->flags.avoidLocusColl = *(bp++);
2551
ajp->flags.iupacaaOnly = *(bp++);
2552
ajp->flags.dropBadCitGens = *(bp++);
2553
ajp->flags.noAffilOnUnpub = *(bp++);
2554
ajp->flags.dropIllegalQuals = *(bp++);
2555
ajp->flags.checkQualSyntax = *(bp++);
2557
ajp->flags.needRequiredQuals = *(bp++);
2558
ajp->flags.needOrganismQual = *(bp++);
2559
ajp->flags.needAtLeastOneRef = *(bp++);
2560
ajp->flags.citArtIsoJta = *(bp++);
2561
ajp->flags.dropBadDbxref = *(bp++);
2563
ajp->flags.useEmblMolType = *(bp++);
2564
ajp->flags.hideBankItComment = *(bp++);
2565
ajp->flags.checkCDSproductID = *(bp++);
2566
ajp->flags.suppressSegLoc = *(bp++);
2567
/* written as *(bp)++ rather than *(bp++): postfix ++ binds tighter than unary
   *, so this still reads the current value and then advances the pointer */
ajp->flags.srcQualsToNote = *(bp)++;
2569
ajp->flags.hideEmptySource = *(bp++);
2570
ajp->flags.goQualsToNote = *(bp++);
2571
ajp->flags.geneSynsToNote = *(bp++);
2572
ajp->flags.selenocysteineToNote = *(bp++);
2573
ajp->flags.extraProductsToNote = *(bp++);
2575
ajp->flags.forGbRelease = *(bp++);
2577
/* unapproved qualifiers suppressed for flatfile, okay for GBSeq XML */
2579
if (ajp->gbseq == NULL) {
2581
/* collaboration unapproved source quals on their own line only in indexer Sequin - relaxed */
2584
if (GetAppProperty ("InternalNcbiSequin") == NULL) {
2586
ajp->flags.srcQualsToNote = TRUE;
2590
sep = GetTopSeqEntryForEntityID (ajp->ajp.entityID);
2591
if (IsSepRefseq (sep)) {
2593
ajp->flags.srcQualsToNote = FALSE;
2595
/* selenocysteine always a separate qualifier for RefSeq */
2597
ajp->flags.selenocysteineToNote = FALSE;
2601
/* collaboration unapproved Gene Ontology quals on their own line only for RefSeq */
2603
/* ajp->flags.goQualsToNote = TRUE; */
2604
ajp->flags.geneSynsToNote = TRUE;
2609
/* the same RefSeq overrides are applied again on this second path */
sep = GetTopSeqEntryForEntityID (ajp->ajp.entityID);
2610
if (IsSepRefseq (sep)) {
2612
ajp->flags.srcQualsToNote = FALSE;
2614
/* selenocysteine always a separate qualifier for RefSeq */
2616
ajp->flags.selenocysteineToNote = FALSE;
2622
/* Bioseq visitor used by asn2gnbk_setup_ex in release mode.  userdata points
   to a Boolean that is set to TRUE when the bioseq both has a GI (hasGi) and
   carries a text Seq-id whose version is 0 (zeroVersion). */
static void CheckVersionWithGi (BioseqPtr bsp, Pointer userdata)
2625
Boolean hasGi = FALSE;
2626
BoolPtr missingVersion;
2629
Boolean zeroVersion = FALSE;
2631
for (sip = bsp->id; sip != NULL; sip = sip->next) {
2632
switch (sip->choice) {
2639
tsip = (TextSeqIdPtr) sip->data.ptrvalue;
2640
if (tsip != NULL && tsip->version == 0) {
2651
/* only the combination of both conditions is reported to the caller */
if (hasGi && zeroVersion) {
2652
missingVersion = (BoolPtr) userdata;
2653
*missingVersion = TRUE;
2658
/* Result record filled by LookForSeqIDs and read by LookForGEDetc; its isProt
   and isNTorNW members are referenced in those functions. */
typedef struct lookforids {
2665
} LookForIDs, PNTR LookForIDsPtr;
2667
/* Bioseq visitor used by LookForGEDetc.  userdata is a LookForIDs record in
   which flags are raised according to the bioseq's molecule class and its
   Seq-ids.  Flags are only ever set here, never cleared. */
static void LookForSeqIDs (BioseqPtr bsp, Pointer userdata)
2674
lfip = (LookForIDsPtr) userdata;
2675
if (ISA_na (bsp->mol)) {
2678
if (ISA_aa (bsp->mol)) {
2679
lfip->isProt = TRUE;
2681
for (sip = bsp->id; sip != NULL; sip = sip->next) {
2682
switch (sip->choice) {
2683
case SEQID_GENBANK :
2694
tsip = (TextSeqIdPtr) sip->data.ptrvalue;
2696
/* prefix tests here use StringNCmp, i.e. they are case-sensitive */
if (StringNCmp (tsip->accession, "NC_", 3) == 0) {
2698
} else if (StringNCmp (tsip->accession, "NT_", 3) == 0) {
2699
lfip->isNTorNW = TRUE;
2700
} else if (StringNCmp (tsip->accession, "NW_", 3) == 0) {
2701
lfip->isNTorNW = TRUE;
2711
/* Runs LookForSeqIDs over every bioseq in topsep using a zero-initialized
   LookForIDs record, then copies the collected flags to the caller's output
   pointers. */
static void LookForGEDetc (
2724
/* start with every flag cleared */
MemSet ((Pointer) &lfi, 0, sizeof (LookForIDs));
2725
VisitBioseqsInSep (topsep, (Pointer) &lfi, LookForSeqIDs);
2727
*isNTorNW = lfi.isNTorNW;
2731
*isProt = lfi.isProt;
2734
/* String constant that begins with a "Content-type: text/html" header line
   followed by an HTML HEAD/TITLE element.
   NOTE(review): presumably the default head text for HTML output when the
   caller supplies none - confirm at the point of use. */
static CharPtr defHead = "\
2735
Content-type: text/html\n\n\
2737
<HEAD><TITLE>GenBank entry</TITLE></HEAD>\n\
2742
static CharPtr defTail = "\
2748
/* Bit masks grouping related option bits.  asn2gnbk_setup_ex masks a flag
   word with one of these and compares the result with a single member, so a
   test succeeds only when exactly that member of the group is set. */
#define FEAT_FETCH_MASK (ONLY_NEAR_FEATURES | FAR_FEATURES_SUPPRESS | NEAR_FEATURES_SUPPRESS)
2749
#define HTML_XML_ASN_MASK (CREATE_HTML_FLATFILE | CREATE_XML_GBSEQ_FILE | CREATE_ASN_GBSEQ_FILE)
2750
#define GENE_RIF_MASK (HIDE_GENE_RIFS | ONLY_GENE_RIFS | LATEST_GENE_RIFS)
2752
/* Core job constructor behind asn2gnbk_setup.  Resolves the target bioseq,
   set or location, allocates and configures the job and work structures,
   runs DoOneBioseq or DoOneBioseqSet to create the sections, and finally
   gathers sections and paragraphs into arrays.  Returns NULL when the target
   cannot be resolved or the job cannot be allocated; other failures return
   whatever asn2gnbk_cleanup returns. */
static Asn2gbJobPtr asn2gnbk_setup_ex (
2771
IntAsn2gbJobPtr ajp = NULL;
2775
BaseBlockPtr PNTR blockArray;
2777
CharPtr ffhead = NULL;
2778
CharPtr fftail = NULL;
2779
Asn2gbWriteFunc ffwrite = NULL;
2780
GBSeqPtr gbseq = NULL;
2782
IndxPtr index = NULL;
2791
Boolean lockFarComp;
2792
Boolean lockFarLocs;
2793
Boolean lockFarProd;
2794
Boolean lookupFarComp;
2795
Boolean lookupFarHist;
2796
Boolean lookupFarLocs;
2797
Boolean lookupFarProd;
2798
Boolean missingVersion;
2801
SeqEntryPtr oldscope;
2803
BaseBlockPtr PNTR paragraphArray;
2804
BaseBlockPtr PNTR paragraphByIDs;
2806
Pointer remotedata = NULL;
2807
Asn2gbFreeFunc remotefree = NULL;
2808
Asn2gbLockFunc remotelock = NULL;
2809
Asn2gbSectPtr PNTR sectionArray;
2813
Boolean skipMrnas = FALSE;
2814
Boolean skipProts = FALSE;
2816
BioseqSetPtr topbssp;
2817
Pointer userdata = NULL;
2819
Boolean is_html = FALSE;
2822
format = GENBANK_FMT;
2828
style = NORMAL_STYLE;
2831
/* optional caller-supplied hooks and data; the locals keep their NULL
   defaults when extra is NULL */
if (extra != NULL) {
2832
ffwrite = extra->ffwrite;
2833
ffhead = extra->ffhead;
2834
fftail = extra->fftail;
2835
index = extra->index;
2836
gbseq = extra->gbseq;
2837
userdata = extra->userdata;
2838
remotelock = extra->remotelock;
2839
remotefree = extra->remotefree;
2840
remotedata = extra->remotedata;
2844
/* resolve the bioseq a location refers to; two lookups are visible here, by
   Seq-id and by the location as a whole */
bsp = BioseqFind (SeqLocId (slp));
2846
bsp = BioseqFindFromSeqLoc (slp);
2848
if (bsp == NULL) return NULL;
2850
/* if location is whole, generate normal bioseq report */
2852
if (slp->choice == SEQLOC_WHOLE) {
2854
} else if (slp->choice == SEQLOC_INT) {
2855
sintp = (SeqIntPtr) slp->data.ptrvalue;
2856
/* an interval reaching the last residue on the plus strand is among the
   conditions tested here */
if (sintp != NULL &&
2858
sintp->to == bsp->length - 1 &&
2859
sintp->strand == Seq_strand_plus) {
2867
/* the entity ID comes from the bioseq when there is one, else from the set */
entityID = ObjMgrGetEntityIDForPointer (bsp);
2868
} else if (bssp != NULL) {
2869
entityID = ObjMgrGetEntityIDForPointer (bssp);
2870
if (format == FTABLE_FMT) {
2875
if ((Boolean) ((custom & SHOW_PROT_FTABLE) != 0)) {
2880
if (entityID == 0) return NULL;
2882
/* make sure features are indexed before any exploration */
if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
2883
SeqMgrIndexFeatures (entityID, NULL);
2886
/* release mode refuses records flagged by CheckVersionWithGi */
if (mode == RELEASE_MODE) {
2887
sep = GetTopSeqEntryForEntityID (entityID);
2888
missingVersion = FALSE;
2889
VisitBioseqsInSep (sep, (Pointer) &missingVersion, CheckVersionWithGi);
2890
if (missingVersion) return NULL;
2893
/* allocate the job structure and record the request in it */
ajp = (IntAsn2gbJobPtr) MemNew (sizeof (IntAsn2gbJob));
2894
if (ajp == NULL) return NULL;
2896
/* HTML output is requested only when CREATE_HTML_FLATFILE is the single bit
   set within HTML_XML_ASN_MASK */
is_html = (Boolean) ((flags & HTML_XML_ASN_MASK) == CREATE_HTML_FLATFILE);
2901
ajp->ajp.entityID = entityID;
2903
ajp->ajp.bssp = bssp;
2905
/* the job keeps its own copy of the caller's location */
ajp->ajp.slp = AsnIoMemCopy ((Pointer) slp,
2906
(AsnReadFunc) SeqLocAsnRead,
2907
(AsnWriteFunc) SeqLocAsnWrite);
2909
ajp->ajp.slp = NULL;
2912
/* if location specified, normal defaults to master style */
2914
if (ajp->ajp.slp != NULL && style == NORMAL_STYLE) {
2915
style = MASTER_STYLE;
2918
ajp->format = format;
2919
ajp->mode = mode; /* for showing new qualifiers before quarantine ends */
2922
ajp->gbseq = gbseq; /* gbseq output can relax srcQualsToNote or goQualsToNote strictness */
2926
/* gbseq is stored before this call because SetFlagsFromMode tests ajp->gbseq */
SetFlagsFromMode (ajp, mode);
2928
lockFarComp = (Boolean) ((locks & LOCK_FAR_COMPONENTS) != 0);
2929
lockFarLocs = (Boolean) ((locks & LOCK_FAR_LOCATIONS) != 0);
2930
lockFarProd = (Boolean) ((locks & LOCK_FAR_PRODUCTS) != 0);
2932
if (lockFarComp || lockFarLocs || lockFarProd) {
2934
/* lock all bioseqs in advance, including remote genome components */
2936
sep = GetTopSeqEntryForEntityID (entityID);
2937
/* with a location and component locking requested, the location is passed
   along and the component flag is passed as FALSE */
if (ajp->ajp.slp != NULL && lockFarComp) {
2938
ajp->lockedBspList = LockFarComponentsEx (sep, FALSE, lockFarLocs, lockFarProd, ajp->ajp.slp);
2940
ajp->lockedBspList = LockFarComponentsEx (sep, lockFarComp, lockFarLocs, lockFarProd, NULL);
2944
lookupFarComp = (Boolean) ((locks & LOOKUP_FAR_COMPONENTS) != 0);
2945
lookupFarLocs = (Boolean) ((locks & LOOKUP_FAR_LOCATIONS) != 0);
2946
lookupFarProd = (Boolean) ((locks & LOOKUP_FAR_PRODUCTS) != 0);
2947
lookupFarHist = (Boolean) ((locks & LOOKUP_FAR_HISTORY) != 0);
2949
if (lookupFarComp || lookupFarLocs || lookupFarProd || lookupFarHist) {
2951
/* look up all far SeqIDs in advance */
2953
sep = GetTopSeqEntryForEntityID (entityID);
2954
LookupFarSeqIDs (sep, lookupFarComp, lookupFarLocs, lookupFarProd, FALSE, lookupFarHist);
2957
ajp->showFarTransl = (Boolean) ((flags & SHOW_FAR_TRANSLATION) != 0);
2958
ajp->transIfNoProd = (Boolean) ((flags & TRANSLATE_IF_NO_PRODUCT) != 0);
2959
ajp->alwaysTranslCds = (Boolean) ((flags & ALWAYS_TRANSLATE_CDS) != 0);
2961
ajp->masterStyle = (Boolean) (style == MASTER_STYLE);
2962
if (format == GENBANK_FMT || format == GENPEPT_FMT) {
2963
/* these two settings are on unless the corresponding "old" bit is set */
ajp->newSourceOrg = (Boolean) ((flags & USE_OLD_SOURCE_ORG) == 0);
2965
ajp->produceInsdSeq = (Boolean) ((flags & PRODUCE_OLD_GBSEQ) == 0);
2967
ajp->relModeError = FALSE;
2968
ajp->skipProts = skipProts;
2969
ajp->skipMrnas = skipMrnas;
2971
/* the work structure starts zeroed and is then filled from the request */
MemSet ((Pointer) (&aw), 0, sizeof (Asn2gbWork));
2973
aw.entityID = entityID;
2975
aw.sectionList = NULL;
2976
aw.lastsection = NULL;
2979
aw.showAllFeats = FALSE;
2981
aw.showconfeats = (Boolean) ((flags & SHOW_CONTIG_FEATURES) != 0);
2982
aw.showconsource = (Boolean) ((flags & SHOW_CONTIG_SOURCES) != 0);
2988
/* internal format pointer if writing at time of creation */
2991
MemSet ((Pointer) &af, 0, sizeof (Asn2gbFormat));
2996
af.ffwrite = ffwrite;
2997
af.userdata = userdata;
2998
af.remotelock = remotelock;
2999
af.remotefree = remotefree;
3000
af.remotedata = remotedata;
3008
sep = GetTopSeqEntryForEntityID (entityID);
3010
/* special types of records override feature fetching and contig display parameters */
3012
if (mode == ENTREZ_MODE) {
3013
if (! aw.showconfeats) {
3014
aw.smartconfeats = TRUE; /* features suppressed if CONTIG style and length > 1 MB */
3015
aw.showconfeats = FALSE;
3016
aw.showconsource = FALSE;
3020
aw.onlyNearFeats = FALSE;
3021
aw.farFeatsSuppress = FALSE;
3022
aw.nearFeatsSuppress = FALSE;
3024
/* classify the record by the kinds of Seq-ids and molecules it contains */
LookForGEDetc (sep, &isGED, &isNTorNW, &isNC, &isTPA, &isNuc, &isProt);
3028
if ((Boolean) ((flags & FEAT_FETCH_MASK) == ONLY_NEAR_FEATURES)) {
3029
aw.onlyNearFeats = TRUE;
3031
aw.nearFeatsSuppress = TRUE;
3034
} else if (isNTorNW || isTPA) {
3036
aw.onlyNearFeats = TRUE;
3040
aw.nearFeatsSuppress = TRUE;
3041
ajp->showFarTransl = TRUE;
3045
/* feature fetch policy taken directly from the flag bits */
aw.onlyNearFeats = (Boolean) ((flags & FEAT_FETCH_MASK) == ONLY_NEAR_FEATURES);
3046
aw.farFeatsSuppress = (Boolean) ((flags & FEAT_FETCH_MASK) == FAR_FEATURES_SUPPRESS);
3047
aw.nearFeatsSuppress = (Boolean) ((flags & FEAT_FETCH_MASK) == NEAR_FEATURES_SUPPRESS);
3050
/* continue setting flags */
3052
aw.hideImpFeats = (Boolean) ((custom & HIDE_IMP_FEATS) != 0);
3053
aw.hideRemImpFeats = (Boolean) ((custom & HIDE_REM_IMP_FEATS) != 0);
3055
aw.hideSnpFeats = (Boolean) ((custom & HIDE_SNP_FEATS) != 0);
3056
aw.hideExonFeats = (Boolean) ((custom & HIDE_EXON_FEATS) != 0);
3057
aw.hideIntronFeats = (Boolean) ((custom & HIDE_INTRON_FEATS) != 0);
3058
aw.hideMiscFeats = (Boolean) ((custom & HIDE_MISC_FEATS) != 0);
3060
aw.hideCddFeats = (Boolean) ((custom & HIDE_CDD_FEATS) != 0);
3061
aw.hideCdsProdFeats = (Boolean) ((custom & HIDE_CDS_PROD_FEATS) != 0);
3063
ajp->showTranscript = (Boolean) ((custom & SHOW_TRANCRIPTION) != 0);
3064
ajp->showPeptide = (Boolean) ((custom & SHOW_PEPTIDE) != 0);
3066
/* gene RIF options are mutually exclusive within GENE_RIF_MASK */
aw.hideGeneRIFs = (Boolean) ((custom & GENE_RIF_MASK) == HIDE_GENE_RIFS);
3067
aw.onlyGeneRIFs = (Boolean) ((custom & GENE_RIF_MASK) == ONLY_GENE_RIFS);
3068
aw.latestGeneRIFs = (Boolean) ((custom & GENE_RIF_MASK) == LATEST_GENE_RIFS);
3070
aw.showRefs = (Boolean) ((custom & SHOW_FTABLE_REFS) != 0);
3073
/* the GPS copy options apply only when the top entry is a gen-prod-set */
if (sep != NULL && IS_Bioseq_set (sep)) {
3074
topbssp = (BioseqSetPtr) sep->data.ptrvalue;
3075
if (topbssp != NULL && topbssp->_class == BioseqseqSet_class_gen_prod_set) {
3077
aw.copyGpsCdsUp = (Boolean) ((flags & COPY_GPS_CDS_UP) != 0);
3078
aw.copyGpsGeneDown = (Boolean) ((flags & COPY_GPS_GENE_DOWN) != 0);
3082
aw.showContigAndSeq = (Boolean) ((flags & SHOW_CONTIG_AND_SEQ) != 0);
3084
/* showing contig and sequence together is honored only in master or
   segment style */
if (style != MASTER_STYLE && style != SEGMENT_STYLE) {
3085
aw.showContigAndSeq = FALSE;
3089
aw.newLocusLine = TRUE;
3090
aw.showBaseCount = FALSE;
3092
/* the DDBJ variant changes several of the defaults set above */
if ((Boolean) ((flags & DDBJ_VARIANT_FORMAT) != 0)) {
3093
aw.citSubsFirst = TRUE;
3094
aw.hideGeneFeats = TRUE;
3095
aw.newLocusLine = FALSE;
3096
aw.showBaseCount = TRUE;
3097
ajp->newSourceOrg = FALSE;
3099
if (mode == SEQUIN_MODE || mode == DUMP_MODE) {
3100
aw.showBaseCount = TRUE;
3108
/* look for a Seq-submit wrapper around the record */
omdp = ObjMgrGetData (entityID);
3109
if (omdp != NULL && omdp->datatype == OBJ_SEQSUB) {
3110
ssp = (SeqSubmitPtr) omdp->dataptr;
3111
if (ssp != NULL && ssp->datatype == 1) {
3120
/* the previous scope is saved so it can be restored after the sections have
   been generated */
oldscope = SeqEntrySetScope (sep);
3123
/* send optional head string */
3125
is_html = (Boolean) ((flags & HTML_XML_ASN_MASK) == CREATE_HTML_FLATFILE);
3126
if (ffhead == NULL && is_html) {
3129
if (ffhead != NULL) {
3131
/* ffhead may come from the caller (extra->ffhead), so it is written as data
   through a fixed "%s" format rather than being used as the format string
   itself; a '%' in the text is then emitted literally */
fprintf (fp, "%s", ffhead);
3134
/* the same text is also handed to the optional write callback */
if (ffwrite != NULL) {
3135
ffwrite (ffhead, userdata, HEAD_BLOCK);
3141
/* handle all components of a pop/phy/mut/eco set */
3143
/* a Bioseq-set target is processed through its Seq-entry */
sep = SeqMgrGetSeqEntryForData (bssp);
3144
DoOneBioseqSet (sep, &aw);
3148
/* handle single bioseq, which may be segmented or a local part */
3150
DoOneBioseq (bsp, &aw);
3154
/* send optional tail string */
3156
if (fftail == NULL && is_html) {
3159
if (fftail != NULL) {
3161
/* fftail may come from the caller (extra->fftail), so it is written as data
   through a fixed "%s" format rather than being used as the format string
   itself; a '%' in the text is then emitted literally */
fprintf (fp, "%s", fftail);
3164
/* the same text is also handed to the optional write callback */
if (ffwrite != NULL) {
3165
ffwrite (fftail, userdata, TAIL_BLOCK);
3169
/* restore the scope that was in effect before section generation */
SeqEntrySetScope (oldscope);
3171
/* check for failure to populate anything */
3173
if (ajp->flags.needAtLeastOneRef && aw.failed) return asn2gnbk_cleanup ((Asn2gbJobPtr) ajp);
3175
numSections = ValNodeLen (aw.sectionList);
3176
ajp->ajp.numSections = numSections;
3178
if (numSections == 0) return asn2gnbk_cleanup ((Asn2gbJobPtr) ajp);
3180
/* allocate section array for this job */
3182
/* one extra slot is allocated beyond numSections */
sectionArray = (Asn2gbSectPtr PNTR) MemNew (sizeof (Asn2gbSectPtr) * (numSections + 1));
3183
ajp->ajp.sectionArray = sectionArray;
3185
if (sectionArray == NULL) return asn2gnbk_cleanup ((Asn2gbJobPtr) ajp);
3187
/* fill in section and paragraph arrays */
3190
/* first pass: copy section pointers and total up the block counts */
for (vnp = aw.sectionList, i = 0; vnp != NULL && i < numSections; vnp = vnp->next, i++) {
3191
asp = (Asn2gbSectPtr) vnp->data.ptrvalue;
3192
sectionArray [i] = asp;
3194
numParagraphs += asp->numBlocks;
3198
/* allocate paragraph array pointing to all blocks in all sections */
3200
ajp->ajp.numParagraphs = numParagraphs;
3201
if (numParagraphs == 0) return asn2gnbk_cleanup ((Asn2gbJobPtr) ajp);
3203
paragraphArray = (BaseBlockPtr PNTR) MemNew (sizeof (BaseBlockPtr) * (numParagraphs + 1));
3204
ajp->ajp.paragraphArray = paragraphArray;
3206
/* a second array of the same size holds the same pointers, sorted by ID below */
paragraphByIDs = (BaseBlockPtr PNTR) MemNew (sizeof (BaseBlockPtr) * (numParagraphs + 1));
3207
ajp->ajp.paragraphByIDs = paragraphByIDs;
3209
if (paragraphArray == NULL || paragraphByIDs == NULL) return asn2gnbk_cleanup ((Asn2gbJobPtr) ajp);
3212
/* second pass: flatten every section's block array into both paragraph
   arrays, using k as the running output index */
for (i = 0; i < numSections; i++) {
3213
asp = sectionArray [i];
3216
numBlocks = asp->numBlocks;
3217
blockArray = asp->blockArray;
3218
if (blockArray != NULL) {
3220
for (j = 0; j < numBlocks; j++) {
3221
bbp = blockArray [j];
3223
paragraphArray [k] = bbp;
3224
paragraphByIDs [k] = bbp;
3232
/* sort paragraphByIDs array by entityID/itemtype/itemID/paragraph */
3234
HeapSort (paragraphByIDs, (size_t) numParagraphs, sizeof (BaseBlockPtr), SortParagraphByIDProc);
3236
/* free sectionList, but leave data, now pointed to by sectionArray elements */
3238
ValNodeFree (aw.sectionList);
3240
return (Asn2gbJobPtr) ajp;
3243
/* Public wrapper around asn2gnbk_setup_ex: forwards its arguments unchanged
   and supplies FALSE, NULL, NULL, NULL for the four trailing parameters of
   the extended version. */
NLM_EXTERN Asn2gbJobPtr asn2gnbk_setup (
3257
return asn2gnbk_setup_ex (bsp, bssp, slp, format, mode, style,
3258
flags, locks, custom, extra,
3259
FALSE, NULL, NULL, NULL);
3262
/* ********************************************************************** */
3264
/* format functions allocate printable string for given paragraph */
3266
/* Default formatter: returns the result of StringSaveNoNull on the block's
   string member.  Returns NULL when afp or bbp is NULL. */
NLM_EXTERN CharPtr DefaultFormatBlock (
3267
Asn2gbFormatPtr afp,
3272
if (afp == NULL || bbp == NULL) return NULL;
3274
/* default format function assumes string pre-allocated by add block function */
3276
return StringSaveNoNull (bbp->string);
3279
/* Signature shared by all block formatters: format pointer and block in,
   allocated string out. */
typedef CharPtr (*FormatProc) (Asn2gbFormatPtr afp, BaseBlockPtr bbp);
3281
/* Table of 27 formatter functions.
   NOTE(review): presumably indexed by block type, so the order of entries
   must match the block type enumeration - confirm at the point of use. */
static FormatProc asn2gnbk_fmt_functions [27] = {
3283
DefaultFormatBlock, DefaultFormatBlock, DefaultFormatBlock,
3284
DefaultFormatBlock, DefaultFormatBlock, DefaultFormatBlock,
3285
DefaultFormatBlock, DefaultFormatBlock, DefaultFormatBlock,
3286
FormatSourceBlock, FormatOrganismBlock, FormatReferenceBlock,
3287
DefaultFormatBlock, FormatCommentBlock, FormatFeatHeaderBlock,
3288
FormatSourceFeatBlock, FormatFeatureBlock, FormatBasecountBlock,
3289
DefaultFormatBlock, FormatSequenceBlock, FormatContigBlock,
3290
DefaultFormatBlock, DefaultFormatBlock, FormatSlashBlock,
3294
/*
 * PrintFtableIntervals - append tab-delimited interval lines for a feature
 * location to the ValNode string list at *head (via ValNodeCopyStr).
 *
 * The first line written carries start, stop and the label; each further
 * interval of the location is written as start and stop only.  Positions
 * are offsets in target obtained from GetOffsetInBioseq, plus one.
 * Returns without writing anything if head, target, location or label is
 * NULL.
 */
NLM_EXTERN void PrintFtableIntervals (
3295
ValNodePtr PNTR head,
3313
if (head == NULL || target == NULL || location == NULL || label == NULL) return;
3315
if (location->choice == SEQLOC_PNT) {
3316
spp = (SeqPntPtr) location->data.ptrvalue;
3319
/* special point form: "point+1^ <TAB> point+2 <TAB> label" when ifp has choice 4 and a == 3 */
if (ifp != NULL && ifp->choice == 4 && ifp->a == 3) {
3320
sprintf (str, "%ld^\t%ld\t%s\n", (long) (spp->point + 1),
3321
(long) (spp->point + 2), label);
3322
ValNodeCopyStr (head, 0, str);
3328
/* first interval of the location: this is the line that carries the label */
slp = SeqLocFindNext (location, NULL);
3329
if (slp == NULL) return;
3331
start = GetOffsetInBioseq (slp, target, SEQLOC_START) + 1;
3332
stop = GetOffsetInBioseq (slp, target, SEQLOC_STOP) + 1;
3333
CheckSeqLocForPartial (slp, &partial5, &partial3);
3335
/* NOTE(review): the "<" / ">" prefixed forms are presumably selected by partial5 / partial3 - the tests are not visible here */
sprintf (str1, "<%ld", (long) start);
3337
sprintf (str1, "%ld", (long) start);
3340
sprintf (str2, ">%ld", (long) stop);
3342
sprintf (str2, "%ld", (long) stop);
3344
sprintf (str, "%s\t%s\t%s\n", str1, str2, label);
3345
ValNodeCopyStr (head, 0, str);
3347
/* remaining intervals: start and stop only, no label */
while ((slp = SeqLocFindNext (location, slp)) != NULL) {
3348
start = GetOffsetInBioseq (slp, target, SEQLOC_START) + 1;
3349
stop = GetOffsetInBioseq (slp, target, SEQLOC_STOP) + 1;
3350
CheckSeqLocForPartial (slp, &partial5, &partial3);
3352
sprintf (str1, "<%ld", (long) start);
3354
sprintf (str1, "%ld", (long) start);
3357
sprintf (str2, ">%ld", (long) stop);
3359
sprintf (str2, "%ld", (long) stop);
3361
/* a zero start or stop (offset lookup returned -1) suppresses the line */
if (start != 0 && stop != 0) {
3362
sprintf (str, "%s\t%s\n", str1, str2);
3363
ValNodeCopyStr (head, 0, str);
3368
/*
 * Feature-table qualifier names for GO terms, NULL terminated.
 * PrintFTUserFld indexes this array with the position at which the field
 * label matched goQualType; index 0 is an empty placeholder.
 */
static CharPtr goQualList [] = {
3369
"", "go_process", "go_component", "go_function", NULL
3372
/*
 * PrintFTUserFld - user-field callback (passed to VisitUserFieldsInUop by
 * PrintFTUserObj) that writes GO term qualifiers for the feature table.
 *
 * Only fields with choice 11 whose label matches an entry of goQualType are
 * processed.  For each nested entry the text string, GO id, PubMed id and
 * evidence are collected and one line of the form
 *   <TAB><TAB><TAB>qualifier<TAB>text|goid|pmid|evidence
 * is built, trailing '|' characters are stripped, a newline is appended and
 * the line is copied onto the ValNode list whose head pointer is passed in
 * userdata.
 */
static void PrintFTUserFld (
3379
CharPtr evidence = NULL;
3381
CharPtr goid = NULL;
3382
ValNodePtr PNTR head;
3389
CharPtr textstr = NULL;
3392
if (ufp == NULL || ufp->choice != 11) return;
3394
if (oip == NULL) return;
3395
/* find which GO category this field is; i is reused below to pick the qualifier name */
for (i = 0; goQualType [i] != NULL; i++) {
3396
if (StringICmp (oip->str, goQualType [i]) == 0) break;
3398
if (goQualType [i] == NULL) return;
3400
/* loop to allow multiple entries for each type of GO term */
3401
for (entry = ufp->data.ptrvalue; entry != NULL; entry = entry->next) {
3402
if (entry == NULL || entry->choice != 11) break;
3409
/* note: ufp is reused as the cursor over the sub-fields of this entry */
for (ufp = (UserFieldPtr) entry->data.ptrvalue; ufp != NULL; ufp = ufp->next) {
3411
if (oip == NULL) continue;
3412
for (j = 0; goFieldType [j] != NULL; j++) {
3413
if (StringICmp (oip->str, goFieldType [j]) == 0) break;
3415
if (goFieldType [j] == NULL) continue;
3418
/* choice 1 carries a string value, choice 2 an integer value (as read below) */
if (ufp->choice == 1) {
3419
textstr = (CharPtr) ufp->data.ptrvalue;
3423
if (ufp->choice == 1) {
3424
goid = (CharPtr) ufp->data.ptrvalue;
3425
} else if (ufp->choice == 2) {
3426
sprintf (gid, "%ld", (long) (Int4) ufp->data.intvalue);
3427
goid = (CharPtr) gid;
3431
if (ufp->choice == 2) {
3432
pmid = (Int4) ufp->data.intvalue;
3436
if (ufp->choice == 1) {
3437
evidence = (CharPtr) ufp->data.ptrvalue;
3444
/* if (StringHasNoText (textstr)) break; */
3446
/* the extra 40 bytes cover the tabs, qualifier name, separators, pmid digits and newline */
str = (CharPtr) MemNew (StringLen (textstr) + StringLen (goid) + StringLen (evidence) + 40);
3447
if (str == NULL) return;
3448
StringCpy (str, "\t\t\t");
3449
StringCat (str, goQualList [i]);
3450
StringCat (str, "\t");
3451
StringCat (str, textstr);
3452
if (! StringHasNoText (goid)) {
3453
StringCat (str, "|");
3454
StringCat (str, goid);
3456
StringCat (str, "|");
3459
sprintf (tmp, "|%ld", (long) pmid);
3460
StringCat (str, tmp);
3462
StringCat (str, "|");
3464
if (! StringHasNoText (evidence)) {
3465
StringCat (str, "|");
3466
StringCat (str, evidence);
3468
/* drop empty trailing fields by trimming '|' characters from the end */
len = StringLen (str);
3469
while (len > 0 && str [len - 1] == '|') {
3470
str [len - 1] = '\0';
3474
head = (ValNodePtr PNTR) userdata;
3475
StringCat (str, "\n");
3476
ValNodeCopyStr (head, 0, str);
3480
/*
 * PrintFTUserObj - user-object callback: for an object whose type string is
 * "GeneOntology" (case-insensitive), visits its fields with PrintFTUserFld,
 * passing userdata through unchanged.  Any other object is ignored.
 */
static void PrintFTUserObj (
3488
if (uop == NULL) return;
3490
if (oip == NULL || StringICmp (oip->str, "GeneOntology") != 0) return;
3491
VisitUserFieldsInUop (uop, userdata, PrintFTUserFld);
3494
/*
 * PrintFTCodeBreak - write one "transl_except" qualifier line per interval
 * of a code break location onto the ValNode list at *head.
 *
 * The amino acid alphabet is selected from cbaa.choice; nothing is written
 * if no alphabet is selected or its code table cannot be found.
 */
static void PrintFTCodeBreak (
3495
ValNodePtr PNTR head,
3506
SeqCodeTablePtr sctp;
3514
/* map the residue encoding of the code break onto a sequence code */
switch (cbaa.choice) {
3516
seqcode = Seq_code_ncbieaa;
3519
seqcode = Seq_code_ncbi8aa;
3522
seqcode = Seq_code_ncbistdaa;
3527
if (seqcode == 0) return;
3528
sctp = SeqCodeTableFind (seqcode);
3529
if (sctp == NULL) return;
3531
/* FFFlatLoc and Get3LetterSymbol take a job pointer; a zeroed local job is used here */
MemSet ((Pointer) &iaj, 0, sizeof (IntAsn2gbJob));
3532
iaj.flags.iupacaaOnly = FALSE;
3533
iaj.relModeError = FALSE;
3535
slp = SeqLocFindNext (cbp->loc, NULL);
3536
while (slp != NULL) {
3537
str = FFFlatLoc (&iaj, target, slp, FALSE);
3539
residue = cbaa.value.intvalue;
3540
ptr = Get3LetterSymbol (&iaj, seqcode, sctp, residue);
3544
sprintf (buf, "\t\t\ttransl_except\t(pos:%s,aa:%s)\n", str, ptr);
3545
ValNodeCopyStr (head, 0, buf);
3548
slp = SeqLocFindNext (cbp->loc, slp);
3552
/*
 * SeqIdWriteForTable - build the identifier string used in feature-table
 * protein_id / transcript_id qualifiers.
 *
 * Walks the SeqId chain starting at sip, remembering one id per category
 * (accession, local, patent, pdb, general, gi), then writes the chosen ids
 * in FASTA short form into a local buffer, each preceded by the current
 * prefix, and copies the result into buf (at most buflen, always
 * terminated by StringNCpy_0).
 *
 *   sip     head of the SeqId chain to examine
 *   buf     output buffer
 *   buflen  size of buf
 *   ajp     job; ajp->flags.suppressLocalID disables the local-id fallback
 *   giOK    when TRUE, a gi may be written if nothing else was written
 *
 * Returns FALSE if sip, buf or ajp is NULL, or if the resulting text is
 * empty.
 */
static Boolean SeqIdWriteForTable (SeqIdPtr sip, CharPtr buf, size_t buflen, IntAsn2gbJobPtr ajp, Boolean giOK)
3555
SeqIdPtr accn = NULL, local = NULL, patent = NULL,
3556
pdb = NULL, general = NULL, gi = NULL;
3558
Char id [41], str [64];
3560
CharPtr prefix = NULL;
3562
if (sip == NULL || buf == NULL || ajp == NULL) return FALSE;
3564
/* classify every id in the chain */
while (sip != NULL) {
3565
switch (sip->choice) {
3569
case SEQID_GENBANK :
3572
case SEQID_SWISSPROT :
3584
case SEQID_GENERAL :
3585
dbt = (DbtagPtr) sip->data.ptrvalue;
3587
/* general ids from the TMSMART and BankIt databases are not used */
if (StringICmp (dbt->db, "TMSMART") != 0 && StringICmp (dbt->db, "BankIt") != 0) {
3608
if (SeqIdWrite (accn, id, PRINTID_FASTA_SHORT, sizeof (id) - 1) != NULL) {
3609
StringCat (str, prefix);
3610
StringCat (str, id);
3616
if (general != NULL) {
3617
if (SeqIdWrite (general, id, PRINTID_FASTA_SHORT, sizeof (id) - 1) != NULL) {
3618
StringCat (str, prefix);
3619
StringCat (str, id);
3625
/* local id is a fallback: only when nothing has been written so far */
if (local != NULL && (! ajp->flags.suppressLocalID) && numids == 0) {
3626
if (SeqIdWrite (local, id, PRINTID_FASTA_SHORT, sizeof (id) - 1) != NULL) {
3627
StringCat (str, prefix);
3628
StringCat (str, id);
3634
/*
 * gi is the last fallback.  The id written here must be gi itself: this
 * branch only runs when nothing else was written, so writing accn (as the
 * code previously did) could never produce the gi.
 */
if (gi != NULL && giOK && numids == 0) {
3635
if (SeqIdWrite (gi, id, PRINTID_FASTA_SHORT, sizeof (id) - 1) != NULL) {
3636
StringCat (str, prefix);
3637
StringCat (str, id);
3643
StringNCpy_0 (buf, str, buflen);
3644
if (StringHasNoText (buf)) return FALSE;
3649
/*
 * PrintBioSourceFtableEntry - write feature-table qualifier lines for a
 * BioSource onto the ValNode list at *head.
 *
 * Emits, in order: the organism name (taxname), one line per organism
 * modifier (biop->org->orgname->mod) and one line per subsource
 * (biop->subtype).  Each line is "<TAB><TAB><TAB>qualifier<TAB>value\n";
 * the value is appended with a bounded concatenation so that the newline
 * always fits in str.  Does nothing if head or biop is NULL.
 */
static void PrintBioSourceFtableEntry (
3650
ValNodePtr PNTR head,
3658
if (head == NULL || biop == NULL) return;
3660
if (biop->org != NULL && ! StringHasNoText (biop->org->taxname))
3662
sprintf (str, "\t\t\torganism\t%s\n", biop->org->taxname);
3663
ValNodeCopyStr (head, 0, str);
3667
/* organism modifiers: the switch only selects the qualifier name prefix */
if (biop->org != NULL && biop->org->orgname != NULL)
3669
for (mod = biop->org->orgname->mod;
3673
switch (mod->subtype)
3675
case ORGMOD_strain :
3676
sprintf (str, "\t\t\tstrain\t");
3678
case ORGMOD_substrain :
3679
sprintf (str, "\t\t\tsubstrain\t");
3682
sprintf (str, "\t\t\ttype\t");
3684
case ORGMOD_subtype :
3685
sprintf (str, "\t\t\tsubtype\t");
3687
case ORGMOD_variety :
3688
sprintf (str, "\t\t\tvariety\t");
3690
case ORGMOD_serotype :
3691
sprintf (str, "\t\t\tserotype\t");
3693
case ORGMOD_serogroup :
3694
sprintf (str, "\t\t\tserogroup\t");
3696
case ORGMOD_serovar :
3697
sprintf (str, "\t\t\tserovar\t");
3699
case ORGMOD_cultivar :
3700
sprintf (str, "\t\t\tcultivar\t");
3702
case ORGMOD_pathovar :
3703
sprintf (str, "\t\t\tpathovar\t");
3705
case ORGMOD_chemovar :
3706
sprintf (str, "\t\t\tchemovar\t");
3708
case ORGMOD_biovar :
3709
sprintf (str, "\t\t\tbiovar\t");
3711
case ORGMOD_biotype :
3712
sprintf (str, "\t\t\tbiotype\t");
3715
sprintf (str, "\t\t\tgroup\t");
3717
case ORGMOD_subgroup :
3718
sprintf (str, "\t\t\tsubgroup\t");
3720
case ORGMOD_isolate :
3721
sprintf (str, "\t\t\tisolate\t");
3723
case ORGMOD_common :
3724
sprintf (str, "\t\t\tcommon\t");
3726
case ORGMOD_acronym :
3727
sprintf (str, "\t\t\tacronym\t");
3729
case ORGMOD_dosage :
3730
sprintf (str, "\t\t\tdosage\t");
3732
case ORGMOD_nat_host :
3733
sprintf (str, "\t\t\tnat_host\t");
3735
case ORGMOD_sub_species :
3736
sprintf (str, "\t\t\tsub_species\t");
3738
case ORGMOD_specimen_voucher :
3739
sprintf (str, "\t\t\tspecimen_voucher\t");
3741
case ORGMOD_authority :
3742
sprintf (str, "\t\t\tauthority\t");
3745
sprintf (str, "\t\t\tforma\t");
3747
case ORGMOD_forma_specialis :
3748
sprintf (str, "\t\t\tforma_specialis\t");
3750
case ORGMOD_ecotype :
3751
sprintf (str, "\t\t\tecotype\t");
3753
case ORGMOD_synonym :
3754
sprintf (str, "\t\t\tsynonym\t");
3756
case ORGMOD_anamorph :
3757
sprintf (str, "\t\t\tanamorph\t");
3759
case ORGMOD_teleomorph :
3760
sprintf (str, "\t\t\tteleomorph\t");
3763
sprintf (str, "\t\t\tbreed\t");
3765
case ORGMOD_gb_acronym :
3766
sprintf (str, "\t\t\tgb_acronym\t");
3768
case ORGMOD_gb_anamorph :
3769
sprintf (str, "\t\t\tgb_anamorph\t");
3771
case ORGMOD_old_lineage :
3772
sprintf (str, "\t\t\told_lineage\t");
3774
case ORGMOD_old_name :
3775
sprintf (str, "\t\t\told_name\t");
3778
sprintf (str, "\t\t\tnote\t");
3783
/* an empty prefix means this modifier has no qualifier to print */
if ( str [0] == 0) continue;
3784
if (! StringHasNoText (mod->subname))
3786
/* bounded append: leaves room for the newline and the terminator */
StringNCat (str, mod->subname, sizeof (str) - StringLen (str) - 2);
3787
str [sizeof (str) - 2] = 0;
3789
StringCat (str, "\n");
3790
ValNodeCopyStr (head, 0, str);
3794
/* subsource modifiers: same scheme as the organism modifiers above */
for (ssp = biop->subtype; ssp != NULL; ssp = ssp->next)
3796
switch (ssp->subtype)
3798
case SUBSRC_chromosome :
3799
sprintf (str, "\t\t\tchromosome\t");
3802
sprintf (str, "\t\t\tmap\t");
3805
sprintf (str, "\t\t\tclone\t");
3807
case SUBSRC_haplotype :
3808
sprintf (str, "\t\t\thaplotype\t");
3810
case SUBSRC_genotype :
3811
sprintf (str, "\t\t\tgenotype\t");
3814
sprintf (str, "\t\t\tsex\t");
3816
case SUBSRC_cell_line :
3817
sprintf (str, "\t\t\tcell_line\t");
3819
case SUBSRC_cell_type :
3820
sprintf (str, "\t\t\tcell_type\t");
3822
case SUBSRC_tissue_type :
3823
sprintf (str, "\t\t\ttissue_type\t");
3825
case SUBSRC_clone_lib :
3826
sprintf (str, "\t\t\tclone_lib\t");
3828
case SUBSRC_dev_stage :
3829
sprintf (str, "\t\t\tdev_stage\t");
3831
case SUBSRC_frequency :
3832
sprintf (str, "\t\t\tfrequency\t");
3834
case SUBSRC_germline :
3835
sprintf (str, "\t\t\tgermline\t");
3837
case SUBSRC_rearranged :
3838
sprintf (str, "\t\t\trearranged\t");
3840
case SUBSRC_lab_host :
3841
sprintf (str, "\t\t\tlab_host\t");
3843
case SUBSRC_pop_variant :
3844
sprintf (str, "\t\t\tpop_variant\t");
3846
case SUBSRC_tissue_lib :
3847
sprintf (str, "\t\t\ttissue_lib\t");
3849
case SUBSRC_plasmid_name :
3850
sprintf (str, "\t\t\tplasmid_name\t");
3852
case SUBSRC_transposon_name :
3853
sprintf (str, "\t\t\ttransposon_name\t");
3855
case SUBSRC_insertion_seq_name :
3856
sprintf (str, "\t\t\tinsertion_seq_name\t");
3858
case SUBSRC_plastid_name :
3859
sprintf (str, "\t\t\tplastid_name\t");
3861
case SUBSRC_country :
3862
sprintf (str, "\t\t\tcountry\t");
3864
case SUBSRC_segment :
3865
sprintf (str, "\t\t\tsegment\t");
3867
case SUBSRC_endogenous_virus_name :
3868
sprintf (str, "\t\t\tendogenous_virus_name\t");
3870
case SUBSRC_transgenic :
3871
sprintf (str, "\t\t\ttransgenic\t");
3873
case SUBSRC_environmental_sample :
3874
sprintf (str, "\t\t\tenvironmental_sample\t");
3876
case SUBSRC_isolation_source :
3877
sprintf (str, "\t\t\tisolation_source\t");
3880
sprintf (str, "\t\t\tnote\t");
3885
if ( str [0] == 0) continue;
3886
if (! StringHasNoText (ssp->name))
3888
StringNCat (str, ssp->name, sizeof (str) - StringLen (str) - 2);
3889
str [sizeof (str) - 2] = 0;
3891
StringCat (str, "\n");
3892
ValNodeCopyStr (head, 0, str);
3896
/*
 * PrintFtableLocAndQuals - write the feature-table representation of one
 * feature onto the ValNode list at *head.
 *
 * First the location intervals are written with the feature key as label
 * (PrintFtableIntervals), then type-specific qualifiers chosen by
 * context->seqfeattype, then qualifiers common to all features: pseudo,
 * gene cross-reference, note, evidence, exception, GenBank qualifiers,
 * GeneOntology terms and db_xref entries.  Every qualifier line has the
 * form "<TAB><TAB><TAB>name<TAB>value\n".
 *
 * Returns without output if head, target, sfp or context is NULL.
 */
NLM_EXTERN void PrintFtableLocAndQuals (
3897
IntAsn2gbJobPtr ajp,
3898
ValNodePtr PNTR head,
3901
SeqMgrFeatContextPtr context
3912
SeqMgrDescContext dcontext;
3914
SeqMgrFeatContext fcontext;
3916
/* db list of the gene or protein reference, printed as db_xref near the end */
ValNodePtr geneorprotdb;
3918
Boolean is_gps_genomic = FALSE;
3942
if (head == NULL || target == NULL || sfp == NULL || context == NULL) return;
3943
/* label = (CharPtr) FeatDefTypeLabel (sfp); */
3944
label = FindKeyFromFeatDefType (sfp->idx.subtype, FALSE);
3945
if (StringCmp (label, "Gene") == 0) {
3948
else if (StringCmp (label, "Src") == 0) {
3951
if (StringHasNoText (label)) {
3955
/* check if genomic sequence in genomic product set */
3957
if (target->idx.parenttype == OBJ_BIOSEQSET) {
3958
bssp = (BioseqSetPtr) target->idx.parentptr;
3959
if (bssp != NULL && bssp->_class == BioseqseqSet_class_gen_prod_set) {
3960
sdp = SeqMgrGetNextDescriptor (target, NULL, Seq_descr_molinfo, &dcontext);
3962
mip = (MolInfoPtr) sdp->data.ptrvalue;
3963
if (mip != NULL && mip->biomol == MOLECULE_TYPE_GENOMIC) {
3964
is_gps_genomic = TRUE;
3970
PrintFtableIntervals (head, target, sfp->location, label);
3972
geneorprotdb = NULL;
3973
pseudo = sfp->pseudo;
3975
/* type-specific qualifiers */
switch (context->seqfeattype) {
3977
/* gene reference: locus, synonyms, description, map location, locus_tag */
grp = (GeneRefPtr) sfp->data.value.ptrvalue;
3979
geneorprotdb = grp->db;
3980
pseudo |= grp->pseudo;
3982
StringNCpy_0 (str, (CharPtr) grp->locus, sizeof (str));
3983
if (! StringHasNoText (str)) {
3984
sprintf (tmp, "\t\t\tgene\t%s\n", str);
3985
ValNodeCopyStr (head, 0, tmp);
3987
for (vnp = grp->syn; vnp != NULL; vnp = vnp->next) {
3988
StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
3989
if (! StringHasNoText (str)) {
3990
sprintf (tmp, "\t\t\tgene_syn\t%s\n", str);
3991
ValNodeCopyStr (head, 0, tmp);
3994
if (! StringHasNoText (grp->desc)) {
3995
sprintf (tmp, "\t\t\tgene_desc\t%s\n", grp->desc);
3996
ValNodeCopyStr (head, 0, tmp);
3998
if (! StringHasNoText (grp->maploc)) {
3999
sprintf (tmp, "\t\t\tmap\t%s\n", grp->maploc);
4000
ValNodeCopyStr (head, 0, tmp);
4002
if (! StringHasNoText (grp->locus_tag)) {
4003
sprintf (tmp, "\t\t\tlocus_tag\t%s\n", grp->locus_tag);
4004
ValNodeCopyStr (head, 0, tmp);
4008
case SEQFEAT_CDREGION :
4009
/* qualifiers are taken from the best protein feature on the product bioseq */
prod = BioseqFind (SeqLocId (sfp->product));
4010
prot = SeqMgrGetBestProteinFeature (prod, NULL);
4012
prp = (ProtRefPtr) prot->data.value.ptrvalue;
4014
geneorprotdb = prp->db;
4015
if (prp->name != NULL) {
4016
for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
4017
StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
4018
if (! StringHasNoText (str)) {
4019
sprintf (tmp, "\t\t\tproduct\t%s\n", str);
4020
ValNodeCopyStr (head, 0, tmp);
4024
if (prp->desc != NULL) {
4025
StringNCpy_0 (str, prp->desc, sizeof (str));
4026
if (! StringHasNoText (str)) {
4027
sprintf (tmp, "\t\t\tprot_desc\t%s\n", str);
4028
ValNodeCopyStr (head, 0, tmp);
4031
for (vnp = prp->activity; vnp != NULL; vnp = vnp->next) {
4032
StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
4033
if (! StringHasNoText (str)) {
4034
sprintf (tmp, "\t\t\tfunction\t%s\n", str);
4035
ValNodeCopyStr (head, 0, tmp);
4038
for (vnp = prp->ec; vnp != NULL; vnp = vnp->next) {
4039
StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
4040
if (! StringHasNoText (str)) {
4041
sprintf (tmp, "\t\t\tEC_number\t%s\n", str);
4042
ValNodeCopyStr (head, 0, tmp);
4046
StringNCpy_0 (str, prot->comment, sizeof (str));
4047
if (! StringHasNoText (str)) {
4048
sprintf (tmp, "\t\t\tprot_note\t%s\n", str);
4049
ValNodeCopyStr (head, 0, tmp);
4052
crp = (CdRegionPtr) sfp->data.value.ptrvalue;
4054
/* codon_start is only written for frames 2 and 3 */
if (crp->frame > 1 && crp->frame <= 3) {
4055
sprintf (tmp, "\t\t\tcodon_start\t%d\n", (int) crp->frame);
4056
ValNodeCopyStr (head, 0, tmp);
4058
for (cbp = crp->code_break; cbp != NULL; cbp = cbp->next) {
4059
PrintFTCodeBreak (head, cbp, target);
4063
if (SeqIdWriteForTable (prod->id, str, sizeof (str), ajp, FALSE)) {
4064
sprintf (tmp, "\t\t\tprotein_id\t%s\n", str);
4065
ValNodeCopyStr (head, 0, tmp);
4067
/* in a genomic product set also report the id of the cDNA bioseq carrying the CDS */
if (is_gps_genomic) {
4068
cds = SeqMgrGetCDSgivenProduct (prod, NULL);
4070
cdna = BioseqFindFromSeqLoc (cds->location);
4072
if (SeqIdWriteForTable (cdna->id, str, sizeof (str), ajp, FALSE)) {
4073
sprintf (tmp, "\t\t\ttranscript_id\t%s\n", str);
4074
ValNodeCopyStr (head, 0, tmp);
4079
/* product location exists but was not handled above: use its SeqId, resolving a gi via GetSeqIdForGI */
} else if (sfp->product != NULL) {
4080
sip = SeqLocId (sfp->product);
4082
if (sip->choice == SEQID_GI) {
4083
sip2 = GetSeqIdForGI (sip->data.intvalue);
4088
if (SeqIdWriteForTable (sip, str, sizeof (str), ajp, TRUE)) {
4089
sprintf (tmp, "\t\t\tprotein_id\t%s\n", str);
4090
ValNodeCopyStr (head, 0, tmp);
4096
/* RNA reference: product name, tRNA codons and anticodon, transcript / protein ids */
prod = BioseqFind (SeqLocId (sfp->product));
4097
rrp = (RnaRefPtr) sfp->data.value.ptrvalue;
4099
switch (rrp->ext.choice) {
4101
StringNCpy_0 (str, (CharPtr) rrp->ext.value.ptrvalue, sizeof (str));
4102
if (! StringHasNoText (str)) {
4103
sprintf (tmp, "\t\t\tproduct\t%s\n", str);
4104
ValNodeCopyStr (head, 0, tmp);
4108
trp = rrp->ext.value.ptrvalue;
4110
FeatDefLabel (sfp, str, sizeof (str) - 1, OM_LABEL_CONTENT);
4111
if (! StringHasNoText (str)) {
4112
sprintf (tmp, "\t\t\tproduct\t%s\n", str);
4113
ValNodeCopyStr (head, 0, tmp);
4115
numcodons = ComposeCodonsRecognizedString (trp, numbuf, sizeof (numbuf));
4116
if (numcodons > 0 && StringDoesHaveText (numbuf)) {
4117
sprintf (tmp, "\t\t\tcodon_recognized\t%s\n", numbuf);
4118
ValNodeCopyStr (head, 0, tmp);
4120
slp = trp->anticodon;
4122
/* when the job is restricted to a sub-location, remap the anticodon onto it */
if (slp != NULL && ajp->ajp.slp != NULL) {
4123
sip = SeqIdParse ("lcl|dummy");
4124
newloc = SeqLocReMapEx (sip, ajp->ajp.slp, slp, 0, FALSE, ajp->masterStyle);
4127
if (newloc != NULL) {
4128
A2GBSeqLocReplaceID (newloc, ajp->ajp.slp);
4132
if (StringNICmp (aa, "tRNA-", 5) == 0) {
4135
if (slp != NULL && StringDoesHaveText (aa)) {
4136
tmpx = FFFlatLoc (ajp, target, slp, ajp->masterStyle);
4138
sprintf (tmp, "\t\t\tanticodon\t(pos:%s,aa:%s)\n", tmpx, aa);
4139
ValNodeCopyStr (head, 0, tmp);
4143
if (newloc != NULL) {
4144
SeqLocFree (newloc);
4153
if (SeqIdWriteForTable (prod->id, str, sizeof (str), ajp, FALSE)) {
4154
sprintf (tmp, "\t\t\ttranscript_id\t%s\n", str);
4155
ValNodeCopyStr (head, 0, tmp);
4157
/* protein_id is reported only when the transcript carries exactly one CDS */
if (is_gps_genomic) {
4158
cds = SeqMgrGetNextFeature (prod, NULL, SEQFEAT_CDREGION, 0, &fcontext);
4159
if (cds != NULL && SeqMgrGetNextFeature (prod, cds, SEQFEAT_CDREGION, 0, &fcontext) == NULL) {
4160
prod = BioseqFindFromSeqLoc (cds->product);
4162
if (SeqIdWriteForTable (prod->id, str, sizeof (str), ajp, FALSE)) {
4163
sprintf (tmp, "\t\t\tprotein_id\t%s\n", str);
4164
ValNodeCopyStr (head, 0, tmp);
4169
} else if (sfp->product != NULL) {
4170
sip = SeqLocId (sfp->product);
4172
if (sip->choice == SEQID_GI) {
4173
sip2 = GetSeqIdForGI (sip->data.intvalue);
4178
if (SeqIdWriteForTable (sip, str, sizeof (str), ajp, TRUE)) {
4179
sprintf (tmp, "\t\t\ttranscript_id\t%s\n", str);
4180
ValNodeCopyStr (head, 0, tmp);
4186
/* protein reference stored directly on the feature */
prp = (ProtRefPtr) sfp->data.value.ptrvalue;
4188
if (prp->name != NULL) {
4189
for (vnp = prp->name; vnp != NULL; vnp = vnp->next) {
4190
StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
4191
if (! StringHasNoText (str)) {
4192
sprintf (tmp, "\t\t\tproduct\t%s\n", str);
4193
ValNodeCopyStr (head, 0, tmp);
4197
if (prp->desc != NULL) {
4198
StringNCpy_0 (str, prp->desc, sizeof (str));
4199
if (! StringHasNoText (str)) {
4200
sprintf (tmp, "\t\t\tprot_desc\t%s\n", str);
4201
ValNodeCopyStr (head, 0, tmp);
4204
for (vnp = prp->activity; vnp != NULL; vnp = vnp->next) {
4205
StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
4206
if (! StringHasNoText (str)) {
4207
sprintf (tmp, "\t\t\tfunction\t%s\n", str);
4208
ValNodeCopyStr (head, 0, tmp);
4211
for (vnp = prp->ec; vnp != NULL; vnp = vnp->next) {
4212
StringNCpy_0 (str, (CharPtr) vnp->data.ptrvalue, sizeof (str));
4213
if (! StringHasNoText (str)) {
4214
sprintf (tmp, "\t\t\tEC_number\t%s\n", str);
4215
ValNodeCopyStr (head, 0, tmp);
4219
StringNCpy_0 (str, sfp->comment, sizeof (str));
4220
if (! StringHasNoText (str)) {
4221
sprintf (tmp, "\t\t\tprot_note\t%s\n", str);
4222
ValNodeCopyStr (head, 0, tmp);
4225
case SEQFEAT_REGION :
4226
StringNCpy_0 (str, (CharPtr) sfp->data.value.ptrvalue, sizeof (str));
4227
if (! StringHasNoText (str)) {
4228
sprintf (tmp, "\t\t\tregion\t%s\n", str);
4229
ValNodeCopyStr (head, 0, tmp);
4233
/* bond type: integer code looked up in bondList (valid codes 1..5; 255 is remapped in lines not visible here) */
bondidx = (Int2) sfp->data.value.intvalue;
4234
if (bondidx == 255) {
4237
if (bondidx > 0 && bondidx < 6) {
4238
sprintf (tmp, "\t\t\tbond_type\t%s\n", bondList [bondidx]);
4239
ValNodeCopyStr (head, 0, tmp);
4243
/* site type: integer code looked up in siteList (valid codes 1..26) */
siteidx = (Int2) sfp->data.value.intvalue;
4244
if (siteidx == 255) {
4247
if (siteidx > 0 && siteidx < 27) {
4248
sprintf (tmp, "\t\t\tsite_type\t%s\n", siteList [siteidx]);
4249
ValNodeCopyStr (head, 0, tmp);
4252
case SEQFEAT_PSEC_STR :
4253
sec_str = (Int2) sfp->data.value.intvalue;
4254
if (sec_str > 0 && sec_str <= 3) {
4255
sprintf (tmp, "\t\t\tsec_str_type\t%s\n", secStrText [sec_str]);
4256
ValNodeCopyStr (head, 0, tmp);
4260
StringNCpy_0 (str, (CharPtr) sfp->data.value.ptrvalue, sizeof (str));
4261
if (! StringHasNoText (str)) {
4262
sprintf (tmp, "\t\t\theterogen\t%s\n", str);
4263
ValNodeCopyStr (head, 0, tmp);
4266
case SEQFEAT_BIOSRC :
4267
PrintBioSourceFtableEntry (head, sfp->data.value.ptrvalue);
4273
/* qualifiers common to all feature types start here */
ValNodeCopyStr (head, 0, "\t\t\tpseudo\n");
4275
/* gene cross-reference: a suppressed gene is written as "-" */
grp = SeqMgrGetGeneXref (sfp);
4277
if (SeqMgrGeneIsSuppressed (grp)) {
4278
ValNodeCopyStr (head, 0, "\t\t\tgene\t-\n");
4280
if (StringDoesHaveText (grp->locus)) {
4281
sprintf (tmp, "\t\t\tgene\t%s\n", grp->locus);
4282
ValNodeCopyStr (head, 0, tmp);
4284
if (StringDoesHaveText (grp->locus_tag)) {
4285
sprintf (tmp, "\t\t\tlocus_tag\t%s\n", grp->locus_tag);
4286
ValNodeCopyStr (head, 0, tmp);
4290
/* the comment is copied as its own list node, so its length is not limited by tmp */
if (! StringHasNoText (sfp->comment)) {
4291
ValNodeCopyStr (head, 0, "\t\t\tnote\t");
4292
ValNodeCopyStr (head, 0, sfp->comment);
4293
ValNodeCopyStr (head, 0, "\n");
4295
switch (sfp->exp_ev) {
4297
ValNodeCopyStr (head, 0, "\t\t\tevidence\texperimental\n");
4300
ValNodeCopyStr (head, 0, "\t\t\tevidence\tnot_experimental\n");
4305
if (! StringHasNoText (sfp->except_text)) {
4306
ValNodeCopyStr (head, 0, "\t\t\texception\t");
4307
ValNodeCopyStr (head, 0, sfp->except_text);
4308
ValNodeCopyStr (head, 0, "\n");
4309
} else if (sfp->excpt) {
4310
ValNodeCopyStr (head, 0, "\t\t\texception\n");
4312
/* GenBank qualifiers: only those with both a name and a value are written */
for (gbq = sfp->qual; gbq != NULL; gbq = gbq->next) {
4313
if (! StringHasNoText (gbq->qual)) {
4314
if (! StringHasNoText (gbq->val)) {
4315
sprintf (tmp, "\t\t\t%s\t%s\n", gbq->qual, gbq->val);
4316
ValNodeCopyStr (head, 0, tmp);
4320
VisitUserObjectsInUop (sfp->ext, (Pointer) head, PrintFTUserObj);
4321
/* db_xref entries from the gene / protein reference, then from the feature itself */
for (vnp = geneorprotdb; vnp != NULL; vnp = vnp->next) {
4322
dbt = (DbtagPtr) vnp->data.ptrvalue;
4324
if (! StringHasNoText (dbt->db)) {
4326
/* the tag is printed as text when it has a string, otherwise as its integer id */
if (oip->str != NULL && (! StringHasNoText (oip->str))) {
4327
sprintf (tmp, "\t\t\tdb_xref\t%s:%s\n", dbt->db, oip->str);
4328
ValNodeCopyStr (head, 0, tmp);
4330
sprintf (tmp, "\t\t\tdb_xref\t%s:%ld\n", dbt->db, (long) oip->id);
4331
ValNodeCopyStr (head, 0, tmp);
4336
for (vnp = sfp->dbxref; vnp != NULL; vnp = vnp->next) {
4337
dbt = (DbtagPtr) vnp->data.ptrvalue;
4339
if (! StringHasNoText (dbt->db)) {
4341
if (oip->str != NULL && (! StringHasNoText (oip->str))) {
4342
sprintf (tmp, "\t\t\tdb_xref\t%s:%s\n", dbt->db, oip->str);
4343
ValNodeCopyStr (head, 0, tmp);
4345
sprintf (tmp, "\t\t\tdb_xref\t%s:%ld\n", dbt->db, (long) oip->id);
4346
ValNodeCopyStr (head, 0, tmp);
4353
/*
 * FindFirstBioseq - depth-first search for the first Bioseq in a SeqEntry.
 *
 * Returns NULL if sep is NULL, has no data, or has a choice greater than 2.
 * For a Bioseq entry the bioseq itself is used; otherwise the entry is
 * treated as a BioseqSet and each member of seq_set is searched
 * recursively, returning the first non-NULL result.
 */
static BioseqPtr FindFirstBioseq (SeqEntryPtr sep)
4359
if (sep == NULL || sep->data.ptrvalue == NULL ||
4360
/* sep->choice < 0 || */ sep->choice > 2) return NULL;
4361
if (IS_Bioseq (sep)) {
4362
bsp = (BioseqPtr) sep->data.ptrvalue;
4365
bssp = (BioseqSetPtr) sep->data.ptrvalue;
4366
for (sep = bssp->seq_set; sep != NULL; sep = sep->next) {
4367
bsp = FindFirstBioseq (sep);
4368
if (bsp != NULL) return bsp;
4373
/*
 * BioseqLockAndIndexByEntity - lock the first bioseq of an entity and make
 * sure the entity's features are indexed.
 *
 * Looks up the SeqEntry for entityID, finds its first Bioseq, locks that
 * bioseq through its best SeqId (BioseqLockById) and, if
 * SeqMgrFeaturesAreIndexed reports 0, calls SeqMgrIndexFeatures.
 * Returns NULL if entityID is below 1 or any lookup step fails.
 */
static BioseqPtr BioseqLockAndIndexByEntity (Uint2 entityID)
4380
if (entityID < 1) return NULL;
4382
sep = SeqMgrGetSeqEntryForEntityID (entityID);
4383
if (sep == NULL) return NULL;
4385
bsp = FindFirstBioseq (sep);
4386
if (bsp == NULL) return NULL;
4388
sip = SeqIdFindBest (bsp->id, 0);
4389
if (sip == NULL) return NULL;
4391
/* bsp now refers to the locked bioseq returned by BioseqLockById */
bsp = BioseqLockById (sip);
4392
if (bsp == NULL) return NULL;
4394
if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
4395
SeqMgrIndexFeatures (entityID, NULL);
4401
/*
 * FormatFtableSourceFeatBlock - feature-table text for a source block.
 *
 * Fetches the BioSource behind the block, either from a descriptor
 * (OBJ_SEQDESC) or from a feature (OBJ_SEQFEAT), using the block's
 * entityID and itemID.  Writes the intervals of isp->loc with the label
 * "source", then the source qualifiers, merges the collected strings with
 * MergeFFValNodeStrs and frees the list.
 * Returns NULL if bbp is NULL or the BioSource cannot be found.
 */
NLM_EXTERN CharPtr FormatFtableSourceFeatBlock (
4409
SeqMgrDescContext dcontext;
4410
SeqMgrFeatContext fcontext;
4416
if (bbp == NULL) return NULL;
4418
isp = (IntSrcBlockPtr) bbp;
4422
if (bbp->itemtype == OBJ_SEQDESC) {
4423
sdp = SeqMgrGetDesiredDescriptor (bbp->entityID, NULL, bbp->itemID,
4424
0, NULL, &dcontext);
4425
if (sdp == NULL) return NULL;
4426
biop = sdp->data.ptrvalue;
4427
} else if (bbp->itemtype == OBJ_SEQFEAT) {
4428
sfp = SeqMgrGetDesiredFeature (bbp->entityID, NULL, bbp->itemID, 0, NULL, &fcontext);
4429
if (sfp == NULL) return NULL;
4430
biop = sfp->data.value.ptrvalue;
4432
if (biop == NULL) return NULL;
4433
PrintFtableIntervals (&head, target, isp->loc, "source");
4434
PrintBioSourceFtableEntry (&head, biop);
4436
/* merge the collected lines into one string, then release the list and its strings */
str = MergeFFValNodeStrs (head);
4437
ValNodeFreeData (head);
4442
/*
 * DoImmediateFormat - format one block and write it out right away.
 *
 * Selects the formatter for bbp->blocktype from asn2gnbk_fmt_functions,
 * runs it with the scope set to the top SeqEntry of the block's entity,
 * then writes the result to afp->fp and/or the afp->ffwrite callback.
 * The "?\n" output at the end is the alternative used when there is no
 * formatted string to write (the test itself is not visible in these
 * lines).  Does nothing if afp or bbp is NULL, if the block type is
 * outside LOCUS_BLOCK..SLASH_BLOCK, or if the qualifier scratch array
 * cannot be allocated.
 */
NLM_EXTERN void DoImmediateFormat (
4443
Asn2gbFormatPtr afp,
4448
BlockType blocktype;
4452
SeqEntryPtr oldscope;
4453
QualValPtr qv = NULL;
4457
if (afp == NULL || bbp == NULL) return;
4459
blocktype = bbp->blocktype;
4460
if (blocktype < LOCUS_BLOCK || blocktype > SLASH_BLOCK) return;
4461
fmt = asn2gnbk_fmt_functions [(int) blocktype];
4462
if (fmt == NULL) return;
4464
/* qualifier scratch array sized for the larger of the source and feature qualifier sets, plus slack */
max = (size_t) (MAX (ASN2GNBK_TOTAL_SOURCE, ASN2GNBK_TOTAL_FEATUR));
4465
qv = MemNew (sizeof (QualVal) * (max + 5));
4466
if (qv == NULL) return;
4468
sep = GetTopSeqEntryForEntityID (bbp->entityID);
4470
bsp = BioseqLockAndIndexByEntity (bbp->entityID);
4471
/* restrict lookups to this record while formatting; the old scope is restored below */
oldscope = SeqEntrySetScope (sep);
4474
str = fmt (afp, bbp);
4477
SeqEntrySetScope (oldscope);
4481
if (afp->fp != NULL) {
4482
fprintf (afp->fp, "%s", str);
4484
if (afp->ffwrite != NULL) {
4485
afp->ffwrite (str, afp->userdata, blocktype);
4488
if (afp->fp != NULL) {
4489
fprintf (afp->fp, "?\n");
4491
if (afp->ffwrite != NULL) {
4492
afp->ffwrite ("?\n", afp->userdata, blocktype);
4500
/*
 * asn2gnbk_format - produce the text of one paragraph of a prepared job.
 *
 * Validates the paragraph index, the block it refers to, the block's
 * section index and block type, then calls the formatter selected from
 * asn2gnbk_fmt_functions with a zero-initialised Asn2gbFormat that points
 * back at the job.  The "???\n" string is the placeholder produced on the
 * path where the formatter yields nothing (test not visible here).
 * Returns NULL if any validation step or the scratch allocation fails.
 */
NLM_EXTERN CharPtr asn2gnbk_format (
4509
BlockType blocktype;
4512
IntAsn2gbJobPtr iajp;
4514
SeqEntryPtr oldscope;
4520
/* qv must hold MAX (ASN2GNBK_TOTAL_SOURCE, ASN2GNBK_TOTAL_FEATUR) */
4522
iajp = (IntAsn2gbJobPtr) ajp;
4523
if (iajp == NULL || ajp->sectionArray == NULL || ajp->paragraphArray == NULL) return NULL;
4524
if (paragraph < 0 || paragraph >= ajp->numParagraphs) return NULL;
4526
bbp = ajp->paragraphArray [paragraph];
4527
if (bbp == NULL) return NULL;
4529
section = bbp->section;
4530
if (section < 0 || section >= ajp->numSections) return NULL;
4532
asp = ajp->sectionArray [section];
4533
if (asp == NULL) return NULL;
4535
blocktype = bbp->blocktype;
4536
if (blocktype < LOCUS_BLOCK || blocktype > SLASH_BLOCK) return NULL;
4538
max = (size_t) (MAX (ASN2GNBK_TOTAL_SOURCE, ASN2GNBK_TOTAL_FEATUR));
4539
qv = MemNew (sizeof (QualVal) * (max + 5));
4540
if (qv == NULL) return NULL;
4542
/* build a private format context for this call */
MemSet ((Pointer) &af, 0, sizeof (Asn2gbFormat));
4543
af.ajp = (IntAsn2gbJobPtr) ajp;
4546
af.format = iajp->format;
4550
sep = GetTopSeqEntryForEntityID (bbp->entityID);
4552
fmt = asn2gnbk_fmt_functions [(int) blocktype];
4553
if (fmt == NULL) return NULL;
4555
bsp = BioseqLockAndIndexByEntity (bbp->entityID);
4556
/* scope is narrowed to the block's record for the duration of the formatter call */
oldscope = SeqEntrySetScope (sep);
4558
str = fmt (&af, bbp);
4560
SeqEntrySetScope (oldscope);
4564
str = StringSave ("???\n");
4572
/*
 * asn2gnbk_cleanup - release the memory owned by a flatfile job.
 *
 * Frees the job's location, then for every section walks its block array
 * freeing each block's string and the extra members that depend on the
 * block type (reference, source feature, feature / CDS, sequence), then
 * the per-section arrays, the job-level section and paragraph arrays, and
 * finally unlocks any far components recorded in lockedBspList.
 * Returns NULL when ajp is NULL.
 */
NLM_EXTERN Asn2gbJobPtr asn2gnbk_cleanup (
4579
BaseBlockPtr PNTR blockArray;
4581
IntAsn2gbJobPtr iajp;
4582
IntAsn2gbSectPtr iasp;
4584
IntFeatBlockPtr ifp;
4591
Asn2gbSectPtr PNTR sectionArray;
4592
StringItemPtr sip, nxt;
4595
iajp = (IntAsn2gbJobPtr) ajp;
4596
if (iajp == NULL) return NULL;
4598
SeqLocFree (iajp->ajp.slp);
4600
numSections = ajp->numSections;
4601
sectionArray = ajp->sectionArray;
4603
if (sectionArray != NULL) {
4605
for (i = 0; i < numSections; i++) {
4606
asp = sectionArray [i];
4608
iasp = (IntAsn2gbSectPtr) asp;
4610
numBlocks = asp->numBlocks;
4611
blockArray = asp->blockArray;
4612
if (blockArray != NULL) {
4614
for (j = 0; j < numBlocks; j++) {
4615
bbp = blockArray [j];
4618
MemFree (bbp->string);
4620
/* the block is viewed through its type-specific struct to free the extra members */
if (bbp->blocktype == REFERENCE_BLOCK) {
4621
rbp = (RefBlockPtr) bbp;
4622
MemFree (rbp->uniquestr);
4623
irp = (IntRefBlockPtr) rbp;
4624
DateFree (irp->date);
4625
SeqLocFree (irp->loc);
4626
MemFree (irp->authstr);
4628
MemFree (irp->maploc);
4630
} else if (bbp->blocktype == SOURCEFEAT_BLOCK) {
4632
isp = (IntSrcBlockPtr) bbp;
4633
SeqLocFree (isp->loc);
4635
} else if (bbp->blocktype == FEATURE_BLOCK) {
4637
ifp = (IntFeatBlockPtr) bbp;
4639
icp = (IntCdsBlockPtr) ifp;
4641
MemFree (icp->maploc);
4644
} else if (bbp->blocktype == SEQUENCE_BLOCK) {
4646
sbp = (SeqBlockPtr) bbp;
4647
MemFree (sbp->bases);
4654
MemFree (asp->blockArray);
4655
MemFree (asp->referenceArray);
4661
/* job-level arrays; the blocks they pointed to were released in the loop above */
MemFree (ajp->sectionArray);
4662
MemFree (ajp->paragraphArray);
4663
MemFree (ajp->paragraphByIDs);
4666
while (sip != NULL) {
4672
if (iajp->lockedBspList != NULL) {
4673
UnlockFarComponents (iajp->lockedBspList);
4684
/*
 * SeqEntryToGnbk - generate a flatfile report for a SeqEntry (or a
 * location on it) and write it to fp and/or the callback in extra.
 *
 * Steps: pick up the optional callback, head / tail strings, GBSeq pointer
 * and user data from extra; index features if needed; lock and look up far
 * sequences as requested by locks; set up AsnIo output for the GBSeq
 * XML / ASN.1 modes; build the job with asn2gnbk_setup_ex in streaming
 * mode; when not streaming, write the head string, every paragraph and the
 * tail string; finally clean up the job, the AsnIo and the far locks.
 *
 * Returns FALSE when there is no output destination (fp, ffwrite and aip
 * all NULL) or when both sep and slp are NULL.
 */
NLM_EXTERN Boolean SeqEntryToGnbk (
4698
AsnIoPtr aip = NULL;
4699
AsnIoPtr aipfree = NULL;
4701
AsnTypePtr atp = NULL;
4702
BioseqPtr bsp = NULL;
4703
BioseqSetPtr bssp = NULL;
4704
Boolean do_gbseq_asn = FALSE;
4705
Boolean do_gbseq_xml = FALSE;
4706
Asn2gbWriteFunc ffwrite = NULL;
4707
GBSeqPtr gbseq = NULL;
4709
IntAsn2gbJobPtr iajp;
4710
Boolean rsult = FALSE;
4711
Int1 type = ASNIO_TEXT_OUT;
4712
Pointer userdata = NULL;
4717
CharPtr ffhead = NULL;
4718
CharPtr fftail = NULL;
4722
BaseBlockPtr PNTR paragraphArray;
4727
ValNodePtr bsplist = NULL;
4729
Boolean lockFarComp;
4730
Boolean lockFarLocs;
4731
Boolean lockFarProd;
4732
Boolean lookupFarComp;
4733
Boolean lookupFarHist;
4734
Boolean lookupFarLocs;
4735
Boolean lookupFarProd;
4739
if (extra != NULL) {
4740
ffwrite = extra->ffwrite;
4742
ffhead = extra->ffhead;
4743
fftail = extra->fftail;
4745
gbseq = extra->gbseq;
4748
userdata = extra->userdata;
4750
if (fp == NULL && ffwrite == NULL && aip == NULL) return FALSE;
4751
if (sep == NULL && slp == NULL) return FALSE;
4753
if (IS_Bioseq (sep)) {
4754
bsp = (BioseqPtr) sep->data.ptrvalue;
4755
} else if (IS_Bioseq_set (sep)) {
4756
bssp = (BioseqSetPtr) sep->data.ptrvalue;
4762
/* this allows profiling of just the formatter, without feature indexing, on the Mac */
4765
entityID = ObjMgrGetEntityIDForPointer (sep->data.ptrvalue);
4766
if (SeqMgrFeaturesAreIndexed (entityID) == 0) {
4767
SeqMgrIndexFeatures (entityID, NULL);
4771
lockFarComp = (Boolean) ((locks & LOCK_FAR_COMPONENTS) != 0);
4772
lockFarLocs = (Boolean) ((locks & LOCK_FAR_LOCATIONS) != 0);
4773
lockFarProd = (Boolean) ((locks & LOCK_FAR_PRODUCTS) != 0);
4775
if (lockFarComp || lockFarLocs || lockFarProd) {
4776
/*
 * Locking is done here, so clear the lock bits before the flags are
 * handed to asn2gnbk_setup_ex.  AND-NOT is used instead of XOR: XOR
 * would switch ON every lock bit the caller had left off whenever only
 * some of the three were requested.
 */
locks = locks & ~(LOCK_FAR_COMPONENTS | LOCK_FAR_LOCATIONS | LOCK_FAR_PRODUCTS);
4777
if (slp != NULL && lockFarComp) {
4778
bsplist = LockFarComponentsEx (sep, FALSE, lockFarLocs, lockFarProd, slp);
4780
bsplist = LockFarComponentsEx (sep, lockFarComp, lockFarLocs, lockFarProd, NULL);
4784
lookupFarComp = (Boolean) ((locks & LOOKUP_FAR_COMPONENTS) != 0);
4785
lookupFarLocs = (Boolean) ((locks & LOOKUP_FAR_LOCATIONS) != 0);
4786
lookupFarProd = (Boolean) ((locks & LOOKUP_FAR_PRODUCTS) != 0);
4787
lookupFarHist = (Boolean) ((locks & LOOKUP_FAR_HISTORY) != 0);
4789
if (lookupFarComp || lookupFarLocs || lookupFarProd || lookupFarHist) {
4790
/* same reasoning as for the lock bits: clear only, never set */
locks = locks & ~(LOOKUP_FAR_COMPONENTS | LOOKUP_FAR_LOCATIONS | LOOKUP_FAR_PRODUCTS | LOOKUP_FAR_HISTORY);
4791
LookupFarSeqIDs (sep, lookupFarComp, lookupFarLocs, lookupFarProd, FALSE, lookupFarHist);
4794
ProfilerSetStatus (TRUE);
4798
do_gbseq_xml = (Boolean) ((flags & HTML_XML_ASN_MASK) == CREATE_XML_GBSEQ_FILE);
4799
do_gbseq_asn = (Boolean) ((flags & HTML_XML_ASN_MASK) == CREATE_ASN_GBSEQ_FILE);
4801
if (do_gbseq_xml || do_gbseq_asn) {
4802
if (fp != NULL && aip == NULL) {
4806
aip = AsnIoNew (type, fp, NULL, NULL, NULL);
4810
if (extra == NULL) {
4811
MemSet ((Pointer) &xtra, 0, sizeof (XtraBlock));
4814
/* GBSeq output needs a GBSeq record; use a local one if the caller gave none */
if (extra->gbseq == NULL) {
4815
MemSet ((Pointer) &gbsq, 0, sizeof (GBSeq));
4816
extra->gbseq = &gbsq;
4817
gbseq = extra->gbseq;
4821
/* pass TRUE for stream to do immediate write at time of creation for speed */
4823
ajp = asn2gnbk_setup_ex (bsp, bssp, slp, format, mode, style,
4824
flags, locks, custom, extra,
4825
TRUE, fp, aip, atp);
4829
iajp = (IntAsn2gbJobPtr) ajp;
4832
/* if streaming, all output was written in setup function, otherwise output here */
4836
/* send optional head string */
4838
is_html = (Boolean) ((flags & HTML_XML_ASN_MASK) == CREATE_HTML_FLATFILE);
4839
if (ffhead == NULL && is_html) {
4842
if (ffhead != NULL) {
4844
/* ffhead is caller-supplied text, not a format: print it through "%s" */
fprintf (fp, "%s", ffhead);
4847
if (ffwrite != NULL) {
4848
ffwrite (ffhead, userdata, HEAD_BLOCK);
4851
/* send each paragraph */
4853
numParagraphs = ajp->numParagraphs;
4854
paragraphArray = ajp->paragraphArray;
4856
for (i = 0; i < numParagraphs; i++) {
4857
str = asn2gnbk_format (ajp, i);
4858
block = (BlockType) 0;
4859
if (paragraphArray != NULL) {
4860
bbp = paragraphArray [i];
4862
block = bbp->blocktype;
4867
fprintf (fp, "%s", str);
4869
if (ffwrite != NULL) {
4870
ffwrite (str, userdata, block);
4874
fprintf (fp, "?\n");
4876
if (ffwrite != NULL) {
4877
ffwrite ("?\n", userdata, block);
4884
/* send optional tail string */
4886
if (fftail == NULL && is_html) {
4889
if (fftail != NULL) {
4891
/* fftail is caller-supplied text, not a format: print it through "%s" */
fprintf (fp, "%s", fftail);
4894
if (ffwrite != NULL) {
4895
ffwrite (fftail, userdata, TAIL_BLOCK);
4900
/* if RELEASE_MODE, warn if unresolved gi numbers, missing translation, etc. */
4902
if (iajp->relModeError && mode == RELEASE_MODE) {
4906
asn2gnbk_cleanup (ajp);
4909
if (aipfree != NULL) {
4910
AsnIoFree (aipfree, FALSE);
4915
ProfilerSetStatus (FALSE);
4917
UnlockFarComponents (bsplist);
4924
/*
 * BioseqToGnbk - Bioseq front end for SeqEntryToGnbk.
 *
 * Returns FALSE when both bsp and slp are NULL.  Otherwise obtains the
 * SeqEntry for bsp with SeqMgrGetSeqEntryForData and passes it, together
 * with all other arguments unchanged, to SeqEntryToGnbk, returning that
 * function's result.
 */
NLM_EXTERN Boolean BioseqToGnbk (
4938
SeqEntryPtr sep = NULL;
4940
if (bsp == NULL && slp == NULL) return FALSE;
4942
sep = SeqMgrGetSeqEntryForData (bsp);
4944
return SeqEntryToGnbk (sep, slp, format, mode, style, flags, locks, custom, extra, fp);