2
* ===========================================================================
5
* National Center for Biotechnology Information (NCBI)
7
* This software/database is a "United States Government Work" under the
8
* terms of the United States Copyright Act. It was written as part of
9
* the author's official duties as a United States Government employee and
10
* thus cannot be copyrighted. This software/database is freely available
11
* to the public for use. The National Library of Medicine and the U.S.
12
* Government do not place any restriction on its use or reproduction.
13
* We would, however, appreciate having the NCBI and the author cited in
14
* any work or product based on this material
16
* Although all reasonable efforts have been taken to ensure the accuracy
17
* and reliability of the software and data, the NLM and the U.S.
18
* Government do not and cannot warrant the performance or results that
19
* may be obtained by using this software or data. The NLM and the U.S.
20
* Government disclaim all warranties, express or implied, including
21
* warranties of performance, merchantability or fitness for any particular
24
* ===========================================================================
26
* File Name: salfiles.c
28
* Author: Colombe Chappey
30
* Version Creation Date: 1/27/96
37
* --------------------------------------------------------------------------
38
* Date Name Description of modification
39
* ------- ---------- -----------------------------------------------------
42
* ==========================================================================
62
#define SALSA_PHYLIP_MARGIN 11
63
#define SALSA_CLUSTALV_MARGIN 17
65
extern SeqEntryPtr ReadLocalAlignment (Uint1 format, CharPtr path);
67
/**********************************************************/
68
static CharPtr FGetLine (FILE *fp)
70
ValNodePtr charp=NULL,
72
CharPtr buffer = NULL;
77
while (c!=255 && c!=NULLB && c!=EOF && c!='\0' && c!='\n')
79
if (c!='\t' && c!='\015') {
80
ValNodeAddInt (&charp, 1, (Int4)c);
86
buffer = (CharPtr)MemNew((size_t)((len+5)*sizeof(Char)));
87
for (j=0, vnp=charp; vnp!=NULL; vnp=vnp->next, j++)
88
buffer[j] = (Char) vnp->data.intvalue;
92
else if (c=='\0' || c=='\n') {
93
buffer = (CharPtr)MemNew((size_t)(sizeof(Char)));
99
/**********************************************************/
100
static ValNodePtr new_sequence_vectors (Int2 n_seq, Int4 lens)
102
ValNodePtr seqvnp = NULL;
107
for (j = 0; j < n_seq; j++) {
108
tmp = (CharPtr) MemNew((size_t) ((lens + 1) * sizeof(Char)));
109
for (strlens = 0; strlens < lens; strlens++)
112
ValNodeAddPointer (&seqvnp, 0, (Pointer)tmp);
117
/**********************************************************/
118
static Boolean stringhasnotext (CharPtr str)
126
if (ch > ' ' && ch <= '~') {
136
/**********************************************************/
137
static CharPtr get_first_notemptyline (FILE *fp)
143
if (! stringhasnotext (str)) {
144
if (StringLen (str) > 0)
153
/**********************************************************/
154
static SeqEntryPtr make_seqentry_for_seqentry (SeqEntryPtr sep)
156
SeqEntryPtr sep1 = NULL,
162
if (IS_Bioseq(sep) || IS_Bioseq_set(sep))
166
bssp = BioseqSetNew ();
171
sep1 = SeqEntryNew ();
173
sep1->data.ptrvalue = bssp;
174
SeqMgrLinkSeqEntry (sep1, 0, NULL);
176
for (tmp = bssp->seq_set; tmp!=NULL; tmp=tmp->next) {
179
bsp = (BioseqPtr) tmp->data.ptrvalue;
180
ObjMgrConnect (OBJ_BIOSEQ, (Pointer) bsp, OBJ_BIOSEQSET, (Pointer) bssp);
195
/**********************************************************/
196
static SeqEntryPtr strings_to_seqentry (ValNodePtr seqvnp, Uint1 mol_type, SeqIdPtr seqsip, SeqAnnotPtr sap)
206
for (vnp=seqvnp, sip=seqsip; vnp!=NULL && sip!=NULL; vnp=vnp->next, sip=sip->next)
208
str = (CharPtr) vnp->data.ptrvalue;
211
lens = (Int4) StringLen (str);
212
sep = StringToSeqEntry (str, sip, lens, mol_type);
214
if (sep_list == NULL)
222
sep_list = make_seqentry_for_seqentry (sep_list);
223
SeqAlignAddInSeqEntry (sep_list, sap);
227
/*******************************************************************
230
*** returns a SeqEntryPtr given a path name, and the mol_type
232
*** calls FastaToSeqEntryInternal
233
*** FastaReadAdvanced
234
*** calls NewFastaRead
235
*** makes a SeqEntryPtr-BioseqSet if 2 sequences or more
237
********************************************************************/
238
extern SeqEntryPtr FastaRead (CharPtr path, Uint2 mol_type)
241
SeqEntryPtr sep_list = NULL, sep = NULL, pre_sep = NULL;
245
if (! GetInputFileName (name, PATH_MAX,"","TEXT")) {
250
if ( (fpin = FileOpen (path, "r")) != NULL) {
251
while ((sep = FastaToSeqEntry (fpin, (Boolean)ISA_na (mol_type) ) ) != NULL)
260
sep_list = make_seqentry_for_seqentry (sep_list);
265
NLM_EXTERN SeqEntryPtr FastaToSeqEntryInternal
267
VoidPtr input, /* input pointer (file or memory) */
268
Int4 type, /* type of inquiry FASTA_MEM_IO or FASTA_FILE_IO */
269
CharPtr PNTR last_char, /* returned pointer to next FASTA sequence */
270
Boolean is_na, /* type of sequence */
271
CharPtr PNTR errormsg, /* error message for debugging */
272
Boolean parseSeqId, /* Parse SeqID from def line */
273
CharPtr special_symbol /* Returns special symbol if no SeqEntry */
276
static SeqIdPtr new_local_sip (Int2 count, Boolean is_na)
279
ObjectIdPtr oid = NULL;
282
oid = ObjectIdNew ();
285
sprintf (str, "nuc %ld", (long) count);
287
sprintf (str, "prot %ld", (long) count);
289
oid->str = StringSave (str);
290
sip = ValNodeNew (NULL);
292
sip->choice = SEQID_LOCAL;
293
sip->data.ptrvalue = (Pointer) oid;
301
static SeqEntryPtr NewFastaRead (FILE *fp, Boolean is_na, Boolean parseSeqId, Int2 *seqnumber, Int2 *segnumber, SeqIdPtr PNTR siplst, Int4 *lengthmax)
304
SeqEntryPtr sep = NULL;
305
SeqEntryPtr lastsep = NULL;
306
SeqEntryPtr nextsep = NULL;
307
SeqEntryPtr last = NULL;
308
CharPtr errormsg = NULL;
309
ValNodePtr head = NULL;
311
ObjectIdPtr oid = NULL;
313
SeqIdPtr siphead = NULL,
315
SeqIdPtr segsip=NULL, lastsegsip=NULL;
325
Boolean isLocalUnknownID;
334
nextsep = FastaToSeqEntryInternal ((void *)fp, 2, NULL, is_na, &errormsg, parseSeqId, &lastchar);
335
while (nextsep != NULL || (lastchar != EOF && lastchar != NULLB && lastchar != 255)) {
336
if (nextsep != NULL) {
338
if (IS_Bioseq (nextsep) && nextsep->data.ptrvalue != NULL) {
339
bsp = (BioseqPtr) nextsep->data.ptrvalue;
340
if (bsp->length > lensmax)
341
lensmax = bsp->length;
342
isLocalUnknownID = FALSE;
344
if (sip != NULL && sip->choice == SEQID_LOCAL) {
345
oid = (ObjectIdPtr) sip->data.ptrvalue;
346
if (oid != NULL && oid->str != NULL) {
347
isLocalUnknownID = (Boolean) (StringICmp (oid->str, "unknown") == 0);
350
if ((! parseSeqId) || isLocalUnknownID) {
351
sip = new_local_sip (count, is_na);
353
bsp->id = SeqIdFree (bsp->id);
355
SeqMgrReplaceInBioseqIndex (bsp);
358
j=SeqIdOrderInBioseqIdList(sip,siphead);
361
if (sip != NULL && sip->choice == SEQID_LOCAL) {
362
oid = (ObjectIdPtr) sip->data.ptrvalue;
363
if (oid != NULL && oid->str != NULL) {
364
lens = MIN(StringLen(oid->str), (Int4)7);
365
oid->str[lens] = '\0';
368
siptmp = MakeNewProteinSeqId (NULL, sip);
370
siptmp = new_local_sip (count, is_na);
373
bsp->id = SeqIdFree (bsp->id);
375
SeqMgrReplaceInBioseqIndex (bsp);
379
siptmp = SeqIdDup (sip);
380
siphead = AddSeqId (&siphead, siptmp);
382
SeqEntryPack (nextsep);
385
if (lastsep != NULL) {
386
AddSeqEntryToSeqEntry (lastsep, nextsep, TRUE);
388
if (segcount > segtotal)
390
sepnuc = FindNucSeqEntry (lastsep);
391
if (IS_Bioseq(sepnuc)) {
392
segbsp=(BioseqPtr)sepnuc->data.ptrvalue;
394
if (segsip != NULL) {
401
last->next = nextsep;
404
if (segcount > segtotal)
410
last->next = nextsep;
413
if (segcount > segtotal)
421
if (insegset && lastsep == NULL) {
426
if (segcount > segtotal)
435
if (segcount > segtotal)
442
vnp = ValNodeNew (head);
447
vnp->data.ptrvalue = errormsg;
450
} else if (lastchar == '[') {
453
} else if (lastchar == ']') {
456
nextsep = FastaToSeqEntryInternal ((void *)fp, 2, NULL, is_na, &errormsg, parseSeqId, &lastchar);
458
if (segnumber !=NULL)
459
*segnumber = segtotal;
464
if (lengthmax != NULL)
465
*lengthmax = lensmax;
466
if(seqnumber != NULL)
471
static SeqEntryPtr FastaReadAdvanced (CharPtr path, Uint2 mol_type, Int2 *seqnumber, Int2 *segnumber, SeqIdPtr PNTR sip, Int4 *lengthmax)
474
SeqEntryPtr sep = NULL;
478
if (! GetInputFileName (name, PATH_MAX,"","TEXT")) {
483
if ( (fpin = FileOpen (path, "r")) != NULL) {
484
if (segnumber != NULL)
485
sep = NewFastaRead (fpin, (Boolean)ISA_na (mol_type), TRUE, seqnumber, segnumber, sip, lengthmax);
487
sep = NewFastaRead (fpin, (Boolean)ISA_na (mol_type), TRUE, NULL, NULL, NULL, NULL);
489
sep = make_seqentry_for_seqentry (sep);
494
/*******************************************************************
496
*** LocalAlignsToSeqAnnotDimn
497
*** LocalAlign1ToSeqAnnotDimn
499
*************************************************************************/
501
static ValNodePtr get_lens_fromseqalign (SeqAlignPtr salp)
505
ValNodePtr fromp = NULL;
514
if (salp->segtype == 1)
516
ddp = (DenseDiagPtr) salp->segs;
518
for (index=0; index<ddp->dim; index++) {
519
startp = ddp->starts;
522
val = *startp + ddp->len;
523
ValNodeAddInt (&fromp, 1, (Int4)(val+1));
527
else if (salp->segtype == 2)
529
dsp = (DenseSegPtr) salp->segs;
532
for (index=0; index<dsp->dim; index++)
534
if ((Boolean)(dsp->strands != NULL))
535
strand = dsp->strands[index];
537
strand = Seq_strand_plus;
538
startp = dsp->starts + ((dsp->dim * dsp->numseg) - dsp->dim);
541
for (j = dsp->numseg-1; j >= 0; j--, startp-=dsp->dim)
545
if (strand == Seq_strand_minus)
548
val = *startp + dsp->lens[j] - 1;
549
ValNodeAddInt (&fromp, 1, (Int4)(val+1));
552
ValNodeAddInt (&fromp, 1, (Int4)(-1));
559
static SeqAnnotPtr LocalAlign1ToSeqAnnotDimn (ValNodePtr vnpal, SeqIdPtr seqsip, ValNodePtr fromp, Int2 nbseq, Int4 lens, ValNodePtr strands, Boolean trunc_emptyends)
561
SeqAnnotPtr sap1=NULL;
564
if (vnpal!=NULL && vnpal->data.ptrvalue != NULL) {
565
tmp = (ValNodePtr) vnpal->data.ptrvalue;
566
/* NOTE(review): the strands and trunc_emptyends parameters of this function
   are not forwarded here; NULL and FALSE are passed instead. Confirm that
   ignoring the caller's values is intended. */
sap1 = LocalAlignToSeqAnnotDimn (tmp, seqsip, fromp, nbseq, lens, NULL, FALSE);
571
static SeqAnnotPtr LocalAlignsToSeqAnnotDimn (ValNodePtr vnpal, SeqIdPtr seqsip, ValNodePtr fromp, Int2 nbseq, Int2 nbseg, Int4 lens, ValNodePtr strands, Boolean trunc_emptyends)
573
SeqAnnotPtr sap1 = NULL,
575
SeqAlignPtr salphead = NULL,
586
while (salphead == NULL && vnp != NULL)
592
for (k=0; k<nbseq; k++) {
593
siptmp2=SeqIdDup(siptmp);
594
siplst = AddSeqId (&siplst, siptmp2);
595
for (k1=0; k1<nbseg; k1++)
598
tmp = (ValNodePtr) vnp->data.ptrvalue;
599
sap1 = LocalAlignToSeqAnnotDimn (tmp, siplst, fromp, nbseq, lens, NULL, FALSE);
600
if (sap1!=NULL && sap1->data!=NULL)
601
salphead = (SeqAlignPtr) sap1->data;
606
fromp = ValNodeFree (fromp);
611
fromp = get_lens_fromseqalign (salptmp);
617
for (k=0; k<nbseq; k++) {
618
siptmp2=SeqIdDup(siptmp);
619
siplst = AddSeqId (&siplst, siptmp2);
620
for (k1=0; k1<nbseg && siptmp!=NULL; k1++)
623
tmp = (ValNodePtr) vnp->data.ptrvalue;
624
sap = LocalAlignToSeqAnnotDimn (tmp, siplst, fromp, nbseq, lens, NULL, FALSE);
625
if (sap!=NULL && sap->data!=NULL) {
626
salptmp->next = (SeqAlignPtr)sap->data;
627
salptmp = salptmp->next;
631
fromp = ValNodeFree (fromp);
637
/*****************************************************
639
*** 1) reads the sequences as FASTA: FastaReadAdvanced
640
*** 2) reads the sequence text with the gaps (-): ReadAlignmentToStrings
641
*** the max length allocated for the char array
642
*** that is the max length of the sequences plus a 1/2 of gaps.
644
*** ConvertPaupToFastaGap
646
******************************************************/
647
static ValNodePtr ReadAlignmentToStrings (CharPtr path, Int4 length, Int2 segnumber)
651
ValNodePtr vnpal, tmp, vnp;
659
Boolean insegb = FALSE;
667
if (! GetInputFileName (name, PATH_MAX,"","TEXT")) {
672
vnpal = ValNodeNew (NULL);
674
for (j=1; j<segnumber; j++) {
675
vnp = ValNodeNew (NULL);
679
if ( (fp = FileOpen (path, "r")) == NULL) {
684
lmax = length + length/2;
688
while (*strp == ' ' && *strp!='\0' && *strp!='\n')
690
if (*strp!='\0' && *strp!='\n')
691
strlens = StringLen (strp);
693
str=NULL; /*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
711
else if (StringStr(strp, "[")!= NULL) {
718
else if (StringStr(strp, "]")!= NULL) {
725
seqstr=(CharPtr)MemNew((size_t)((lmax + 1) * sizeof(Char)));
726
for (strlens=0; strlens<lmax; strlens++)
727
seqstr[strlens] = ' ';
729
if (vnp->data.ptrvalue==NULL) {
731
ValNodeAddPointer (&tmp, 0, (Pointer)seqstr);
732
vnp->data.ptrvalue = (Pointer) tmp;
734
tmp = (ValNodePtr)vnp->data.ptrvalue;
735
ValNodeAddPointer (&tmp, 0, (Pointer)seqstr);
741
for (j=0; j<strlens; j++)
743
if (strp[j]=='\n' || strp[j]=='\0' || strp[j]=='\r' )
745
strp[j] = TO_UPPER(strp[j]);
746
if (StringChr("ABCDEFGHIKLMNPQRSTUVWXYZ-*", strp[j]) != NULL) {
747
seqstr [lgseq] = strp[j];
751
seqstr [lgseq] = '\0';
758
while (*strp == ' ' && *strp!='\0' && *strp!='\n')
760
if (*strp!='\0' && *strp!='\n')
761
strlens = StringLen (strp);
763
str=NULL; /****!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
772
static SeqEntryPtr GapFastaRead (CharPtr path, Uint2 mol_type)
775
SeqAnnotPtr sap = NULL;
776
SeqEntryPtr sep = NULL;
777
ValNodePtr vnp = NULL;
787
if (! GetInputFileName (name, PATH_MAX,"","TEXT")) {
792
sep = FastaReadAdvanced (path, mol_type, &seqnumber, &segnumber, &sip, &lmax);
797
for (siptmp=sip; siptmp!=NULL; siptmp=siptmp->next) {
800
if (nseq != seqnumber*segnumber) {
801
ErrPostEx (SEV_ERROR, 0, 0, "Every sequences should have a sequence ID");
802
sep = SeqEntryFree (sep);
804
str=matching_seqid (sip);
806
ErrPostEx (SEV_ERROR, 0, 0, "The submission contains several sequences with the same name \"%s\"", str);
808
sep = SeqEntryFree (sep);
811
vnp = ReadAlignmentToStrings (path, lmax, segnumber);
813
sap=LocalAlignsToSeqAnnotDimn(vnp,sip,NULL,seqnumber,segnumber, 0, NULL, FALSE);
815
sap=LocalAlign1ToSeqAnnotDimn (vnp, sip, NULL, seqnumber, 0, NULL, FALSE);
817
SeqAlignAddInSeqEntry (sep, sap);
825
/********************************************************************/
826
static Boolean has_extrachar (CharPtr str, Char missingchar, Char gapchar)
833
if (*str=='\0' || *str=='\n')
835
for (j=0; j<StrLen(str); j++) {
836
if (str[j]!='\n' && str[j]!='\0' && str[j]!='\r'
840
if (!isdigit(str[j]))
842
if ((StringChr ("ABCDGHKMNRSTUVWY", str[j])) == NULL &&
843
(StringChr ("abcdghkmnrstuvwy", str[j])) == NULL &&
844
str[j]!=gapchar && str[j] != missingchar &&
845
str[j]!='[' && str[j]!=']') {
855
/********************************************************************/
856
static Char nexustoseq (Char car, Char missingchar, Char gapchar)
862
if (car == missingchar)
871
/********************************************************************/
872
static Boolean ConvertPaupToFastaGap (CharPtr path, CharPtr tmpfile)
879
Char missingchar = '?';
890
if ( (fp = FileOpen (path, "r")) == NULL) {
895
if (! stringhasnotext (str)) {
896
if (StringLen (str) > 0 && str [0] != '>')
907
tmp = StringStr(str, "INTERLEAVE");
909
tmp = StringStr(str, "interleave");
913
ErrPostEx (SEV_ERROR, 0, 0, "This is a NEXUS interleave format");
916
tmp = StringStr(str, "GAP=");
918
tmp = StringStr(str, "gap=");
920
while (*tmp!='\0' && *tmp!='\n' && *tmp!='=')
922
if (*tmp!='\0' && *tmp!='\n')
924
while (*tmp!='\0' && *tmp!='\n' && *tmp==' ')
926
if (*tmp!='\0' && *tmp!='\n')
929
tmp = StringStr(str, "MISSING=");
931
tmp = StringStr(str, "missing=");
933
while (*tmp!='\0' && *tmp!='\n' && *tmp!='=')
935
if (*tmp!='\0' && *tmp!='\n')
937
while (*tmp!='\0' && *tmp!='\n' && *tmp==' ')
939
if (*tmp!='\0' && *tmp!='\n')
943
tmp = StringStr(str, "NTAX");
945
tmp = StringStr(str, "ntax");
947
while (*tmp!='\0' && *tmp!='\n' && !isdigit (*tmp))
949
if (*tmp!='\0' && *tmp!='\n')
950
n_seq = (Int2) atoi(tmp);
954
tmp = StringStr(str, "NCHAR");
956
tmp = StringStr(str, "nchar");
958
while (*tmp!='\0' && !isdigit (*tmp))
961
lg_seq = (Int4) atol(tmp);
964
tmp = StringStr(str, "MATRIX");
966
tmp = StringStr(str, "matrix");
973
if (n_seq == 0 || lg_seq == -1) {
978
tmp = StringStr(str, "MATRIX");
980
tmp = StringStr(str, "matrix");
991
if ( (fpout = FileOpen (tmpfile, "w")) == NULL) {
1000
strlens = StringLen (str);
1002
str2 = (CharPtr)MemNew((size_t)((strlens+4) * sizeof(Char)));
1003
if (str[0] == ';' || (tmp = StringStr(str, "end;"))!=NULL || (tmp = StringStr(str, "END;"))!=NULL || (tmp = StringStr(str, "gap data"))!=NULL)
1005
if (has_extrachar (str, missingchar, gapchar)) {
1017
if (str[j]=='\0' || str[j] == '\n' || str[j] == '\r' ) {
1022
while (str[j]!='\0' && str[j]!='\n' && j < strlens && str[j]!=']')
1028
car = nexustoseq (str[j], missingchar, gapchar);
1029
if (car != '\0' && car != '!') {
1035
else if (first_line) {
1042
while (str[j] == ' ')
1045
while (str[j]!='\0' && str[j]!='\n' && j < strlens) {
1047
while (str[j]!='\0' && str[j]!='\n' && j < strlens && str[j]!=']')
1051
car = nexustoseq (str[j], missingchar, gapchar);
1052
if (car != '\0' && car != '!') {
1067
strlens = StringLen (str);
1068
if (strlens > 0 && !stringhasnocharplus (str)) {
1069
if (!first_line && has_extrachar (str, missingchar, gapchar)) {
1074
fprintf(fpout, ">%s\n", str);
1079
fprintf(fpout, "%s\n", str2);
1081
k += StringLen (str2);
1086
fprintf(fpout, "%s\n", str);
1100
str = FGetLine (fp);
1105
fprintf(fpout, "\n");
1110
static ValNodePtr SequenceMacawRead (CharPtr path, Int2 n_seq)
1113
ValNodePtr vnp, seqvnp;
1120
Boolean worksheet=FALSE;
1122
if ( (fp = FileOpen (path, "r")) == NULL) {
1125
str = FGetLine (fp);
1128
tmp = StringStr(str, "worksheet");
1134
str = FGetLine (fp);
1139
tmp = StringStr(str, "num-cols");
1141
while (*tmp!=' ' && *tmp!='\0' && *tmp!='\n')
1143
while (!isdigit (*tmp) && *tmp!='\0' && *tmp!='\n')
1147
while (isdigit (*tmp) && *tmp!='\0' && *tmp!='\n') {
1152
lens = (Int4) atol (lenstr);
1156
str = FGetLine (fp);
1160
seqvnp = new_sequence_vectors (n_seq, lens);
1163
tmp = StringStr(str, "gap-seq");
1165
while (*tmp!='"' && *tmp!='\0' && *tmp!='\n')
1170
seq = (CharPtr)vnp->data.ptrvalue;
1175
if (*tmp=='\0' || *tmp=='\n') {
1177
str = FGetLine (fp);
1192
str = FGetLine (fp);
1202
static SeqIdPtr NameMacawRead (CharPtr path)
1208
SeqIdPtr siphead=NULL,
1212
if ( (fp = FileOpen (path, "r")) == NULL) {
1215
str = FGetLine (fp);
1217
tmp = StringStr(str, "name");
1219
while (*tmp!='"' && *tmp!='\0' && *tmp!='\n')
1226
while (*tmp!='"' && *tmp!='\0' && *tmp!='\n' && j<254) {
1232
if (StringLen(name) > 0) {
1233
sip = MakeSeqID (name);
1235
siphead = AddSeqId (&siphead, sip);
1242
str = FGetLine (fp);
1243
tmp = StringStr(str, "worksheet");
1253
static SeqEntryPtr MacawRead (CharPtr path, Uint1 mol_type, SeqIdPtr seqid, Boolean save_seqentry, Boolean save_sap)
1255
SeqEntryPtr sep = NULL;
1256
ValNodePtr seqvnp=NULL,
1264
seqid = NameMacawRead (path);
1267
str=matching_seqid (seqid);
1270
ErrPostEx (SEV_ERROR, 0, 0, "The submission contains several sequences with the same name \"%s\"", str);
1275
for (sip=seqid; sip!=NULL; sip=sip->next)
1277
seqvnp = SequenceMacawRead (path, n_seq);
1284
sap = LocalAlignToSeqAnnotDimn (seqvnp, seqid, NULL, n_seq, 0, NULL, FALSE);
1285
if ( save_seqentry )
1286
sep = strings_to_seqentry (seqvnp, mol_type, seqid, sap);
1287
ValNodeFree (seqvnp);
1292
/*******************************************************
1293
*** ReadAlignmentFunc
1294
*** called for PHYLIP, NEXUS interleave formats
1295
*** calls ReadLocalAlign
1297
*** LocalAlignToSeqAnnotDimn
1298
*** returns a SeqEntryPtr sep_list including a SeqAnnotPtr
1300
**********************************************************/
1301
static Boolean seq_line (CharPtr str)
1311
str2 = StringSave (str);
1312
lens = StringLen(str2);
1315
for (j = lens; j > 0; j--)
1317
str2[j] = TO_UPPER(str2[j]);
1318
if (str2[j] >= 'A' && str2[j] <= 'Z')
1321
if (str2[j]=='A' || str2[j]=='C' || str2[j]=='T' ||
1322
str2[j]=='G' || str2[j]=='N' || str2[j]=='U')
1327
if (val2 > (2*val1/3))
1333
static Boolean seq_char (Char car, Char missingchar, Char gapchar)
1335
if (car == 'A') return TRUE;
1336
if (car == 'T') return TRUE;
1337
if (car == 'G') return TRUE;
1338
if (car == 'C') return TRUE;
1339
if (car == 'U') return TRUE;
1340
if (car == 'N') return TRUE;
1341
if (car == 'a') return TRUE;
1342
if (car == 't') return TRUE;
1343
if (car == 'g') return TRUE;
1344
if (car == 'c') return TRUE;
1345
if (car == 'u') return TRUE;
1346
if (car == 'n') return TRUE;
1347
if (car == missingchar) return TRUE;
1348
if (car == gapchar) return TRUE;
1349
if (car == '*') return TRUE;
1353
static Boolean seqa_char (Char car, Char missingchar, Char gapchar)
1355
if (car >= 'A' && car <= 'Z') return TRUE;
1356
if (car >= 'a' && car <= 'z') return TRUE;
1357
if (car == missingchar) return TRUE;
1358
if (car == gapchar) return TRUE;
1359
if (car == '*') return TRUE;
1364
static ValNodePtr ReadLocalAlign (CharPtr path, Int2 align_format, Int2 n_seq, Int2 *offset, Int2 *offset_line)
1368
ValNodePtr seqvnp = NULL, vnp;
1384
Char missingchar = '?';
1387
if ( (fp = FileOpen (path, "r")) == NULL) {
1390
str = FGetLine (fp);
1392
if (! stringhasnotext (str)) {
1393
if (StringLen (str) > 0 && str [0] != '>')
1399
str = FGetLine (fp);
1401
if (align_format == SALSAA_GCG){
1405
str = FGetLine (fp);
1409
str = FGetLine (fp);
1412
fp = FileOpen (path, "r");
1413
str = FGetLine (fp);
1415
if (! stringhasnotext (str)) {
1416
if (StringLen (str) > 0 && str [0] != '>')
1421
str = FGetLine (fp);
1423
leftmargin = SALSAA_GCG;
1425
else if (align_format == SALSA_NEXUS) {
1430
/* the following break statement bypassed sequence reading */
1432
tmp = StringStr(str, "MATRIX");
1434
tmp = StringStr(str, "matrix");
1440
tmp = StringStr(str, "NTAX");
1442
tmp = StringStr(str, "ntax");
1444
/* Skip forward to the first digit of the NTAX value. Test the character
   (*tmp), not the pointer, so the scan stops at the end of the string. */
while (*tmp!='\0' && !isdigit (*tmp))
1447
n_seq = (Int2) atoi(tmp);
1451
tmp = StringStr(str, "NCHAR");
1453
tmp = StringStr(str, "nchar");
1455
/* Skip forward to the first digit of the NCHAR value. Test the character
   (*tmp), not the pointer, so the scan stops at the end of the string. */
while (*tmp!='\0' && !isdigit (*tmp))
1458
lg_seq = (Int4) atol(tmp);
1461
tmp = StringStr(str, "GAP=");
1463
tmp = StringStr(str, "gap=");
1465
while (*tmp!='\0' && *tmp!='\n' && *tmp!='=')
1467
if (*tmp!='\0' && *tmp!='\n')
1469
while (*tmp!='\0' && *tmp!='\n' && *tmp==' ')
1471
if (*tmp!='\0' && *tmp!='\n')
1474
tmp = StringStr(str, "MISSING=");
1476
tmp = StringStr(str, "missing=");
1478
while (*tmp!='\0' && *tmp!='\n' && *tmp!='=')
1480
if (*tmp!='\0' && *tmp!='\n')
1482
while (*tmp!='\0' && *tmp!='\n' && *tmp==' ')
1484
if (*tmp!='\0' && *tmp!='\n')
1487
if (n_seq>0 && lg_seq>-1 && seq_line (str)) {
1488
if (seq_char(str[0], missingchar, gapchar)
1489
&& seq_char(str[1], missingchar, gapchar)
1490
&& seq_char(str[2], missingchar, gapchar)
1491
&& seq_char(str[3], missingchar, gapchar)
1492
&& seq_char(str[4], missingchar, gapchar)
1493
&& seq_char(str[5], missingchar, gapchar)) {
1498
for (leftmargin = 0; leftmargin<MAXSTR-1; leftmargin++) {
1499
if (str[leftmargin] == ' '
1500
&& seq_char(str[leftmargin+1], missingchar, gapchar)) {
1510
str = FGetLine (fp);
1516
} else if (align_format == SALSAA_NEXUS)
1522
/* the following break statement bypassed sequence reading */
1524
tmp = StringStr(str, "MATRIX");
1526
tmp = StringStr(str, "matrix");
1532
tmp = StringStr(str, "NTAX");
1534
tmp = StringStr(str, "ntax");
1536
/* Skip forward to the first digit of the NTAX value. Test the character
   (*tmp), not the pointer, so the scan stops at the end of the string. */
while (*tmp!='\0' && !isdigit (*tmp))
1539
n_seq = (Int2) atoi(tmp);
1543
tmp = StringStr(str, "NCHAR");
1545
tmp = StringStr(str, "nchar");
1547
/* Skip forward to the first digit of the NCHAR value. Test the character
   (*tmp), not the pointer, so the scan stops at the end of the string. */
while (*tmp!='\0' && !isdigit (*tmp))
1550
lg_seq = (Int4) atol(tmp);
1553
tmp = StringStr(str, "GAP=");
1555
tmp = StringStr(str, "gap=");
1557
while (*tmp!='\0' && *tmp!='\n' && *tmp!='=')
1559
if (*tmp!='\0' && *tmp!='\n')
1561
while (*tmp!='\0' && *tmp!='\n' && *tmp==' ')
1563
if (*tmp!='\0' && *tmp!='\n')
1566
tmp = StringStr(str, "MISSING=");
1568
tmp = StringStr(str, "missing=");
1570
while (*tmp!='\0' && *tmp!='\n' && *tmp!='=')
1572
if (*tmp!='\0' && *tmp!='\n')
1574
while (*tmp!='\0' && *tmp!='\n' && *tmp==' ')
1576
if (*tmp!='\0' && *tmp!='\n')
1579
if (n_seq>0 && lg_seq>-1 && seq_line (str)) {
1580
if (seqa_char(str[0], missingchar, gapchar)
1581
&& seqa_char(str[1], missingchar, gapchar)
1582
&& seqa_char(str[2], missingchar, gapchar)
1583
&& seqa_char(str[3], missingchar, gapchar)
1584
&& seqa_char(str[4], missingchar, gapchar)
1585
&& seqa_char(str[5], missingchar, gapchar)) {
1590
for (leftmargin = 0; leftmargin<MAXSTR-1; leftmargin++) {
1591
if (str[leftmargin] == ' '
1592
&& seqa_char(str[leftmargin+1], missingchar, gapchar)) {
1602
str = FGetLine (fp);
1609
else if (align_format == SALSA_PHYLIP || align_format == SALSAA_PHYLIP) {
1610
if (sscanf (str, "%d %ld", &val1, &val2) == 2) {
1611
n_seq = (Int2) val1;
1612
lg_seq = (Int4) val2;
1616
str = FGetLine (fp);
1617
leftmargin = SALSA_PHYLIP_MARGIN;
1620
else if (align_format == SALSA_CLUSTALV) {
1625
for ( j =0; j < 4; j++) {
1628
str = FGetLine (fp);
1631
leftmargin = SALSA_CLUSTALV_MARGIN;
1634
ErrPostEx (SEV_ERROR, 0, 0, "We do not support this format yet");
1646
*offset = leftmargin;
1647
seqvnp = new_sequence_vectors (n_seq, lmax);
1649
lgseq = (Int4Ptr) MemNew((size_t) ((n_seq + 1) * sizeof(Int4)));
1650
for (j = 0; j < n_seq; j++) lgseq [j] = 0;
1652
tmp1 = (CharPtr) seqvnp->data.ptrvalue;
1658
ptr = StringChr (str, '[');
1662
strlens = StringLen (str);
1664
if (str[0] == ';' || (tmp = StringStr(str, "end;"))!=NULL || (tmp = StringStr(str, "END;"))!=NULL || (tmp = StringStr(str, "gap data"))!=NULL)
1666
if (! stringhasnocharplus (str) && str[0]!='>')
1668
tmp = (CharPtr) vnp->data.ptrvalue;
1669
for (j = leftmargin; j < strlens && lgseq [i_seq] <= lmax; j++)
1671
if (str[j] == '\n' || str[j] == '\r' ) break;
1672
str[j] = TO_UPPER (str[j]);
1673
if (str[j] == gapchar)
1675
else if (str[j] == ':')
1677
else if (str[j] == '.')
1679
if (align_format == SALSA_PHYLIP && i_seq != 0)
1683
str [j]= tmp1[lgseq[i_seq]];
1691
else if (str[j] == missingchar)
1693
if ((str[j] >= 'A' && str[j] <= 'Z') || str[j]=='*' || str[j] == '-') {
1694
tmp [lgseq[i_seq]] = str[j];
1699
if (i_seq == n_seq) {
1702
if (align_format == SALSA_PHYLIP && first) {
1707
else vnp = vnp->next;
1712
str = FGetLine (fp);
1717
for (lmax = 0, j = 0; j < n_seq; j++)
1718
if (lgseq[j] > lmax)
1720
for (vnp = seqvnp, j = 0 && vnp != NULL; j < n_seq; j++, vnp = vnp->next)
1722
tmp = (CharPtr) vnp->data.ptrvalue;
1727
else if (lmax < lg_seq)
1729
if (lg_seq < LENGTHMAX ) {
1730
Message(MSG_OK, "Length in file %d != alignment length %d", (int) lg_seq, (int) lmax);
1731
/**** FREE DATA STRUCT ***********/
1736
*offset_line = top_lines;
1740
static SeqIdPtr ReadLocalName (CharPtr path, Int2 nbseq, Int2 leftmargin, Int2 offset_lines)
1743
SeqIdPtr sip1 = NULL,
1744
sipnew = NULL, siptmp;
1749
if ( (fp = FileOpen (path, "r")) != NULL) {
1751
str = FGetLine (fp);
1752
while (str && j<offset_lines) {
1754
str = FGetLine (fp);
1757
while (str && i_seq < nbseq )
1759
if ( StringLen (str) > 0 )
1761
str [leftmargin] = '\0';
1762
for (j=leftmargin-1; j>0 && str[j] == ' '; j--)
1764
sipnew = MakeSeqID (str);
1768
siptmp->next = sipnew;
1772
str = FGetLine (fp);
1782
static SeqEntryPtr ReadAlignmentFunc (CharPtr path, Uint1 mol_type, Uint1 format, Int2 n_seq, Boolean save_seqentry, Boolean save_sap, SeqIdPtr seqsip)
1784
SeqEntryPtr sep = NULL;
1785
ValNodePtr seqvnp , vnp;
1792
seqvnp = ReadLocalAlign (path, format, n_seq, &leftmargin, &offset_lines);
1795
for (k=0, vnp=seqvnp; vnp!=NULL; vnp=vnp->next) k++;
1800
ValNodeFree (seqvnp);
1804
seqsip = ReadLocalName (path, n_seq, leftmargin, offset_lines);
1807
ValNodeFree (seqvnp);
1810
str=matching_seqid (seqsip);
1813
ErrPostEx (SEV_ERROR, 0, 0, "The submission contains several sequences with the same name \"%s\"", str);
1818
sap = LocalAlignToSeqAnnotDimn (seqvnp, seqsip, NULL, n_seq, 0, NULL, FALSE);
1819
if ( save_seqentry )
1820
sep = strings_to_seqentry (seqvnp, mol_type, seqsip, sap);
1822
ValNodeFree (seqvnp);
1828
/************************************************************
1829
*** ReadLocalAlignment
1830
*** called by sequin2.c
1831
*** calls ReadInterleaveAlign, ReadContiguouseAlign
1832
*** ReadInterleaveAlign
1833
*** reads formats: Phylip, NEXUS Interleave
1834
*** ReadContiguouseAlign
1835
*** reads formats: Fasta+gaps, NEXUS Contiguous, Macaw
1836
*** ReadAnyAlignment
1837
*** calls first ReadInterleaveAlign
1838
*** if NULL is returned, calls ReadContiguouseAlign
1840
************************************************************/
1841
extern SeqEntryPtr ReadInterleaveAlign (CharPtr path, Uint1 mol_type)
1844
SeqEntryPtr sep=NULL;
1845
Char name [PATH_MAX];
1853
if (! GetInputFileName (name, PATH_MAX,"","TEXT")) {
1858
if ( (fp = FileOpen (path, "r")) != NULL) {
1859
str = get_first_notemptyline (fp);
1864
tmp = StringStr(str, "NEXUS");
1866
tmp = StringStr(str, "nexus");
1869
if (ISA_aa(mol_type))
1870
sep = ReadLocalAlignment (SALSAA_NEXUS, path);
1872
sep = ReadLocalAlignment (SALSA_NEXUS, path);
1877
if (sscanf (str, "%d %ld", &val1, &val2) == 2) {
1878
if (val1 > 0 && val2 > -1)
1880
if (ISA_aa(mol_type))
1881
sep = ReadLocalAlignment (SALSAA_PHYLIP, path);
1883
sep = ReadLocalAlignment (SALSA_PHYLIP, path);
1889
ErrPostEx (SEV_ERROR, 0, 0, "We do not support this format yet");
1893
extern SeqEntryPtr ReadContiguouseAlign (CharPtr path, Uint1 mol_type)
1896
SeqEntryPtr sep=NULL;
1897
Char name [PATH_MAX];
1903
if (! GetInputFileName (name, PATH_MAX,"","TEXT")) {
1908
if ( (fp = FileOpen (path, "r")) != NULL) {
1909
str = get_first_notemptyline (fp);
1915
if (str[0] == '>' || str[0] == '[')
1917
if (ISA_aa(mol_type))
1918
sep = ReadLocalAlignment (SALSAA_FASTGAP, path);
1920
sep = ReadLocalAlignment (SALSA_FASTGAP, path);
1925
tmp = StringStr(str, "NEXUS");
1927
tmp = StringStr(str, "nexus");
1930
if (!ISA_aa(mol_type))
1931
sep = ReadLocalAlignment (SALSA_PAUP, path);
1936
tmp = StringStr(str, "MACAWDATAFILE");
1938
tmp = StringStr(str, "MacawDataFile");
1941
if (!ISA_aa(mol_type))
1942
sep = ReadLocalAlignment (SALSA_MACAW, path);
1947
ErrPostEx (SEV_ERROR, 0, 0, "We do not support this format yet");
1951
extern SeqEntryPtr ReadAnyAlignment (Boolean is_prot, CharPtr path)
1953
SeqEntryPtr sep = NULL;
1954
Uint1 mol_type=Seq_mol_na;
1955
Char name [PATH_MAX];
1960
mol_type = Seq_mol_aa;
1963
if (! GetInputFileName (name, PATH_MAX,"","TEXT")) {
1968
errlev = ErrSetMessageLevel (SEV_FATAL);
1969
sep = AsnReadForSalsa (path);
1971
sep = ReadInterleaveAlign (path, mol_type);
1974
sep = ReadContiguouseAlign (path, mol_type);
1976
ErrSetMessageLevel (errlev);
1981
extern SeqEntryPtr ReadLocalAlignment (Uint1 format, CharPtr path)
1983
SeqEntryPtr sep = NULL;
1984
Char name [PATH_MAX];
1985
Char tmpfile [PATH_MAX];
1989
if (! GetInputFileName (name, PATH_MAX,"","TEXT")) {
1997
sep = ReadAnyAlignment (FALSE, path);
2000
case SALSA_INTERLEAVE:
2001
sep = ReadInterleaveAlign (path, Seq_mol_na);
2003
case SALSAA_INTERLEAVE :
2004
sep = ReadInterleaveAlign (path, Seq_mol_aa);
2006
case SALSA_CONTIGUOUS:
2007
sep = ReadContiguouseAlign (path, Seq_mol_na);
2009
case SALSAA_CONTIGUOUS:
2010
sep = ReadContiguouseAlign (path, Seq_mol_aa);
2014
sep = FastaReadAdvanced (path, Seq_mol_na, NULL, NULL, NULL, NULL);
2017
sep = FastaReadAdvanced (path, Seq_mol_aa, NULL, NULL, NULL, NULL);
2021
sep = GapFastaRead (path, Seq_mol_na);
2023
case SALSAA_FASTGAP:
2024
sep = GapFastaRead (path, Seq_mol_aa);
2028
sep = ReadAlignmentFunc (path, Seq_mol_na, format, 0, TRUE, TRUE, NULL);
2034
sep = ReadAlignmentFunc (path, Seq_mol_aa, format, 0, TRUE, TRUE, NULL);
2038
if (ConvertPaupToFastaGap (path, tmpfile) )
2040
sep = GapFastaRead (tmpfile, Seq_mol_na);
2041
FileRemove (tmpfile);
2045
sep = MacawRead (path, Seq_mol_na, NULL, TRUE, TRUE);
2049
ErrPostEx (SEV_ERROR, 0, 0, "We do not support this format yet");
2056
/***********************************************************
2058
*** Import functions:
2059
*** from any file (ASN.1, FASTA, gi/acc#),
2060
*** calls ReadAsnFastaOrFlatFile
2061
*** Download from Entrez
2062
*** copy of FetchFromNet from sequin2.c (JK)
2064
************************************************************/
2066
extern SeqAlignPtr ImportFromFile (EditAlignDataPtr adp)
2068
SeqAlignPtr salp = NULL,
2071
SeqAnnotPtr sap = NULL;
2072
SeqEntryPtr sep = NULL;
2073
ValNodePtr importslp = NULL,
2076
Boolean new_seqalign=FALSE,
2084
importslp = CCReadAnythingLoop (NULL, adp->seq_info);
2085
if (importslp != NULL)
2087
if (adp->sap_original != NULL)
2089
salp_original = (SeqAlignPtr)(adp->sap_original->data);
2091
ok=SeqAlignSeqLocComp (salp_original, importslp);
2094
if (salp_original->dim==2 || is_dim2seqalign (salp_original))
2095
/* Self-assignment used as an empty branch body: when the preceding test
   (dim==2 or is_dim2seqalign) holds, salp_original is left unchanged. */
salp_original=salp_original;
2096
else if (salp_original->dim == 1)
2098
replace_salp = TRUE;
2102
ans = Message (MSG_OKC, "You have a multiple alignment.\n Importing a sequence will convert it into a multiple pairwise alignment.\n Do you want to continue ?");
2103
if (ans != ANS_CANCEL) {
2104
salptmp = multseqalign_to_pairseqalign (salp_original);
2106
SeqAlignListFree (salp_original);
2107
adp->sap_original->data = (Pointer) salptmp;
2108
salp_original = salptmp;
2119
slp = (SeqLocPtr) adp->master.region;
2122
ValNodeAddPointer (&sqloc, 0, (Pointer)slp);
2123
sqloc->next = importslp;
2124
salp = SeqLocListToSeqAlign (sqloc, (Int2)adp->align_format, NULL);
2127
if (salp_original != NULL) {
2128
salp = SeqAlignLink (salp_original, salp);
2129
new_seqalign = TRUE;
2133
if (adp->align_format==PRG_BLAST || adp->align_format==PRGALIGNDEFAULT)
2134
Message (MSG_OK, "Blast detected no sequence similarity and could not construct an alignment");
2136
Message (MSG_OK, "No significant similarity detected. No alignment produced");
2139
if (!new_seqalign && !replace_salp)
2140
salp = SeqAlignSetFree (salp);
2144
Message(MSG_OK, "Can not import a sequence already in the editor");
2149
/*------------------------------------------------------------*/
2150
typedef struct salfilesfetchform {
2155
EditAlignDataPtr adp;
2156
WindoW editor_window;
2157
} FetchForm, PNTR FetchFormPtr;
2159
/* FetchFormMessage
   Form message handler for the fetch window.  The FetchForm is recovered
   from the form with GetObjectExtra; cut/copy/paste/delete are routed to
   the Std*TextProc helpers (each called with NULL), and ffp->appmessage,
   when set, is invoked with the form and the message.
   NOTE(review): the switch header, some case labels and break statements
   are missing from this excerpt, so which messages reach appmessage cannot
   be confirmed here. */
static void FetchFormMessage (ForM f, Int2 mssg)
2163
ffp = (FetchFormPtr) GetObjectExtra (f);
2167
StdCutTextProc (NULL);
2170
StdCopyTextProc (NULL);
2172
case VIB_MSG_PASTE :
2173
StdPasteTextProc (NULL);
2175
case VIB_MSG_DELETE :
2176
StdDeleteTextProc (NULL);
2179
if (ffp->appmessage != NULL) {
2180
ffp->appmessage (f, mssg);
2187
/* FetchTextProc
   Text callback for the accession field: disables ffp->accept while the
   text item is empty (TextHasNoText) and enables it otherwise.  Does
   nothing when no FetchForm is attached to the text item. */
static void FetchTextProc (TexT t)
2191
ffp = (FetchFormPtr) GetObjectExtra (t);
2192
if (ffp == NULL) return;
2193
if (TextHasNoText (t)) {
2194
SafeDisable (ffp->accept);
2196
SafeEnable (ffp->accept);
2200
/* align_this
   Aligns the sequence(s) of sep against the master location and merges
   the result into the editor.
   Visible steps:
     - sep is turned into a location list with SeqEntryToSeqLoc
       (adp->mol_type);
     - the existing alignment is taken from sap->data; one whose dim is 1
       may be converted with multseqalign_to_pairseqalign after an
       OK/Cancel prompt;
     - SeqAlignSeqLocComp compares the existing alignment with the new
       locations;
     - SeqLocListToSeqAlign builds the new alignment (adp->align_format),
       which is chained to the original with SeqAlignLink;
     - repopulate_panel refreshes editor_window when a new alignment was
       made or the original was replaced.
   NOTE(review): the converted alignment is stored through
   adp->sap_original->data while the original is read through the sap
   parameter; the visible caller passes adp->sap_original as sap, so both
   refer to the same annotation there -- confirm for other callers.
   NOTE(review): braces, declarations, several conditions and the return
   statement are not visible in this excerpt. */
static SeqAlignPtr align_this (SeqEntryPtr sep, SeqLocPtr master, SeqAnnotPtr sap, WindoW editor_window, EditAlignDataPtr adp)
2202
SeqAlignPtr salp = NULL,
2204
salp_original = NULL;
2205
ValNodePtr vnp=NULL,
2209
new_seqalign = FALSE,
2210
replace_salp = FALSE;
2215
if (!IS_Bioseq(sep))
2218
vnp = SeqEntryToSeqLoc (sep, &n, adp->mol_type);
2223
salp_original = (SeqAlignPtr)(sap->data);
2224
if (salp_original->dim==2 || is_dim2seqalign (salp_original))
2225
/* NOTE(review): self-assignment, has no effect */
salp_original=salp_original;
2226
else if (salp_original->dim == 1)
2228
replace_salp = TRUE;
2232
ans = Message (MSG_OKC, "You have a multiple alignment.\n Importing a sequence will convert it into a multiple pairwise alignment.\n Do you want to continue ?");
2233
if (ans != ANS_CANCEL) {
2234
salptmp = multseqalign_to_pairseqalign (salp_original);
2237
adp->sap_original->data = (Pointer) salptmp;
2238
salp_original = salptmp;
2248
ok=SeqAlignSeqLocComp (salp_original, vnp);
2253
ValNodeAddPointer(&vnp2, 0, (Pointer)master);
2257
salp = SeqLocListToSeqAlign (vnp, adp->align_format, NULL);
2262
salp = SeqAlignLink (salp_original, salp);
2263
new_seqalign = TRUE;
2267
if (adp->align_format==PRG_BLAST || adp->align_format==PRGALIGNDEFAULT)
2268
Message (MSG_OK, "Blast detected no sequence similarity and could not construct an alignment");
2270
Message (MSG_OK, "No significant similarity detected. No alignment produced");
2272
if (new_seqalign || replace_salp) {
2273
repopulate_panel (editor_window, adp, salp);
2276
salp = SeqAlignSetFree (salp);
2279
Message(MSG_OK, "Can not import a sequence already in the editor");
2284
/* SeqEntryNewForBioseq
   Makes a copy of bsp under a newly generated Seq-id and wraps the copy
   in a fresh SeqEntry.
   Visible steps: a plus-strand interval covering 0..bsp->length-1 is
   built on bsp->id, MakeNewProteinSeqId derives the new id from it,
   BioseqCopy copies the full range, and the new SeqEntry (choice 1,
   data.ptrvalue = copy) is registered with SeqMgrSeqEntry.
   NOTE(review): the return statement and any cleanup of slp are on lines
   not visible in this excerpt. */
static SeqEntryPtr SeqEntryNewForBioseq (BioseqPtr bsp)
2286
SeqEntryPtr new_sep;
2291
slp=SeqLocIntNew(0, bsp->length-1, Seq_strand_plus, bsp->id);
2292
new_sip = MakeNewProteinSeqId (slp, NULL);
2293
new_bsp=BioseqCopy(new_sip, bsp->id, 0, bsp->length-1, Seq_strand_plus, TRUE);
2295
new_sep=SeqEntryNew();
2296
new_sep->choice = 1;
2297
new_sep->data.ptrvalue=(Pointer)new_bsp;
2298
SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) new_bsp, new_sep);
2304
/* CCDownloadProc
   Action of the "Retrieve" button of the fetch window.
   Visible steps:
     - the FetchForm is read from the button, and the download callback is
       looked up in the "SeqEditDisplayForm" application property;
     - the accession text is read; an empty string triggers a message and
       puts the focus back on the field;
     - ffp->accntype value 1 is tested (the first radio button created in
       CCCommonFetchFromNet is "Accession"); StrToLong is used to parse a
       numeric uid;
     - svpp->download is called with "Salsa", the accession, the uid and
       the nucleotide flag derived from ffp->adp->mol_type;
     - a Bioseq result is re-wrapped with SeqEntryNewForBioseq, then
       align_this aligns it against ffp->adp->master.region.
   NOTE(review): svpp is dereferenced at "svpp->download"; a NULL check on
   svpp itself is not visible in this excerpt -- confirm one exists.
   NOTE(review): braces, declarations and several branches are missing
   from this excerpt. */
static void CCDownloadProc (ButtoN b)
2317
SeqEditViewProcsPtr svpp;
2319
ffp = (FetchFormPtr) GetObjectExtra (b);
2320
if (ffp == NULL) return;
2321
svpp = (SeqEditViewProcsPtr) GetAppProperty ("SeqEditDisplayForm");
2324
if (svpp->download == NULL)
2329
GetTitle (ffp->accession, str, sizeof (str));
2330
if (StringHasNoText (str)) {
2331
Message (MSG_OK, "Please enter an accession number or gi");
2334
Select (ffp->accession);
2337
is_na = (Boolean) (ISA_na(ffp->adp->mol_type));
2342
if (GetValue (ffp->accntype) == 1) {
2346
if (! StrToLong (str, &uid)) {
2350
sep = svpp->download ("Salsa", accn, uid, is_na, &is_newbsp);
2352
Message (MSG_OK, "Unable to find this record in the database.");
2358
if (IS_Bioseq(sep)) {
2359
sep = SeqEntryNewForBioseq ((BioseqPtr)sep->data.ptrvalue);
2366
align_this(sep, (SeqLocPtr)ffp->adp->master.region,
2367
ffp->adp->sap_original, ffp->editor_window, ffp->adp);
2374
/* CCCommonFetchFromNet
   Builds the "Download From Entrez" dialog.
   A FetchForm is allocated and attached to a fixed window (cleanup via
   StdCleanupFormProc).  The window holds an "Accession"/"GI" radio group
   (Accession preselected), a text field wired to FetchTextProc, a default
   "Retrieve" button bound to actn (created disabled) and a "Cancel"
   button bound to cancel.
   NOTE(review): the MemNew result is used without a visible NULL check,
   and the assignment of adp into the form plus the code that shows the
   window are on lines not visible in this excerpt. */
static void CCCommonFetchFromNet (BtnActnProc actn, BtnActnProc cancel, EditAlignDataPtr adp, WindoW editor_window)
2384
ffp = MemNew (sizeof (FetchForm));
2386
w = FixedWindow (-50, -33, -10, -10, "Download From Entrez", NULL);
2387
SetObjectExtra (w, ffp, StdCleanupFormProc);
2388
ffp->form = (ForM) w;
2389
ffp->formmessage = FetchFormMessage;
2391
ffp->editor_window = editor_window;
2392
SetGroupSpacing (w, 10, 10);
2394
g = HiddenGroup (w, -3, 0, NULL);
2395
StaticPrompt (g, "Type", 0, stdLineHeight, programFont, 'l');
2396
ffp->accntype = HiddenGroup (g, 4, 0, NULL);
2397
RadioButton (ffp->accntype, "Accession");
2398
RadioButton (ffp->accntype, "GI");
2399
SetValue (ffp->accntype, 1);
2400
ffp->accession = DialogText (g, "", 6, FetchTextProc);
2401
SetObjectExtra (ffp->accession, ffp, NULL);
2403
c = HiddenGroup (w, 4, 0, NULL);
2404
SetGroupSpacing (c, 10, 2);
2405
ffp->accept = DefaultButton (c, "Retrieve", actn);
2406
SetObjectExtra (ffp->accept, ffp, NULL);
2407
/* stays disabled until FetchTextProc sees text in the accession field */
Disable (ffp->accept);
2408
PushButton (c, "Cancel", cancel);
2410
AlignObjects (ALIGN_CENTER, (HANDLE) g, (HANDLE) c, NULL);
2418
/* CCFetchFromNet
   Public entry point: opens the fetch dialog with CCDownloadProc as the
   retrieve action and StdCancelButtonProc as the cancel action. */
extern void CCFetchFromNet (EditAlignDataPtr adp, WindoW editor_window)
2420
CCCommonFetchFromNet (CCDownloadProc, StdCancelButtonProc, adp, editor_window);
2424
/************************************************************
2425
*** EditBioseqToFasta called by salparam.c
2426
SHOULD BE SEQ-ENTRY / Bioseq, BioseqSet
2428
if ( (fout = FileOpen ("ffile", "w")) != NULL) {
2429
sep = bssp->seq_set;
2433
if (count == 1 && !firstout) {}
2435
bsp = (BioseqPtr) sep->data.ptrvalue;
2436
EditBioseqToFasta (bsp, fout, -1, -1);
2443
************************************************************/
2444
/* EditBioseqToFasta
   Writes the range from..to of bsp to fout in FASTA form.
   Visible steps: the best Seq-id is formatted with PRINTID_FASTA_LONG,
   the defline is printed with 1-based coordinates (from+1, to+1), a
   plus-strand interval is opened as a SeqPort (ncbieaa for proteins,
   iupacna otherwise), and the sequence is emitted in chunks read with
   ReadBufferFromSep using Width_Page (60).
   NOTE(review): callers shown in the comment above pass -1, -1; the
   handling of those values, the increment of j and the release of
   slp/spp are on lines not visible in this excerpt. */
extern void EditBioseqToFasta (BioseqPtr bsp, FILE *fout, Int4 from, Int4 to)
2451
Int4 Width_Page = 60;
2458
SeqIdWrite (SeqIdFindBest(bsp->id, 0), str, PRINTID_FASTA_LONG, 120);
2463
fprintf (fout, ">%s (%ld - %ld)\n", str, (long)(from+1), (long)(to+1));
2464
slp = SeqLocIntNew (from, to, Seq_strand_plus, SeqIdFindBest(bsp->id, 0));
2465
if ( bsp->mol == Seq_mol_aa )
2466
spp = SeqPortNewByLoc (slp, Seq_code_ncbieaa);
2468
spp = SeqPortNewByLoc (slp, Seq_code_iupacna);
2470
while ( j < SeqLocStop (slp) - SeqLocStart (slp) +1)
2472
txt_out = ReadBufferFromSep (spp, buffer, j, j +Width_Page, 0);
2473
/* nothing was read: stop writing */
if (txt_out == 0) break;
2475
fprintf(fout, "%s\n", buffer);
2481
/************************************************************/
2482
/* seqannot_write
   Writes a chain of Seq-annot objects to path as ASN.1.
   Visible steps: sap and its alignment data are checked for NULL; a
   COMPSEG alignment is converted with SeqAnnotBoolSegToDenseSeg; a file
   name may be requested with GetInputFileName; the "Seq-annot" type is
   looked up, the file is opened with AsnIoOpen in "w" mode, and each
   annotation is written with SeqAnnotAsnWrite before AsnIoClose.  The
   converted COMPSEG annotation is released with CompSeqAnnotFree.
   NOTE(review): the file-name prompt uses GetInputFileName although the
   function writes -- confirm that is intended.
   NOTE(review): return values, the condition guarding the prompt and the
   loop advance are on lines not visible in this excerpt. */
extern Int2 seqannot_write (SeqAnnotPtr sap, CharPtr path)
2484
Char name[PATH_MAX];
2491
if ( sap == NULL ) {
2494
if ( ( salp = (SeqAlignPtr) sap->data ) == NULL ) {
2497
if ( salp->segtype == COMPSEG ) {
2498
saptmp = SeqAnnotBoolSegToDenseSeg (sap);
2502
if (! GetInputFileName (name, PATH_MAX,"","TEXT")) {
2507
amp = AsnAllModPtr ();
2508
atp = AsnTypeFind (amp,"Seq-annot");
2509
if ((aip = AsnIoOpen (path, "w")) == NULL) {
2512
while ( sap != NULL ) {
2513
if ( ! SeqAnnotAsnWrite ( sap, aip, atp ) ) {
2519
aip = AsnIoClose (aip);
2520
if ( salp->segtype == COMPSEG ) CompSeqAnnotFree (saptmp);
2524
/************************************************************/
2525
/* seqalign_write
   Writes salp to path by wrapping it in a temporary Seq-annot and
   calling seqannot_write; the wrapper is then released with
   SeqAnnotFree.
   NOTE(review): whether sap->data is detached before SeqAnnotFree (so
   that the caller's salp is not freed with the wrapper) is on lines not
   visible in this excerpt -- confirm. */
extern void seqalign_write (SeqAlignPtr salp, CharPtr path)
2530
sap = SeqAnnotNew ();
2533
sap->data = (Pointer) salp;
2534
seqannot_write (sap, path);
2536
sap = SeqAnnotFree (sap);
2541
/************************************************************
2542
*************************************************************
2543
*************************************************************
2544
*************************************************************
2546
*********************************************************
2547
*************************************************************
2548
*************************************************************
2549
*************************************************************
2550
*************************************************************
2551
**********************************************************/
2552
/* seqentry_write
   Writes sep to path as ASN.1: the type is looked up with AsnTypeFind,
   the file is opened with AsnIoOpen in "w" mode, the entry is written
   with SeqEntryAsnWrite and the stream is closed with AsnIoClose.  A
   file name may be requested with GetInputFileName.
   NOTE(review): the type is looked up as "SeqEntry", whereas
   seqannot_write in this file uses the hyphenated "Seq-annot" -- confirm
   that "SeqEntry" is a name AsnTypeFind resolves.
   NOTE(review): the prompt uses GetInputFileName although the function
   writes; return values and the condition guarding the prompt are on
   lines not visible in this excerpt. */
static Boolean seqentry_write (SeqEntryPtr sep, CharPtr path)
2554
Char name[PATH_MAX];
2559
if ( sep == NULL ) {
2564
if (! GetInputFileName (name, PATH_MAX,"","TEXT")) {
2569
amp = AsnAllModPtr ();
2570
atp = AsnTypeFind (amp,"SeqEntry");
2571
if ((aip = AsnIoOpen (path,"w")) == NULL) {
2574
if ( ! SeqEntryAsnWrite ( sep, aip, atp ) ) {
2576
aip = AsnIoClose (aip);
2580
/* get_client_rect
   Fills prc with the rectangle of panel p (ObjectRect) and then insets
   it by HRZ_BORDER_WIDTH horizontally and VER_BORDER_WIDTH vertically. */
static void get_client_rect (PaneL p, RectPtr prc)
2582
ObjectRect (p, prc);
2583
InsetRect (prc, HRZ_BORDER_WIDTH, VER_BORDER_WIDTH);
2586
/***************************************************************
2587
*** switch_featOrder
2589
*****************************************************************/
2590
/* switch_featOrder
   Fills adp->featOrder (and, on one path, adp->groupOrder) for every
   feature definition below FEATDEF_ANY.
   Two paths are visible:
     - the current Musk style is saved, the style named by adp->styleNum
       is selected, per-feature order and group order are read with
       GetMuskCParam, and the previous style is restored;
     - every adp->featOrder[j] is set to choice.
   NOTE(review): the condition that selects between the two paths is on
   a line not visible in this excerpt. */
static void switch_featOrder (EditAlignDataPtr adp, Uint1 choice)
2597
oldstyle = GetMuskCurrentSt ();
2598
SetMuskCurrentSt (GetMuskStyleName (adp->styleNum));
2599
for(j =0; j<FEATDEF_ANY; j++)
2601
adp->featOrder[j] = (Uint1)GetMuskCParam(j, MSM_FORDER, MSM_NUM);
2602
groupNum = (Uint1)GetMuskCParam(j, MSM_FGROUP, MSM_NUM);
2603
adp->groupOrder[j] = (Uint1)GetMuskCParam(MSM_GROUPS, (Int2)groupNum, MSM_NUM);
2605
SetMuskCurrentSt (GetMuskStyleName (oldstyle));
2608
for(j=0; j<FEATDEF_ANY; ++j) adp->featOrder[j] = choice;
2612
/*********************************************
2616
*********************************************/
2617
/* CcId
   Search record handed to FindSeqEntryForSeqIdCallback through
   SeqEntryExplore.  The member declarations are on lines not visible in
   this excerpt; code in this file reads a "sip" member (the Seq-id that
   is searched for). */
typedef struct ccid {
2621
} CcId, PNTR CcIdPtr;
2623
/* FindSeqEntryForSeqIdCallback
   SeqEntryExplore callback.  mydata is a CcIdPtr.  For each Bioseq entry
   whose molecule is nucleic acid (ISA_na), the best Seq-id of the Bioseq
   is compared with cip->sip using SeqIdForSameBioseq.
   NOTE(review): the action taken on a match is on lines not visible in
   this excerpt. */
static void FindSeqEntryForSeqIdCallback (SeqEntryPtr sep, Pointer mydata,
2624
Int4 index, Int2 indent)
2630
if (sep != NULL && sep->data.ptrvalue && mydata != NULL) {
2631
cip = (CcIdPtr)mydata;
2632
if (IS_Bioseq(sep)) {
2633
bsp = (BioseqPtr) sep->data.ptrvalue;
2634
if (bsp!=NULL && ISA_na (bsp->mol)) {
2635
sip = SeqIdFindBest(bsp->id, 0);
2636
if (SeqIdForSameBioseq(cip->sip, sip))
2644
/* CC_SeqEntryToGeneticCode
   Returns a genetic code for the sequence identified by sip inside the
   entity entityID.
   Visible steps: the top SeqEntry of the entity is fetched, a CcId is
   loaded with a duplicate of sip, SeqEntryExplore runs
   FindSeqEntryForSeqIdCallback over the tree, and SeqEntryToGeneticCode
   is applied to a SeqEntry named sep.
   NOTE(review): how sep is obtained from the search, the release of the
   duplicated Seq-id and the return statement are on lines not visible in
   this excerpt. */
static Int2 CC_SeqEntryToGeneticCode (Uint2 entityID, SeqIdPtr sip)
2646
SeqEntryPtr sep_head,
2651
sep_head = GetTopSeqEntryForEntityID (entityID);
2652
ci.sip = SeqIdDup (sip);
2655
SeqEntryExplore(sep_head,(Pointer)&ci, FindSeqEntryForSeqIdCallback);
2659
genCode = SeqEntryToGeneticCode (sep, NULL, NULL, 0);
2664
/* sesp_to_pept
   Translates the coding region that cds belongs to and stores the
   peptide text, laid out in alignment coordinates, on the feature.
   Visible steps:
     - cds and salp must be non-NULL and cds must carry a region;
     - the segment chain is rewound through the prev links (cds1);
     - sesp_to_slp builds the location; locations shorter than 3 are
       tested for explicitly;
     - codon start and strand are read, and the genetic code comes from
       CC_SeqEntryToGeneticCode, with Seq_code_ncbieaa assigned on
       another path;
     - on the minus strand the length modulo 3 (cb) is used to derive
       codonbase;
     - cds_to_pept translates, BSMerge flattens the result to a string,
       which is truncated to slplens/3 residues when 3*length exceeds the
       location length and reversed for the minus strand;
     - residues are spread into pep, advancing three characters per
       residue;
     - ReadBufferFromSap maps pep into buffer for every segment;
     - any previous cds1->data node is freed, and each segment receives a
       ValNode pointing to buffer together with its cumulative offset.
   NOTE(review): every segment's ValNode is given the same buffer
   pointer on the visible lines -- confirm ownership when these nodes are
   freed.
   NOTE(review): braces, most declarations, the bodies of several
   branches and all return statements are missing from this excerpt;
   treat the summary above as a reading of the visible lines only. */
extern Boolean sesp_to_pept (SelEdStructPtr cds, SeqAlignPtr salp, ValNodePtr sqlocs, Boolean partial)
2666
SelEdStructPtr cdsp;
2667
SelEdStructPtr cds1;
2673
CharPtr pepPtr = NULL;
2676
CharPtr buffer = NULL,
2687
if ( cds == NULL || salp == NULL )
2689
if (cds->regiontype == 0 || cds->region == NULL)
2692
while (cds1->prev != NULL) {
2695
slp = sesp_to_slp (cds1, salp, sqlocs, partial);
2696
slplens = SeqLocLen (slp);
2697
if ( slplens < 3 ) {
2701
if (SeqLocStart(slp) > 0)
2704
codonstart = cds1->codonstart;
2705
strand = SeqLocStrand (slp);
2707
genCode = CC_SeqEntryToGeneticCode (cds1->entityID, SeqLocId(slp));
2709
genCode = Seq_code_ncbieaa;
2712
sit = (SeqIntPtr) slp->data.ptrvalue;
2713
if (strand == Seq_strand_minus && codonstart > 1) {
2714
cb = (Int2)(slplens % (Int4) 3);
2719
else if (strand == Seq_strand_minus) {
2720
cb = (Int2)(slplens % (Int4) 3);
2721
if (cb == 1 && sit->from >0) {
2723
} else if (cb == 2) {
2726
if (cb == 0) codonbase = 0;
2727
else if (cb == 1) codonbase = 1;
2728
else if (cb == 2) codonbase = 2;
2730
slplens = SeqLocLen (slp);
2731
bsp = cds_to_pept (slp, codonstart, genCode, TRUE);
2732
str = (CharPtr) BSMerge (bsp, NULL);
2734
pep = MemNew ((size_t) ((slplens + 5) *sizeof(Char)));
2735
pep = emptystring (pep, (Int4)(slplens + 5));
2736
pep [slplens + 3] = '\0';
2739
pepPtr += codonbase +1;
2740
strlens = 3*StringLen(str);
2741
if (slplens < strlens) {
2742
strlens=(Int4)(slplens/(Int4)3);
2743
str [strlens] ='\0';
2745
if (strand == Seq_strand_minus)
2746
reverse_string (str);
2747
strlens = StringLen(str);
2749
/* one residue per codon: the output cursor moves three columns each step */
for (k = 0; k < strlens; k++, pepPtr += 3, strPtr++) {
2754
strlens = SeqLocLen (slp) + 5;
2755
buffer = MemNew ((size_t) (strlens *sizeof(Char)));
2756
buffer = emptystring (buffer, strlens);
2757
buffer [strlens -1] = '\0';
2760
sip = SeqLocId (slp);
2761
for (cdsp= cds1; cdsp != NULL; cdsp = cdsp->next)
2763
slp = (SeqLocPtr) cdsp->region;
2764
buffer = ReadBufferFromSap (pep, buffer, salp, sip, SeqLocStart(slp), SeqLocStop(slp));
2770
if (cds1->data != NULL) {
2773
pept->data.ptrvalue = MemFree (pept->data.ptrvalue);
2774
pept = ValNodeFree (pept);
2777
for (cdsp= cds1; cdsp != NULL; cdsp = cdsp->next)
2779
pept = ValNodeNew (NULL);
2781
pept->data.ptrvalue = (Pointer) buffer;
2783
cdsp->offset = sumlens;
2784
sumlens += SeqLocLen ((SeqLocPtr) cdsp->region);
2791
/*******************************************************************
2792
*** TranslateProc, TranslateButton
2796
********************************************************************/
2797
/* CdRgnToProtProc
   Translates every selected coding-region feature shown in the editor.
   For each ObjMgr selection accepted by checkssp_for_editor, the feature
   list is chosen by item type (OBJ_VIRT: adp->feat with FEATDEF_CDS;
   OBJ_SEQFEAT: adp->seqfeat with SEQFEAT_CDREGION); features whose
   entityID/itemID match the selection are passed to sesp_to_pept.  When
   something was translated (seq_select), the display data is rebuilt,
   the scroll bar is corrected with the previous horizontal ratio and the
   panel is invalidated.
   NOTE(review): hratio divides by adp->length with no visible guard
   against zero -- confirm callers guarantee a non-empty alignment. */
extern void CdRgnToProtProc (PaneL pnl, EditAlignDataPtr adp)
2802
ValNodePtr feathead = NULL,
2805
Boolean seq_select = FALSE;
2808
hratio = (float)adp->hoffset / (float)adp->length;
2809
ssp = ObjMgrGetSelected();
2810
for (; ssp != NULL; ssp = ssp->next)
2812
if ( checkssp_for_editor (ssp) && ssp->itemtype == OBJ_VIRT) {
2813
feathead = adp->feat;
2814
itemsubtype = FEATDEF_CDS;
2816
else if ( checkssp_for_editor (ssp) && ssp->itemtype == OBJ_SEQFEAT) {
2817
feathead = adp->seqfeat;
2818
itemsubtype = SEQFEAT_CDREGION;
2820
else feathead = NULL;
2821
if (feathead != NULL)
2823
for (vnp = feathead; vnp != NULL; vnp = vnp->next)
2825
if (vnp->choice == itemsubtype) {
2826
cds = (SelEdStructPtr) vnp->data.ptrvalue;
2827
if (cds->entityID == ssp->entityID && cds->itemID == ssp->itemID)
2829
if (sesp_to_pept(cds, (SeqAlignPtr) adp->sap_align->data, adp->sqloc_list, TRUE))
2837
/* nothing translated: leave the display untouched */
if (!seq_select) return;
2838
data_collect_arrange (adp, TRUE);
2839
SeqEdSetCorrectBarMax (pnl, adp, hratio);
2840
temport = SavePort(ParentWindow(pnl));
2842
inval_panel (pnl, -1, -1);
2843
RestorePort (temport);
2847
/*******************************************************************
2851
********************************************************************/
2852
/* UntranslateFunc
   Removes the stored translation from every selected coding-region
   feature.  Selections are matched to features the same way as in
   CdRgnToProtProc (OBJ_VIRT: adp->feat / FEATDEF_CDS; OBJ_SEQFEAT:
   adp->seqfeat / SEQFEAT_CDREGION).  For a matching feature with
   non-NULL data, the peptide text is released with MemFree and the
   segment chain is walked through the next links.  When something
   changed (seq_select), the display data is rebuilt and the panel is
   invalidated.
   NOTE(review): the body of the per-segment loop is on lines not
   visible in this excerpt.
   NOTE(review): hratio divides by adp->length with no visible guard
   against zero. */
extern void UntranslateFunc (PaneL pnl, EditAlignDataPtr adp)
2858
ValNodePtr feathead = NULL, vnp = NULL;
2860
Boolean seq_select = FALSE;
2863
hratio = (float)adp->hoffset / (float)adp->length;
2864
ssp = ObjMgrGetSelected();
2865
for (; ssp != NULL; ssp = ssp->next)
2867
if ( checkssp_for_editor (ssp) && ssp->itemtype == OBJ_VIRT) {
2868
feathead = adp->feat;
2869
itemsubtype = FEATDEF_CDS;
2871
else if ( checkssp_for_editor (ssp) && ssp->itemtype == OBJ_SEQFEAT) {
2872
feathead = adp->seqfeat;
2873
itemsubtype = SEQFEAT_CDREGION;
2875
else feathead = NULL;
2876
if (feathead != NULL) {
2877
for (vnp = feathead; vnp != NULL; vnp = vnp->next) {
2878
if (vnp->choice == itemsubtype)
2880
cds = (SelEdStructPtr) vnp->data.ptrvalue;
2881
if (cds->entityID == ssp->entityID && cds->itemID == ssp->itemID)
2883
if (cds->data != NULL) {
2886
pept->data.ptrvalue = MemFree (pept->data.ptrvalue);
2888
for (; cds != NULL; cds = cds->next) {
2899
if (!seq_select) return;
2900
data_collect_arrange (adp, TRUE);
2901
SeqEdSetCorrectBarMax (pnl, adp, hratio);
2902
temport = SavePort(ParentWindow(pnl));
2904
inval_panel (pnl, -1, -1);
2905
RestorePort (temport);
2911
/* ShowFeatureFunc
   Collects the features of every aligned sequence for display.
   Visible steps: switch_featOrder(adp, 1); the current adp->seqfeat list
   is freed; for each align node whose first segment has no feature list
   (anp->segs->cnp == NULL), the sequence location is collected and
   features are gathered with CollectFeatureForAlign and
   CollectFeatureForEditor; switch_featOrder(adp, 0); OrderFeatProc is
   run on the node list; a non-empty adp->seqfeat is passed to
   checkselectsequinfeature_for_editor.
   NOTE(review): anp->segs is dereferenced without a visible NULL check,
   and the return statement is on a line not visible in this excerpt. */
extern Boolean ShowFeatureFunc (EditAlignDataPtr adp)
2916
Boolean seq_select = FALSE;
2918
switch_featOrder (adp, 1);
2919
adp->seqfeat = SeqfeatlistFree (adp->seqfeat);
2920
for (vnp = adp->anp_list; vnp != NULL; vnp = vnp->next) {
2921
anp = (AlignNodePtr) vnp->data.ptrvalue;
2922
if ( anp != NULL ) {
2923
if ( anp->segs->cnp == NULL ) {
2924
slp = CollectSeqLocFromAlignNode (anp);
2925
CollectFeatureForAlign (slp, anp, adp->featOrder, adp->groupOrder);
2926
adp->seqfeat=CollectFeatureForEditor (slp, adp->seqfeat, anp->seq_entityID, anp->bsp_itemID, adp->featOrder, FALSE);
2932
switch_featOrder (adp, 0);
2935
OrderFeatProc (adp->anp_list);
2936
if (adp->seqfeat != NULL)
2937
checkselectsequinfeature_for_editor (adp->seqfeat);
2941
/***********************************************************
2945
*** loop on Bioseq to delete the features in those selected only.
2947
***********************************************************/
2948
/* HideFeatureFunc
   Removes displayed features from the align nodes.
   For OBJ_BIOSEQ input: the selection list is walked (or adp->master
   when checkOMss_for_itemtype(OBJ_BIOSEQ) returns 0); for each selected
   Bioseq the matching entries of adp->seqfeat are freed by entity id
   and, in the matching align node, each segment's feature list and
   mismatch list are freed.
   For OBJ_SEQALIGN input: the same per-segment cleanup is applied to
   every align node.
   When seq_select is set the whole adp->seqfeat list is freed.  The
   function brackets its work with switch_featOrder(adp, 0) and
   switch_featOrder(adp, 1).
   NOTE(review): when adp->master is used as the start of the list, the
   loop continues through adp->master.next -- confirm that link is NULL.
   NOTE(review): the segment loop headers, the code between
   FreeFeatureList and ValNodeFree, and the return statement are on
   lines not visible in this excerpt. */
extern Boolean HideFeatureFunc (EditAlignDataPtr adp)
2953
AlignSegPtr asp, aspnext;
2954
Boolean seq_select = FALSE;
2956
switch_featOrder (adp, 0);
2957
if (adp->input_format == OBJ_BIOSEQ)
2959
if ( checkOMss_for_itemtype (OBJ_BIOSEQ) == 0 )
2960
ssp = &(adp->master);
2961
else ssp = ObjMgrGetSelected();
2962
for (; ssp != NULL; ssp = ssp->next) {
2963
if ( checkssp_for_editor (ssp) && ssp->itemtype == OBJ_BIOSEQ ) {
2964
adp->seqfeat =SeqfeatlistFree_fromID (adp->seqfeat, ssp->entityID);
2965
anp = (AlignNodePtr) AlignNodeFind (adp->anp_list, ssp->entityID, ssp->itemID, ssp->itemtype);
2966
if ( anp != NULL ) {
2970
aspnext = asp->next;
2971
if(asp->cnp != NULL)
2972
FreeFeatureList(asp->cnp);
2975
ValNodeFree(asp->mismatch);
2976
asp->mismatch = NULL;
2984
else if (adp->input_format == OBJ_SEQALIGN)
2986
for (vnp = adp->anp_list; vnp != NULL; vnp = vnp->next) {
2987
anp = (AlignNodePtr) vnp->data.ptrvalue;
2988
if ( anp != NULL ) {
2992
aspnext = asp->next;
2993
if(asp->cnp != NULL)
2994
FreeFeatureList(asp->cnp);
2997
ValNodeFree(asp->mismatch);
2998
asp->mismatch = NULL;
3004
if (seq_select) adp->seqfeat =SeqfeatlistFree (adp->seqfeat);
3007
switch_featOrder (adp, 1);
3013
/***********************************************************
3015
*** ResetFeatureProc
3017
***********************************************************/
3018
/* ResetFeatureFunc
   Rebuilds the feature display from scratch.
   Visible steps: switch_featOrder(adp, 1); Gettranslation is asked which
   features of adp->seqfeat are translated (ssp); adp->seqfeat is freed;
   for every align node the per-segment feature and mismatch lists are
   freed and features are collected again with CollectFeatureForAlign and
   CollectFeatureForEditor; finally coding regions in the new list that
   match ssp by entityID/itemID are translated again with sesp_to_pept.
   NOTE(review): ssp is dereferenced in the final loop; a NULL check or
   an iteration over ssp is not visible in this excerpt -- confirm.
   NOTE(review): the last visible line is cut in mid-statement, and the
   rest of the function, including its return, is not visible here. */
extern Boolean ResetFeatureFunc (EditAlignDataPtr adp)
3024
AlignSegPtr asp, aspnext;
3025
Boolean seq_select = FALSE;
3027
switch_featOrder (adp, 1);
3028
ssp = Gettranslation (adp->seqfeat);
3029
adp->seqfeat = SeqfeatlistFree (adp->seqfeat);
3030
for (vnp = adp->anp_list; vnp != NULL; vnp = vnp->next) {
3031
anp = (AlignNodePtr) vnp->data.ptrvalue;
3032
if ( anp != NULL ) {
3036
aspnext = asp->next;
3037
if(asp->cnp != NULL)
3038
FreeFeatureList(asp->cnp);
3041
ValNodeFree(asp->mismatch);
3042
asp->mismatch = NULL;
3044
slp = CollectSeqLocFromAlignNode (anp);
3045
CollectFeatureForAlign (slp, anp, adp->featOrder, adp->groupOrder);
3046
adp->seqfeat=CollectFeatureForEditor (slp, adp->seqfeat, anp->seq_entityID, anp->bsp_itemID, adp->featOrder, FALSE);
3050
if (adp->seqfeat !=NULL) {
3051
for (vnp = adp->seqfeat; vnp != NULL; vnp = vnp->next)
3053
if (vnp->choice == SEQFEAT_CDREGION) {
3054
cds = (SelEdStructPtr) vnp->data.ptrvalue;
3055
if (cds->entityID == ssp->entityID && cds->itemID == ssp->itemID)
3057
if (sesp_to_pept(cds, (SeqAlignPtr) adp->sap_align->data, adp->
3067
/******************************************************************/
3068
/* split_feat
   Splits the feature segment feat at position pos.
   Visible steps: the existing interval is cut so that it ends at pos-1,
   and a new segment is created with new_seledstruct carrying the same
   ids, label and data; its offset is feat->offset plus the length of
   the location at the time of the call.  For changevalue >= 0 the new
   start is pos + changevalue, and the new stop is the old stop plus
   changevalue.
   NOTE(review): the evaluation order matters -- "to" is read from
   slpfeat before sitfeat->to is overwritten, and SeqLocLen(slpfeat) in
   the new_seledstruct call is evaluated after the cut.
   NOTE(review): the negative-changevalue branch, the linking of the new
   segment into the chain and the return statement are on lines not
   visible in this excerpt. */
static SelEdStructPtr split_feat (SelEdStructPtr feat, Int4 pos, Int4 changevalue)
3072
SelEdStructPtr new, next;
3075
slpfeat = (SeqLocPtr) feat->region;
3077
if (changevalue >= 0)
3078
from = (Int4)(pos + changevalue);
3082
to = (Int4)(SeqLocStop(slpfeat) + changevalue);
3083
sitfeat = (SeqIntPtr) slpfeat->data.ptrvalue;
3084
sitfeat->to = pos -1;
3085
new = new_seledstruct (feat->entityID, feat->itemID, feat->itemtype, 0, feat->bsp_itemID, from, to, SeqLocId (slpfeat), SeqLocStrand (slpfeat), FALSE, feat->label, feat->data, feat->offset + SeqLocLen(slpfeat), 1);
3092
/* update_featpept
   Adjusts feature coordinates after an edit of changevalue positions
   made at the location ssp, and returns the (possibly modified) head of
   the feature list.
   Early exits return feathead unchanged when ssp is NULL, has no region,
   or covers a single position (start == stop).
   For each feature of the requested subtype (255 matches every subtype)
   and each of its segments, the relation between the edited location
   and the segment is classified (overlapp_ssp, precede_ssp, succeed_ssp,
   include_ssp, overlapp_startssp) and the interval is shifted, shrunk,
   split (split_feat when adp->spliteditmode is set) or deleted
   (del_ssp_fromid).  Touched segments are invalidated on screen, and a
   feature that carries translation data is translated again with
   sesp_to_pept.
   NOTE(review): the two consecutive inval_selstruct calls near the end
   take the same arguments on the visible lines -- confirm whether the
   duplication is intended or whether the lines between them (not
   visible here) separate two branches.
   NOTE(review): braces, several conditions and the final return are
   missing from this excerpt; the branch structure above is a reading of
   the visible lines only. */
extern ValNodePtr update_featpept (EditAlignDataPtr adp, ValNodePtr feathead, RecT *rp, SelStructPtr ssp, Int4 changevalue, Uint2 itemsubtype)
3096
SelEdStructPtr feat,
3103
Boolean overlap, precede, succeed, deletefeat;
3105
if (ssp == NULL) return feathead;
3106
if (ssp->regiontype == 0 || ssp->region == NULL) return feathead;
3107
slpssp = (SeqLocPtr) ssp->region;
3108
if (SeqLocStart(slpssp) == SeqLocStop(slpssp)) return feathead;
3109
width = adp->visibleWidth;
3110
if (adp->columnpcell > 0)
3111
width += (Int2) adp->visibleWidth / (Int2) adp->columnpcell;
3113
while (vnpfeat != NULL)
3115
/* saved first: the current node may be removed below */
vnpfeatnext = vnpfeat->next;
3116
feat1 = (SelEdStructPtr) vnpfeat->data.ptrvalue;
3117
if( feat1 != NULL && (vnpfeat->choice ==itemsubtype || itemsubtype == 255))
3120
while (feat != NULL)
3123
slpfeat = (SeqLocPtr) feat->region;
3124
overlap = overlapp_ssp (slpssp, slpfeat);
3125
precede = precede_ssp (slpssp, slpfeat);
3126
succeed = succeed_ssp (slpssp, slpfeat);
3128
if (overlap || precede || succeed )
3130
sitfeat = (SeqIntPtr) slpfeat->data.ptrvalue;
3133
sitfeat->from = sitfeat->from + changevalue ;
3134
sitfeat->to = sitfeat->to + changevalue ;
3138
if (changevalue < 0 ) sitfeat->to =sitfeat->to +changevalue ;
3142
if (changevalue < 0)
3144
if ( include_ssp (slpssp, slpfeat) ) {
3146
feathead = del_ssp_fromid (feathead, itemsubtype, feat);
3148
inval_rect (rp->left, rp->top, rp->right, rp->bottom);
3150
else if ( include_ssp (slpfeat, slpssp) ) {
3151
if (!adp->spliteditmode)
3152
sitfeat->to = sitfeat->to + changevalue;
3154
feat=split_feat(feat,SeqLocStart(slpssp), changevalue);
3156
else if ((lg = overlapp_startssp (slpssp, slpfeat)) > 0) {
3157
if (changevalue < 0) {
3158
sitfeat->from = sitfeat->from - (abs(changevalue)-lg);
3159
sitfeat->to = sitfeat->to + changevalue;
3162
ErrPostEx (SEV_ERROR, 0, 0, "Cut what ?");
3164
else if ((lg = overlapp_startssp (slpfeat, slpssp)) > 0) {
3165
if (changevalue < 0) {
3166
sitfeat->to = sitfeat->to - lg ;
3168
else sitfeat->to = sitfeat->to + lg ;
3172
if (!adp->spliteditmode)
3173
sitfeat->to = sitfeat->to + changevalue ;
3175
feat = split_feat (feat, SeqLocStart(slpssp), changevalue);
3179
if (!deletefeat && rp != NULL)
3181
inval_selstruct(adp, feat->entityID, feat->itemID, feat->itemtype, itemsubtype, rp, adp->margin.left,(Int2)(width *adp->charw));
3182
inval_selstruct(adp, feat->entityID, feat->itemID, feat->itemtype, itemsubtype, rp, adp->margin.left, (Int2)(width *adp->charw));
3188
if (feat1->data != NULL)
3189
sesp_to_pept (feat1, (SeqAlignPtr) adp->sap_align->data, adp->sqloc_list, TRUE);
3191
vnpfeat = vnpfeatnext;
3196
/* anpp_has_feature
   Scans every align node in anp_list and every segment of each node,
   testing whether a segment carries a feature list (asp->cnp != NULL).
   NOTE(review): the return statements are on lines not visible in this
   excerpt; presumably TRUE on the first such segment and FALSE
   otherwise -- confirm. */
static Boolean anpp_has_feature (ValNodePtr anp_list)
3203
for (vnp = anp_list; vnp != NULL; vnp = vnp->next) {
3204
anp = (AlignNodePtr)vnp->data.ptrvalue;
3208
for(asp = anp->segs; asp != NULL; asp = asp->next) {
3209
if(asp->cnp != NULL) {
3221
/* ShowFeatureProc
   Toggles feature display in the editor panel.
   adp->showfeat is inverted.  When it becomes TRUE, features are
   collected with ShowFeatureFunc unless the align nodes already hold
   some, and the menu items and button switch to the "Hide Feat." state.
   When it becomes FALSE, existing features are removed with
   HideFeatureFunc and the controls switch to the "Show Feat." state.
   The display data is then rebuilt and the panel invalidated.
   Returns early when the window has no form data, when no editor data is
   attached to the panel, or when adp->seqnumber is 0.
   NOTE(review): how the invalidate parameter gates the redraw is on
   lines not visible in this excerpt.
   NOTE(review): hratio divides by adp->length with no visible guard
   against zero. */
extern void ShowFeatureProc (PaneL pnl, Boolean invalidate)
3224
SeqEditViewFormPtr wdp;
3225
EditAlignDataPtr adp;
3229
wdp = (SeqEditViewFormPtr)GetObjectExtra (ParentWindow(pnl));
3230
if ( wdp == NULL ) return;
3231
if ( ( adp = GetAlignDataPanel (pnl) ) == NULL ) return;
3232
if ( adp->seqnumber == 0 ) return;
3233
hratio = (float)adp->hoffset / (float)adp->length;
3234
adp->showfeat = (!adp->showfeat);
3237
if ( adp->showfeat ) {
3239
if (!anpp_has_feature (adp->anp_list))
3240
ok = (Boolean) ShowFeatureFunc (adp);
3242
Enable (wdp->hidefeatitem);
3243
Disable(wdp->showfeatitem);
3244
SetTitle (wdp->showfeatbt, "Hide Feat.");
3249
if (anpp_has_feature (adp->anp_list))
3250
ok = (Boolean) HideFeatureFunc (adp);
3252
Disable (wdp->hidefeatitem);
3253
Enable(wdp->showfeatitem);
3254
SetTitle (wdp->showfeatbt, "Show Feat.");
3259
data_collect_arrange (adp, TRUE);
3260
SeqEdSetCorrectBarMax (pnl, adp, hratio);
3262
temport = SavePort(ParentWindow(pnl));
3264
inval_panel (pnl, -1, -1);
3265
RestorePort (temport);
3271
/******************************************************
3273
*** LaunchCDSEditor on a Bioseq (input_itemID)
3275
*******************************************************/
3276
/* LaunchCDSEditor
   Opens the coding-region feature form on the Bioseq input_itemID with
   slp as the proposed location.
   Visible steps (only when slp is non-NULL and input_entityID is not 0):
   the top SeqEntry of the entity is fetched and its entity id is read
   back; CreateCdRgnForm builds the "Coding Region" window; the form's
   input ids are set (input type OBJ_BIOSEQ, output OBJ_SEQFEAT /
   FEATDEF_CDS); the form is initialised with a NULL pointer and
   VIB_MSG_INIT; slp is loaded into the location dialog; and
   CdRgnTranslateWithFrame is called with frame 1.
   NOTE(review): the codonstart parameter is not used on any visible
   line; the frame passed to CdRgnTranslateWithFrame is the literal 1.
   NOTE(review): a NULL check on cfp and the code that shows the window
   are on lines not visible in this excerpt. */
static void LaunchCDSEditor (Uint2 input_entityID, Uint2 input_itemID, SeqLocPtr slp, Uint1 codonstart)
3279
SeqEntryPtr top_sep;
3282
if (slp != NULL && input_entityID != 0)
3284
top_sep = GetTopSeqEntryForEntityID (input_entityID);
3285
input_entityID = SeqMgrGetEntityIDForSeqEntry (top_sep);
3287
w = (WindoW) CreateCdRgnForm (-50, -33, "Coding Region", NULL, top_sep, CdRgnFeatFormActnProc);
3288
cfp = (FeatureFormPtr) GetObjectExtra (w);
3290
cfp->input_entityID = input_entityID;
3291
cfp->input_itemID = input_itemID;
3292
cfp->input_itemtype = OBJ_BIOSEQ;
3293
cfp->this_itemtype = OBJ_SEQFEAT;
3294
cfp->this_subtype = FEATDEF_CDS;
3295
PointerToForm (cfp->form, NULL);
3296
SendMessageToForm (cfp->form, VIB_MSG_INIT);
3297
PointerToDialog (cfp->location, (Pointer) slp);
3298
CdRgnTranslateWithFrame (cfp->form, 1);
3305
/***************************************************************
3306
*** slpfeatreplacefunc
3308
*****************************************************************/
3309
/* slpfeatreplacefunc
   GatherItem callback that replaces the location of a feature.
   gcp->thisitem is the feature and gcp->userdata the new location.  The
   5' and 3' partial flags of the old location are copied onto the new
   one, the old location is freed, and the new one is installed.
   Items whose type is not OBJ_SEQFEAT are tested for at the top.
   NOTE(review): after this call the feature owns slp, so the caller
   must not free it -- confirm the callers follow that.
   NOTE(review): the return statements are on lines not visible in this
   excerpt. */
static Boolean slpfeatreplacefunc(GatherContextPtr gcp)
3315
if(gcp->thistype != OBJ_SEQFEAT)
3317
sfp = (SeqFeatPtr)(gcp->thisitem);
3318
slp = (SeqLocPtr) gcp->userdata;
3319
CheckSeqLocForPartial (sfp->location, &p5, &p3);
3320
SetSeqLocPartial (slp, p5, p3);
3321
sfp->location = SeqLocFree (sfp->location);
3322
sfp->location = slp;
3326
/******************************************************************
3329
*** look at the selected items
3330
*** if new feature, attaches it (AttachDataForProc)
3331
*** other, replaces it (GatherItem)
3332
*** sends a message to ObjMgr (ObjMgrSendMsg (OM_MSG_UPDATE..))
3333
*** writes the seqentry to the temporary file
3335
*** SaveFeatureProc, SaveFeatureButton : call SaveFeatProc
3336
*** sends a message to ObjMgr (ObjMgrSendMsg (OM_MSG_UPDATE..))
3338
*******************************************************************/
3339
/* SaveFeatProc
   Saves the selected features of the editor panel.
   Returns early when no editor data is attached, when adp->seqnumber is
   0, or when the selection holds neither OBJ_VIRT nor OBJ_SEQFEAT items.
   For OBJ_VIRT selections (features drawn in the editor): a matching
   coding region whose location starts at 0 or whose codonstart is 1 is
   converted to a sequence location (sesp_to_slp) and handed to
   LaunchCDSEditor; otherwise "Codon start must be 1" is posted.
   For OBJ_SEQFEAT selections (existing features): the matching CDS, gene
   or RNA feature gets its location replaced through GatherItem with
   slpfeatreplacefunc, an update message is sent, the feature display is
   rebuilt (HideFeatureFunc, then ShowFeatureProc with showfeat reset)
   and the panel is invalidated; GatherProcLaunch with OMPROC_EDIT is
   also called.
   The top SeqEntry is written to adp->tmpfile with seqentry_write, and
   features are shown again if they are hidden.
   NOTE(review): the list that vnp walks, braces and several conditions
   are on lines not visible in this excerpt. */
extern void SaveFeatProc (PaneL pnl)
3341
EditAlignDataPtr adp;
3343
SelStructPtr ssp = NULL;
3344
SelEdStructPtr feat;
3350
Uint2 bsp_eID, bsp_iID;
3353
if ( ( adp = GetAlignDataPanel (pnl) ) == NULL ) return;
3354
if ( adp->seqnumber == 0 ) return;
3355
if ( checkOMss_for_itemtype (OBJ_VIRT) == 0
3356
&& checkOMss_for_itemtype (OBJ_SEQFEAT) == 0 ) return;
3357
sep = GetBestTopParentForItemID (adp->master.entityID, adp->master.itemID, adp->master.itemtype);
3360
ssp = ObjMgrGetSelected();
3361
for (; ssp != NULL; ssp = ssp->next)
3363
if ( checkssp_for_editor (ssp) && ssp->itemtype == OBJ_VIRT)
3366
while (vnp != NULL) {
3368
if (vnp->choice == SEQFEAT_CDREGION) {
3369
feat = (SelEdStructPtr) vnp->data.ptrvalue;
3370
if ( is_samess_ses (ssp, feat) )
3372
if (SeqLocStart((SeqLocPtr)feat->region)==0 || feat->codonstart == 1)
3374
adp->curfeat = feat;
3375
slp = sesp_to_slp (feat, (SeqAlignPtr) adp->sap_align->data, adp->sqloc_list, FALSE);
3376
bsp_eID = SeqMgrGetEntityIDForSeqEntry (sep);
3377
bsp_iID = feat->bsp_itemID;
3378
LaunchCDSEditor (bsp_eID, bsp_iID, slp, feat->codonstart);
3381
ErrPostEx (SEV_ERROR, 0, 0, "Codon start must be 1");
3388
else if ( checkssp_for_editor (ssp) && ssp->itemtype == OBJ_SEQFEAT)
3394
if (vnp->choice == SEQFEAT_CDREGION
3395
|| vnp->choice == SEQFEAT_GENE || vnp->choice == SEQFEAT_RNA)
3397
feat = (SelEdStructPtr) vnp->data.ptrvalue;
3398
if ( is_samess_ses (ssp, feat) )
3400
slp = sesp_to_slp (feat,(SeqAlignPtr) adp->sap_align->data, adp->sqloc_list, FALSE);
3403
bsp_eID = feat->entityID;
3404
bsp_iID = feat->itemID;
3405
GatherItem (bsp_eID, bsp_iID, OBJ_SEQFEAT, (Pointer)(slp), slpfeatreplacefunc);
3406
ObjMgrSendMsg (OM_MSG_UPDATE, adp->master.entityID, adp->master.itemID, adp->master.itemtype);
3407
HideFeatureFunc (adp);
3408
/* forced to FALSE so that the toggle in ShowFeatureProc turns display on */
adp->showfeat = FALSE;
3409
ShowFeatureProc(pnl, FALSE);
3410
get_client_rect (pnl, &rp);
3411
width = adp->visibleWidth;
3412
if (adp->columnpcell > 0)
3413
width +=(Int2)adp->visibleWidth/(Int2) adp->columnpcell;
3414
inval_all (adp, &rp, (Uint2)255, OBJ_VIRT, OBJ_SEQFEAT, width);
3418
handled = GatherProcLaunch (OMPROC_EDIT, FALSE, bsp_eID, bsp_iID, OBJ_SEQFEAT, 0, 0, OBJ_SEQFEAT, 0);
3427
seqentry_write (sep, adp->tmpfile);
3431
if (!adp->showfeat) {
3432
ShowFeatureProc(pnl, TRUE);
3437
/******************************************************************
3440
*** looks at the features items
3441
*** attaches the new features (AttachDataForProc)
3442
*** replaces the old features (GatherItem)
3447
EditAlignDataPtr adp;
3448
SelEdStructPtr feat;
3452
if ( ( adp = GetAlignDataPanel (pnl) ) == NULL ) return;
3453
if ( adp->seqnumber == 0 ) return;
3455
MemSet ((Pointer) &ompc, 0, sizeof (OMProcControl));
3456
ompc.input_entityID = adp->master.entityID;
3457
ompc.input_itemID = adp->master.itemID;
3458
ompc.input_itemtype = adp->master.itemtype;
3459
ompc.output_itemtype = OBJ_SEQFEAT;
3461
for (vnp=adp->seqfeat; vnp != NULL; vnp = vnp->next)
3463
if (vnp->choice == SEQFEAT_CDREGION
3464
|| vnp->choice == SEQFEAT_GENE || vnp->choice == SEQFEAT_RNA) {
3465
feat = (SelEdStructPtr) vnp->data.ptrvalue;
3466
slp = sesp_to_slp (feat, (SeqAlignPtr) adp->sap_align->data, FALSE);
3468
GatherItem (feat->entityID, feat->itemID, OBJ_SEQFEAT,
3469
(Pointer)(slp), slpfeatreplacefunc);
3473
*******************************************************************/
3474
/* SaveAllFeatProc
   NOTE(review): only the signature is visible in this excerpt; the body
   is on lines that are missing here.  The block comment that precedes
   this function contains code that loops over adp->seqfeat and replaces
   feature locations through GatherItem and slpfeatreplacefunc, which
   looks like a disabled earlier implementation -- confirm against the
   full file. */
extern void SaveAllFeatProc (PaneL pnl)
3480
/* MakeFeatFunc
   Creates an editor-only (OBJ_VIRT) feature of type itemsubtype over the
   selected region ssp and adds it to adp->feat.
   Visible steps: the item id is adp->nfeat when no OBJ_VIRT selection
   exists for the entity, otherwise the id of that selection; a stop
   equal to APPEND_RESIDUE is replaced by the length of the whole
   sequence minus 1 (computed through a temporary SEQLOC_WHOLE); for
   from >= 0 and to > 0 the strand is written into the selection's
   interval, a SelEdStruct is built with ss_to_ses and positioned, the
   original item id is kept as bsp_itemID, codonstart is set to 1, and
   the feature is added with AddFeatFunc.
   NOTE(review): the strand is written into the caller's ssp->region
   interval, not into a copy.
   NOTE(review): braces and some statements (for instance any update of
   adp->nfeat) are on lines not visible in this excerpt. */
static void MakeFeatFunc (EditAlignDataPtr adp, SelStructPtr ssp, Uint2 itemsubtype, Uint1 strand)
3482
SelEdStructPtr feat = NULL;
3483
SelStructPtr ssptmp;
3490
slp = (SeqLocPtr) ssp->region;
3491
ssptmp = is_selectedbyID (ssp->entityID, 255, OBJ_VIRT);
3492
if (ssptmp == NULL) {
3494
itemID = adp->nfeat;
3496
itemID = ssptmp->itemID;
3498
from = SeqLocStart (slp);
3499
to = SeqLocStop (slp);
3500
if (to == APPEND_RESIDUE) {
3501
slpfeat = ValNodeNew (NULL);
3502
slpfeat->choice = SEQLOC_WHOLE;
3503
slpfeat->data.ptrvalue = (Pointer) SeqIdDup (SeqLocId(slp));
3504
to = SeqLocLen(slpfeat) -1;
3505
SeqLocFree (slpfeat);
3507
if (from >= 0 && to > 0) {
3508
sit = (SeqIntPtr) slp->data.ptrvalue;
3509
sit->strand = strand;
3510
feat = ss_to_ses (ssp);
3511
slpfeat = (SeqLocPtr)feat->region;
3512
setposition_toses (feat, from, to);
3514
/* feat->entityID !!!!!!!!!!!!!!!!!!!!!*/
3515
feat->bsp_itemID = feat->itemID;
3516
feat->itemID = itemID;
3517
feat->itemtype = OBJ_VIRT;
3518
feat->codonstart = 1;
3523
adp->feat = AddFeatFunc (feat, &(adp->feat), itemsubtype);
3527
/* MakeFeatProc
   Creates a feature of type itemsubtype on the given strand for every
   selected Bioseq region in the editor panel (MakeFeatFunc), then
   rebuilds the display data, corrects the scroll bar and invalidates
   the panel.
   NOTE(review): the guard is "adp->seqnumber > 0 || ISA_na(...)"; with
   "||" the body also runs for a nucleotide editor whose seqnumber is 0
   -- confirm that "&&" was not intended.
   NOTE(review): hratio divides by adp->length with no visible guard
   against zero. */
extern void MakeFeatProc (PaneL pnl, Uint2 itemsubtype, Uint1 strand)
3530
EditAlignDataPtr adp;
3534
if ( ( adp = GetAlignDataPanel (pnl) ) != NULL ) {
3535
if (adp->seqnumber > 0 || ISA_na(adp->mol_type)) {
3536
hratio = (float)adp->hoffset / (float)adp->length;
3537
ssp = ObjMgrGetSelected();
3538
for (; ssp != NULL; ssp = ssp->next)
3540
if (checkssp_for_editor (ssp) && ssp->itemtype == OBJ_BIOSEQ ) {
3541
MakeFeatFunc (adp, ssp, itemsubtype, strand);
3544
data_collect_arrange (adp, TRUE);
3545
SeqEdSetCorrectBarMax (pnl, adp, hratio);
3547
temport = SavePort(ParentWindow(pnl));
3549
inval_panel (pnl, -1, -1);
3550
RestorePort (temport);
3556
/* TranslateAllBioseq
   Creates a plus-strand coding-region feature over the full location of
   every aligned sequence and translates each of them.
   Visible steps: for each align node the location is collected, a
   selection record covering it is built with SelStructNew, and
   MakeFeatFunc is called with SEQFEAT_CDREGION and Seq_strand_plus; a
   second loop translates every list entry whose choice is FEATDEF_CDS
   with sesp_to_pept; the display is then rebuilt and invalidated.
   NOTE(review): features are created with SEQFEAT_CDREGION but the
   second loop filters on FEATDEF_CDS -- confirm the two constants agree
   for this list.
   NOTE(review): the start of the second loop, and the release of the
   slp and ssp objects built in the first loop, are on lines not visible
   in this excerpt.
   NOTE(review): hratio is computed from adp->length before the visible
   guard, with no check against zero. */
extern void TranslateAllBioseq (PaneL pnl, EditAlignDataPtr adp)
3563
ValNodePtr vnp = NULL;
3566
hratio = (float)adp->hoffset / (float)adp->length;
3567
if (adp->seqnumber > 0 || ISA_na(adp->mol_type)) {
3568
for (vnp = adp->anp_list; vnp != NULL; vnp = vnp->next) {
3569
if ( (anp = (AlignNodePtr) vnp->data.ptrvalue) != NULL)
3571
slp = CollectSeqLocFromAlignNode(anp);
3573
ssp = SelStructNew (anp->seq_entityID, anp->bsp_itemID, OBJ_BIOSEQ, SeqLocStart(slp), SeqLocStop(slp), SeqLocId(slp), SeqLocStrand(slp), FALSE);
3575
MakeFeatFunc (adp, ssp, SEQFEAT_CDREGION, Seq_strand_plus);
3582
for (; vnp != NULL; vnp = vnp->next)
3584
if (vnp->choice == FEATDEF_CDS) {
3585
cds = (SelEdStructPtr) vnp->data.ptrvalue;
3586
sesp_to_pept(cds, (SeqAlignPtr) adp->sap_align->data, adp->sqloc_list, TRUE);
3589
data_collect_arrange (adp, TRUE);
3590
SeqEdSetCorrectBarMax (pnl, adp, hratio);
3591
temport = SavePort(ParentWindow(pnl));
3593
inval_panel (pnl, -1, -1);
3594
RestorePort (temport);
3599
/**************************************/
3600
/**************************************/
3601
/**************************************/
3602
/**************************************/
3603
/**************************************/
3605
/* FindBioseqCB3
   SeqEntryExplore callback that records the first Bioseq met.
   mydata is a ValNodePtr; its data.ptrvalue is set to the Bioseq of the
   current entry only while it is still NULL, so later Bioseq entries do
   not overwrite the first one. */
static void FindBioseqCB3 (SeqEntryPtr sep, Pointer mydata,
3606
Int4 index, Int2 indent)
3610
if (sep != NULL && sep->data.ptrvalue && mydata != NULL) {
3611
if (IS_Bioseq(sep)) {
3612
vnp = (ValNodePtr)mydata;
3613
if (vnp->data.ptrvalue==NULL)
3615
vnp->data.ptrvalue=(BioseqPtr) sep->data.ptrvalue;
3621
/*******************************************************
*** is_sip_inseqinfo
***   Walks the seq_info list, takes each element's region
***   as a SeqLocPtr and compares its SeqId with sip using
***   SeqIdForSameBioseq.
***
***   sip      : SeqId to look for
***   seq_info : head of the list to search (may be NULL,
***              in which case the loop body never runs)
***
***   NOTE(review): the return statements are not visible in
***   this excerpt; presumably the matching element is
***   returned and NULL otherwise (callers test the result
***   against NULL) -- TODO confirm.
*******************************************************/
static SelEdStructPtr is_sip_inseqinfo (SeqIdPtr sip, SelEdStructPtr seq_info)
3626
for (tmp=seq_info; tmp!=NULL; tmp=tmp->next)
3628
slp=(SeqLocPtr)tmp->region;
3629
if (SeqIdForSameBioseq (sip, SeqLocId(slp)))
3637
/*******************************************************
*** CCReadAnythingLoop
***   Reads objects from a file with ReadAsnFastaOrFlatFile
***   until it returns NULL, collecting them in a ValNode
***   list (choice = datatype, ptrvalue = object).  Each
***   collected object is then registered with the object
***   manager, and for OBJ_BIOSEQ and OBJ_SEQENTRY objects a
***   SeqLoc interval 0..length-1 on the plus strand is
***   appended to the list slphead.
***
***   filename : file to read; when NULL, GetInputFileName
***              is called to fill the local buffer name
***   seq_info : list searched through is_sip_inseqinfo for
***              the id of each Bioseq read
***
***   NOTE(review): many original lines (braces, most local
***   declarations, allocation of sep, cleanup, closing of
***   fp and the return statement) are not visible in this
***   excerpt; presumably slphead is what gets returned --
***   TODO confirm.
*******************************************************/
extern ValNodePtr CCReadAnythingLoop (CharPtr filename, SelEdStructPtr seq_info)
3639
Char name [PATH_MAX];
3645
ValNodePtr head = NULL,
3655
if (filename == NULL)
3657
/* NOTE(review): the handling of a failed prompt and the way name is
   used afterwards are not visible; FileOpen below is given filename. */
if (! GetInputFileName (name, PATH_MAX,"","TEXT")) {
3662
fp = FileOpen (filename, "r");
3664
/* Phase 1: read every object found in the file into the head list. */
while ((dataptr = ReadAsnFastaOrFlatFile (fp, &datatype, NULL, FALSE, FALSE, TRUE, FALSE)) != NULL) {
3665
ValNodeAddPointer (&head, datatype, dataptr);
3668
/* Phase 2: register each object and derive one SeqLoc per sequence. */
for (vnp = head; vnp != NULL; vnp = vnp->next) {
3669
datatype = vnp->choice;
3670
dataptr = vnp->data.ptrvalue;
3671
entityID = ObjMgrRegister (datatype, dataptr);
3672
if (datatype == OBJ_BIOSEQ)
3674
bsp=(BioseqPtr)vnp->data.ptrvalue;
3675
slp = SeqLocIntNew (0, bsp->length-1, Seq_strand_plus, SeqIdFindBest (bsp->id, 0));
3678
/* The id is already present in seq_info: SeqEntryReplaceSeqID is
   applied through a SeqEntry that points at bsp, then slp is rebuilt
   from bsp->id.
   NOTE(review): where sep comes from is not visible here, nor whether
   the first slp is freed before being overwritten -- check for a leak. */
if ((tmp=is_sip_inseqinfo(bsp->id, seq_info)) != NULL) {
3681
sep->data.ptrvalue = (Pointer)bsp;
3682
SeqEntryReplaceSeqID (sep, SeqLocId(slp));
3684
slp = SeqLocIntNew(0, bsp->length-1, Seq_strand_plus, bsp->id);
3685
/* Detach bsp from sep again. */
sep->data.ptrvalue = NULL;
3689
ValNodeAddPointer (&slphead, 0, (Pointer) slp);
3691
else if (datatype == OBJ_SEQENTRY)
3693
sep=(SeqEntryPtr)vnp->data.ptrvalue;
3694
/* FindBioseqCB3 fills vn.data.ptrvalue with a Bioseq of the entry
   (it only stores while the slot is NULL, hence the reset). */
vn.data.ptrvalue=NULL;
3695
SeqEntryExplore (sep, &vn, FindBioseqCB3);
3696
if (vn.data.ptrvalue!=NULL) {
3697
bsp=(BioseqPtr)vn.data.ptrvalue;
3698
slp = SeqLocIntNew (0, bsp->length-1, Seq_strand_plus, SeqIdFindBest (bsp->id, 0));
3701
/* Same replacement as in the OBJ_BIOSEQ branch, applied to the
   SeqEntry that was read.
   NOTE(review): as above, the fate of the first slp is not visible. */
if ((tmp=is_sip_inseqinfo(bsp->id, seq_info)) != NULL) {
3702
SeqEntryReplaceSeqID (sep, SeqLocId(slp));
3704
slp = SeqLocIntNew(0, bsp->length-1, Seq_strand_plus, bsp->id);
3707
ValNodeAddPointer (&slphead, 0, (Pointer) slp);
3721
/*******************************************************
3723
*** copied from Jonathan's code
3724
*** without the following lines:
3726
rsult = SeqEntryNew ();
3727
if (rsult != NULL) {
3728
rsult->choice = sep->choice;
3729
rsult->data.ptrvalue = sep->data.ptrvalue;
3730
sep->data.ptrvalue = NULL;
3731
if (datatype == OBJ_SEQSUB) {
3732
SeqSubmitFree ((SeqSubmitPtr) dataptr);
3736
if (!ObjMgrRegister (OBJ_SEQENTRY, (Pointer) rsult))
3737
rsult = SeqEntryFree (rsult);
3739
***********************************************************/
3740
/*******************************************************
*** AsnReadForSalsa
***   Reads an ASN.1 text file with
***   ObjMgrGenericAsnTextFileRead.  For OBJ_SEQSUB,
***   OBJ_SEQENTRY, OBJ_BIOSEQ and OBJ_BIOSEQSET data it
***   obtains the top SeqEntry of the entity, copies it with
***   AsnIoMemCopy into rsult, and then frees the object that
***   was read with ObjMgrFree.
***
***   path : file to read; the read is attempted only when
***          path is non-NULL and non-empty
***
***   NOTE(review): braces, most local declarations, the
***   conditions guarding the file prompt and the SeqEntry
***   wrapper, and the return statement are not visible in
***   this excerpt; presumably rsult is returned -- TODO
***   confirm.
*******************************************************/
extern SeqEntryPtr AsnReadForSalsa (CharPtr path)
3742
Char name[PATH_MAX];
3750
/* NOTE(review): presumably records that sep was created here and must
   be freed after the copy; its uses are not visible -- TODO confirm. */
Boolean delete_sep=FALSE;
3754
/* NOTE(review): the condition under which the user is prompted, and
   how name is used afterwards, are not visible here. */
if (! GetInputFileName (name, PATH_MAX,"","TEXT")) {
3759
if (path != NULL && path [0] != '\0') {
3760
dataptr = ObjMgrGenericAsnTextFileRead (path, &datatype, &entityID);
3761
if (dataptr != NULL && entityID > 0) {
3762
if (datatype == OBJ_SEQSUB || datatype == OBJ_SEQENTRY ||
3763
datatype == OBJ_BIOSEQ || datatype == OBJ_BIOSEQSET) {
3764
sep = GetTopSeqEntryForEntityID (entityID);
3766
/* A new SeqEntry wrapper is created around a bare Bioseq or BioseqSet.
   NOTE(review): the guard for this path is not visible; presumably it
   runs only when no top SeqEntry was found -- TODO confirm. */
sep = SeqEntryNew ();
3768
if (datatype == OBJ_BIOSEQ) {
3769
bsp = (BioseqPtr) dataptr;
3771
sep->data.ptrvalue = bsp;
3772
SeqMgrSeqEntry (SM_BIOSEQ, (Pointer) bsp, sep);
3773
} else if (datatype == OBJ_BIOSEQSET) {
3774
bssp = (BioseqSetPtr) dataptr;
3776
sep->data.ptrvalue = bssp;
3777
SeqMgrSeqEntry (SM_BIOSEQSET, (Pointer) bssp, sep);
3780
/* Look the top SeqEntry up again. */
sep = GetTopSeqEntryForEntityID (entityID);
3784
/* Copy made by writing sep out and reading it back through the
   SeqEntry ASN.1 read and write functions. */
rsult = (SeqEntryPtr) AsnIoMemCopy((Pointer) sep, (AsnReadFunc)SeqEntryAsnRead, (AsnWriteFunc)SeqEntryAsnWrite);
3786
/* Detach the payload first so that SeqEntryFree does not release the
   object that ObjMgrFree handles below. */
sep->data.ptrvalue = NULL;
3787
sep = SeqEntryFree (sep);
3791
ObjMgrFree (datatype, (Pointer) dataptr);
3797
extern SeqEntryPtr seqentry_read (CharPtr path)
3799
Char name[PATH_MAX];
3807
if (! GetInputFileName (name, PATH_MAX,"","TEXT")) {
3812
amp = AsnAllModPtr ();
3813
atp = AsnTypeFind (amp,"SeqEntry");
3814
if ((aip = AsnIoOpen (path,"r")) == NULL) {
3817
sep = SeqEntryAsnRead ( aip, atp );
3818
aip = AsnIoClose (aip);