2
* ===========================================================================
5
* National Center for Biotechnology Information
7
* This software/database is a "United States Government Work" under the
8
* terms of the United States Copyright Act. It was written as part of
9
* the author's official duties as a United States Government employee and
10
* thus cannot be copyrighted. This software/database is freely available
11
* to the public for use. The National Library of Medicine and the U.S.
12
* Government have not placed any restriction on its use or reproduction.
14
* Although all reasonable efforts have been taken to ensure the accuracy
15
* and reliability of the software and data, the NLM and the U.S.
16
* Government do not and cannot warrant the performance or results that
17
* may be obtained by using this software or data. The NLM and the U.S.
18
* Government disclaim all warranties, express or implied, including
19
* warranties of performance, merchantability or fitness for any particular
22
* Please cite the author in any work or product based on this material.
24
* ===========================================================================
28
* Author(s): John Kuzio
30
* Version Creation Date: 98-01-01
34
* File Description: coiled-coil prediction
37
* --------------------------------------------------------------------------
38
* Date Name Description of modification
39
* --------------------------------------------------------------------------
41
* Revision 6.14 1998/12/18 16:24:52 kuzio
44
* Revision 6.13 1998/11/16 14:34:09 kuzio
45
* flagBoundaryCondition
47
* Revision 6.12 1998/09/16 18:19:26 kuzio
50
* ==========================================================================
61
/* Module name string for this program; aliased by the THIS_MODULE macro. */
static char _this_module[] = "ccp";
63
/* Alias for the module-name string above.
 * NOTE(review): presumably read by the toolkit's error-posting macros
 * (e.g. ErrPostEx) -- confirm against the toolkit headers. */
#define THIS_MODULE _this_module
64
/* Name of this source file as supplied by the compiler through __FILE__;
 * aliased by the THIS_FILE macro. */
static char _this_file[] = __FILE__;
66
/* Alias for the source-file-name string above.
 * NOTE(review): presumably read by the toolkit's error-posting macros
 * (e.g. ErrPostEx) -- confirm against the toolkit headers. */
#define THIS_FILE _this_file
72
typedef struct gather_Prot_Bioseq
76
} Gather_PBS, PNTR Gather_PBSPtr;
80
{ "protein GI", "0", "0", "9000000", TRUE,
81
'g', ARG_INT, 0.0, 0, NULL},
82
{ "FastA file", NULL, NULL, NULL, TRUE,
83
'f', ARG_STRING, 0.0, 0, NULL},
84
{ "cc window 1", "22", "7", "42", TRUE,
85
'w', ARG_INT, 0.0, 0, NULL},
86
{ "cc window 2", "-1", "7", "42", TRUE,
87
'x', ARG_INT, 0.0, 0, NULL},
88
{ "cc window 3", "-1", "7", "42", TRUE,
89
'y', ARG_INT, 0.0, 0, NULL},
90
{ "cc window 4", "-1", "7", "42", TRUE,
91
'z', ARG_INT, 0.0, 0, NULL},
92
{ "sequence output", "FALSE", "FALSE", "TRUE", TRUE,
93
'S', ARG_BOOLEAN, 0.0, 0, NULL},
94
{ "X-out sequence output for blast", "FALSE", "FALSE", "TRUE", TRUE,
95
'X', ARG_BOOLEAN, 0.0, 0, NULL},
96
{ "stringent filter", "FALSE", "FALSE", "TRUE", TRUE,
97
's', ARG_BOOLEAN, 0.0, 0, NULL},
98
{ "very stringent filter", "FALSE", "FALSE", "TRUE", TRUE,
99
'v', ARG_BOOLEAN, 0.0, 0, NULL},
100
{ "output line length", "50", "40", "160", TRUE,
101
'l', ARG_INT, 0.0, 0, NULL},
102
{ "data file 0=KSpcc 1=KSmtk 2=KSmtidk", "0", "0", "2", TRUE,
103
'd', ARG_INT, 0.0, 0, NULL},
104
{ "Filter boundary condition hits only", "FALSE", "FALSE", "TRUE", TRUE,
105
'B', ARG_BOOLEAN, 0.0, 0, NULL}
108
static Boolean GetProteinBioseq (GatherContextPtr gcp)
116
if ((gpbsp = (Gather_PBSPtr) gcp->userdata) == NULL)
119
if (gpbsp->bsp != NULL)
121
if (gcp->thistype != OBJ_BIOSEQ)
123
if ((bsp = (BioseqPtr) (gcp->thisitem)) == NULL)
129
entrezgi = GetGIForSeqId (bsp->id);
153
CharPtr datafile[3] = {"KSpcc.mat", "KSmtk.mat", "KSmtidk.mat"};
155
Int4 i, iloop, iwindow, pccwindow, start, stop, linelen;
156
FloatHiPtr pccscore, pccscore1, pccscore2, pccscore3, pccscore4;
161
static GatherScope gs;
163
static Gather_PBS gpbs;
166
argcount = sizeof (myargs) / sizeof (Args);
167
if (!GetArgs ("CCP", argcount, myargs))
173
MemSet ((Pointer) gsp, 0, sizeof (GatherScope));
174
MemSet ((Pointer) gsp->ignore, (int) (TRUE),
175
(size_t) (OBJ_MAX * sizeof (Boolean)));
176
gsp->ignore[OBJ_BIOSEQ] = FALSE;
180
if (myargs[0].intvalue == 0 && myargs[1].strvalue == NULL)
182
ErrPostEx (SEV_ERROR, TOP_ERROR, 100,
183
"No gi or FastA file given :: for help : ccp -");
188
gi = myargs[0].intvalue;
189
if (myargs[1].strvalue != NULL)
190
StrCpy (fastafile, myargs[1].strvalue);
196
if (!EntrezInit ("CCP", FALSE, &flagHaveNet))
198
ErrPostEx (SEV_ERROR, TOP_ERROR, 102,
199
"Entrez init failed");
207
sep = EntrezSeqEntryGet (gi, SEQENTRY_READ_BIOSEQ);
211
if ((fiop = FileOpen (fastafile, "r")) == NULL)
213
ErrPostEx (SEV_ERROR, TOP_ERROR, 103,
214
"Failed to open FastA file");
218
sep = FastaToSeqEntry (fiop, FALSE);
223
ErrPostEx (SEV_ERROR, TOP_ERROR, 104,
224
"No seqentry found");
229
linelen = myargs[10].intvalue;
233
GatherSeqEntry (sep, (Pointer) gpbsp, GetProteinBioseq,
236
if (gpbsp->bsp != NULL)
238
if (ISA_aa (gpbsp->bsp->mol))
240
title = FastaTitle (gpbsp->bsp, ">", NULL);
247
for (i = 0; i < 4; i++)
253
pccwindow = myargs[2].intvalue;
256
pccwindow = myargs[3].intvalue;
259
pccwindow = myargs[4].intvalue;
262
pccwindow = myargs[5].intvalue;
270
pccp->window = pccwindow;
271
MemFree (pccp->pccdatafile);
272
pccp->pccdatafile = StringSave (datafile[myargs[11].intvalue]);
273
if (ReadPccData (pccp) == 0)
275
ErrPostEx (SEV_ERROR, TOP_ERROR, 101,
276
"Could not open or read %s data file",
281
pccscore = PredictCCBioseq (gpbsp->bsp, 0, gpbsp->bsp->length-1,
284
if (pccscore != NULL)
290
pccscore1 = pccscore;
293
pccscore2 = pccscore;
296
pccscore3 = pccscore;
299
pccscore4 = pccscore;
308
if (myargs[6].intvalue == FALSE)
310
printf ("%s\n", title);
314
for (iloop = 0; iloop < gpbsp->bsp->length; iloop++)
315
printf ("%lf\n", (double) pccscore1[iloop]);
318
for (iloop = 0; iloop < gpbsp->bsp->length; iloop++)
319
printf ("%lf %lf\n", (double) pccscore1[iloop],
320
(double) pccscore2[iloop]);
323
for (iloop = 0; iloop < gpbsp->bsp->length; iloop++)
324
printf ("%lf %lf %lf\n", (double) pccscore1[iloop],
325
(double) pccscore2[iloop],
326
(double) pccscore3[iloop]);
329
for (iloop = 0; iloop < gpbsp->bsp->length; iloop++)
330
printf ("%lf %lf %lf %lf\n", (double) pccscore1[iloop],
331
(double) pccscore2[iloop],
332
(double) pccscore3[iloop],
333
(double) pccscore4[iloop]);
336
ErrPostEx (SEV_ERROR, TOP_ERROR, 107,
337
"No coiled-coil predictions made");
343
sequence = (Uint1Ptr) MemNew ((size_t) (sizeof (Uint1) *
344
gpbsp->bsp->length+1));
345
spp = SeqPortNew (gpbsp->bsp, 0, gpbsp->bsp->length-1, 0,
347
SeqPortSeek (spp, 0, SEEK_SET);
350
while ((sequence[i] = SeqPortGetResidue (spp)) != SEQPORT_EOF)
352
if (('a' <= (Char) sequence[i] && (Char) sequence[i] <= 'z') ||
353
('A' <= (Char) sequence[i] && (Char) sequence[i] <= 'Z'))
359
while (sequence[i] != 0)
361
sequence[i] = (Uint1) TO_UPPER ((Char) sequence[i]);
365
for (iloop = 0; iloop < iwindow; iloop++)
371
pccscore = pccscore1;
374
pccscore = pccscore2;
377
pccscore = pccscore3;
380
pccscore = pccscore4;
383
if (myargs[9].intvalue)
385
slpn = FilterCCVS (pccscore, 40, gpbsp->bsp->length, 32,
387
(Boolean) myargs[12].intvalue);
389
else if (myargs[8].intvalue)
391
slpn = FilterCCVS (pccscore, 50, gpbsp->bsp->length, 24,
393
(Boolean) myargs[12].intvalue);
397
slpn = FilterCC (pccscore, 50, gpbsp->bsp->length, 0,
399
(Boolean) myargs[12].intvalue);
404
start = SeqLocStart (slp);
405
stop = SeqLocStop (slp);
406
for (i = start; i <= stop; i++)
408
if (myargs[7].intvalue == TRUE)
409
sequence[i] = (Uint1) 'x';
411
sequence[i] = (Uint1) TO_LOWER ((Char) sequence[i]);
419
printf ("%s\n", title);
421
while (sequence[i] != 0)
423
printf ("%c", (Char) sequence[i]);
425
if (i % linelen == 0)
427
if (myargs[7].intvalue == TRUE)
430
printf (" %8ld\n", (long) i);
433
if (i % linelen != 0)
446
ErrPostEx (SEV_ERROR, TOP_ERROR, 106,
447
"Not a protein bioseq");
454
ErrPostEx (SEV_ERROR, TOP_ERROR, 105,
459
sep = SeqEntryFree (sep);
462
sep = FastaToSeqEntry (fiop, FALSE);