3
* Sequence handler library by Huzefa Rangwala
16
/*********************************************************/
17
/*! \brief Initializes the <tt>gk_seq_t</tt> variable
22
\param seq A pointer to the gk_seq_t variable to initialize
25
/***********************************************************************/
27
void gk_seq_init(gk_seq_t *seq)
40
/***********************************************************************/
41
/*! \brief This function creates the index-to-character (i2c) and character-to-index (c2i) mappings for the symbols of a given alphabet
43
\param alphabet is the string of symbols to map, e.g., amino acids or nucleotides
44
\returns gk_i2cc2i_t variable
46
/*********************************************************************/
48
gk_i2cc2i_t *gk_i2cc2i_create_common(char *alphabet)
56
nsymbols = strlen(alphabet);
57
t = gk_malloc(sizeof(gk_i2cc2i_t),"gk_i2c_create_common");
59
t->i2c = gk_cmalloc(256, "gk_i2c_create_common");
60
t->c2i = gk_imalloc(256, "gk_i2c_create_common");
63
gk_cset(256, -1, t->i2c);
64
gk_iset(256, -1, t->c2i);
66
for(i=0;i<nsymbols;i++){
67
t->i2c[i] = alphabet[i];
68
t->c2i[(int)alphabet[i]] = i;
76
/*********************************************************************/
77
/*! \brief This function reads a pssm in the format of gkmod pssm
79
\param filename is the name of the pssm file
82
/********************************************************************/
83
gk_seq_t *gk_seq_ReadGKMODPSSM(char *filename)
87
size_t ntokens, nbytes, len;
92
static char *AAORDER = "ARNDCQEGHILKMFPSTWYVBZX*";
93
static int PSSMWIDTH = 20;
94
char *header, line[MAXLINELEN];
95
gk_i2cc2i_t *converter;
97
header = gk_cmalloc(PSSMWIDTH, "gk_seq_ReadGKMODPSSM: header");
99
converter = gk_i2cc2i_create_common(AAORDER);
101
gk_getfilestats(filename, &len, &ntokens, NULL, &nbytes);
104
seq = gk_malloc(sizeof(gk_seq_t),"gk_seq_ReadGKMODPSSM");
108
seq->sequence = gk_imalloc(len, "gk_seq_ReadGKMODPSSM");
109
seq->pssm = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM");
110
seq->psfm = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM");
112
seq->nsymbols = PSSMWIDTH;
113
seq->name = gk_getbasename(filename);
115
fpin = gk_fopen(filename,"r","gk_seq_ReadGKMODPSSM");
118
/* Read the header line */
119
if (fgets(line, MAXLINELEN-1, fpin) == NULL)
120
errexit("Unexpected end of file: %s\n", filename);
122
gk_strtokenize(line, " \t\n", &tokens);
124
for (i=0; i<PSSMWIDTH; i++)
125
header[i] = tokens.list[i][0];
127
gk_freetokenslist(&tokens);
130
/* Read the rest of the lines */
131
for (i=0, ii=0; ii<len; ii++) {
132
if (fgets(line, MAXLINELEN-1, fpin) == NULL)
133
errexit("Unexpected end of file: %s\n", filename);
135
gk_strtokenize(line, " \t\n", &tokens);
137
seq->sequence[i] = converter->c2i[(int)tokens.list[1][0]];
139
for (j=0; j<PSSMWIDTH; j++) {
140
seq->pssm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+j]);
141
seq->psfm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+PSSMWIDTH+j]);
146
gk_freetokenslist(&tokens);
150
seq->len = i; /* Reset the length if certain characters were skipped */
152
gk_free((void **)&header, LTERM);
159
/**************************************************************************/
160
/*! \brief This function frees the memory allocated to the seq structure.
165
/**************************************************************************/
166
void gk_seq_free(gk_seq_t *seq)
168
gk_iFreeMatrix(&seq->pssm, seq->len, seq->nsymbols);
169
gk_iFreeMatrix(&seq->psfm, seq->len, seq->nsymbols);
170
gk_free((void **)&seq->name, &seq->sequence, LTERM);
171
//gk_free((void **)&seq, LTERM);
172
gk_free((void **) &seq, LTERM);