/*
   Copyright (c) 2008 Genome Research Ltd (GRL).

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be
   included in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   SOFTWARE.
*/
/* Contact: Heng Li <lh3@sanger.ac.uk> */
28
/* Last Modified: 12APR2009 */
37
#define KS_SEP_SPACE 0 // ks_getuntil() delimiter mode: stop at any isspace() character (' ', \t, \n, \v, \f, \r)
38
#define KS_SEP_TAB 1 // ks_getuntil() delimiter mode: stop at any isspace() character except ' ' (\t, \n, \v, \f, \r)
41
#define __KS_TYPE(type_t) \
42
typedef struct __kstream_t { \
44
int begin, end, is_eof; \
48
/* Evaluates to 1 when the stream is fully consumed, i.e. the is_eof flag is
   set and no unread bytes remain in the buffer (begin >= end); 0 otherwise.
   The argument is expanded more than once, so it must be free of side effects. */
#define ks_eof(ks) (!(!(ks)->is_eof || (ks)->begin < (ks)->end))
49
/* Clear the buffer bookkeeping of a stream: end, begin and is_eof are all set
   to 0, and the whole expression evaluates to 0. Only these three fields are
   written; nothing here repositions the underlying handle.
   The argument is expanded more than once, so it must be free of side effects. */
#define ks_rewind(ks) ((ks)->end = 0, (ks)->begin = 0, (ks)->is_eof = 0)
51
#define __KS_BASIC(type_t, __bufsize) \
52
static inline kstream_t *ks_init(type_t f) \
54
kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \
56
ks->buf = (char*)malloc(__bufsize); \
59
static inline void ks_destroy(kstream_t *ks) \
67
#define __KS_GETC(__read, __bufsize) \
68
static inline int ks_getc(kstream_t *ks) \
70
if (ks->is_eof && ks->begin >= ks->end) return -1; \
71
if (ks->begin >= ks->end) { \
73
ks->end = __read(ks->f, ks->buf, __bufsize); \
74
if (ks->end < __bufsize) ks->is_eof = 1; \
75
if (ks->end == 0) return -1; \
77
return (int)ks->buf[ks->begin++]; \
81
/* NOTE(review): presumably a marker that kstring_t has already been declared,
   so other headers can test KSTRING_T before declaring their own copy -- the
   surrounding guard is not visible from here; confirm. */
#define KSTRING_T kstring_t
82
typedef struct __kstring_t {
89
/* Round x up, in place, to the smallest power of two that is >= x, and
   evaluate to the new value.
   - x must be a modifiable integer lvalue; it is expanded many times, so it
     must not have side effects.
   - The bit smearing covers shifts of 1..16 only, so a power of two is
     guaranteed only for values that fit in 32 bits.
   - For unsigned x, 0 wraps around in the first step and comes back as 0. */
#define kroundup32(x) \
	((x) -= 1, \
	 (x) |= (x) >> 1, \
	 (x) |= (x) >> 2, \
	 (x) |= (x) >> 4, \
	 (x) |= (x) >> 8, \
	 (x) |= (x) >> 16, \
	 (x) += 1)
92
#define __KS_GETUNTIL(__read, __bufsize) \
93
static int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
95
if (dret) *dret = 0; \
97
if (ks->begin >= ks->end && ks->is_eof) return -1; \
100
if (ks->begin >= ks->end) { \
103
ks->end = __read(ks->f, ks->buf, __bufsize); \
104
if (ks->end < __bufsize) ks->is_eof = 1; \
105
if (ks->end == 0) break; \
108
if (delimiter > KS_SEP_MAX) { \
109
for (i = ks->begin; i < ks->end; ++i) \
110
if (ks->buf[i] == delimiter) break; \
111
} else if (delimiter == KS_SEP_SPACE) { \
112
for (i = ks->begin; i < ks->end; ++i) \
113
if (isspace(ks->buf[i])) break; \
114
} else if (delimiter == KS_SEP_TAB) { \
115
for (i = ks->begin; i < ks->end; ++i) \
116
if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \
117
} else i = 0; /* never come to here! */ \
118
if (str->m - str->l < i - ks->begin + 1) { \
119
str->m = str->l + (i - ks->begin) + 1; \
120
kroundup32(str->m); \
121
str->s = (char*)realloc(str->s, str->m); \
123
memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \
124
str->l = str->l + (i - ks->begin); \
127
if (dret) *dret = ks->buf[i]; \
133
str->s = (char*)calloc(1, 1); \
135
str->s[str->l] = '\0'; \
139
#define KSTREAM_INIT(type_t, __read, __bufsize) \
141
__KS_BASIC(type_t, __bufsize) \
142
__KS_GETC(__read, __bufsize) \
143
__KS_GETUNTIL(__read, __bufsize)
145
#define __KSEQ_BASIC(type_t) \
146
static inline kseq_t *kseq_init(type_t fd) \
148
kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \
149
s->f = ks_init(fd); \
152
static inline void kseq_rewind(kseq_t *ks) \
155
ks->f->is_eof = ks->f->begin = ks->f->end = 0; \
157
static inline void kseq_destroy(kseq_t *ks) \
160
free(ks->name.s); free(ks->comment.s); free(ks->seq.s); free(ks->qual.s); \
/* Return value of kseq_read():
   >=0  length of the sequence (normal)
   -1   end-of-file
   -2   truncated quality string
 */
170
#define __KSEQ_READ \
171
static int kseq_read(kseq_t *seq) \
174
kstream_t *ks = seq->f; \
175
if (seq->last_char == 0) { /* then jump to the next header line */ \
176
while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \
177
if (c == -1) return -1; /* end of file */ \
178
seq->last_char = c; \
179
} /* the first header char has been read */ \
180
seq->comment.l = seq->seq.l = seq->qual.l = 0; \
181
if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; \
182
if (c != '\n') ks_getuntil(ks, '\n', &seq->comment, 0); \
183
while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
184
if (isgraph(c)) { /* printable non-space character */ \
185
if (seq->seq.l + 1 >= seq->seq.m) { /* double the memory */ \
186
seq->seq.m = seq->seq.l + 2; \
187
kroundup32(seq->seq.m); /* rounded to next closest 2^k */ \
188
seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
190
seq->seq.s[seq->seq.l++] = (char)c; \
193
if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
194
seq->seq.s[seq->seq.l] = 0; /* null terminated string */ \
195
if (c != '+') return seq->seq.l; /* FASTA */ \
196
if (seq->qual.m < seq->seq.m) { /* allocate enough memory */ \
197
seq->qual.m = seq->seq.m; \
198
seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \
200
while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
201
if (c == -1) return -2; /* we should not stop here */ \
202
while ((c = ks_getc(ks)) != -1 && seq->qual.l < seq->seq.l) \
203
if (c >= 33 && c <= 127) seq->qual.s[seq->qual.l++] = (unsigned char)c; \
204
seq->qual.s[seq->qual.l] = 0; /* null terminated string */ \
205
seq->last_char = 0; /* we have not come to the next header line */ \
206
if (seq->seq.l != seq->qual.l) return -2; /* qual string is shorter than seq string */ \
210
#define __KSEQ_TYPE(type_t) \
212
kstring_t name, comment, seq, qual; \
217
#define KSEQ_INIT(type_t, __read) \
218
KSTREAM_INIT(type_t, __read, 4096) \
219
__KSEQ_TYPE(type_t) \
220
__KSEQ_BASIC(type_t) \