~ubuntu-branches/ubuntu/quantal/samtools/quantal

« back to all changes in this revision

Viewing changes to kseq.h

  • Committer: Bazaar Package Importer
  • Author(s): Charles Plessy
  • Date: 2009-09-03 19:29:40 UTC
  • Revision ID: james.westby@ubuntu.com-20090903192940-o9gv6ubu11aztg8b
Tags: upstream-0.1.5c
Import upstream version 0.1.5c

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/* The MIT License

   Copyright (c) 2008 Genome Research Ltd (GRL).

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be
   included in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   SOFTWARE.
*/
 
25
 
 
26
/* Contact: Heng Li <lh3@sanger.ac.uk> */
 
27
 
 
28
/* Last Modified: 12APR2009 */
 
29
 
 
30
#ifndef AC_KSEQ_H
 
31
#define AC_KSEQ_H
 
32
 
 
33
#include <ctype.h>
 
34
#include <string.h>
 
35
#include <stdlib.h>
 
36
 
 
37
/*
 * Special delimiter codes accepted by ks_getuntil().  Any delimiter value
 * greater than KS_SEP_MAX is treated as a literal character to stop at.
 */
#define KS_SEP_SPACE 0 /* stop at any isspace() character: ' ', \t, \n, \v, \f, \r */
#define KS_SEP_TAB   1 /* stop at any isspace() character except ' ' */
#define KS_SEP_MAX   1 /* largest special delimiter code */
 
40
 
 
41
/*
 * __KS_TYPE(type_t): declares kstream_t, a buffered reader state wrapping an
 * input handle of type `type_t`.
 *
 *   buf     read buffer (allocated by ks_init)
 *   begin   index of the next unread byte in buf
 *   end     number of valid bytes currently in buf
 *   is_eof  set once a read returned fewer bytes than the buffer size
 *   f       the wrapped input handle
 */
#define __KS_TYPE(type_t)               \
    typedef struct __kstream_t {        \
        char *buf;                      \
        int begin, end, is_eof;         \
        type_t f;                       \
    } kstream_t;
 
47
 
 
48
/* Non-zero when the stream has hit end of input AND the buffer is fully consumed. */
#define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end)
/*
 * Reset the buffer state (is_eof, begin, end) to zero.  This only clears the
 * kstream bookkeeping; it does not touch the underlying handle `f`.
 */
#define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0)
 
50
 
 
51
/*
 * __KS_BASIC(type_t, __bufsize): emits ks_init() and ks_destroy() for the
 * kstream_t type.
 *
 * ks_init(f)      Allocates a zero-initialised kstream_t that wraps handle `f`
 *                 and a read buffer of __bufsize bytes.  Returns NULL if
 *                 either allocation fails (nothing is leaked in that case).
 * ks_destroy(ks)  Frees the buffer and the stream object.  A NULL `ks` is a
 *                 no-op.  The wrapped handle `f` is not touched.
 *
 * The casts on the allocation results are kept from the original code.
 */
#define __KS_BASIC(type_t, __bufsize)                                   \
    static inline kstream_t *ks_init(type_t f)                          \
    {                                                                   \
        kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t));       \
        if (ks == 0) return 0;                                          \
        ks->f = f;                                                      \
        ks->buf = (char*)malloc(__bufsize);                             \
        if (ks->buf == 0) { /* do not hand out a stream with no buffer */ \
            free(ks);                                                   \
            return 0;                                                   \
        }                                                               \
        return ks;                                                      \
    }                                                                   \
    static inline void ks_destroy(kstream_t *ks)                        \
    {                                                                   \
        if (ks) {                                                       \
            free(ks->buf);                                              \
            free(ks);                                                   \
        }                                                               \
    }
 
66
 
 
67
/*
 * __KS_GETC(__read, __bufsize): emits ks_getc(), a buffered single-byte read.
 *
 * `__read` is invoked as __read(ks->f, ks->buf, __bufsize) and its result is
 * taken as the number of valid bytes now in the buffer.  A result smaller
 * than __bufsize marks the stream as finished (is_eof).
 *
 * Returns the next byte as a value in 0..255, or -1 when no more input is
 * available.  The byte is converted through unsigned char so that 0xFF is
 * not confused with the -1 end-of-input marker.  A zero or negative result
 * from `__read` is treated as end of input; `end` is normalised to 0 so the
 * buffer state stays consistent.
 */
#define __KS_GETC(__read, __bufsize)                                    \
    static inline int ks_getc(kstream_t *ks)                            \
    {                                                                   \
        if (ks->is_eof && ks->begin >= ks->end) return -1;              \
        if (ks->begin >= ks->end) { /* buffer drained: refill */        \
            ks->begin = 0;                                              \
            ks->end = __read(ks->f, ks->buf, __bufsize);                \
            if (ks->end < __bufsize) ks->is_eof = 1;                    \
            if (ks->end <= 0) { /* nothing read, or read failure */     \
                ks->end = 0;                                            \
                return -1;                                              \
            }                                                           \
        }                                                               \
        return (int)(unsigned char)ks->buf[ks->begin++];                \
    }
 
79
 
 
80
/*
 * Growable string buffer.  Guarded by KSTRING_T so that a definition made
 * elsewhere under the same guard macro is not repeated here.
 *
 *   l  number of bytes currently stored (excluding the terminating NUL)
 *   m  number of bytes allocated for s
 *   s  heap buffer, or NULL before first use
 */
#ifndef KSTRING_T
#define KSTRING_T kstring_t
typedef struct __kstring_t {
    size_t l, m;
    char *s;
} kstring_t;
#endif
 
87
 
 
88
/*
 * kroundup32(x): round the lvalue `x` up, in place, to the next power of two
 * (a value that is already a power of two is left unchanged).
 *
 * The bit-smearing only covers 32 bits, so the result is only meaningful for
 * values that fit in 32 bits.  `x` is evaluated several times and must be a
 * side-effect-free lvalue.  An input of 0 yields 0.
 */
#ifndef kroundup32
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
#endif
 
91
 
 
92
/*
 * __KS_GETUNTIL(__read, __bufsize): emits ks_getuntil(), which reads bytes up
 * to (and consuming, but not storing) the next delimiter.
 *
 *   ks         stream to read from
 *   delimiter  KS_SEP_SPACE, KS_SEP_TAB, or a literal byte value > KS_SEP_MAX
 *   str        output buffer; its length is reset to 0 on entry, the buffer
 *              is grown with realloc() as needed and is NUL-terminated on
 *              return.  An already allocated buffer is always reused.
 *   dret       if non-NULL, receives the delimiter byte that ended the field,
 *              or 0 if input ended before a delimiter was seen
 *
 * Returns the number of bytes stored in `str` (possibly 0), or -1 if the
 * stream was already exhausted on entry.
 *
 * Bytes are converted to unsigned char before being passed to isspace() or
 * compared with `delimiter`, so input bytes >= 0x80 are handled portably.
 * A zero or negative result from `__read` is treated as end of input.
 */
#define __KS_GETUNTIL(__read, __bufsize)                                \
    static int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
    {                                                                   \
        if (dret) *dret = 0;                                            \
        str->l = 0;                                                     \
        if (ks->begin >= ks->end && ks->is_eof) return -1;              \
        for (;;) {                                                      \
            int i;                                                      \
            if (ks->begin >= ks->end) { /* buffer drained: refill */    \
                if (ks->is_eof) break;                                  \
                ks->begin = 0;                                          \
                ks->end = __read(ks->f, ks->buf, __bufsize);            \
                if (ks->end < __bufsize) ks->is_eof = 1;                \
                if (ks->end <= 0) { /* nothing read, or read failure */ \
                    ks->end = 0;                                        \
                    break;                                              \
                }                                                       \
            }                                                           \
            /* find the first delimiter in [begin, end); i == end if none */ \
            if (delimiter > KS_SEP_MAX) {                               \
                for (i = ks->begin; i < ks->end; ++i)                   \
                    if ((unsigned char)ks->buf[i] == delimiter) break;  \
            } else if (delimiter == KS_SEP_SPACE) {                     \
                for (i = ks->begin; i < ks->end; ++i)                   \
                    if (isspace((unsigned char)ks->buf[i])) break;      \
            } else if (delimiter == KS_SEP_TAB) {                       \
                for (i = ks->begin; i < ks->end; ++i)                   \
                    if (isspace((unsigned char)ks->buf[i]) && ks->buf[i] != ' ') break; \
            } else i = 0; /* never come to here! (negative delimiter is unsupported) */ \
            /* make room for the chunk plus the terminating NUL */      \
            if (str->m - str->l < i - ks->begin + 1) {                  \
                str->m = str->l + (i - ks->begin) + 1;                  \
                kroundup32(str->m);                                     \
                str->s = (char*)realloc(str->s, str->m);                \
            }                                                           \
            memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \
            str->l = str->l + (i - ks->begin);                          \
            ks->begin = i + 1; /* skip past the delimiter, if any */    \
            if (i < ks->end) { /* delimiter found inside this chunk */  \
                if (dret) *dret = (unsigned char)ks->buf[i];            \
                break;                                                  \
            }                                                           \
        }                                                               \
        /* allocate only if the caller's buffer was never allocated, so an */ \
        /* existing buffer is not leaked when the field is empty */     \
        if (str->s == 0) {                                              \
            str->m = 1;                                                 \
            str->s = (char*)calloc(1, 1);                               \
        }                                                               \
        str->s[str->l] = '\0';                                          \
        return (int)str->l;                                             \
    }
 
138
 
 
139
/*
 * KSTREAM_INIT(type_t, __read, __bufsize): instantiates the complete buffered
 * stream layer for one handle type: the kstream_t type plus ks_init(),
 * ks_destroy(), ks_getc() and ks_getuntil().
 *
 *   type_t     type of the wrapped input handle
 *   __read     callable as __read(handle, buf, size); its result is used as
 *              the number of bytes stored in buf
 *   __bufsize  size in bytes of the internal read buffer
 */
#define KSTREAM_INIT(type_t, __read, __bufsize) \
    __KS_TYPE(type_t)                           \
    __KS_BASIC(type_t, __bufsize)               \
    __KS_GETC(__read, __bufsize)                \
    __KS_GETUNTIL(__read, __bufsize)
 
144
 
 
145
/*
 * __KSEQ_BASIC(type_t): emits the lifecycle helpers for kseq_t.
 *
 * kseq_init(fd)     Allocates a zero-initialised kseq_t whose stream wraps
 *                   handle `fd`.  Returns NULL if the record cannot be
 *                   allocated or ks_init() returns NULL.
 * kseq_rewind(ks)   Clears last_char and resets the stream buffer state.
 *                   It does not touch the underlying handle.
 * kseq_destroy(ks)  Frees the four string buffers, the stream and the record
 *                   itself.  A NULL `ks` is a no-op.  The wrapped handle is
 *                   not touched.
 */
#define __KSEQ_BASIC(type_t)                                            \
    static inline kseq_t *kseq_init(type_t fd)                          \
    {                                                                   \
        kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t));                 \
        if (s == 0) return 0;                                           \
        s->f = ks_init(fd);                                             \
        if (s->f == 0) { /* stream could not be created */              \
            free(s);                                                    \
            return 0;                                                   \
        }                                                               \
        return s;                                                       \
    }                                                                   \
    static inline void kseq_rewind(kseq_t *ks)                          \
    {                                                                   \
        ks->last_char = 0;                                              \
        ks_rewind(ks->f);                                               \
    }                                                                   \
    static inline void kseq_destroy(kseq_t *ks)                         \
    {                                                                   \
        if (!ks) return;                                                \
        free(ks->name.s);                                               \
        free(ks->comment.s);                                            \
        free(ks->seq.s);                                                \
        free(ks->qual.s);                                               \
        ks_destroy(ks->f);                                              \
        free(ks);                                                       \
    }
 
164
 
 
165
/*
 * __KSEQ_READ: emits kseq_read(), which parses the next record from the
 * stream into `seq` (name, optional comment, sequence, optional quality).
 * A record starts at a '>' or '@' header character; a line starting with
 * '+' after the sequence introduces a quality string.
 *
 * Return value:
 *   >=0  length of the sequence (normal)
 *   -1   end-of-file
 *   -2   truncated quality string
 *
 * The sequence buffer is always allocated before it is NUL-terminated, so a
 * record with a header but no sequence characters yields an empty string
 * rather than writing through a NULL pointer.
 */
#define __KSEQ_READ                                                     \
    static int kseq_read(kseq_t *seq)                                   \
    {                                                                   \
        int c;                                                          \
        kstream_t *ks = seq->f;                                         \
        if (seq->last_char == 0) { /* then jump to the next header line */ \
            while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@');    \
            if (c == -1) return -1; /* end of file */                   \
            seq->last_char = c;                                         \
        } /* the first header char has been read */                     \
        seq->comment.l = seq->seq.l = seq->qual.l = 0;                  \
        if (ks_getuntil(ks, KS_SEP_SPACE, &seq->name, &c) < 0) return -1; \
        if (c != '\n') ks_getuntil(ks, '\n', &seq->comment, 0);         \
        while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
            if (isgraph((unsigned char)c)) { /* printable non-space character */ \
                if (seq->seq.l + 1 >= seq->seq.m) { /* double the memory */ \
                    seq->seq.m = seq->seq.l + 2;                        \
                    kroundup32(seq->seq.m); /* rounded to next closest 2^k */ \
                    seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
                }                                                       \
                seq->seq.s[seq->seq.l++] = (char)c;                     \
            }                                                           \
        }                                                               \
        if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
        if (seq->seq.s == 0 || seq->seq.l >= seq->seq.m) { /* empty sequence, buffer never allocated */ \
            seq->seq.m = seq->seq.l + 2;                                \
            kroundup32(seq->seq.m);                                     \
            seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m);        \
        }                                                               \
        seq->seq.s[seq->seq.l] = 0; /* null terminated string */        \
        if (c != '+') return (int)seq->seq.l; /* FASTA */               \
        if (seq->qual.m < seq->seq.m) { /* allocate enough memory */    \
            seq->qual.m = seq->seq.m;                                   \
            seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m);     \
        }                                                               \
        while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
        if (c == -1) return -2; /* we should not stop here */           \
        while ((c = ks_getc(ks)) != -1 && seq->qual.l < seq->seq.l)     \
            if (c >= 33 && c <= 127) seq->qual.s[seq->qual.l++] = (char)c; \
        seq->qual.s[seq->qual.l] = 0; /* null terminated string */      \
        seq->last_char = 0; /* we have not come to the next header line */ \
        if (seq->seq.l != seq->qual.l) return -2; /* qual string is shorter than seq string */ \
        return (int)seq->seq.l;                                         \
    }
 
209
 
 
210
/*
 * __KSEQ_TYPE(type_t): declares kseq_t, the state for reading sequence
 * records.  (`type_t` is accepted for symmetry with the other template
 * macros but is not used in the expansion.)
 *
 *   name, comment, seq, qual  fields of the most recently read record
 *   last_char                 header character ('>' or '@') already consumed
 *                             for the next record, or 0 if none
 *   f                         underlying buffered stream
 */
#define __KSEQ_TYPE(type_t)                     \
    typedef struct {                            \
        kstring_t name, comment, seq, qual;     \
        int last_char;                          \
        kstream_t *f;                           \
    } kseq_t;
 
216
 
 
217
/*
 * KSEQ_INIT(type_t, __read): instantiates the whole reader for one handle
 * type: the buffered stream layer (with a 4096-byte buffer), the kseq_t
 * type, kseq_init()/kseq_rewind()/kseq_destroy() and kseq_read().
 *
 *   type_t  type of the input handle passed to kseq_init()
 *   __read  callable as __read(handle, buf, size); its result is used as the
 *           number of bytes stored in buf
 *
 * Invoke once at file scope in each translation unit that uses the reader;
 * all generated functions are static.
 */
#define KSEQ_INIT(type_t, __read)           \
    KSTREAM_INIT(type_t, __read, 4096)      \
    __KSEQ_TYPE(type_t)                     \
    __KSEQ_BASIC(type_t)                    \
    __KSEQ_READ
 
222
 
 
223
#endif