~ubuntu-branches/ubuntu/quantal/genometools/quantal-backports

« back to all changes in this revision

Viewing changes to src/external/samtools-0.1.18/razf.c

  • Committer: Package Import Robot
  • Author(s): Sascha Steinbiss
  • Date: 2012-07-09 14:10:23 UTC
  • Revision ID: package-import@ubuntu.com-20120709141023-juuu4spm6chqsf9o
Tags: upstream-1.4.1
Import upstream version 1.4.1

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
 * RAZF : Random Access compressed(Z) File
 
3
 * Version: 1.0
 
4
 * Release Date: 2008-10-27
 
5
 *
 
6
 * Copyright 2008, Jue Ruan <ruanjue@gmail.com>, Heng Li <lh3@sanger.ac.uk>
 
7
 *
 
8
 * All rights reserved.
 
9
 *
 
10
 * Redistribution and use in source and binary forms, with or without
 
11
 * modification, are permitted provided that the following conditions
 
12
 * are met:
 
13
 * 1. Redistributions of source code must retain the above copyright
 
14
 *    notice, this list of conditions and the following disclaimer.
 
15
 * 2. Redistributions in binary form must reproduce the above copyright
 
16
 *    notice, this list of conditions and the following disclaimer in the
 
17
 *    documentation and/or other materials provided with the distribution.
 
18
 *
 
19
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 
20
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 
21
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 
22
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 
23
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 
24
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 
25
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 
26
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 
27
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 
28
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 
29
 * SUCH DAMAGE.
 
30
 */
 
31
 
 
32
#ifndef _NO_RAZF
 
33
 
 
34
#include <fcntl.h>
 
35
#include <stdio.h>
 
36
#include <stdlib.h>
 
37
#include <string.h>
 
38
#include <unistd.h>
 
39
#include "razf.h"
 
40
 
 
41
 
 
42
#if ZLIB_VERNUM < 0x1221
 
43
struct _gz_header_s {
 
44
    int     text;
 
45
    uLong   time;
 
46
    int     xflags;
 
47
    int     os;
 
48
    Bytef   *extra;
 
49
    uInt    extra_len;
 
50
    uInt    extra_max;
 
51
    Bytef   *name;
 
52
    uInt    name_max;
 
53
    Bytef   *comment;
 
54
    uInt    comm_max;
 
55
    int     hcrc;
 
56
    int     done;
 
57
};
 
58
#warning "zlib < 1.2.2.1; RAZF writing is disabled."
 
59
#endif
 
60
 
 
61
#define DEF_MEM_LEVEL 8
 
62
 
 
63
/* Reverse the byte order of a 32-bit value (ABCD -> DCBA). */
static inline uint32_t byte_swap_4(uint32_t v){
	const uint32_t lowest  = (v & 0x000000FFU) << 24;
	const uint32_t low_mid = (v & 0x0000FF00U) << 8;
	const uint32_t high_mid = (v & 0x00FF0000U) >> 8;
	const uint32_t highest = (v & 0xFF000000U) >> 24;

	return lowest | low_mid | high_mid | highest;
}
 
67
 
 
68
/* Reverse the byte order of a 64-bit value. */
static inline uint64_t byte_swap_8(uint64_t v){
	uint64_t reversed = 0;
	int byte;

	/* peel bytes off the low end of v and push them onto the low end of
	 * the result, so the first byte taken ends up most significant */
	for(byte = 0; byte < 8; byte++){
		reversed = (reversed << 8) | (v & 0xFFU);
		v >>= 8;
	}
	return reversed;
}
 
73
 
 
74
/* Return 1 when the host stores the most significant byte first, else 0. */
static inline int is_big_endian(){
	const int probe = 0x01;
	const char *first_byte = (const char*)&probe;

	/* on a little-endian host the lowest-addressed byte holds the 1 */
	if(*first_byte == 0x01) return 0;
	return 1;
}
 
79
 
 
80
#ifndef _RZ_READONLY
 
81
/*
 * Append one entry to the write-side block index.
 *
 * rz  - writer whose rz->index receives the entry
 * in  - uncompressed position of the block (not used by this function)
 * out - compressed position of the block
 *
 * Every RZ_BIN_SIZE-th entry stores `out` in full in bin_offsets[]; each
 * entry also stores its distance from its bin's base in cell_offsets[].
 * Both arrays grow by about 1.5x when full.  Running out of memory is fatal:
 * the original code would have written through a NULL pointer, and silently
 * dropping entries would leave holes in the index that is saved later.
 */
static void add_zindex(RAZF *rz, int64_t in, int64_t out){
	ZBlockIndex *idx = rz->index;
	void *grown;

	(void)in;
	if(idx->size == idx->cap){
		/* integer form of cap * 1.5 + 2 (identical result for cap >= 0) */
		idx->cap = idx->cap + idx->cap / 2 + 2;
		/* keep the old pointer until realloc is known to have succeeded */
		grown = realloc(idx->cell_offsets, sizeof(int) * idx->cap);
		if(grown == NULL) goto out_of_memory;
		idx->cell_offsets = grown;
		grown = realloc(idx->bin_offsets, sizeof(int64_t) * (idx->cap/RZ_BIN_SIZE + 1));
		if(grown == NULL) goto out_of_memory;
		idx->bin_offsets = grown;
	}
	if(idx->size % RZ_BIN_SIZE == 0) idx->bin_offsets[idx->size / RZ_BIN_SIZE] = out;
	idx->cell_offsets[idx->size] = out - idx->bin_offsets[idx->size / RZ_BIN_SIZE];
	idx->size ++;
	return;

out_of_memory:
	fprintf(stderr, "[add_zindex] out of memory while growing the block index\n");
	abort();
}
 
91
 
 
92
/* Write exactly len bytes from buf to fd, retrying after partial writes.
 * Returns 0 on success and -1 if write() fails or makes no progress. */
static int zindex_write_all(int fd, const void *buf, size_t len){
	const char *p = buf;

	while(len > 0){
		ssize_t done = write(fd, p, len);
		if(done <= 0) return -1;
		p += done;
		len -= (size_t)done;
	}
	return 0;
}

/*
 * Serialise rz->index to fd in big-endian byte order:
 *   int32   size
 *   int64   bin_offsets[size / RZ_BIN_SIZE + 1]
 *   int32   cell_offsets[size]
 *
 * On little-endian hosts the arrays are byte-swapped IN PLACE, so the
 * in-memory index no longer holds host-order values after this call.
 *
 * When size is a multiple of RZ_BIN_SIZE (including 0) the last bin slot of
 * the on-disk layout has never been filled by add_zindex(), and for an empty
 * index bin_offsets itself is still NULL.  That slot is written as 0 instead
 * of being read from memory.
 *
 * Failures are reported on stderr; the function has no error return.
 */
static void save_zindex(RAZF *rz, int fd){
	ZBlockIndex *idx = rz->index;
	const int64_t empty_bin = 0;
	int32_t i, v32, n_bins, n_filled;
	int is_be, failed;

	is_be = is_big_endian();
	if(is_be) failed = zindex_write_all(fd, &idx->size, sizeof(int)) != 0;
	else {
		v32 = byte_swap_4((uint32_t)idx->size);
		failed = zindex_write_all(fd, &v32, sizeof(uint32_t)) != 0;
	}
	n_bins = idx->size / RZ_BIN_SIZE + 1;
	/* bins that add_zindex() actually stored a value in */
	n_filled = (idx->size % RZ_BIN_SIZE == 0)? n_bins - 1 : n_bins;
	if(!is_be){
		for(i=0;i<n_filled;i++) idx->bin_offsets[i]  = byte_swap_8((uint64_t)idx->bin_offsets[i]);
		for(i=0;i<idx->size;i++) idx->cell_offsets[i] = byte_swap_4((uint32_t)idx->cell_offsets[i]);
	}
	if(!failed && n_filled > 0)
		failed = zindex_write_all(fd, idx->bin_offsets, sizeof(int64_t) * n_filled) != 0;
	if(!failed && n_filled < n_bins)
		failed = zindex_write_all(fd, &empty_bin, sizeof(int64_t)) != 0;
	if(!failed && idx->size > 0)
		failed = zindex_write_all(fd, idx->cell_offsets, sizeof(int32_t) * idx->size) != 0;
	if(failed) fprintf(stderr, "[save_zindex] failed to write the block index\n");
}
 
109
#endif
 
110
 
 
111
#ifdef _USE_KNETFILE
 
112
static void load_zindex(RAZF *rz, knetFile *fp){
 
113
#else
 
114
static void load_zindex(RAZF *rz, int fd){
 
115
#endif
 
116
        int32_t i, v32;
 
117
        int is_be;
 
118
        if(!rz->load_index) return;
 
119
        if(rz->index == NULL) rz->index = malloc(sizeof(ZBlockIndex));
 
120
        is_be = is_big_endian();
 
121
#ifdef _USE_KNETFILE
 
122
        knet_read(fp, &rz->index->size, sizeof(int));
 
123
#else
 
124
        read(fd, &rz->index->size, sizeof(int));
 
125
#endif
 
126
        if(!is_be) rz->index->size = byte_swap_4((uint32_t)rz->index->size);
 
127
        rz->index->cap = rz->index->size;
 
128
        v32 = rz->index->size / RZ_BIN_SIZE + 1;
 
129
        rz->index->bin_offsets  = malloc(sizeof(int64_t) * v32);
 
130
#ifdef _USE_KNETFILE
 
131
        knet_read(fp, rz->index->bin_offsets, sizeof(int64_t) * v32);
 
132
#else
 
133
        read(fd, rz->index->bin_offsets, sizeof(int64_t) * v32);
 
134
#endif
 
135
        rz->index->cell_offsets = malloc(sizeof(int) * rz->index->size);
 
136
#ifdef _USE_KNETFILE
 
137
        knet_read(fp, rz->index->cell_offsets, sizeof(int) * rz->index->size);
 
138
#else
 
139
        read(fd, rz->index->cell_offsets, sizeof(int) * rz->index->size);
 
140
#endif
 
141
        if(!is_be){
 
142
                for(i=0;i<v32;i++) rz->index->bin_offsets[i] = byte_swap_8((uint64_t)rz->index->bin_offsets[i]);
 
143
                for(i=0;i<rz->index->size;i++) rz->index->cell_offsets[i] = byte_swap_4((uint32_t)rz->index->cell_offsets[i]);
 
144
        }
 
145
}
 
146
 
 
147
#ifdef _RZ_READONLY
 
148
/*
 * Read-only build (_RZ_READONLY): writing is unavailable, so this stub only
 * prints a diagnostic and returns NULL.  fd is ignored.
 */
static RAZF* razf_open_w(int fd)
{
	(void)fd;
	fputs("[razf_open_w] Writing is not available with zlib ver < 1.2.2.1\n", stderr);
	return NULL;
}
 
153
#else
 
154
static RAZF* razf_open_w(int fd){
 
155
        RAZF *rz;
 
156
#ifdef _WIN32
 
157
        setmode(fd, O_BINARY);
 
158
#endif
 
159
        rz = calloc(1, sizeof(RAZF));
 
160
        rz->mode = 'w';
 
161
#ifdef _USE_KNETFILE
 
162
    rz->x.fpw = fd;
 
163
#else
 
164
        rz->filedes = fd;
 
165
#endif
 
166
        rz->stream = calloc(sizeof(z_stream), 1);
 
167
        rz->inbuf  = malloc(RZ_BUFFER_SIZE);
 
168
        rz->outbuf = malloc(RZ_BUFFER_SIZE);
 
169
        rz->index = calloc(sizeof(ZBlockIndex), 1);
 
170
        deflateInit2(rz->stream, RZ_COMPRESS_LEVEL, Z_DEFLATED, WINDOW_BITS + 16, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY);
 
171
        rz->stream->avail_out = RZ_BUFFER_SIZE;
 
172
        rz->stream->next_out  = rz->outbuf;
 
173
        rz->header = calloc(sizeof(gz_header), 1);
 
174
        rz->header->os    = 0x03; //Unix
 
175
        rz->header->text  = 0;
 
176
        rz->header->time  = 0;
 
177
        rz->header->extra = malloc(7);
 
178
        strncpy((char*)rz->header->extra, "RAZF", 4);
 
179
        rz->header->extra[4] = 1; // obsolete field
 
180
        // block size = RZ_BLOCK_SIZE, Big-Endian
 
181
        rz->header->extra[5] = RZ_BLOCK_SIZE >> 8;
 
182
        rz->header->extra[6] = RZ_BLOCK_SIZE & 0xFF;
 
183
        rz->header->extra_len = 7;
 
184
        rz->header->name = rz->header->comment  = 0;
 
185
        rz->header->hcrc = 0;
 
186
        deflateSetHeader(rz->stream, rz->header);
 
187
        rz->block_pos = rz->block_off = 0;
 
188
        return rz;
 
189
}
 
190
 
 
191
/*
 * Feed `size` bytes at `data` to deflate with Z_NO_FLUSH.
 *
 * Each time the output buffer fills completely it is written to the file
 * and handed back to deflate empty.  A partially filled output buffer is
 * left in place for a later call.  rz->out grows by the compressed bytes
 * produced; rz->in and rz->block_off grow by the input bytes consumed.
 */
static void _razf_write(RAZF* rz, const void *data, int size){
	z_stream *zs = rz->stream;
	int avail_before;
#ifdef _USE_KNETFILE
	const int out_fd = rz->x.fpw;
#else
	const int out_fd = rz->filedes;
#endif

	zs->avail_in = size;
	zs->next_in  = (void*)data;
	for(;;){
		avail_before = zs->avail_out;
		deflate(zs, Z_NO_FLUSH);
		rz->out += avail_before - zs->avail_out;
		if(zs->avail_out != 0) break; /* room left: all input was taken */
		write(out_fd, rz->outbuf, RZ_BUFFER_SIZE - zs->avail_out);
		zs->avail_out = RZ_BUFFER_SIZE;
		zs->next_out  = rz->outbuf;
		if(zs->avail_in == 0) break;
	}
	rz->in += size - zs->avail_in;
	rz->block_off += size - zs->avail_in;
}
 
212
 
 
213
/*
 * Close the current block: push any buffered input through deflate, write
 * out what is pending in the output buffer, then run deflate with
 * Z_FULL_FLUSH until it stops filling the output buffer completely.
 * Afterwards rz->block_pos is the current compressed offset and
 * rz->block_off is 0.
 */
static void razf_flush(RAZF *rz){
	z_stream *zs = rz->stream;
	uint32_t avail_before;
#ifdef _USE_KNETFILE
	const int out_fd = rz->x.fpw;
#else
	const int out_fd = rz->filedes;
#endif

	if(rz->buf_len != 0){
		_razf_write(rz, rz->inbuf, rz->buf_len);
		rz->buf_off = rz->buf_len = 0;
	}
	if(zs->avail_out != 0){
		write(out_fd, rz->outbuf, RZ_BUFFER_SIZE - zs->avail_out);
		zs->avail_out = RZ_BUFFER_SIZE;
		zs->next_out  = rz->outbuf;
	}
	for(;;){
		avail_before = zs->avail_out;
		deflate(zs, Z_FULL_FLUSH);
		rz->out += avail_before - zs->avail_out;
		if(zs->avail_out != 0) break;
		/* buffer completely full: deflate may have more to emit */
		write(out_fd, rz->outbuf, RZ_BUFFER_SIZE - zs->avail_out);
		zs->avail_out = RZ_BUFFER_SIZE;
		zs->next_out  = rz->outbuf;
	}
	rz->block_pos = rz->out;
	rz->block_off = 0;
}
 
245
 
 
246
/*
 * Finish the deflate stream: push any buffered input through deflate, then
 * call deflate with Z_FINISH repeatedly, writing the output buffer after
 * every call that left something in it, until a call leaves it empty.
 */
static void razf_end_flush(RAZF *rz){
	z_stream *zs = rz->stream;
	uint32_t avail_before;
#ifdef _USE_KNETFILE
	const int out_fd = rz->x.fpw;
#else
	const int out_fd = rz->filedes;
#endif

	if(rz->buf_len != 0){
		_razf_write(rz, rz->inbuf, rz->buf_len);
		rz->buf_off = rz->buf_len = 0;
	}
	for(;;){
		avail_before = zs->avail_out;
		deflate(zs, Z_FINISH);
		rz->out += avail_before - zs->avail_out;
		if(!(zs->avail_out < RZ_BUFFER_SIZE)) break; /* nothing pending */
		write(out_fd, rz->outbuf, RZ_BUFFER_SIZE - zs->avail_out);
		zs->avail_out = RZ_BUFFER_SIZE;
		zs->next_out  = rz->outbuf;
	}
}
 
267
 
 
268
/*
 * Append `size` bytes from `data` to the input staging buffer rz->inbuf,
 * handing the buffer to _razf_write() each time it reaches RZ_BUFFER_SIZE.
 *
 * The source pointer is walked as `const char *`: arithmetic on `void *`
 * is a compiler extension, not ISO C.  Copies use memcpy instead of byte
 * loops; a non-positive size copies nothing, as before.
 */
static void _razf_buffered_write(RAZF *rz, const void *data, int size){
	const char *src = data;
	int room;

	while(1){
		if(rz->buf_len == RZ_BUFFER_SIZE){
			_razf_write(rz, rz->inbuf, rz->buf_len);
			rz->buf_len = 0;
		}
		if(size + rz->buf_len < RZ_BUFFER_SIZE){
			/* everything that is left fits: stage it and stop */
			if(size > 0) memcpy((char*)rz->inbuf + rz->buf_len, src, size);
			rz->buf_len += size;
			return;
		}
		/* top the buffer up; the next iteration flushes it */
		room = RZ_BUFFER_SIZE - rz->buf_len;
		if(room > 0) memcpy((char*)rz->inbuf + rz->buf_len, src, room);
		size -= room;
		src  += room;
		rz->buf_len += room;
	}
}
 
288
 
 
289
/*
 * Write `size` bytes from `data` to a RAZF writer.
 *
 * Input is cut at every multiple of RZ_BLOCK_SIZE uncompressed bytes: at
 * each boundary the block is flushed with razf_flush() and its offsets are
 * recorded with add_zindex().  Whatever remains is left in the staging
 * buffer.  Always returns the `size` that was passed in.
 *
 * The source pointer is walked as `const char *` because arithmetic on
 * `void *` is a compiler extension, not ISO C.
 */
int razf_write(RAZF* rz, const void *data, int size){
	const char *src = data;
	int ori_size, n;
	int64_t next_block;

	ori_size = size;
	next_block = ((rz->in / RZ_BLOCK_SIZE) + 1) * RZ_BLOCK_SIZE;
	while(rz->in + rz->buf_len + size >= next_block){
		/* bytes still missing to complete the current block */
		n = next_block - rz->in - rz->buf_len;
		_razf_buffered_write(rz, src, n);
		src  += n;
		size -= n;
		razf_flush(rz);
		add_zindex(rz, rz->in, rz->out);
		next_block = ((rz->in / RZ_BLOCK_SIZE) + 1) * RZ_BLOCK_SIZE;
	}
	_razf_buffered_write(rz, src, size);
	return ori_size;
}
 
306
#endif
 
307
 
 
308
/* gzip flag byte */
 
309
#define ASCII_FLAG   0x01 /* bit 0 set: file probably ascii text */
 
310
#define HEAD_CRC     0x02 /* bit 1 set: header CRC present */
 
311
#define EXTRA_FIELD  0x04 /* bit 2 set: extra field present */
 
312
#define ORIG_NAME    0x08 /* bit 3 set: original file name present */
 
313
#define COMMENT      0x10 /* bit 4 set: file comment present */
 
314
#define RESERVED     0xE0 /* bits 5..7: reserved */
 
315
 
 
316
static int _read_gz_header(unsigned char *data, int size, int *extra_off, int *extra_len){
 
317
        int method, flags, n, len;
 
318
        if(size < 2) return 0;
 
319
        if(data[0] != 0x1f || data[1] != 0x8b) return 0;
 
320
        if(size < 4) return 0;
 
321
        method = data[2];
 
322
        flags  = data[3];
 
323
        if(method != Z_DEFLATED || (flags & RESERVED)) return 0;
 
324
        n = 4 + 6; // Skip 6 bytes
 
325
        *extra_off = n + 2;
 
326
        *extra_len = 0;
 
327
        if(flags & EXTRA_FIELD){
 
328
                if(size < n + 2) return 0;
 
329
                len = ((int)data[n + 1] << 8) | data[n];
 
330
                n += 2;
 
331
                *extra_off = n;
 
332
                while(len){
 
333
                        if(n >= size) return 0;
 
334
                        n ++;
 
335
                        len --;
 
336
                }
 
337
                *extra_len = n - (*extra_off);
 
338
        }
 
339
        if(flags & ORIG_NAME) while(n < size && data[n++]);
 
340
        if(flags & COMMENT) while(n < size && data[n++]);
 
341
        if(flags & HEAD_CRC){
 
342
                if(n + 2 > size) return 0;
 
343
                n += 2;
 
344
        }
 
345
        return n;
 
346
}
 
347
 
 
348
#ifdef _USE_KNETFILE
 
349
static RAZF* razf_open_r(knetFile *fp, int _load_index){
 
350
#else
 
351
static RAZF* razf_open_r(int fd, int _load_index){
 
352
#endif
 
353
        RAZF *rz;
 
354
        int ext_off, ext_len;
 
355
        int n, is_be, ret;
 
356
        int64_t end;
 
357
        unsigned char c[] = "RAZF";
 
358
        rz = calloc(1, sizeof(RAZF));
 
359
        rz->mode = 'r';
 
360
#ifdef _USE_KNETFILE
 
361
    rz->x.fpr = fp;
 
362
#else
 
363
#ifdef _WIN32
 
364
        setmode(fd, O_BINARY);
 
365
#endif
 
366
        rz->filedes = fd;
 
367
#endif
 
368
        rz->stream = calloc(sizeof(z_stream), 1);
 
369
        rz->inbuf  = malloc(RZ_BUFFER_SIZE);
 
370
        rz->outbuf = malloc(RZ_BUFFER_SIZE);
 
371
        rz->end = rz->src_end = 0x7FFFFFFFFFFFFFFFLL;
 
372
#ifdef _USE_KNETFILE
 
373
    n = knet_read(rz->x.fpr, rz->inbuf, RZ_BUFFER_SIZE);
 
374
#else
 
375
        n = read(rz->filedes, rz->inbuf, RZ_BUFFER_SIZE);
 
376
#endif
 
377
        ret = _read_gz_header(rz->inbuf, n, &ext_off, &ext_len);
 
378
        if(ret == 0){
 
379
                PLAIN_FILE:
 
380
                rz->in = n;
 
381
                rz->file_type = FILE_TYPE_PLAIN;
 
382
                memcpy(rz->outbuf, rz->inbuf, n);
 
383
                rz->buf_len = n;
 
384
                free(rz->stream);
 
385
                rz->stream = NULL;
 
386
                return rz;
 
387
        }
 
388
        rz->header_size = ret;
 
389
        ret = inflateInit2(rz->stream, -WINDOW_BITS);
 
390
        if(ret != Z_OK){ inflateEnd(rz->stream); goto PLAIN_FILE;}
 
391
        rz->stream->avail_in = n - rz->header_size;
 
392
        rz->stream->next_in  = rz->inbuf + rz->header_size;
 
393
        rz->stream->avail_out = RZ_BUFFER_SIZE;
 
394
        rz->stream->next_out  = rz->outbuf;
 
395
        rz->file_type = FILE_TYPE_GZ;
 
396
        rz->in = rz->header_size;
 
397
        rz->block_pos = rz->header_size;
 
398
        rz->next_block_pos = rz->header_size;
 
399
        rz->block_off = 0;
 
400
        if(ext_len < 7 || memcmp(rz->inbuf + ext_off, c, 4) != 0) return rz;
 
401
        if(((((unsigned char*)rz->inbuf)[ext_off + 5] << 8) | ((unsigned char*)rz->inbuf)[ext_off + 6]) != RZ_BLOCK_SIZE){
 
402
                fprintf(stderr, " -- WARNING: RZ_BLOCK_SIZE is not %d, treat source as gz file.  in %s -- %s:%d --\n", RZ_BLOCK_SIZE, __FUNCTION__, __FILE__, __LINE__);
 
403
                return rz;
 
404
        }
 
405
        rz->load_index = _load_index;
 
406
        rz->file_type = FILE_TYPE_RZ;
 
407
#ifdef _USE_KNETFILE
 
408
        if(knet_seek(fp, -16, SEEK_END) == -1){
 
409
#else
 
410
        if(lseek(fd, -16, SEEK_END) == -1){
 
411
#endif
 
412
                UNSEEKABLE:
 
413
                rz->seekable = 0;
 
414
                rz->index = NULL;
 
415
                rz->src_end = rz->end = 0x7FFFFFFFFFFFFFFFLL;
 
416
        } else {
 
417
                is_be = is_big_endian();
 
418
                rz->seekable = 1;
 
419
#ifdef _USE_KNETFILE
 
420
        knet_read(fp, &end, sizeof(int64_t));
 
421
#else
 
422
                read(fd, &end, sizeof(int64_t));
 
423
#endif        
 
424
                if(!is_be) rz->src_end = (int64_t)byte_swap_8((uint64_t)end);
 
425
                else rz->src_end = end;
 
426
 
 
427
#ifdef _USE_KNETFILE
 
428
                knet_read(fp, &end, sizeof(int64_t));
 
429
#else
 
430
                read(fd, &end, sizeof(int64_t));
 
431
#endif        
 
432
                if(!is_be) rz->end = (int64_t)byte_swap_8((uint64_t)end);
 
433
                else rz->end = end;
 
434
                if(n > rz->end){
 
435
                        rz->stream->avail_in -= n - rz->end;
 
436
                        n = rz->end;
 
437
                }
 
438
                if(rz->end > rz->src_end){
 
439
#ifdef _USE_KNETFILE
 
440
            knet_seek(fp, rz->in, SEEK_SET);
 
441
#else
 
442
                        lseek(fd, rz->in, SEEK_SET);
 
443
#endif
 
444
                        goto UNSEEKABLE;
 
445
                }
 
446
#ifdef _USE_KNETFILE
 
447
        knet_seek(fp, rz->end, SEEK_SET);
 
448
                if(knet_tell(fp) != rz->end){
 
449
                        knet_seek(fp, rz->in, SEEK_SET);
 
450
#else
 
451
                if(lseek(fd, rz->end, SEEK_SET) != rz->end){
 
452
                        lseek(fd, rz->in, SEEK_SET);
 
453
#endif
 
454
                        goto UNSEEKABLE;
 
455
                }
 
456
#ifdef _USE_KNETFILE
 
457
                load_zindex(rz, fp);
 
458
                knet_seek(fp, n, SEEK_SET);
 
459
#else
 
460
                load_zindex(rz, fd);
 
461
                lseek(fd, n, SEEK_SET);
 
462
#endif
 
463
        }
 
464
        return rz;
 
465
}
 
466
 
 
467
#ifdef _USE_KNETFILE
 
468
/*
 * knetfile build: open a RAZF handle on descriptor fd.
 * A mode containing "r" is not implemented here: it prints a notice and
 * yields NULL.  A mode containing "w" (and no "r") creates a writer.
 * Any other mode yields NULL.
 */
RAZF* razf_dopen(int fd, const char *mode){
	if(strstr(mode, "r") != NULL){
		fprintf(stderr,"[razf_dopen] implement me\n");
		return NULL;
	}
	if(strstr(mode, "w") != NULL) return razf_open_w(fd);
	return NULL;
}
 
473
 
 
474
/*
 * knetfile build: not implemented.  Prints a notice and returns NULL;
 * both arguments are ignored.
 */
RAZF* razf_dopen2(int fd, const char *mode)
{
	(void)fd;
	(void)mode;
	fputs("[razf_dopen2] implement me\n", stderr);
	return NULL;
}
 
479
#else
 
480
/*
 * Open a RAZF handle on descriptor fd.
 * A mode containing 'r' gives a reader that loads the block index;
 * otherwise a mode containing 'w' gives a writer; anything else gives NULL.
 */
RAZF* razf_dopen(int fd, const char *mode){
	if(strchr(mode, 'r') != NULL) return razf_open_r(fd, 1);
	return (strchr(mode, 'w') != NULL)? razf_open_w(fd) : NULL;
}
 
485
 
 
486
/*
 * Same as razf_dopen(), except that a reader is created without loading
 * the block index (load flag 0).
 */
RAZF* razf_dopen2(int fd, const char *mode)
{
	if(strchr(mode, 'r') != NULL) return razf_open_r(fd, 0);
	return (strchr(mode, 'w') != NULL)? razf_open_w(fd) : NULL;
}
 
492
#endif
 
493
 
 
494
/*
 * Open `filename` and wrap it in a RAZF handle.
 *
 * mode        - a mode containing "r" opens for reading; otherwise a mode
 *               containing "w" creates/truncates the file for writing
 * _load_index - passed through to razf_open_r()
 *
 * Returns the handle, or NULL when the mode is unrecognised, the file
 * cannot be opened, or the handle cannot be created.
 *
 * The file opened here is now closed again when the handle cannot be
 * created.  Previously the descriptor leaked in that case, which happens
 * on every "w" open in a _RZ_READONLY build.  The knetfile read path also
 * no longer compares a pointer with `< 0`.
 */
static inline RAZF* _razf_open(const char *filename, const char *mode, int _load_index){
	RAZF *rz;

	if(strstr(mode, "r")){
#ifdef _USE_KNETFILE
		knetFile *fp = knet_open(filename, "r");
		if(fp == NULL){
			fprintf(stderr, "[_razf_open] fail to open %s\n", filename);
			return NULL;
		}
		rz = razf_open_r(fp, _load_index);
		if(rz == NULL) knet_close(fp);
#else
#ifdef _WIN32
		int fd = open(filename, O_RDONLY | O_BINARY);
#else
		int fd = open(filename, O_RDONLY);
#endif
		if(fd < 0) return NULL;
		rz = razf_open_r(fd, _load_index);
		if(rz == NULL) close(fd);
#endif
		return rz;
	}
	if(strstr(mode, "w")){
#ifdef _WIN32
		int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
#else
		int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0666);
#endif
		if(fd < 0) return NULL;
		rz = razf_open_w(fd);
		if(rz == NULL) close(fd); /* no handle owns the descriptor */
		return rz;
	}
	return NULL;
}
 
524
 
 
525
/* Open `filename` in the given mode; readers load the block index. */
RAZF* razf_open(const char *filename, const char *mode){
	RAZF *handle = _razf_open(filename, mode, 1);
	return handle;
}
 
528
 
 
529
/* Open `filename` in the given mode; readers do not load the block index. */
RAZF* razf_open2(const char *filename, const char *mode){
	RAZF *handle = _razf_open(filename, mode, 0);
	return handle;
}
 
532
 
 
533
/*
 * Report the uncompressed (*u_size) and compressed (*c_size) data sizes.
 *
 * Returns 1 and fills both outputs when the sizes are known, 0 otherwise:
 *   - handle not in read mode ('r' or 'R')            -> 0
 *   - plain file: both sizes are the file length, found by seeking to the
 *     end on first use and cached in rz->end; 0 if the current position
 *     cannot be queried
 *   - gz file                                         -> 0
 *   - RAZF file: src_end / end, unless the two are equal -> 0
 */
int razf_get_data_size(RAZF *rz, int64_t *u_size, int64_t *c_size){
	int64_t saved_pos;

	if(rz->mode != 'r' && rz->mode != 'R') return 0;

	if(rz->file_type == FILE_TYPE_PLAIN){
		if(rz->end == 0x7fffffffffffffffLL){
			/* length unknown so far: measure it, then restore the position */
#ifdef _USE_KNETFILE
			if(knet_seek(rz->x.fpr, 0, SEEK_CUR) == -1) return 0;
			saved_pos = knet_tell(rz->x.fpr);
			knet_seek(rz->x.fpr, 0, SEEK_END);
			rz->end = knet_tell(rz->x.fpr);
			knet_seek(rz->x.fpr, saved_pos, SEEK_SET);
#else
			saved_pos = lseek(rz->filedes, 0, SEEK_CUR);
			if(saved_pos == -1) return 0;
			rz->end = lseek(rz->filedes, 0, SEEK_END);
			lseek(rz->filedes, saved_pos, SEEK_SET);
#endif
		}
		*u_size = *c_size = rz->end;
		return 1;
	}
	if(rz->file_type == FILE_TYPE_RZ){
		if(rz->src_end == rz->end) return 0;
		*u_size = rz->src_end;
		*c_size = rz->end;
		return 1;
	}
	/* FILE_TYPE_GZ and anything unrecognised */
	return 0;
}
 
564
 
 
565
/*
 * Produce up to `size` bytes of output into `data`.
 *
 * Plain files are read straight from the descriptor.  Compressed files are
 * inflated with Z_BLOCK.  When inflate reports a block boundary that is not
 * the last block of the stream (data_type bit 128 set, bit 64 clear),
 * rz->buf_flush is raised, the compressed position is remembered in
 * rz->next_block_pos and the function returns early.
 *
 * Returns the number of bytes produced, never a negative value.  It returns
 * 0 immediately once z_eof or z_err is set.
 *
 * Read failures are now detected.  Previously a -1 from read() was stored
 * in the unsigned avail_in (compressed path) or returned as a negative
 * length (plain path).  Now:
 *   - plain file: a failed read ends the data (z_eof = 1, returns 0);
 *   - compressed file: a failed read sets z_err, and z_eof as well so that
 *     callers which only poll z_eof also stop.
 */
static int _razf_read(RAZF* rz, void *data, int size){
	int ret, tin;
	int64_t want, got;

	if(rz->z_eof || rz->z_err) return 0;
	if (rz->file_type == FILE_TYPE_PLAIN) {
#ifdef _USE_KNETFILE
		ret = knet_read(rz->x.fpr, data, size);
#else
		ret = read(rz->filedes, data, size);
#endif
		if (ret <= 0) {
			rz->z_eof = 1;
			return 0;
		}
		return ret;
	}
	rz->stream->avail_out = size;
	rz->stream->next_out  = data;
	while(rz->stream->avail_out){
		if(rz->stream->avail_in == 0){
			/* refill, never reading past the end of the compressed data */
			if(rz->in >= rz->end){ rz->z_eof = 1; break; }
			want = rz->end - rz->in;
			if(want > RZ_BUFFER_SIZE) want = RZ_BUFFER_SIZE;
#ifdef _USE_KNETFILE
			got = knet_read(rz->x.fpr, rz->inbuf, want);
#else
			got = read(rz->filedes, rz->inbuf, want);
#endif
			if(got < 0){
				fprintf(stderr, "[_razf_read] read error on the compressed input\n");
				rz->z_err = 1;
				rz->z_eof = 1;
				break;
			}
			if(got == 0){
				rz->z_eof = 1;
				break;
			}
			rz->stream->avail_in = (uInt)got;
			rz->stream->next_in = rz->inbuf;
		}
		tin = rz->stream->avail_in;
		ret = inflate(rz->stream, Z_BLOCK);
		rz->in += tin - rz->stream->avail_in;
		if(ret == Z_NEED_DICT || ret == Z_MEM_ERROR || ret == Z_DATA_ERROR){
			fprintf(stderr, "[_razf_read] inflate error: %d %s (at %s:%d)\n", ret, rz->stream->msg ? rz->stream->msg : "", __FILE__, __LINE__);
			rz->z_err = 1;
			break;
		}
		if(ret == Z_STREAM_END){
			rz->z_eof = 1;
			break;
		}
		if ((rz->stream->data_type&128) && !(rz->stream->data_type&64)){
			rz->buf_flush = 1;
			rz->next_block_pos = rz->in;
			break;
		}
	}
	return size - rz->stream->avail_out;
}
 
621
 
 
622
/*
 * Read up to `size` bytes of uncompressed data into `data`.
 *
 * Data is served from rz->outbuf, which is refilled through _razf_read().
 * rz->block_off counts bytes delivered from the current block.  When a
 * buffer that ended on a block boundary (rz->buf_flush) has been drained,
 * rz->block_pos moves to rz->next_block_pos and block_off restarts at 0.
 *
 * Returns the number of bytes delivered, which is less than `size` at end
 * of data or after an error; rz->out advances by the same amount.
 *
 * Fixes:
 *   - the loop now also stops when z_err is set and nothing is buffered.
 *     It used to test only z_eof, so an inflate error made it spin forever
 *     because _razf_read() returns 0 once z_err is set;
 *   - a negative length from _razf_read() is treated as "nothing read";
 *   - the destination is walked as `char *` (arithmetic on `void *` is not
 *     ISO C) and copies use memcpy.
 */
int razf_read(RAZF *rz, void *data, int size){
	char *dst = data;
	int ori_size;

	ori_size = size;
	while(size > 0){
		if(rz->buf_len){
			if(size < rz->buf_len){
				/* the request is satisfied from the buffer alone */
				memcpy(dst, (char*)rz->outbuf + rz->buf_off, size);
				rz->buf_off += size;
				rz->buf_len -= size;
				dst += size;
				rz->block_off += size;
				size = 0;
				break;
			} else {
				/* drain the whole buffer */
				memcpy(dst, (char*)rz->outbuf + rz->buf_off, rz->buf_len);
				dst += rz->buf_len;
				size -= rz->buf_len;
				rz->block_off += rz->buf_len;
				rz->buf_off = 0;
				rz->buf_len = 0;
				if(rz->buf_flush){
					rz->block_pos = rz->next_block_pos;
					rz->block_off = 0;
					rz->buf_flush = 0;
				}
			}
		} else if(rz->buf_flush){
			rz->block_pos = rz->next_block_pos;
			rz->block_off = 0;
			rz->buf_flush = 0;
		}
		if(rz->buf_flush) continue;
		rz->buf_len = _razf_read(rz, rz->outbuf, RZ_BUFFER_SIZE);
		if(rz->buf_len < 0){
			rz->buf_len = 0;
			break;
		}
		if((rz->z_eof || rz->z_err) && rz->buf_len == 0) break;
	}
	rz->out += ori_size - size;
	return ori_size - size;
}
 
660
 
 
661
/*
 * Discard up to `size` bytes of uncompressed data, with the same
 * block_pos / block_off bookkeeping as razf_read() and no copying.
 *
 * Returns the number of bytes skipped; rz->out advances by that amount.
 *
 * Fixes:
 *   - when a whole buffer is consumed, block_off is advanced BEFORE
 *     buf_len is cleared.  The old order added 0, so block_off lagged
 *     behind the real offset within the block;
 *   - the loop stops on z_eof/z_err only once nothing is buffered, as
 *     razf_read() does.  It used to stop as soon as z_eof was raised, so
 *     bytes delivered together with the end-of-stream indication stayed in
 *     the buffer and were never skipped;
 *   - a negative length from _razf_read() is treated as "nothing read".
 */
int razf_skip(RAZF* rz, int size){
	int ori_size;

	ori_size = size;
	while(size > 0){
		if(rz->buf_len){
			if(size < rz->buf_len){
				rz->buf_off += size;
				rz->buf_len -= size;
				rz->block_off += size;
				size = 0;
				break;
			} else {
				size -= rz->buf_len;
				rz->block_off += rz->buf_len;
				rz->buf_off = 0;
				rz->buf_len = 0;
				if(rz->buf_flush){
					rz->block_pos = rz->next_block_pos;
					rz->block_off = 0;
					rz->buf_flush = 0;
				}
			}
		} else if(rz->buf_flush){
			rz->block_pos = rz->next_block_pos;
			rz->block_off = 0;
			rz->buf_flush = 0;
		}
		if(rz->buf_flush) continue;
		rz->buf_len = _razf_read(rz, rz->outbuf, RZ_BUFFER_SIZE);
		if(rz->buf_len < 0){
			rz->buf_len = 0;
			break;
		}
		if((rz->z_eof || rz->z_err) && rz->buf_len == 0) break;
	}
	rz->out += ori_size - size;
	return ori_size - size;
}
 
695
 
 
696
/*
 * Restart decoding at compressed offset `in`, which is taken to correspond
 * to uncompressed offset `out`.  The file is repositioned, the position
 * bookkeeping is reset to the start of a block, the EOF/error flags are
 * cleared, the inflate state is reset and both buffers are emptied.
 */
static void _razf_reset_read(RAZF *rz, int64_t in, int64_t out){
	/* reposition the underlying file */
#ifdef _USE_KNETFILE
	knet_seek(rz->x.fpr, in, SEEK_SET);
#else
	lseek(rz->filedes, in, SEEK_SET);
#endif

	/* stream positions */
	rz->in  = in;
	rz->out = out;

	/* block bookkeeping: `in` is the start of a block */
	rz->block_pos = in;
	rz->next_block_pos = in;
	rz->block_off = 0;
	rz->buf_flush = 0;

	/* status flags */
	rz->z_eof = rz->z_err = 0;

	/* decoder state and buffers */
	inflateReset(rz->stream);
	rz->stream->avail_in = 0;
	rz->buf_off = rz->buf_len = 0;
}
 
713
 
 
714
/*
 * Move to `block_offset` bytes past the block that starts at compressed
 * offset `block_start`.
 *
 * Plain files: seeks to block_start + block_offset and returns the
 * resulting file position.
 * Compressed files: when the target lies in the current block at or after
 * the current offset, only the difference is skipped.  Otherwise decoding
 * restarts at block_start (0 is replaced by the header size) and
 * block_offset bytes are skipped.  Returns rz->block_off afterwards.
 */
int64_t razf_jump(RAZF *rz, int64_t block_start, int block_offset){
	int64_t target;
	int ahead_in_same_block;

	rz->z_eof = 0;
	if(rz->file_type == FILE_TYPE_PLAIN){
		rz->buf_off = rz->buf_len = 0;
		target = block_start + block_offset;
#ifdef _USE_KNETFILE
		knet_seek(rz->x.fpr, target, SEEK_SET);
		target = knet_tell(rz->x.fpr);
#else
		target = lseek(rz->filedes, target, SEEK_SET);
#endif
		rz->out = rz->in = target;
		return target;
	}

	ahead_in_same_block = (block_start == rz->block_pos) && (block_offset >= rz->block_off);
	if(ahead_in_same_block){
		/* no inflate reset needed, just move forward */
		block_offset -= rz->block_off;
	} else {
		/* a block_start of 0 means the first block, right after the header */
		if(block_start == 0) block_start = rz->header_size;
		_razf_reset_read(rz, block_start, 0);
	}
	if(block_offset != 0) razf_skip(rz, block_offset);
	return rz->block_off;
}
 
739
 
 
740
/*
 * Seek to uncompressed position `pos`, interpreted according to `where`:
 * SEEK_CUR is relative to rz->out, SEEK_END to rz->src_end, anything else
 * is absolute.
 *
 * Returns the resulting uncompressed position (rz->out; for plain files,
 * the file position).
 *   - plain file: a direct seek on the underlying file;
 *   - gz file: forward only, by skipping; a backward request leaves the
 *     position unchanged;
 *   - RAZF file: decoding restarts at the indexed block at or before `pos`
 *     and the remainder is skipped.  A forward seek inside data that is
 *     already reachable just skips.  A target past src_end leaves the
 *     position unchanged.
 *
 * Fixes:
 *   - with no index loaded (rz->index == NULL: index not requested, or the
 *     file was judged unseekable) a backward seek past the first block
 *     used to dereference the NULL index.  It now restarts from the first
 *     block after the header, which is what the code already did for
 *     targets inside the first block;
 *   - a block number beyond the loaded index is clamped to the last entry;
 *   - the distance is skipped in bounded steps instead of being truncated
 *     to int in a single call.
 */
int64_t razf_seek(RAZF* rz, int64_t pos, int where){
	const int64_t max_step = 0x40000000;
	int64_t idx;
	int64_t seek_pos, new_out, remaining;
	int step;

	rz->z_eof = 0;
	if (where == SEEK_CUR) pos += rz->out;
	else if (where == SEEK_END) pos += rz->src_end;
	if(rz->file_type == FILE_TYPE_PLAIN){
#ifdef _USE_KNETFILE
		knet_seek(rz->x.fpr, pos, SEEK_SET);
		seek_pos = knet_tell(rz->x.fpr);
#else
		seek_pos = lseek(rz->filedes, pos, SEEK_SET);
#endif
		rz->buf_off = rz->buf_len = 0;
		rz->out = rz->in = seek_pos;
		return seek_pos;
	} else if(rz->file_type == FILE_TYPE_GZ){
		if(pos >= rz->out) goto SKIP;
		return rz->out;
	}
	if(pos == rz->out) return pos;
	if(pos > rz->src_end) return rz->out;
	if(!rz->seekable || !rz->load_index){
		if(pos >= rz->out) goto SKIP;
	}
	/* entry i of the index describes the block starting at (i + 1) * RZ_BLOCK_SIZE */
	idx = pos / RZ_BLOCK_SIZE - 1;
	if(rz->index == NULL) idx = -1;
	else if(idx >= rz->index->size) idx = (int64_t)rz->index->size - 1;
	seek_pos = (idx < 0)? rz->header_size:(rz->index->cell_offsets[idx] + rz->index->bin_offsets[idx / RZ_BIN_SIZE]);
	new_out  = (idx + 1) * RZ_BLOCK_SIZE;
	if(pos > rz->out && new_out <= rz->out) goto SKIP;
	_razf_reset_read(rz, seek_pos, new_out);
	SKIP:
	remaining = pos - rz->out;
	do {
		step = (remaining > max_step)? (int)max_step : (int)remaining;
		if(razf_skip(rz, step) < step) break; /* end of data or error */
		remaining -= step;
	} while(remaining > 0);
	return rz->out;
}
 
774
 
 
775
/*
 * Pack the current read position into one 64-bit value: rz->block_pos
 * shifted left by 16 bits, combined with the low 16 bits of rz->block_off.
 * razf_seek2() splits a value of this layout back into its two parts.
 */
uint64_t razf_tell2(RAZF *rz)
{
	uint64_t block_part = (uint64_t)rz->block_pos << 16;
	uint64_t offset_part = rz->block_off & 0xffff;

	return block_part | offset_part;
}
 
789
 
 
790
/*
 * Seek to a packed offset: the bits above the low 16 are passed to
 * razf_jump() as the block start and the low 16 bits as the offset inside
 * that block. Only SEEK_SET is supported; any other `where` returns -1.
 * Otherwise returns whatever razf_jump() returns.
 */
int64_t razf_seek2(RAZF *rz, uint64_t voffset, int where)
{
	int64_t block_start;
	int block_offset;

	if (where == SEEK_SET) {
		block_start = voffset >> 16;
		block_offset = voffset & 0xffff;
		return razf_jump(rz, block_start, block_offset);
	}
	return -1;
}
 
795
 
 
796
/*
 * Finish and release a RAZF handle.
 *
 * Write mode (unless built with _RZ_READONLY): flushes the stream, ends
 * deflate, saves the index, then appends rz->in and rz->out as two 64-bit
 * values, byte-swapped when the host is not big-endian. The results of the
 * write() calls are not checked.
 * Read mode: ends inflate if a stream exists.
 *
 * In both modes the buffers, header, index and stream are freed, the
 * underlying file is closed, and finally rz itself is freed.
 */
void razf_close(RAZF *rz){
	if(rz->mode == 'w'){
#ifndef _RZ_READONLY
		int out_fd;
		int big_endian;
		uint64_t word;

		razf_end_flush(rz);
		deflateEnd(rz->stream);

		/* Both build variants write through a plain descriptor. */
#ifdef _USE_KNETFILE
		out_fd = rz->x.fpw;
#else
		out_fd = rz->filedes;
#endif
		save_zindex(rz, out_fd);

		/* Trailer: rz->in followed by rz->out, stored big-endian. */
		big_endian = is_big_endian();
		word = big_endian ? (uint64_t)rz->in : byte_swap_8((uint64_t)rz->in);
		write(out_fd, &word, sizeof(int64_t));
		word = big_endian ? (uint64_t)rz->out : byte_swap_8((uint64_t)rz->out);
		write(out_fd, &word, sizeof(int64_t));
#endif
	} else if(rz->mode == 'r'){
		if(rz->stream){
			inflateEnd(rz->stream);
		}
	}

	/* free() accepts a null pointer, so no guards are needed here. */
	free(rz->inbuf);
	free(rz->outbuf);

	if(rz->header){
		free(rz->header->extra);
		free(rz->header->name);
		free(rz->header->comment);
		free(rz->header);
	}
	if(rz->index){
		free(rz->index->bin_offsets);
		free(rz->index->cell_offsets);
		free(rz->index);
	}
	free(rz->stream);

	/* Close the underlying file. */
#ifdef _USE_KNETFILE
	if(rz->mode == 'r'){
		knet_close(rz->x.fpr);
	} else if(rz->mode == 'w'){
		close(rz->x.fpw);
	}
#else
	close(rz->filedes);
#endif
	free(rz);
}
 
852
 
 
853
#endif