1
#include "compressed_seq.h"
13
static inline cmph_uint32 compressed_seq_i_log2(cmph_uint32 x)
15
register cmph_uint32 res = 0;
25
void compressed_seq_init(compressed_seq_t * cs)
27
select_init(&cs->sel);
35
void compressed_seq_destroy(compressed_seq_t * cs)
37
free(cs->store_table);
39
free(cs->length_rems);
41
select_destroy(&cs->sel);
45
void compressed_seq_generate(compressed_seq_t * cs, cmph_uint32 * vals_table, cmph_uint32 n)
47
register cmph_uint32 i;
48
// lengths: represents lengths of encoded values
49
register cmph_uint32 * lengths = (cmph_uint32 *)calloc(n, sizeof(cmph_uint32));
50
register cmph_uint32 rems_mask;
51
register cmph_uint32 stored_value;
56
for(i = 0; i < cs->n; i++)
58
if(vals_table[i] == 0)
64
lengths[i] = compressed_seq_i_log2(vals_table[i] + 1);
65
cs->total_length += lengths[i];
71
free(cs->store_table);
73
cs->store_table = (cmph_uint32 *) calloc(((cs->total_length + 31) >> 5), sizeof(cmph_uint32));
76
for(i = 0; i < cs->n; i++)
78
if(vals_table[i] == 0)
80
stored_value = vals_table[i] - ((1U << lengths[i]) - 1U);
81
set_bits_at_pos(cs->store_table, cs->total_length, stored_value, lengths[i]);
82
cs->total_length += lengths[i];
85
cs->rem_r = compressed_seq_i_log2(cs->total_length/cs->n);
94
free(cs->length_rems);
97
cs->length_rems = (cmph_uint32 *) calloc(BITS_TABLE_SIZE(cs->n, cs->rem_r), sizeof(cmph_uint32));
99
rems_mask = (1U << cs->rem_r) - 1U;
100
cs->total_length = 0;
102
for(i = 0; i < cs->n; i++)
104
cs->total_length += lengths[i];
105
set_bits_value(cs->length_rems, i, cs->total_length & rems_mask, cs->rem_r, rems_mask);
106
lengths[i] = cs->total_length >> cs->rem_r;
109
select_init(&cs->sel);
111
// FABIANO: before it was (cs->total_length >> cs->rem_r) + 1. But I wiped out the + 1 because
112
// I changed the select structure to work up to m, instead of up to m - 1.
113
select_generate(&cs->sel, lengths, cs->n, (cs->total_length >> cs->rem_r));
118
cmph_uint32 compressed_seq_get_space_usage(compressed_seq_t * cs)
120
register cmph_uint32 space_usage = select_get_space_usage(&cs->sel);
121
space_usage += ((cs->total_length + 31) >> 5) * (cmph_uint32)sizeof(cmph_uint32) * 8;
122
space_usage += BITS_TABLE_SIZE(cs->n, cs->rem_r) * (cmph_uint32)sizeof(cmph_uint32) * 8;
123
return 4 * (cmph_uint32)sizeof(cmph_uint32) * 8 + space_usage;
126
cmph_uint32 compressed_seq_query(compressed_seq_t * cs, cmph_uint32 idx)
128
register cmph_uint32 enc_idx, enc_length;
129
register cmph_uint32 rems_mask;
130
register cmph_uint32 stored_value;
131
register cmph_uint32 sel_res;
133
assert(idx < cs->n); // FABIANO ADDED
135
rems_mask = (1U << cs->rem_r) - 1U;
140
sel_res = select_query(&cs->sel, idx);
144
sel_res = select_query(&cs->sel, idx - 1);
146
enc_idx = (sel_res - (idx - 1)) << cs->rem_r;
147
enc_idx += get_bits_value(cs->length_rems, idx-1, cs->rem_r, rems_mask);
149
sel_res = select_next_query(&cs->sel, sel_res);
152
enc_length = (sel_res - idx) << cs->rem_r;
153
enc_length += get_bits_value(cs->length_rems, idx, cs->rem_r, rems_mask);
154
enc_length -= enc_idx;
158
stored_value = get_bits_at_pos(cs->store_table, enc_idx, enc_length);
159
return stored_value + ((1U << enc_length) - 1U);
162
void compressed_seq_dump(compressed_seq_t * cs, char ** buf, cmph_uint32 * buflen)
164
register cmph_uint32 sel_size = select_packed_size(&(cs->sel));
165
register cmph_uint32 length_rems_size = BITS_TABLE_SIZE(cs->n, cs->rem_r) * 4;
166
register cmph_uint32 store_table_size = ((cs->total_length + 31) >> 5) * 4;
167
register cmph_uint32 pos = 0;
169
cmph_uint32 buflen_sel = 0;
171
*buflen = 4*(cmph_uint32)sizeof(cmph_uint32) + sel_size + length_rems_size + store_table_size;
173
DEBUGP("sel_size = %u\n", sel_size);
174
DEBUGP("length_rems_size = %u\n", length_rems_size);
175
DEBUGP("store_table_size = %u\n", store_table_size);
176
*buf = (char *)calloc(*buflen, sizeof(char));
184
// dumping n, rem_r and total_length
185
memcpy(*buf, &(cs->n), sizeof(cmph_uint32));
186
pos += (cmph_uint32)sizeof(cmph_uint32);
187
DEBUGP("n = %u\n", cs->n);
189
memcpy(*buf + pos, &(cs->rem_r), sizeof(cmph_uint32));
190
pos += (cmph_uint32)sizeof(cmph_uint32);
191
DEBUGP("rem_r = %u\n", cs->rem_r);
193
memcpy(*buf + pos, &(cs->total_length), sizeof(cmph_uint32));
194
pos += (cmph_uint32)sizeof(cmph_uint32);
195
DEBUGP("total_length = %u\n", cs->total_length);
199
select_dump(&cs->sel, &buf_sel, &buflen_sel);
200
memcpy(*buf + pos, &buflen_sel, sizeof(cmph_uint32));
201
pos += (cmph_uint32)sizeof(cmph_uint32);
202
DEBUGP("buflen_sel = %u\n", buflen_sel);
204
memcpy(*buf + pos, buf_sel, buflen_sel);
207
for(i = 0; i < buflen_sel; i++)
209
DEBUGP("pos = %u -- buf_sel[%u] = %u\n", pos, i, *(*buf + pos + i));
216
// dumping length_rems
217
memcpy(*buf + pos, cs->length_rems, length_rems_size);
219
for(i = 0; i < length_rems_size; i++)
221
DEBUGP("pos = %u -- length_rems_size = %u -- length_rems[%u] = %u\n", pos, length_rems_size, i, *(*buf + pos + i));
224
pos += length_rems_size;
226
// dumping store_table
227
memcpy(*buf + pos, cs->store_table, store_table_size);
230
for(i = 0; i < store_table_size; i++)
232
DEBUGP("pos = %u -- store_table_size = %u -- store_table[%u] = %u\n", pos, store_table_size, i, *(*buf + pos + i));
235
DEBUGP("Dumped compressed sequence structure with size %u bytes\n", *buflen);
238
void compressed_seq_load(compressed_seq_t * cs, const char * buf, cmph_uint32 buflen)
240
register cmph_uint32 pos = 0;
241
cmph_uint32 buflen_sel = 0;
242
register cmph_uint32 length_rems_size = 0;
243
register cmph_uint32 store_table_size = 0;
245
// loading n, rem_r and total_length
246
memcpy(&(cs->n), buf, sizeof(cmph_uint32));
247
pos += (cmph_uint32)sizeof(cmph_uint32);
248
DEBUGP("n = %u\n", cs->n);
250
memcpy(&(cs->rem_r), buf + pos, sizeof(cmph_uint32));
251
pos += (cmph_uint32)sizeof(cmph_uint32);
252
DEBUGP("rem_r = %u\n", cs->rem_r);
254
memcpy(&(cs->total_length), buf + pos, sizeof(cmph_uint32));
255
pos += (cmph_uint32)sizeof(cmph_uint32);
256
DEBUGP("total_length = %u\n", cs->total_length);
259
memcpy(&buflen_sel, buf + pos, sizeof(cmph_uint32));
260
pos += (cmph_uint32)sizeof(cmph_uint32);
261
DEBUGP("buflen_sel = %u\n", buflen_sel);
263
select_load(&cs->sel, buf + pos, buflen_sel);
266
for(i = 0; i < buflen_sel; i++)
268
DEBUGP("pos = %u -- buf_sel[%u] = %u\n", pos, i, *(buf + pos + i));
273
// loading length_rems
276
free(cs->length_rems);
278
length_rems_size = BITS_TABLE_SIZE(cs->n, cs->rem_r);
279
cs->length_rems = (cmph_uint32 *) calloc(length_rems_size, sizeof(cmph_uint32));
280
length_rems_size *= 4;
281
memcpy(cs->length_rems, buf + pos, length_rems_size);
284
for(i = 0; i < length_rems_size; i++)
286
DEBUGP("pos = %u -- length_rems_size = %u -- length_rems[%u] = %u\n", pos, length_rems_size, i, *(buf + pos + i));
289
pos += length_rems_size;
291
// loading store_table
292
store_table_size = ((cs->total_length + 31) >> 5);
295
free(cs->store_table);
297
cs->store_table = (cmph_uint32 *) calloc(store_table_size, sizeof(cmph_uint32));
298
store_table_size *= 4;
299
memcpy(cs->store_table, buf + pos, store_table_size);
302
for(i = 0; i < store_table_size; i++)
304
DEBUGP("pos = %u -- store_table_size = %u -- store_table[%u] = %u\n", pos, store_table_size, i, *(buf + pos + i));
308
DEBUGP("Loaded compressed sequence structure with size %u bytes\n", buflen);
311
void compressed_seq_pack(compressed_seq_t *cs, void *cs_packed)
316
cmph_uint32 buflen = 0;
317
compressed_seq_dump(cs, &buf, &buflen);
318
memcpy(cs_packed, buf, buflen);
324
cmph_uint32 compressed_seq_packed_size(compressed_seq_t *cs)
326
register cmph_uint32 sel_size = select_packed_size(&cs->sel);
327
register cmph_uint32 store_table_size = ((cs->total_length + 31) >> 5) * (cmph_uint32)sizeof(cmph_uint32);
328
register cmph_uint32 length_rems_size = BITS_TABLE_SIZE(cs->n, cs->rem_r) * (cmph_uint32)sizeof(cmph_uint32);
329
return 4 * (cmph_uint32)sizeof(cmph_uint32) + sel_size + store_table_size + length_rems_size;
333
cmph_uint32 compressed_seq_query_packed(void * cs_packed, cmph_uint32 idx)
335
// unpacking cs_packed
336
register cmph_uint32 *ptr = (cmph_uint32 *)cs_packed;
337
register cmph_uint32 n = *ptr++;
338
register cmph_uint32 rem_r = *ptr++;
339
ptr++; // skipping total_length
340
// register cmph_uint32 total_length = *ptr++;
341
register cmph_uint32 buflen_sel = *ptr++;
342
register cmph_uint32 * sel_packed = ptr;
343
register cmph_uint32 * length_rems = (ptr += (buflen_sel >> 2));
344
register cmph_uint32 length_rems_size = BITS_TABLE_SIZE(n, rem_r);
345
register cmph_uint32 * store_table = (ptr += length_rems_size);
347
// compressed sequence query computation
348
register cmph_uint32 enc_idx, enc_length;
349
register cmph_uint32 rems_mask;
350
register cmph_uint32 stored_value;
351
register cmph_uint32 sel_res;
353
rems_mask = (1U << rem_r) - 1U;
358
sel_res = select_query_packed(sel_packed, idx);
362
sel_res = select_query_packed(sel_packed, idx - 1);
364
enc_idx = (sel_res - (idx - 1)) << rem_r;
365
enc_idx += get_bits_value(length_rems, idx-1, rem_r, rems_mask);
367
sel_res = select_next_query_packed(sel_packed, sel_res);
370
enc_length = (sel_res - idx) << rem_r;
371
enc_length += get_bits_value(length_rems, idx, rem_r, rems_mask);
372
enc_length -= enc_idx;
376
stored_value = get_bits_at_pos(store_table, enc_idx, enc_length);
377
return stored_value + ((1U << enc_length) - 1U);