5
#include"compressed_rank.h"
9
static inline cmph_uint32 compressed_rank_i_log2(cmph_uint32 x)
11
register cmph_uint32 res = 0;
21
void compressed_rank_init(compressed_rank_t * cr)
26
select_init(&cr->sel);
30
void compressed_rank_destroy(compressed_rank_t * cr)
34
select_destroy(&cr->sel);
37
void compressed_rank_generate(compressed_rank_t * cr, cmph_uint32 * vals_table, cmph_uint32 n)
39
register cmph_uint32 i,j;
40
register cmph_uint32 rems_mask;
41
register cmph_uint32 * select_vec = 0;
43
cr->max_val = vals_table[cr->n - 1];
44
cr->rem_r = compressed_rank_i_log2(cr->max_val/cr->n);
49
select_vec = (cmph_uint32 *) calloc(cr->max_val >> cr->rem_r, sizeof(cmph_uint32));
50
cr->vals_rems = (cmph_uint32 *) calloc(BITS_TABLE_SIZE(cr->n, cr->rem_r), sizeof(cmph_uint32));
51
rems_mask = (1U << cr->rem_r) - 1U;
53
for(i = 0; i < cr->n; i++)
55
set_bits_value(cr->vals_rems, i, vals_table[i] & rems_mask, cr->rem_r, rems_mask);
58
for(i = 1, j = 0; i <= cr->max_val >> cr->rem_r; i++)
60
while(i > (vals_table[j] >> cr->rem_r))
64
select_vec[i - 1] = j;
68
// FABIANO: before it was (cr->total_length >> cr->rem_r) + 1. But I wiped out the + 1 because
69
// I changed the select structure to work up to m, instead of up to m - 1.
70
select_generate(&cr->sel, select_vec, cr->max_val >> cr->rem_r, cr->n);
75
cmph_uint32 compressed_rank_query(compressed_rank_t * cr, cmph_uint32 idx)
77
register cmph_uint32 rems_mask;
78
register cmph_uint32 val_quot, val_rem;
79
register cmph_uint32 sel_res, rank;
86
val_quot = idx >> cr->rem_r;
87
rems_mask = (1U << cr->rem_r) - 1U;
88
val_rem = idx & rems_mask;
95
sel_res = select_query(&cr->sel, val_quot - 1) + 1;
96
rank = sel_res - val_quot;
101
if(GETBIT32(cr->sel.bits_vec, sel_res))
105
if(get_bits_value(cr->vals_rems, rank, cr->rem_r, rems_mask) >= val_rem)
116
cmph_uint32 compressed_rank_get_space_usage(compressed_rank_t * cr)
118
register cmph_uint32 space_usage = select_get_space_usage(&cr->sel);
119
space_usage += BITS_TABLE_SIZE(cr->n, cr->rem_r)*(cmph_uint32)sizeof(cmph_uint32)*8;
120
space_usage += 3*(cmph_uint32)sizeof(cmph_uint32)*8;
124
void compressed_rank_dump(compressed_rank_t * cr, char **buf, cmph_uint32 *buflen)
126
register cmph_uint32 sel_size = select_packed_size(&(cr->sel));
127
register cmph_uint32 vals_rems_size = BITS_TABLE_SIZE(cr->n, cr->rem_r) * (cmph_uint32)sizeof(cmph_uint32);
128
register cmph_uint32 pos = 0;
130
cmph_uint32 buflen_sel = 0;
132
*buflen = 4*(cmph_uint32)sizeof(cmph_uint32) + sel_size + vals_rems_size;
134
DEBUGP("sel_size = %u\n", sel_size);
135
DEBUGP("vals_rems_size = %u\n", vals_rems_size);
137
*buf = (char *)calloc(*buflen, sizeof(char));
145
// dumping max_val, n and rem_r
146
memcpy(*buf, &(cr->max_val), sizeof(cmph_uint32));
147
pos += (cmph_uint32)sizeof(cmph_uint32);
148
DEBUGP("max_val = %u\n", cr->max_val);
150
memcpy(*buf + pos, &(cr->n), sizeof(cmph_uint32));
151
pos += (cmph_uint32)sizeof(cmph_uint32);
152
DEBUGP("n = %u\n", cr->n);
154
memcpy(*buf + pos, &(cr->rem_r), sizeof(cmph_uint32));
155
pos += (cmph_uint32)sizeof(cmph_uint32);
156
DEBUGP("rem_r = %u\n", cr->rem_r);
159
select_dump(&cr->sel, &buf_sel, &buflen_sel);
160
memcpy(*buf + pos, &buflen_sel, sizeof(cmph_uint32));
161
pos += (cmph_uint32)sizeof(cmph_uint32);
162
DEBUGP("buflen_sel = %u\n", buflen_sel);
164
memcpy(*buf + pos, buf_sel, buflen_sel);
168
for(i = 0; i < buflen_sel; i++)
170
DEBUGP("pos = %u -- buf_sel[%u] = %u\n", pos, i, *(*buf + pos + i));
178
memcpy(*buf + pos, cr->vals_rems, vals_rems_size);
180
for(i = 0; i < vals_rems_size; i++)
182
DEBUGP("pos = %u -- vals_rems_size = %u -- vals_rems[%u] = %u\n", pos, vals_rems_size, i, *(*buf + pos + i));
185
pos += vals_rems_size;
187
DEBUGP("Dumped compressed rank structure with size %u bytes\n", *buflen);
190
void compressed_rank_load(compressed_rank_t * cr, const char *buf, cmph_uint32 buflen)
192
register cmph_uint32 pos = 0;
193
cmph_uint32 buflen_sel = 0;
194
register cmph_uint32 vals_rems_size = 0;
196
// loading max_val, n, and rem_r
197
memcpy(&(cr->max_val), buf, sizeof(cmph_uint32));
198
pos += (cmph_uint32)sizeof(cmph_uint32);
199
DEBUGP("max_val = %u\n", cr->max_val);
201
memcpy(&(cr->n), buf + pos, sizeof(cmph_uint32));
202
pos += (cmph_uint32)sizeof(cmph_uint32);
203
DEBUGP("n = %u\n", cr->n);
205
memcpy(&(cr->rem_r), buf + pos, sizeof(cmph_uint32));
206
pos += (cmph_uint32)sizeof(cmph_uint32);
207
DEBUGP("rem_r = %u\n", cr->rem_r);
210
memcpy(&buflen_sel, buf + pos, sizeof(cmph_uint32));
211
pos += (cmph_uint32)sizeof(cmph_uint32);
212
DEBUGP("buflen_sel = %u\n", buflen_sel);
214
select_load(&cr->sel, buf + pos, buflen_sel);
217
for(i = 0; i < buflen_sel; i++)
219
DEBUGP("pos = %u -- buf_sel[%u] = %u\n", pos, i, *(buf + pos + i));
229
vals_rems_size = BITS_TABLE_SIZE(cr->n, cr->rem_r);
230
cr->vals_rems = (cmph_uint32 *) calloc(vals_rems_size, sizeof(cmph_uint32));
232
memcpy(cr->vals_rems, buf + pos, vals_rems_size);
235
for(i = 0; i < vals_rems_size; i++)
237
DEBUGP("pos = %u -- vals_rems_size = %u -- vals_rems[%u] = %u\n", pos, vals_rems_size, i, *(buf + pos + i));
240
pos += vals_rems_size;
242
DEBUGP("Loaded compressed rank structure with size %u bytes\n", buflen);
247
void compressed_rank_pack(compressed_rank_t *cr, void *cr_packed)
252
cmph_uint32 buflen = 0;
253
compressed_rank_dump(cr, &buf, &buflen);
254
memcpy(cr_packed, buf, buflen);
259
cmph_uint32 compressed_rank_packed_size(compressed_rank_t *cr)
261
register cmph_uint32 sel_size = select_packed_size(&cr->sel);
262
register cmph_uint32 vals_rems_size = BITS_TABLE_SIZE(cr->n, cr->rem_r) * (cmph_uint32)sizeof(cmph_uint32);
263
return 4 * (cmph_uint32)sizeof(cmph_uint32) + sel_size + vals_rems_size;
266
cmph_uint32 compressed_rank_query_packed(void * cr_packed, cmph_uint32 idx)
268
// unpacking cr_packed
269
register cmph_uint32 *ptr = (cmph_uint32 *)cr_packed;
270
register cmph_uint32 max_val = *ptr++;
271
register cmph_uint32 n = *ptr++;
272
register cmph_uint32 rem_r = *ptr++;
273
register cmph_uint32 buflen_sel = *ptr++;
274
register cmph_uint32 * sel_packed = ptr;
276
register cmph_uint32 * bits_vec = sel_packed + 2; // skipping n and m
278
register cmph_uint32 * vals_rems = (ptr += (buflen_sel >> 2));
280
// compressed sequence query computation
281
register cmph_uint32 rems_mask;
282
register cmph_uint32 val_quot, val_rem;
283
register cmph_uint32 sel_res, rank;
290
val_quot = idx >> rem_r;
291
rems_mask = (1U << rem_r) - 1U;
292
val_rem = idx & rems_mask;
299
sel_res = select_query_packed(sel_packed, val_quot - 1) + 1;
300
rank = sel_res - val_quot;
305
if(GETBIT32(bits_vec, sel_res))
309
if(get_bits_value(vals_rems, rank, rem_r, rems_mask) >= val_rem)