2
Copyright (c) by respective owners including Yahoo!, Microsoft, and
3
individual contributors. All rights reserved. Released under a BSD (revised)
4
license as described in the file LICENSE.
7
#include "unique_sort.h"
8
#include "global_data.h"
12
// Flag bits carried in the two low bits of an encoded feature index.
// The writer shifts the zig-zag encoded index delta left by 2 and ORs one of
// these in; the reader tests them and then shifts the value right by 2.
// neg_1: OR'ed in by output_features when the feature value equals -1.
const size_t neg_1 = 1;
13
// general: when set, read_cached_features loads the feature value from the
// buffer as a float instead of using an implied value.
const size_t general = 2;
15
// Decodes a variable-length integer stored 7 bits per byte starting at p and
// ORs the decoded bits into i.
// NOTE(review): bits are merged with `i | ...`, so any bits already set in i
// survive unless i is cleared in lines not visible here — confirm.
// NOTE(review): the counter declaration, the loop header and the return
// statement are not visible in this excerpt; the caller uses the returned
// pointer as its new read cursor, so presumably the advanced p is returned.
char* run_len_decode(char *p, uint32_t& i)
16
{// read an int 7 bits at a time.
19
// Take the low 7 bits of the current byte and place them at bit 7*count,
// then advance to the next byte and the next 7-bit group.
i = i | ((*(p++) & 127) << 7*count++);
20
// This byte is merged without masking off bit 7.
i = i | (*(p++) << 7*count);
24
// Undo zig-zag encoding: even inputs map to non-negative values (n / 2) and
// odd inputs map to negative values (-(n + 1) / 2).
inline int32_t ZigZagDecode(uint32_t n)
{
  // Upper 31 bits hold the magnitude part of the encoded value.
  const uint32_t magnitude = n >> 1;
  // 0 when the low bit is clear, all ones (-1) when it is set.
  const int32_t sign_mask = -static_cast<int32_t>(n & 1);
  return magnitude ^ sign_mask;
}
26
// Reads a length-prefixed tag from the cache buffer into ae->tag.
// The layout read here is a size_t byte count followed by that many tag
// bytes. On the path visible here the return value is the total number of
// bytes consumed (tag bytes plus the size_t prefix).
// NOTE(review): the declarations of c and tag_size and the bodies of the two
// short-read checks are not visible in this excerpt; the caller treats a
// return value of 0 specially, so presumably the failure paths return 0.
size_t read_cached_tag(io_buf& cache, example* ae)
30
// Fetch the size prefix; a short read means the prefix is incomplete.
if (buf_read(cache, c, sizeof(tag_size)) < sizeof(tag_size))
32
// NOTE(review): reads a size_t through a cast char pointer; this relies on
// the platform tolerating the access at whatever alignment c has.
tag_size = *(size_t*)c;
33
c += sizeof(tag_size);
35
// Fetch the tag bytes themselves.
if (buf_read(cache, c, tag_size) < tag_size)
39
push_many(ae->tag, c, tag_size);
40
return tag_size+sizeof(tag_size);
47
__attribute__((packed))
51
// Reads one cached example from all->p->input into ec: the label (through
// the label parser's read_cached_label), the tag, a one-byte count of index
// groups, and then for each group an index byte, a size_t byte count and the
// encoded features of that group.
// NOTE(review): this excerpt omits many lines (declarations of all, c, temp,
// f, last and char_size, the error-path bodies, the inner feature loop header
// and the return statements), so the comments below describe only the
// statements that are visible.
int read_cached_features(void* in, example* ec)
54
example* ae = (example*)ec;
55
ae->sorted = all->p->sorted_cache;
56
io_buf* input = all->p->input;
58
// The label parser reads its own cached representation of the label.
size_t total = all->p->lp->read_cached_label(all->sd, ae->ld, *input);
61
if (read_cached_tag(*input,ae) == 0)
64
// Number of index groups that follow, stored as a single byte.
unsigned char num_indices = 0;
65
if (buf_read(*input, c, sizeof(num_indices)) < sizeof(num_indices))
67
num_indices = *(unsigned char*)c;
68
c += sizeof(num_indices);
70
// Hand the advanced cursor back to the input buffer.
all->p->input->set(c);
71
for (;num_indices > 0; num_indices--)
74
unsigned char index = 0;
75
// Each group starts with a header: one index byte plus a size_t byte count.
if((temp = buf_read(*input,c,sizeof(index) + sizeof(size_t))) < sizeof(index) + sizeof(size_t)) {
76
cerr << "truncated example! " << temp << " " << char_size + sizeof(size_t) << endl;
80
index = *(unsigned char*)c;
82
ae->indices.push_back((size_t)index);
83
// Per-index feature array and squared-value accumulator for this group.
v_array<feature>* ours = ae->atomics+index;
84
float* our_sum_feat_sq = ae->sum_feat_sq+index;
85
// Byte length of the encoded features in this group.
// NOTE(review): presumably c was advanced past the index byte in a line not
// visible here — confirm.
size_t storage = *(size_t *)c;
87
all->p->input->set(c);
89
if (buf_read(*input,c,storage) < storage) {
90
cerr << "truncated example! wanted: " << storage << " bytes" << endl;
94
// One past the last byte of this group's encoded features.
char *end = c+storage;
101
// Decode the next variable-length value; its two low bits are flags and the
// remaining bits are a zig-zag encoded index delta.
c = run_len_decode(c,f.weight_index);
102
if (f.weight_index & neg_1)
104
else if (f.weight_index & general) {
105
// The feature value is stored explicitly as a float in the buffer.
f.x = ((one_float *)c)->f;
108
*our_sum_feat_sq += f.x*f.x;
109
// Drop the two flag bits, then undo the zig-zag encoding to get a signed
// delta relative to the previously decoded index.
uint32_t diff = f.weight_index >> 2;
111
int32_t s_diff = ZigZagDecode(diff);
114
f.weight_index = last + s_diff;
115
last = f.weight_index;
118
all->p->input->set(c);
124
// Writes i to the buffer at p as a variable-length integer, 7 bits per byte.
// NOTE(review): the loop header, the write of the final byte and the return
// statement are not visible in this excerpt; callers assign the result back
// to their write cursor, so presumably the advanced p is returned.
char* run_len_encode(char *p, size_t i)
125
{// store an int 7 bits at a time.
128
// Emit the low 7 bits of i with bit 7 set, then advance the cursor.
*(p++) = (i & 127) | 128;
135
// Zig-zag encodes a signed 32-bit value into an unsigned one:
// (n << 1) doubles the value and (n >> 31) is XOR'ed in as a sign mask.
// NOTE(review): left-shifting a negative signed value is undefined behaviour
// before C++20, and right-shifting one is implementation-defined; this relies
// on two's-complement arithmetic shifts — confirm for the target compilers.
// NOTE(review): the return statement and closing brace are not visible in
// this excerpt.
inline uint32_t ZigZagEncode(int32_t n) {
136
uint32_t ret = (n << 1) ^ (n >> 31);
140
// Emits a single byte to the cache buffer.
// NOTE(review): only the one-byte buf_write call is visible here; the
// declaration of c and the store of s are outside this excerpt.
void output_byte(io_buf& cache, unsigned char s)
144
buf_write(cache, c, 1);
149
// Writes one index group to the cache: the index byte, a size_t byte count,
// and the features in [begin, end) encoded as variable-length integers.
// Each feature index is masked, expressed as a delta from the previous one,
// zig-zag encoded and shifted left by 2 so the two low bits can carry flags.
// NOTE(review): this excerpt omits several lines (declarations of c and last,
// the cursor advances after the header, the first branch of the value test
// and the float store), so the comments describe only what is visible.
void output_features(io_buf& cache, unsigned char index, feature* begin, feature* end, uint32_t mask)
152
// Size estimate used for the reservation: int_size bytes per feature ...
size_t storage = (end-begin) * int_size;
153
for (feature* i = begin; i != end; i++)
154
// ... plus room for a float for every value that is neither 1 nor -1.
if (i->x != 1. && i->x != -1.)
155
storage+=sizeof(float);
156
// Reserve space for the index byte, the size field and the feature data.
buf_write(cache, c, sizeof(index) + storage + sizeof(size_t));
157
*(unsigned char*)c = index;
160
// Remember where the size field lives so it can be filled in at the end.
char *storage_size_loc = c;
165
for (feature* i = begin; i != end; i++)
167
uint32_t cache_index = (i->weight_index) & mask;
168
// Signed difference from the previously written index.
int32_t s_diff = (cache_index - last);
169
// Shift left by 2 to leave room for the neg_1 / general flag bits.
size_t diff = ZigZagEncode(s_diff) << 2;
172
// No flag bits set on this path.
c = run_len_encode(c, diff);
173
else if (i->x == -1.)
174
c = run_len_encode(c, diff | neg_1);
176
c = run_len_encode(c, diff | general);
182
// Back-patch the number of bytes actually written after the size field.
*(size_t*)storage_size_loc = c - storage_size_loc - sizeof(size_t);
185
// Writes the example tag to the cache as a size_t length followed by the tag
// bytes, matching the layout read_cached_tag reads back.
// NOTE(review): the declaration of c and the cursor advance between the
// length store and the memcpy are not visible in this excerpt.
void cache_tag(io_buf& cache, v_array<char> tag)
188
// Reserve room for the length prefix plus the tag bytes.
buf_write(cache, c, sizeof(size_t)+tag.size());
189
*(size_t*)c = tag.size();
191
memcpy(c, tag.begin, tag.size());
196
// Writes the feature part of an example to the cache: the tag, a one-byte
// count of index groups, then one output_features record per index.
// NOTE(review): the braces of this function are not visible in this excerpt.
void cache_features(io_buf& cache, example* ae, uint32_t mask)
198
cache_tag(cache,ae->tag);
199
// The count is narrowed to unsigned char, so only its low 8 bits are stored.
output_byte(cache, (unsigned char) ae->indices.size());
200
for (unsigned char* b = ae->indices.begin; b != ae->indices.end; b++)
201
output_features(cache, *b, ae->atomics[*b].begin,ae->atomics[*b].end, mask);