3
* Author: Peter G. Jensen
5
* Created on 11 March 2016, 14:15
10
#include "AlignedEncoder.h"
13
// DBOUND is twice SAMEBOUND. SAMEBOUND is not defined in the visible lines
// (presumably it comes from AlignedEncoder.h — confirm).
#define DBOUND (SAMEBOUND*2)
15
// Constructor: allocates the scratchpad and selects _psize from the number of places.
// NOTE(review): only part of the original body is visible; the initialisation of
// _places and any use of `k` are not shown here.
AlignedEncoder::AlignedEncoder(uint32_t places, uint32_t k)
19
// Byte budget: two uint32_t words plus one uint32_t per place.
size_t bytes = 2*sizeof(uint32_t) + (places*sizeof(uint32_t));
20
// The scratchpad is constructed with bytes*8 — presumably a size in bits; confirm against scratchpad_t.
_scratchpad = scratchpad_t(bytes*8);
21
// Sanity check: size() of the scratchpad must equal the byte budget (recomputed from _places).
assert(_scratchpad.size() == (2*sizeof(uint32_t) + (_places*sizeof(uint32_t))));
22
// Fewer than 256 places: _psize is 1 (presumably the byte width of a stored place index).
if(_places < 256) _psize = 1;
23
// Fewer than 65536 places: _psize is 2. The remaining case is not visible here.
else if(_places < 65536) _psize = 2;
29
// Destructor: calls release() on the scratchpad.
AlignedEncoder::~AlignedEncoder()
31
_scratchpad.release();
34
// Picks a size from ntokens: 1 when ntokens < 256, 2 when ntokens < 65536.
// NOTE(review): the declaration of `size`, the remaining branch and the return
// statement are not visible here; presumably the result is the byte width needed
// to hold a token count — confirm.
uint32_t AlignedEncoder::tokenBytes(uint32_t ntokens) const
37
if(ntokens < 256) size = 1;
38
else if(ntokens < 65536) size = 2;
43
// Writes one bit per place into the scratchpad, starting at byte `offset`.
// Returns `offset` advanced by scratchpad_t::bytes(_places).
// NOTE(review): the return type is uint32_t while `offset` is size_t, so the result is narrowed.
uint32_t AlignedEncoder::writeBitVector(size_t offset, const uint32_t* data)
45
for(size_t i = 0; i < _places; ++i)
47
// Bit index is i plus the byte offset converted to bits; the bit is set iff data[i] > 0.
_scratchpad.set(i+(offset*8), data[i] > 0);
49
return offset + scratchpad_t::bytes(_places);
52
// Writes two bits per place (bit positions i*2 and i*2+1, relative to byte `offset`)
// into the scratchpad. Returns `offset` advanced by scratchpad_t::bytes(_places*2).
// NOTE(review): the conditions that select which of the writes below execute are
// not visible here.
uint32_t AlignedEncoder::writeTwoBitVector(size_t offset, const uint32_t* data)
54
for(size_t i = 0; i < _places; ++i)
59
// Sets the first bit of the pair for place i.
_scratchpad.set((i*2)+(offset*8), true);
62
// Same first-bit write as above — presumably in a different branch; confirm.
_scratchpad.set((i*2)+(offset*8), true);
64
// Sets the second bit of the pair for place i.
_scratchpad.set((i*2)+(offset*8)+1, true);
72
return offset + scratchpad_t::bytes(_places*2);
75
// Reads a two-bits-per-place vector from `source` starting at byte `offset`.
// Returns `offset` advanced by scratchpad_t::bytes(_places*2).
// NOTE(review): the per-place decoding into `destination` is not visible here.
uint32_t AlignedEncoder::readTwoBitVector(uint32_t* destination, const unsigned char* source, uint32_t offset)
77
// Wraps the source bytes in a scratchpad_t of _places*2 (presumably bits); the cast drops const.
scratchpad_t b = scratchpad_t((unsigned char*)&source[offset], _places*2);
78
for(size_t i = 0; i < _places; ++i)
91
return offset + scratchpad_t::bytes(_places*2);
95
// Writes one T-sized slot per place into the scratchpad starting at byte `offset`.
// Returns `offset` advanced by _places*sizeof(T).
// NOTE(review): T is presumably a template parameter declared on a line not visible here.
uint32_t AlignedEncoder::writeTokens(size_t offset, const uint32_t* data)
97
// When T is as wide as uint32_t, the input array is copied in one memcpy.
if(sizeof(T) == sizeof(uint32_t))
99
memcpy(&(_scratchpad.raw()[offset]), data, _places*sizeof(T));
103
// Per-place path (presumably the else branch); the store through `dest` is not visible here.
for(size_t i = 0; i < _places; ++i)
105
// dest addresses slot i of the T-sized array that starts at `offset`.
T* dest = (T*)(&_scratchpad.raw()[offset + (i*sizeof(T))]);
109
return offset + _places*sizeof(T);
113
// Reads one T-sized value per place from `source` (starting at byte `offset`)
// into destination[0.._places). Returns `offset` advanced by _places*sizeof(T).
// NOTE(review): T is presumably a template parameter declared on a line not visible here.
uint32_t AlignedEncoder::readTokens(uint32_t* destination, const unsigned char* source, uint32_t offset)
115
for(size_t i = 0; i < _places; ++i)
117
// src addresses slot i; the cast drops const and assumes the address is suitably aligned for T.
T* src = (T*)(&source[offset + (i*sizeof(T))]);
118
destination[i] = *src;
120
return offset + _places*sizeof(T);
124
// Writes a packed sequence of T-sized values into the scratchpad at byte `offset`.
// Returns `offset` advanced by cnt*sizeof(T).
// NOTE(review): the declaration/update of `cnt`, the condition selecting which places
// are written, and the store through `dest` are not visible here.
uint32_t AlignedEncoder::writeTokenCounts(size_t offset, const uint32_t* data)
128
for(size_t i = 0; i < _places; ++i)
132
// dest addresses slot `cnt` (not slot i) of the T-sized array at `offset`.
T* dest = (T*)(&_scratchpad.raw()[offset + (cnt*sizeof(T))]);
137
return offset + cnt*sizeof(T);
141
// Computes an end position for data laid out as a bit vector followed by further bytes.
// Returns offset + b.size() + cnt.
// NOTE(review): how `cnt` is accumulated inside the loop is not visible here.
size_t AlignedEncoder::bitTokenCountsSize(const unsigned char* source, uint32_t offset) const
143
// Wraps the source bytes at `offset` as a scratchpad_t of _places (presumably bits); the cast drops const.
scratchpad_t b = scratchpad_t((unsigned char*)&source[offset], _places);
146
for(uint32_t i = 0; i < _places; ++i)
153
return offset + b.size() + cnt;
157
// Reads values laid out as a bit vector followed by T-sized values into `destination`.
// NOTE(review): the test on the bit vector, the update of `cnt` and the return
// statement are not visible here.
uint32_t AlignedEncoder::readBitTokenCounts(uint32_t* destination, const unsigned char* source, uint32_t offset) const
159
// ts points just past the bit vector, i.e. scratchpad_t::bytes(_places) bytes after `offset`.
const unsigned char* ts = &source[offset + scratchpad_t::bytes(_places)];
160
// b wraps the bit vector itself; the cast drops const.
scratchpad_t b = scratchpad_t((unsigned char*)&source[offset], _places);
163
for(uint32_t i = 0; i < _places; ++i)
167
// The value for place i is read as a T at byte position `cnt` within ts.
destination[i] = *((T*)&ts[cnt]);
175
// Computes an end position from a count read at source[offset].
// Returns offset + (_psize * size) + (size * sizeof(T)).
// NOTE(review): the selector choosing between the reads below (presumably a switch
// on _psize) and any adjustment of `offset` are not visible here.
size_t AlignedEncoder::placeTokenCountsSize(const unsigned char* source, uint32_t offset) const
181
// Count stored in a single byte.
size = source[offset];
184
// Count stored as uint16_t.
size = *(uint16_t*)(&source[offset]);
187
// Count stored as uint32_t.
size = *(uint32_t*)(&source[offset]);
190
size = std::numeric_limits<size_t>::max(); // should provoke an error
194
return offset + (_psize * size) + (size*sizeof(T));
198
// Reads `size` (position, value) pairs: positions are stored first, T-sized values after
// them, and each value is written to destination[pos].
// NOTE(review): the selectors choosing between the 1/2/4-byte reads (presumably switches
// on _psize) and any adjustment of `offset` are not visible here.
uint32_t AlignedEncoder::readPlaceTokenCounts(uint32_t* destination, const unsigned char* source, uint32_t offset) const
204
// Count stored in a single byte.
size = source[offset];
207
// Count stored as uint16_t.
size = *(uint16_t*)(&source[offset]);
210
// Count stored as uint32_t.
size = *(uint32_t*)(&source[offset]);
213
size = std::numeric_limits<size_t>::max(); // should provoke an error
218
// ts points at the values, which start _psize*size bytes after `offset`.
const unsigned char* ts = &source[offset + (_psize*size)];
219
for(size_t i = 0; i < size; ++i)
225
// Position stored in one byte.
pos = source[offset + i];
228
// Position stored as uint16_t.
pos = *((uint16_t*)&source[offset + i*2]);
231
// Position stored as uint32_t.
pos = *((uint32_t*)&source[offset + i*4]);
238
// The i-th T-sized value belongs to place `pos`.
destination[pos] = *((T*)&ts[i*sizeof(T)]);
240
// NOTE(review): this adds only `size`, whereas placeTokenCountsSize adds
// (_psize*size) + (size*sizeof(T)); confirm whether that difference is intended.
return offset + size;
243
// Writes a count followed by place indices into the scratchpad at byte `offset`.
// Returns offset + _psize + cnt*_psize, i.e. one _psize-wide count slot plus cnt index slots.
// NOTE(review): the condition selecting which places are written, the update of `cnt`,
// and the 32-bit and wider-count stores are not visible here.
uint32_t AlignedEncoder::writePlaces(size_t offset, const uint32_t* data)
246
// 16-bit and 32-bit views of the same scratchpad region starting at `offset`.
uint16_t* dest16 = (uint16_t*)(&_scratchpad.raw()[offset]);
247
uint32_t* dest32 = (uint32_t*)(&_scratchpad.raw()[offset]);
248
for(size_t i = 0; i < _places; ++i)
255
// One-byte index: slot 0 is skipped (+1), so index cnt lands in byte cnt+1.
_scratchpad.raw()[offset + cnt + 1] = (unsigned char)i;
258
// Two-byte index: likewise stored one slot past the start.
dest16[cnt+1] = (uint16_t)i;
274
// The first byte at `offset` receives the count (one-byte case).
_scratchpad.raw()[offset] = (unsigned char)cnt;
285
return offset + _psize + cnt*_psize;
288
// Reads a count and then that many place indices from `source`, storing `value`
// at destination[index] for each. Returns offset + _psize*size.
// NOTE(review): the selectors choosing between the 1/2/4-byte paths (presumably switches
// on _psize) and any adjustment of `offset` after the count are not visible here.
uint32_t AlignedEncoder::readPlaces(uint32_t* destination, const unsigned char* source, uint32_t offset, uint32_t value)
294
// Count stored in a single byte.
size = source[offset];
297
// Count stored as uint16_t.
size = *(uint16_t*)(&source[offset]);
300
// Count stored as uint32_t.
size = *(uint32_t*)(&source[offset]);
303
size = std::numeric_limits<size_t>::max(); // should provoke an error
309
// 16-bit and 32-bit views of the source bytes at `offset`; the casts drop const.
uint16_t* raw16 = (uint16_t*) &source[offset];
310
uint32_t* raw32 = (uint32_t*) &source[offset];
311
for(size_t i = 0; i < size; ++i)
316
// One-byte index.
destination[source[i+offset]] = value;
319
// Two-byte index.
destination[raw16[i]] = value;
322
// Four-byte index.
destination[raw32[i]] = value;
328
return offset + _psize*size;
331
// Reads a one-bit-per-place vector from `source` at byte `offset` and stores `value`
// into destination entries. Returns offset + b.size().
// NOTE(review): the test guarding the assignment (presumably "bit i is set") is not visible here.
uint32_t AlignedEncoder::readBitVector(uint32_t* destination, const unsigned char* source, uint32_t offset, uint32_t value)
333
// Wraps the source bytes as a scratchpad_t of _places (presumably bits); the cast drops const.
scratchpad_t b = scratchpad_t((unsigned char*)&source[offset], _places);
334
for(uint32_t i = 0; i < _places; ++i)
338
destination[i] = value;
345
return offset + b.size();
348
// Selects an encoding type byte by comparing the byte sizes of candidate layouts.
// NOTE(review): most return statements and the final else branch are not visible here,
// and the meaning of `sum`, `pwt`, `same` and `val` is inferred only from how they are
// used below — confirm against the header.
unsigned char AlignedEncoder::getType(uint32_t sum, uint32_t pwt, bool same, uint32_t val) const
350
// pwt == 0 always yields type 0.
if(pwt == 0) return 0;
351
// Case: `same` is set and val does not exceed SAMEBOUND.
if(same && val <= SAMEBOUND)
353
// Size of a one-bit-per-place vector.
size_t bvsize = scratchpad_t::bytes(_places);
354
// Size of a count slot plus pwt index slots, each _psize bytes.
size_t indirect = _psize+pwt*_psize;
356
if(bvsize <= indirect)
362
// Type byte offset by SAMEBOUND (decode subtracts SAMEBOUND again for this range).
return SAMEBOUND+val;
367
// General case: tsize is the width chosen by tokenBytes(val).
size_t tsize = tokenBytes(val);
368
// Size of a two-bits-per-place vector.
size_t bvsize = scratchpad_t::bytes(_places*2);
369
// Count slot plus pwt entries of (index + value).
size_t indirect = _psize+pwt*(_psize+tsize);
370
// One-bit-per-place vector plus pwt values.
size_t bvindirect = scratchpad_t::bytes(_places)+pwt*tsize;
371
// One value per place.
size_t direct = _places*tsize;
373
// Two-bit vector is considered only for val < 4 and when it is no larger than both alternatives.
if(val < 4 && bvsize <= indirect && bvsize <= bvindirect)
377
else if(direct <= indirect && direct <= bvindirect)
391
else if(indirect <= bvindirect)
424
// Returns the encoded size in bytes of the record starting at `s`, determined by the
// type byte s[0].
// NOTE(review): the selectors (switch/case labels) between the branches below are not
// visible here; comments describe only what each visible statement computes.
size_t AlignedEncoder::size(const uchar* s) const
426
unsigned char type = s[0];
427
// Types up to SAMEBOUND: one type byte plus a one-bit-per-place vector, rounded up to whole bytes.
if(type <= SAMEBOUND)
429
if((_places % 8) == 0) return 1 + (_places / 8);
430
else return 2 + (_places / 8);
442
// Count read as uint16_t from s[1]; (count + 1) slots of two bytes each.
size = 1 + *(uint16_t*)(&s[1]);
443
size *= sizeof(uint16_t);
446
// Count read as uint32_t from s[1]; (count + 1) slots of four bytes each.
size = 1 + *(uint32_t*)(&s[1]);
447
size *= sizeof(uint32_t);
450
size = std::numeric_limits<size_t>::max(); // should provoke an error
459
// One type byte plus a two-bits-per-place vector, rounded up to whole bytes.
if((_places % 4) == 0) return 1 + (_places / 4);
460
else return 2 + (_places / 4);
462
// One type byte plus one fixed-width value per place (1, 2 or 4 bytes wide).
return 1 + (sizeof(unsigned char)*_places);
464
return 1 + (sizeof(uint16_t)*_places);
466
return 1 + (sizeof(uint32_t)*_places);
468
// Layouts whose size depends on the data: delegate to the matching size helper,
// starting after the type byte.
return placeTokenCountsSize<unsigned char>((unsigned char*)s, 1);
470
return placeTokenCountsSize<uint16_t>((unsigned char*)s, 1);
472
return placeTokenCountsSize<uint32_t>((unsigned char*)s, 1);
474
return bitTokenCountsSize<unsigned char>((unsigned char*)s, 1);
476
return bitTokenCountsSize<uint16_t>((unsigned char*)s, 1);
478
return bitTokenCountsSize<uint32_t>((unsigned char*)s, 1);
481
// Fallback sentinel. numeric_limits<size_t>::infinity() yields 0 for integer types
// (has_infinity is false), which would silently report an empty record; max() matches
// the other sentinels in this file.
return std::numeric_limits<size_t>::max(); // should provoke an error
485
// Encodes the marking `d` into the scratchpad using layout `type`, and returns the
// value produced by the last writer called (each writer returns an advanced offset).
// NOTE(review): the selectors (switch/case labels) between the branches below are not
// visible here.
size_t AlignedEncoder::encode(const uint32_t* d, unsigned char type)
488
// Byte 0 of the scratchpad always holds the type; payload starts at offset 1.
_scratchpad.raw()[0] = type;
489
// Types up to SAMEBOUND: one-bit-per-place vector.
if(type <= SAMEBOUND)
491
return writeBitVector(1, d);
495
// List of place indices.
return writePlaces(1, d);
501
// Two-bits-per-place vector.
return writeTwoBitVector(1,d);
503
// One fixed-width value per place (1, 2 or 4 bytes wide).
return writeTokens<unsigned char>(1, d);
505
return writeTokens<uint16_t>(1, d);
507
return writeTokens<uint32_t>(1, d);
510
// Place list followed by token counts, for each of the three value widths.
size_t size = writePlaces(1, d);
511
return writeTokenCounts<unsigned char>(size, d);
515
size_t size = writePlaces(1, d);
516
return writeTokenCounts<uint16_t>(size, d);
520
size_t size = writePlaces(1, d);
521
return writeTokenCounts<uint32_t>(size, d);
525
// Bit vector followed by token counts, for each of the three value widths.
size_t size = writeBitVector(1, d);
526
return writeTokenCounts<unsigned char>(size, d);
530
size_t size = writeBitVector(1, d);
531
return writeTokenCounts<uint16_t>(size, d);
535
size_t size = writeBitVector(1, d);
536
return writeTokenCounts<uint32_t>(size, d);
545
// Decodes the record at `s` into `d`, dispatching on the type byte s[0].
// NOTE(review): the selectors (switch/case labels) between the reader calls below are
// not visible here.
void AlignedEncoder::decode(uint32_t* d, const unsigned char* s)
547
// Start from all zeros; the readers below only write the entries they decode.
memset(d, 0, sizeof(uint32_t)*_places);
548
unsigned char type = s[0];
549
// Types up to SAMEBOUND: bit vector, with the type byte itself passed as the stored value.
if(type <= SAMEBOUND)
551
readBitVector(d, s, 1, type);
556
// Place list, with (type - SAMEBOUND) passed as the stored value.
readPlaces(d, s, 1, type - SAMEBOUND);
563
// Two-bits-per-place vector.
readTwoBitVector(d,s,1);
566
// One fixed-width value per place (1, 2 or 4 bytes wide).
readTokens<unsigned char>(d,s,1);
569
readTokens<uint16_t>(d,s,1);
572
readTokens<uint32_t>(d,s,1);
575
// Place list followed by token counts, for each of the three value widths.
readPlaceTokenCounts<unsigned char>(d, s, 1);
578
readPlaceTokenCounts<uint16_t>(d, s, 1);
581
readPlaceTokenCounts<uint32_t>(d, s, 1);
584
// Bit vector followed by token counts, for each of the three value widths.
readBitTokenCounts<unsigned char>(d, s, 1);
587
readBitTokenCounts<uint16_t>(d, s, 1);
590
readBitTokenCounts<uint32_t>(d, s, 1);