1
/* VerifyPN - TAPAAL Petri Net Engine
2
* Copyright (C) 2016 Peter Gjøl Jensen <root@petergjoel.dk>
4
* This program is free software: you can redistribute it and/or modify
5
* it under the terms of the GNU General Public License as published by
6
* the Free Software Foundation, either version 3 of the License, or
7
* (at your option) any later version.
9
* This program is distributed in the hope that it will be useful,
10
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
* GNU General Public License for more details.
14
* You should have received a copy of the GNU General Public License
15
* along with this program. If not, see <http://www.gnu.org/licenses/>.
20
* Author: Peter G. Jensen
22
* Created on 10 June 2015, 18:44
35
#include "binarywrapper.h"
36
#include "linked_bucket.h"
40
// direct 2lvl indexing in chunks of ~ 2^32
41
// takes up ~ 512k (256k on 32bit) for the index.
42
// when to keep data in bucket or send to heap - just make sure bucket does not overflow
45
typedef uint32_t uint;
46
typedef unsigned char uchar;
48
template<typename D, typename N>
56
struct ptrie_el_t<void, N> {
59
} __attribute__((packed));
60
typedef std::pair<bool, size_t> returntype_t;
65
} __attribute__((packed));
68
#define PTRIETPL uint16_t HEAPBOUND, uint16_t SPLITBOUND, size_t ALLOCSIZE, typename T, typename I
71
uint16_t HEAPBOUND = 17,
72
uint16_t SPLITBOUND = 129,
73
size_t ALLOCSIZE = (1024 * 64),
83
static_assert(HEAPBOUND * (sizeof(fwdnode_t)/sizeof(size_t)) < std::numeric_limits<uint16_t>::max(),
84
"HEAPBOUND * (sizeof(fwdnode_t)/sizeof(fwdnode_t)) should be less than 2^16");
86
static_assert(SPLITBOUND < sizeof(fwdnode_t),
87
"SPLITBOUND should be less than sizeof(fwdnode_t)");
89
static_assert(SPLITBOUND > 3,
90
"SPLITBOUND MUST BE LARGER THAN 3");
92
static_assert(HEAPBOUND > sizeof(size_t),
93
"HEAPBOUND MUST BE LARGER THAN sizeof(size_t)");
96
typedef ptrie_el_t<T, I> entry_t;
104
static inline size_t overhead(size_t count, bool hasent) {
106
return count * (sizeof (uint16_t) + sizeof (I));
108
return count * (sizeof (uint16_t));
111
inline I* entries(uint16_t count, bool hasent) {
112
if (hasent) return (I*) (data(0, hasent) + (count * (sizeof (uint16_t))));
116
inline uchar* data(uint16_t count, bool hasent) {
117
return ((uchar*) (&_data)) + overhead(count, hasent);
120
inline uint16_t& first(uint16_t count = 0, uint16_t index = 0) {
121
return ((uint16_t*) &_data)[index];
124
} __attribute__((packed));
128
struct node_t : public base_t {
129
uint16_t _totsize = 0;
130
uint16_t _count = 0; // bucket-counts
131
bucket_t* _data = NULL; // back-pointers to data-array up to date
132
} __attribute__((packed));
134
struct fwdnode_t : public base_t {
135
base_t* _children[256];
138
base_t*& operator[](size_t i) {
141
} __attribute__((packed));
143
linked_bucket_t<entry_t, ALLOCSIZE>* _entries = NULL;
148
fwdnode_t* new_fwd();
150
base_t* fast_forward(const binarywrapper_t& encoding, fwdnode_t** tree_pos, uint& byte);
151
bool bucket_search(const binarywrapper_t& encoding, node_t* node, uint& b_index, uint byte);
153
bool best_match(const binarywrapper_t& encoding, fwdnode_t** tree_pos, base_t** node, uint& byte, uint& b_index);
155
void split_node(node_t* node, fwdnode_t* jumppar, node_t* locked, size_t bsize, size_t byte);
157
void split_fwd(node_t* node, fwdnode_t* jumppar, node_t* locked, size_t bsize, size_t byte);
159
static inline uint16_t bytes(const uint16_t d) {
160
if (d >= HEAPBOUND) return sizeof (uchar*);
166
void erase(fwdnode_t* parent, node_t* node, size_t bucketid, int byte);
167
bool merge_down(fwdnode_t* parent, node_t* node, int byte);
168
void inject_byte(node_t* node, uchar topush, size_t totsize, std::function<uint16_t(size_t)> sizes);
175
returntype_t insert(binarywrapper_t wrapper);
176
returntype_t insert(const uchar* data, size_t length);
177
returntype_t exists(binarywrapper_t wrapper);
178
returntype_t exists(const uchar* data, size_t length);
179
bool erase (binarywrapper_t wrapper);
180
bool erase (const uchar* data, size_t length);
186
set<HEAPBOUND, SPLITBOUND, ALLOCSIZE, T, I>::~set() {
187
std::stack<fwdnode_t*> stack;
189
while(!stack.empty())
191
fwdnode_t* next = stack.top();
193
for(size_t i = 0; i < 256; ++i)
195
base_t* child = next->_children[i];
198
if(i > 0 && child == next->_children[i-1]) continue;
199
if(child->_type == 255)
201
stack.push((fwdnode_t*)child);
205
node_t* node = (node_t*) child;
206
// TODO: we should delete data here also!
218
void set<HEAPBOUND, SPLITBOUND, ALLOCSIZE, T, I>::init()
223
_entries = new linked_bucket_t<entry_t, ALLOCSIZE>(1);
227
_root->_parent = NULL;
232
for (; i < 256; ++i) (*_root)[i] = _root;
236
set<HEAPBOUND, SPLITBOUND, ALLOCSIZE, T, I>::set()
242
typename set<HEAPBOUND, SPLITBOUND, ALLOCSIZE, T, I>::node_t*
243
set<HEAPBOUND, SPLITBOUND, ALLOCSIZE, T, I>::new_node() {
248
typename set<HEAPBOUND, SPLITBOUND, ALLOCSIZE, T, I>::fwdnode_t*
249
set<HEAPBOUND, SPLITBOUND, ALLOCSIZE, T, I>::new_fwd() {
250
return new fwdnode_t;
255
set<HEAPBOUND, SPLITBOUND, ALLOCSIZE, T, I>::fast_forward(const binarywrapper_t& encoding, fwdnode_t** tree_pos, uint& byte) {
256
fwdnode_t* t_pos = *tree_pos;
258
uint16_t s = encoding.size(); // TODO remove minus to
259
uchar* sc = (uchar*) & s;
265
if (byte >= 2) next = (*t_pos)[encoding[byte - 2]];
266
else next = (*t_pos)[sc[1 - byte]];
268
assert(next != NULL);
273
if (next->_type != 255) {
274
return (node_t*) next;
276
t_pos = static_cast<fwdnode_t*> (next);
284
bool set<HEAPBOUND, SPLITBOUND, ALLOCSIZE, T, I>::bucket_search(const binarywrapper_t& encoding, node_t* node, uint& b_index, uint byte) {
285
// run through the stored data in the bucket, looking for matches
286
// start by creating an encoding that "points" to the "unmatched"
287
// part of the encoding. Notice, this is a shallow copy, no actual
288
// heap-allocation happens!
289
const bool hasent = _entries != NULL;
293
if (encoding.size() > byte) {
294
encsize = byte > 0 ? encoding.size() - byte : encoding.size();
298
const uchar* raw = encoding.const_raw() + byte;
300
uchar* tf = (uchar*) & first;
302
first = encoding.size();
308
tf[1] = encoding[-2 + byte];
309
tf[0] = encoding[-2 + byte + 1];
312
bucket_t* bucket = node->_data;
313
if (node->_count > 0) {
317
uchar* bsc = (uchar*) & bs;
319
uchar* fc = (uchar*) & f;
320
for (; b_index < node->_count; ++b_index) {
321
f = bucket->first(node->_count, b_index);
322
if (f >= first) break;
323
if (byte > 1) bs = encsize;
324
else if (byte == 1) {
325
bs = encoding.size();
329
// bs = bucket->bytes(b_index);
330
} else // if byte == 0
334
// bs = bucket->bytes(b_index);
337
// assert(bytes(bs) == bucket->bytes(b_index));
341
if (b_index >= node->_count ||
342
bucket->first(node->_count, b_index) > first) return false;
344
uchar* data = bucket->data(node->_count, hasent);
345
for (; b_index < node->_count; ++b_index) {
349
if (bucket->first(node->_count, b_index) > first) break;
350
// first is 2 bytes, which is size of counter, the first part of the tree
351
// if we reach here, things have same size
353
if (encsize < HEAPBOUND) {
354
for (; b < encsize; ++b) {
355
if (data[offset + b] != raw[b]) break;
361
assert(raw[b] != data[offset + b]);
362
if (raw[b] < data[offset + b]) {
366
// else continue search
369
uchar* ptr = *((uchar**) (&data[offset]));
370
int cmp = memcmp(ptr, raw, encsize);
381
offset += bytes(encsize);
390
bool set<HEAPBOUND, SPLITBOUND, ALLOCSIZE, T, I>::best_match(const binarywrapper_t& encoding, fwdnode_t** tree_pos, base_t** node,
391
uint& byte, uint& b_index) {
392
// run through tree as long as there are branches covering some of
394
*node = fast_forward(encoding, tree_pos, byte);
395
if((size_t)*node != (size_t)*tree_pos) {
396
return bucket_search(encoding, (node_t*)*node, b_index, byte);
405
void set<HEAPBOUND, SPLITBOUND, ALLOCSIZE, T, I>::split_fwd(node_t* node, fwdnode_t* jumppar, node_t* locked, size_t bsize, size_t byte) {
407
const uint16_t bucketsize = node->_count;
408
if(bucketsize < (sizeof(fwdnode_t) / sizeof(size_t))) return;
409
const bool hasent = _entries != NULL;
411
fwdnode_t* fwd_n = new_fwd();
413
// std::cerr << "SplitFWD " << (locked ? locked : node) << " OP : " << jumppar << " NP : " << fwd_n << " LOW : " << low_n << std::endl;
415
fwd_n->_parent = jumppar;
417
fwd_n->_path = node->_path;
420
node->_path = binarywrapper_t::_masks[0];
425
for (size_t i = 0; i < 256; ++i) (*fwd_n)[i] = (locked == NULL ? node : locked);
427
(*jumppar)[fwd_n->_path] = fwd_n;
436
bucket_t* bucket = node->_data;
438
uint16_t lengths[sizeof(fwdnode_t)/sizeof(size_t)];
439
for (int i = 0; i < bucketsize; ++i) {
443
uchar* f = (uchar*)&(bucket->first(bucketsize, i));
444
uchar* d = (uchar*)&(lengths[i]);
454
// assert(lengths[i] == bucket->length(i));
456
if ((bucket->first(bucketsize, i) & binarywrapper_t::_masks[0]) == 0) {
458
if (lengths[i] < (HEAPBOUND + 1)) {
459
if (lengths[i] > 1) {
460
lsize += lengths[i] - 1;
462
// otherwise new size = 0
464
lsize += bytes(lengths[i]);
468
if (lengths[i] < (HEAPBOUND + 1)) {
469
hsize += lengths[i] - 1;
471
hsize += bytes(lengths[i]);
474
bcnt += bytes(lengths[i]);
477
//if(bucketsize > 0)free(node->_data);
479
// allocate new buckets
480
node->_totsize = hsize > 0 ? hsize : 0;
482
if (hcnt == 0) node->_data = NULL;
483
else node->_data = (bucket_t*) malloc(node->_totsize +
484
bucket_t::overhead(node->_count, hasent));
486
lown._totsize = lsize > 0 ? lsize : 0;
488
if (lcnt == 0) lown._data = NULL;
489
else lown._data = (bucket_t*) malloc(lown._totsize +
490
bucket_t::overhead(lown._count, hasent));
496
#define LLENGTH(x) lengths[x] - 1
497
for (int i = 0; i < bucketsize; ++i) {
498
if (i < lown._count) {
499
lown._data->first(lown._count, i) = (bucket->first(bucketsize, i) << 8);
500
if (lengths[i] > 0) {
501
// lown._data->length(i) = LLENGTH(i);
502
uchar* dest = &(lown._data->data(lown._count, hasent)[lbcnt]);
503
if (LLENGTH(i) >= HEAPBOUND) {
504
uchar* data = (uchar*) malloc(LLENGTH(i));
505
memcpy(dest, &data, sizeof (uchar*));
507
// std::cout << "DATA FOR " << i << " in " << low_n << " IN " << (void*)dest << std::endl;
511
if (lengths[i] >= HEAPBOUND) {
512
src = *((uchar**)&(bucket->data(bucketsize, hasent)[bcnt]));
514
src = &(bucket->data(bucketsize, hasent)[bcnt]);
517
uchar* f = (uchar*)&(lown._data->first(lown._count, i));
524
if (lengths[i] >= HEAPBOUND) {
526
if (LLENGTH(i) >= HEAPBOUND) {
527
uchar* tmp = *((uchar**)&(lown._data->data(lown._count, hasent)[lbcnt]));
529
assert(memcmp(tmp, &(src[1]), LLENGTH(i)) == 0);
535
lbcnt += bytes(LLENGTH(i));
538
// std::cout << bucket->first(bucketsize, i) << std::endl;
539
// std::cout << i << " NFIRST " << lown._data->first(lown._count, i) << std::endl;
541
// assert(lown._data->length(i) == 0 && lengths[i] <= 1 ||
542
// lengths[i] - 1 == lown._data->length(i));
544
int j = i - lown._count;
545
node->_data->first(node->_count, j) = (bucket->first(bucketsize, i) << 8);
546
// node->_data->length(j) = lengths[i] - 1;
547
if (lengths[i] > 0) {
548
uchar* dest = &(node->_data->data(node->_count, hasent)[hbcnt]);
549
if (LLENGTH(i) >= HEAPBOUND) {
550
uchar* data = (uchar*) malloc(LLENGTH(i));
551
memcpy(dest, &data, sizeof (uchar*));
553
// std::cout << "DATA FOR " << j << " in " << node << " IN " << (void*)dest << std::endl;
557
if (lengths[i] < HEAPBOUND) {
558
src = &(bucket->data(bucketsize, hasent)[bcnt]);
560
src = *((uchar**)&(bucket->data(bucketsize, hasent)[bcnt]));
563
uchar* f = (uchar*) & node->_data->first(node->_count, j);
571
if (lengths[i] >= HEAPBOUND) {
573
if (LLENGTH(i) >= HEAPBOUND) {
574
uchar* tmp = *((uchar**)&(node->_data->data(node->_count, hasent)[hbcnt]));
576
assert(memcmp(tmp, &(src[1]), LLENGTH(i)) == 0);
583
hbcnt += bytes(LLENGTH(i));
584
assert(LLENGTH(i) == lengths[i] - 1);
585
// std::cout << bucket->first(bucketsize, i) << std::endl;
586
// std::cout << i << " NFIRST " << node->_data->first(node->_count, j) << std::endl;
590
bcnt += bytes(lengths[i]);
595
I* ents = bucket->entries(bucketsize, true);
596
for (size_t i = 0; i < bucketsize; ++i) {
597
entry_t& ent = _entries->operator[](ents[i]);
598
ent.node = (size_t)fwd_n;
599
ent.path = (bucket->first(bucketsize, i));
605
memcpy(node->_data->entries(bucketsize, true), bucket->entries(bucketsize, true), bucketsize * sizeof (I));
607
// std::cout << "SPLIT ALL HIGH" << std::endl;
609
for (size_t i = 0; i < 128; ++i) (*fwd_n)[i] = fwd_n;
610
split_node(node, fwd_n, locked, bsize > 0 ? bsize - 1 : 0, byte + 1);
612
else if (hcnt == 0) {
614
memcpy(lown._data->entries(bucketsize, true), bucket->entries(bucketsize, true), bucketsize * sizeof (I));
615
for (size_t i = 128; i < 256; ++i) (*fwd_n)[i] = fwd_n;
617
// std::cout << "SPLIT ALL LOW" << std::endl;
618
node->_data = lown._data;
619
node->_path = lown._path;
620
node->_count = lown._count;
621
node->_totsize = lown._totsize;
622
node->_type = lown._type;
623
split_node(node, fwd_n, locked, bsize > 0 ? bsize - 1 : 0, byte + 1);
625
node_t* low_n = new_node();
626
low_n->_data = lown._data;
627
low_n->_totsize = lown._totsize;
628
low_n->_count = lown._count;
629
low_n->_path = lown._path;
630
low_n->_type = lown._type;
631
for (size_t i = 0; i < 128; ++i) (*fwd_n)[i] = low_n;
633
// We are stopping splitting here, so correct entries if needed
634
I* ents = bucket->entries(bucketsize, true);
636
for (size_t i = 0; i < bucketsize; ++i) {
637
if (i < lown._count) lown._data->entries(lown._count, true)[i] = ents[i];
638
else node->_data->entries(node->_count, true)[i - lown._count] = ents[i];
642
if(lown._count >= SPLITBOUND && lown._count >= node->_count)
644
split_node(low_n, fwd_n, NULL, bsize > 0 ? bsize - 1 : 0, byte + 1);
646
if(node->_count >= SPLITBOUND)
648
split_node(node, fwd_n, locked, bsize > 0 ? bsize - 1 : 0, byte + 1);
650
// std::cout << "SPLIT Moving LOW " << lown._count << " TO " << low_n << std::endl;
651
// std::cout << "SPLIT Moving HIGH " << node->_count << " TO " << node << std::endl;
657
void set<HEAPBOUND, SPLITBOUND, ALLOCSIZE, T, I>::split_node(node_t* node, fwdnode_t* jumppar, node_t* locked, size_t bsize, size_t byte) {
659
assert(bsize != std::numeric_limits<size_t>::max());
660
if (node->_type == 8) // new fwd node!
663
split_fwd(node, jumppar, locked, bsize, byte);
667
const uint16_t bucketsize = node->_count;
670
// std::cerr << "Split " << (locked ? locked : node) << " high : " << h_node << std::endl;
672
hnode._type = node->_type + 1;
673
assert(hnode._type <= 8);
674
assert(node->_type <= 8);
676
//assert(n_node->_lck == 0);
677
assert((node->_path & binarywrapper_t::_masks[node->_type]) == 0);
678
hnode._path = node->_path | binarywrapper_t::_masks[node->_type];
680
// because we are only really shifting around bits when enc_pos % 8 = 0
681
// then we need to find out which bit of the first 8 we are
682
// splitting on in the "remainder"
683
const uint r_pos = node->_type;
686
// Copy over the data to the new buckets
689
int hcnt = bucketsize;
696
for (size_t i = 0; i < bucketsize; i++) {
698
uchar* f = (uchar*) & node->_data->first(bucketsize, i);
699
if ((f[1] & binarywrapper_t::_masks[r_pos]) > 0) {
711
uchar* fcc = (uchar*) & fc;
723
// assert(bytes(fc) == node->_data->bytes(i));
728
bucket_t* old = node->_data;
731
hnode._totsize = node->_totsize - lsize;
733
node->_totsize = lsize;
737
assert(hnode._totsize == bytes(bsize) * hnode._count);
738
assert(node->_totsize == bytes(bsize) * node->_count);
741
size_t dist = (hnode._path - node->_path);
746
const bool hasent = _entries != NULL;
748
if (node->_count == 0) // only high node has data
750
// std::cout << "ALL HIGH" << std::endl;
751
for(size_t i = node->_path; i < hnode._path; ++i)
753
assert(jumppar->_children[i] == node);
754
jumppar->_children[i] = jumppar;
757
node->_path = hnode._path;
758
node->_count = hnode._count;
760
node->_totsize = hnode._totsize;
761
node->_type = hnode._type;
764
split_node(node, jumppar, locked, bsize, byte);
766
else if (hnode._count == 0) // only low node has data
768
// std::cout << "ALL LOW" << std::endl;
769
for(size_t i = hnode._path; i < hnode._path + dist; ++i)
771
assert(jumppar->_children[i] == node);
772
jumppar->_children[i] = jumppar;
776
split_node(node, jumppar, locked, bsize, byte);
778
node_t* h_node = new_node();
779
h_node->_count = hnode._count;
780
h_node->_type = hnode._type;
781
h_node->_path = hnode._path;
782
h_node->_totsize = hnode._totsize;
783
h_node->_data = hnode._data;
785
for(size_t i = hnode._path; i < hnode._path + dist; ++i)
787
assert(jumppar->_children[i] == node);
788
jumppar->_children[i] = h_node;
791
// std::cout << "Moving LOW " << node->_count << " TO " << node << std::endl;
792
// std::cout << "Moving HIGH " << h_node->_count << " TO " << h_node << std::endl;
793
h_node->_data = (bucket_t*) malloc(h_node->_totsize +
794
bucket_t::overhead(h_node->_count, hasent));
795
node->_data = (bucket_t*) malloc(node->_totsize +
796
bucket_t::overhead(node->_count, hasent));
799
memcpy(&node->_data->first(node->_count), &(old->first(bucketsize)), sizeof (uint16_t) * node->_count);
800
memcpy(&h_node->_data->first(h_node->_count), &(old->first(bucketsize, node->_count)), sizeof (uint16_t) * h_node->_count);
803
memcpy(node->_data->data(node->_count, hasent), old->data(bucketsize, hasent), node->_totsize);
804
memcpy(h_node->_data->data(h_node->_count, hasent), &(old->data(bucketsize, hasent)[node->_totsize]), h_node->_totsize);
807
I* ents = old->entries(bucketsize, true);
809
for (size_t i = 0; i < bucketsize; ++i) {
810
if (i < node->_count) node->_data->entries(node->_count, true)[i] = ents[i];
811
else h_node->_data->entries(h_node->_count, true)[i - node->_count] = ents[i];
816
if(node->_count >= SPLITBOUND && node->_count >= h_node->_count)
818
split_node(node, jumppar, locked, bsize, byte);
820
if(h_node->_count >= SPLITBOUND)
822
split_node(h_node, jumppar, NULL, bsize, byte);
828
std::pair<bool, size_t>
829
set<HEAPBOUND, SPLITBOUND, ALLOCSIZE, T, I>::exists(const uchar* data, size_t length) {
830
assert(length <= 65536);
831
binarywrapper_t encoding((uchar*) data, length * 8);
832
// memcpy(encoding.raw()+2, data, length);
834
// memcpy(encoding.raw(), &length, 2);
838
fwdnode_t* fwd = _root;
843
bool res = best_match(encoding, &fwd, &base, byte, b_index);
844
returntype_t ret = returntype_t(res, std::numeric_limits<size_t>::max());
845
if((size_t)fwd != (size_t)base) {
846
node_t* node = (node_t*)base;
847
if (_entries != NULL && res) {
848
ret.second = node->_data->entries(node->_count, true)[b_index];
856
set<HEAPBOUND, SPLITBOUND, ALLOCSIZE, T, I>::insert(const uchar* data, size_t length) {
857
assert(length <= 65536);
858
binarywrapper_t e2((uchar*) data, length * 8);
859
const bool hasent = _entries != NULL;
860
// binarywrapper_t encoding(length*8+16);
861
// memcpy(encoding.raw()+2, data, length);
863
// memcpy(encoding.raw(), &length, 2);
867
fwdnode_t* fwd = _root;
872
bool res = best_match(e2, &fwd, &base, byte, b_index);
873
if (res) { // We are not inserting duplicates, semantics of PTrie is a set.
874
returntype_t ret(false, 0);
876
node = (node_t*)base;
877
ret = returntype_t(false, node->_data->entries(node->_count, true)[b_index]);
882
if((size_t)base == (size_t)fwd)
891
size_t s = e2.size();
892
uchar* sc = (uchar*) & s;
893
uchar b = (byte < 2 ? sc[1 - byte] : e2[byte-2]);
902
min = min & (~binarywrapper_t::_masks[bit]);
903
max |= binarywrapper_t::_masks[bit];
904
for (int i = min; i <= max ; ++i) {
905
if(fwd->_children[i] != fwd)
907
max = (max & ~binarywrapper_t::_masks[bit]) | (binarywrapper_t::_masks[bit] & b);
908
min = min | (binarywrapper_t::_masks[bit] & b);
914
} while(bit > 0 && !stop);
916
for (size_t i = min; i <= max; ++i) (*fwd)[i] = node;
921
node = (node_t*)base;
926
binarywrapper_t nenc(e2.const_raw(),
927
(e2.size() - byte)*8,
931
// std::cout << "COULD NOT FIND of size " << e2.size() << std::endl;
932
// e2.print(std::cout);
933
// std::cout << "Inserted into " << node << std::endl;
935
// make a new bucket, add new entry, copy over old data
937
uint nbucketcount = node->_count + 1;
938
uint nitemsize = nenc.size();
940
if (nitemsize >= HEAPBOUND) {
942
nitemsize = sizeof (uchar*);
945
uint nbucketsize = node->_totsize + nitemsize;
947
bucket_t* nbucket = (bucket_t*) malloc(nbucketsize +
948
bucket_t::overhead(nbucketcount, hasent));
950
// copy over old "firsts"
951
memcpy(&nbucket->first(nbucketcount), &(node->_data->first(node->_count)), b_index * sizeof (uint16_t));
952
memcpy(&(nbucket->first(nbucketcount, b_index + 1)), &(node->_data->first(node->_count, b_index)),
953
(node->_count - b_index) * sizeof (uint16_t));
955
uchar* f = (uchar*) & nbucket->first(nbucketcount, b_index);
957
f[1] = e2[-2 + byte];
958
f[0] = e2[-2 + byte + 1];
960
nbucket->first(nbucketcount, b_index) = e2.size();
962
nbucket->first(nbucketcount, b_index) <<= 8;
970
memcpy(nbucket->entries(nbucketcount, true), node->_data->entries(node->_count, true), b_index * sizeof (I));
971
memcpy(&(nbucket->entries(nbucketcount, true)[b_index + 1]), &(node->_data->entries(node->_count, true)[b_index]),
972
(node->_count - b_index) * sizeof (I));
974
entry = nbucket->entries(nbucketcount, true)[b_index] = _entries->next(0);
975
entry_t& ent = _entries->operator[](entry);
976
ent.node = (size_t)fwd;
977
ent.path = (nbucket->first(nbucketcount, b_index) >> 8);
983
if (byte >= 2) tmpsize = b_index * bytes(nenc.size());
985
uint16_t o = e2.size();
986
for (size_t i = 0; i < b_index; ++i) {
988
uint16_t f = node->_data->first(nbucketcount - 1, i);
989
uchar* fc = (uchar*) & f;
990
uchar* oc = (uchar*) & o;
997
// assert(bytes(f) == nbucket->bytes(i));
1000
// copy over old data
1001
memcpy(nbucket->data(nbucketcount, hasent),
1002
node->_data->data(node->_count, hasent), tmpsize);
1004
memcpy(&(nbucket->data(nbucketcount, hasent)[tmpsize + nitemsize]),
1005
&(node->_data->data(node->_count, hasent)[tmpsize]), (node->_totsize - tmpsize));
1007
// copy over new data
1009
memcpy(&(nbucket->data(nbucketcount, hasent)[tmpsize]),
1010
nenc.const_raw(), nenc.size());
1013
uchar* data = (uchar*) malloc(nenc.size());
1015
// copy data to heap
1016
memcpy(data, nenc.const_raw(), nenc.size());
1019
memcpy(&(nbucket->data(nbucketcount, hasent)[tmpsize]),
1020
&data, sizeof (uchar*));
1023
// if needed, split the node
1026
node->_data = nbucket;
1027
node->_count = nbucketcount;
1028
node->_totsize = nbucketsize;
1030
// std::cout << " FIRST " << tmp._data->first(nbucketcount, b_index) << std::endl;
1031
// std::cout << "NODE " << node << " SIZE : " << nbucketsize << std::endl;
1032
if (node->_count >= SPLITBOUND) {
1033
// copy over data to we can work while readers finish
1034
// we have to wait for readers to finish for
1036
split_node(node, fwd, node, nenc.size(), byte);
1040
for (int i = byte - 1; i >= 2; --i) {
1041
assert(fwd != NULL);
1042
assert(e2[-2 + i] == fwd->_path);
1043
assert(fwd->_parent == NULL || fwd->_parent->_children[fwd->_path] == fwd);
1047
auto r = exists(data, length);
1050
r = exists(data, length);
1054
return returntype_t(true, entry);
1059
set<HEAPBOUND, SPLITBOUND, ALLOCSIZE, T, I>::insert(binarywrapper_t wrapper) {
1060
return insert(wrapper.raw(), wrapper.size());
1065
set<HEAPBOUND, SPLITBOUND, ALLOCSIZE, T, I>::exists(binarywrapper_t wrapper) {
1066
return exists(wrapper.raw(), wrapper.size());
1072
set<HEAPBOUND, SPLITBOUND, ALLOCSIZE, T, I>::inject_byte(node_t* node, uchar topush, size_t totsize, std::function<uint16_t(size_t)> _sizes)
1074
const bool hasent = _entries != NULL;
1075
bucket_t *nbucket = node->_data;
1077
nbucket = (bucket_t *) malloc(totsize +
1078
bucket_t::overhead(node->_count,
1084
for(size_t i = 0; i < node->_count; ++i)
1086
auto const size = _sizes(i);
1087
uchar* f = (uchar*)&nbucket->first(node->_count, i);
1088
nbucket->first(node->_count, i) = node->_data->first(node->_count, i);
1092
// in some cases we need to expand to heap here!
1095
if(size < HEAPBOUND)
1097
nbucket->data(node->_count, hasent)[dcnt] = push;
1100
if(size < HEAPBOUND && size > 1)
1102
memcpy(&(nbucket->data(node->_count, hasent)[dcnt]),
1103
&(node->_data->data(node->_count, hasent)[ocnt]),
1108
else if(size >= HEAPBOUND)
1111
uchar* dest = (uchar*)malloc(size);
1112
memcpy(&(nbucket->data(node->_count, hasent)[dcnt]), &dest, sizeof(size_t));
1114
dcnt += sizeof(size_t);
1115
if(size == HEAPBOUND)
1117
src = &(node->_data->data(node->_count, hasent)[ocnt]);
1118
memcpy(dest, src, size - 1);
1123
assert(size > HEAPBOUND);
1124
// allready on heap, but we need to expand it
1125
src = *(uchar**)&(node->_data->data(node->_count, hasent)[ocnt]);
1126
memcpy(dest, src, size - 1);
1127
ocnt += sizeof(size_t);
1133
// also, handle entries here!
1136
assert(ocnt == node->_totsize);
1137
assert(totsize == dcnt);
1139
if(nbucket != node->_data) free(node->_data);
1141
node->_data = nbucket;
1147
set<HEAPBOUND, SPLITBOUND, ALLOCSIZE, T, I>::merge_down(fwdnode_t* parent, node_t* node, int on_heap)
1149
const bool hasent = _entries != NULL;
1150
if(node->_type == 0)
1152
if(node->_count < SPLITBOUND/3) return true;
1153
if(node->_count == 0)
1155
for(size_t i = 0; i < 256; ++i) parent->_children[i] = parent;
1158
if (parent != this->_root) {
1159
// we can remove fwd and go back one level
1160
parent->_parent->_children[parent->_path] = parent->_parent;
1162
fwdnode_t* next = parent->_parent;
1165
base_t* other = parent;
1166
for(size_t i = 0; i < 256; ++i)
1168
if(parent->_children[i] != parent && other != parent->_children[i])
1177
other = parent->_children[i];
1186
else if(other->_type != 255)
1188
node = (node_t*)other;
1189
return merge_down(parent, node, on_heap);
1191
else if(other != parent)
1193
assert(other->_type == 255);
1202
else if(parent != this->_root)
1204
// we need to re-add path to items here.
1205
if(parent->_parent == this->_root) {
1207
uint16_t sizes[256];
1209
for(size_t i = 0; i < node->_count; ++i)
1212
uchar* tc = (uchar*)&t;
1213
uchar* fc = (uchar*)&node->_data->first(node->_count, i);
1215
tc[1] = parent->_path;
1217
totsize += bytes(sizes[i]);
1220
inject_byte(node, parent->_path, totsize, [&sizes](size_t i )
1225
node->_path = parent->_path;
1226
parent->_parent->_children[node->_path] = node;
1228
node->_totsize = totsize;
1229
fwdnode_t* next = parent->_parent;
1231
return merge_down(next, node, on_heap + 1);
1235
assert(node->_count > 0);
1236
if(on_heap == std::numeric_limits<int>::min())
1239
uint16_t length = 0;
1240
uchar* l = (uchar*)&length;
1241
fwdnode_t* tmp = parent;
1243
while(tmp != this->_root)
1250
assert(length + 1 >= depth);
1256
// first copy in path to firsts.
1258
assert(on_heap >= 0);
1259
node->_path = parent->_path;
1260
parent->_parent->_children[node->_path] = node;
1261
fwdnode_t* next = parent->_parent;
1268
for(size_t i = 0; i < node->_count; ++i)
1270
uchar* f = (uchar*)&node->_data->first(node->_count, i);
1275
else if(on_heap > 0)
1277
size_t nbucketsize = 0;
1278
if(on_heap >= HEAPBOUND)
1280
nbucketsize = node->_count * sizeof(size_t);
1282
else//if(on_heap < HEAPBOUND)
1284
assert(on_heap >= 0);
1285
nbucketsize = on_heap * node->_count;
1288
inject_byte(node, node->_path, nbucketsize, [on_heap](size_t)
1293
node->_totsize = nbucketsize;
1295
return merge_down(next, node, on_heap + 1);
1298
if(parent != this->_root)
1300
assert(node->_count > 0);
1301
return merge_down(parent->_parent, node, on_heap);
1306
if(node->_count > SPLITBOUND / 3) return true;
1307
uchar path = node->_path;
1309
if(path & binarywrapper_t::_masks[node->_type - 1])
1311
child = parent->_children[
1312
path & ~binarywrapper_t::_masks[node->_type - 1]];
1316
child = parent->_children[
1317
path | binarywrapper_t::_masks[node->_type - 1]];
1320
assert(node != child);
1322
if(child->_type != node->_type && child->_type != 255)
1324
// The other node is not ready for merging yet.
1325
assert(child->_type > node->_type);
1331
if(child->_type != 255) {
1332
node_t *other = (node_t *) child;
1334
const uint nbucketcount = node->_count + other->_count;
1335
const uint nbucketsize = node->_totsize + other->_totsize;
1337
if (nbucketcount >= SPLITBOUND)
1340
bucket_t *nbucket = (bucket_t *) malloc(nbucketsize +
1341
bucket_t::overhead(nbucketcount, hasent));
1342
node_t *first = node;
1343
node_t *second = other;
1344
if (path & binarywrapper_t::_masks[node->_type - 1]) {
1345
std::swap(first, second);
1348
memcpy(&nbucket->first(nbucketcount),
1349
&(first->_data->first(first->_count)),
1350
first->_count * sizeof(uint16_t));
1352
memcpy(&(nbucket->first(nbucketcount, first->_count)),
1353
&(second->_data->first(second->_count)),
1354
second->_count * sizeof(uint16_t));
1357
// copy over entries
1358
memcpy(nbucket->entries(nbucketcount, true),
1359
first->_data->entries(first->_count, true),
1360
first->_count * sizeof(I));
1361
memcpy(&(nbucket->entries(nbucketcount, true)[first->_count]),
1362
second->_data->entries(second->_count, true),
1363
second->_count * sizeof(I));
1367
// copy over old data
1368
if (nbucketsize > 0) {
1369
memcpy(nbucket->data(nbucketcount, hasent),
1370
first->_data->data(first->_count, hasent), first->_totsize);
1372
memcpy(&(nbucket->data(nbucketcount, hasent)[first->_totsize]),
1373
second->_data->data(second->_count, hasent), second->_totsize);
1377
node->_data = nbucket;
1378
node->_totsize = nbucketsize;
1379
node->_count = nbucketcount;
1381
uchar from = node->_path & ~binarywrapper_t::_masks[node->_type - 1];
1383
for(size_t i = node->_type - 1; i < 8; ++i) {
1384
to = to | binarywrapper_t::_masks[i];
1387
if(child->_type == 255)
1389
if(child != parent) return true;
1390
for(size_t i = from; i <= to; ++i)
1392
if( parent->_children[i] != child &&
1393
parent->_children[i] != node)
1401
for(size_t i = from; i <= to; ++i)
1403
assert(parent->_children[i] == child ||
1404
parent->_children[i] == node);
1405
parent->_children[i] = node;
1407
return merge_down(parent, node, on_heap);
1415
set<HEAPBOUND, SPLITBOUND, ALLOCSIZE, T, I>::erase(fwdnode_t* parent, node_t* node, size_t bindex, int on_heap)
1417
const bool hasent = _entries != NULL;
1419
// first find size and amount before
1421
uint16_t before = 0;
1422
if (parent == this->_root)
1424
for(size_t i = 0; i < bindex; ++i)
1426
before += bytes(node->_data->first(node->_count, i));
1428
size = node->_data->first(node->_count, bindex);
1430
else if(parent->_parent == this->_root) {
1431
for(size_t i = 0; i <= bindex; ++i)
1434
uchar* tc = (uchar*)&t;
1435
uchar* fc = (uchar*)&node->_data->first(node->_count, i);
1437
tc[1] = parent->_path;
1439
if(i == bindex) size = t;
1440
else before += bytes(t);
1443
assert(on_heap != std::numeric_limits<int>::min());
1444
size = on_heap > 0 ? on_heap : 0;
1445
before = size * bindex;
1448
// got sizes, now we can remove data if we point to anything
1449
if(size >= HEAPBOUND)
1451
before = sizeof(size_t)*bindex;
1452
uchar* src = *((uchar**)&(node->_data->data(node->_count, hasent)[before]));
1454
size = sizeof(size_t);
1457
uint nbucketcount = node->_count - 1;
1458
if(nbucketcount > 0) {
1459
uint nbucketsize = node->_totsize - size;
1461
bucket_t *nbucket = (bucket_t *) malloc(nbucketsize +
1462
bucket_t::overhead(nbucketcount, hasent));
1464
// copy over old "firsts", [0,bindex) to [0,bindex) then (bindex,node->_count) to [bindex, nbucketcount)
1465
memcpy(&nbucket->first(nbucketcount),
1466
&(node->_data->first(node->_count)),
1467
bindex * sizeof(uint16_t));
1469
memcpy(&(nbucket->first(nbucketcount, bindex)),
1470
&(node->_data->first(node->_count, bindex + 1)),
1471
(nbucketcount - bindex) * sizeof(uint16_t));
1474
// copy over entries
1475
memcpy(nbucket->entries(nbucketcount, true),
1476
node->_data->entries(node->_count, true),
1477
bindex * sizeof(I));
1478
memcpy(&(nbucket->entries(nbucketcount, true)[bindex]),
1479
&(node->_data->entries(node->_count, true)[bindex + 1]),
1480
(nbucketcount - bindex) * sizeof(I));
1482
// copy back entries here in _entries!
1486
// copy over old data
1487
if (nbucketsize > 0) {
1488
memcpy(nbucket->data(nbucketcount, hasent),
1489
node->_data->data(node->_count, hasent), before);
1490
assert(nbucketsize >= before);
1491
memcpy(&(nbucket->data(nbucketcount, hasent)[before]),
1492
&(node->_data->data(node->_count, hasent)[before + size]),
1493
(nbucketsize - before));
1497
node->_data = nbucket;
1498
node->_count = nbucketcount;
1499
node->_totsize -= size;
1510
merge_down(parent, node, on_heap);
1515
set<HEAPBOUND, SPLITBOUND, ALLOCSIZE, T, I>::erase(binarywrapper_t encoding)
1517
assert(encoding.size() <= 65536);
1520
fwdnode_t* fwd = this->_root;
1521
base_t* base = NULL;
1525
bool res = this->best_match(encoding, &fwd, &base, byte, b_index);
1526
if(!res || (size_t)fwd == (size_t)base)
1528
assert(!this->exists(encoding).first);
1533
int onheap = encoding.size();
1535
erase(fwd, (node_t *) base, b_index, onheap);
1536
assert(!this->exists(encoding).first);
1543
set<HEAPBOUND, SPLITBOUND, ALLOCSIZE, T, I>::erase(const uchar *data, size_t length)
1545
binarywrapper_t b((uchar*)data, length*8);
1554
#endif /* PTRIE_H */