2
Copyright (c) 2002-2009 Anatoliy Kuznetsov.
4
Permission is hereby granted, free of charge, to any person
5
obtaining a copy of this software and associated documentation
6
files (the "Software"), to deal in the Software without restriction,
7
including without limitation the rights to use, copy, modify, merge,
8
publish, distribute, sublicense, and/or sell copies of the Software,
9
and to permit persons to whom the Software is furnished to do so,
10
subject to the following conditions:
12
The above copyright notice and this permission notice shall be included
13
in all copies or substantial portions of the Software.
15
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
19
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21
OTHER DEALINGS IN THE SOFTWARE.
29
// No intermix FP with integer SSE in this program
30
//#define BM_SET_MMX_GUARD
36
#pragma warning( push )
37
#pragma warning( disable : 4996)
50
const unsigned BSIZE = 150000000;
51
const unsigned int REPEATS = 300;
53
typedef bitset<BSIZE> test_bitset;
55
unsigned platform_test = 1;
62
TimeTaker(const char* test_name, unsigned repeats)
63
: test_name_(test_name), repeats_(repeats)
71
clock_t elapsed_clocks = finish_ - start_;
72
double duration = (double)(finish_ - start_) / CLOCKS_PER_SEC;
74
cout << test_name_ << " ; ";
77
cout << duration << endl;
81
cout << elapsed_clocks << ";" << duration << ";";
84
double ops_per_sec = (double)repeats_ / duration;
91
const char* test_name_;
97
typedef bm::bvector<> bvect;
100
void SimpleFillSets(test_bitset& bset,
104
unsigned fill_factor,
107
for (unsigned i = min; i < max; i+=fill_factor)
117
// 111........111111........111111..........11111111.......1111111...
120
void FillSetsIntervals(test_bitset& bset,
124
unsigned fill_factor,
127
while(fill_factor==0)
129
fill_factor=rand()%10;
133
unsigned factor = 10 * fill_factor;
134
for (i = min; i < max; ++i)
140
len = rand() % factor;
143
} while (end >= max);
144
for (j = i; j < end; ++j)
167
for(unsigned k=0; k < 1000 && i < max; k+=3,i+=3)
190
unsigned* m1 = new unsigned[BSIZE/32];
191
unsigned* m2 = new unsigned[BSIZE/32];
197
TimeTaker tt("Memory ADD transfer test", REPEATS * 4);
198
for (i = 0; i < REPEATS*4; ++i)
200
for (j = 0; j < BSIZE/32; j+=4)
212
TimeTaker tt("memcpy transfer test", REPEATS * 4);
213
for (i = 0; i < REPEATS*4; ++i)
215
memcpy(m1, m2, BSIZE/32 * sizeof(unsigned));
227
bvect* bv = new bvect();
228
test_bitset* bset = new test_bitset();
231
FillSetsIntervals(*bset, *bv, 0, BSIZE, 10);
235
TimeTaker tt("BitCount. Random bitvector", REPEATS*2);
236
for (unsigned i = 0; i < REPEATS*2; ++i)
242
volatile unsigned* p = &value;
248
TimeTaker tt("BitCount. Random bitvector (STL)", REPEATS*2);
249
for (unsigned i = 0; i < REPEATS*2; ++i)
251
value += bset->count();
265
void BitForEachTest()
267
// setup the test data
269
unsigned* test_arr = new unsigned[65536];
270
for (unsigned j = 0; j < 65536; ++j)
277
unsigned bit_list[32];
278
TimeTaker tt("BitList algorithm. Conventional (AND based check)", REPEATS*10);
281
for (unsigned i = 0; i < REPEATS*10; ++i)
283
for (unsigned j = 0; j < 65536; ++j)
285
bm::bit_list(i*test_arr[j], bit_list);
291
unsigned bit_list[32];
292
TimeTaker tt("BitList4 algorithm(sub-octet+switch)", REPEATS*10);
294
for (unsigned i = 0; i < REPEATS*10; ++i)
296
for (unsigned j = 0; j < 65536; ++j)
298
bm::bit_list_4(i*test_arr[j], bit_list);
307
void BitCountSparseTest()
310
bvect* bv = new bvect();
311
test_bitset* bset = new test_bitset();
312
unsigned value = 0, c1;
313
volatile unsigned* p = &value;
315
SimpleFillSets(*bset, *bv, 0, BSIZE, 2500);
318
TimeTaker tt("BitCount: Sparse bitset ", REPEATS*10);
319
for (unsigned i = 0; i < REPEATS*10; ++i)
321
value += bv->count();
327
TimeTaker tt("BitCount: Sparse bitset (STL)", REPEATS*10);
328
for (unsigned int i = 0; i < REPEATS*10; ++i)
330
value += bset->count();
341
TimeTaker tt("BitCount: GAP Sparse bitset", REPEATS*1000);
342
for (unsigned i = 0; i < REPEATS*1000; ++i)
344
value += bv->count();
358
void BitCompareTest()
361
bvect* bv1 = new bvect();
362
bvect* bv2 = new bvect();
363
test_bitset* bset = new test_bitset();
366
SimpleFillSets(*bset, *bv1, 0, BSIZE, 10);
367
SimpleFillSets(*bset, *bv2, 0, BSIZE, 10);
370
TimeTaker tt("BitCompare: Random bitvector", REPEATS*10);
371
for (unsigned int i = 0; i < REPEATS*10; ++i)
373
value+=bv1->compare(*bv2);
383
if (platform_test) return;
385
unsigned cnt = REPEATS * 100000;
386
unsigned* arr1 = new unsigned[cnt];
387
unsigned* arr2 = new unsigned[cnt];
390
for (i = 0; i < cnt; ++i)
392
if ((rand() % 10) == 0)
404
TimeTaker tt("wordcmp complex: Random words comparison", cnt);
406
for (i = 0; i < cnt; ++i)
408
int res2 = bm::wordcmp(arr1[i], arr2[i]);
409
int res = bm::wordcmp0(arr1[i], arr2[i]);
413
cerr << "Incorrect result ! " << arr1[i]
414
<< "<=>" << arr2[i] << " res=" << res <<
422
volatile void* p = &c;
425
TimeTaker tt("wordcmp0. Random words comparison", cnt);
426
for (i = 0; i < cnt; ++i)
428
c += bm::wordcmp0(arr1[i], arr2[i]);
434
TimeTaker tt("wordcmp. Random words comparison", cnt);
435
for (i = 0; i < cnt; ++i)
437
c += bm::wordcmp(arr1[i], arr2[i]);
447
sprintf(buf, "%p", p);
450
void EnumeratorTest()
453
test_bitset* bset = new test_bitset();
456
FillSetsIntervals(*bset, bv, 0, BSIZE, 10);
458
unsigned cnt1 = bv.count();
459
//unsigned cnt2 = bset->count();
465
TimeTaker tt("bvector<>::enumerator", REPEATS/10);
466
for (i = 0; i < REPEATS/10; ++i)
468
bvect::enumerator en = bv.first();
470
for (;en.valid();++en)
478
// -----------------------------------------------
482
TimeTaker tt("bvector<>::get_next()", REPEATS/10);
483
for (i = 0; i < REPEATS/10; ++i)
487
unsigned value = bv.get_first();
490
value = bv.get_next(value);
501
sprintf(buf, "%i %i ", cnt, cnt1);//, cnt2);
506
void EnumeratorTestGAP()
508
bvect* bv = new bvect();
509
test_bitset* bset = new test_bitset();
513
SimpleFillSets(*bset, *bv, 0, BSIZE, 2500);
516
for (int k = 0; k < 2; ++k)
520
TimeTaker tt("Sparse bvector (enumerator)", REPEATS*10*(k+1));
521
for (i = 0; i < REPEATS*10*(k+1); ++i)
523
bvect::enumerator en = bv->first();
524
bvect::enumerator bend = bv->end();
535
// -----------------------------------------------
539
TimeTaker tt("Sparse bvector (get_next())", REPEATS*10*(k+1));
541
for (i = 0; i < REPEATS*10*(k+1); ++i)
545
unsigned value = bv->get_first();
548
value = bv->get_next(value);
555
sprintf(buf, "%i", cnt); // to fool some smart compilers like ICC
564
cout << "Testing optimized vectors." << endl;
570
// -----------------------------------------------
574
void SerializationTest()
578
// prepare a test bitset with a small number of bits set somewhere
579
// far from the beginning
580
for (unsigned i = 0; i < 5; ++i)
582
bv_sparse[BSIZE/2 + i * 3] = true;
584
bv_sparse[100] = true;
585
bv_sparse[70000] = true;
586
bv_sparse[200000] = true;
588
bv_sparse.optimize();
590
unsigned cnt = bv_sparse.count();
591
bvect::statistics st;
592
bv_sparse.calc_stat(&st);
593
unsigned char* buf = new unsigned char[st.max_serialize_mem];
595
unsigned len, id_size;
598
TimeTaker tt("Small bvector serialization", REPEATS*70000);
599
for (unsigned i = 0; i < REPEATS*70000; ++i)
601
len += bm::serialize(bv_sparse, buf, bm::BM_NO_BYTE_ORDER|bm::BM_NO_GAP_LENGTH);
602
id_size += cnt * sizeof(unsigned);
606
delete [] buf; buf = 0;
608
bvect* bv = new bvect();
609
test_bitset* bset = new test_bitset();
612
SimpleFillSets(*bset, *bv, 0, BSIZE, 4);
616
buf = new unsigned char[st.max_serialize_mem];
619
TimeTaker tt("Large bvector serialization", REPEATS*4);
620
for (unsigned i = 0; i < REPEATS*4; ++i)
622
len += bm::serialize(*bv, buf, bm::BM_NO_BYTE_ORDER|bm::BM_NO_GAP_LENGTH);
623
id_size += cnt * sizeof(unsigned);
628
sprintf(cbuf, "%i %i %i", id_size, len, value);
638
bvect* bv = new bvect();
639
test_bitset* bset = new test_bitset();
641
//unsigned value = 0;
643
SimpleFillSets(*bset, *bv, 0, BSIZE, 2500);
645
TimeTaker tt("Invert bvector", REPEATS*4);
646
for (i = 0; i < REPEATS*4; ++i)
654
TimeTaker tt("Invert bvector (STL)", REPEATS*4);
655
for (i = 0; i < REPEATS*4; ++i)
668
bvect* bv1 = new bvect();
669
test_bitset* bset1 = new test_bitset();
670
test_bitset* bset2 = new test_bitset();
671
bvect* bv2 = new bvect();
673
//unsigned value = 0;
675
SimpleFillSets(*bset1, *bv1, 0, BSIZE, 100);
676
SimpleFillSets(*bset1, *bv2, 0, BSIZE, 100);
678
TimeTaker tt("AND bvector test", REPEATS*4);
679
for (i = 0; i < REPEATS*4; ++i)
687
TimeTaker tt("AND bvector test(STL)", REPEATS*4);
688
for (i = 0; i < REPEATS*4; ++i)
704
bvect* bv1 = new bvect();
705
test_bitset* bset1 = new test_bitset();
706
bvect* bv2 = new bvect();
709
SimpleFillSets(*bset1, *bv1, 0, BSIZE, 100);
710
SimpleFillSets(*bset1, *bv2, 0, BSIZE, 100);
714
TimeTaker tt("SUB bvector test", REPEATS*4);
715
for (i = 0; i < REPEATS*4; ++i)
728
bvect* bv1 = new bvect();
729
bvect* bv2 = new bvect();
730
test_bitset* bset1 = new test_bitset();
731
test_bitset* bset2 = new test_bitset();
734
SimpleFillSets(*bset1, *bv1, 0, BSIZE, 400);
735
SimpleFillSets(*bset2, *bv2, 0, BSIZE, 500);
739
unsigned test_count = 0;
744
TimeTaker tt("XOR COUNT bvector test with TEMP vector", REPEATS*4);
745
for (i = 0; i < REPEATS*4; ++i)
750
count1 += bv_tmp.count();
756
test_bitset* bset_tmp = new test_bitset();
757
TimeTaker tt("XOR COUNT bvector test with TEMP vector (STL)", REPEATS*4);
758
for (i = 0; i < REPEATS*4; ++i)
763
test_count += bset_tmp->count();
769
TimeTaker tt("XOR COUNT bvector test", REPEATS*4);
770
for (i = 0; i < REPEATS*4; ++i)
772
count2 += bm::count_xor(*bv1, *bv2);
778
if (count1 != count2)
780
cout << "Check failed !" << endl;
781
cout << count1 << " " << count2 << " " << test_count << endl;
786
// -----------------------------------------
789
cout << "One optimized vector" << endl;
796
TimeTaker tt("XOR COUNT bvector test with TEMP vector", REPEATS*4);
797
for (i = 0; i < REPEATS*4; ++i)
802
count1 += bv_tmp.count();
807
TimeTaker tt("XOR COUNT bvector test", REPEATS*4);
808
for (i = 0; i < REPEATS*4; ++i)
810
count2 += bm::count_xor(*bv1, *bv2);
815
if (count1 != count2)
817
cout << "Check failed !" << endl;
822
// -----------------------------------------
825
cout << "Both vectors optimized" << endl;
832
TimeTaker tt("XOR COUNT bvector test with TEMP vector", REPEATS*4);
833
for (i = 0; i < REPEATS*4; ++i)
838
count1 += bv_tmp.count();
843
TimeTaker tt("XOR COUNT bvector test", REPEATS*4);
844
for (i = 0; i < REPEATS*4; ++i)
846
count2 += bm::count_xor(*bv1, *bv2);
850
if (count1 != count2)
852
cout << "Check failed !" << endl;
868
bvect* bv1 = new bvect();
869
bvect* bv2 = new bvect();
870
test_bitset* bset1 = new test_bitset();
871
test_bitset* bset2 = new test_bitset();
874
SimpleFillSets(*bset1, *bv1, 0, BSIZE, 500);
875
SimpleFillSets(*bset2, *bv2, 0, BSIZE, 250);
879
unsigned countA=0, countB=0, test_countA=0, test_countB=0;
880
unsigned test_count = 0;
884
TimeTaker tt("Tversky Index bvector test vector", REPEATS);
885
for (i = 0; i < REPEATS; ++i)
887
count1 = bm::count_and(*bv1, *bv2);
889
countA = bm::count_sub(*bv1, *bv2);
890
countB = bm::count_sub(*bv2, *bv1);
892
ti1 = double(count1) / double(0.4*countA + 0.5*countB + count1);
899
test_bitset* bset_tmp = new test_bitset();
900
double test_dice = 0;
901
TimeTaker tt("Dice bvector test with TEMP vector(STL)", REPEATS);
902
for (i = 0; i < REPEATS; ++i)
907
test_count += bset_tmp->count();
909
test_countA += bset1->count();
910
test_countB += bset2->count();
912
test_countA += bset1->count();
913
test_countB += bset2->count();
915
test_dice += double(2*test_count) / double(test_countA + test_countB);
921
bm::distance_metric_descriptor dmd[3];
922
dmd[0].metric = bm::COUNT_AND;
923
dmd[1].metric = bm::COUNT_SUB_AB;
924
dmd[2].metric = bm::COUNT_SUB_BA;
926
TimeTaker tt("Tversky Index bvector test (pipeline)", REPEATS);
927
for (i = 0; i < REPEATS; ++i)
929
bm::distance_operation(*bv1, *bv2, &dmd[0], (&dmd[0])+3);
931
ti2 = double(dmd[0].result) / double(0.4*dmd[1].result + 0.5*dmd[2].result + dmd[0].result);
933
dmd[0].result = dmd[1].result = dmd[2].result = 0;
938
if (fabs(ti2 - ti1) > 0.1)
940
cout << "Check failed ! error=" << fabs(ti2 - ti1) << endl;
941
cout << ti1 << " " << ti2 << endl;
946
// -----------------------------------------
949
cout << "One optimized vector" << endl;
952
bv1->count(); // trying to fool the CPU cache
956
TimeTaker tt("Dice metric bvector test", REPEATS);
957
for (i = 0; i < REPEATS; ++i)
959
count1 = bm::count_and(*bv1, *bv2);
961
countA = bm::count_sub(*bv1, *bv2);
962
countB = bm::count_sub(*bv2, *bv1);
964
ti1 = double(count1) / double(0.4*countA + 0.5*countB + count1);
971
bm::distance_metric_descriptor dmd[3];
972
dmd[0].metric = bm::COUNT_AND;
973
dmd[1].metric = bm::COUNT_SUB_AB;
974
dmd[2].metric = bm::COUNT_SUB_BA;
976
TimeTaker tt("Tversky Index bvector test(pipeline)", REPEATS);
977
for (i = 0; i < REPEATS; ++i)
979
bm::distance_operation(*bv1, *bv2, &dmd[0], (&dmd[0])+3);
981
ti2 = double(dmd[0].result) / double(0.4*dmd[1].result + 0.5*dmd[2].result + dmd[0].result);
983
dmd[0].result = dmd[1].result = dmd[2].result = 0;
988
if (fabs(ti2 - ti1) > 0.1)
990
cout << "Check failed !" << endl;
991
cout << ti1 << " " << ti2 << endl;
997
// -----------------------------------------
1000
cout << "Both vectors optimized" << endl;
1005
TimeTaker tt("Tversky index bvector test", REPEATS);
1006
for (i = 0; i < REPEATS; ++i)
1008
count1 = bm::count_and(*bv1, *bv2);
1010
countA = bm::count_sub(*bv1, *bv2);
1011
countB = bm::count_sub(*bv2, *bv1);
1013
ti1 = double(count1) / double(0.4*countA + 0.5*countB + count1);
1018
bm::distance_metric_descriptor dmd[3];
1019
dmd[0].metric = bm::COUNT_AND;
1020
dmd[1].metric = bm::COUNT_SUB_AB;
1021
dmd[2].metric = bm::COUNT_SUB_BA;
1023
TimeTaker tt("Tversky Index bvector test (pipeline)", REPEATS);
1024
for (i = 0; i < REPEATS; ++i)
1026
bm::distance_operation(*bv1, *bv2, &dmd[0], (&dmd[0])+3);
1028
ti2 = double(dmd[0].result) / double(0.4*dmd[1].result + 0.5*dmd[2].result + dmd[0].result);
1030
dmd[0].result = dmd[1].result = dmd[2].result = 0;
1034
if (fabs(ti2 - ti1) > 0.1)
1036
cout << "Check failed !" << endl;
1037
cout << ti1 << " " << ti2 << endl;
1051
# define BM_ALIGN16 __declspec(align(16))
1052
# define BM_ALIGN16ATTR
1057
# define BM_ALIGN16ATTR __attribute__((aligned(16)))
1063
# define BM_ALIGN16ATTR
1067
void BitBlockTransposeTest()
1069
bm::word_t BM_ALIGN16 block1[bm::set_block_size] BM_ALIGN16ATTR = {0,};
1070
//bm::word_t BM_ALIGN16 block2[bm::set_block_size] = {0xFF,};
1071
unsigned BM_ALIGN16 tmatrix1[32][bm::set_block_plain_size] BM_ALIGN16ATTR;
1073
for (unsigned i = 0; i < bm::set_block_size; ++i)
1075
block1[i] = 1 | (1 << 5) | (7 << 15) | (3 << 22);
1078
const unsigned blocks_count = 70000;
1079
bm::word_t* blocks[blocks_count];
1080
for (unsigned k = 0; k < blocks_count; ++k)
1082
blocks[k] = bm::block_allocator::allocate(bm::set_block_size, 0);
1083
for (unsigned i = 0; i < bm::set_block_size; ++i)
1085
blocks[k][i] = 1 | (1 << 5) | (7 << 15) | (3 << 22);
1092
TimeTaker tt("Bit-block transpose.", REPEATS*1000);
1093
for (unsigned i = 0; i < REPEATS*1000; ++i)
1095
bm::bit_block_transpose(block1, tmatrix1);
1100
TimeTaker tt("Bit-block trestore.", REPEATS*1000);
1101
for (unsigned i = 0; i < REPEATS*1000; ++i)
1103
bm::bit_block_trestore(tmatrix1, block2);
1110
TimeTaker tt("Bit-block transpose distance.", REPEATS*1000);
1111
unsigned distance[bm::set_block_plain_cnt][bm::set_block_plain_cnt];
1112
for (unsigned i = 0; i < REPEATS*1000; ++i)
1114
bm::bit_block_tmatrix_distance(tmatrix1, distance);
1115
cnt += distance[1][1];
1121
unsigned d2[bm::set_block_plain_cnt][bm::set_block_plain_cnt];
1123
TimeTaker tt("Bit-block transpose+distance", 100000);
1124
unsigned distance[bm::set_block_plain_cnt][bm::set_block_plain_cnt];
1126
for (unsigned i = 0; i < 100000; ++i)
1128
bm::vect_bit_transpose<unsigned,
1129
bm::set_block_plain_cnt,
1130
bm::set_block_plain_size>
1131
(blocks[idx], bm::set_block_size, tmatrix1);
1132
bm::tmatrix_distance<unsigned,
1133
bm::set_block_plain_cnt,
1134
bm::set_block_plain_size>
1135
(tmatrix1, distance);
1137
cnt += distance[1][1];
1139
if (idx >= blocks_count) idx = 0;
1140
memcpy(d2, distance, sizeof(distance));
1146
sprintf(cbuf, "%i %i", cnt, d2[10][10]);
1148
for (unsigned i = 0; i < blocks_count; ++i)
1150
bm::block_allocator::deallocate(blocks[i], 0);
1157
bvect* bv_small = new bvect(bm::BM_GAP);
1158
bvect* bv_large = new bvect(bm::BM_GAP);
1160
test_bitset* bset = new test_bitset();
1162
FillSetsIntervals(*bset, *bv_large, 0, 2000000000, 10);
1164
for (int i = 0; i < 2000; ++i)
1166
bv_small->set(i*10000);
1170
TimeTaker tt("Operation &= test", REPEATS * 10);
1172
for (unsigned i = 0; i < REPEATS*10; ++i)
1174
bvect t1(bm::BM_GAP);
1177
count += t1.count();
1184
TimeTaker tt("Operation &= with enumerator test", REPEATS * 10);
1186
for (unsigned i = 0; i < REPEATS*10; ++i)
1188
bvect t1(bm::BM_GAP);
1189
bvect t2(bm::BM_GAP);
1192
for (bvect::enumerator it = t1.first(); it != t1.end(); ++it) {
1193
if ((*bv_large)[*it]) {
1197
count += t2.count();
1209
TimeTaker tt("TOTAL", 1);
1217
BitCountSparseTest();
1221
BitBlockTransposeTest();
1224
EnumeratorTestGAP();
1235
SerializationTest();
1242
#pragma warning( pop )