~ubuntu-branches/ubuntu/trusty/bmagic/trusty-proposed

« back to all changes in this revision

Viewing changes to src/bmrandom.h

  • Committer: Bazaar Package Importer
  • Author(s): Roberto C. Sanchez
  • Date: 2010-01-24 14:45:39 UTC
  • mfrom: (4.1.6 sid)
  • Revision ID: james.westby@ubuntu.com-20100124144539-4ipk5rt64dpp38hl
Tags: 3.6.3-1
* New upstream release
* debian/patches/config.guess.patch: drop obsolete patch
* Add ${misc:Depends} as requested by lintian

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
#ifndef BMRANDOM__H__INCLUDED__
 
2
#define BMRANDOM__H__INCLUDED__
 
3
/*
 
4
Copyright(c) 2009 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
 
5
 
 
6
Permission is hereby granted, free of charge, to any person 
 
7
obtaining a copy of this software and associated documentation 
 
8
files (the "Software"), to deal in the Software without restriction, 
 
9
including without limitation the rights to use, copy, modify, merge, 
 
10
publish, distribute, sublicense, and/or sell copies of the Software, 
 
11
and to permit persons to whom the Software is furnished to do so, 
 
12
subject to the following conditions:
 
13
 
 
14
The above copyright notice and this permission notice shall be included 
 
15
in all copies or substantial portions of the Software.
 
16
 
 
17
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
 
18
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
 
19
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
 
20
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 
 
21
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
 
22
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
 
23
OTHER DEALINGS IN THE SOFTWARE.
 
24
 
 
25
For more information please visit:  http://bmagic.sourceforge.net
 
26
 
 
27
*/
 
28
 
 
29
#include "bm.h"
 
30
#include "bmfunc.h"
 
31
#include "bmdef.h"
 
32
 
 
33
#include <stdlib.h>
 
34
#include <algorithm>
 
35
 
 
36
 
 
37
namespace bm
 
38
{
 
39
 
 
40
/*!
 
41
    Class implements algorithm for random subset generation.
 
42
 
 
43
    Implemented method tries to be fair, but doesn't guarantee
 
44
    true randomeness.
 
45
 
 
46
    Performace note: 
 
47
    Class holds temporary buffers and variables, so it is recommended to
 
48
    re-use instances over multiple calls.
 
49
 
 
50
    \ingroup setalgo
 
51
*/
 
52
template<class BV>
 
53
class random_subset
 
54
{
 
55
public:
 
56
    random_subset();
 
57
    ~random_subset();
 
58
 
 
59
    /// Get random subset of input vector
 
60
    ///
 
61
    /// @param bv_out - destination vector
 
62
    /// @param bv_in  - input vector
 
63
    /// @param count  - number of bits to pick
 
64
    ///
 
65
    void sample(BV& bv_out, const BV& bv_in, unsigned count);
 
66
    
 
67
 
 
68
private:
 
69
    typedef 
 
70
        typename BV::blocks_manager_type  blocks_manager_type;
 
71
 
 
72
private:
 
73
    void get_subset(BV& bv_out, 
 
74
                    const BV& bv_in, 
 
75
                    unsigned  bv_in_count,
 
76
                    unsigned count);
 
77
 
 
78
    unsigned find_max_block();
 
79
    
 
80
    void get_random_subset(bm::word_t*       blk_out, 
 
81
                           const bm::word_t* blk_src,
 
82
                           unsigned          count);
 
83
    static 
 
84
    unsigned process_word(bm::word_t*       blk_out, 
 
85
                          const bm::word_t* blk_src,
 
86
                          unsigned          i,
 
87
                          unsigned          count);
 
88
 
 
89
    static
 
90
    void get_random_array(bm::word_t*       blk_out, 
 
91
                          bm::gap_word_t*   bit_list,
 
92
                          unsigned          bit_list_size,
 
93
                          unsigned          count);
 
94
 
 
95
 
 
96
private:
 
97
    random_subset(const random_subset&);
 
98
    random_subset& operator=(const random_subset&);
 
99
private:
 
100
    unsigned*         block_counts_; 
 
101
    unsigned short*   block_bits_take_;
 
102
    unsigned          blocks_;
 
103
    bm::gap_word_t    bit_list_[bm::gap_max_bits];
 
104
    unsigned*         block_candidates_;
 
105
    unsigned          candidates_count_;
 
106
    bm::word_t*       sub_block_;
 
107
};
 
108
 
 
109
 
 
110
///////////////////////////////////////////////////////////////////
 
111
 
 
112
 
 
113
 
 
114
template<class BV>
 
115
random_subset<BV>::random_subset()
 
116
: block_counts_(new unsigned[bm::set_total_blocks]),
 
117
  block_bits_take_(new bm::gap_word_t[bm::set_total_blocks]),
 
118
  block_candidates_(new unsigned[bm::set_total_blocks]),
 
119
  candidates_count_(0),
 
120
  sub_block_(new bm::word_t[bm::set_block_size])
 
121
{
 
122
}
 
123
 
 
124
template<class BV>
 
125
random_subset<BV>::~random_subset()
 
126
{
 
127
    delete [] block_counts_;
 
128
    delete [] block_bits_take_;
 
129
    delete [] block_candidates_;
 
130
    delete [] sub_block_;
 
131
}
 
132
 
 
133
template<class BV>
 
134
void random_subset<BV>::sample(BV&       bv_out, 
 
135
                               const BV& bv_in, 
 
136
                               unsigned  count)
 
137
{
 
138
    if (count == 0)
 
139
    {
 
140
        bv_out.clear(true);
 
141
        return;
 
142
    }
 
143
 
 
144
    unsigned bcnt = bv_in.count();
 
145
    if (count >= bcnt)
 
146
    {
 
147
        bv_out = bv_in;
 
148
        return;
 
149
    }
 
150
    if (count > bcnt/2) 
 
151
    {
 
152
        // build the complement vector and subtract it
 
153
        BV tmp_bv;
 
154
        unsigned delta_count = bcnt - count;
 
155
 
 
156
        get_subset(tmp_bv, bv_in, bcnt, delta_count);
 
157
        bv_out = bv_in;
 
158
        bv_out -= tmp_bv;
 
159
        return;
 
160
    }
 
161
 
 
162
    get_subset(bv_out, bv_in, bcnt, count);
 
163
}
 
164
 
 
165
template<class BV>
 
166
void random_subset<BV>::get_subset(BV&       bv_out, 
 
167
                                   const BV& bv_in, 
 
168
                                   unsigned  bcnt,
 
169
                                   unsigned  count)
 
170
{
 
171
    bv_out.clear(true);
 
172
    bv_out.resize(bv_in.size());
 
173
 
 
174
    const blocks_manager_type& bman_in = bv_in.get_blocks_manager();
 
175
    blocks_manager_type& bman_out = bv_out.get_blocks_manager();
 
176
 
 
177
    bm::word_t* tmp_block = bman_out.check_allocate_tempblock();
 
178
    candidates_count_ = 0;
 
179
 
 
180
 
 
181
    // compute bit-counts in all blocks
 
182
    //
 
183
    unsigned block_count = blocks_ = bv_in.count_blocks(block_counts_);
 
184
    for (unsigned i = 0; i <= block_count; ++i)
 
185
    {
 
186
        if (block_counts_[i])
 
187
        {
 
188
            float block_percent = 
 
189
                ((float)(block_counts_[i]+1)) / (float)bcnt;
 
190
            float bits_to_take = ((float)count) * block_percent; 
 
191
            bits_to_take += 0.99f;
 
192
 
 
193
            unsigned t = (unsigned)bits_to_take;
 
194
            block_bits_take_[i] = (bm::gap_word_t)t;
 
195
            
 
196
            if (block_bits_take_[i] == 0)
 
197
            {
 
198
                block_bits_take_[i] = 1;
 
199
            }
 
200
            else
 
201
            if (block_bits_take_[i] > block_counts_[i])
 
202
                block_bits_take_[i] = block_counts_[i];
 
203
 
 
204
            BM_ASSERT(bman_in.get_block(i));
 
205
        }
 
206
        else
 
207
        {
 
208
            block_bits_take_[i] = 0;
 
209
        }
 
210
    } // for i
 
211
 
 
212
    for (unsigned take_count = 0; count; count -= take_count) 
 
213
    {
 
214
        unsigned i = find_max_block();
 
215
        take_count = block_bits_take_[i];
 
216
        if (take_count > count)
 
217
            take_count = count;
 
218
        if (take_count == 0)
 
219
            continue;
 
220
        const bm::word_t* blk_src = bman_in.get_block(i);
 
221
        BM_ASSERT(blk_src);
 
222
 
 
223
        // allocate target block
 
224
        bm::word_t* blk_out = bman_out.get_block(i);
 
225
        if (blk_out != 0)
 
226
        {
 
227
            blk_out = bman_out.deoptimize_block(i);
 
228
        } 
 
229
        else
 
230
        {            
 
231
            blk_out = bman_out.get_allocator().alloc_bit_block();
 
232
            bman_out.set_block(i, blk_out);
 
233
        }
 
234
        if (take_count == block_counts_[i])
 
235
        {
 
236
            // copy the whole src block
 
237
            if (BM_IS_GAP(bman_in, blk_src, i))
 
238
            {
 
239
                gap_convert_to_bitset(blk_out, BMGAP_PTR(blk_src));
 
240
            }
 
241
            else
 
242
            {
 
243
                bm::bit_block_copy(blk_out, blk_src);                
 
244
            }
 
245
            block_bits_take_[i] = 0; // exclude block from any farther picking
 
246
            continue;
 
247
        }
 
248
        bit_block_set(blk_out, 0);
 
249
 
 
250
        if (block_counts_[i] < 4096) // use array shuffle
 
251
        {
 
252
            unsigned arr_len;
 
253
            // convert source block to bit-block
 
254
            if (BM_IS_GAP(bman_in, blk_src, i))
 
255
            {
 
256
                arr_len = gap_convert_to_arr(bit_list_,
 
257
                                             BMGAP_PTR(blk_src),
 
258
                                             bm::gap_equiv_len);
 
259
            }
 
260
            else // bit-block
 
261
            {
 
262
                arr_len = bit_convert_to_arr(bit_list_, 
 
263
                                             blk_src, 
 
264
                                             bm::gap_max_bits, 
 
265
                                             bm::gap_equiv_len,
 
266
                                             0);
 
267
            }
 
268
            BM_ASSERT(arr_len);
 
269
            get_random_array(blk_out, bit_list_, arr_len, take_count);
 
270
        }
 
271
        else // dense block
 
272
        {
 
273
            // convert source block to bit-block
 
274
            if (BM_IS_GAP(bman_in, blk_src, i))
 
275
            {
 
276
                gap_convert_to_bitset(tmp_block, BMGAP_PTR(blk_src));
 
277
                blk_src = tmp_block;
 
278
            }
 
279
 
 
280
            // pick random bits source block to target
 
281
            get_random_subset(blk_out, blk_src, take_count);
 
282
        }
 
283
        
 
284
        block_bits_take_[i] = 0; // exclude block from any farther picking
 
285
    }
 
286
}
 
287
 
 
288
template<class BV>
 
289
void random_subset<BV>::get_random_subset(bm::word_t*       blk_out, 
 
290
                                          const bm::word_t* blk_src,
 
291
                                          unsigned          take_count)
 
292
{
 
293
    for (unsigned rounds = 0; take_count && rounds < 10; ++rounds)
 
294
    {
 
295
        // pick random scan start and scan direction
 
296
        //
 
297
        unsigned i = rand() % bm::set_block_size;
 
298
        unsigned new_count;
 
299
 
 
300
        for (; i < bm::set_block_size && take_count; ++i)
 
301
        {
 
302
            if (blk_src[i] && (blk_out[i] != blk_src[i]))
 
303
            {
 
304
                take_count -= new_count = 
 
305
                    process_word(blk_out, blk_src, i, take_count);
 
306
            }
 
307
        } // for i
 
308
 
 
309
    } // for
 
310
    // if masked scan did not produce enough results..
 
311
    //
 
312
    if (take_count)
 
313
    {
 
314
        // Find all vacant bits: do logical (src SUB out)
 
315
        for (unsigned i = 0; i < bm::set_block_size; ++i)
 
316
        {
 
317
            sub_block_[i] = blk_src[i] & ~blk_out[i];
 
318
        }
 
319
        // now transform vacant bits to array, then pick random elements
 
320
        //
 
321
        unsigned arr_len = bit_convert_to_arr(bit_list_, 
 
322
                                              sub_block_, 
 
323
                                              bm::gap_max_bits, 
 
324
                                              bm::gap_max_bits,
 
325
                                              0);
 
326
        BM_ASSERT(arr_len);
 
327
        get_random_array(blk_out, bit_list_, arr_len, take_count);        
 
328
    }
 
329
}
 
330
 
 
331
template<class BV>
 
332
unsigned random_subset<BV>::process_word(bm::word_t*       blk_out, 
 
333
                                         const bm::word_t* blk_src,
 
334
                                         unsigned          i,
 
335
                                         unsigned          count)
 
336
{
 
337
    unsigned new_bits, mask;
 
338
 
 
339
    do 
 
340
    {    
 
341
        mask = rand();
 
342
        mask ^= mask << 16;
 
343
    } while (mask == 0);
 
344
 
 
345
    unsigned src_rand = blk_src[i] & mask;    
 
346
    new_bits = src_rand & ~blk_out[i];
 
347
 
 
348
    if (new_bits)
 
349
    {
 
350
        unsigned new_count = bm::word_bitcount(new_bits);
 
351
 
 
352
        // check if we accidentally picked more bits than needed
 
353
        if (new_count > count)
 
354
        {
 
355
            BM_ASSERT(count);
 
356
 
 
357
            unsigned char blist[64];
 
358
            unsigned arr_size = bm::bit_list_4(new_bits, blist);
 
359
            BM_ASSERT(arr_size == new_count);
 
360
            std::random_shuffle(blist, blist + arr_size);
 
361
            unsigned value = 0;
 
362
            for (unsigned j = 0; j < count; ++j)
 
363
            {
 
364
                value |= (1 << blist[j]);
 
365
            }
 
366
            new_bits = value;
 
367
            new_count = count;
 
368
 
 
369
            BM_ASSERT(bm::word_bitcount(new_bits) == count);
 
370
            BM_ASSERT((new_bits & ~blk_src[i]) == 0);
 
371
        }
 
372
 
 
373
        blk_out[i] |= new_bits;
 
374
        return new_count;
 
375
    }
 
376
 
 
377
    return 0;    
 
378
}
 
379
 
 
380
 
 
381
template<class BV>
 
382
void random_subset<BV>::get_random_array(bm::word_t*       blk_out, 
 
383
                                         bm::gap_word_t*   bit_list,
 
384
                                         unsigned          bit_list_size,
 
385
                                         unsigned          count)
 
386
{
 
387
    std::random_shuffle(bit_list, bit_list + bit_list_size);
 
388
    for (unsigned i = 0; i < count; ++i)
 
389
    {
 
390
        bm::set_bit(blk_out, bit_list[i]);
 
391
    }
 
392
}
 
393
 
 
394
template<class BV>
 
395
unsigned random_subset<BV>::find_max_block() 
 
396
{
 
397
    if (candidates_count_)
 
398
    {
 
399
        return block_candidates_[--candidates_count_];
 
400
    }
 
401
 
 
402
    unsigned candidate = 0;
 
403
    unsigned max_i = 0;
 
404
    for (unsigned i = 0; i <= blocks_; ++i)
 
405
    {
 
406
        if (block_bits_take_[i] == 0) continue;
 
407
        if (block_bits_take_[i] == candidate)
 
408
        {
 
409
            block_candidates_[candidates_count_++] = i;
 
410
        }
 
411
        else
 
412
        {
 
413
            unsigned diff = abs((int)block_bits_take_[i] - (int)candidate);
 
414
            double d = (double)diff / (double)candidate;
 
415
 
 
416
            if (d < 0.20f) // delta is statistically insignificant
 
417
            {
 
418
                block_candidates_[candidates_count_++] = i;
 
419
            }
 
420
            else
 
421
            if (block_bits_take_[i] > candidate)
 
422
            {
 
423
                 candidate = block_bits_take_[i];
 
424
                 max_i = i;
 
425
                 candidates_count_ = 0;
 
426
                 block_candidates_[candidates_count_++] = i;
 
427
            }
 
428
        }
 
429
    }
 
430
 
 
431
    if (candidates_count_ > 1)
 
432
    {
 
433
        std::random_shuffle(block_candidates_, block_candidates_ + candidates_count_);
 
434
        return find_max_block();
 
435
    }
 
436
    return max_i;
 
437
}
 
438
 
 
439
 
 
440
} // namespace
 
441
 
 
442
#include "bmundef.h"
 
443
 
 
444
#endif