/*
  Copyright (c) 2006-2010 Gordon Gremme <gremme@zbh.uni-hamburg.de>
  Copyright (c) 2007 Stefan Kurtz <kurtz@zbh.uni-hamburg.de>
  Copyright (c) 2008 Thomas Jahns <Thomas.Jahns@gmx.net>
  Copyright (c) 2006-2008 Center for Bioinformatics, University of Hamburg

  Permission to use, copy, modify, and distribute this software for any
  purpose with or without fee is hereby granted, provided that the above
  copyright notice and this permission notice appear in all copies.

  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
21
#include "core/assert_api.h"
22
#include "core/disc_distri.h"
23
#include "core/ensure.h"
24
#include "core/hashmap-generic.h"
26
#include "core/unused_api.h"
29
GtHashtable *hashdist;
30
unsigned long long num_of_occurrences;
33
GtDiscDistri* gt_disc_distri_new(void)
35
return gt_calloc((size_t) 1, sizeof (GtDiscDistri));
38
void gt_disc_distri_add(GtDiscDistri *d, unsigned long key)
40
gt_disc_distri_add_multi(d, key, (unsigned long long) 1);
43
DECLARE_HASHMAP(unsigned long, ul, unsigned long long, ull, static, inline)
44
DEFINE_HASHMAP(unsigned long, ul, unsigned long long, ull, gt_ht_ul_elem_hash,
45
gt_ht_ul_elem_cmp, NULL_DESTRUCTOR, NULL_DESTRUCTOR, static,
48
void gt_disc_distri_add_multi(GtDiscDistri *d, unsigned long key,
49
unsigned long long occurrences)
51
unsigned long long *valueptr;
55
d->hashdist = ul_ull_gt_hashmap_new();
57
valueptr = ul_ull_gt_hashmap_get(d->hashdist, key);
59
ul_ull_gt_hashmap_add(d->hashdist, key, occurrences);
62
(*valueptr) += occurrences;
64
d->num_of_occurrences += occurrences;
67
unsigned long long gt_disc_distri_get(const GtDiscDistri *d, unsigned long key)
69
unsigned long long *valueptr;
71
if (!d->hashdist || !(valueptr = ul_ull_gt_hashmap_get(d->hashdist, key)))
77
double cumulative_probability;
78
unsigned long long num_of_occurrences;
82
static enum iterator_op
83
showvalue(unsigned long key, unsigned long long occurrences,
84
void *data, GT_UNUSED GtError *err)
87
GtShowValueInfo *info;
90
gt_assert(data && occurrences);
91
info = (GtShowValueInfo*) data;
93
probability = (double) ((double) occurrences / info->num_of_occurrences);
94
info->cumulative_probability += probability;
95
gt_file_xprintf(info->outfp, "%lu: %llu (prob=%.4f,cumulative=%.4f)\n",
96
key, occurrences, probability, info->cumulative_probability);
97
return CONTINUE_ITERATION;
100
void gt_disc_distri_show(const GtDiscDistri *d, GtFile *outfp)
102
GtShowValueInfo showvalueinfo;
107
if (d->hashdist != NULL) {
108
showvalueinfo.cumulative_probability = 0.0;
109
showvalueinfo.num_of_occurrences = d->num_of_occurrences;
110
showvalueinfo.outfp = outfp;
111
rval = ul_ull_gt_hashmap_foreach_in_default_order(d->hashdist, showvalue,
112
&showvalueinfo, NULL);
113
gt_assert(!rval); /* showvalue() is sane */
118
GtDiscDistriIterFunc func;
120
} DiscDistriForeachInfo;
122
static enum iterator_op
123
disc_distri_foreach_iterfunc(unsigned long key, unsigned long long occurrences,
124
void *data, GT_UNUSED GtError *err)
126
DiscDistriForeachInfo *info;
129
info = (DiscDistriForeachInfo*) data;
130
info->func(key, occurrences, info->data);
131
return CONTINUE_ITERATION;
135
void gt_disc_distri_foreach_generic(const GtDiscDistri *d,
136
GtDiscDistriIterFunc func,
138
ul_ull_gt_hashmap_KeyCmp cmp)
140
DiscDistriForeachInfo info;
143
if (d->hashdist != NULL) {
147
rval = ul_ull_gt_hashmap_foreach_ordered(d->hashdist,
148
disc_distri_foreach_iterfunc,
151
rval = ul_ull_gt_hashmap_foreach_in_default_order(d->hashdist,
152
disc_distri_foreach_iterfunc,
154
gt_assert(!rval); /* disc_distri_foreach_iterfunc() is sane */
158
void gt_disc_distri_foreach(const GtDiscDistri *d, GtDiscDistriIterFunc func,
161
gt_disc_distri_foreach_generic(d,func,data,NULL);
/* Key comparator that inverts the result of gt_ht_ul_elem_cmp(), used to
   iterate in reverse order.
   NOTE(review): the return-type line was not visible; "static int" is
   assumed from the negated comparison result and the file-local use. */
static int
rev_key_cmp(const unsigned long a, const unsigned long b)
{
  return -gt_ht_ul_elem_cmp(&a,&b);
}
170
void gt_disc_distri_foreach_in_reverse_order(const GtDiscDistri *d,
171
GtDiscDistriIterFunc func,
174
gt_disc_distri_foreach_generic(d,func,data, rev_key_cmp);
177
#define DISC_DISTRI_FOREACHTESTSIZE 3
179
/* data for foreach unit test */
180
struct ForeachTesterData
183
int expkeys[DISC_DISTRI_FOREACHTESTSIZE];
184
int expvalues[DISC_DISTRI_FOREACHTESTSIZE];
189
/* helper function for unit test of foreach */
190
static void foreachtester(unsigned long key,
191
unsigned long long value, void *data)
193
struct ForeachTesterData *tdata = data;
194
GtError *err = tdata->err;
196
gt_ensure(*(tdata->had_err), tdata->counter < DISC_DISTRI_FOREACHTESTSIZE);
197
gt_ensure(*(tdata->had_err),
198
(unsigned long) tdata->expkeys[tdata->counter] == key);
199
gt_ensure(*(tdata->had_err),
200
(unsigned long long) tdata->expvalues[tdata->counter] == value);
203
int gt_disc_distri_unit_test(GtError *err)
207
struct ForeachTesterData tdata;
211
d = gt_disc_distri_new();
213
gt_ensure(had_err, gt_disc_distri_get(d, 0UL) == 0);
214
gt_ensure(had_err, gt_disc_distri_get(d, 100UL) == 0);
216
gt_disc_distri_add(d, 0);
217
gt_disc_distri_add_multi(d, 100UL, 256ULL);
219
gt_ensure(had_err, gt_disc_distri_get(d, 0UL) == 1ULL);
220
gt_ensure(had_err, gt_disc_distri_get(d, 100UL) == 256ULL);
222
/* test foreach and foreach_in_reverse_order: */
223
gt_disc_distri_add(d, 2UL);
226
tdata.expkeys[0] = 0;
227
tdata.expvalues[0] = 1;
228
tdata.expkeys[1] = 2;
229
tdata.expvalues[1] = 1;
230
tdata.expkeys[2] = 100;
231
tdata.expvalues[2] = 256;
232
tdata.had_err = &had_err;
234
gt_disc_distri_foreach(d, foreachtester, &tdata);
238
tdata.expkeys[0] = 100;
239
tdata.expvalues[0] = 256;
240
tdata.expkeys[1] = 2;
241
tdata.expvalues[1] = 1;
242
tdata.expkeys[2] = 0;
243
tdata.expvalues[2] = 1;
244
tdata.had_err = &had_err;
246
gt_disc_distri_foreach_in_reverse_order(d, foreachtester, &tdata);
249
gt_disc_distri_delete(d);
254
void gt_disc_distri_delete(GtDiscDistri *d)
257
gt_hashtable_delete(d->hashdist);