1
/* $Id: datastore_db.c,v 1.107 2004/06/19 19:17:56 relson Exp $ */
3
/*****************************************************************************
6
datastore_db.c -- implements the datastore, using Berkeley DB
9
Gyepi Sam <gyepi@praxis-sw.com> 2002 - 2003
10
Matthias Andree <matthias.andree@gmx.de> 2003
12
******************************************************************************/
14
/* To avoid header file conflicts the order is:
15
** 1. System header files
16
** 2. Header files for external packages
17
** 3. Bogofilter's header files
20
#define DONT_TYPEDEF_SSIZE_T 1
26
#include <unistd.h> /* for SEEK_SET for SunOS 4.1.x */
27
#include <sys/resource.h>
35
#include "datastore.h"
36
#include "datastore_db.h"
40
#include "paths.h" /* for build_path */
41
#include "rand_sleep.h"
47
static DB_ENV *dbe; /* libdb environment, if in use, NULL otherwise */
49
static const DBTYPE dbtype = DB_BTREE;
51
static bool init = false;
52
bool create_flag = false; /* For datastore.c (to add .WORDLIST_VERSION) */
57
int fd; /* file descriptor of data base file */
58
dbmode_t open_mode; /* datastore open mode, DS_READ/DS_WRITE */
59
DB *dbp; /* data base handle */
61
bool is_swapped; /* set if CPU and data base endianness differ */
62
bool created; /* if newly created; for datastore.c (to add .WORDLIST_VERSION) */
65
/* Zero-fill the DBT object `dbt` (pass the object itself, not its address).
 * The argument is parenthesized so that any lvalue expression expands
 * safely inside the address-of operation. */
#define DBT_init(dbt) (memset(&(dbt), 0, sizeof(DBT)))
67
/* True when the Berkeley DB headers being compiled against are version
 * maj.min or newer.  Built only from integer comparisons so that it can be
 * evaluated in #if directives. */
#define DB_AT_LEAST(maj, min) \
    (((maj) < DB_VERSION_MAJOR) || \
     (((maj) == DB_VERSION_MAJOR) && ((min) <= DB_VERSION_MINOR)))
68
/* True when the Berkeley DB headers being compiled against are version
 * maj.min or older.  Built only from integer comparisons so that it can be
 * evaluated in #if directives. */
#define DB_AT_MOST(maj, min) \
    (((maj) > DB_VERSION_MAJOR) || \
     (((maj) == DB_VERSION_MAJOR) && ((min) >= DB_VERSION_MINOR)))
70
/* dummy infrastructure, to be expanded by environment
71
* or transactional initialization/shutdown */
73
static int db_init(void);
74
static void db_cleanup(void);
76
/* Function definitions */
78
/** Translate a BerkeleyDB \a flags bitfield back to symbols.
 * Each recognized bit (DB_CREATE, DB_EXCL, DB_NOMMAP, DB_RDONLY) is appended
 * to buf by name and cleared from flags; whatever bits remain afterwards are
 * appended in hexadecimal.
 * NOTE(review): the declarations of buf and b2 and the return statement are
 * not visible in this excerpt. */
79
static const char *resolveflags(u_int32_t flags) {
82
/* start from an empty string so the strlcat() calls below can append */
strlcpy(buf, "", sizeof(buf));
83
if (flags & DB_CREATE) flags &= ~DB_CREATE, strlcat(buf, "DB_CREATE ", sizeof(buf));
84
if (flags & DB_EXCL) flags &= ~DB_EXCL, strlcat(buf, "DB_EXCL ", sizeof(buf));
85
if (flags & DB_NOMMAP) flags &= ~DB_NOMMAP, strlcat(buf, "DB_NOMMAP ", sizeof(buf));
86
if (flags & DB_RDONLY) flags &= ~DB_RDONLY, strlcat(buf, "DB_RDONLY ", sizeof(buf));
87
/* format the bits that were not recognized above ... */
snprintf(b2, sizeof(b2), "%#lx", (unsigned long)flags);
88
/* ... and append them only if at least one such bit is set */
if (flags) strlcat(buf, b2, sizeof(buf));
92
/** wrapper for Berkeley DB's DB->open() method which has changed API and
93
* semantics -- this should deal with 3.2, 3.3, 4.0, 4.1 and 4.2. */
94
/* The arguments are forwarded to the DB handle's open method.
 * NOTE(review): the start of the call, any version-specific #if branches and
 * the return statement are not visible in this excerpt. */
static int DB_OPEN(DB *db, const char *file,
95
const char *database, DBTYPE type, u_int32_t flags, int mode)
101
0, /* TXN handle - we use autocommit instead */
103
file, database, type, flags, mode);
105
/* trace the call and its result when database debugging is enabled or the
 * BF_DEBUG_DB_OPEN environment variable is set */
if (DEBUG_DATABASE(1) || getenv("BF_DEBUG_DB_OPEN"))
106
fprintf(dbgout, "[pid %lu] DB->open(db=%p, file=%s, database=%s, "
107
"type=%x, flags=%#lx=%s, mode=%#o) -> %d %s\n",
108
(unsigned long)getpid(), (void *)db, file,
109
database ? database : "NIL", type, (unsigned long)flags,
110
resolveflags(flags), mode, ret, db_strerror(ret));
115
/* Implements locking through fcntl() on descriptor fd.
 * cmd is handed to fcntl() as its command and the address of a local lock
 * description is passed as the argument; the fcntl() result is returned
 * unchanged.
 * NOTE(review): the declaration of lock and the assignment of its other
 * fields are not visible here; presumably type becomes the lock type --
 * confirm. */
116
static int db_lock(int fd, int cmd, short int type)
122
/* offsets in the lock description are relative to the start of the file */
lock.l_whence = (short int)SEEK_SET;
124
return (fcntl(fd, cmd, &lock));
128
/** "constructor" - allocate our handle and initialize its contents */
129
/* Builds a zeroed dbh_t whose path is a copy of \a path and whose name is
 * produced by build_path() from \a path and \a name.
 * NOTE(review): the declaration of handle and the return statement are not
 * visible in this excerpt. */
static dbh_t *dbh_init(const char *path, const char *name)
132
/* size of the buffer handed to build_path(); the extra 2 bytes are
 * presumably for a separator and the terminating NUL -- confirm */
size_t len = strlen(path) + strlen(name) + 2;
134
handle = xmalloc(sizeof(dbh_t));
135
memset(handle, 0, sizeof(dbh_t)); /* valgrind */
137
handle->fd = -1; /* for lock */
139
handle->path = xstrdup(path);
140
handle->name = xmalloc(len);
141
build_path(handle->name, len, path, name);
143
/* explicit initial state of the flags (the memset above already zeroed them) */
handle->locked = false;
144
handle->is_swapped = false;
145
handle->created = false;
150
/** free \a handle and associated data.
151
* NB: does not close transactions, data bases or the environment! */
152
static void dbh_free(/*@only@*/ dbh_t *handle)
154
/* the release work is guarded by a NULL check on the handle;
 * NOTE(review): the guarded statements are not visible in this excerpt */
if (handle != NULL) {
163
/* Returns is_swapped flag */
164
/* vhandle is treated as a dbh_t *; its is_swapped member is returned as-is.
 * No NULL check is performed on vhandle. */
bool db_is_swapped(void *vhandle)
166
dbh_t *handle = vhandle;
167
return handle->is_swapped;
171
/* Returns created flag */
172
/* vhandle is treated as a dbh_t *; its created member is returned as-is.
 * No NULL check is performed on vhandle. */
bool db_created(void *vhandle)
174
dbh_t *handle = vhandle;
175
return handle->created;
179
/* If header and library version do not match,
180
* print an error message on stderr and exit with EX_ERROR. */
181
/* Compares the major/minor version reported by db_version() at run time
 * with the DB_VERSION_MAJOR/DB_VERSION_MINOR constants from the headers. */
static void check_db_version(void)
184
/* static, so the value persists between calls; NOTE(review): presumably
 * used to skip repeated checks -- its use is not visible in this excerpt */
static int version_ok;
188
/* query the version of the library actually linked in (patch level ignored) */
(void)db_version(&maj, &min, NULL);
189
if (!(maj == DB_VERSION_MAJOR && min == DB_VERSION_MINOR)) {
190
fprintf(stderr, "The DB versions do not match.\n"
191
"This program was compiled for DB version %d.%d,\n"
192
"but it is linked against DB version %d.%d.\nAborting.\n",
193
DB_VERSION_MAJOR, DB_VERSION_MINOR, maj, min);
199
/** check limit of open file (given through descriptor \a fd) against
200
* current resource limit and warn if file size is "close" (2 MB) to the
201
* limit. errors from the system are ignored, no warning then.
203
static void check_fsize_limit(int fd, uint32_t pagesize) {
207
if (fstat(fd, &st)) return; /* ignore error */
208
if (getrlimit(RLIMIT_FSIZE, &rl)) return; /* ignore error */
209
/* nothing to compare against when the soft file size limit is unlimited */
if (rl.rlim_cur != (rlim_t)RLIM_INFINITY) {
210
/* WARNING: Be extremely careful that in these comparisons there
211
* is no unsigned term, it will spoil everything as C will
212
* coerce into unsigned types, which would then make "file size
213
* larger than resource limit" undetectable. BUG: this doesn't
214
* work when pagesize doesn't fit into signed long. ("requires"
215
* 2**31 for file size and 32-bit integers to fail) */
216
/* hard check: fewer than 16 pages of headroom between file size and limit.
 * NOTE(review): pagesize is a divisor here, and get_psize can be defined
 * as the constant 0 elsewhere in this file -- confirm that callers
 * substitute a non-zero value before calling. */
if ((off_t)(rl.rlim_cur/pagesize) - st.st_size/(long)pagesize < 16) {
217
print_error(__FILE__, __LINE__, "error: the data base file size is only 16 pages");
218
print_error(__FILE__, __LINE__, "       below the resource limit. Cowardly refusing");
219
print_error(__FILE__, __LINE__, "       to continue to avoid data base corruption.");
222
/* soft check: both sizes are compared in units of 2^20 bytes; less than
 * 2 such units of headroom only produces a warning */
if ((off_t)(rl.rlim_cur >> 20) - (st.st_size >> 20) < 2) {
223
print_error(__FILE__, __LINE__, "warning: data base file size approaches resource limit.");
224
print_error(__FILE__, __LINE__, "         write errors (bumping into the limit) can cause");
225
print_error(__FILE__, __LINE__, "         data base corruption.");
230
/* The old, pre-3.3 API will not fill in the page size with
231
* DB_CACHED_COUNTS, and without DB_CACHED_COUNTS, BerkeleyDB will read
232
* the whole data base, incurring a severe performance penalty. We'll
233
* guess a page size. As this is a safety margin for the file size,
234
* we'll return 0 and let the caller guess some size instead. */
236
/* return page size, or 0xffffffff for trouble */
237
/* Reads the page size from the statistics returned by DB->stat() with
 * DB_FAST_STAT.
 * NOTE(review): the error test around the dbp->err() call, the release of
 * dbstat and the return statement are not visible in this excerpt. */
static uint32_t get_psize(DB *dbp)
239
/* NOTE(review): ret receives the DB->stat() result, which the Berkeley DB
 * API declares as int, yet it is stored in a uint32_t here -- confirm this
 * is intended */
uint32_t ret, pagesize;
240
DB_BTREE_STAT *dbstat = NULL;
242
ret = dbp->stat(dbp, &dbstat, DB_FAST_STAT);
244
dbp->err (dbp, ret, "%s (db) DB->stat", progname);
247
pagesize = dbstat->bt_pagesize;
252
/* Macro form of get_psize: ignores its argument and always yields 0.
 * NOTE(review): presumably selected by a DB version check that is not
 * visible in this excerpt -- confirm. */
#define get_psize(discard) 0
255
/* Formats the compile-time Berkeley DB version (the DB_VERSION_* constants
 * from the headers, not the run-time library version) into v.
 * NOTE(review): the declaration of v and the return statement are not
 * visible here; presumably v is a static buffer that is returned. */
const char *db_version_str(void)
258
snprintf(v, sizeof(v), "BerkeleyDB (%d.%d.%d)",
259
DB_VERSION_MAJOR, DB_VERSION_MINOR, DB_VERSION_PATCH);
263
/** Initialize database. Expects open environment.
264
* \return pointer to database handle on success, NULL otherwise.
266
void *db_open(const char *path, const char *name, dbmode_t open_mode)
270
int retries = 2; /* how often do we retry to open after ENOENT+EEXIST
271
races? 2 is sufficient unless the kernel or
272
BerkeleyDB are buggy. */
275
dbh_t *handle = NULL;
276
uint32_t open_flags = 0;
278
* If locking fails with EAGAIN, then try without MMAP, fcntl()
279
* locking may be forbidden on mmapped files, or mmap may not be
280
* available for NFS. Thanks to Piotr Kucharski and Casper Dik,
281
* see news:comp.protocols.nfs and the bogofilter mailing list,
282
* message #1520, Message-ID: <20030206172016.GS1214@sgh.waw.pl>
283
* Date: Thu, 6 Feb 2003 18:20:16 +0100
286
/* flags OR-ed into the open flags on the first and second locking attempt */
uint32_t retryflags[] = { 0, DB_NOMMAP };
294
/* translate the datastore mode into DB->open flags; DS_CREATE is tested
 * last, so it replaces the read-only setting when both bits are present */
if (open_mode & DS_READ )
295
open_flags = DB_RDONLY;
296
if (open_mode & DS_CREATE )
297
open_flags = DB_CREATE | DB_EXCL;
299
/* retry when locking failed */
300
for (idx = 0; idx < COUNTOF(retryflags); idx += 1)
304
uint32_t retryflag = retryflags[idx], pagesize;
306
/* a fresh handle is built on every pass of the retry loop */
handle = dbh_init(path, name);
311
/* create DB handle */
312
if ((ret = db_create (&dbp, dbe, 0)) != 0) {
313
print_error(__FILE__, __LINE__, "(db) db_create, err: %d, %s",
314
ret, db_strerror(ret));
320
/* set cache size, but not when we're using an environment */
321
/* the division/modulo by 1024 splits db_cachesize into the gigabyte and
 * byte arguments of set_cachesize, i.e. db_cachesize is treated as a
 * megabyte count */
if (dbe == NULL && db_cachesize != 0 &&
322
(ret = dbp->set_cachesize(dbp, db_cachesize/1024, (db_cachesize % 1024) * 1024*1024, 1)) != 0) {
323
print_error(__FILE__, __LINE__, "(db) DB(%s)->set_cachesize(%u,%u,%u), err: %d, %s",
324
handle->name, db_cachesize/1024u, (db_cachesize % 1024u) * 1024u*1024u, 1u, ret, db_strerror(ret));
329
/* with an environment, t is set to the last DIRSEP_C inside the full name;
 * NOTE(review): the statements that adjust or default t are not visible */
if (dbe && (t = strrchr(handle->name, DIRSEP_C)))
336
/* first attempt: open with the flags derived from open_mode */
ret = DB_OPEN(dbp, t, NULL, dbtype, open_flags | retryflag, 0664);
339
/* a missing file (ENOENT) is not treated as an error unless opening read-only */
err = (ret != ENOENT) || (open_flags & DB_RDONLY);
341
/* second attempt: exclusive creation; NOTE(review): the condition guarding
 * this call is not visible in this excerpt */
ret = DB_OPEN(dbp, t, NULL, dbtype, open_flags | DB_CREATE | DB_EXCL | retryflag, 0664);
345
handle->created = true;
350
if (ret == ENOENT && open_flags != DB_RDONLY)
358
if (open_flags != DB_RDONLY && ret == EEXIST && --retries) {
359
/* sleep for 4 to 100 ms - this is just to give up the CPU
360
* to another process and let it create the data base
362
rand_sleep(4 * 1000, 100 * 1000);
366
/* close again and bail out without further tries */
367
if (DEBUG_DATABASE(0))
368
print_error(__FILE__, __LINE__, "(db) DB->open(%s) - actually %s, bogohome %s, err %d, %s",
369
handle->name, t, bogohome, ret, db_strerror(ret));
374
/* see if the database byte order differs from that of the cpu's */
376
/* two call forms of get_byteswapped follow; NOTE(review): presumably chosen
 * by a DB version #if that is not visible in this excerpt */
ret = dbp->get_byteswapped (dbp, &is_swapped);
379
is_swapped = dbp->get_byteswapped (dbp);
381
handle->is_swapped = is_swapped ? true : false;
384
dbp->err (dbp, ret, "%s (db) DB->get_byteswapped: %s",
385
progname, handle->name);
386
db_close(handle, false);
387
return NULL; /* handle already freed, ok to return */
390
/* fetch the file descriptor of the data base file; it is used below for
 * the size check and for locking */
ret = dbp->fd(dbp, &handle->fd);
392
dbp->err (dbp, ret, "%s (db) DB->fd: %s",
393
progname, handle->name);
394
db_close(handle, false);
395
return NULL; /* handle already freed, ok to return */
398
/* query page size */
399
pagesize = get_psize(dbp);
400
if (pagesize == 0xffffffff) {
408
/* check file size limit */
409
check_fsize_limit(handle->fd, pagesize);
411
/* skip manual lock when run in environment */
416
/* F_SETLK lock request: read lock only when open_mode is exactly DS_READ,
 * write lock otherwise.  NOTE(review): the flag mapping above tests
 * open_mode & DS_READ while this tests equality -- confirm the difference
 * is intended. */
if (db_lock(handle->fd, F_SETLK,
417
(short int)(open_mode == DS_READ ? F_RDLCK : F_WRLCK)))
420
db_close(handle, true);
421
handle = NULL; /* db_close freed it, we don't want to use it anymore */
431
} /* for idx over retryflags */
434
/* NOTE(review): both assignments to locked appear below; the conditions
 * selecting between them are not visible in this excerpt */
handle->locked = true;
436
handle->locked=false;
454
/* Deletes the record whose key is token->data (token->leng bytes) through
 * DB->del().  A DB_NOTFOUND result is not reported as an error.
 * NOTE(review): the declarations of ret and db_key and the statements after
 * the error message are not visible in this excerpt. */
int db_delete(void *vhandle, const dbv_t *token)
457
dbh_t *handle = vhandle;
458
DB *dbp = handle->dbp;
463
db_key.data = token->data;
464
db_key.size = token->leng;
466
ret = dbp->del(dbp, NULL, &db_key, 0);
468
/* only results other than success and "not found" are reported */
if (ret != 0 && ret != DB_NOTFOUND) {
469
print_error(__FILE__, __LINE__, "(db) db_delete('%.*s'), err: %d, %s",
470
CLAMP_INT_MAX(db_key.size),
471
(const char *) db_key.data,
472
ret, db_strerror(ret));
476
return ret; /* 0 if ok */
480
/* Looks up token through DB->get().  The caller supplies the result buffer:
 * val->data is the storage and val->leng its capacity on entry
 * (DB_DBT_USERMEM); val->leng is overwritten with the size reported by the
 * library.
 * NOTE(review): the dispatch on ret (presumably success / DB_NOTFOUND /
 * other error) and the return statement are not visible in this excerpt. */
int db_get_dbvalue(void *vhandle, const dbv_t *token, /*@out@*/ dbv_t *val)
486
dbh_t *handle = vhandle;
487
DB *dbp = handle->dbp;
492
db_key.data = token->data;
493
db_key.size = token->leng;
495
db_data.data = val->data;
496
db_data.size = val->leng; /* cur used */
497
db_data.ulen = val->leng; /* max size */
498
db_data.flags = DB_DBT_USERMEM; /* saves the memcpy */
500
ret = dbp->get(dbp, NULL, &db_key, &db_data, 0);
502
val->leng = db_data.size; /* read count */
509
/* "not found" is only traced at database debug level 3 */
if (DEBUG_DATABASE(3)) {
510
fprintf(dbgout, "db_get_dbvalue: [%.*s] not found\n",
511
CLAMP_INT_MAX(token->leng), (char *) token->data);
515
print_error(__FILE__, __LINE__, "(db) db_get_dbvalue( '%.*s' ), err: %d, %s",
516
CLAMP_INT_MAX(token->leng), (char *) token->data, ret, db_strerror(ret));
524
/* Stores val (val->leng bytes at val->data) under the key token through
 * DB->put() with flags 0.
 * NOTE(review): the declarations of ret, db_key and db_data, the error test
 * around the message and the return statement are not visible here. */
int db_set_dbvalue(void *vhandle, const dbv_t *token, dbv_t *val)
531
dbh_t *handle = vhandle;
532
DB *dbp = handle->dbp;
537
db_key.data = token->data;
538
db_key.size = token->leng;
540
db_data.data = val->data;
541
db_data.size = val->leng; /* write count */
543
ret = dbp->put(dbp, NULL, &db_key, &db_data, 0);
546
print_error(__FILE__, __LINE__, "(db) db_set_dbvalue( '%.*s' ), err: %d, %s",
547
CLAMP_INT_MAX(token->leng), (char *)token->data, ret, db_strerror(ret));
555
/* Close files and clean up. */
556
/* Closes the DB handle stored in vhandle.  nosync selects DB_NOSYNC as the
 * flag passed to DB->close().
 * NOTE(review): the release of the handle itself and the error test around
 * the final message are not visible in this excerpt; db_open relies on this
 * function freeing the handle. */
void db_close(void *vhandle, bool nosync)
559
dbh_t *handle = vhandle;
560
DB *dbp = handle->dbp;
561
uint32_t f = nosync ? DB_NOSYNC : 0;
563
if (DEBUG_DATABASE(1))
564
fprintf(dbgout, "db_close (%s) %s\n",
565
handle->name, nosync ? "nosync" : "sync");
567
ret = dbp->close(dbp, f);
568
/* the DB_INCOMPLETE special case is compiled only for DB 3.2 through 4.0 */
#if DB_AT_LEAST(3,2) && DB_AT_MOST(4,0)
569
/* ignore dirty pages in buffer pool */
570
if (ret == DB_INCOMPLETE)
574
print_error(__FILE__, __LINE__, "(db) db_close err: %d, %s", ret, db_strerror(ret));
583
flush any data in memory to disk
585
/* Calls DB->sync() on the DB handle stored in vhandle.
 * NOTE(review): the declaration of ret and the error test around the final
 * message are not visible in this excerpt. */
void db_flush(void *vhandle)
588
dbh_t *handle = vhandle;
589
DB *dbp = handle->dbp;
591
ret = dbp->sync(dbp, 0);
592
/* the DB_INCOMPLETE special case is compiled only for DB 3.2 through 4.0 */
#if DB_AT_LEAST(3,2) && DB_AT_MOST(4,0)
593
/* ignore dirty pages in buffer pool */
594
if (ret == DB_INCOMPLETE)
598
print_error(__FILE__, __LINE__, "(db) db_sync: err: %d, %s", ret, db_strerror(ret));
602
/* Walks the data base with a cursor, from DB_FIRST via DB_NEXT, and calls
 * hook(key, data, userdata) for each record.
 * NOTE(review): the loop condition, the handling of rc, the release of the
 * key copy and several declarations are not visible in this excerpt. */
int db_foreach(void *vhandle, db_foreach_t hook, void *userdata)
604
dbh_t *handle = vhandle;
605
DB *dbp = handle->dbp;
613
dbv_t dbv_key, dbv_data;
614
memset(&key, 0, sizeof(key));
615
memset(&data, 0, sizeof(data));
617
ret = dbp->cursor(dbp, NULL, &dbcp, 0);
619
dbp->err(dbp, ret, "(cursor): %s", handle->path);
623
for (ret = dbcp->c_get(dbcp, &key, &data, DB_FIRST);
625
ret = dbcp->c_get(dbcp, &key, &data, DB_NEXT))
629
/* Question: Is there a way to avoid using malloc/free? */
631
/* switch to "dbv_t *" variables */
632
/* the key is copied into a buffer one byte larger so that it can be
 * NUL-terminated before being handed to the hook */
dbv_key.leng = key.size;
633
dbv_key.data = xmalloc(dbv_key.leng+1);
634
memcpy(dbv_key.data, key.data, dbv_key.leng);
635
((char *)dbv_key.data)[dbv_key.leng] = '\0';
637
/* the value is passed by reference to the cursor's buffer, not copied */
dbv_data.data = data.data;
638
dbv_data.leng = data.size;
640
/* call user function */
641
rc = hook(&dbv_key, &dbv_data, userdata);
644
/* returns 0 if ok, 1 if not */
656
dbp->err(dbp, ret, "(c_get)");
659
/* NOTE(review): the c_close() result is only tested, not stored, so the
 * message below reports the earlier value of ret (from c_get) rather than
 * the close error -- confirm whether ret should be assigned here */
if (dbcp->c_close(dbcp)) {
660
dbp->err(dbp, ret, "(c_close)");
664
return ret; /* 0 if ok */
667
/* Thin public wrapper: returns db_strerror(e) unchanged. */
const char *db_str_err(int e) {
668
return db_strerror(e);
671
/* dummy infrastructure, to be expanded by environment
672
* or transactional initialization/shutdown */
674
/* Sets up the file-scope environment handle dbe, but only when bogohome is
 * set and the BOGOFILTER_CONCURRENT_DATA_STORE environment variable exists.
 * NOTE(review): the error tests around the messages, the handling of t and
 * the return statements are not visible in this excerpt. */
static int db_init(void) {
680
if (bogohome && getenv("BOGOFILTER_CONCURRENT_DATA_STORE")) {
681
int ret = db_env_create(&dbe, 0);
683
print_error(__FILE__, __LINE__, "db_env_create, err: %d, %s", ret, db_strerror(ret));
686
/* the division/modulo by 1024 splits db_cachesize into the gigabyte and
 * byte arguments of set_cachesize, i.e. db_cachesize is treated as a
 * megabyte count */
if (db_cachesize != 0 &&
687
(ret = dbe->set_cachesize(dbe, db_cachesize/1024, (db_cachesize % 1024) * 1024*1024, 1)) != 0) {
688
print_error(__FILE__, __LINE__, "DBENV->set_cachesize(%u), err: %d, %s",
689
db_cachesize, ret, db_strerror(ret));
693
/* Allow user to override DB_CDB_ALLDB to 0 */
694
if ((t = getenv("DB_CDB_ALLDB")))
697
/* open (creating if necessary) the environment in bogohome with a memory
 * pool and the DB_INIT_CDB subsystem */
ret = dbe->open(dbe, bogohome, DB_INIT_MPOOL | DB_INIT_CDB | DB_CREATE, /* mode */ 0644);
700
print_error(__FILE__, __LINE__, "DBENV->open, err: %d, %s", ret, db_strerror(ret));
708
static void db_cleanup(void) {