1
/* Copyright (c) 2006-2011 Dovecot authors, see the included COPYING file */
1
/* Copyright (c) 2006-2012 Dovecot authors, see the included COPYING file */
5
#include "mkdir-parents.h"
6
#include "hex-binary.h"
8
#include "mail-namespace.h"
6
9
#include "mail-storage-private.h"
10
#include "fts-expunge-log.h"
7
11
#include "lucene-wrapper.h"
12
#include "fts-indexer.h"
8
13
#include "fts-lucene-plugin.h"
10
17
#define LUCENE_INDEX_DIR_NAME "lucene-indexes"
11
#define LUCENE_LOCK_SUBDIR_NAME "locks"
13
#define LUCENE_CONTEXT(obj) \
14
MODULE_CONTEXT(obj, fts_lucene_storage_module)
16
struct lucene_mail_storage {
17
union mail_storage_module_context module_ctx;
18
#define LUCENE_EXPUNGE_LOG_NAME "dovecot-expunges.log"
19
#define LUCENE_OPTIMIZE_BATCH_MSGS_COUNT 100
21
struct lucene_fts_backend {
22
struct fts_backend backend;
18
25
struct lucene_index *index;
19
26
struct mailbox *selected_box;
27
unsigned int selected_box_generation;
28
guid_128_t selected_box_guid;
30
struct fts_expunge_log *expunge_log;
32
unsigned int dir_created:1;
33
unsigned int updating:1;
23
struct lucene_fts_backend {
24
struct fts_backend backend;
25
struct lucene_mail_storage *lstorage;
36
struct lucene_fts_backend_update_context {
37
struct fts_backend_update_context ctx;
26
39
struct mailbox *box;
29
struct lucene_fts_backend_build_context {
30
struct fts_backend_build_context ctx;
41
uint32_t last_indexed_uid;
42
char *first_box_vname;
47
unsigned int added_msgs;
48
struct fts_expunge_log_append_ctx *expunge_ctx;
51
bool last_indexed_uid_set;
36
static MODULE_CONTEXT_DEFINE_INIT(fts_lucene_storage_module,
37
&mail_storage_module_register);
39
static void fts_backend_select(struct lucene_fts_backend *backend)
41
if (backend->lstorage->selected_box != backend->box) {
42
lucene_index_select_mailbox(backend->lstorage->index,
43
mailbox_get_name(backend->box));
44
backend->lstorage->selected_box = backend->box;
48
static struct fts_backend *fts_backend_lucene_init(struct mailbox *box)
50
struct lucene_mail_storage *lstorage;
54
static int fts_backend_lucene_mkdir(struct lucene_fts_backend *backend)
58
if (backend->dir_created)
61
backend->dir_created = TRUE;
62
if (mailbox_list_mkdir_root(backend->backend.ns->list,
64
MAILBOX_LIST_PATH_TYPE_INDEX, &error) < 0) {
65
i_error("lucene: Couldn't create root dir %s: %s",
66
backend->dir_path, error);
73
fts_lucene_get_mailbox_guid(struct mailbox *box, guid_128_t guid_r)
75
struct mailbox_metadata metadata;
77
if (mailbox_get_metadata(box, MAILBOX_METADATA_GUID,
79
i_error("lucene: Couldn't get mailbox %s GUID: %s",
80
box->vname, mailbox_get_last_error(box, NULL));
83
memcpy(guid_r, metadata.guid, GUID_128_SIZE);
88
fts_backend_select(struct lucene_fts_backend *backend, struct mailbox *box)
91
unsigned char guid_hex[MAILBOX_GUID_HEX_LENGTH];
92
wchar_t wguid_hex[MAILBOX_GUID_HEX_LENGTH];
96
i_assert(box != NULL);
98
if (backend->selected_box == box &&
99
backend->selected_box_generation == box->generation_sequence)
103
if (fts_lucene_get_mailbox_guid(box, guid) < 0)
105
buffer_create_data(&buf, guid_hex, MAILBOX_GUID_HEX_LENGTH);
106
binary_to_hex_append(&buf, guid, GUID_128_SIZE);
107
for (i = 0; i < N_ELEMENTS(wguid_hex); i++)
108
wguid_hex[i] = guid_hex[i];
110
lucene_index_select_mailbox(backend->index, wguid_hex);
112
lucene_index_unselect_mailbox(backend->index);
113
memset(&guid, 0, sizeof(guid));
115
backend->selected_box = box;
116
memcpy(backend->selected_box_guid, guid,
117
sizeof(backend->selected_box_guid));
118
backend->selected_box_generation =
119
box == NULL ? 0 : box->generation_sequence;
123
static struct fts_backend *fts_backend_lucene_alloc(void)
51
125
struct lucene_fts_backend *backend;
52
const char *path, *lock_path;
54
lstorage = LUCENE_CONTEXT(box->storage);
55
if (lstorage == NULL) {
56
path = mailbox_list_get_path(box->list, "INBOX",
57
MAILBOX_LIST_PATH_TYPE_INDEX);
59
/* in-memory indexes */
60
if (box->storage->set->mail_debug)
61
i_debug("fts squat: Disabled with in-memory indexes");
65
path = t_strconcat(path, "/"LUCENE_INDEX_DIR_NAME, NULL);
66
lock_path = t_strdup_printf("%s/"LUCENE_LOCK_SUBDIR_NAME, path);
67
if (mkdir_parents(lock_path, 0700) < 0 && errno != EEXIST) {
68
i_error("mkdir_parents(%s) failed: %m", lock_path);
72
lstorage = i_new(struct lucene_mail_storage, 1);
73
lstorage->index = lucene_index_init(path, lock_path);
74
MODULE_CONTEXT_SET(box->storage, fts_lucene_storage_module,
79
127
backend = i_new(struct lucene_fts_backend, 1);
80
128
backend->backend = fts_backend_lucene;
81
backend->lstorage = lstorage;
83
129
return &backend->backend;
133
fts_backend_lucene_init(struct fts_backend *_backend, const char **error_r)
135
struct lucene_fts_backend *backend =
136
(struct lucene_fts_backend *)_backend;
137
struct fts_lucene_user *fuser =
138
FTS_LUCENE_USER_CONTEXT(_backend->ns->user);
142
/* invalid settings */
143
*error_r = "Invalid fts_lucene settings";
147
path = mailbox_list_get_path(_backend->ns->list, NULL,
148
MAILBOX_LIST_PATH_TYPE_INDEX);
149
i_assert(path != NULL); /* fts already checked this */
151
backend->dir_path = i_strconcat(path, "/"LUCENE_INDEX_DIR_NAME, NULL);
152
backend->index = lucene_index_init(backend->dir_path,
153
_backend->ns->list, &fuser->set);
155
path = t_strconcat(backend->dir_path, "/"LUCENE_EXPUNGE_LOG_NAME, NULL);
156
backend->expunge_log = fts_expunge_log_init(path);
86
160
static void fts_backend_lucene_deinit(struct fts_backend *_backend)
88
162
struct lucene_fts_backend *backend =
89
163
(struct lucene_fts_backend *)_backend;
91
if (--backend->lstorage->refcount == 0) {
92
MODULE_CONTEXT_UNSET(backend->box->storage,
93
fts_lucene_storage_module);
94
lucene_index_deinit(backend->lstorage->index);
95
i_free(backend->lstorage);
165
lucene_index_deinit(backend->index);
166
fts_expunge_log_deinit(&backend->expunge_log);
167
i_free(backend->dir_path);
101
172
fts_backend_lucene_get_last_uid(struct fts_backend *_backend,
102
uint32_t *last_uid_r)
173
struct mailbox *box, uint32_t *last_uid_r)
104
175
struct lucene_fts_backend *backend =
105
176
(struct lucene_fts_backend *)_backend;
107
fts_backend_select(backend);
108
return lucene_index_get_last_uid(backend->lstorage->index, last_uid_r);
177
struct fts_lucene_user *fuser =
178
FTS_LUCENE_USER_CONTEXT(_backend->ns->user);
179
struct fts_index_header hdr;
180
uint32_t set_checksum;
182
if (fts_index_get_header(box, &hdr)) {
183
set_checksum = fts_lucene_settings_checksum(&fuser->set);
184
if (!fts_index_have_compatible_settings(_backend->ns->list,
186
/* need to rebuild the index */
189
*last_uid_r = hdr.last_indexed_uid;
194
/* either nothing has been indexed, or the index was corrupted.
195
do it the slow way. */
196
if (fts_backend_select(backend, box) < 0)
198
if (lucene_index_get_last_uid(backend->index, last_uid_r) < 0)
201
(void)fts_index_set_last_uid(box, *last_uid_r);
112
fts_backend_lucene_build_init(struct fts_backend *_backend,
113
uint32_t *last_uid_r,
114
struct fts_backend_build_context **ctx_r)
205
static struct fts_backend_update_context *
206
fts_backend_lucene_update_init(struct fts_backend *_backend)
116
208
struct lucene_fts_backend *backend =
117
209
(struct lucene_fts_backend *)_backend;
118
struct lucene_fts_backend_build_context *ctx;
121
fts_backend_select(backend);
122
if (lucene_index_build_init(backend->lstorage->index,
126
ctx = i_new(struct lucene_fts_backend_build_context, 1);
210
struct lucene_fts_backend_update_context *ctx;
212
i_assert(!backend->updating);
214
ctx = i_new(struct lucene_fts_backend_update_context, 1);
127
215
ctx->ctx.backend = _backend;
128
ctx->uid = last_uid + 1;
130
*last_uid_r = last_uid;
136
fts_backend_lucene_build_hdr(struct fts_backend_build_context *_ctx,
139
struct lucene_fts_backend_build_context *ctx =
140
(struct lucene_fts_backend_build_context *)_ctx;
142
i_assert(uid >= ctx->uid);
149
fts_backend_lucene_build_body_begin(struct fts_backend_build_context *_ctx,
150
uint32_t uid, const char *content_type,
151
const char *content_disposition ATTR_UNUSED)
153
struct lucene_fts_backend_build_context *ctx =
154
(struct lucene_fts_backend_build_context *)_ctx;
156
i_assert(uid >= ctx->uid);
158
if (!fts_backend_default_can_index(content_type))
216
backend->updating = TRUE;
221
fts_backend_lucene_need_optimize(struct lucene_fts_backend_update_context *ctx)
223
struct lucene_fts_backend *backend =
224
(struct lucene_fts_backend *)ctx->ctx.backend;
225
unsigned int expunges;
228
if (ctx->added_msgs >= LUCENE_OPTIMIZE_BATCH_MSGS_COUNT)
230
if (lucene_index_get_doc_count(backend->index, &numdocs) < 0)
233
if (fts_expunge_log_uid_count(backend->expunge_log, &expunges) < 0)
235
return expunges > 0 &&
236
numdocs / expunges <= 50; /* >2% of index has been expunged */
240
fts_backend_lucene_update_deinit(struct fts_backend_update_context *_ctx)
242
struct lucene_fts_backend_update_context *ctx =
243
(struct lucene_fts_backend_update_context *)_ctx;
244
struct lucene_fts_backend *backend =
245
(struct lucene_fts_backend *)_ctx->backend;
246
int ret = _ctx->failed ? -1 : 0;
248
i_assert(backend->updating);
250
backend->updating = FALSE;
251
if (ctx->lucene_opened)
252
lucene_index_build_deinit(backend->index);
254
if (ctx->expunge_ctx != NULL) {
255
if (fts_expunge_log_append_commit(&ctx->expunge_ctx) < 0)
259
if (fts_backend_lucene_need_optimize(ctx)) {
260
if (ctx->lucene_opened)
261
(void)fts_backend_optimize(_ctx->backend);
263
struct mail_user *user = backend->backend.ns->user;
264
const char *cmd, *path;
267
/* the optimize affects all mailboxes within namespace,
268
so just use any mailbox name in it */
269
cmd = t_strdup_printf("OPTIMIZE\t0\t%s\t%s\n",
270
str_tabescape(user->username),
271
str_tabescape(ctx->first_box_vname));
272
fd = fts_indexer_cmd(user, cmd, &path);
278
i_free(ctx->first_box_vname);
284
fts_backend_lucene_update_set_mailbox(struct fts_backend_update_context *_ctx,
287
struct lucene_fts_backend_update_context *ctx =
288
(struct lucene_fts_backend_update_context *)_ctx;
290
if (ctx->last_uid != 0) {
291
(void)fts_index_set_last_uid(ctx->box, ctx->last_uid);
294
if (ctx->first_box_vname == NULL)
295
ctx->first_box_vname = i_strdup(box->vname);
297
ctx->last_indexed_uid_set = FALSE;
301
fts_backend_lucene_update_expunge(struct fts_backend_update_context *_ctx,
304
struct lucene_fts_backend_update_context *ctx =
305
(struct lucene_fts_backend_update_context *)_ctx;
306
struct lucene_fts_backend *backend =
307
(struct lucene_fts_backend *)_ctx->backend;
308
struct fts_index_header hdr;
310
if (!ctx->last_indexed_uid_set) {
311
if (!fts_index_get_header(ctx->box, &hdr))
312
ctx->last_indexed_uid = 0;
314
ctx->last_indexed_uid = hdr.last_indexed_uid;
315
ctx->last_indexed_uid_set = TRUE;
317
if (ctx->last_indexed_uid == 0 ||
318
uid > ctx->last_indexed_uid + 100) {
319
/* don't waste time adding expunge to log for a message that
320
isn't even indexed. this check is racy, because indexer may
321
just be in the middle of indexing this message. we'll
322
attempt to avoid that by skipping the expunging only if
323
indexing hasn't been done for a while (100 msgs). */
327
if (ctx->expunge_ctx == NULL) {
329
fts_expunge_log_append_begin(backend->expunge_log);
332
if (fts_backend_select(backend, ctx->box) < 0)
335
fts_expunge_log_append_next(ctx->expunge_ctx,
336
backend->selected_box_guid, uid);
340
fts_backend_lucene_update_set_build_key(struct fts_backend_update_context *_ctx,
341
const struct fts_backend_build_key *key)
343
struct lucene_fts_backend_update_context *ctx =
344
(struct lucene_fts_backend_update_context *)_ctx;
345
struct lucene_fts_backend *backend =
346
(struct lucene_fts_backend *)_ctx->backend;
348
if (!ctx->lucene_opened) {
349
if (fts_backend_lucene_mkdir(backend) < 0)
350
ctx->ctx.failed = TRUE;
351
if (lucene_index_build_init(backend->index) < 0)
352
ctx->ctx.failed = TRUE;
353
ctx->lucene_opened = TRUE;
356
if (fts_backend_select(backend, ctx->box) < 0)
360
case FTS_BACKEND_BUILD_KEY_HDR:
361
case FTS_BACKEND_BUILD_KEY_MIME_HDR:
362
i_assert(key->hdr_name != NULL);
364
i_free(ctx->hdr_name);
365
ctx->hdr_name = i_strdup(key->hdr_name);
367
case FTS_BACKEND_BUILD_KEY_BODY_PART:
368
i_free_and_null(ctx->hdr_name);
370
case FTS_BACKEND_BUILD_KEY_BODY_PART_BINARY:
374
if (key->uid != ctx->last_uid) {
375
i_assert(key->uid >= ctx->last_uid);
376
ctx->last_uid = key->uid;
385
fts_backend_lucene_update_unset_build_key(struct fts_backend_update_context *_ctx)
387
struct lucene_fts_backend_update_context *ctx =
388
(struct lucene_fts_backend_update_context *)_ctx;
391
i_free_and_null(ctx->hdr_name);
167
fts_backend_lucene_build_more(struct fts_backend_build_context *_ctx,
168
const unsigned char *data, size_t size)
395
fts_backend_lucene_update_build_more(struct fts_backend_update_context *_ctx,
396
const unsigned char *data, size_t size)
170
struct lucene_fts_backend_build_context *ctx =
171
(struct lucene_fts_backend_build_context *)_ctx;
398
struct lucene_fts_backend_update_context *ctx =
399
(struct lucene_fts_backend_update_context *)_ctx;
172
400
struct lucene_fts_backend *backend =
173
401
(struct lucene_fts_backend *)_ctx->backend;
404
i_assert(ctx->uid != 0);
175
406
if (_ctx->failed)
178
i_assert(backend->lstorage->selected_box == backend->box);
179
return lucene_index_build_more(backend->lstorage->index,
180
ctx->uid, data, size, ctx->hdr);
184
fts_backend_lucene_build_deinit(struct fts_backend_build_context *ctx)
186
struct lucene_fts_backend *backend =
187
(struct lucene_fts_backend *)ctx->backend;
188
int ret = ctx->failed ? -1 : 0;
190
i_assert(backend->lstorage->selected_box == backend->box);
191
lucene_index_build_deinit(backend->lstorage->index);
197
fts_backend_lucene_expunge(struct fts_backend *_backend, struct mail *mail)
199
struct lucene_fts_backend *backend =
200
(struct lucene_fts_backend *)_backend;
202
fts_backend_select(backend);
203
(void)lucene_index_expunge(backend->lstorage->index, mail->uid);
207
fts_backend_lucene_expunge_finish(struct fts_backend *_backend ATTR_UNUSED,
208
struct mailbox *box ATTR_UNUSED,
209
bool committed ATTR_UNUSED)
214
fts_backend_lucene_lock(struct fts_backend *_backend ATTR_UNUSED)
220
fts_backend_lucene_unlock(struct fts_backend *_backend ATTR_UNUSED)
225
fts_backend_lucene_lookup(struct fts_backend *_backend,
226
const char *key, enum fts_lookup_flags flags,
227
ARRAY_TYPE(seq_range) *definite_uids,
228
ARRAY_TYPE(seq_range) *maybe_uids)
230
struct lucene_fts_backend *backend =
231
(struct lucene_fts_backend *)_backend;
233
i_assert((flags & FTS_LOOKUP_FLAG_INVERT) == 0);
235
array_clear(maybe_uids);
236
fts_backend_select(backend);
237
return lucene_index_lookup(backend->lstorage->index,
238
flags, key, definite_uids);
410
ret = lucene_index_build_more(backend->index, ctx->uid,
411
data, size, ctx->hdr_name);
417
fts_backend_lucene_refresh(struct fts_backend *_backend)
419
struct lucene_fts_backend *backend =
420
(struct lucene_fts_backend *)_backend;
422
lucene_index_close(backend->index);
426
static int fts_backend_lucene_rescan(struct fts_backend *_backend)
428
struct lucene_fts_backend *backend =
429
(struct lucene_fts_backend *)_backend;
431
if (lucene_index_rescan(backend->index) < 0)
433
return lucene_index_optimize(backend->index);
436
static int fts_backend_lucene_optimize(struct fts_backend *_backend)
438
struct lucene_fts_backend *backend =
439
(struct lucene_fts_backend *)_backend;
442
ret = lucene_index_expunge_from_log(backend->index,
443
backend->expunge_log);
445
/* log was corrupted, need to rescan */
446
ret = lucene_index_rescan(backend->index);
449
ret = lucene_index_optimize(backend->index);
454
fts_backend_lucene_lookup(struct fts_backend *_backend, struct mailbox *box,
455
struct mail_search_arg *args, bool and_args,
456
struct fts_result *result)
458
struct lucene_fts_backend *backend =
459
(struct lucene_fts_backend *)_backend;
462
if (fts_backend_select(backend, box) < 0)
465
ret = lucene_index_lookup(backend->index, args, and_args,
471
/* a char* hash function from ASU -- from glib */
472
static unsigned int wstr_hash(const void *p)
474
const wchar_t *s = p;
475
unsigned int g, h = 0;
479
if ((g = h & 0xf0000000UL)) {
490
mailboxes_get_guids(struct mailbox *const boxes[],
491
struct hash_table *guids, struct fts_multi_result *result)
493
ARRAY_DEFINE(box_results, struct fts_result);
494
struct fts_result *box_result;
499
p_array_init(&box_results, result->pool, 32);
500
for (i = 0; boxes[i] != NULL; i++) {
501
if (fts_mailbox_get_guid(boxes[i], &guid) < 0)
504
i_assert(strlen(guid) == MAILBOX_GUID_HEX_LENGTH);
505
guid_dup = t_new(wchar_t, MAILBOX_GUID_HEX_LENGTH + 1);
506
for (j = 0; j < MAILBOX_GUID_HEX_LENGTH; j++)
507
guid_dup[j] = guid[j];
509
box_result = array_append_space(&box_results);
510
box_result->box = boxes[i];
511
hash_table_insert(guids, guid_dup, box_result);
514
(void)array_append_space(&box_results);
515
result->box_results = array_idx_modifiable(&box_results, 0);
520
fts_backend_lucene_lookup_multi(struct fts_backend *_backend,
521
struct mailbox *const boxes[],
522
struct mail_search_arg *args, bool and_args,
523
struct fts_multi_result *result)
525
struct lucene_fts_backend *backend =
526
(struct lucene_fts_backend *)_backend;
530
struct hash_table *guids;
532
guids = hash_table_create(default_pool, default_pool, 0,
534
(hash_cmp_callback_t *)wcscmp);
535
ret = mailboxes_get_guids(boxes, guids, result);
537
ret = lucene_index_lookup_multi(backend->index,
538
guids, args, and_args,
541
hash_table_destroy(&guids);
546
static void fts_backend_lucene_lookup_done(struct fts_backend *_backend)
548
/* the next refresh is going to close the index anyway, so we might as
550
fts_backend_lucene_refresh(_backend);
241
553
struct fts_backend fts_backend_lucene = {
242
554
.name = "lucene",
555
.flags = FTS_BACKEND_FLAG_BUILD_FULL_WORDS |
556
FTS_BACKEND_FLAG_FUZZY_SEARCH,
559
fts_backend_lucene_alloc,
246
560
fts_backend_lucene_init,
247
561
fts_backend_lucene_deinit,
248
562
fts_backend_lucene_get_last_uid,
250
fts_backend_lucene_build_init,
251
fts_backend_lucene_build_hdr,
252
fts_backend_lucene_build_body_begin,
254
fts_backend_lucene_build_more,
255
fts_backend_lucene_build_deinit,
256
fts_backend_lucene_expunge,
257
fts_backend_lucene_expunge_finish,
258
fts_backend_lucene_lock,
259
fts_backend_lucene_unlock,
563
fts_backend_lucene_update_init,
564
fts_backend_lucene_update_deinit,
565
fts_backend_lucene_update_set_mailbox,
566
fts_backend_lucene_update_expunge,
567
fts_backend_lucene_update_set_build_key,
568
fts_backend_lucene_update_unset_build_key,
569
fts_backend_lucene_update_build_more,
570
fts_backend_lucene_refresh,
571
fts_backend_lucene_rescan,
572
fts_backend_lucene_optimize,
573
fts_backend_default_can_lookup,
260
574
fts_backend_lucene_lookup,
575
fts_backend_lucene_lookup_multi,
576
fts_backend_lucene_lookup_done