18
19
/* Size (64 kB) passed to str_new() when allocating the update command
   buffer (ctx->cmd). */
#define SOLR_CMDBUF_SIZE (1024*64)
19
20
/* Flush threshold, 128 bytes below SOLR_CMDBUF_SIZE. When the pending
   expunge command (ctx->cmd_expunge) has grown to at least this length,
   it is flushed before more IDs are appended. */
#define SOLR_CMDBUF_FLUSH_SIZE (SOLR_CMDBUF_SIZE-128)
20
/* Once the buffered value (ctx->cur_value) reaches this many bytes, a
   "header size is huge" warning is logged. size_warned_uid is compared
   against the current UID so the warning isn't repeated for the same
   message. */
#define SOLR_BUFFER_WARN_SIZE (1024*1024)
21
21
/* NOTE(review): no use of this limit is visible in this excerpt;
   presumably it caps the number of rows requested from Solr in
   multi-mailbox lookups - confirm against the lookup code. */
#define SOLR_MAX_MULTI_ROWS 100000
23
/* If header is larger than this, truncate it. */
24
#define SOLR_HEADER_MAX_SIZE (1024*1024)
25
/* If SOLR_HEADER_MAX_SIZE was already reached, still write to individual
26
header fields as long as they're smaller than this */
27
#define SOLR_HEADER_LINE_MAX_TRUNC_SIZE 1024
23
29
struct solr_fts_backend {
24
30
struct fts_backend backend;
31
struct solr_connection *solr_conn;
27
34
struct solr_fts_field {
38
45
struct solr_connection_post *post;
40
47
string_t *cmd, *cur_value, *cur_value2;
41
ARRAY_DEFINE(fields, struct solr_fts_field);
48
string_t *cmd_expunge;
49
ARRAY(struct solr_fts_field) fields;
43
51
uint32_t last_indexed_uid;
44
uint32_t size_warned_uid;
46
53
unsigned int last_indexed_uid_set:1;
47
54
unsigned int body_open:1;
48
55
unsigned int documents_added:1;
49
56
unsigned int expunges:1;
57
unsigned int truncate_header:1;
52
/* File-scope Solr connection used by the functions in this file.
   fts_backend_solr_init() creates it with solr_connection_init() only
   while it is still NULL. */
static struct solr_connection *solr_conn = NULL;
54
60
static bool is_valid_xml_char(unichar_t chr)
56
62
/* Valid characters in XML:
136
142
static void solr_quote_http(string_t *dest, const char *str)
138
144
str_append(dest, "%22");
139
solr_connection_http_escape(solr_conn, dest, str);
145
http_url_escape_param(dest, str);
140
146
str_append(dest, "%22");
153
fts_backend_solr_init(struct fts_backend *_backend,
154
const char **error_r ATTR_UNUSED)
159
fts_backend_solr_init(struct fts_backend *_backend, const char **error_r)
161
struct solr_fts_backend *backend = (struct solr_fts_backend *)_backend;
156
162
struct fts_solr_user *fuser = FTS_SOLR_USER_CONTEXT(_backend->ns->user);
157
const struct fts_solr_settings *set = &fuser->set;
159
if (solr_conn == NULL)
160
solr_conn = solr_connection_init(set->url, set->debug);
165
*error_r = "Invalid fts_solr setting";
168
return solr_connection_init(fuser->set.url, fuser->set.debug,
169
&backend->solr_conn, error_r);
164
172
static void fts_backend_solr_deinit(struct fts_backend *_backend)
193
202
str_append(str, "%22%22");
195
204
pool = pool_alloconly_create("solr last uid lookup", 1024);
196
if (solr_connection_select(solr_conn, str_c(str),
205
if (solr_connection_select(backend->solr_conn, str_c(str),
197
206
pool, &results) < 0)
199
208
else if (results[0] == NULL) {
229
238
if (get_last_uid_fallback(_backend, box, last_uid_r) < 0)
232
(void)fts_index_set_last_uid(box, *last_uid_r);
241
fts_index_set_last_uid(box, *last_uid_r);
241
250
ctx = i_new(struct solr_fts_backend_update_context, 1);
242
251
ctx->ctx.backend = _backend;
243
ctx->cmd = str_new(default_pool, SOLR_CMDBUF_SIZE);
244
252
i_array_init(&ctx->fields, 16);
245
253
return &ctx->ctx;
306
314
array_foreach_modifiable(&ctx->fields, field) {
307
315
str_printfa(ctx->cmd, "<field name=\"%s\">", field->key);
308
str_append_str(ctx->cmd, field->value);
316
xml_encode_data(ctx->cmd, str_data(field->value), str_len(field->value));
309
317
str_append(ctx->cmd, "</field>");
310
318
str_truncate(field->value, 0);
326
334
return solr_connection_post_end(ctx->post);
338
fts_backend_solr_expunge_flush(struct solr_fts_backend_update_context *ctx)
340
struct solr_fts_backend *backend =
341
(struct solr_fts_backend *)ctx->ctx.backend;
343
str_append(ctx->cmd_expunge, "</delete>");
344
(void)solr_connection_post(backend->solr_conn, str_c(ctx->cmd_expunge));
345
str_truncate(ctx->cmd_expunge, 0);
346
str_append(ctx->cmd_expunge, "<delete>");
330
350
fts_backend_solr_update_deinit(struct fts_backend_update_context *_ctx)
332
352
struct solr_fts_backend_update_context *ctx =
333
353
(struct solr_fts_backend_update_context *)_ctx;
354
struct solr_fts_backend *backend =
355
(struct solr_fts_backend *)_ctx->backend;
334
356
struct solr_fts_field *field;
336
358
int ret = _ctx->failed ? -1 : 0;
341
363
if (ctx->documents_added || ctx->expunges) {
342
364
/* commit and wait until the documents we just indexed are
343
365
visible to the following search */
344
str = t_strdup_printf("<commit waitFlush=\"false\" "
345
"waitSearcher=\"%s\"/>",
367
fts_backend_solr_expunge_flush(ctx);
368
str = t_strdup_printf("<commit softCommit=\"true\" waitSearcher=\"%s\"/>",
346
369
ctx->documents_added ? "true" : "false");
347
if (solr_connection_post(solr_conn, str) < 0)
370
if (solr_connection_post(backend->solr_conn, str) < 0)
374
if (ctx->cmd != NULL)
376
if (ctx->cmd_expunge != NULL)
377
str_free(&ctx->cmd_expunge);
352
378
array_foreach_modifiable(&ctx->fields, field) {
353
379
str_free(&field->value);
354
380
i_free(field->key);
404
430
highly unlikely to be indexed at this time. */
407
ctx->expunges = TRUE;
412
cmd = t_str_new(256);
413
str_append(cmd, "<delete><id>");
414
xml_encode_id(ctx, cmd, uid);
415
str_append(cmd, "</id></delete>");
417
(void)solr_connection_post(solr_conn, str_c(cmd));
433
if (!ctx->expunges) {
434
ctx->expunges = TRUE;
435
ctx->cmd_expunge = str_new(default_pool, 1024);
436
str_append(ctx->cmd_expunge, "<delete>");
439
if (str_len(ctx->cmd_expunge) >= SOLR_CMDBUF_FLUSH_SIZE)
440
fts_backend_solr_expunge_flush(ctx);
442
str_append(ctx->cmd_expunge, "<id>");
443
xml_encode_id(ctx, ctx->cmd_expunge, uid);
444
str_append(ctx->cmd_expunge, "</id>");
422
448
fts_backend_solr_uid_changed(struct solr_fts_backend_update_context *ctx,
451
struct solr_fts_backend *backend =
452
(struct solr_fts_backend *)ctx->ctx.backend;
425
454
if (ctx->post == NULL) {
426
455
i_assert(ctx->prev_uid == 0);
428
ctx->post = solr_connection_post_begin(solr_conn);
457
ctx->cmd = str_new(default_pool, SOLR_CMDBUF_SIZE);
458
ctx->post = solr_connection_post_begin(backend->solr_conn);
429
459
str_append(ctx->cmd, "<add>");
431
461
fts_backend_solr_doc_close(ctx);
433
463
ctx->prev_uid = uid;
464
ctx->truncate_header = FALSE;
434
465
fts_backend_solr_doc_open(ctx, uid);
518
549
xml_encode_data(ctx->cmd, data, size);
520
xml_encode_data(ctx->cur_value, data, size);
521
if (ctx->cur_value2 != NULL)
551
if (!ctx->truncate_header)
552
xml_encode_data(ctx->cur_value, data, size);
553
if (ctx->cur_value2 != NULL &&
554
(!ctx->truncate_header ||
555
str_len(ctx->cur_value2) < SOLR_HEADER_LINE_MAX_TRUNC_SIZE))
522
556
xml_encode_data(ctx->cur_value2, data, size);
527
561
str_len(ctx->cmd));
528
562
str_truncate(ctx->cmd, 0);
530
if (str_len(ctx->cur_value) >= SOLR_BUFFER_WARN_SIZE &&
531
ctx->size_warned_uid != ctx->prev_uid) {
564
if (!ctx->truncate_header &&
565
str_len(ctx->cur_value) >= SOLR_HEADER_MAX_SIZE) {
532
566
/* a large header */
533
567
i_assert(ctx->cur_value != ctx->cmd);
535
ctx->size_warned_uid = ctx->prev_uid;
536
i_warning("fts-solr(%s): Mailbox %s UID=%u header size is huge",
569
i_warning("fts-solr(%s): Mailbox %s UID=%u header size is huge, truncating",
537
570
ctx->cur_box->storage->user->username,
538
571
mailbox_get_vname(ctx->cur_box), ctx->prev_uid);
572
ctx->truncate_header = TRUE;
555
589
/* FIXME: proper rescan needed. for now we'll just reset the
557
591
iter = mailbox_list_iter_init(backend->ns->list, "*",
592
MAILBOX_LIST_ITER_SKIP_ALIASES |
558
593
MAILBOX_LIST_ITER_NO_AUTO_BOXES);
559
594
while ((info = mailbox_list_iter_next(iter)) != NULL) {
560
595
if ((info->flags &
561
596
(MAILBOX_NONEXISTENT | MAILBOX_NOSELECT)) != 0)
564
box = mailbox_alloc(info->ns->list, info->name, 0);
599
box = mailbox_alloc(info->ns->list, info->vname, 0);
565
600
if (mailbox_open(box) == 0) {
566
601
if (fts_index_set_last_uid(box, 0) < 0)
723
758
const char *box_guid, ARRAY_TYPE(seq_range) *uids_r,
724
759
ARRAY_TYPE(fts_score_map) *scores_r)
761
struct solr_fts_backend *backend = (struct solr_fts_backend *)_backend;
726
762
pool_t pool = pool_alloconly_create("fts solr search", 1024);
727
763
struct solr_result **results;
736
772
str_append(str, "%22%22");
738
ret = solr_connection_select(solr_conn, str_c(str), pool, &results);
774
ret = solr_connection_select(backend->solr_conn, str_c(str),
739
776
if (ret == 0 && results[0] != NULL) {
740
777
array_append_array(uids_r, &results[0]->uids);
741
778
array_append_array(scores_r, &results[0]->scores);
783
820
struct mailbox *const boxes[],
784
821
struct fts_multi_result *result)
823
struct solr_fts_backend *backend = (struct solr_fts_backend *)_backend;
786
824
struct solr_result **solr_results;
787
825
struct fts_result *fts_result;
788
ARRAY_DEFINE(fts_results, struct fts_result);
789
struct hash_table *mailboxes;
826
ARRAY(struct fts_result) fts_results;
827
HASH_TABLE(char *, struct mailbox *) mailboxes;
790
828
struct mailbox *box;
791
829
const char *box_guid;
792
830
unsigned int i, len;
800
838
str_append(str, "%22%22");
802
mailboxes = hash_table_create(default_pool, default_pool, 0,
803
str_hash, (hash_cmp_callback_t *)strcmp);
840
hash_table_create(&mailboxes, default_pool, 0, str_hash, strcmp);
804
841
str_append(str, "%2B(");
805
842
len = str_len(str);
806
843
for (i = 0; boxes[i] != NULL; i++) {
816
853
str_append_c(str, ')');
818
if (solr_connection_select(solr_conn, str_c(str),
855
if (solr_connection_select(backend->solr_conn, str_c(str),
819
856
result->pool, &solr_results) < 0) {
820
857
hash_table_destroy(&mailboxes);
835
872
fts_result->scores = solr_results[i]->scores;
836
873
fts_result->scores_sorted = TRUE;
838
(void)array_append_space(&fts_results);
875
array_append_zero(&fts_results);
839
876
result->box_results = array_idx_modifiable(&fts_results, 0);
840
877
hash_table_destroy(&mailboxes);