1
/* Copyright (c) 2014-2015 Dovecot authors, see the included COPYING file */
8
#include "fts-tokenizer.h"
9
#include "fts-tokenizer-private.h"
11
/* File-local list of registered tokenizer classes. It is created on the
   first fts_tokenizer_register() call, searched by name in
   fts_tokenizer_find(), and freed either by fts_tokenizers_deinit() or when
   fts_tokenizer_unregister() removes the last entry. */
static ARRAY(const struct fts_tokenizer *) fts_tokenizer_classes;
13
/* Registers the built-in tokenizer classes (fts_tokenizer_generic and
   fts_tokenizer_email_address). The registration is entered only when the
   class array has not been created yet.
   NOTE(review): the braces of this function are not visible in this
   excerpt; presumably both registrations are inside the if-block, which
   would make repeated init calls harmless - confirm. */
void fts_tokenizers_init(void)
15
if (!array_is_created(&fts_tokenizer_classes)) {
16
fts_tokenizer_register(fts_tokenizer_generic);
17
fts_tokenizer_register(fts_tokenizer_email_address);
21
/* Frees the tokenizer class array, if it currently exists. The
   array_is_created() guard means calling this without a prior init (or
   after the array was already freed) does not call array_free(). */
void fts_tokenizers_deinit(void)
23
if (array_is_created(&fts_tokenizer_classes))
24
array_free(&fts_tokenizer_classes);
28
/* Adds tok_class to the list of registered tokenizer classes.
   The class array is created on demand (initialized with
   FTS_TOKENIZER_CLASSES_NR) so this may be called before
   fts_tokenizers_init(). Only the pointer is stored; no check for an
   already-registered class with the same name is visible here. */
void fts_tokenizer_register(const struct fts_tokenizer *tok_class)
30
if (!array_is_created(&fts_tokenizer_classes))
31
i_array_init(&fts_tokenizer_classes, FTS_TOKENIZER_CLASSES_NR);
32
array_append(&fts_tokenizer_classes, &tok_class, 1);
36
/* Removes a tokenizer class from the registered list.
   The entry is located by comparing names with strcmp(), not by comparing
   the tok_class pointer itself. After deleting the entry, the whole array
   is freed if it became empty.
   NOTE(review): the declaration of idx and whatever follows the
   array_free() are not visible in this excerpt. Presumably the function
   returns right after the deletion - confirm, because continuing the
   array_foreach() over a modified or freed array would be unsafe. */
void fts_tokenizer_unregister(const struct fts_tokenizer *tok_class)
38
const struct fts_tokenizer *const *tp;
41
array_foreach(&fts_tokenizer_classes, tp) {
42
if (strcmp((*tp)->name, tok_class->name) == 0) {
43
/* convert the iterator pointer into an index for array_delete() */
idx = array_foreach_idx(&fts_tokenizer_classes, tp);
44
array_delete(&fts_tokenizer_classes, idx, 1);
45
if (array_count(&fts_tokenizer_classes) == 0)
46
array_free(&fts_tokenizer_classes);
53
/* Looks up a registered tokenizer class by name, using an exact strcmp()
   match against each registered class's name.
   NOTE(review): the return statements are not visible in this excerpt;
   presumably the matching class is returned, and NULL when nothing
   matches - confirm. */
const struct fts_tokenizer *fts_tokenizer_find(const char *name)
55
const struct fts_tokenizer *const *tp;
57
array_foreach(&fts_tokenizer_classes, tp) {
58
if (strcmp((*tp)->name, name) == 0)
64
/* Takes a tokenizer and returns a string.
   NOTE(review): the body is not visible in this excerpt; presumably it
   returns the tokenizer's name field - confirm. */
const char *fts_tokenizer_name(const struct fts_tokenizer *tok)
69
/* Creates a tokenizer instance of class tok_class via the class's create()
   callback.

   settings must be NULL or contain an even number of strings (asserted
   below) - presumably alternating key/value pairs, verify against callers.
   parent is the tokenizer that this tokenizer's output is later passed to
   (see the parent handling in fts_tokenizer_next()).

   A negative return from the class's create() callback is treated as
   failure.
   NOTE(review): the last parameter (error_r is used below), the error
   path, the conditions around the settings replacement and around the
   parent reference, and the final return are not visible in this
   excerpt. */
int fts_tokenizer_create(const struct fts_tokenizer *tok_class,
70
struct fts_tokenizer *parent,
71
const char *const *settings,
72
struct fts_tokenizer **tokenizer_r,
75
struct fts_tokenizer *tok;
76
const char *empty_settings = NULL;
78
i_assert(settings == NULL || str_array_length(settings) % 2 == 0);
81
/* point settings at a list whose first element is NULL, so create() can
   be handed a non-NULL settings pointer (presumably done only when
   settings == NULL - condition not visible) */
settings = &empty_settings;
83
if (tok_class->v->create(settings, &tok, error_r) < 0) {
88
/* a fresh tokenizer has no partially consumed input block */
tok->prev_reply_finished = TRUE;
90
/* take a reference on the parent; fts_tokenizer_unref() drops it */
fts_tokenizer_ref(parent);
92
/* buffer used by fts_tokenizer_next() to hand our tokens to the parent */
tok->parent_input = buffer_create_dynamic(default_pool, 128);
99
/* Takes a reference on tok. The tokenizer must still be alive, i.e. its
   refcount must be positive (asserted).
   NOTE(review): the increment itself is not visible in this excerpt. */
void fts_tokenizer_ref(struct fts_tokenizer *tok)
101
i_assert(tok->refcount > 0);
106
/* Drops one reference from *_tok. The refcount must be positive on entry
   (asserted). The cleanup below frees the parent_input buffer if present,
   drops the reference held on the parent tokenizer if present, and finally
   calls the class's destroy() callback.
   NOTE(review): the statement following the refcount check is not visible
   in this excerpt; presumably it returns early while references remain,
   so the cleanup runs only for the last reference - confirm. Any clearing
   of *_tok is likewise not visible. */
void fts_tokenizer_unref(struct fts_tokenizer **_tok)
108
struct fts_tokenizer *tok = *_tok;
110
i_assert(tok->refcount > 0);
113
if (--tok->refcount > 0)
116
if (tok->parent_input != NULL)
117
buffer_free(&tok->parent_input);
118
if (tok->parent != NULL)
119
fts_tokenizer_unref(&tok->parent);
120
tok->v->destroy(tok);
124
/* Runs this tokenizer's own next() callback on a data block, without
   involving the parent tokenizer, and tracks how much of the block has
   been consumed so far.

   Calling contract (asserted below): either the previous block was fully
   processed (prev_reply_finished), or the caller passes exactly the same
   data pointer and size as in the previous call, in which case
   tokenization continues at offset prev_skip within that block.

   A return value of 0 from next() means a new data block is needed.
   NOTE(review): the return type, local declarations, the condition of the
   first branch of the result handling (presumably ret > 0) and the return
   statement are not visible in this excerpt. */
fts_tokenizer_next_self(struct fts_tokenizer *tok,
125
const unsigned char *data, size_t size,
126
const char **token_r, const char **error_r)
131
i_assert(tok->prev_reply_finished ||
132
(data == tok->prev_data && size == tok->prev_size));
134
if (tok->prev_reply_finished) {
136
/* new data block: hand the whole block to the tokenizer */
ret = tok->v->next(tok, data, size, &skip, token_r, error_r);
138
/* continuing previous data */
139
i_assert(tok->prev_skip <= size);
140
ret = tok->v->next(tok, data + tok->prev_skip,
141
size - tok->prev_skip, &skip,
146
/* the callback must not claim to have consumed more than it was given */
i_assert(skip <= size - tok->prev_skip);
147
/* remember the block and the consumed offset so the next call can
   resume from where this one stopped */
tok->prev_data = data;
148
tok->prev_size = size;
149
tok->prev_skip = tok->prev_skip + skip;
150
tok->prev_reply_finished = FALSE;
151
} else if (ret == 0) {
152
/* we need a new data block */
153
tok->prev_data = NULL;
156
tok->prev_reply_finished = TRUE;
161
/* Resets a tokenizer.
   NOTE(review): the body is not visible in this excerpt, so which state is
   cleared (and whether the parent is reset too) cannot be stated here. */
void fts_tokenizer_reset(struct fts_tokenizer *tok)
166
/* Returns the next token for the given data block, passing each token
   produced by this tokenizer through its parent tokenizer when one exists
   and skip_parents is not set.

   The work is driven by tok->parent_state:
   - ADD_DATA: run our own tokenizer on data/size. If it produced a token
     that must go to the parent, the token is copied into parent_input.
   - NEXT_OUTPUT: feed the contents of parent_input to the parent.
   - FINALIZE: call the parent with NULL data and size 0, the same call
     fts_tokenizer_final() makes.
   Once the parent is done with the current token, the state goes back to
   ADD_DATA and the function calls itself to fetch our next own token.

   A positive return never comes with an empty token (asserted at the end).
   NOTE(review): local declarations, the state transitions between the
   cases, the statements taken when the ADD_DATA condition is true, and the
   return statement are not visible in this excerpt. */
int fts_tokenizer_next(struct fts_tokenizer *tok,
167
const unsigned char *data, size_t size,
168
const char **token_r, const char **error_r)
172
switch (tok->parent_state) {
173
case FTS_TOKENIZER_PARENT_STATE_ADD_DATA:
174
ret = fts_tokenizer_next_self(tok, data, size, token_r, error_r);
175
/* no token, an error, or nothing to forward the token to */
if (ret <= 0 || tok->parent == NULL || tok->skip_parents)
177
/* replace any previous parent input with the token just produced */
buffer_set_used_size(tok->parent_input, 0);
178
buffer_append(tok->parent_input, *token_r, strlen(*token_r));
181
case FTS_TOKENIZER_PARENT_STATE_NEXT_OUTPUT:
182
ret = fts_tokenizer_next(tok->parent, tok->parent_input->data,
183
tok->parent_input->used, token_r, error_r);
188
case FTS_TOKENIZER_PARENT_STATE_FINALIZE:
189
ret = fts_tokenizer_next(tok->parent, NULL, 0, token_r, error_r);
192
/* we're finished sending this token to parent tokenizer.
193
see if our own tokenizer has more tokens available */
194
tok->parent_state = FTS_TOKENIZER_PARENT_STATE_ADD_DATA;
195
return fts_tokenizer_next(tok, data, size, token_r, error_r);
199
/* we must not be returning empty tokens */
200
i_assert(ret <= 0 || (*token_r)[0] != '\0');
204
/* Convenience wrapper: calls fts_tokenizer_next() with NULL data and a
   size of 0, and returns its result unchanged. token_r and error_r are
   passed straight through.
   NOTE(review): presumably NULL/0 input tells the tokenizer there is no
   more input, so it should emit any remaining buffered token - confirm
   against the tokenizer implementations. */
int fts_tokenizer_final(struct fts_tokenizer *tok, const char **token_r,
205
const char **error_r)
207
return fts_tokenizer_next(tok, NULL, 0, token_r, error_r);