1
/* Copyright (c) 2014-2015 Dovecot authors, see the included COPYING file */
6
#include "module-context.h"
8
#include "http-client.h"
9
#include "message-parser.h"
10
#include "mail-user.h"
11
#include "fts-parser.h"
13
/* Expands to MODULE_CONTEXT() on fts_parser_tika_user_module. This file uses
   it to fetch the struct fts_parser_tika_user that was stored for a mail user
   with MODULE_CONTEXT_SET(). */
#define TIKA_USER_CONTEXT(obj) \
14
MODULE_CONTEXT(obj, fts_parser_tika_user_module)
16
/* Per-user Tika state. It is allocated from the user's pool and attached to
   the user with MODULE_CONTEXT_SET() in tika_get_http_client_url(). */
struct fts_parser_tika_user {
17
/* presumably the slot required by the mail_user module-context mechanism;
   it is never accessed directly in this file - TODO confirm */
union mail_user_module_context module_ctx;
18
/* Tika server URL parsed from the "fts_tika" setting by http_url_parse();
   readers of the cached value treat NULL as "no usable URL" */
struct http_url *http_url;
21
/* State of one Tika-backed parser instance. Code in this file casts a
   struct fts_parser pointer back to this type, and try_init returns the
   address of the embedded `parser` member.
   NOTE(review): other code in this file also uses parser->io and
   parser->failed; their declarations are not visible in this copy. */
struct tika_fts_parser {
22
/* generic parser part; its `v` member is set to the fts_parser_tika table */
struct fts_parser parser;
23
/* user whose mail_debug flag and "fts_tika" setting are read when logging */
struct mail_user *user;
24
/* PUT request to Tika; set to NULL by the response callback and aborted in
   deinit if it is still non-NULL */
struct http_client_request *http_req;
26
/* ioloop created on demand in fts_parser_tika_more() to wait for payload
   input; destroyed in deinit */
struct ioloop *ioloop;
28
/* stream the extracted text is read from: either the HTTP response payload
   or an empty in-memory stream */
struct istream *payload;
33
/* HTTP client used for every Tika request in this file. It is created on
   first use in tika_get_http_client_url() and destroyed in
   fts_parser_tika_unload(). */
static struct http_client *tika_http_client = NULL;
34
/* Module-context identifier under which the per-user Tika state is stored;
   initialized against mail_user_module_register. */
static MODULE_CONTEXT_DEFINE_INIT(fts_parser_tika_user_module,
35
&mail_user_module_register);
38
/*
 * Resolve the Tika server URL for a user from the "fts_tika" plugin setting.
 *
 * The parsed URL is kept in a struct fts_parser_tika_user attached to the
 * user, and the file-wide tika_http_client is created here if it does not
 * exist yet. On the paths visible here the URL is handed back through
 * *http_url_r, and the cached-value path returns -1 when that URL is NULL
 * and 0 otherwise.
 *
 * NOTE(review): several lines of this function (return type, braces, some
 * conditions and return statements) are not visible in this copy; confirm
 * the exact control flow against the complete file.
 */
tika_get_http_client_url(struct mail_user *user, struct http_url **http_url_r)
40
struct fts_parser_tika_user *tuser = TIKA_USER_CONTEXT(user);
41
struct http_client_settings http_set;
42
const char *url, *error;
44
url = mail_user_plugin_getenv(user, "fts_tika");
46
/* fts_tika disabled */
51
/* hand back the URL already stored in the user's context (presumably only
   reached when tuser was found above - the guarding condition is not
   visible here) */
*http_url_r = tuser->http_url;
52
return *http_url_r == NULL ? -1 : 0;
55
/* create the per-user context and register it on the user */
tuser = p_new(user->pool, struct fts_parser_tika_user, 1);
56
MODULE_CONTEXT_SET(user, fts_parser_tika_user_module, tuser);
58
/* parse the configured URL into tuser->http_url, allocating from the
   user's pool; a parse failure is logged */
if (http_url_parse(url, NULL, 0, user->pool,
59
&tuser->http_url, &error) < 0) {
60
i_error("fts_tika: Failed to parse HTTP url %s: %s", url, error);
64
/* create the shared HTTP client on first use; settings start zeroed and
   only the fields below are overridden */
if (tika_http_client == NULL) {
65
memset(&http_set, 0, sizeof(http_set));
66
http_set.max_idle_time_msecs = 100;
67
http_set.max_parallel_connections = 1;
68
http_set.max_pipelined_requests = 1;
69
http_set.max_redirects = 1;
70
http_set.max_attempts = 3;
71
http_set.connect_timeout_msecs = 5*1000;
72
http_set.request_timeout_msecs = 60*1000;
73
/* HTTP client debugging follows the user's mail_debug flag */
http_set.debug = user->mail_debug;
74
tika_http_client = http_client_init(&http_set);
76
*http_url_r = tuser->http_url;
81
/*
 * Response callback for the Tika PUT request (it is passed to
 * http_client_request() in fts_parser_tika_try_init()).
 *
 * Chooses parser->payload according to the HTTP status, clears
 * parser->http_req and stops the current ioloop. parser->payload must still
 * be NULL on entry.
 *
 * NOTE(review): some case labels and the else/break lines of the switch are
 * not visible in this copy; confirm which statuses reach each branch against
 * the complete file.
 */
fts_tika_parser_response(const struct http_response *response,
82
struct tika_fts_parser *parser)
84
i_assert(parser->payload == NULL);
86
switch (response->status) {
89
/* a response without a body is represented by an empty stream */
if (response->payload == NULL)
90
parser->payload = i_stream_create_from_data("", 0);
92
/* take a reference on the response stream before storing it in the parser
   (presumably the else branch of the check above - TODO confirm) */
i_stream_ref(response->payload);
93
parser->payload = response->payload;
96
case 204: /* empty response */
97
case 415: /* Unsupported Media Type */
98
case 422: /* Unprocessable Entity */
99
/* these statuses are logged only at debug level and yield an empty stream */
if (parser->user->mail_debug) {
100
i_debug("fts_tika: PUT %s failed: %u %s",
101
mail_user_plugin_getenv(parser->user, "fts_tika"),
102
response->status, response->reason);
104
parser->payload = i_stream_create_from_data("", 0);
107
/* log an error and mark the parser as failed (presumably the default
   branch for any other status - TODO confirm) */
i_error("fts_tika: PUT %s failed: %u %s",
108
mail_user_plugin_getenv(parser->user, "fts_tika"),
109
response->status, response->reason);
110
parser->failed = TRUE;
113
/* forget the request pointer so deinit will not abort it, then stop the
   running ioloop */
parser->http_req = NULL;
114
io_loop_stop(current_ioloop);
117
/*
 * Try to create a Tika-backed parser for one message part.
 *
 * Resolves the user's Tika URL, allocates a struct tika_fts_parser and
 * prepares a PUT request to the URL's path and query. The request carries
 * the part's Content-Type and Content-Disposition and asks for text/plain.
 * Returns the embedded struct fts_parser of the new parser.
 *
 * NOTE(review): the handling of a failed URL lookup and some arguments of
 * the request call are not visible in this copy.
 */
static struct fts_parser *
118
fts_parser_tika_try_init(struct mail_user *user, const char *content_type,
119
const char *content_disposition)
121
struct tika_fts_parser *parser;
122
struct http_url *http_url;
123
struct http_client_request *http_req;
125
if (tika_get_http_client_url(user, &http_url) < 0)
128
parser = i_new(struct tika_fts_parser, 1);
129
parser->parser.v = fts_parser_tika;
132
/* fts_tika_parser_response() is called with `parser` as its context when
   the response arrives */
http_req = http_client_request(tika_http_client, "PUT",
134
t_strconcat(http_url->path, http_url->enc_query, NULL),
135
fts_tika_parser_response, parser);
136
/* port and SSL use come from the parsed "fts_tika" URL */
http_client_request_set_port(http_req, http_url->port);
137
http_client_request_set_ssl(http_req, http_url->have_ssl);
138
http_client_request_add_header(http_req, "Content-Type", content_type);
139
http_client_request_add_header(http_req, "Content-Disposition",
140
content_disposition);
141
http_client_request_add_header(http_req, "Accept", "text/plain");
143
/* keep the request so fts_parser_tika_more() can stream the payload into it */
parser->http_req = http_req;
144
return &parser->parser;
147
/*
 * Feed message data to Tika or read extracted text back from it.
 *
 * When block->size > 0 the data is sent as request payload. Otherwise
 * (judging by the comments below) the request payload is finished, the
 * response is awaited, and data is read from parser->payload, waiting on a
 * private ioloop while the stream has nothing available. Send, finish and
 * read errors set parser->failed.
 *
 * NOTE(review): some local declarations, call arguments and parts of the
 * read loop are not visible in this copy, including how the result is
 * handed back through `block`.
 */
static void fts_parser_tika_more(struct fts_parser *_parser,
148
struct message_block *block)
150
struct tika_fts_parser *parser = (struct tika_fts_parser *)_parser;
151
const unsigned char *data;
155
if (block->size > 0) {
156
/* first we'll send everything to Tika */
157
if (!parser->failed &&
158
http_client_request_send_payload(&parser->http_req,
161
parser->failed = TRUE;
166
/* no response stream yet: finish the request and wait for the response
   callback to set parser->payload */
if (parser->payload == NULL) {
167
/* read the result from Tika */
168
if (!parser->failed &&
169
http_client_request_finish_payload(&parser->http_req) < 0)
170
parser->failed = TRUE;
171
if (!parser->failed && parser->payload == NULL)
172
http_client_wait(tika_http_client);
175
i_assert(parser->payload != NULL);
177
/* continue returning data from Tika */
178
while ((ret = i_stream_read_data(parser->payload, &data, &size, 0)) == 0) {
181
/* wait for more input from Tika */
182
/* the private ioloop and the io watching the payload stream are created
   once and reused on later waits */
if (parser->ioloop == NULL) {
183
parser->ioloop = io_loop_create();
184
parser->io = io_add_istream(parser->payload, io_loop_stop,
187
io_loop_set_current(parser->ioloop);
189
io_loop_run(current_ioloop);
195
/* consume the `size` bytes reported by the read above */
i_stream_skip(parser->payload, size);
199
/* a stream error is logged and marks the parser as failed */
if (parser->payload->stream_errno != 0) {
200
i_error("read(%s) failed: %s",
201
i_stream_get_name(parser->payload),
202
i_stream_get_error(parser->payload));
203
parser->failed = TRUE;
208
/*
 * Release the resources held by a Tika parser: the private ioloop and its
 * io, the payload stream reference, and any HTTP request still pending.
 *
 * `ret` is -1 when the parser was marked failed and 0 otherwise.
 * NOTE(review): the final lines of this function (freeing the parser and the
 * return statement) are not visible in this copy.
 */
static int fts_parser_tika_deinit(struct fts_parser *_parser)
210
struct tika_fts_parser *parser = (struct tika_fts_parser *)_parser;
211
int ret = parser->failed ? -1 : 0;
213
/* the io is created together with the ioloop in fts_parser_tika_more(),
   so both are torn down together */
if (parser->ioloop != NULL) {
214
io_remove(&parser->io);
215
io_loop_destroy(&parser->ioloop);
217
if (parser->payload != NULL)
218
i_stream_unref(&parser->payload);
219
/* FIXME: kludgy, http_req should be NULL here if we don't want to
220
free it. requires lib-http changes. */
221
if (parser->http_req != NULL)
222
http_client_request_abort(&parser->http_req);
227
/* Unload hook: destroy the shared Tika HTTP client if one was created. */
static void fts_parser_tika_unload(void)
229
if (tika_http_client != NULL)
230
http_client_deinit(&tika_http_client);
233
/* Virtual function table of the Tika parser. Entries are positional:
   try_init, more, deinit, unload.
   NOTE(review): the closing brace of this initializer is not visible in this
   copy. */
struct fts_parser_vfuncs fts_parser_tika = {
234
fts_parser_tika_try_init,
235
fts_parser_tika_more,
236
fts_parser_tika_deinit,
237
fts_parser_tika_unload