1
/* Licensed to the Apache Software Foundation (ASF) under one or more
2
* contributor license agreements. See the NOTICE file distributed with
3
* this work for additional information regarding copyright ownership.
4
* The ASF licenses this file to You under the Apache License, Version 2.0
5
* (the "License"); you may not use this file except in compliance with
6
* the License. You may obtain a copy of the License at
8
* http://www.apache.org/licenses/LICENSE-2.0
10
* Unless required by applicable law or agreed to in writing, software
11
* distributed under the License is distributed on an "AS IS" BASIS,
12
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
* See the License for the specific language governing permissions and
14
* limitations under the License.
18
* simple hokey charset recoding configuration module
20
* See mod_ebcdic and mod_charset for more thought-out examples. This
21
* one is just so Jeff can learn how a module works and experiment with
22
* basic character set recoding configuration.
24
* !!!This is an extremely cheap ripoff of mod_charset.c from Russian Apache!!!
28
#include "http_config.h"
30
#include "http_core.h"
32
#include "http_main.h"
33
#include "http_protocol.h"
34
#include "http_request.h"
35
#include "util_charset.h"
36
#include "apr_buckets.h"
37
#include "util_filter.h"
38
#include "apr_strings.h"
40
#include "apr_xlate.h"
41
#define APR_WANT_STRFUNC
44
/* Sizes of the scratch buffers that hold translated data. */
#define OUTPUT_XLATE_BUF_SIZE (16*1024) /* size of translation buffer used on output */
#define INPUT_XLATE_BUF_SIZE  (8*1024)  /* size of translation buffer used on input */

#define XLATE_MIN_BUFF_LEFT 128  /* flush once there is no more than this much
                                  * space left in the translation buffer
                                  */

#define FATTEST_CHAR  8          /* we don't handle chars wider than this that
                                  * straddle two buckets
                                  */
55
/* extended error status codes; this is used in addition to an apr_status_t to
 * track errors in the translation filter
 */
typedef enum {
    EES_INIT = 0,        /* no error info yet; value must be 0 for easy init */
    EES_LIMIT,           /* built-in restriction encountered */
    EES_INCOMPLETE_CHAR, /* incomplete multi-byte char at end of content */
    EES_BUCKET_READ,     /* reading a bucket failed */
    EES_DOWNSTREAM,      /* something bad happened in a filter below xlate */
    EES_BAD_INPUT        /* input data invalid */
} ees_t;
67
/* registered name of the output translation filter */
#define XLATEOUT_FILTER_NAME "XLATEOUT"
/* registered name of the input translation filter */
#define XLATEIN_FILTER_NAME "XLATEIN"
72
/* Per-directory configuration, filled in by the CharsetSourceEnc,
 * CharsetDefault and CharsetOptions directives.
 */
typedef struct charset_dir_t {
    /** debug level; -1 means uninitialized, 0 means no debug */
    int debug;
    const char *charset_source;  /* source encoding */
    const char *charset_default; /* how to ship on wire */
    /** module does ap_add_*_filter()? IA_INIT means "not configured here" */
    enum {IA_INIT, IA_IMPADD, IA_NOIMPADD} implicit_add;
} charset_dir_t;
81
/* charset_filter_ctx_t is created for each filter instance; because the same
82
* filter code is used for translating in both directions, we need this context
83
* data to tell the filter which translation handle to use; it also can hold a
84
* character which was split between buckets
86
typedef struct charset_filter_ctx_t {
89
ees_t ees; /* extended error status */
91
char buf[FATTEST_CHAR]; /* we want to be able to build a complete char here */
92
int ran; /* has filter instance run before? */
93
int noop; /* should we pass brigades through unchanged? */
94
char *tmp; /* buffer for input filtering */
95
apr_bucket_brigade *bb; /* input buckets we couldn't finish translating */
96
} charset_filter_ctx_t;
98
/* charset_req_t is available via r->request_config if any translation is
101
typedef struct charset_req_t {
103
charset_filter_ctx_t *output_ctx, *input_ctx;
106
/* debug level definitions; compared against charset_dir_t.debug with >= */
#define DBGLVL_GORY           9 /* gory details */
#define DBGLVL_FLOW           4 /* enough messages to see what happens on
                                 * each request */
#define DBGLVL_PMC            2 /* messages about possible misconfiguration */
112
/* Forward declaration: the hooks and filters below pass &charset_lite_module
 * to ap_get_module_config()/ap_set_module_config(), so the symbol has to be
 * visible before its definition at the end of the file.
 */
module AP_MODULE_DECLARE_DATA charset_lite_module;
114
/* Allocate a zeroed per-directory configuration from pool p.
 *
 * The debug level starts at -1 ("uninitialized") so that
 * merge_charset_dir_conf() can tell "not set here" apart from an explicit 0.
 * The charset names start as NULL and implicit_add as IA_INIT (both zero).
 */
static void *create_charset_dir_conf(apr_pool_t *p,char *dummy)
{
    charset_dir_t *dc = apr_pcalloc(p, sizeof(*dc));

    dc->debug = -1;
    return dc;
}
122
/* Merge two per-directory configurations into a new one allocated from p.
 *
 * basev is the configuration of the enclosing container, overridesv that of
 * the current container.  For every setting: if it is defined in the current
 * container, use it; otherwise, use the one from the enclosing container.
 * "Defined" means debug != -1, a non-NULL charset name, or an implicit_add
 * value other than IA_INIT.
 */
static void *merge_charset_dir_conf(apr_pool_t *p, void *basev, void *overridesv)
{
    charset_dir_t *base = basev;
    charset_dir_t *over = overridesv;
    charset_dir_t *a = apr_pcalloc(p, sizeof(*a));

    a->debug =
        over->debug != -1 ? over->debug : base->debug;
    a->charset_default =
        over->charset_default ? over->charset_default : base->charset_default;
    a->charset_source =
        over->charset_source ? over->charset_source : base->charset_source;
    a->implicit_add =
        over->implicit_add != IA_INIT ? over->implicit_add : base->implicit_add;
    return a;
}
143
/* CharsetSourceEnc charset
145
static const char *add_charset_source(cmd_parms *cmd, void *in_dc,
148
charset_dir_t *dc = in_dc;
150
dc->charset_source = name;
154
/* CharsetDefault charset
156
static const char *add_charset_default(cmd_parms *cmd, void *in_dc,
159
charset_dir_t *dc = in_dc;
161
dc->charset_default = name;
165
/* CharsetOptions optionflag...
167
static const char *add_charset_options(cmd_parms *cmd, void *in_dc,
170
charset_dir_t *dc = in_dc;
172
if (!strcasecmp(flag, "ImplicitAdd")) {
173
dc->implicit_add = IA_IMPADD;
175
else if (!strcasecmp(flag, "NoImplicitAdd")) {
176
dc->implicit_add = IA_NOIMPADD;
178
else if (!strncasecmp(flag, "DebugLevel=", 11)) {
179
dc->debug = atoi(flag + 11);
182
return apr_pstrcat(cmd->temp_pool,
183
"Invalid CharsetOptions option: ",
191
/* find_code_page() is a fixup hook that decides if translation should be
192
* enabled; if so, it sets up request data for use by the filter registration
193
* hook so that it knows what to do
195
static int find_code_page(request_rec *r)
197
charset_dir_t *dc = ap_get_module_config(r->per_dir_config,
198
&charset_lite_module);
199
charset_req_t *reqinfo;
200
charset_filter_ctx_t *input_ctx, *output_ctx;
202
const char *mime_type;
204
if (dc->debug >= DBGLVL_FLOW) {
205
ap_log_rerror(APLOG_MARK,APLOG_DEBUG, 0, r,
206
"uri: %s file: %s method: %d "
207
"imt: %s flags: %s%s%s %s->%s",
208
r->uri, r->filename, r->method_number,
209
r->content_type ? r->content_type : "(unknown)",
210
r->main ? "S" : "", /* S if subrequest */
211
r->prev ? "R" : "", /* R if redirect */
212
r->proxyreq ? "P" : "", /* P if proxy */
213
dc->charset_source, dc->charset_default);
216
/* If we don't have a full directory configuration, bail out.
218
if (!dc->charset_source || !dc->charset_default) {
219
if (dc->debug >= DBGLVL_PMC) {
220
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
221
"incomplete configuration: src %s, dst %s",
222
dc->charset_source ? dc->charset_source : "unspecified",
223
dc->charset_default ? dc->charset_default : "unspecified");
228
/* catch proxy requests */
229
if (r->proxyreq) return DECLINED;
230
/* mod_rewrite indicators */
231
if (!strncmp(r->filename, "redirect:", 9)) return DECLINED;
232
if (!strncmp(r->filename, "gone:", 5)) return DECLINED;
233
if (!strncmp(r->filename, "passthrough:", 12)) return DECLINED;
234
if (!strncmp(r->filename, "forbidden:", 10)) return DECLINED;
235
/* no translation when server and network charsets are set to the same value */
236
if (!strcasecmp(dc->charset_source, dc->charset_default)) return DECLINED;
238
mime_type = r->content_type ? r->content_type : ap_default_type(r);
240
/* If mime type isn't text or message, bail out.
243
/* XXX When we handle translation of the request body, watch out here as
244
* 1.3 allowed additional mime types: multipart and
245
* application/x-www-form-urlencoded
248
if (strncasecmp(mime_type, "text/", 5) &&
249
#if APR_CHARSET_EBCDIC || AP_WANT_DIR_TRANSLATION
250
/* On an EBCDIC machine, be willing to translate mod_autoindex-
251
* generated output. Otherwise, it doesn't look too cool.
253
* XXX This isn't a perfect fix because this doesn't trigger us
254
* to convert from the charset of the source code to ASCII. The
255
* general solution seems to be to allow a generator to set an
256
* indicator in the r specifying that the body is coded in the
257
* implementation character set (i.e., the charset of the source
258
* code). This would get several different types of documents
259
* translated properly: mod_autoindex output, mod_status output,
260
* mod_info output, hard-coded error documents, etc.
262
strcmp(mime_type, DIR_MAGIC_TYPE) &&
264
strncasecmp(mime_type, "message/", 8)) {
265
if (dc->debug >= DBGLVL_GORY) {
266
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
267
"mime type is %s; no translation selected",
270
/* We must not bail out here (i.e., the MIME test must be in the filter
271
* itself, not in the fixup, because only then is the final MIME type known.
272
* Examples for late changes to the MIME type include CGI handling (MIME
273
* type is set in the Content-Type header produced by the CGI script), or
274
* PHP (until PHP runs, the MIME type is set to application/x-httpd-php)
278
if (dc->debug >= DBGLVL_GORY) {
279
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
280
"charset_source: %s charset_default: %s",
281
dc && dc->charset_source ? dc->charset_source : "(none)",
282
dc && dc->charset_default ? dc->charset_default : "(none)");
285
/* Get storage for the request data and the output filter context.
286
* We rarely need the input filter context, so allocate that separately.
288
reqinfo = (charset_req_t *)apr_pcalloc(r->pool,
289
sizeof(charset_req_t) +
290
sizeof(charset_filter_ctx_t));
291
output_ctx = (charset_filter_ctx_t *)(reqinfo + 1);
295
ap_set_module_config(r->request_config, &charset_lite_module, reqinfo);
297
reqinfo->output_ctx = output_ctx;
299
/* We must not open the xlation table here yet, because the final MIME
300
* type is not known until we are actually called in the output filter.
301
* With POST or PUT request, the case is different, because their MIME
302
* type is set in the request headers, and their data are prerequisites
303
* for actually calling, e.g., the CGI handler later on.
305
output_ctx->xlate = NULL;
307
switch (r->method_number) {
310
/* Set up input translation. Note: A request body can be included
311
* with the OPTIONS method, but for now we don't set up translation
314
input_ctx = apr_pcalloc(r->pool, sizeof(charset_filter_ctx_t));
315
input_ctx->bb = apr_brigade_create(r->pool,
316
r->connection->bucket_alloc);
317
input_ctx->tmp = apr_palloc(r->pool, INPUT_XLATE_BUF_SIZE);
319
reqinfo->input_ctx = input_ctx;
320
rv = apr_xlate_open(&input_ctx->xlate, dc->charset_source,
321
dc->charset_default, r->pool);
322
if (rv != APR_SUCCESS) {
323
ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r,
324
"can't open translation %s->%s",
325
dc->charset_default, dc->charset_source);
326
return HTTP_INTERNAL_SERVER_ERROR;
333
/* Return 1 if a filter registered under filter_name (compared
 * case-insensitively) is present in the chain starting at filter_list,
 * 0 otherwise.  r is not used.
 */
static int configured_in_list(request_rec *r, const char *filter_name,
                              struct ap_filter_t *filter_list)
{
    struct ap_filter_t *filter;

    for (filter = filter_list; filter; filter = filter->next) {
        if (!strcasecmp(filter_name, filter->frec->name)) {
            return 1;
        }
    }
    return 0;
}
347
/* Is a filter named filter_name already in the request's input filter chain? */
static int configured_on_input(request_rec *r, const char *filter_name)
{
    return configured_in_list(r, filter_name, r->input_filters);
}
352
/* Is a filter named filter_name already in the request's output filter chain? */
static int configured_on_output(request_rec *r, const char *filter_name)
{
    return configured_in_list(r, filter_name, r->output_filters);
}
357
/* xlate_insert_filter() is a filter hook which decides whether or not
358
* to insert a translation filter for the current request.
360
static void xlate_insert_filter(request_rec *r)
362
/* Hey... don't be so quick to use reqinfo->dc here; reqinfo may be NULL */
363
charset_req_t *reqinfo = ap_get_module_config(r->request_config,
364
&charset_lite_module);
365
charset_dir_t *dc = ap_get_module_config(r->per_dir_config,
366
&charset_lite_module);
369
if (reqinfo->output_ctx && !configured_on_output(r, XLATEOUT_FILTER_NAME)) {
370
ap_add_output_filter(XLATEOUT_FILTER_NAME, reqinfo->output_ctx, r,
373
else if (dc->debug >= DBGLVL_FLOW) {
374
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
375
"xlate output filter not added implicitly because %s",
376
!reqinfo->output_ctx ?
377
"no output configuration available" :
378
"another module added the filter");
381
if (reqinfo->input_ctx && !configured_on_input(r, XLATEIN_FILTER_NAME)) {
382
ap_add_input_filter(XLATEIN_FILTER_NAME, reqinfo->input_ctx, r,
385
else if (dc->debug >= DBGLVL_FLOW) {
386
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
387
"xlate input filter not added implicitly because %s",
388
!reqinfo->input_ctx ?
389
"no input configuration available" :
390
"another module added the filter");
395
/* Known shortcomings:
 *
 * bucket handling:
 *  why create an eos bucket when we see one come down the stream?  just pass
 *  along the one we were given as input...  news flash: this will be fixed
 *  when xlate_out_filter() starts using the more generic xlate_brigade()
 *
 * translation mechanics:
 *  we don't handle characters that straddle more than two buckets; an error
 *  is reported instead
 */
407
/* send_downstream() is passed the translated data; it puts it in a single-
408
* bucket brigade and passes the brigade to the next filter
410
static apr_status_t send_downstream(ap_filter_t *f, const char *tmp, apr_size_t len)
412
request_rec *r = f->r;
413
conn_rec *c = r->connection;
414
apr_bucket_brigade *bb;
416
charset_filter_ctx_t *ctx = f->ctx;
419
bb = apr_brigade_create(r->pool, c->bucket_alloc);
420
b = apr_bucket_transient_create(tmp, len, c->bucket_alloc);
421
APR_BRIGADE_INSERT_TAIL(bb, b);
422
rv = ap_pass_brigade(f->next, bb);
423
if (rv != APR_SUCCESS) {
424
ctx->ees = EES_DOWNSTREAM;
429
/* send_eos() passes a brigade holding a single, newly created EOS bucket to
 * the next filter.  On failure of the next filter the context's extended
 * status is set to EES_DOWNSTREAM and the failing status is returned.
 */
static apr_status_t send_eos(ap_filter_t *f)
{
    request_rec *r = f->r;
    conn_rec *c = r->connection;
    charset_filter_ctx_t *ctx = f->ctx;
    apr_bucket_brigade *bb = apr_brigade_create(r->pool, c->bucket_alloc);
    apr_bucket *b = apr_bucket_eos_create(c->bucket_alloc);
    apr_status_t rv;

    APR_BRIGADE_INSERT_TAIL(bb, b);
    rv = ap_pass_brigade(f->next, bb);
    if (rv != APR_SUCCESS) {
        ctx->ees = EES_DOWNSTREAM;
    }
    return rv;
}
448
static apr_status_t set_aside_partial_char(charset_filter_ctx_t *ctx,
450
apr_size_t partial_len)
454
if (sizeof(ctx->buf) > partial_len) {
455
ctx->saved = partial_len;
456
memcpy(ctx->buf, partial, partial_len);
461
ctx->ees = EES_LIMIT; /* we don't handle chars this wide which straddle
468
static apr_status_t finish_partial_char(charset_filter_ctx_t *ctx,
470
const char **cur_str,
477
apr_size_t tmp_input_len;
479
/* Keep adding bytes from the input string to the saved string until we
480
* 1) finish the input char
482
* or 3) run out of bytes to add
486
ctx->buf[ctx->saved] = **cur_str;
490
tmp_input_len = ctx->saved;
491
rv = apr_xlate_conv_buffer(ctx->xlate,
496
} while (rv == APR_INCOMPLETE && *cur_len);
498
if (rv == APR_SUCCESS) {
502
ctx->ees = EES_LIMIT; /* code isn't smart enough to handle chars
503
* straddling more than two buckets
510
/* log_xlate_error() writes one APLOG_ERR line describing why the translation
 * filter is giving up, chosen from the context's extended error status.
 *
 * For the statuses that originate inside this module the apr status passed to
 * the logger is cleared (rv = 0), so only the message text is logged; for
 * EES_DOWNSTREAM and unknown statuses rv is logged along with the text.
 *
 * For EES_INCOMPLETE_CHAR the saved bytes are appended in hex.  Each byte is
 * converted through unsigned char first: with a signed plain char, a byte
 * >= 0x80 would otherwise be sign-extended and printed as FFFFFFxx.
 */
static void log_xlate_error(ap_filter_t *f, apr_status_t rv)
{
    charset_filter_ctx_t *ctx = f->ctx;
    const char *msg;
    char msgbuf[100];
    int cur;

    switch(ctx->ees) {
    case EES_LIMIT:
        rv = 0;
        msg = "xlate filter - a built-in restriction was encountered";
        break;
    case EES_BAD_INPUT:
        rv = 0;
        msg = "xlate filter - an input character was invalid";
        break;
    case EES_BUCKET_READ:
        rv = 0;
        msg = "xlate filter - bucket read routine failed";
        break;
    case EES_INCOMPLETE_CHAR:
        rv = 0;
        /* the fixed prefix is 49 characters, well inside msgbuf */
        strcpy(msgbuf, "xlate filter - incomplete char at end of input - ");
        cur = 0;
        while ((apr_size_t)cur < ctx->saved) {
            apr_snprintf(msgbuf + strlen(msgbuf), sizeof(msgbuf) - strlen(msgbuf),
                         "%02X", (unsigned)(unsigned char)ctx->buf[cur]);
            ++cur;
        }
        msg = msgbuf;
        break;
    case EES_DOWNSTREAM:
        msg = "xlate filter - an error occurred in a lower filter";
        break;
    default:
        msg = "xlate filter - returning error";
    }
    ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, f->r,
                  "%s", msg);
}
551
/* chk_filter_chain() is called once per filter instance; it tries to
552
* determine if the current filter instance should be disabled because
553
* its translation is incompatible with the translation of an existing
554
* instance of the translate filter
556
* Example bad scenario:
558
* configured filter chain for the request:
559
* INCLUDES XLATEOUT(8859-1->UTS-16)
560
* configured filter chain for the subrequest:
561
* XLATEOUT(8859-1->UTS-16)
563
* When the subrequest is processed, the filter chain will be
564
* XLATEOUT(8859-1->UTS-16) XLATEOUT(8859-1->UTS-16)
565
* This makes no sense, so the instance of XLATEOUT added for the
566
* subrequest will be noop-ed.
568
* Example good scenario:
570
* configured filter chain for the request:
571
* INCLUDES XLATEOUT(8859-1->UTS-16)
572
* configured filter chain for the subrequest:
573
* XLATEOUT(IBM-1047->8859-1)
575
* When the subrequest is processed, the filter chain will be
576
* XLATEOUT(IBM-1047->8859-1) XLATEOUT(8859-1->UTS-16)
577
* This makes sense, so the instance of XLATEOUT added for the
578
* subrequest will be left alone and it will translate from
581
static void chk_filter_chain(ap_filter_t *f)
584
charset_filter_ctx_t *curctx, *last_xlate_ctx = NULL,
586
int debug = ctx->dc->debug;
587
int output = !strcasecmp(f->frec->name, XLATEOUT_FILTER_NAME);
593
/* walk the filter chain; see if it makes sense for our filter to
596
curf = output ? f->r->output_filters : f->r->input_filters;
598
if (!strcasecmp(curf->frec->name, f->frec->name) &&
600
curctx = (charset_filter_ctx_t *)curf->ctx;
601
if (!last_xlate_ctx) {
602
last_xlate_ctx = curctx;
605
if (strcmp(last_xlate_ctx->dc->charset_default,
606
curctx->dc->charset_source)) {
607
/* incompatible translation
608
* if our filter instance is incompatible with an instance
609
* already in place, noop our instance
611
* . We are only willing to noop our own instance.
612
* . It is possible to noop another instance which has not
613
* yet run, but this is not currently implemented.
614
* Hopefully it will not be needed.
615
* . It is not possible to noop an instance which has
618
if (last_xlate_ctx == f->ctx) {
619
last_xlate_ctx->noop = 1;
620
if (debug >= DBGLVL_PMC) {
621
const char *symbol = output ? "->" : "<-";
623
ap_log_rerror(APLOG_MARK, APLOG_DEBUG,
626
"translation %s%s%s; existing "
627
"translation %s%s%s",
628
f->r->uri ? "uri" : "file",
629
f->r->uri ? f->r->uri : f->r->filename,
630
last_xlate_ctx->dc->charset_source,
632
last_xlate_ctx->dc->charset_default,
633
curctx->dc->charset_source,
635
curctx->dc->charset_default);
639
const char *symbol = output ? "->" : "<-";
641
ap_log_rerror(APLOG_MARK, APLOG_ERR,
643
"chk_filter_chain() - can't disable "
644
"translation %s%s%s; existing "
645
"translation %s%s%s",
646
last_xlate_ctx->dc->charset_source,
648
last_xlate_ctx->dc->charset_default,
649
curctx->dc->charset_source,
651
curctx->dc->charset_default);
661
/* xlate_brigade() is used to filter request and response bodies
663
* we'll stop when one of the following occurs:
664
* . we run out of buckets
665
* . we run out of space in the output buffer
669
* bb: brigade to process
670
* buffer: storage to hold the translated characters
671
* buffer_size: size of buffer
672
* (and a few more uninteresting parms)
675
* return value: APR_SUCCESS or some error code
676
* bb: we've removed any buckets representing the
677
* translated characters; the eos bucket, if
678
* present, will be left in the brigade
679
* buffer: filled in with translated characters
680
* buffer_size: updated with the bytes remaining
681
* hit_eos: did we hit an EOS bucket?
683
static apr_status_t xlate_brigade(charset_filter_ctx_t *ctx,
684
apr_bucket_brigade *bb,
686
apr_size_t *buffer_avail,
689
apr_bucket *b = NULL; /* set to NULL only to quiet some gcc */
690
apr_bucket *consumed_bucket;
692
apr_size_t bytes_in_bucket; /* total bytes read from current bucket */
693
apr_size_t bucket_avail; /* bytes left in current bucket */
694
apr_status_t rv = APR_SUCCESS;
698
consumed_bucket = NULL;
700
if (!bucket_avail) { /* no bytes left to process in the current bucket... */
701
if (consumed_bucket) {
702
apr_bucket_delete(consumed_bucket);
703
consumed_bucket = NULL;
705
b = APR_BRIGADE_FIRST(bb);
706
if (b == APR_BRIGADE_SENTINEL(bb) ||
707
APR_BUCKET_IS_EOS(b)) {
710
rv = apr_bucket_read(b, &bucket, &bytes_in_bucket, APR_BLOCK_READ);
711
if (rv != APR_SUCCESS) {
712
ctx->ees = EES_BUCKET_READ;
715
bucket_avail = bytes_in_bucket;
716
consumed_bucket = b; /* for axing when we're done reading it */
719
/* We've got data, so translate it. */
721
/* Rats... we need to finish a partial character from the previous
724
* Strangely, finish_partial_char() increments the input buffer
725
* pointer but does not increment the output buffer pointer.
727
apr_size_t old_buffer_avail = *buffer_avail;
728
rv = finish_partial_char(ctx,
729
&bucket, &bucket_avail,
730
&buffer, buffer_avail);
731
buffer += old_buffer_avail - *buffer_avail;
734
apr_size_t old_buffer_avail = *buffer_avail;
735
apr_size_t old_bucket_avail = bucket_avail;
736
rv = apr_xlate_conv_buffer(ctx->xlate,
737
bucket, &bucket_avail,
740
buffer += old_buffer_avail - *buffer_avail;
741
bucket += old_bucket_avail - bucket_avail;
743
if (rv == APR_INCOMPLETE) { /* partial character at end of input */
744
/* We need to save the final byte(s) for next time; we can't
745
* convert it until we look at the next bucket.
747
rv = set_aside_partial_char(ctx, bucket, bucket_avail);
751
if (rv != APR_SUCCESS) {
752
/* bad input byte or partial char too big to store */
755
if (*buffer_avail < XLATE_MIN_BUFF_LEFT) {
756
/* if any data remains in the current bucket, split there */
758
apr_bucket_split(b, bytes_in_bucket - bucket_avail);
760
apr_bucket_delete(b);
766
if (!APR_BRIGADE_EMPTY(bb)) {
767
b = APR_BRIGADE_FIRST(bb);
768
if (APR_BUCKET_IS_EOS(b)) {
769
/* Leave the eos bucket in the brigade for reporting to
770
* subsequent filters.
774
/* Oops... we have a partial char from the previous bucket
775
* that won't be completed because there's no more data.
778
ctx->ees = EES_INCOMPLETE_CHAR;
786
/* xlate_out_filter() handles (almost) arbitrary conversions from one charset
788
* translation is determined in the fixup hook (find_code_page), which is
789
* where the filter's context data is set up... the context data gives us
790
* the translation handle
792
static apr_status_t xlate_out_filter(ap_filter_t *f, apr_bucket_brigade *bb)
794
charset_req_t *reqinfo = ap_get_module_config(f->r->request_config,
795
&charset_lite_module);
796
charset_dir_t *dc = ap_get_module_config(f->r->per_dir_config,
797
&charset_lite_module);
798
charset_filter_ctx_t *ctx = f->ctx;
799
apr_bucket *dptr, *consumed_bucket;
801
apr_size_t cur_len, cur_avail;
802
char tmp[OUTPUT_XLATE_BUF_SIZE];
803
apr_size_t space_avail;
805
apr_status_t rv = APR_SUCCESS;
808
/* this is SetOutputFilter path; grab the preallocated context,
809
* if any; note that if we decided not to do anything in an earlier
810
* handler, we won't even have a reqinfo
813
ctx = f->ctx = reqinfo->output_ctx;
814
reqinfo->output_ctx = NULL; /* prevent SNAFU if user coded us twice
815
* in the filter chain; we can't have two
816
* instances using the same context
819
if (!ctx) { /* no idea how to translate; don't do anything */
820
ctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(charset_filter_ctx_t));
826
/* Opening the output translation (this used to be done in the fixup hook,
827
* but that was too early: a subsequent type modification, e.g., by a
828
* CGI script, would go unnoticed. Now we do it in the filter itself.)
830
if (!ctx->noop && ctx->xlate == NULL)
832
const char *mime_type = f->r->content_type ? f->r->content_type : ap_default_type(f->r);
834
/* XXX When we handle translation of the request body, watch out here as
835
* 1.3 allowed additional mime types: multipart and
836
* application/x-www-form-urlencoded
838
if (strncasecmp(mime_type, "text/", 5) == 0 ||
839
#if APR_CHARSET_EBCDIC
840
/* On an EBCDIC machine, be willing to translate mod_autoindex-
841
* generated output. Otherwise, it doesn't look too cool.
843
* XXX This isn't a perfect fix because this doesn't trigger us
844
* to convert from the charset of the source code to ASCII. The
845
* general solution seems to be to allow a generator to set an
846
* indicator in the r specifying that the body is coded in the
847
* implementation character set (i.e., the charset of the source
848
* code). This would get several different types of documents
849
* translated properly: mod_autoindex output, mod_status output,
850
* mod_info output, hard-coded error documents, etc.
852
strcmp(mime_type, DIR_MAGIC_TYPE) == 0 ||
854
strncasecmp(mime_type, "message/", 8) == 0) {
856
rv = apr_xlate_open(&ctx->xlate,
857
dc->charset_default, dc->charset_source, f->r->pool);
858
if (rv != APR_SUCCESS) {
859
ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, f->r,
860
"can't open translation %s->%s",
861
dc->charset_source, dc->charset_default);
867
if (dc->debug >= DBGLVL_GORY)
868
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r,
869
"mime type is %s; no translation selected",
874
if (dc->debug >= DBGLVL_GORY) {
875
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r,
876
"xlate_out_filter() - "
877
"charset_source: %s charset_default: %s",
878
dc && dc->charset_source ? dc->charset_source : "(none)",
879
dc && dc->charset_default ? dc->charset_default : "(none)");
882
if (!ctx->ran) { /* filter never ran before */
888
return ap_pass_brigade(f->next, bb);
891
dptr = APR_BRIGADE_FIRST(bb);
894
space_avail = sizeof(tmp);
895
consumed_bucket = NULL;
897
if (!cur_len) { /* no bytes left to process in the current bucket... */
898
if (consumed_bucket) {
899
apr_bucket_delete(consumed_bucket);
900
consumed_bucket = NULL;
902
if (dptr == APR_BRIGADE_SENTINEL(bb)) {
906
if (APR_BUCKET_IS_EOS(dptr)) {
908
cur_len = -1; /* XXX yuck, but that tells us to send
909
* eos down; when we minimize our bb construction
910
* we'll fix this crap */
912
/* Oops... we have a partial char from the previous bucket
913
* that won't be completed because there's no more data.
916
ctx->ees = EES_INCOMPLETE_CHAR;
920
rv = apr_bucket_read(dptr, &cur_str, &cur_len, APR_BLOCK_READ);
921
if (rv != APR_SUCCESS) {
923
ctx->ees = EES_BUCKET_READ;
926
consumed_bucket = dptr; /* for axing when we're done reading it */
927
dptr = APR_BUCKET_NEXT(dptr); /* get ready for when we access the
930
/* Try to fill up our tmp buffer with translated data. */
933
if (cur_len) { /* maybe we just hit the end of a pipe (len = 0) ? */
935
/* Rats... we need to finish a partial character from the previous
940
tmp_tmp = tmp + sizeof(tmp) - space_avail;
941
rv = finish_partial_char(ctx,
943
&tmp_tmp, &space_avail);
946
rv = apr_xlate_conv_buffer(ctx->xlate,
948
tmp + sizeof(tmp) - space_avail, &space_avail);
950
/* Update input ptr and len after consuming some bytes */
951
cur_str += cur_len - cur_avail;
954
if (rv == APR_INCOMPLETE) { /* partial character at end of input */
955
/* We need to save the final byte(s) for next time; we can't
956
* convert it until we look at the next bucket.
958
rv = set_aside_partial_char(ctx, cur_str, cur_len);
964
if (rv != APR_SUCCESS) {
965
/* bad input byte or partial char too big to store */
969
if (space_avail < XLATE_MIN_BUFF_LEFT) {
970
/* It is time to flush, as there is not enough space left in the
971
* current output buffer to bother with converting more data.
973
rv = send_downstream(f, tmp, sizeof(tmp) - space_avail);
974
if (rv != APR_SUCCESS) {
978
/* tmp is now empty */
979
space_avail = sizeof(tmp);
983
if (rv == APR_SUCCESS) {
984
if (space_avail < sizeof(tmp)) { /* gotta write out what we converted */
985
rv = send_downstream(f, tmp, sizeof(tmp) - space_avail);
988
if (rv == APR_SUCCESS) {
994
log_xlate_error(f, rv);
1000
static int xlate_in_filter(ap_filter_t *f, apr_bucket_brigade *bb,
1001
ap_input_mode_t mode, apr_read_type_e block,
1002
apr_off_t readbytes)
1005
charset_req_t *reqinfo = ap_get_module_config(f->r->request_config,
1006
&charset_lite_module);
1007
charset_dir_t *dc = ap_get_module_config(f->r->per_dir_config,
1008
&charset_lite_module);
1009
charset_filter_ctx_t *ctx = f->ctx;
1010
apr_size_t buffer_size;
1014
/* this is SetInputFilter path; grab the preallocated context,
1015
* if any; note that if we decided not to do anything in an earlier
1016
* handler, we won't even have a reqinfo
1019
ctx = f->ctx = reqinfo->input_ctx;
1020
reqinfo->input_ctx = NULL; /* prevent SNAFU if user coded us twice
1021
* in the filter chain; we can't have two
1022
* instances using the same context
1025
if (!ctx) { /* no idea how to translate; don't do anything */
1026
ctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(charset_filter_ctx_t));
1032
if (dc->debug >= DBGLVL_GORY) {
1033
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r,
1034
"xlate_in_filter() - "
1035
"charset_source: %s charset_default: %s",
1036
dc && dc->charset_source ? dc->charset_source : "(none)",
1037
dc && dc->charset_default ? dc->charset_default : "(none)");
1040
if (!ctx->ran) { /* filter never ran before */
1041
chk_filter_chain(f);
1046
return ap_get_brigade(f->next, bb, mode, block, readbytes);
1049
if (APR_BRIGADE_EMPTY(ctx->bb)) {
1050
if ((rv = ap_get_brigade(f->next, bb, mode, block,
1051
readbytes)) != APR_SUCCESS) {
1056
APR_BRIGADE_PREPEND(bb, ctx->bb); /* first use the leftovers */
1059
buffer_size = INPUT_XLATE_BUF_SIZE;
1060
rv = xlate_brigade(ctx, bb, ctx->tmp, &buffer_size, &hit_eos);
1061
if (rv == APR_SUCCESS) {
1063
/* move anything leftover into our context for next time;
1064
* we don't currently "set aside" since the data came from
1065
* down below, but I suspect that for long-term we need to
1068
APR_BRIGADE_CONCAT(ctx->bb, bb);
1070
if (buffer_size < INPUT_XLATE_BUF_SIZE) { /* do we have output? */
1073
e = apr_bucket_heap_create(ctx->tmp,
1074
INPUT_XLATE_BUF_SIZE - buffer_size,
1075
NULL, f->r->connection->bucket_alloc);
1076
/* make sure we insert at the head, because there may be
1077
* an eos bucket already there, and the eos bucket should
1078
* come after the data
1080
APR_BRIGADE_INSERT_HEAD(bb, e);
1083
/* XXX need to get some more data... what if the last brigade
1084
* we got had only the first byte of a multibyte char? we need
1085
* to grab more data from the network instead of returning an
1091
log_xlate_error(f, rv);
1097
static const command_rec cmds[] =
1099
AP_INIT_TAKE1("CharsetSourceEnc",
1103
"source (html,cgi,ssi) file charset"),
1104
AP_INIT_TAKE1("CharsetDefault",
1105
add_charset_default,
1108
"name of default charset"),
1109
AP_INIT_ITERATE("CharsetOptions",
1110
add_charset_options,
1113
"valid options: ImplicitAdd, NoImplicitAdd, DebugLevel=n"),
1117
/* Register this module's hooks and filters:
 *   - find_code_page() as a fixup hook,
 *   - xlate_insert_filter() as a very late insert-filter hook,
 *   - the output and input translation filters under their registered names.
 * NOTE(review): the AP_FTYPE_RESOURCE filter type was restored from dropped
 * lines -- confirm.
 */
static void charset_register_hooks(apr_pool_t *p)
{
    ap_hook_fixups(find_code_page, NULL, NULL, APR_HOOK_MIDDLE);
    ap_hook_insert_filter(xlate_insert_filter, NULL, NULL, APR_HOOK_REALLY_LAST);
    ap_register_output_filter(XLATEOUT_FILTER_NAME, xlate_out_filter, NULL,
                              AP_FTYPE_RESOURCE);
    ap_register_input_filter(XLATEIN_FILTER_NAME, xlate_in_filter, NULL,
                             AP_FTYPE_RESOURCE);
}
1127
module AP_MODULE_DECLARE_DATA charset_lite_module =
1129
STANDARD20_MODULE_STUFF,
1130
create_charset_dir_conf,
1131
merge_charset_dir_conf,
1135
charset_register_hooks