X-Git-Url: http://git.home-dn.net/?p=manu%2Fmod-proxy-html.git;a=blobdiff_plain;f=mod_proxy_html.c;h=6a97d3e3e618cb7920e62cfbf113eb04cfd1e422;hp=0157c2f1527528b6cd168f370ac6e3925e6e4cc8;hb=HEAD;hpb=6556ff33ca2610d4f8ebddb750bb773f6bae4d16 diff --git a/mod_proxy_html.c b/mod_proxy_html.c index 0157c2f..6a97d3e 100644 --- a/mod_proxy_html.c +++ b/mod_proxy_html.c @@ -1,5 +1,5 @@ /******************************************************************** - Copyright (c) 2003-8, WebThing Ltd + Copyright (c) 2003-9, WebThing Ltd Author: Nick Kew This program is free software; you can redistribute it and/or modify @@ -17,22 +17,11 @@ http://apache.webthing.com/COPYING.txt *********************************************************************/ - -/******************************************************************** - Note to Users - - You are requested to register as a user, at - http://apache.webthing.com/registration.html - - This entitles you to support from the developer. - I'm unlikely to reply to help/support requests from - non-registered users, unless you're paying and/or offering - constructive feedback such as bug reports or sensible - suggestions for further development. - - It also makes a small contribution to the effort - that's gone into developing this work. -*********************************************************************/ +/**** NOTICE TO PACKAGERS + * + * This module now relies on mod_xml2enc for i18n support. + * You should make mod_xml2enc a dependency in your packages. + */ /* End of Notices */ @@ -57,7 +46,8 @@ http://apache.webthing.com/COPYING.txt #define VERBOSEB(x) if (verbose) {x} #endif -#define VERSION_STRING "proxy_html/3.0.1" +/* 3.1.2 - trivial changes to fix compile on Windows */ +#define VERSION_STRING "proxy_html/3.1.2" #include @@ -70,7 +60,11 @@ http://apache.webthing.com/COPYING.txt #include #include #include -#include +#include + +#include +#include +#include /* To support Apache 2.1/2.2, we need the ap_ forms of the * regexp stuff, and they're now used in the code. @@ -91,6 +85,12 @@ http://apache.webthing.com/COPYING.txt #define APACHE22 #endif +/* globals set once at startup */ +static ap_regex_t* seek_meta ; +static const apr_strmatch_pattern* seek_content ; +static apr_status_t (*xml2enc_charset)(request_rec*, xmlCharEncoding*, const char**) = NULL; +static apr_status_t (*xml2enc_filter)(request_rec*, const char*, unsigned int) = NULL; + module AP_MODULE_DECLARE_DATA proxy_html_module ; #define M_HTML 0x01 @@ -135,23 +135,17 @@ typedef struct { size_t bufsz ; apr_hash_t* links; apr_array_header_t* events; - apr_array_header_t* skipto; - xmlCharEncoding default_encoding; const char* charset_out; int extfix ; int metafix ; int strip_comments ; int interp; + int enabled; #ifndef GO_FASTER int verbose ; #endif } proxy_html_conf ; typedef struct { - apr_xlate_t* convset; - char* buf; - apr_size_t bytes; -} conv_t; -typedef struct { ap_filter_t* f ; proxy_html_conf* cfg ; htmlParserCtxtPtr parser ; @@ -159,8 +153,6 @@ typedef struct { char* buf ; size_t offset ; size_t avail ; - conv_t* conv_in; - conv_t* conv_out; const char* encoding; urlmap* map; } saxctxt ; @@ -195,132 +187,15 @@ static void normalise(unsigned int flags, char* str) { *p = tolower(*p) ; if ( flags & NORM_MSSLASH ) - for ( p = ap_strchr_c(str, '\\') ; p ; p = ap_strchr_c(p+1, '\\') ) + for ( p = ap_strchr(str, '\\') ; p ; p = ap_strchr(p+1, '\\') ) *p = '/' ; } -static void consume_buffer(saxctxt* ctx, const char* inbuf, - int bytes, int flag) { - apr_status_t rv; - apr_size_t insz; - char* buf; -#ifndef GO_FASTER - int verbose = ctx->cfg->verbose; -#endif - if (ctx->conv_in == NULL) { - /* just feed it to libxml2 */ - htmlParseChunk(ctx->parser, inbuf, bytes, flag) ; - return; - } - if (ctx->conv_in->bytes > 0) { - /* FIXME: make this a reusable buf? */ - buf = apr_palloc(ctx->f->r->pool, ctx->conv_in->bytes + bytes); - memcpy(buf, ctx->conv_in->buf, ctx->conv_in->bytes); - memcpy(buf + ctx->conv_in->bytes, inbuf, bytes); - bytes += ctx->conv_in->bytes; - ctx->conv_in->bytes = 0; - } else { - buf = (char*) inbuf; - } - insz = bytes; - while (insz > 0) { - char outbuf[4096]; - apr_size_t outsz = 4096; - rv = apr_xlate_conv_buffer(ctx->conv_in->convset, - buf + (bytes - insz), &insz, - outbuf, &outsz); - htmlParseChunk(ctx->parser, outbuf, 4096-outsz, flag) ; - switch (rv) { - case APR_SUCCESS: - continue; - case APR_EINCOMPLETE: - if (insz < 32) {/* save dangling byte(s) and return */ - ctx->conv_in->bytes = insz; - ctx->conv_in->buf = (buf != inbuf) ? buf + (bytes-insz) - : apr_pmemdup(ctx->f->r->pool, buf + (bytes-insz), insz); - return; - } else { /*OK, maybe 4096 wasn't big enough, and ended mid-char */ - continue; - } - case APR_EINVAL: /* try skipping one bad byte */ - VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, ctx->f->r, - "Skipping invalid byte in input stream!") ) ; - --insz; - continue; - default: - /* Erk! What's this? Bail out and eat the buf raw - * if libxml2 will accept it! - */ - ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, ctx->f->r, - "Failed to convert input; trying it raw") ; - htmlParseChunk(ctx->parser, buf + (bytes - insz), insz, flag) ; - ctx->conv_in = NULL; /* don't try converting any more */ - return; - } - } -} -static void AP_fwrite(saxctxt* ctx, const char* inbuf, int bytes, int flush) { - /* convert charset if necessary, and output */ - char* buf; - apr_status_t rv; - apr_size_t insz ; -#ifndef GO_FASTER - int verbose = ctx->cfg->verbose; -#endif +#define consume_buffer(ctx,inbuf,bytes,flag) \ + htmlParseChunk(ctx->parser, inbuf, bytes, flag) - if (ctx->conv_out == NULL) { - ap_fwrite(ctx->f->next, ctx->bb, inbuf, bytes); - return; - } - if (ctx->conv_out->bytes > 0) { - /* FIXME: make this a reusable buf? */ - buf = apr_palloc(ctx->f->r->pool, ctx->conv_out->bytes + bytes); - memcpy(buf, ctx->conv_out->buf, ctx->conv_out->bytes); - memcpy(buf + ctx->conv_out->bytes, inbuf, bytes); - bytes += ctx->conv_out->bytes; - ctx->conv_out->bytes = 0; - } else { - buf = (char*) inbuf; - } - insz = bytes; - while (insz > 0) { - char outbuf[2048]; - apr_size_t outsz = 2048; - rv = apr_xlate_conv_buffer(ctx->conv_out->convset, - buf + (bytes - insz), &insz, - outbuf, &outsz); - ap_fwrite(ctx->f->next, ctx->bb, outbuf, 2048-outsz) ; - switch (rv) { - case APR_SUCCESS: - continue; - case APR_EINCOMPLETE: /* save dangling byte(s) and return */ - /* but if we need to flush, just abandon them */ - if ( flush) { /* if we're flushing, this must be complete */ - /* so this is an error */ - VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, ctx->f->r, - "Skipping invalid byte in output stream!") ) ; - } else { - ctx->conv_out->bytes = insz; - ctx->conv_out->buf = (buf != inbuf) ? buf + (bytes-insz) - : apr_pmemdup(ctx->f->r->pool, buf + (bytes-insz), insz); - } - break; - case APR_EINVAL: /* try skipping one bad byte */ - VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, ctx->f->r, - "Skipping invalid byte in output stream!") ) ; - --insz; - continue; - default: - /* Erk! What's this? Bail out and pass the buf raw - * if libxml2 will accept it! - */ - VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, ctx->f->r, - "Failed to convert output; sending UTF-8") ) ; - ap_fwrite(ctx->f->next, ctx->bb, buf + (bytes - insz), insz) ; - break; - } - } -} +#define AP_fwrite(ctx,inbuf,bytes,flush) \ + ap_fwrite(ctx->f->next, ctx->bb, inbuf, bytes); /* This is always utf-8 on entry. We can convert charset within FLUSH */ #define FLUSH AP_fwrite(ctx, (chars+begin), (i-begin), 0) ; begin = i+1 @@ -350,9 +225,9 @@ static void preserve(saxctxt* ctx, const size_t len) { newbuf = realloc(ctx->buf, ctx->avail) ; if ( newbuf != ctx->buf ) { if ( ctx->buf ) - apr_pool_cleanup_kill(ctx->f->r->pool, ctx->buf, (void*)free) ; + apr_pool_cleanup_kill(ctx->f->r->pool, ctx->buf, (int(*)(void*))free); apr_pool_cleanup_register(ctx->f->r->pool, newbuf, - (void*)free, apr_pool_cleanup_null); + (int(*)(void*))free, apr_pool_cleanup_null); ctx->buf = newbuf ; } } @@ -614,7 +489,7 @@ static void pstartElement(void* ctxt, const xmlChar* uname, ++num_match ; offs = match = pmatch[0].rm_so ; s_from = pmatch[0].rm_eo - match ; - subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf+offs, + subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf, nmatch, pmatch) ; VERBOSE( { const char* f = apr_pstrndup(ctx->f->r->pool, @@ -765,100 +640,6 @@ static void pstartElement(void* ctxt, const xmlChar* uname, } } -/* globals set once at startup */ -static ap_regex_t* seek_meta_ctype ; -static ap_regex_t* seek_charset ; -static ap_regex_t* seek_meta ; - -static xmlCharEncoding sniff_encoding(saxctxt* ctx, const char* cbuf, - size_t bytes) { -#ifndef GO_FASTER - int verbose = ctx->cfg->verbose; -#endif - request_rec* r = ctx->f->r ; - proxy_html_conf* cfg = ctx->cfg ; - xmlCharEncoding ret ; - char* p ; - ap_regmatch_t match[2] ; - char* buf = (char*)cbuf ; - apr_xlate_t* convset; - - VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, - "Content-Type is %s", r->content_type) ) ; - -/* If we've got it in the HTTP headers, there's nothing to do */ - if ( r->content_type && - ( p = ap_strcasestr(r->content_type, "charset=") , p > 0 ) ) { - p += 8 ; - if ( ctx->encoding = apr_pstrndup(r->pool, p, strcspn(p, " ;") ) , - ctx->encoding ) { - VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, - "Got charset %s from HTTP headers", ctx->encoding) ) ; - if ( ret = xmlParseCharEncoding(ctx->encoding), - ((ret != XML_CHAR_ENCODING_ERROR ) - && (ret != XML_CHAR_ENCODING_NONE))) { - return ret ; - } - } - } - -/* to sniff, first we look for BOM */ - if (ctx->encoding == NULL) { - if ( ret = xmlDetectCharEncoding((const xmlChar*)buf, bytes), - ret != XML_CHAR_ENCODING_NONE ) { - VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, - "Got charset from XML rules.") ) ; - return ret ; - } - -/* If none of the above, look for a META-thingey */ - if ( ap_regexec(seek_meta_ctype, buf, 1, match, 0) == 0 ) { - p = apr_pstrndup(r->pool, buf + match[0].rm_so, - match[0].rm_eo - match[0].rm_so) ; - if ( ap_regexec(seek_charset, p, 2, match, 0) == 0 ) - ctx->encoding = apr_pstrndup(r->pool, p+match[1].rm_so, - match[1].rm_eo - match[1].rm_so) ; - } - } - -/* either it's set to something we found or it's still the default */ - if ( ctx->encoding ) { - VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, - "Got charset %s from HTML META", ctx->encoding) ) ; - if ( ret = xmlParseCharEncoding(ctx->encoding), - ((ret != XML_CHAR_ENCODING_ERROR ) - && (ret != XML_CHAR_ENCODING_NONE))) { - return ret ; - } -/* Unsupported charset. Can we get (iconv) support through apr_xlate? */ -/* Aaargh! libxml2 has undocumented support. So this fails - * if metafix is not active. Have to make it conditional. - */ - if (cfg->metafix) { - VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, - "Charset %s not supported by libxml2; trying apr_xlate", ctx->encoding) ) ; - if (apr_xlate_open(&convset, "UTF-8", ctx->encoding, r->pool) == APR_SUCCESS) { - ctx->conv_in = apr_pcalloc(r->pool, sizeof(conv_t)); - ctx->conv_in->convset = convset ; - return XML_CHAR_ENCODING_UTF8 ; - } else { - ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, - "Charset %s not supported. Consider aliasing it?", ctx->encoding) ; - } - } else { - ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, - "Charset %s not supported. Consider aliasing it or use metafix?", - ctx->encoding) ; - } - } - - -/* Use configuration default as a last resort */ - ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r, - "No usable charset information; using configuration default") ; - return (cfg->default_encoding == XML_CHAR_ENCODING_NONE) - ? XML_CHAR_ENCODING_8859_1 : cfg->default_encoding ; -} static meta* metafix(request_rec* r, const char* buf /*, size_t bytes*/ #ifndef GO_FASTER , int verbose @@ -882,21 +663,26 @@ static meta* metafix(request_rec* r, const char* buf /*, size_t bytes*/ header = apr_pstrndup(r->pool, p, q-p) ; if ( strncasecmp(header, "Content-", 8) ) { /* find content=... string */ - for ( p = ap_strstr((char*)buf+offs+pmatch[0].rm_so, "content") ; *p ; ) { - p += 7 ; - while ( *p && isspace(*p) ) - ++p ; - if ( *p != '=' ) - continue ; - while ( *p && isspace(*++p) ) ; - if ( ( *p == '\'' ) || ( *p == '"' ) ) { - delim = *p++ ; - for ( q = p ; *q != delim ; ++q ) ; - } else { - for ( q = p ; *q && !isspace(*q) && (*q != '>') ; ++q ) ; - } - content = apr_pstrndup(r->pool, p, q-p) ; - break ; + p = apr_strmatch(seek_content, buf+offs+pmatch[0].rm_so, + pmatch[0].rm_eo - pmatch[0].rm_so); + /* if it doesn't contain "content", ignore, don't crash! */ + if (p != NULL) { + while (*p) { + p += 7 ; + while ( *p && isspace(*p) ) + ++p ; + if ( *p != '=' ) + continue ; + while ( *p && isspace(*++p) ) ; + if ( ( *p == '\'' ) || ( *p == '"' ) ) { + delim = *p++ ; + for ( q = p ; *q != delim ; ++q ) ; + } else { + for ( q = p ; *q && !isspace(*q) && (*q != '>') ; ++q ) ; + } + content = apr_pstrndup(r->pool, p, q-p) ; + break ; + } } } else if ( !strncasecmp(header, "Content-Type", 12) ) { ret = apr_palloc(r->pool, sizeof(meta) ) ; @@ -938,11 +724,12 @@ static const char* interpolate_vars(request_rec* r, const char* str) { var = apr_pstrndup(r->pool, start+2, end-start-2) ; } replacement = apr_table_get(r->subprocess_env, var) ; - if (!replacement) + if (!replacement) { if (delim) replacement = apr_pstrndup(r->pool, delim+1, end-delim-1); else replacement = ""; + } str = apr_pstrcat(r->pool, before, replacement, after, NULL); ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, "Interpolating %s => %s", var, replacement) ; @@ -1033,7 +820,7 @@ static saxctxt* check_filter_init (ap_filter_t* f) { if ( errmsg ) { #ifndef GO_FASTER if ( cfg->verbose ) { - ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, f->r, errmsg) ; + ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, f->r, "%s", errmsg) ; } #endif ap_remove_output_filter(f) ; @@ -1057,8 +844,6 @@ static saxctxt* check_filter_init (ap_filter_t* f) { return f->ctx ; } static int proxy_html_filter(ap_filter_t* f, apr_bucket_brigade* bb) { - apr_xlate_t* convset; - const char* charset = NULL; apr_bucket* b ; meta* m = NULL ; xmlCharEncoding enc ; @@ -1101,69 +886,28 @@ static int proxy_html_filter(ap_filter_t* f, apr_bucket_brigade* bb) { } else if ( apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ) == APR_SUCCESS ) { if ( ctxt->parser == NULL ) { - if ( buf[bytes] != 0 ) { - /* make a string for parse routines to play with */ - char* buf1 = apr_palloc(f->r->pool, bytes+1) ; - memcpy(buf1, buf, bytes) ; - buf1[bytes] = 0 ; - buf = buf1 ; - } - /* For publishing systems that insert crap at the head of a - * page that buggers up the parser. Search to first instance - * of some relatively sane, or at least parseable, element. - */ - if (ctxt->cfg->skipto != NULL) { - char* p = ap_strchr_c(buf, '<'); - tattr* starts = (tattr*) ctxt->cfg->skipto->elts; - int found = 0; - while (!found && *p) { - int i; - for (i = 0; i < ctxt->cfg->skipto->nelts; ++i) { - if ( !strncasecmp(p+1, starts[i].val, strlen(starts[i].val))) { - bytes -= (p-buf); - buf = p ; - found = 1; - VERBOSE( - ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, - "Skipped to first <%s> element", starts[i].val) - ) ; - break; - } - } - p = ap_strchr_c(p+1, '<'); - } - if (p == NULL) { - ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r, - "Failed to find start of recognised HTML!") ; - } - } - - enc = sniff_encoding(ctxt, buf, bytes) ; - /* now we have input charset, set output charset too */ - if (ctxt->cfg->charset_out) { - if (!strcmp(ctxt->cfg->charset_out, "*")) - charset = ctxt->encoding; - else - charset = ctxt->cfg->charset_out; - if (strcasecmp(charset, "utf-8")) { - if (apr_xlate_open(&convset, charset, "UTF-8", - f->r->pool) == APR_SUCCESS) { - ctxt->conv_out = apr_pcalloc(f->r->pool, sizeof(conv_t)); - ctxt->conv_out->convset = convset; - } else { - ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r, - "Output charset %s not supported. Falling back to UTF-8", - charset) ; - } - } - } - if (ctxt->conv_out) { - const char* ctype = apr_psprintf(f->r->pool, - "text/html;charset=%s", charset); - ap_set_content_type(f->r, ctype) ; - } else { + const char* cenc; + if (!xml2enc_charset || + (xml2enc_charset(f->r, &enc, &cenc) != APR_SUCCESS)) { + if (!xml2enc_charset) + ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r, + "No i18n support found. Install mod_xml2enc if required") ; + enc = XML_CHAR_ENCODING_NONE; ap_set_content_type(f->r, "text/html;charset=utf-8") ; + } else { + /* if we wanted a non-default charset_out, insert the + * xml2enc filter now that we've sniffed it + */ + if (ctxt->cfg->charset_out && xml2enc_filter) { + if (*ctxt->cfg->charset_out != '*') + cenc = ctxt->cfg->charset_out; + xml2enc_filter(f->r, cenc, ENCIO_OUTPUT); + ap_set_content_type(f->r, + apr_pstrcat(f->r->pool, "text/html;charset=", cenc, NULL)) ; + } else /* Normal case, everything worked, utf-8 output */ + ap_set_content_type(f->r, "text/html;charset=utf-8") ; } + ap_fputs(f->next, ctxt->bb, ctxt->cfg->doctype) ; ctxt->parser = htmlCreatePushParserCtxt(&sax, ctxt, buf, 4, 0, enc) ; buf += 4; @@ -1174,7 +918,7 @@ static int proxy_html_filter(ap_filter_t* f, apr_bucket_brigade* bb) { return rv; } apr_pool_cleanup_register(f->r->pool, ctxt->parser, - (void*)htmlFreeParserCtxt, apr_pool_cleanup_null) ; + (int(*)(void*))htmlFreeParserCtxt, apr_pool_cleanup_null) ; #ifndef USE_OLD_LIBXML2 if ( xmlopts = xmlCtxtUseOptions(ctxt->parser, xmlopts ), xmlopts ) ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r, @@ -1209,7 +953,6 @@ static void* proxy_html_config(apr_pool_t* pool, char* x) { ret->doctype = DEFAULT_DOCTYPE ; ret->etag = DEFAULT_ETAG ; ret->bufsz = 8192 ; - ret->default_encoding = XML_CHAR_ENCODING_NONE ; /* ret->interp = 1; */ /* don't initialise links and events until they get set/used */ return ret ; @@ -1223,8 +966,6 @@ static void* proxy_html_merge(apr_pool_t* pool, void* BASE, void* ADD) { conf->links = (add->links == NULL) ? base->links : add->links; conf->events = (add->events == NULL) ? base->events : add->events; - conf->default_encoding = (add->default_encoding == XML_CHAR_ENCODING_NONE) - ? base->default_encoding : add->default_encoding ; conf->charset_out = (add->charset_out == NULL) ? base->charset_out : add->charset_out ; @@ -1254,7 +995,7 @@ static void* proxy_html_merge(apr_pool_t* pool, void* BASE, void* ADD) { conf->extfix = add->extfix ; conf->interp = add->interp ; conf->strip_comments = add->strip_comments ; - conf->skipto = add->skipto ; + conf->enabled = add->enabled; #ifndef GO_FASTER conf->verbose = add->verbose ; #endif @@ -1264,7 +1005,7 @@ static void* proxy_html_merge(apr_pool_t* pool, void* BASE, void* ADD) { conf->extfix = base->extfix | add->extfix ; conf->interp = base->interp | add->interp ; conf->strip_comments = base->strip_comments | add->strip_comments ; - conf->skipto = add->skipto ? add->skipto : base->skipto ; + conf->enabled = add->enabled | base->enabled; #ifndef GO_FASTER conf->verbose = base->verbose | add->verbose ; #endif @@ -1303,16 +1044,17 @@ static void comp_urlmap(apr_pool_t* pool, urlmap* newmap, newmap->to = to ; } if (cond != NULL) { + char* cond_copy; newmap->cond = apr_pcalloc(pool, sizeof(rewritecond)); if (cond[0] == '!') { newmap->cond->rel = -1; - newmap->cond->env = cond+1; + newmap->cond->env = cond_copy = apr_pstrdup(pool, cond+1); } else { newmap->cond->rel = 1; - newmap->cond->env = cond; + newmap->cond->env = cond_copy = apr_pstrdup(pool, cond); } - eq = ap_strchr_c(++cond, '='); - if (eq && (eq != cond)) { + eq = ap_strchr(++cond_copy, '='); + if (eq) { *eq = 0; newmap->cond->val = eq+1; } @@ -1400,15 +1142,6 @@ static const char* set_events(cmd_parms* cmd, void* CFG, const char* arg) { attr->val = arg; return NULL ; } -static const char* set_skipto(cmd_parms* cmd, void* CFG, const char* arg) { - tattr* attr; - proxy_html_conf* cfg = CFG; - if (cfg->skipto == NULL) - cfg->skipto = apr_array_make(cmd->pool, 4, sizeof(tattr)); - attr = apr_array_push(cfg->skipto) ; - attr->val = arg; - return NULL ; -} static const char* set_links(cmd_parms* cmd, void* CFG, const char* elt, const char* att) { apr_array_header_t* attrs; @@ -1427,33 +1160,7 @@ static const char* set_links(cmd_parms* cmd, void* CFG, attr->val = att ; return NULL ; } -static const char* set_charset_alias(cmd_parms* cmd, void* CFG, - const char* charset, const char* alias) { - const char* errmsg = ap_check_cmd_context(cmd, GLOBAL_ONLY); - if (errmsg != NULL) - return errmsg ; - else if (xmlAddEncodingAlias(charset, alias) == 0) - return NULL; - else - return "Error setting charset alias"; -} -static const char* set_charset_default(cmd_parms* cmd, void* CFG, - const char* charset) { - proxy_html_conf* cfg = CFG; - cfg->default_encoding = xmlParseCharEncoding(charset); - switch(cfg->default_encoding) { - case XML_CHAR_ENCODING_NONE: - return "Default charset not found"; - case XML_CHAR_ENCODING_ERROR: - return "Invalid or unsupported default charset"; - default: - return NULL; - } -} static const command_rec proxy_html_cmds[] = { - AP_INIT_ITERATE("ProxyHTMLStartParse", set_skipto, NULL, - RSRC_CONF|ACCESS_CONF, - "Ignore anything in front of the first of these elements"), AP_INIT_ITERATE("ProxyHTMLEvents", set_events, NULL, RSRC_CONF|ACCESS_CONF, "Strings to be treated as scripting events"), AP_INIT_ITERATE2("ProxyHTMLLinks", set_links, NULL, @@ -1485,38 +1192,52 @@ static const command_rec proxy_html_cmds[] = { AP_INIT_TAKE1("ProxyHTMLBufSize", ap_set_int_slot, (void*)APR_OFFSETOF(proxy_html_conf, bufsz), RSRC_CONF|ACCESS_CONF, "Buffer size" ) , - AP_INIT_ITERATE2("ProxyHTMLCharsetAlias", set_charset_alias, NULL, - RSRC_CONF, "ProxyHTMLCharsetAlias charset alias [more aliases]" ) , - AP_INIT_TAKE1("ProxyHTMLCharsetDefault", set_charset_default, NULL, - RSRC_CONF|ACCESS_CONF, "Usage: ProxyHTMLCharsetDefault charset" ) , AP_INIT_TAKE1("ProxyHTMLCharsetOut", ap_set_string_slot, (void*)APR_OFFSETOF(proxy_html_conf, charset_out), RSRC_CONF|ACCESS_CONF, "Usage: ProxyHTMLCharsetOut charset" ) , + AP_INIT_FLAG("ProxyHTMLEnable", ap_set_flag_slot, + (void*)APR_OFFSETOF(proxy_html_conf, enabled), + RSRC_CONF|ACCESS_CONF, "Enable proxy-html and xml2enc filters" ) , { NULL } } ; static int mod_proxy_html(apr_pool_t* p, apr_pool_t* p1, apr_pool_t* p2, server_rec* s) { ap_add_version_component(p, VERSION_STRING) ; - seek_meta_ctype = ap_pregcomp(p, - "(]*http-equiv[ \t\r\n='\"]*content-type[^>]*>)", - AP_REG_EXTENDED|AP_REG_ICASE) ; - seek_charset = ap_pregcomp(p, "charset=([A-Za-z0-9_-]+)", - AP_REG_EXTENDED|AP_REG_ICASE) ; seek_meta = ap_pregcomp(p, "]*(http-equiv)[^>]*>", AP_REG_EXTENDED|AP_REG_ICASE) ; + seek_content = apr_strmatch_precompile(p, "content", 0); memset(&sax, 0, sizeof(htmlSAXHandler)); sax.startElement = pstartElement ; sax.endElement = pendElement ; sax.characters = pcharacters ; sax.comment = pcomment ; sax.cdataBlock = pcdata ; + xml2enc_charset = APR_RETRIEVE_OPTIONAL_FN(xml2enc_charset); + xml2enc_filter = APR_RETRIEVE_OPTIONAL_FN(xml2enc_filter); + if (!xml2enc_charset) { + ap_log_perror(APLOG_MARK, APLOG_NOTICE, 0, p2, + "I18n support in mod_proxy_html requires mod_xml2enc. " + "Without it, non-ASCII characters in proxied pages are " + "likely to display incorrectly."); + } return OK ; } +static void proxy_html_insert(request_rec* r) { + proxy_html_conf* cfg + = ap_get_module_config(r->per_dir_config, &proxy_html_module); + if (cfg->enabled) { + if (xml2enc_filter) + xml2enc_filter(r, NULL, ENCIO_INPUT_CHECKS); + ap_add_output_filter("proxy-html", NULL, r, r->connection); + } +} static void proxy_html_hooks(apr_pool_t* p) { + static const char* aszSucc[] = { "mod_filter.c", NULL }; ap_register_output_filter_protocol("proxy-html", proxy_html_filter, NULL, AP_FTYPE_RESOURCE, AP_FILTER_PROTO_CHANGE|AP_FILTER_PROTO_CHANGE_LENGTH) ; ap_hook_post_config(mod_proxy_html, NULL, NULL, APR_HOOK_MIDDLE) ; + ap_hook_insert_filter(proxy_html_insert, NULL, aszSucc, APR_HOOK_MIDDLE) ; } module AP_MODULE_DECLARE_DATA proxy_html_module = { STANDARD20_MODULE_STUFF,