/********************************************************************
- Copyright (c) 2003-7, WebThing Ltd
+ Copyright (c) 2003-9, WebThing Ltd
Author: Nick Kew <nick@webthing.com>
This program is free software; you can redistribute it and/or modify
*********************************************************************/
-
-/********************************************************************
- Note to Users
-
- You are requested to register as a user, at
- http://apache.webthing.com/registration.html
-
- This entitles you to support from the developer.
- I'm unlikely to reply to help/support requests from
- non-registered users, unless you're paying and/or offering
- constructive feedback such as bug reports or sensible
- suggestions for further development.
-
- It also makes a small contribution to the effort
- that's gone into developing this work.
-*********************************************************************/
+/**** NOTICE TO PACKAGERS
+ *
+ * This module now relies on mod_xml2enc for i18n support.
+ * You should make mod_xml2enc a dependency in your packages.
+ */
/* End of Notices */
#ifdef GO_FASTER
#define VERBOSE(x)
+#define VERBOSEB(x)
#else
-#define VERBOSE(x) if ( verbose ) x
+#define VERBOSE(x) if (verbose) x
+#define VERBOSEB(x) if (verbose) {x}
#endif
-#define VERSION_STRING "proxy_html/3.0.0"
+/* 3.1.2 - trivial changes to fix compile on Windows */
+#define VERSION_STRING "proxy_html/3.1.2"
#include <ctype.h>
#include <http_log.h>
#include <apr_strings.h>
#include <apr_hash.h>
-#include <apr_xlate.h>
+#include <apr_strmatch.h>
+
+#include <apr_optional.h>
+#include <mod_xml2enc.h>
+#include <http_request.h>
/* To support Apache 2.1/2.2, we need the ap_ forms of the
* regexp stuff, and they're now used in the code.
#define APACHE22
#endif
+/* globals set once at startup */
+static ap_regex_t* seek_meta ;
+static const apr_strmatch_pattern* seek_content ;
+static apr_status_t (*xml2enc_charset)(request_rec*, xmlCharEncoding*, const char**) = NULL;
+static apr_status_t (*xml2enc_filter)(request_rec*, const char*, unsigned int) = NULL;
+
module AP_MODULE_DECLARE_DATA proxy_html_module ;
#define M_HTML 0x01
size_t bufsz ;
apr_hash_t* links;
apr_array_header_t* events;
- apr_array_header_t* skipto;
- xmlCharEncoding default_encoding;
const char* charset_out;
int extfix ;
int metafix ;
int strip_comments ;
int interp;
+ int enabled;
#ifndef GO_FASTER
int verbose ;
#endif
} proxy_html_conf ;
typedef struct {
- apr_xlate_t* convset;
- char* buf;
- apr_size_t bytes;
-} conv_t;
-typedef struct {
ap_filter_t* f ;
proxy_html_conf* cfg ;
htmlParserCtxtPtr parser ;
char* buf ;
size_t offset ;
size_t avail ;
- conv_t* conv_in;
- conv_t* conv_out;
const char* encoding;
urlmap* map;
} saxctxt ;
*p = tolower(*p) ;
if ( flags & NORM_MSSLASH )
- for ( p = ap_strchr_c(str, '\\') ; p ; p = ap_strchr_c(p+1, '\\') )
+ for ( p = ap_strchr(str, '\\') ; p ; p = ap_strchr(p+1, '\\') )
*p = '/' ;
}
-static void consume_buffer(saxctxt* ctx, const char* inbuf,
- int bytes, int flag) {
- apr_status_t rv;
- apr_size_t insz;
- char* buf;
-#ifndef GO_FASTER
- int verbose = ctx->cfg->verbose;
-#endif
- if (ctx->conv_in == NULL) {
- /* just feed it to libxml2 */
- htmlParseChunk(ctx->parser, inbuf, bytes, flag) ;
- return;
- }
- if (ctx->conv_in->bytes > 0) {
- /* FIXME: make this a reusable buf? */
- buf = apr_palloc(ctx->f->r->pool, ctx->conv_in->bytes + bytes);
- memcpy(buf, ctx->conv_in->buf, ctx->conv_in->bytes);
- memcpy(buf + ctx->conv_in->bytes, inbuf, bytes);
- bytes += ctx->conv_in->bytes;
- ctx->conv_in->bytes = 0;
- } else {
- buf = (char*) inbuf;
- }
- insz = bytes;
- while (insz > 0) {
- char outbuf[4096];
- apr_size_t outsz = 4096;
- rv = apr_xlate_conv_buffer(ctx->conv_in->convset,
- buf + (bytes - insz), &insz,
- outbuf, &outsz);
- htmlParseChunk(ctx->parser, outbuf, 4096-outsz, flag) ;
- switch (rv) {
- case APR_SUCCESS:
- continue;
- case APR_EINCOMPLETE: /* save dangling byte(s) and return */
- ctx->conv_in->bytes = insz;
- ctx->conv_in->buf = (buf != inbuf) ? buf + (bytes-insz)
- : apr_pmemdup(ctx->f->r->pool, buf + (bytes-insz), insz);
- break;
- case APR_EINVAL: /* try skipping one bad byte */
- VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, ctx->f->r,
- "Skipping invalid byte in input stream!") ) ;
- --insz;
- continue;
- default:
- /* Erk! What's this? Bail out and eat the buf raw
- * if libxml2 will accept it!
- */
- ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, ctx->f->r,
- "Failed to convert input; trying it raw") ;
- htmlParseChunk(ctx->parser, buf + (bytes - insz), insz, flag) ;
- ctx->conv_in = NULL; /* don't try converting any more */
- break;
- }
- }
-}
-static void AP_fwrite(saxctxt* ctx, const char* inbuf, int bytes, int flush) {
- /* convert charset if necessary, and output */
- char* buf;
- apr_status_t rv;
- apr_size_t insz ;
-#ifndef GO_FASTER
- int verbose = ctx->cfg->verbose;
-#endif
+/* Feed a chunk of input straight to the libxml2 push parser held in ctx.
+ * Every macro parameter is parenthesised so that an arbitrary expression
+ * (not just a plain identifier) expands correctly.
+ */
+#define consume_buffer(ctx,inbuf,bytes,flag) \
+ htmlParseChunk((ctx)->parser, (inbuf), (bytes), (flag))
- if (ctx->conv_out == NULL) {
- ap_fwrite(ctx->f->next, ctx->bb, inbuf, bytes);
- return;
- }
- if (ctx->conv_out->bytes > 0) {
- /* FIXME: make this a reusable buf? */
- buf = apr_palloc(ctx->f->r->pool, ctx->conv_out->bytes + bytes);
- memcpy(buf, ctx->conv_out->buf, ctx->conv_out->bytes);
- memcpy(buf + ctx->conv_out->bytes, inbuf, bytes);
- bytes += ctx->conv_out->bytes;
- ctx->conv_out->bytes = 0;
- } else {
- buf = (char*) inbuf;
- }
- insz = bytes;
- while (insz > 0) {
- char outbuf[2048];
- apr_size_t outsz = 2048;
- rv = apr_xlate_conv_buffer(ctx->conv_out->convset,
- buf + (bytes - insz), &insz,
- outbuf, &outsz);
- ap_fwrite(ctx->f->next, ctx->bb, outbuf, 2048-outsz) ;
- switch (rv) {
- case APR_SUCCESS:
- continue;
- case APR_EINCOMPLETE: /* save dangling byte(s) and return */
- /* but if we need to flush, just abandon them */
- if ( flush) { /* if we're flushing, this must be complete */
- /* so this is an error */
- VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, ctx->f->r,
- "Skipping invalid byte in output stream!") ) ;
- } else {
- ctx->conv_out->bytes = insz;
- ctx->conv_out->buf = (buf != inbuf) ? buf + (bytes-insz)
- : apr_pmemdup(ctx->f->r->pool, buf + (bytes-insz), insz);
- }
- break;
- case APR_EINVAL: /* try skipping one bad byte */
- VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, ctx->f->r,
- "Skipping invalid byte in output stream!") ) ;
- --insz;
- continue;
- default:
- /* Erk! What's this? Bail out and pass the buf raw
- * if libxml2 will accept it!
- */
- VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, ctx->f->r,
- "Failed to convert output; sending UTF-8") ) ;
- ap_fwrite(ctx->f->next, ctx->bb, buf + (bytes - insz), insz) ;
- break;
- }
- }
-}
+/* Write bytes from inbuf to the next filter through this context's brigade.
+ * The flush parameter is accepted so existing call sites keep working; it
+ * is not used by the expansion.
+ * There is deliberately no trailing semicolon: the caller supplies it, so
+ * the macro expands to exactly one expression statement and stays safe
+ * inside an unbraced if/else.
+ */
+#define AP_fwrite(ctx,inbuf,bytes,flush) \
+ ap_fwrite((ctx)->f->next, (ctx)->bb, (inbuf), (bytes))
/* This is always utf-8 on entry. We can convert charset within FLUSH */
#define FLUSH AP_fwrite(ctx, (chars+begin), (i-begin), 0) ; begin = i+1
newbuf = realloc(ctx->buf, ctx->avail) ;
if ( newbuf != ctx->buf ) {
if ( ctx->buf )
- apr_pool_cleanup_kill(ctx->f->r->pool, ctx->buf, (void*)free) ;
+ apr_pool_cleanup_kill(ctx->f->r->pool, ctx->buf, (int(*)(void*))free);
apr_pool_cleanup_register(ctx->f->r->pool, newbuf,
- (void*)free, apr_pool_cleanup_null);
+ (int(*)(void*))free, apr_pool_cleanup_null);
ctx->buf = newbuf ;
}
}
s_to = strlen(subs) ;
len = strlen(ctx->buf) ;
offs += match ;
- VERBOSE( {
+ VERBOSEB(
const char* f = apr_pstrndup(ctx->f->r->pool,
ctx->buf + offs , s_from ) ;
ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, ctx->f->r,
"C/RX: match at %s, substituting %s", f, subs) ;
- } )
+ )
if ( s_to > s_from) {
preserve(ctx, s_to - s_from) ;
memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from,
++num_match ;
offs = match = pmatch[0].rm_so ;
s_from = pmatch[0].rm_eo - match ;
- subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf+offs,
+ subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf,
nmatch, pmatch) ;
VERBOSE( {
const char* f = apr_pstrndup(ctx->f->r->pool,
}
}
-/* globals set once at startup */
-static ap_regex_t* seek_meta_ctype ;
-static ap_regex_t* seek_charset ;
-static ap_regex_t* seek_meta ;
-
-static xmlCharEncoding sniff_encoding(saxctxt* ctx, const char* cbuf,
- size_t bytes) {
-#ifndef GO_FASTER
- int verbose = ctx->cfg->verbose;
-#endif
- request_rec* r = ctx->f->r ;
- proxy_html_conf* cfg = ctx->cfg ;
- xmlCharEncoding ret ;
- char* p ;
- ap_regmatch_t match[2] ;
- char* buf = (char*)cbuf ;
- apr_xlate_t* convset;
-
- VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
- "Content-Type is %s", r->content_type) ) ;
-
-/* If we've got it in the HTTP headers, there's nothing to do */
- if ( r->content_type &&
- ( p = ap_strcasestr(r->content_type, "charset=") , p > 0 ) ) {
- p += 8 ;
- if ( ctx->encoding = apr_pstrndup(r->pool, p, strcspn(p, " ;") ) ,
- ctx->encoding ) {
- VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
- "Got charset %s from HTTP headers", ctx->encoding) ) ;
- if ( ret = xmlParseCharEncoding(ctx->encoding),
- ((ret != XML_CHAR_ENCODING_ERROR )
- && (ret != XML_CHAR_ENCODING_NONE))) {
- return ret ;
- }
- }
- }
-
-/* to sniff, first we look for BOM */
- if (ctx->encoding == NULL) {
- if ( ret = xmlDetectCharEncoding((const xmlChar*)buf, bytes),
- ret != XML_CHAR_ENCODING_NONE ) {
- VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
- "Got charset from XML rules.") ) ;
- return ret ;
- }
-
-/* If none of the above, look for a META-thingey */
- if ( ap_regexec(seek_meta_ctype, buf, 1, match, 0) == 0 ) {
- p = apr_pstrndup(r->pool, buf + match[0].rm_so,
- match[0].rm_eo - match[0].rm_so) ;
- if ( ap_regexec(seek_charset, p, 2, match, 0) == 0 )
- ctx->encoding = apr_pstrndup(r->pool, p+match[1].rm_so,
- match[1].rm_eo - match[1].rm_so) ;
- }
- }
-
-/* either it's set to something we found or it's still the default */
- if ( ctx->encoding ) {
- VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
- "Got charset %s from HTML META", ctx->encoding) ) ;
- if ( ret = xmlParseCharEncoding(ctx->encoding),
- ((ret != XML_CHAR_ENCODING_ERROR )
- && (ret != XML_CHAR_ENCODING_NONE))) {
- return ret ;
- }
-/* Unsupported charset. Can we get (iconv) support through apr_xlate? */
-/* Aaargh! libxml2 has undocumented <META-crap> support. So this fails
- * if metafix is not active. Have to make it conditional.
- */
- if (cfg->metafix) {
- VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
- "Charset %s not supported by libxml2; trying apr_xlate", ctx->encoding) ) ;
- if (apr_xlate_open(&convset, "UTF-8", ctx->encoding, r->pool) == APR_SUCCESS) {
- ctx->conv_in = apr_pcalloc(r->pool, sizeof(conv_t));
- ctx->conv_in->convset = convset ;
- return XML_CHAR_ENCODING_UTF8 ;
- } else {
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "Charset %s not supported. Consider aliasing it?", ctx->encoding) ;
- }
- } else {
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "Charset %s not supported. Consider aliasing it or use metafix?",
- ctx->encoding) ;
- }
- }
-
-
-/* Use configuration default as a last resort */
- ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r,
- "No usable charset information; using configuration default") ;
- return (cfg->default_encoding == XML_CHAR_ENCODING_NONE)
- ? XML_CHAR_ENCODING_8859_1 : cfg->default_encoding ;
-}
static meta* metafix(request_rec* r, const char* buf /*, size_t bytes*/
#ifndef GO_FASTER
, int verbose
header = apr_pstrndup(r->pool, p, q-p) ;
if ( strncasecmp(header, "Content-", 8) ) {
/* find content=... string */
- for ( p = ap_strstr((char*)buf+offs+pmatch[0].rm_so, "content") ; *p ; ) {
- p += 7 ;
- while ( *p && isspace(*p) )
- ++p ;
- if ( *p != '=' )
- continue ;
- while ( *p && isspace(*++p) ) ;
- if ( ( *p == '\'' ) || ( *p == '"' ) ) {
- delim = *p++ ;
- for ( q = p ; *q != delim ; ++q ) ;
- } else {
- for ( q = p ; *q && !isspace(*q) && (*q != '>') ; ++q ) ;
- }
- content = apr_pstrndup(r->pool, p, q-p) ;
- break ;
+ p = apr_strmatch(seek_content, buf+offs+pmatch[0].rm_so,
+ pmatch[0].rm_eo - pmatch[0].rm_so);
+ /* if it doesn't contain "content", ignore, don't crash! */
+ if (p != NULL) {
+ while (*p) {
+ p += 7 ;
+ while ( *p && isspace(*p) )
+ ++p ;
+ if ( *p != '=' )
+ continue ;
+ while ( *p && isspace(*++p) ) ;
+ if ( ( *p == '\'' ) || ( *p == '"' ) ) {
+ delim = *p++ ;
+ for ( q = p ; *q != delim ; ++q ) ;
+ } else {
+ for ( q = p ; *q && !isspace(*q) && (*q != '>') ; ++q ) ;
+ }
+ content = apr_pstrndup(r->pool, p, q-p) ;
+ break ;
+ }
}
} else if ( !strncasecmp(header, "Content-Type", 12) ) {
ret = apr_palloc(r->pool, sizeof(meta) ) ;
var = apr_pstrndup(r->pool, start+2, end-start-2) ;
}
replacement = apr_table_get(r->subprocess_env, var) ;
- if (!replacement)
+ if (!replacement) {
if (delim)
replacement = apr_pstrndup(r->pool, delim+1, end-delim-1);
else
replacement = "";
+ }
str = apr_pstrcat(r->pool, before, replacement, after, NULL);
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
"Interpolating %s => %s", var, replacement) ;
}
static saxctxt* check_filter_init (ap_filter_t* f) {
saxctxt* fctx ;
- proxy_html_conf* cfg
+ if ( ! f->ctx) {
+ proxy_html_conf* cfg
= ap_get_module_config(f->r->per_dir_config, &proxy_html_module);
- const char* force = apr_table_get(f->r->subprocess_env, "PROXY_HTML_FORCE");
-
- const char* errmsg = NULL ;
- if ( !force ) {
- if ( ! f->r->proxyreq ) {
- errmsg = "Non-proxy request; not inserting proxy-html filter" ;
- } else if ( ! f->r->content_type ) {
- errmsg = "No content-type; bailing out of proxy-html filter" ;
- } else if ( strncasecmp(f->r->content_type, "text/html", 9) &&
- strncasecmp(f->r->content_type, "application/xhtml+xml", 21) ) {
- errmsg = "Non-HTML content; not inserting proxy-html filter" ;
+ const char* force = apr_table_get(f->r->subprocess_env, "PROXY_HTML_FORCE");
+
+ const char* errmsg = NULL ;
+ if ( !force ) {
+ if ( ! f->r->proxyreq ) {
+ errmsg = "Non-proxy request; not inserting proxy-html filter" ;
+ } else if ( ! f->r->content_type ) {
+ errmsg = "No content-type; bailing out of proxy-html filter" ;
+ } else if ( strncasecmp(f->r->content_type, "text/html", 9) &&
+ strncasecmp(f->r->content_type, "application/xhtml+xml", 21) ) {
+ errmsg = "Non-HTML content; not inserting proxy-html filter" ;
+ }
+ }
+ if (!cfg->links) {
+ errmsg = "No links configured: nothing for proxy-html filter to do";
}
- }
- if ( errmsg ) {
+ if ( errmsg ) {
#ifndef GO_FASTER
- if ( cfg->verbose ) {
- ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, f->r, errmsg) ;
- }
+ if ( cfg->verbose ) {
+ ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, f->r, "%s", errmsg) ;
+ }
#endif
- ap_remove_output_filter(f) ;
- return NULL ;
- }
+ ap_remove_output_filter(f) ;
+ return NULL ;
+ }
- if ( ! f->ctx) {
fctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(saxctxt)) ;
fctx->f = f ;
fctx->bb = apr_brigade_create(f->r->pool, f->r->connection->bucket_alloc) ;
return f->ctx ;
}
static int proxy_html_filter(ap_filter_t* f, apr_bucket_brigade* bb) {
- apr_xlate_t* convset;
- const char* charset = NULL;
apr_bucket* b ;
meta* m = NULL ;
xmlCharEncoding enc ;
} else if ( apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ)
== APR_SUCCESS ) {
if ( ctxt->parser == NULL ) {
- if ( buf[bytes] != 0 ) {
- /* make a string for parse routines to play with */
- char* buf1 = apr_palloc(f->r->pool, bytes+1) ;
- memcpy(buf1, buf, bytes) ;
- buf1[bytes] = 0 ;
- buf = buf1 ;
- }
- /* For publishing systems that insert crap at the head of a
- * page that buggers up the parser. Search to first instance
- * of some relatively sane, or at least parseable, element.
- */
- if (ctxt->cfg->skipto != NULL) {
- char* p = ap_strchr_c(buf, '<');
- tattr* starts = (tattr*) ctxt->cfg->skipto->elts;
- int found = 0;
- while (!found && *p) {
- int i;
- for (i = 0; i < ctxt->cfg->skipto->nelts; ++i) {
- if ( !strncasecmp(p+1, starts[i].val, strlen(starts[i].val))) {
- bytes -= (p-buf);
- buf = p ;
- found = 1;
- VERBOSE(
- ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r,
- "Skipped to first <%s> element", starts[i].val)
- ) ;
- break;
- }
- }
- p = ap_strchr_c(p+1, '<');
- }
- if (p == NULL) {
- ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,
- "Failed to find start of recognised HTML!") ;
- }
- }
-
- enc = sniff_encoding(ctxt, buf, bytes) ;
- /* now we have input charset, set output charset too */
- if (ctxt->cfg->charset_out) {
- if (!strcmp(ctxt->cfg->charset_out, "*"))
- charset = ctxt->encoding;
- else
- charset = ctxt->cfg->charset_out;
- if (strcasecmp(charset, "utf-8")) {
- if (apr_xlate_open(&convset, charset, "UTF-8",
- f->r->pool) == APR_SUCCESS) {
- ctxt->conv_out = apr_pcalloc(f->r->pool, sizeof(conv_t));
- ctxt->conv_out->convset = convset;
- } else {
- ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,
- "Output charset %s not supported. Falling back to UTF-8",
- charset) ;
- }
- }
- }
- if (ctxt->conv_out) {
- const char* ctype = apr_psprintf(f->r->pool,
- "text/html;charset=%s", charset);
- ap_set_content_type(f->r, ctype) ;
- } else {
+ const char* cenc;
+ if (!xml2enc_charset ||
+ (xml2enc_charset(f->r, &enc, &cenc) != APR_SUCCESS)) {
+ if (!xml2enc_charset)
+ ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,
+ "No i18n support found. Install mod_xml2enc if required") ;
+ enc = XML_CHAR_ENCODING_NONE;
ap_set_content_type(f->r, "text/html;charset=utf-8") ;
+ } else {
+ /* if we wanted a non-default charset_out, insert the
+ * xml2enc filter now that we've sniffed it
+ */
+ if (ctxt->cfg->charset_out && xml2enc_filter) {
+ if (*ctxt->cfg->charset_out != '*')
+ cenc = ctxt->cfg->charset_out;
+ xml2enc_filter(f->r, cenc, ENCIO_OUTPUT);
+ ap_set_content_type(f->r,
+ apr_pstrcat(f->r->pool, "text/html;charset=", cenc, NULL)) ;
+ } else /* Normal case, everything worked, utf-8 output */
+ ap_set_content_type(f->r, "text/html;charset=utf-8") ;
}
+
ap_fputs(f->next, ctxt->bb, ctxt->cfg->doctype) ;
ctxt->parser = htmlCreatePushParserCtxt(&sax, ctxt, buf, 4, 0, enc) ;
buf += 4;
return rv;
}
apr_pool_cleanup_register(f->r->pool, ctxt->parser,
- (void*)htmlFreeParserCtxt, apr_pool_cleanup_null) ;
+ (int(*)(void*))htmlFreeParserCtxt, apr_pool_cleanup_null) ;
#ifndef USE_OLD_LIBXML2
if ( xmlopts = xmlCtxtUseOptions(ctxt->parser, xmlopts ), xmlopts )
ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,
ret->doctype = DEFAULT_DOCTYPE ;
ret->etag = DEFAULT_ETAG ;
ret->bufsz = 8192 ;
- ret->default_encoding = XML_CHAR_ENCODING_NONE ;
/* ret->interp = 1; */
/* don't initialise links and events until they get set/used */
return ret ;
conf->links = (add->links == NULL) ? base->links : add->links;
conf->events = (add->events == NULL) ? base->events : add->events;
- conf->default_encoding = (add->default_encoding == XML_CHAR_ENCODING_NONE)
- ? base->default_encoding : add->default_encoding ;
conf->charset_out = (add->charset_out == NULL)
? base->charset_out : add->charset_out ;
conf->extfix = add->extfix ;
conf->interp = add->interp ;
conf->strip_comments = add->strip_comments ;
- conf->skipto = add->skipto ;
+ conf->enabled = add->enabled;
#ifndef GO_FASTER
conf->verbose = add->verbose ;
#endif
conf->extfix = base->extfix | add->extfix ;
conf->interp = base->interp | add->interp ;
conf->strip_comments = base->strip_comments | add->strip_comments ;
- conf->skipto = add->skipto ? add->skipto : base->skipto ;
+ conf->enabled = add->enabled | base->enabled;
#ifndef GO_FASTER
conf->verbose = base->verbose | add->verbose ;
#endif
newmap->to = to ;
}
if (cond != NULL) {
+ char* cond_copy;
newmap->cond = apr_pcalloc(pool, sizeof(rewritecond));
if (cond[0] == '!') {
newmap->cond->rel = -1;
- newmap->cond->env = cond+1;
+ newmap->cond->env = cond_copy = apr_pstrdup(pool, cond+1);
} else {
newmap->cond->rel = 1;
- newmap->cond->env = cond;
+ newmap->cond->env = cond_copy = apr_pstrdup(pool, cond);
}
- eq = ap_strchr_c(++cond, '=');
- if (eq && (eq != cond)) {
+ eq = ap_strchr(++cond_copy, '=');
+ if (eq) {
*eq = 0;
newmap->cond->val = eq+1;
}
attr->val = arg;
return NULL ;
}
-static const char* set_skipto(cmd_parms* cmd, void* CFG, const char* arg) {
- tattr* attr;
- proxy_html_conf* cfg = CFG;
- if (cfg->skipto == NULL)
- cfg->skipto = apr_array_make(cmd->pool, 4, sizeof(tattr));
- attr = apr_array_push(cfg->skipto) ;
- attr->val = arg;
- return NULL ;
-}
static const char* set_links(cmd_parms* cmd, void* CFG,
const char* elt, const char* att) {
apr_array_header_t* attrs;
attr->val = att ;
return NULL ;
}
-static const char* set_charset_alias(cmd_parms* cmd, void* CFG,
- const char* charset, const char* alias) {
- const char* errmsg = ap_check_cmd_context(cmd, GLOBAL_ONLY);
- if (errmsg != NULL)
- return errmsg ;
- else if (xmlAddEncodingAlias(charset, alias) == 0)
- return NULL;
- else
- return "Error setting charset alias";
-}
-static const char* set_charset_default(cmd_parms* cmd, void* CFG,
- const char* charset) {
- proxy_html_conf* cfg = CFG;
- cfg->default_encoding = xmlParseCharEncoding(charset);
- switch(cfg->default_encoding) {
- case XML_CHAR_ENCODING_NONE:
- return "Default charset not found";
- case XML_CHAR_ENCODING_ERROR:
- return "Invalid or unsupported default charset";
- default:
- return NULL;
- }
-}
static const command_rec proxy_html_cmds[] = {
- AP_INIT_ITERATE("ProxyHTMLStartParse", set_skipto, NULL,
- RSRC_CONF|ACCESS_CONF,
- "Ignore anything in front of the first of these elements"),
AP_INIT_ITERATE("ProxyHTMLEvents", set_events, NULL,
RSRC_CONF|ACCESS_CONF, "Strings to be treated as scripting events"),
AP_INIT_ITERATE2("ProxyHTMLLinks", set_links, NULL,
AP_INIT_TAKE1("ProxyHTMLBufSize", ap_set_int_slot,
(void*)APR_OFFSETOF(proxy_html_conf, bufsz),
RSRC_CONF|ACCESS_CONF, "Buffer size" ) ,
- AP_INIT_ITERATE2("ProxyHTMLCharsetAlias", set_charset_alias, NULL,
- RSRC_CONF, "ProxyHTMLCharsetAlias charset alias [more aliases]" ) ,
- AP_INIT_TAKE1("ProxyHTMLCharsetDefault", set_charset_default, NULL,
- RSRC_CONF|ACCESS_CONF, "Usage: ProxyHTMLCharsetDefault charset" ) ,
AP_INIT_TAKE1("ProxyHTMLCharsetOut", ap_set_string_slot,
(void*)APR_OFFSETOF(proxy_html_conf, charset_out),
RSRC_CONF|ACCESS_CONF, "Usage: ProxyHTMLCharsetOut charset" ) ,
+ AP_INIT_FLAG("ProxyHTMLEnable", ap_set_flag_slot,
+ (void*)APR_OFFSETOF(proxy_html_conf, enabled),
+ RSRC_CONF|ACCESS_CONF, "Enable proxy-html and xml2enc filters" ) ,
{ NULL }
} ;
static int mod_proxy_html(apr_pool_t* p, apr_pool_t* p1, apr_pool_t* p2,
server_rec* s) {
ap_add_version_component(p, VERSION_STRING) ;
- seek_meta_ctype = ap_pregcomp(p,
- "(<meta[^>]*http-equiv[ \t\r\n='\"]*content-type[^>]*>)",
- AP_REG_EXTENDED|AP_REG_ICASE) ;
- seek_charset = ap_pregcomp(p, "charset=([A-Za-z0-9_-]+)",
- AP_REG_EXTENDED|AP_REG_ICASE) ;
seek_meta = ap_pregcomp(p, "<meta[^>]*(http-equiv)[^>]*>",
AP_REG_EXTENDED|AP_REG_ICASE) ;
+ seek_content = apr_strmatch_precompile(p, "content", 0); /* literal pattern metafix uses to locate a META tag's content attribute */
memset(&sax, 0, sizeof(htmlSAXHandler));
sax.startElement = pstartElement ;
sax.endElement = pendElement ;
sax.characters = pcharacters ;
sax.comment = pcomment ;
sax.cdataBlock = pcdata ;
+ xml2enc_charset = APR_RETRIEVE_OPTIONAL_FN(xml2enc_charset); /* may be NULL; tested here and again before each call */
+ xml2enc_filter = APR_RETRIEVE_OPTIONAL_FN(xml2enc_filter); /* may be NULL; callers test it before use */
+ if (!xml2enc_charset) { /* not fatal: log a notice and carry on without i18n support */
+ ap_log_perror(APLOG_MARK, APLOG_NOTICE, 0, p2,
+ "I18n support in mod_proxy_html requires mod_xml2enc. "
+ "Without it, non-ASCII characters in proxied pages are "
+ "likely to display incorrectly.");
+ }
return OK ;
}
+/* insert_filter hook.  Does nothing unless the request's per-directory
+ * configuration has the enabled flag set (ProxyHTMLEnable).  When it is
+ * set, call xml2enc_filter if that optional function was found, then add
+ * the "proxy-html" output filter to the request.
+ */
+static void proxy_html_insert(request_rec* r) {
+ proxy_html_conf* conf =
+ ap_get_module_config(r->per_dir_config, &proxy_html_module);
+
+ if (!conf->enabled)
+ return;
+
+ if (xml2enc_filter != NULL)
+ xml2enc_filter(r, NULL, ENCIO_INPUT_CHECKS);
+ ap_add_output_filter("proxy-html", NULL, r, r->connection);
+}
static void proxy_html_hooks(apr_pool_t* p) {
+ static const char* aszSucc[] = { "mod_filter.c", NULL }; /* NULL-terminated module list handed to ap_hook_insert_filter below */
ap_register_output_filter_protocol("proxy-html", proxy_html_filter,
NULL, AP_FTYPE_RESOURCE,
AP_FILTER_PROTO_CHANGE|AP_FILTER_PROTO_CHANGE_LENGTH) ;
ap_hook_post_config(mod_proxy_html, NULL, NULL, APR_HOOK_MIDDLE) ;
+ ap_hook_insert_filter(proxy_html_insert, NULL, aszSucc, APR_HOOK_MIDDLE) ; /* NOTE(review): aszSucc sits in the successor slot, presumably so this hook runs ahead of mod_filter's -- confirm */
}
module AP_MODULE_DECLARE_DATA proxy_html_module = {
STANDARD20_MODULE_STUFF,