X-Git-Url: http://git.home-dn.net/?p=manu%2Fmod-proxy-html.git;a=blobdiff_plain;f=mod_proxy_html.c;h=6a97d3e3e618cb7920e62cfbf113eb04cfd1e422;hp=0157c2f1527528b6cd168f370ac6e3925e6e4cc8;hb=HEAD;hpb=6556ff33ca2610d4f8ebddb750bb773f6bae4d16

diff --git a/mod_proxy_html.c b/mod_proxy_html.c
index 0157c2f..6a97d3e 100644
--- a/mod_proxy_html.c
+++ b/mod_proxy_html.c
@@ -1,5 +1,5 @@
 /********************************************************************
-	 Copyright (c) 2003-8, WebThing Ltd
+	 Copyright (c) 2003-9, WebThing Ltd
 	 Author: Nick Kew <nick@webthing.com>
 
 This program is free software; you can redistribute it and/or modify
@@ -17,22 +17,11 @@ http://apache.webthing.com/COPYING.txt
 
 *********************************************************************/
 
-
-/********************************************************************
-	Note to Users
- 
-	You are requested to register as a user, at
-	http://apache.webthing.com/registration.html
- 
-	This entitles you to support from the developer.
-	I'm unlikely to reply to help/support requests from
-	non-registered users, unless you're paying and/or offering
-	constructive feedback such as bug reports or sensible
-	suggestions for further development.
- 
-	It also makes a small contribution to the effort
-	that's gone into developing this work.
-*********************************************************************/
+/**** NOTICE TO PACKAGERS
+ *
+ * This module now relies on mod_xml2enc for i18n support.
+ * You should make mod_xml2enc a dependency in your packages.
+ */
 
 /* End of Notices */
 
@@ -57,7 +46,8 @@ http://apache.webthing.com/COPYING.txt
 #define VERBOSEB(x) if (verbose) {x}
 #endif
 
-#define VERSION_STRING "proxy_html/3.0.1"
+/* 3.1.2 - trivial changes to fix compile on Windows */
+#define VERSION_STRING "proxy_html/3.1.2"
 
 #include <ctype.h>
 
@@ -70,7 +60,11 @@ http://apache.webthing.com/COPYING.txt
 #include <http_log.h>
 #include <apr_strings.h>
 #include <apr_hash.h>
-#include <apr_xlate.h>
+#include <apr_strmatch.h>
+
+#include <apr_optional.h>
+#include <mod_xml2enc.h>
+#include <http_request.h>
 
 /* To support Apache 2.1/2.2, we need the ap_ forms of the
  * regexp stuff, and they're now used in the code.
@@ -91,6 +85,12 @@ http://apache.webthing.com/COPYING.txt
 #define APACHE22
 #endif
 
+/* globals set once at startup (assigned in the mod_proxy_html post_config hook) */
+static ap_regex_t* seek_meta ; /* compiled regex that locates <meta ... http-equiv ...> tags */
+static const apr_strmatch_pattern* seek_content ; /* precompiled search pattern for "content" */
+static apr_status_t (*xml2enc_charset)(request_rec*, xmlCharEncoding*, const char**) = NULL; /* optional fn; stays NULL if none is registered */
+static apr_status_t (*xml2enc_filter)(request_rec*, const char*, unsigned int) = NULL; /* optional fn; stays NULL if none is registered */
+
 module AP_MODULE_DECLARE_DATA proxy_html_module ;
 
 #define M_HTML			0x01
@@ -135,23 +135,17 @@ typedef struct {
   size_t bufsz ;
   apr_hash_t* links;
   apr_array_header_t* events;
-  apr_array_header_t* skipto;
-  xmlCharEncoding default_encoding;
   const char* charset_out;
   int extfix ;
   int metafix ;
   int strip_comments ;
   int interp;
+  int enabled;
 #ifndef GO_FASTER
   int verbose ;
 #endif
 } proxy_html_conf ;
 typedef struct {
-  apr_xlate_t* convset;
-  char* buf;
-  apr_size_t bytes;
-} conv_t;
-typedef struct {
   ap_filter_t* f ;
   proxy_html_conf* cfg ;
   htmlParserCtxtPtr parser ;
@@ -159,8 +153,6 @@ typedef struct {
   char* buf ;
   size_t offset ;
   size_t avail ;
-  conv_t* conv_in;
-  conv_t* conv_out;
   const char* encoding;
   urlmap* map;
 } saxctxt ;
@@ -195,132 +187,15 @@ static void normalise(unsigned int flags, char* str) {
 	*p = tolower(*p) ;
 
   if ( flags & NORM_MSSLASH )
-    for ( p = ap_strchr_c(str, '\\') ; p ; p = ap_strchr_c(p+1, '\\') )
+    for ( p = ap_strchr(str, '\\') ; p ; p = ap_strchr(p+1, '\\') )
       *p = '/' ;
 
 }
-static void consume_buffer(saxctxt* ctx, const char* inbuf,
-		int bytes, int flag) {
-  apr_status_t rv;
-  apr_size_t insz;
-  char* buf;
-#ifndef GO_FASTER
-  int verbose = ctx->cfg->verbose;
-#endif
-  if (ctx->conv_in == NULL) {
-    /* just feed it to libxml2 */
-    htmlParseChunk(ctx->parser, inbuf, bytes, flag) ;
-    return;
-  }
-  if (ctx->conv_in->bytes > 0) {
-    /* FIXME: make this a reusable buf? */
-    buf = apr_palloc(ctx->f->r->pool, ctx->conv_in->bytes + bytes);
-    memcpy(buf, ctx->conv_in->buf, ctx->conv_in->bytes);
-    memcpy(buf + ctx->conv_in->bytes, inbuf, bytes);
-    bytes += ctx->conv_in->bytes;
-    ctx->conv_in->bytes = 0;
-  } else {
-    buf = (char*) inbuf;
-  }
-  insz = bytes;
-  while (insz > 0) {
-    char outbuf[4096];
-    apr_size_t outsz = 4096;
-    rv = apr_xlate_conv_buffer(ctx->conv_in->convset,
-				buf + (bytes - insz), &insz,
-				outbuf, &outsz);
-    htmlParseChunk(ctx->parser, outbuf, 4096-outsz, flag) ;
-    switch (rv) {
-      case APR_SUCCESS:
-	continue;
-      case APR_EINCOMPLETE:
-	if (insz < 32) {/* save dangling byte(s) and return */
-	  ctx->conv_in->bytes = insz;
-	  ctx->conv_in->buf = (buf != inbuf) ? buf + (bytes-insz)
-		: apr_pmemdup(ctx->f->r->pool, buf + (bytes-insz), insz);
-	  return;
-	} else { /*OK, maybe 4096 wasn't big enough, and ended mid-char */
-	  continue;
-	}
-      case APR_EINVAL:		/* try skipping one bad byte */
-        VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, ctx->f->r,
-		"Skipping invalid byte in input stream!") ) ;
-	--insz;
-	continue;
-      default:
-	/* Erk!  What's this? Bail out and eat the buf raw
-	 * if libxml2 will accept it!
-	 */
-        ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, ctx->f->r,
-		"Failed to convert input; trying it raw") ;
-        htmlParseChunk(ctx->parser, buf + (bytes - insz), insz, flag) ;
-	ctx->conv_in = NULL;	/* don't try converting any more */
-	return;
-    }
-  }
-}
-static void AP_fwrite(saxctxt* ctx, const char* inbuf, int bytes, int flush) {
-  /* convert charset if necessary, and output */
-  char* buf;
-  apr_status_t rv;
-  apr_size_t insz ;
-#ifndef GO_FASTER
-  int verbose = ctx->cfg->verbose;
-#endif
+#define consume_buffer(ctx,inbuf,bytes,flag) /* hand the chunk straight to libxml2 */ \
+	htmlParseChunk((ctx)->parser, (inbuf), (bytes), (flag))
 
-  if (ctx->conv_out == NULL) {
-    ap_fwrite(ctx->f->next, ctx->bb, inbuf, bytes);
-    return;
-  }
-  if (ctx->conv_out->bytes > 0) {
-    /* FIXME: make this a reusable buf? */
-    buf = apr_palloc(ctx->f->r->pool, ctx->conv_out->bytes + bytes);
-    memcpy(buf, ctx->conv_out->buf, ctx->conv_out->bytes);
-    memcpy(buf + ctx->conv_out->bytes, inbuf, bytes);
-    bytes += ctx->conv_out->bytes;
-    ctx->conv_out->bytes = 0;
-  } else {
-    buf = (char*) inbuf;
-  }
-  insz = bytes;
-  while (insz > 0) {
-    char outbuf[2048];
-    apr_size_t outsz = 2048;
-    rv = apr_xlate_conv_buffer(ctx->conv_out->convset,
-				buf + (bytes - insz), &insz,
-				outbuf, &outsz);
-    ap_fwrite(ctx->f->next, ctx->bb, outbuf, 2048-outsz) ;
-    switch (rv) {
-      case APR_SUCCESS:
-	continue;
-      case APR_EINCOMPLETE:	/* save dangling byte(s) and return */
-				/* but if we need to flush, just abandon them */
-	if  ( flush) {		/* if we're flushing, this must be complete */
-				/* so this is an error */
-          VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, ctx->f->r,
-		"Skipping invalid byte in output stream!") ) ;
-	} else {
-	  ctx->conv_out->bytes = insz;
-	  ctx->conv_out->buf = (buf != inbuf) ? buf + (bytes-insz)
-		: apr_pmemdup(ctx->f->r->pool, buf + (bytes-insz), insz);
-	}
-	break;
-      case APR_EINVAL:		/* try skipping one bad byte */
-        VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, ctx->f->r,
-		"Skipping invalid byte in output stream!") ) ;
-	--insz;
-	continue;
-      default:
-	/* Erk!  What's this? Bail out and pass the buf raw
-	 * if libxml2 will accept it!
-	 */
-        VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, ctx->f->r,
-		"Failed to convert output; sending UTF-8") ) ;
-        ap_fwrite(ctx->f->next, ctx->bb, buf + (bytes - insz), insz) ;
-	break;
-    }
-  }
-}
+#define AP_fwrite(ctx,inbuf,bytes,flush) /* flush is unused; no ';' here so a call is exactly one statement */ \
+	ap_fwrite((ctx)->f->next, (ctx)->bb, (inbuf), (bytes))
 
 /* This is always utf-8 on entry.  We can convert charset within FLUSH */
 #define FLUSH AP_fwrite(ctx, (chars+begin), (i-begin), 0) ; begin = i+1
@@ -350,9 +225,9 @@ static void preserve(saxctxt* ctx, const size_t len) {
   newbuf = realloc(ctx->buf, ctx->avail) ;
   if ( newbuf != ctx->buf ) {
     if ( ctx->buf )
-	apr_pool_cleanup_kill(ctx->f->r->pool, ctx->buf, (void*)free) ;
+	apr_pool_cleanup_kill(ctx->f->r->pool, ctx->buf, (int(*)(void*))free);
     apr_pool_cleanup_register(ctx->f->r->pool, newbuf,
-	(void*)free, apr_pool_cleanup_null);
+	(int(*)(void*))free, apr_pool_cleanup_null);
     ctx->buf = newbuf ;
   }
 }
@@ -614,7 +489,7 @@ static void pstartElement(void* ctxt, const xmlChar* uname,
 		  ++num_match ;
 		  offs = match = pmatch[0].rm_so ;
 		  s_from = pmatch[0].rm_eo - match ;
-		  subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf+offs,
+		  subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf,
 		  	nmatch, pmatch) ;
 		  VERBOSE( {
 		    const char* f = apr_pstrndup(ctx->f->r->pool,
@@ -765,100 +640,6 @@ static void pstartElement(void* ctxt, const xmlChar* uname,
   }
 }
 
-/* globals set once at startup */
-static ap_regex_t* seek_meta_ctype ;
-static ap_regex_t* seek_charset ;
-static ap_regex_t* seek_meta ;
-
-static xmlCharEncoding sniff_encoding(saxctxt* ctx, const char* cbuf,
-		size_t bytes) {
-#ifndef GO_FASTER
-  int verbose = ctx->cfg->verbose;
-#endif
-  request_rec* r = ctx->f->r ;
-  proxy_html_conf* cfg = ctx->cfg ;
-  xmlCharEncoding ret ;
-  char* p ;
-  ap_regmatch_t match[2] ;
-  char* buf = (char*)cbuf ;
-  apr_xlate_t* convset;
-
-  VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
-		"Content-Type is %s", r->content_type) ) ;
-
-/* If we've got it in the HTTP headers, there's nothing to do */
-  if ( r->content_type &&
-	( p = ap_strcasestr(r->content_type, "charset=") , p > 0 ) ) {
-    p += 8 ;
-    if ( ctx->encoding = apr_pstrndup(r->pool, p, strcspn(p, " ;") ) ,
-	ctx->encoding ) {
-	VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
-		"Got charset %s from HTTP headers", ctx->encoding) ) ;
-      if ( ret = xmlParseCharEncoding(ctx->encoding),
-		((ret != XML_CHAR_ENCODING_ERROR )
-		 && (ret != XML_CHAR_ENCODING_NONE))) {
-	return ret ;
-      }
-    }
-  }
-
-/* to sniff, first we look for BOM */
-  if (ctx->encoding == NULL) {
-    if ( ret = xmlDetectCharEncoding((const xmlChar*)buf, bytes),
-	ret != XML_CHAR_ENCODING_NONE ) {
-      VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
-	"Got charset from XML rules.") ) ;
-      return ret ;
-    }
-
-/* If none of the above, look for a META-thingey */
-    if ( ap_regexec(seek_meta_ctype, buf, 1, match, 0) == 0 ) {
-      p = apr_pstrndup(r->pool, buf + match[0].rm_so,
-	match[0].rm_eo - match[0].rm_so) ;
-      if ( ap_regexec(seek_charset, p, 2, match, 0) == 0 )
-        ctx->encoding = apr_pstrndup(r->pool, p+match[1].rm_so,
-	  match[1].rm_eo - match[1].rm_so) ;
-    }
-  }
-
-/* either it's set to something we found or it's still the default */
-  if ( ctx->encoding ) {
-    VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
-	"Got charset %s from HTML META", ctx->encoding) ) ;
-    if ( ret = xmlParseCharEncoding(ctx->encoding),
-		((ret != XML_CHAR_ENCODING_ERROR )
-		 && (ret != XML_CHAR_ENCODING_NONE))) {
-      return ret ;
-    }
-/* Unsupported charset.  Can we get (iconv) support through apr_xlate? */
-/* Aaargh!  libxml2 has undocumented <META-crap> support.  So this fails
- * if metafix is not active.  Have to make it conditional.
- */
-    if (cfg->metafix) {
-      VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
-	"Charset %s not supported by libxml2; trying apr_xlate", ctx->encoding) ) ;
-      if (apr_xlate_open(&convset, "UTF-8", ctx->encoding, r->pool) == APR_SUCCESS) {
-        ctx->conv_in = apr_pcalloc(r->pool, sizeof(conv_t));
-        ctx->conv_in->convset = convset ;
-        return XML_CHAR_ENCODING_UTF8 ;
-      } else {
-        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
-	  "Charset %s not supported.  Consider aliasing it?", ctx->encoding) ;
-      }
-    } else {
-      ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
-	  "Charset %s not supported.  Consider aliasing it or use metafix?",
-	  ctx->encoding) ;
-    }
-  }
-
-
-/* Use configuration default as a last resort */
-  ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r,
-	"No usable charset information; using configuration default") ;
-  return (cfg->default_encoding == XML_CHAR_ENCODING_NONE)
-	? XML_CHAR_ENCODING_8859_1 : cfg->default_encoding ;
-}
 static meta* metafix(request_rec* r, const char* buf /*, size_t bytes*/
 #ifndef GO_FASTER
 		, int verbose
@@ -882,21 +663,26 @@ static meta* metafix(request_rec* r, const char* buf /*, size_t bytes*/
     header = apr_pstrndup(r->pool, p, q-p) ;
     if ( strncasecmp(header, "Content-", 8) ) {
 /* find content=... string */
-      for ( p = ap_strstr((char*)buf+offs+pmatch[0].rm_so, "content") ; *p ; ) {
-	p += 7 ;
-	while ( *p && isspace(*p) )
-	  ++p ;
-	if ( *p != '=' )
-	  continue ;
-	while ( *p && isspace(*++p) ) ;
-	if ( ( *p == '\'' ) || ( *p == '"' ) ) {
-	  delim = *p++ ;
-	  for ( q = p ; *q != delim ; ++q ) ;
-	} else {
-	  for ( q = p ; *q && !isspace(*q) && (*q != '>') ; ++q ) ;
-	}
-	content = apr_pstrndup(r->pool, p, q-p) ;
-	break ;
+      p = apr_strmatch(seek_content, buf+offs+pmatch[0].rm_so,
+			pmatch[0].rm_eo - pmatch[0].rm_so);
+      /* NULL p: no "content" in this tag.  Need '=' next, else search on */
+      while (p != NULL) {
+	p += 7 ;
+	while ( *p && isspace((unsigned char)*p) )
+	  ++p ;
+	if ( *p != '=' ) {	/* not an attribute: look again, inside the tag */
+	  p = apr_strmatch(seek_content, p, (buf+offs+pmatch[0].rm_eo) - p) ;
+	  continue ;
+	}
+	while ( *p && isspace((unsigned char)*++p) ) ;
+	if ( ( *p == '\'' ) || ( *p == '"' ) ) {
+	  delim = *p++ ;
+	  for ( q = p ; *q && ( *q != delim ) ; ++q ) ; /* stop at NUL too */
+	} else {
+	  for ( q = p ; *q && !isspace((unsigned char)*q) && (*q != '>') ; ++q ) ;
+	}
+	content = apr_pstrndup(r->pool, p, q-p) ;
+	break ;
       }
     } else if ( !strncasecmp(header, "Content-Type", 12) ) {
       ret = apr_palloc(r->pool, sizeof(meta) ) ;
@@ -938,11 +724,12 @@ static const char* interpolate_vars(request_rec* r, const char* str) {
       var = apr_pstrndup(r->pool, start+2, end-start-2) ;
     }
     replacement = apr_table_get(r->subprocess_env, var) ;
-    if (!replacement)
+    if (!replacement) {
       if (delim)
 	replacement = apr_pstrndup(r->pool, delim+1, end-delim-1);
       else
 	replacement = "";
+    }
     str = apr_pstrcat(r->pool, before, replacement, after, NULL);
     ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
 			"Interpolating %s  =>  %s", var, replacement) ;
@@ -1033,7 +820,7 @@ static saxctxt* check_filter_init (ap_filter_t* f) {
     if ( errmsg ) {
 #ifndef GO_FASTER
       if ( cfg->verbose ) {
-        ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, f->r, errmsg) ;
+        ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, f->r, "%s", errmsg) ;
       }
 #endif
       ap_remove_output_filter(f) ;
@@ -1057,8 +844,6 @@ static saxctxt* check_filter_init (ap_filter_t* f) {
   return f->ctx ;
 }
 static int proxy_html_filter(ap_filter_t* f, apr_bucket_brigade* bb) {
-  apr_xlate_t* convset;
-  const char* charset = NULL;
   apr_bucket* b ;
   meta* m = NULL ;
   xmlCharEncoding enc ;
@@ -1101,69 +886,28 @@ static int proxy_html_filter(ap_filter_t* f, apr_bucket_brigade* bb) {
     } else if ( apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ)
 	      == APR_SUCCESS ) {
       if ( ctxt->parser == NULL ) {
-	if ( buf[bytes] != 0 ) {
-	  /* make a string for parse routines to play with */
-	  char* buf1 = apr_palloc(f->r->pool, bytes+1) ;
-	  memcpy(buf1, buf, bytes) ;
-	  buf1[bytes] = 0 ;
-	  buf = buf1 ;
-	}
-	/* For publishing systems that insert crap at the head of a
-	 * page that buggers up the parser.  Search to first instance
-	 * of some relatively sane, or at least parseable, element.
-	 */
-	if (ctxt->cfg->skipto != NULL) {
-	  char* p = ap_strchr_c(buf, '<');
-	  tattr* starts = (tattr*) ctxt->cfg->skipto->elts;
-	  int found = 0;
-	  while (!found && *p) {
-	    int i;
-	    for (i = 0; i < ctxt->cfg->skipto->nelts; ++i) {
-	      if ( !strncasecmp(p+1, starts[i].val, strlen(starts[i].val))) {
-		bytes -= (p-buf);
-		buf = p ;
-		found = 1;
-		VERBOSE(
-		ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r,
-			"Skipped to first <%s> element", starts[i].val)
-		) ;
-		break;
-	      }
-	    }
-	    p = ap_strchr_c(p+1, '<');
-	  }
-	  if (p == NULL) {
-            ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,
-	        "Failed to find start of recognised HTML!") ;
-	  }
-	}
-
-	enc = sniff_encoding(ctxt, buf, bytes) ;
-	/* now we have input charset, set output charset too */
-        if (ctxt->cfg->charset_out) {
-	  if (!strcmp(ctxt->cfg->charset_out, "*"))
-	    charset = ctxt->encoding;
-	  else
-	    charset = ctxt->cfg->charset_out;
-	  if (strcasecmp(charset, "utf-8")) {
-            if (apr_xlate_open(&convset, charset, "UTF-8",
-		f->r->pool) == APR_SUCCESS) {
-	      ctxt->conv_out = apr_pcalloc(f->r->pool, sizeof(conv_t));
-	      ctxt->conv_out->convset = convset;
-            } else {
-              ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,
-	        "Output charset %s not supported.  Falling back to UTF-8",
-	        charset) ;
-            }
-	  }
-        }
-	if (ctxt->conv_out) {
-	  const char* ctype = apr_psprintf(f->r->pool,
-			"text/html;charset=%s", charset);
-	  ap_set_content_type(f->r, ctype) ;
-	} else {
+	const char* cenc;
+	if (!xml2enc_charset ||
+		(xml2enc_charset(f->r, &enc, &cenc) != APR_SUCCESS)) {
+	  if (!xml2enc_charset)
+	    ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,
+		"No i18n support found.  Install mod_xml2enc if required") ;
+	  enc = XML_CHAR_ENCODING_NONE;
 	  ap_set_content_type(f->r, "text/html;charset=utf-8") ;
+	} else {
+          /* if we wanted a non-default charset_out, insert the
+	   * xml2enc filter now that we've sniffed it
+	   */
+	  if (ctxt->cfg->charset_out && xml2enc_filter) {
+	    if (*ctxt->cfg->charset_out != '*')
+              cenc = ctxt->cfg->charset_out;
+	    xml2enc_filter(f->r, cenc, ENCIO_OUTPUT);
+	    ap_set_content_type(f->r,
+		apr_pstrcat(f->r->pool, "text/html;charset=", cenc, NULL)) ;
+	  } else /* Normal case, everything worked, utf-8 output */
+	    ap_set_content_type(f->r, "text/html;charset=utf-8") ;
 	}
+
 	ap_fputs(f->next, ctxt->bb, ctxt->cfg->doctype) ;
 	ctxt->parser = htmlCreatePushParserCtxt(&sax, ctxt, buf, 4, 0, enc) ;
 	buf += 4;
@@ -1174,7 +918,7 @@ static int proxy_html_filter(ap_filter_t* f, apr_bucket_brigade* bb) {
 	  return rv;
 	}
 	apr_pool_cleanup_register(f->r->pool, ctxt->parser,
-		(void*)htmlFreeParserCtxt, apr_pool_cleanup_null) ;
+		(int(*)(void*))htmlFreeParserCtxt, apr_pool_cleanup_null) ;
 #ifndef USE_OLD_LIBXML2
 	if ( xmlopts = xmlCtxtUseOptions(ctxt->parser, xmlopts ), xmlopts )
 	  ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,
@@ -1209,7 +953,6 @@ static void* proxy_html_config(apr_pool_t* pool, char* x) {
   ret->doctype = DEFAULT_DOCTYPE ;
   ret->etag = DEFAULT_ETAG ;
   ret->bufsz = 8192 ;
-  ret->default_encoding = XML_CHAR_ENCODING_NONE ;
   /* ret->interp = 1; */
   /* don't initialise links and events until they get set/used */
   return ret ;
@@ -1223,8 +966,6 @@ static void* proxy_html_merge(apr_pool_t* pool, void* BASE, void* ADD) {
   conf->links = (add->links == NULL) ? base->links : add->links;
   conf->events = (add->events == NULL) ? base->events : add->events;
 
-  conf->default_encoding = (add->default_encoding == XML_CHAR_ENCODING_NONE)
-	? base->default_encoding : add->default_encoding ;
   conf->charset_out = (add->charset_out == NULL)
 	? base->charset_out : add->charset_out ;
 
@@ -1254,7 +995,7 @@ static void* proxy_html_merge(apr_pool_t* pool, void* BASE, void* ADD) {
     conf->extfix = add->extfix ;
     conf->interp = add->interp ;
     conf->strip_comments = add->strip_comments ;
-    conf->skipto = add->skipto ;
+    conf->enabled = add->enabled;
 #ifndef GO_FASTER
     conf->verbose = add->verbose ;
 #endif
@@ -1264,7 +1005,7 @@ static void* proxy_html_merge(apr_pool_t* pool, void* BASE, void* ADD) {
     conf->extfix = base->extfix | add->extfix ;
     conf->interp = base->interp | add->interp ;
     conf->strip_comments = base->strip_comments | add->strip_comments ;
-    conf->skipto = add->skipto ? add->skipto : base->skipto ;
+    conf->enabled = add->enabled | base->enabled;
 #ifndef GO_FASTER
     conf->verbose = base->verbose | add->verbose ;
 #endif
@@ -1303,16 +1044,17 @@ static void comp_urlmap(apr_pool_t* pool, urlmap* newmap,
     newmap->to = to ;
   }
   if (cond != NULL) {
+    char* cond_copy;
     newmap->cond = apr_pcalloc(pool, sizeof(rewritecond));
     if (cond[0] == '!') {
       newmap->cond->rel = -1;
-      newmap->cond->env = cond+1;
+      newmap->cond->env = cond_copy = apr_pstrdup(pool, cond+1);
     } else {
       newmap->cond->rel = 1;
-      newmap->cond->env = cond;
+      newmap->cond->env = cond_copy = apr_pstrdup(pool, cond);
     }
-    eq = ap_strchr_c(++cond, '=');
-    if (eq && (eq != cond)) {
+    eq = *cond_copy ? ap_strchr(cond_copy+1, '=') : NULL; /* skip 1st char, but never step past the NUL of an empty name */
+    if (eq) {
       *eq = 0;
       newmap->cond->val = eq+1;
     }
@@ -1400,15 +1142,6 @@ static const char* set_events(cmd_parms* cmd, void* CFG, const char* arg) {
   attr->val = arg;
   return NULL ;
 }
-static const char* set_skipto(cmd_parms* cmd, void* CFG, const char* arg) {
-  tattr* attr;
-  proxy_html_conf* cfg = CFG;
-  if (cfg->skipto == NULL)
-    cfg->skipto = apr_array_make(cmd->pool, 4, sizeof(tattr));
-  attr = apr_array_push(cfg->skipto) ;
-  attr->val = arg;
-  return NULL ;
-}
 static const char* set_links(cmd_parms* cmd, void* CFG,
 	const char* elt, const char* att) {
   apr_array_header_t* attrs;
@@ -1427,33 +1160,7 @@ static const char* set_links(cmd_parms* cmd, void* CFG,
   attr->val = att ;
   return NULL ;
 }
-static const char* set_charset_alias(cmd_parms* cmd, void* CFG,
-	const char* charset, const char* alias) {
-  const char* errmsg = ap_check_cmd_context(cmd, GLOBAL_ONLY);
-  if (errmsg != NULL)
-    return errmsg ;
-  else if (xmlAddEncodingAlias(charset, alias) == 0)
-    return NULL;
-  else
-    return "Error setting charset alias";
-}
-static const char* set_charset_default(cmd_parms* cmd, void* CFG,
-	const char* charset) {
-  proxy_html_conf* cfg = CFG;
-  cfg->default_encoding = xmlParseCharEncoding(charset);
-  switch(cfg->default_encoding) {
-    case XML_CHAR_ENCODING_NONE:
-      return "Default charset not found";
-    case XML_CHAR_ENCODING_ERROR:
-      return "Invalid or unsupported default charset";
-    default:
-      return NULL;
-  }
-}
 static const command_rec proxy_html_cmds[] = {
-  AP_INIT_ITERATE("ProxyHTMLStartParse", set_skipto, NULL,
-	RSRC_CONF|ACCESS_CONF,
-	"Ignore anything in front of the first of these elements"),
   AP_INIT_ITERATE("ProxyHTMLEvents", set_events, NULL,
 	RSRC_CONF|ACCESS_CONF, "Strings to be treated as scripting events"),
   AP_INIT_ITERATE2("ProxyHTMLLinks", set_links, NULL,
@@ -1485,38 +1192,52 @@ static const command_rec proxy_html_cmds[] = {
   AP_INIT_TAKE1("ProxyHTMLBufSize", ap_set_int_slot,
 	(void*)APR_OFFSETOF(proxy_html_conf, bufsz),
 	RSRC_CONF|ACCESS_CONF, "Buffer size" ) ,
-  AP_INIT_ITERATE2("ProxyHTMLCharsetAlias", set_charset_alias, NULL,
-	RSRC_CONF, "ProxyHTMLCharsetAlias charset alias [more aliases]" ) ,
-  AP_INIT_TAKE1("ProxyHTMLCharsetDefault", set_charset_default, NULL,
-	RSRC_CONF|ACCESS_CONF, "Usage: ProxyHTMLCharsetDefault charset" ) ,
   AP_INIT_TAKE1("ProxyHTMLCharsetOut", ap_set_string_slot,
 	(void*)APR_OFFSETOF(proxy_html_conf, charset_out),
 	RSRC_CONF|ACCESS_CONF, "Usage: ProxyHTMLCharsetOut charset" ) ,
+  AP_INIT_FLAG("ProxyHTMLEnable", ap_set_flag_slot,
+	(void*)APR_OFFSETOF(proxy_html_conf, enabled),
+	RSRC_CONF|ACCESS_CONF, "Enable proxy-html and xml2enc filters" ) ,
   { NULL }
 } ;
 static int mod_proxy_html(apr_pool_t* p, apr_pool_t* p1, apr_pool_t* p2,
 	server_rec* s) {
   ap_add_version_component(p, VERSION_STRING) ;
-  seek_meta_ctype = ap_pregcomp(p,
-	"(<meta[^>]*http-equiv[ \t\r\n='\"]*content-type[^>]*>)",
-	AP_REG_EXTENDED|AP_REG_ICASE) ;
-  seek_charset = ap_pregcomp(p, "charset=([A-Za-z0-9_-]+)",
-	AP_REG_EXTENDED|AP_REG_ICASE) ;
   seek_meta = ap_pregcomp(p, "<meta[^>]*(http-equiv)[^>]*>",
 	AP_REG_EXTENDED|AP_REG_ICASE) ;
+  seek_content = apr_strmatch_precompile(p, "content", 0); /* used by metafix; 3rd arg is the case_sensitive flag (0: presumably case-insensitive - confirm in APR docs) */
   memset(&sax, 0, sizeof(htmlSAXHandler));
   sax.startElement = pstartElement ;
   sax.endElement = pendElement ;
   sax.characters = pcharacters ;
   sax.comment = pcomment ;
   sax.cdataBlock = pcdata ;
+  xml2enc_charset = APR_RETRIEVE_OPTIONAL_FN(xml2enc_charset); /* optional function lookup; result is tested for NULL below */
+  xml2enc_filter = APR_RETRIEVE_OPTIONAL_FN(xml2enc_filter); /* optional function lookup; callers test it for NULL before use */
+  if (!xml2enc_charset) { /* lookup failed: log a notice once config is processed, but still return OK */
+    ap_log_perror(APLOG_MARK, APLOG_NOTICE, 0, p2,
+      "I18n support in mod_proxy_html requires mod_xml2enc. "
+      "Without it, non-ASCII characters in proxied pages are "
+      "likely to display incorrectly.");
+  }
   return OK ;
 }
+static void proxy_html_insert(request_rec* r) {
+  /* insert_filter hook: with ProxyHTMLEnable on, add xml2enc input checks (if available) and proxy-html */
+  proxy_html_conf* conf = ap_get_module_config(r->per_dir_config, &proxy_html_module);
+  if (!conf->enabled)
+    return;
+  if (xml2enc_filter != NULL)
+    xml2enc_filter(r, NULL, ENCIO_INPUT_CHECKS);
+  ap_add_output_filter("proxy-html", NULL, r, r->connection);
+}
 static void proxy_html_hooks(apr_pool_t* p) {
+  static const char* aszSucc[] = { "mod_filter.c", NULL }; /* NULL-terminated module list handed to ap_hook_insert_filter below */
   ap_register_output_filter_protocol("proxy-html", proxy_html_filter,
 	NULL, AP_FTYPE_RESOURCE,
 	AP_FILTER_PROTO_CHANGE|AP_FILTER_PROTO_CHANGE_LENGTH) ;
   ap_hook_post_config(mod_proxy_html, NULL, NULL, APR_HOOK_MIDDLE) ;
+  ap_hook_insert_filter(proxy_html_insert, NULL, aszSucc, APR_HOOK_MIDDLE) ; /* aszSucc is the 3rd (successor) argument: presumably orders this hook before mod_filter.c - confirm in hook docs */
 }
 module AP_MODULE_DECLARE_DATA proxy_html_module = {
 	STANDARD20_MODULE_STUFF,