--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html lang="en"><head>
+<title>mod_proxy_html</title>
+<style type="text/css">
+@import url(/index.css) ;
+</style>
+</head><body>
+<div id="apache">
+<h1>mod_proxy_html: Frequently Asked Questions</h1>
+<p>This answers some of the most frequently asked questions
+that aren't dealt with (or that people overlook) in the documentation
+and the apachetutor tutorial. <span class="v3">This was written for
+Version 2, and most of the questions are moot in Version 3.</span></p>
+<h2>Questions</h2>
+<ol>
+<li><a href="#charset">Can mod_proxy_html support (charset XYZ) as input?</a></li>
+<li><a href="#iconv">Can mod_proxy_html support (charset XYZ) as output?</a></li>
+<li><a href="#script">Why does mod_proxy_html mangle my Javascript?</a></li>
+<li><a href="#attr">Why doesn't mod_proxy_html rewrite urls in [some attribute]?</a></li>
+</ol>
+<h2>Answers</h2>
+<dl>
+<dt id="charset">Can mod_proxy_html support (charset XYZ) as input?</dt>
+<dd><p>That depends entirely on libxml2. mod_proxy_html supports
+charset detection, but does not itself support any charsets.
+It works by passing the charset detected to libxml2 when it sets
+up the parser.</p>
+<p>This means that mod_proxy_html inherits its charset support
+from libxml2, and will always support <em>exactly</em> the same
+charsets available in the version of libxml2 you have installed.
+So bug the libxml2 folks, not us!</p>
+<p class="v3">In Version 3, charset support is much expanded provided
+<code>ProxyHTMLMeta</code> is enabled, and any charset can be supported
+by aliasing it with <code>ProxyHTMLCharsetAlias</code>.</p>
+</dd>
+<dt id="iconv">Can mod_proxy_html support (charset XYZ) as output?</dt>
+<dd><p>libxml2 uses <code>utf-8</code> internally for everything.
+Generating output with another charset is therefore an additional
+overhead, and the decision was taken to exclude any such capability
+from mod_proxy_html. There is an easy workaround: you can transcode
+the output using another filter, such as mod_charset_lite.</p>
+<p class="v3">Version 3 supports output transformation to other
+charsets using <code>ProxyHTMLCharsetOut</code>.</p>
+</dd>
+<dt id="script">Why does mod_proxy_html mangle my Javascript?</dt>
+<dd><p>It doesn't. Your javascript is simply too badly malformed,
+and libxml2's error correction isn't what you expect!
+Check it with <a href="http://valet.webthing.com/page/">a validator</a>,
+or with libxml2's <tt>xmllint --html</tt>
+(which uses the same parser as mod_proxy_html). Here is
+<a href="/mod_publisher/parser.html">a fuller explanation</a>.</p>
+<p>The best fix for this is to remove the javascript from your markup,
+and import it from a separate <tt>.js</tt> file. If you have an
+irredeemably broken publishing system, you may have to upgrade to
+<a href="/mod_publisher/">mod_publisher</a> or resort to the
+non-markup-aware <a href="/mod_line_edit/">mod_line_edit</a>.</p>
+</dd>
+<dt id="attr">Why doesn't mod_proxy_html rewrite urls in [some attribute]?</dt>
+<dd><p>mod_proxy_html is based on W3C HTML 4.01 and XHTML 1.0 (which are
+identical in terms of elements and attributes). It supports all links
+defined in W3C HTML, even those that have been deprecated since 1997.
+But it does <strong>NOT</strong> support proprietary pseudo-HTML "extensions"
+that have never been part of <strong>any</strong> published HTML standard.
+Of course, it's trivial to add them to the source.</p>
+<p>This has been the most commonly requested feature since mod_proxy_html 2.0
+was released in 2004. It cannot reasonably be satisfied, because everyone's
+pet "extensions" are different. <span class="v3">Version 3 deals with this
+by taking all HTML knowledge out of the code and loading it from httpd.conf
+instead, so admins can meet their own needs without recompiling.</span></p>
+</dd>
+</dl>
+</div>
+</body></html>
/********************************************************************
- Copyright (c) 2003-7, WebThing Ltd
+ Copyright (c) 2003-8, WebThing Ltd
Author: Nick Kew <nick@webthing.com>
This program is free software; you can redistribute it and/or modify
#ifdef GO_FASTER
#define VERBOSE(x)
+#define VERBOSEB(x)
#else
-#define VERBOSE(x) if ( verbose ) x
+#define VERBOSE(x) if (verbose) x
+#define VERBOSEB(x) if (verbose) {x}
#endif
-#define VERSION_STRING "proxy_html/3.0.0"
+#define VERSION_STRING "proxy_html/3.0.1"
#include <ctype.h>
switch (rv) {
case APR_SUCCESS:
continue;
- case APR_EINCOMPLETE: /* save dangling byte(s) and return */
- ctx->conv_in->bytes = insz;
- ctx->conv_in->buf = (buf != inbuf) ? buf + (bytes-insz)
+ case APR_EINCOMPLETE:
+ if (insz < 32) {/* save dangling byte(s) and return */
+ ctx->conv_in->bytes = insz;
+ ctx->conv_in->buf = (buf != inbuf) ? buf + (bytes-insz)
: apr_pmemdup(ctx->f->r->pool, buf + (bytes-insz), insz);
- break;
+ return;
+ } else { /*OK, maybe 4096 wasn't big enough, and ended mid-char */
+ continue;
+ }
case APR_EINVAL: /* try skipping one bad byte */
VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, ctx->f->r,
"Skipping invalid byte in input stream!") ) ;
"Failed to convert input; trying it raw") ;
htmlParseChunk(ctx->parser, buf + (bytes - insz), insz, flag) ;
ctx->conv_in = NULL; /* don't try converting any more */
- break;
+ return;
}
}
}
s_to = strlen(subs) ;
len = strlen(ctx->buf) ;
offs += match ;
- VERBOSE( {
+ VERBOSEB(
const char* f = apr_pstrndup(ctx->f->r->pool,
ctx->buf + offs , s_from ) ;
ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, ctx->f->r,
"C/RX: match at %s, substituting %s", f, subs) ;
- } )
+ )
if ( s_to > s_from) {
preserve(ctx, s_to - s_from) ;
memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from,
}
static saxctxt* check_filter_init (ap_filter_t* f) {
saxctxt* fctx ;
- proxy_html_conf* cfg
+ if ( ! f->ctx) {
+ proxy_html_conf* cfg
= ap_get_module_config(f->r->per_dir_config, &proxy_html_module);
- const char* force = apr_table_get(f->r->subprocess_env, "PROXY_HTML_FORCE");
-
- const char* errmsg = NULL ;
- if ( !force ) {
- if ( ! f->r->proxyreq ) {
- errmsg = "Non-proxy request; not inserting proxy-html filter" ;
- } else if ( ! f->r->content_type ) {
- errmsg = "No content-type; bailing out of proxy-html filter" ;
- } else if ( strncasecmp(f->r->content_type, "text/html", 9) &&
- strncasecmp(f->r->content_type, "application/xhtml+xml", 21) ) {
- errmsg = "Non-HTML content; not inserting proxy-html filter" ;
+ const char* force = apr_table_get(f->r->subprocess_env, "PROXY_HTML_FORCE");
+
+ const char* errmsg = NULL ;
+ if ( !force ) {
+ if ( ! f->r->proxyreq ) {
+ errmsg = "Non-proxy request; not inserting proxy-html filter" ;
+ } else if ( ! f->r->content_type ) {
+ errmsg = "No content-type; bailing out of proxy-html filter" ;
+ } else if ( strncasecmp(f->r->content_type, "text/html", 9) &&
+ strncasecmp(f->r->content_type, "application/xhtml+xml", 21) ) {
+ errmsg = "Non-HTML content; not inserting proxy-html filter" ;
+ }
+ }
+ if (!cfg->links) {
+ errmsg = "No links configured: nothing for proxy-html filter to do";
}
- }
- if ( errmsg ) {
+ if ( errmsg ) {
#ifndef GO_FASTER
- if ( cfg->verbose ) {
- ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, f->r, errmsg) ;
- }
+ if ( cfg->verbose ) {
+ ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, f->r, errmsg) ;
+ }
#endif
- ap_remove_output_filter(f) ;
- return NULL ;
- }
+ ap_remove_output_filter(f) ;
+ return NULL ;
+ }
- if ( ! f->ctx) {
fctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(saxctxt)) ;
fctx->f = f ;
fctx->bb = apr_brigade_create(f->r->pool, f->r->connection->bucket_alloc) ;