From 0763d8ba77649020ce7b6cd3fac1fe390521b531 Mon Sep 17 00:00:00 2001 From: Emmanuel Lacour Date: Wed, 20 Aug 2008 10:14:13 +0000 Subject: [PATCH] Tagging mod-proxy-html (3.0.1-1) --- README | 1 + debian/changelog | 7 ++++++ debian/docs | 1 + faq.html | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ mod_proxy_html.c | 71 +++++++++++++++++++++++++++++------------------------ 5 files changed, 123 insertions(+), 31 deletions(-) create mode 100644 faq.html diff --git a/README b/README index 296798b..2c2f65d 100644 --- a/README +++ b/README @@ -8,6 +8,7 @@ some new configuration. You can Include proxy_html.conf from this bundle in your httpd.conf (or apache.conf) to use Version 3 as a drop-in replacement for Version 2. + WINDOWS USERS: You may need to install some prerequisite libraries before you can diff --git a/debian/changelog b/debian/changelog index f976198..e31ed5e 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +mod-proxy-html (3.0.1-1) unstable; urgency=low + + * New upstream release + * Added FAQ from upstream site. + + -- Emmanuel Lacour Wed, 20 Aug 2008 12:06:58 +0200 + mod-proxy-html (3.0.0-1) unstable; urgency=low * New upstream release, closes: #446782 diff --git a/debian/docs b/debian/docs index 3a0c4da..37ac420 100644 --- a/debian/docs +++ b/debian/docs @@ -1,2 +1,3 @@ config.html guide.html +faq.html diff --git a/faq.html b/faq.html new file mode 100644 index 0000000..86cfe03 --- /dev/null +++ b/faq.html @@ -0,0 +1,74 @@ + + + +mod_proxy_html + + +
+

mod_proxy_html: Frequently Asked Questions

+

This answers some of the most frequently asked questions +that aren't dealt with (or that people overlook) in the documentation +and the apachetutor tutorial. This was written for +Version 2, and most of the questions are moot in Version 3.

+

Questions

+
    +
  1. Can mod_proxy_html support (charset XYZ) as input?
  2. +
  3. Can mod_proxy_html support (charset XYZ) as output?
  4. +
  5. Why does mod_proxy_html mangle my Javascript?
  6. +
  7. Why doesn't mod_proxy_html rewrite urls in [some attribute]?
  8. +
+

Answers

+
+
Can mod_proxy_html support (charset XYZ) as input?
+

That depends entirely on libxml2. mod_proxy_html supports +charset detection, but does not itself support any charsets. +It works by passing the detected charset to libxml2 when it sets +up the parser.

+

This means that mod_proxy_html inherits its charset support +from libxml2, and will always support exactly the same +charsets available in the version of libxml2 you have installed. +So bug the libxml2 folks, not us!

+

In Version 3, charset support is much expanded, provided +ProxyHTMLMeta is enabled, and any charset can be supported +by aliasing it with ProxyHTMLCharsetAlias.

+
+
Can mod_proxy_html support (charset XYZ) as output?
+

libxml2 uses UTF-8 internally for everything. +Generating output with another charset is therefore an additional +overhead, and the decision was taken to exclude any such capability +from mod_proxy_html. There is an easy workaround: you can transcode +the output using another filter, such as mod_charset_lite.

+

Version 3 supports output transformation to other +charsets using ProxyHTMLCharsetOut.

+
+
Why does mod_proxy_html mangle my Javascript?
+

It doesn't. Your javascript is simply too badly malformed, +and libxml2's error correction isn't what you expect! +Check it with a validator, +or with libxml2's xmllint --html +(which uses the same parser as mod_proxy_html). Here is +a fuller explanation.

+

The best fix for this is to remove the javascript from your markup, +and import it from a separate .js file. If you have an +irredeemably broken publishing system, you may have to upgrade to +mod_publisher or resort to the +non-markup-aware mod_line_edit.

+
+
Why doesn't mod_proxy_html rewrite urls in [some attribute]?
+

mod_proxy_html is based on W3C HTML 4.01 and XHTML 1.0 (which are +identical in terms of elements and attributes). It supports all links +defined in W3C HTML, even those that have been deprecated since 1997. +But it does NOT support proprietary pseudo-HTML "extensions" +that have never been part of any published HTML standard. +Of course, it's trivial to add them to the source.

+

This has been the most commonly requested feature since mod_proxy_html 2.0 +was released in 2004. It cannot reasonably be satisfied, because everyone's +pet "extensions" are different. Version 3 deals with this +by taking all HTML knowledge out of the code and loading it from httpd.conf +instead, so admins can meet their own needs without recompiling.

+
+
+
+ diff --git a/mod_proxy_html.c b/mod_proxy_html.c index 6a4e59b..0157c2f 100644 --- a/mod_proxy_html.c +++ b/mod_proxy_html.c @@ -1,5 +1,5 @@ /******************************************************************** - Copyright (c) 2003-7, WebThing Ltd + Copyright (c) 2003-8, WebThing Ltd Author: Nick Kew This program is free software; you can redistribute it and/or modify @@ -51,11 +51,13 @@ http://apache.webthing.com/COPYING.txt #ifdef GO_FASTER #define VERBOSE(x) +#define VERBOSEB(x) #else -#define VERBOSE(x) if ( verbose ) x +#define VERBOSE(x) if (verbose) x +#define VERBOSEB(x) if (verbose) {x} #endif -#define VERSION_STRING "proxy_html/3.0.0" +#define VERSION_STRING "proxy_html/3.0.1" #include @@ -231,11 +233,15 @@ static void consume_buffer(saxctxt* ctx, const char* inbuf, switch (rv) { case APR_SUCCESS: continue; - case APR_EINCOMPLETE: /* save dangling byte(s) and return */ - ctx->conv_in->bytes = insz; - ctx->conv_in->buf = (buf != inbuf) ? buf + (bytes-insz) + case APR_EINCOMPLETE: + if (insz < 32) {/* save dangling byte(s) and return */ + ctx->conv_in->bytes = insz; + ctx->conv_in->buf = (buf != inbuf) ? 
buf + (bytes-insz) : apr_pmemdup(ctx->f->r->pool, buf + (bytes-insz), insz); - break; + return; + } else { /*OK, maybe 4096 wasn't big enough, and ended mid-char */ + continue; + } case APR_EINVAL: /* try skipping one bad byte */ VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, ctx->f->r, "Skipping invalid byte in input stream!") ) ; @@ -249,7 +255,7 @@ static void consume_buffer(saxctxt* ctx, const char* inbuf, "Failed to convert input; trying it raw") ; htmlParseChunk(ctx->parser, buf + (bytes - insz), insz, flag) ; ctx->conv_in = NULL; /* don't try converting any more */ - break; + return; } } } @@ -386,12 +392,12 @@ static void dump_content(saxctxt* ctx) { s_to = strlen(subs) ; len = strlen(ctx->buf) ; offs += match ; - VERBOSE( { + VERBOSEB( const char* f = apr_pstrndup(ctx->f->r->pool, ctx->buf + offs , s_from ) ; ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, ctx->f->r, "C/RX: match at %s, substituting %s", f, subs) ; - } ) + ) if ( s_to > s_from) { preserve(ctx, s_to - s_from) ; memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from, @@ -1004,33 +1010,36 @@ static void fixup_rules(saxctxt* ctx) { } static saxctxt* check_filter_init (ap_filter_t* f) { saxctxt* fctx ; - proxy_html_conf* cfg + if ( ! f->ctx) { + proxy_html_conf* cfg = ap_get_module_config(f->r->per_dir_config, &proxy_html_module); - const char* force = apr_table_get(f->r->subprocess_env, "PROXY_HTML_FORCE"); - - const char* errmsg = NULL ; - if ( !force ) { - if ( ! f->r->proxyreq ) { - errmsg = "Non-proxy request; not inserting proxy-html filter" ; - } else if ( ! f->r->content_type ) { - errmsg = "No content-type; bailing out of proxy-html filter" ; - } else if ( strncasecmp(f->r->content_type, "text/html", 9) && - strncasecmp(f->r->content_type, "application/xhtml+xml", 21) ) { - errmsg = "Non-HTML content; not inserting proxy-html filter" ; + const char* force = apr_table_get(f->r->subprocess_env, "PROXY_HTML_FORCE"); + + const char* errmsg = NULL ; + if ( !force ) { + if ( ! 
f->r->proxyreq ) { + errmsg = "Non-proxy request; not inserting proxy-html filter" ; + } else if ( ! f->r->content_type ) { + errmsg = "No content-type; bailing out of proxy-html filter" ; + } else if ( strncasecmp(f->r->content_type, "text/html", 9) && + strncasecmp(f->r->content_type, "application/xhtml+xml", 21) ) { + errmsg = "Non-HTML content; not inserting proxy-html filter" ; + } + } + if (!cfg->links) { + errmsg = "No links configured: nothing for proxy-html filter to do"; } - } - if ( errmsg ) { + if ( errmsg ) { #ifndef GO_FASTER - if ( cfg->verbose ) { - ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, f->r, errmsg) ; - } + if ( cfg->verbose ) { + ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, f->r, errmsg) ; + } #endif - ap_remove_output_filter(f) ; - return NULL ; - } + ap_remove_output_filter(f) ; + return NULL ; + } - if ( ! f->ctx) { fctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(saxctxt)) ; fctx->f = f ; fctx->bb = apr_brigade_create(f->r->pool, f->r->connection->bucket_alloc) ; -- 2.11.0