X-Git-Url: http://git.home-dn.net/?p=manu%2Fmod-proxy-html.git;a=blobdiff_plain;f=mod_proxy_html.c;h=6a97d3e3e618cb7920e62cfbf113eb04cfd1e422;hp=0215cb24c5e056551bbaf334c2b3bbbed4ecbc29;hb=e549ceb293b562539137a1f692c267afa1e66c7b;hpb=e23d1bdf874c2af6d7ab54fa834da653cc75c6cc diff --git a/mod_proxy_html.c b/mod_proxy_html.c index 0215cb2..6a97d3e 100644 --- a/mod_proxy_html.c +++ b/mod_proxy_html.c @@ -1,40 +1,28 @@ /******************************************************************** - Copyright (c) 2003-4, WebThing Ltd + Copyright (c) 2003-9, WebThing Ltd Author: Nick Kew This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - +it under the terms of the GNU General Public License Version 2, +as published by the Free Software Foundation. + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - -*********************************************************************/ - +You can obtain a copy of the GNU General Public License Version 2 +from http://www.gnu.org/licenses/old-licenses/gpl-2.0.html or +http://apache.webthing.com/COPYING.txt -/******************************************************************** - Note to Users - - You are requested to register as a user, at - http://apache.webthing.com/registration.html - - This entitles you to support from the developer. 
- I'm unlikely to reply to help/support requests from - non-registered users, unless you're paying and/or offering - constructive feedback such as bug reports or sensible - suggestions for further development. - - It also makes a small contribution to the effort - that's gone into developing this work. *********************************************************************/ +/**** NOTICE TO PACKAGERS + * + * This module now relies on mod_xml2enc for i18n support. + * You should make mod_xml2enc a dependency in your packages. + */ + /* End of Notices */ @@ -52,15 +40,18 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. #ifdef GO_FASTER #define VERBOSE(x) +#define VERBOSEB(x) #else -#define VERBOSE(x) if ( verbose ) x +#define VERBOSE(x) if (verbose) x +#define VERBOSEB(x) if (verbose) {x} #endif -#define VERSION_STRING "proxy_html/2.4" +/* 3.1.2 - trivial changes to fix compile on Windows */ +#define VERSION_STRING "proxy_html/3.1.2" #include -/* libxml */ +/* libxml2 */ #include /* apache */ @@ -68,45 +59,93 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. #include #include #include +#include +#include + +#include +#include +#include + +/* To support Apache 2.1/2.2, we need the ap_ forms of the + * regexp stuff, and they're now used in the code. + * To support 2.0 in the same compile, * we #define the + * AP_ versions if necessary. 
+ */ +#ifndef AP_REG_ICASE +/* it's 2.0, so we #define the ap_ versions */ +#define ap_regex_t regex_t +#define ap_regmatch_t regmatch_t +#define AP_REG_EXTENDED REG_EXTENDED +#define AP_REG_ICASE REG_ICASE +#define AP_REG_NOSUB REG_NOSUB +#define AP_REG_NEWLINE REG_NEWLINE +#define APACHE20 +#define ap_register_output_filter_protocol(a,b,c,d,e) ap_register_output_filter(a,b,c,d) +#else +#define APACHE22 +#endif + +/* globals set once at startup */ +static ap_regex_t* seek_meta ; +static const apr_strmatch_pattern* seek_content ; +static apr_status_t (*xml2enc_charset)(request_rec*, xmlCharEncoding*, const char**) = NULL; +static apr_status_t (*xml2enc_filter)(request_rec*, const char*, unsigned int) = NULL; module AP_MODULE_DECLARE_DATA proxy_html_module ; -#define M_HTML 0x01 -#define M_EVENTS 0x02 -#define M_CDATA 0x04 -#define M_REGEX 0x08 -#define M_ATSTART 0x10 -#define M_ATEND 0x20 -#define M_LAST 0x40 +#define M_HTML 0x01 +#define M_EVENTS 0x02 +#define M_CDATA 0x04 +#define M_REGEX 0x08 +#define M_ATSTART 0x10 +#define M_ATEND 0x20 +#define M_LAST 0x40 +#define M_NOTLAST 0x80 +#define M_INTERPOLATE_TO 0x100 +#define M_INTERPOLATE_FROM 0x200 typedef struct { + const char* val; +} tattr; +typedef struct { unsigned int start ; unsigned int end ; } meta ; +typedef struct { + const char* env; + const char* val; + int rel; +} rewritecond; typedef struct urlmap { struct urlmap* next ; unsigned int flags ; + unsigned int regflags ; union { const char* c ; ap_regex_t* r ; } from ; const char* to ; + rewritecond* cond; } urlmap ; typedef struct { urlmap* map ; const char* doctype ; const char* etag ; unsigned int flags ; + size_t bufsz ; + apr_hash_t* links; + apr_array_header_t* events; + const char* charset_out; int extfix ; int metafix ; int strip_comments ; + int interp; + int enabled; #ifndef GO_FASTER int verbose ; #endif - size_t bufsz ; } proxy_html_conf ; typedef struct { - htmlSAXHandlerPtr sax ; ap_filter_t* f ; proxy_html_conf* cfg ; htmlParserCtxtPtr 
parser ; @@ -114,58 +153,54 @@ typedef struct { char* buf ; size_t offset ; size_t avail ; + const char* encoding; + urlmap* map; } saxctxt ; -static int is_empty_elt(const char* name) { - const char** p ; - static const char* empty_elts[] = { - "br" , - "link" , - "img" , - "hr" , - "input" , - "meta" , - "base" , - "area" , - "param" , - "col" , - "frame" , - "isindex" , - "basefont" , - NULL - } ; - for ( p = empty_elts ; *p ; ++p ) - if ( !strcmp( *p, name) ) - return 1 ; - return 0 ; -} - -typedef struct { - const char* name ; - const char** attrs ; -} elt_t ; #define NORM_LC 0x1 #define NORM_MSSLASH 0x2 #define NORM_RESET 0x4 +static htmlSAXHandler sax ; typedef enum { ATTR_IGNORE, ATTR_URI, ATTR_EVENT } rewrite_t ; +static const char* const fpi_html = + "\n" ; +static const char* const fpi_html_legacy = + "\n" ; +static const char* const fpi_xhtml = + "\n" ; +static const char* const fpi_xhtml_legacy = + "\n" ; +static const char* const html_etag = ">" ; +static const char* const xhtml_etag = " />" ; +/*#define DEFAULT_DOCTYPE fpi_html */ +static const char* const DEFAULT_DOCTYPE = "" ; +#define DEFAULT_ETAG html_etag + static void normalise(unsigned int flags, char* str) { - xmlChar* p ; + char* p ; if ( flags & NORM_LC ) for ( p = str ; *p ; ++p ) if ( isupper(*p) ) *p = tolower(*p) ; if ( flags & NORM_MSSLASH ) - for ( p = strchr(str, '\\') ; p ; p = strchr(p+1, '\\') ) + for ( p = ap_strchr(str, '\\') ; p ; p = ap_strchr(p+1, '\\') ) *p = '/' ; } +#define consume_buffer(ctx,inbuf,bytes,flag) \ + htmlParseChunk(ctx->parser, inbuf, bytes, flag) + +#define AP_fwrite(ctx,inbuf,bytes,flush) \ + ap_fwrite(ctx->f->next, ctx->bb, inbuf, bytes); -#define FLUSH ap_fwrite(ctx->f->next, ctx->bb, (chars+begin), (i-begin)) ; begin = i+1 -static void pcharacters(void* ctxt, const xmlChar *chars, int length) { +/* This is always utf-8 on entry. 
We can convert charset within FLUSH */ +#define FLUSH AP_fwrite(ctx, (chars+begin), (i-begin), 0) ; begin = i+1 +static void pcharacters(void* ctxt, const xmlChar *uchars, int length) { + const char* chars = (const char*) uchars; saxctxt* ctx = (saxctxt*) ctxt ; int i ; int begin ; @@ -190,9 +225,9 @@ static void preserve(saxctxt* ctx, const size_t len) { newbuf = realloc(ctx->buf, ctx->avail) ; if ( newbuf != ctx->buf ) { if ( ctx->buf ) - apr_pool_cleanup_kill(ctx->f->r->pool, ctx->buf, (void*)free) ; + apr_pool_cleanup_kill(ctx->f->r->pool, ctx->buf, (int(*)(void*))free); apr_pool_cleanup_register(ctx->f->r->pool, newbuf, - (void*)free, apr_pool_cleanup_null); + (int(*)(void*))free, apr_pool_cleanup_null); ctx->buf = newbuf ; } } @@ -211,13 +246,14 @@ static void dump_content(saxctxt* ctx) { ap_regmatch_t pmatch[10] ; char* subs ; size_t len, offs ; + urlmap* themap = ctx->map; #ifndef GO_FASTER int verbose = ctx->cfg->verbose ; #endif pappend(ctx, &c, 1) ; /* append null byte */ /* parse the text for URLs */ - for ( m = ctx->cfg->map ; m ; m = m->next ) { + for ( m = themap ; m ; m = m->next ) { if ( ! 
( m->flags & M_CDATA ) ) continue ; if ( m->flags & M_REGEX ) { @@ -231,12 +267,12 @@ static void dump_content(saxctxt* ctx) { s_to = strlen(subs) ; len = strlen(ctx->buf) ; offs += match ; - VERBOSE( { + VERBOSEB( const char* f = apr_pstrndup(ctx->f->r->pool, ctx->buf + offs , s_from ) ; ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, ctx->f->r, "C/RX: match at %s, substituting %s", f, subs) ; - } ) + ) if ( s_to > s_from) { preserve(ctx, s_to - s_from) ; memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from, @@ -275,17 +311,22 @@ static void dump_content(saxctxt* ctx) { } } } - ap_fputs(ctx->f->next, ctx->bb, ctx->buf) ; + AP_fwrite(ctx, ctx->buf, strlen(ctx->buf), 1) ; } -static void pcdata(void* ctxt, const xmlChar *chars, int length) { +static void pcdata(void* ctxt, const xmlChar *uchars, int length) { + const char* chars = (const char*) uchars; saxctxt* ctx = (saxctxt*) ctxt ; if ( ctx->cfg->extfix ) { pappend(ctx, chars, length) ; } else { - ap_fwrite(ctx->f->next, ctx->bb, chars, length) ; + /* not sure if this should force-flush + * (i.e. can one cdata section come in multiple calls?) 
+ */ + AP_fwrite(ctx, chars, length, 0) ; } } -static void pcomment(void* ctxt, const xmlChar *chars) { +static void pcomment(void* ctxt, const xmlChar *uchars) { + const char* chars = (const char*) uchars; saxctxt* ctx = (saxctxt*) ctxt ; if ( ctx->cfg->strip_comments ) return ; @@ -295,29 +336,47 @@ static void pcomment(void* ctxt, const xmlChar *chars) { pappend(ctx, chars, strlen(chars) ) ; pappend(ctx, "-->", 3) ; } else { - ap_fputstrs(ctx->f->next, ctx->bb, "", NULL) ; + ap_fputs(ctx->f->next, ctx->bb, "") ; } } -static void pendElement(void* ctxt, const xmlChar* name) { +static void pendElement(void* ctxt, const xmlChar* uname) { saxctxt* ctx = (saxctxt*) ctxt ; + const char* name = (const char*) uname; + const htmlElemDesc* desc = htmlTagLookup(uname); + + if ((ctx->cfg->doctype == fpi_html) || (ctx->cfg->doctype == fpi_xhtml)) { + /* enforce html */ + if (!desc || desc->depr) + return; + + } else if ((ctx->cfg->doctype == fpi_html) + || (ctx->cfg->doctype == fpi_xhtml)) { + /* enforce html legacy */ + if (!desc) + return; + } + /* TODO - implement HTML "allowed here" using the stack */ + /* nah. Keeping the stack is too much overhead */ + if ( ctx->offset > 0 ) { dump_content(ctx) ; ctx->offset = 0 ; /* having dumped it, we can re-use the memory */ } - if ( ! is_empty_elt(name) ) + if ( !desc || ! 
desc->empty ) { ap_fprintf(ctx->f->next, ctx->bb, "", name) ; + } } -static void pstartElement(void* ctxt, const xmlChar* name, - const xmlChar** attrs ) { +static void pstartElement(void* ctxt, const xmlChar* uname, + const xmlChar** uattrs ) { + int required_attrs ; int num_match ; size_t offs, len ; char* subs ; rewrite_t is_uri ; - const char** linkattrs ; - const xmlChar** a ; - const elt_t* elt ; - const char** linkattr ; + const char** a ; urlmap* m ; size_t s_to, s_from, match ; char* found ; @@ -327,88 +386,92 @@ static void pstartElement(void* ctxt, const xmlChar* name, #ifndef GO_FASTER int verbose = ctx->cfg->verbose ; #endif - - static const char* href[] = { "href", NULL } ; - static const char* cite[] = { "cite", NULL } ; - static const char* action[] = { "action", NULL } ; - static const char* imgattr[] = { "src", "longdesc", "usemap", NULL } ; - static const char* inputattr[] = { "src", "usemap", NULL } ; - static const char* scriptattr[] = { "src", "for", NULL } ; - static const char* frameattr[] = { "src", "longdesc", NULL } ; - static const char* objattr[] = { "classid", "codebase", "data", "usemap", NULL } ; - static const char* profile[] = { "profile", NULL } ; - static const char* background[] = { "background", NULL } ; - static const char* codebase[] = { "codebase", NULL } ; - - static const elt_t linked_elts[] = { - { "a" , href } , - { "img" , imgattr } , - { "form", action } , - { "link" , href } , - { "script" , scriptattr } , - { "base" , href } , - { "area" , href } , - { "input" , inputattr } , - { "frame", frameattr } , - { "iframe", frameattr } , - { "object", objattr } , - { "q" , cite } , - { "blockquote" , cite } , - { "ins" , cite } , - { "del" , cite } , - { "head" , profile } , - { "body" , background } , - { "applet", codebase } , - { NULL, NULL } - } ; - static const char* events[] = { - "onclick" , - "ondblclick" , - "onmousedown" , - "onmouseup" , - "onmouseover" , - "onmousemove" , - "onmouseout" , - "onkeypress" , - 
"onkeydown" , - "onkeyup" , - "onfocus" , - "onblur" , - "onload" , - "onunload" , - "onsubmit" , - "onreset" , - "onselect" , - "onchange" , - NULL - } ; + apr_array_header_t *linkattrs; + int i; + const char* name = (const char*) uname; + const char** attrs = (const char**) uattrs; + const htmlElemDesc* desc = htmlTagLookup(uname); + urlmap* themap = ctx->map; +#ifdef HAVE_STACK + const void** descp; +#endif + int enforce = 0; + if ((ctx->cfg->doctype == fpi_html) || (ctx->cfg->doctype == fpi_xhtml)) { + /* enforce html */ + enforce = 2; + if (!desc || desc->depr) + return; + + } else if ((ctx->cfg->doctype == fpi_html) + || (ctx->cfg->doctype == fpi_xhtml)) { + enforce = 1; + /* enforce html legacy */ + if (!desc) { + return; + } + } + if (!desc && enforce) { + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r, + "Bogus HTML element %s dropped", name) ; + return; + } + if (desc && desc->depr && (enforce == 2) ) { + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r, + "Deprecated HTML element %s dropped", name) ; + return; + } +#ifdef HAVE_STACK + descp = apr_array_push(ctx->stack); + *descp = desc; + /* TODO - implement HTML "allowed here" */ +#endif ap_fputc(ctx->f->next, ctx->bb, '<') ; ap_fputs(ctx->f->next, ctx->bb, name) ; + required_attrs = 0; + if ((enforce > 0) && (desc != NULL) && (desc->attrs_req != NULL)) + for (a = desc->attrs_req; *a; a++) + ++required_attrs; + if ( attrs ) { - linkattrs = 0 ; - for ( elt = linked_elts; elt->name != NULL ; ++elt ) - if ( !strcmp(elt->name, name) ) { - linkattrs = elt->attrs ; - break ; - } + linkattrs = apr_hash_get(ctx->cfg->links, name, APR_HASH_KEY_STRING) ; for ( a = attrs ; *a ; a += 2 ) { + if (desc && enforce > 0) { + switch (htmlAttrAllowed(desc, (xmlChar*)*a, 2-enforce)) { + case HTML_INVALID: + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r, + "Bogus HTML attribute %s of %s dropped", *a, name); + continue; + case HTML_DEPRECATED: + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r, + "Deprecated 
HTML attribute %s of %s dropped", *a, name); + continue; + case HTML_REQUIRED: + required_attrs--; /* cross off the number still needed */ + /* fallthrough - required implies valid */ + default: + break; + } + } ctx->offset = 0 ; if ( a[1] ) { pappend(ctx, a[1], strlen(a[1])+1) ; is_uri = ATTR_IGNORE ; if ( linkattrs ) { - for ( linkattr = linkattrs ; *linkattr ; ++linkattr) { - if ( !strcmp(*linkattr, *a) ) { + tattr* attrs = (tattr*) linkattrs->elts; + for (i=0; i < linkattrs->nelts; ++i) { + if ( !strcmp(*a, attrs[i].val)) { is_uri = ATTR_URI ; break ; } } } - if ( (is_uri == ATTR_IGNORE) && ctx->cfg->extfix ) { - for ( linkattr = events; *linkattr; ++linkattr ) { - if ( !strcmp(*linkattr, *a) ) { + if ( (is_uri == ATTR_IGNORE) && ctx->cfg->extfix + && (ctx->cfg->events != NULL) ) { + for (i=0; i < ctx->cfg->events->nelts; ++i) { + tattr* attrs = (tattr*) ctx->cfg->events->elts; + if ( !strcmp(*a, attrs[i].val)) { is_uri = ATTR_EVENT ; break ; } @@ -417,7 +480,7 @@ static void pstartElement(void* ctxt, const xmlChar* name, switch ( is_uri ) { case ATTR_URI: num_match = 0 ; - for ( m = ctx->cfg->map ; m ; m = m->next ) { + for ( m = themap ; m ; m = m->next ) { if ( ! 
( m->flags & M_HTML ) ) continue ; if ( m->flags & M_REGEX ) { @@ -426,7 +489,7 @@ static void pstartElement(void* ctxt, const xmlChar* name, ++num_match ; offs = match = pmatch[0].rm_so ; s_from = pmatch[0].rm_eo - match ; - subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf+offs, + subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf, nmatch, pmatch) ; VERBOSE( { const char* f = apr_pstrndup(ctx->f->r->pool, @@ -468,12 +531,13 @@ static void pstartElement(void* ctxt, const xmlChar* name, break ; } } - if ( num_match > 0 ) /* URIs only want one match */ + /* URIs only want one match unless overridden in the config */ + if ( (num_match > 0) && !( m->flags & M_NOTLAST ) ) break ; } break ; case ATTR_EVENT: - for ( m = ctx->cfg->map ; m ; m = m->next ) { + for ( m = themap ; m ; m = m->next ) { num_match = 0 ; /* reset here since we're working per-rule */ if ( ! ( m->flags & M_EVENTS ) ) continue ; @@ -557,111 +621,25 @@ static void pstartElement(void* ctxt, const xmlChar* name, anything that needs it in the value. 
*/ ap_fputstrs(ctx->f->next, ctx->bb, " ", a[0], "=\"", NULL) ; - pcharacters(ctx, ctx->buf, strlen(ctx->buf)) ; + pcharacters(ctx, (const xmlChar*)ctx->buf, strlen(ctx->buf)) ; ap_fputc(ctx->f->next, ctx->bb, '"') ; } } } ctx->offset = 0 ; - if ( is_empty_elt(name) ) + if ( desc && desc->empty ) ap_fputs(ctx->f->next, ctx->bb, ctx->cfg->etag) ; else ap_fputc(ctx->f->next, ctx->bb, '>') ; -} -static htmlSAXHandlerPtr setupSAX(apr_pool_t* pool) { - htmlSAXHandlerPtr sax = apr_pcalloc(pool, sizeof(htmlSAXHandler) ) ; - sax->startDocument = NULL ; - sax->endDocument = NULL ; - sax->startElement = pstartElement ; - sax->endElement = pendElement ; - sax->characters = pcharacters ; - sax->comment = pcomment ; - sax->cdataBlock = pcdata ; - return sax ; -} - -static ap_regex_t* seek_meta_ctype ; -static ap_regex_t* seek_charset ; -static ap_regex_t* seek_meta ; - -static void proxy_html_child_init(apr_pool_t* pool, server_rec* s) { - seek_meta_ctype = ap_pregcomp(pool, - "(]*http-equiv[ \t\r\n='\"]*content-type[^>]*>)", - AP_REG_EXTENDED|AP_REG_ICASE) ; - seek_charset = ap_pregcomp(pool, "charset=([A-Za-z0-9_-]+)", - AP_REG_EXTENDED|AP_REG_ICASE) ; - seek_meta = ap_pregcomp(pool, "]*(http-equiv)[^>]*>", - AP_REG_EXTENDED|AP_REG_ICASE) ; -} - -static xmlCharEncoding sniff_encoding(request_rec* r, const char* cbuf, size_t bytes -#ifndef GO_FASTER - , int verbose -#endif - ) { - xmlCharEncoding ret ; - char* encoding = NULL ; - char* p ; - char* q ; - ap_regmatch_t match[2] ; - unsigned char* buf = (unsigned char*)cbuf ; - - VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, - "Content-Type is %s", r->content_type) ) ; - -/* If we've got it in the HTTP headers, there's nothing to do */ - if ( r->content_type && - ( p = ap_strcasestr(r->content_type, "charset=") , p > 0 ) ) { - p += 8 ; - if ( encoding = apr_pstrndup(r->pool, p, strcspn(p, " ;") ) , encoding ) { - if ( ret = xmlParseCharEncoding(encoding), - ret != XML_CHAR_ENCODING_ERROR ) { - VERBOSE( 
ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, - "Got charset %s from HTTP headers", encoding) ) ; - return ret ; - } else { - ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, - "Unsupported charset %s in HTTP headers", encoding) ; - encoding = NULL ; - } - } - } -/* to sniff, first we look for BOM */ - if ( ret = xmlDetectCharEncoding(buf, bytes), - ret != XML_CHAR_ENCODING_NONE ) { - VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, - "Got charset from XML rules.") ) ; - return ret ; + if ((enforce > 0) && (required_attrs > 0)) { + /* if there are more required attributes than we found then complain */ + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r, + "HTML element %s is missing %d required attributes", + name, required_attrs); } - -/* If none of the above, look for a META-thingey */ - encoding = NULL ; - if ( ap_regexec(seek_meta_ctype, buf, 1, match, 0) == 0 ) { - p = apr_pstrndup(r->pool, buf + match[0].rm_so, - match[0].rm_eo - match[0].rm_so) ; - if ( ap_regexec(seek_charset, p, 2, match, 0) == 0 ) - encoding = apr_pstrndup(r->pool, p+match[1].rm_so, - match[1].rm_eo - match[1].rm_so) ; - } - -/* either it's set to something we found or it's still the default */ - if ( encoding ) - if ( ret = xmlParseCharEncoding(encoding), - ret != XML_CHAR_ENCODING_ERROR ) { - VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, - "Got charset %s from HTML META", encoding) ) ; - return ret ; - } else { - ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, - "Unsupported charset %s in HTML META", encoding) ; - } - -/* the old HTTP default is a last resort */ - ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r, - "No usable charset information: using old HTTP default LATIN1") ; - return XML_CHAR_ENCODING_8859_1 ; } + static meta* metafix(request_rec* r, const char* buf /*, size_t bytes*/ #ifndef GO_FASTER , int verbose @@ -685,21 +663,26 @@ static meta* metafix(request_rec* r, const char* buf /*, size_t bytes*/ header = apr_pstrndup(r->pool, p, q-p) ; if ( strncasecmp(header, 
"Content-", 8) ) { /* find content=... string */ - for ( p = strstr(buf+offs+pmatch[0].rm_so, "content") ; *p ; ) { - p += 7 ; - while ( *p && isspace(*p) ) - ++p ; - if ( *p != '=' ) - continue ; - while ( *p && isspace(*++p) ) ; - if ( ( *p == '\'' ) || ( *p == '"' ) ) { - delim = *p++ ; - for ( q = p ; *q != delim ; ++q ) ; - } else { - for ( q = p ; *q && !isspace(*q) && (*q != '>') ; ++q ) ; - } - content = apr_pstrndup(r->pool, p, q-p) ; - break ; + p = apr_strmatch(seek_content, buf+offs+pmatch[0].rm_so, + pmatch[0].rm_eo - pmatch[0].rm_so); + /* if it doesn't contain "content", ignore, don't crash! */ + if (p != NULL) { + while (*p) { + p += 7 ; + while ( *p && isspace(*p) ) + ++p ; + if ( *p != '=' ) + continue ; + while ( *p && isspace(*++p) ) ; + if ( ( *p == '\'' ) || ( *p == '"' ) ) { + delim = *p++ ; + for ( q = p ; *q != delim ; ++q ) ; + } else { + for ( q = p ; *q && !isspace(*q) && (*q != '>') ; ++q ) ; + } + content = apr_pstrndup(r->pool, p, q-p) ; + break ; + } } } else if ( !strncasecmp(header, "Content-Type", 12) ) { ret = apr_palloc(r->pool, sizeof(meta) ) ; @@ -716,66 +699,148 @@ static meta* metafix(request_rec* r, const char* buf /*, size_t bytes*/ return ret ; } -static int proxy_html_filter_init(ap_filter_t* f) { - const char* env ; - saxctxt* fctx ; +static const char* interpolate_vars(request_rec* r, const char* str) { + const char* start; + const char* end; + const char* delim; + const char* before; + const char* after; + const char* replacement; + const char* var; + for (;;) { + start = str ; + if (start = ap_strstr_c(start, "${"), start == NULL) + break; -#if 0 -/* remove content-length filter */ - ap_filter_rec_t* clf = ap_get_output_filter_handle("CONTENT_LENGTH") ; - ap_filter_t* ff = f->next ; - - do { - ap_filter_t* fnext = ff->next ; - if ( ff->frec == clf ) - ap_remove_output_filter(ff) ; - ff = fnext ; - } while ( ff ) ; -#endif + if (end = ap_strchr_c(start+2, '}'), end == NULL) + break; - fctx = f->ctx = 
apr_pcalloc(f->r->pool, sizeof(saxctxt)) ; - fctx->sax = setupSAX(f->r->pool) ; - fctx->f = f ; - fctx->bb = apr_brigade_create(f->r->pool, f->r->connection->bucket_alloc) ; - fctx->cfg = ap_get_module_config(f->r->per_dir_config,&proxy_html_module); - - if ( f->r->proto_num >= 1001 ) { - if ( ! f->r->main && ! f->r->prev ) { - env = apr_table_get(f->r->subprocess_env, "force-response-1.0") ; - if ( !env ) - f->r->chunked = 1 ; + delim = ap_strchr_c(start, '|'); + before = apr_pstrndup(r->pool, str, start-str); + after = end+1; + if (delim) { + var = apr_pstrndup(r->pool, start+2, delim-start-2) ; + } else { + var = apr_pstrndup(r->pool, start+2, end-start-2) ; } + replacement = apr_table_get(r->subprocess_env, var) ; + if (!replacement) { + if (delim) + replacement = apr_pstrndup(r->pool, delim+1, end-delim-1); + else + replacement = ""; + } + str = apr_pstrcat(r->pool, before, replacement, after, NULL); + ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, + "Interpolating %s => %s", var, replacement) ; } - - apr_table_unset(f->r->headers_out, "Content-Length") ; - apr_table_unset(f->r->headers_out, "ETag") ; - return OK ; + return str; } -static saxctxt* check_filter_init (ap_filter_t* f) { +static void fixup_rules(saxctxt* ctx) { + const char* thisval; + urlmap* newp; + urlmap* p; + urlmap* prev = NULL; + request_rec* r = ctx->f->r; + int has_cond; + + for (p = ctx->cfg->map; p; p = p->next) { + has_cond = -1; + if (p->cond != NULL) { + thisval = apr_table_get(r->subprocess_env, p->cond->env); + if (!p->cond->val) { + /* required to be "anything" */ + if (thisval) + has_cond = 1; /* satisfied */ + else + has_cond = 0; /* unsatisfied */ + } else { + if (thisval && !strcasecmp(p->cond->val, thisval)) { + has_cond = 1; /* satisfied */ + } else { + has_cond = 0; /* unsatisfied */ + } + } + if (((has_cond == 0) && (p->cond->rel ==1 )) + || ((has_cond == 1) && (p->cond->rel == -1))) { + continue; /* condition is unsatisfied */ + } + } - const char* errmsg = NULL ; - if ( ! 
f->r->proxyreq ) { - errmsg = "Non-proxy request; not inserting proxy-html filter" ; - } else if ( ! f->r->content_type ) { - errmsg = "No content-type; bailing out of proxy-html filter" ; - } else if ( strncasecmp(f->r->content_type, "text/html", 9) && - strncasecmp(f->r->content_type, "application/xhtml+xml", 21) ) { - errmsg = "Non-HTML content; not inserting proxy-html filter" ; + newp = apr_pmemdup(r->pool, p, sizeof(urlmap)); + + if (newp->flags & M_INTERPOLATE_FROM) { + newp->from.c = interpolate_vars(r, newp->from.c); + if (!newp->from.c || !*newp->from.c) + continue; /* don't use empty from-pattern */ + if (newp->flags & M_REGEX) { + newp->from.r = ap_pregcomp(r->pool, newp->from.c, newp->regflags) ; + } + } + if (newp->flags & M_INTERPOLATE_TO) { + newp->to = interpolate_vars(r, newp->to); + } + /* evaluate p->cond; continue if unsatisfied */ + /* create new urlmap with memcpy and append to map */ + /* interpolate from if flagged to do so */ + /* interpolate to if flagged to do so */ + + if (prev != NULL) + prev->next = newp ; + else + ctx->map = newp ; + prev = newp ; } - if ( errmsg ) { -#ifndef GO_FASTER + if (prev) + prev->next = NULL; +} +static saxctxt* check_filter_init (ap_filter_t* f) { + saxctxt* fctx ; + if ( ! f->ctx) { proxy_html_conf* cfg = ap_get_module_config(f->r->per_dir_config, &proxy_html_module); - if ( cfg->verbose ) { - ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, f->r, errmsg) ; + const char* force = apr_table_get(f->r->subprocess_env, "PROXY_HTML_FORCE"); + + const char* errmsg = NULL ; + if ( !force ) { + if ( ! f->r->proxyreq ) { + errmsg = "Non-proxy request; not inserting proxy-html filter" ; + } else if ( ! 
f->r->content_type ) { + errmsg = "No content-type; bailing out of proxy-html filter" ; + } else if ( strncasecmp(f->r->content_type, "text/html", 9) && + strncasecmp(f->r->content_type, "application/xhtml+xml", 21) ) { + errmsg = "Non-HTML content; not inserting proxy-html filter" ; + } + } + if (!cfg->links) { + errmsg = "No links configured: nothing for proxy-html filter to do"; } + + if ( errmsg ) { +#ifndef GO_FASTER + if ( cfg->verbose ) { + ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, f->r, "%s", errmsg) ; + } #endif - ap_remove_output_filter(f) ; - return NULL ; + ap_remove_output_filter(f) ; + return NULL ; + } + + fctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(saxctxt)) ; + fctx->f = f ; + fctx->bb = apr_brigade_create(f->r->pool, f->r->connection->bucket_alloc) ; + fctx->cfg = cfg; + apr_table_unset(f->r->headers_out, "Content-Length") ; + + if (cfg->interp) + fixup_rules(fctx); + else + fctx->map = cfg->map; + /* defer dealing with charset_out until after sniffing charset_in + * so we can support setting one to t'other. + */ } - if ( ! f->ctx ) - proxy_html_filter_init(f) ; return f->ctx ; } static int proxy_html_filter(ap_filter_t* f, apr_bucket_brigade* bb) { @@ -784,59 +849,95 @@ static int proxy_html_filter(ap_filter_t* f, apr_bucket_brigade* bb) { xmlCharEncoding enc ; const char* buf = 0 ; apr_size_t bytes = 0 ; +#ifndef USE_OLD_LIBXML2 int xmlopts = XML_PARSE_RECOVER | XML_PARSE_NONET | XML_PARSE_NOBLANKS | XML_PARSE_NOERROR | XML_PARSE_NOWARNING ; +#endif saxctxt* ctxt = check_filter_init(f) ; +#ifndef GO_FASTER + int verbose; +#endif if ( ! 
ctxt ) return ap_pass_brigade(f->next, bb) ; +#ifndef GO_FASTER + verbose = ctxt->cfg->verbose; +#endif for ( b = APR_BRIGADE_FIRST(bb) ; b != APR_BRIGADE_SENTINEL(bb) ; b = APR_BUCKET_NEXT(b) ) { - if ( APR_BUCKET_IS_EOS(b) ) { - if ( ctxt->parser != NULL ) { - htmlParseChunk(ctxt->parser, buf, 0, 1) ; + if ( APR_BUCKET_IS_METADATA(b) ) { + if ( APR_BUCKET_IS_EOS(b) ) { + if ( ctxt->parser != NULL ) { + consume_buffer(ctxt, buf, 0, 1); + } + APR_BRIGADE_INSERT_TAIL(ctxt->bb, + apr_bucket_eos_create(ctxt->bb->bucket_alloc) ) ; + ap_pass_brigade(ctxt->f->next, ctxt->bb) ; + } else if ( APR_BUCKET_IS_FLUSH(b) ) { + /* pass on flush, except at start where it would cause + * headers to be sent before doc sniffing + */ + if ( ctxt->parser != NULL ) { + ap_fflush(ctxt->f->next, ctxt->bb) ; + } } - APR_BRIGADE_INSERT_TAIL(ctxt->bb, - apr_bucket_eos_create(ctxt->bb->bucket_alloc) ) ; - ap_pass_brigade(ctxt->f->next, ctxt->bb) ; } else if ( apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ) == APR_SUCCESS ) { if ( ctxt->parser == NULL ) { - if ( buf[bytes] != 0 ) { - /* make a string for parse routines to play with */ - char* buf1 = apr_palloc(f->r->pool, bytes+1) ; - memcpy(buf1, buf, bytes) ; - buf1[bytes] = 0 ; - buf = buf1 ; + const char* cenc; + if (!xml2enc_charset || + (xml2enc_charset(f->r, &enc, &cenc) != APR_SUCCESS)) { + if (!xml2enc_charset) + ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r, + "No i18n support found. 
Install mod_xml2enc if required") ; + enc = XML_CHAR_ENCODING_NONE; + ap_set_content_type(f->r, "text/html;charset=utf-8") ; + } else { + /* if we wanted a non-default charset_out, insert the + * xml2enc filter now that we've sniffed it + */ + if (ctxt->cfg->charset_out && xml2enc_filter) { + if (*ctxt->cfg->charset_out != '*') + cenc = ctxt->cfg->charset_out; + xml2enc_filter(f->r, cenc, ENCIO_OUTPUT); + ap_set_content_type(f->r, + apr_pstrcat(f->r->pool, "text/html;charset=", cenc, NULL)) ; + } else /* Normal case, everything worked, utf-8 output */ + ap_set_content_type(f->r, "text/html;charset=utf-8") ; } -#ifndef GO_FASTER - enc = sniff_encoding(f->r, buf, bytes, ctxt->cfg->verbose) ; + + ap_fputs(f->next, ctxt->bb, ctxt->cfg->doctype) ; + ctxt->parser = htmlCreatePushParserCtxt(&sax, ctxt, buf, 4, 0, enc) ; + buf += 4; + bytes -= 4; + if (ctxt->parser == NULL) { + apr_status_t rv = ap_pass_brigade(f->next, bb) ; + ap_remove_output_filter(f) ; + return rv; + } + apr_pool_cleanup_register(f->r->pool, ctxt->parser, + (int(*)(void*))htmlFreeParserCtxt, apr_pool_cleanup_null) ; +#ifndef USE_OLD_LIBXML2 + if ( xmlopts = xmlCtxtUseOptions(ctxt->parser, xmlopts ), xmlopts ) + ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r, + "Unsupported parser opts %x", xmlopts) ; +#endif if ( ctxt->cfg->metafix ) +#ifndef GO_FASTER m = metafix(f->r, buf, ctxt->cfg->verbose) ; #else - enc = sniff_encoding(f->r, buf, bytes) ; - if ( ctxt->cfg->metafix ) m = metafix(f->r, buf) ; #endif - ap_set_content_type(f->r, "text/html;charset=utf-8") ; - ap_fputs(f->next, ctxt->bb, ctxt->cfg->doctype) ; if ( m ) { - ctxt->parser = htmlCreatePushParserCtxt(ctxt->sax, ctxt, - buf, m->start, 0, enc ) ; - htmlParseChunk(ctxt->parser, buf+m->end, bytes-m->end, 0) ; + consume_buffer(ctxt, buf, m->start, 0) ; + consume_buffer(ctxt, buf+m->end, bytes-m->end, 0) ; } else { - ctxt->parser = htmlCreatePushParserCtxt(ctxt->sax, ctxt, - buf, bytes, 0, enc ) ; + consume_buffer(ctxt, buf, bytes, 0) ; } - 
apr_pool_cleanup_register(f->r->pool, ctxt->parser, - (void*)htmlFreeParserCtxt, apr_pool_cleanup_null) ; - if ( xmlopts = xmlCtxtUseOptions(ctxt->parser, xmlopts ), xmlopts ) - ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r, - "Unsupported parser opts %x", xmlopts) ; } else { - htmlParseChunk(ctxt->parser, buf, bytes, 0) ; + consume_buffer(ctxt, buf, bytes, 0) ; } } else { ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r, "Error in bucket read") ; @@ -846,25 +947,14 @@ static int proxy_html_filter(ap_filter_t* f, apr_bucket_brigade* bb) { apr_brigade_cleanup(bb) ; return APR_SUCCESS ; } -static const char* fpi_html = - "\n" ; -static const char* fpi_html_legacy = - "\n" ; -static const char* fpi_xhtml = - "\n" ; -static const char* fpi_xhtml_legacy = - "\n" ; -static const char* html_etag = ">" ; -static const char* xhtml_etag = " />" ; -/*#define DEFAULT_DOCTYPE fpi_html */ -static const char* DEFAULT_DOCTYPE = "" ; -#define DEFAULT_ETAG html_etag static void* proxy_html_config(apr_pool_t* pool, char* x) { proxy_html_conf* ret = apr_pcalloc(pool, sizeof(proxy_html_conf) ) ; ret->doctype = DEFAULT_DOCTYPE ; ret->etag = DEFAULT_ETAG ; ret->bufsz = 8192 ; + /* ret->interp = 1; */ + /* don't initialise links and events until they get set/used */ return ret ; } static void* proxy_html_merge(apr_pool_t* pool, void* BASE, void* ADD) { @@ -872,6 +962,13 @@ static void* proxy_html_merge(apr_pool_t* pool, void* BASE, void* ADD) { proxy_html_conf* add = (proxy_html_conf*) ADD ; proxy_html_conf* conf = apr_palloc(pool, sizeof(proxy_html_conf)) ; + /* don't merge declarations - just use the most specific */ + conf->links = (add->links == NULL) ? base->links : add->links; + conf->events = (add->events == NULL) ? base->events : add->events; + + conf->charset_out = (add->charset_out == NULL) + ? 
base->charset_out : add->charset_out ; + if ( add->map && base->map ) { urlmap* a ; conf->map = NULL ; @@ -896,7 +993,9 @@ static void* proxy_html_merge(apr_pool_t* pool, void* BASE, void* ADD) { conf->flags = add->flags ^ NORM_RESET ; conf->metafix = add->metafix ; conf->extfix = add->extfix ; + conf->interp = add->interp ; conf->strip_comments = add->strip_comments ; + conf->enabled = add->enabled; #ifndef GO_FASTER conf->verbose = add->verbose ; #endif @@ -904,23 +1003,20 @@ static void* proxy_html_merge(apr_pool_t* pool, void* BASE, void* ADD) { conf->flags = base->flags | add->flags ; conf->metafix = base->metafix | add->metafix ; conf->extfix = base->extfix | add->extfix ; + conf->interp = base->interp | add->interp ; conf->strip_comments = base->strip_comments | add->strip_comments ; + conf->enabled = add->enabled | base->enabled; #ifndef GO_FASTER conf->verbose = base->verbose | add->verbose ; #endif } return conf ; } -#define REGFLAG(n,s,c) ( (s&&(strchr((s),(c))!=NULL)) ? (n) : 0 ) -#define XREGFLAG(n,s,c) ( (!s||(strchr((s),(c))==NULL)) ? (n) : 0 ) -static const char* set_urlmap(cmd_parms* cmd, void* CFG, - const char* from, const char* to, const char* flags) { - int regflags ; - proxy_html_conf* cfg = (proxy_html_conf*)CFG ; - urlmap* map ; - urlmap* newmap = apr_palloc(cmd->pool, sizeof(urlmap) ) ; - - newmap->next = NULL ; +#define REGFLAG(n,s,c) ( (s&&(ap_strchr_c((s),(c))!=NULL)) ? (n) : 0 ) +#define XREGFLAG(n,s,c) ( (!s||(ap_strchr_c((s),(c))==NULL)) ? 
(n) : 0 ) +static void comp_urlmap(apr_pool_t* pool, urlmap* newmap, + const char* from, const char* to, const char* flags, const char* cond) { + char* eq; newmap->flags = XREGFLAG(M_HTML,flags,'h') | XREGFLAG(M_EVENTS,flags,'e') @@ -929,29 +1025,78 @@ static const char* set_urlmap(cmd_parms* cmd, void* CFG, | REGFLAG(M_ATEND,flags,'$') | REGFLAG(M_REGEX,flags,'R') | REGFLAG(M_LAST,flags,'L') + | REGFLAG(M_NOTLAST,flags,'l') + | REGFLAG(M_INTERPOLATE_TO,flags,'V') + | REGFLAG(M_INTERPOLATE_FROM,flags,'v') ; - - if ( cfg->map ) { - for ( map = cfg->map ; map->next ; map = map->next ) ; - map->next = newmap ; - } else - cfg->map = newmap ; - - if ( ! (newmap->flags & M_REGEX) ) { - newmap->from.c = apr_pstrdup(cmd->pool, from) ; - newmap->to = apr_pstrdup(cmd->pool, to) ; + if ( ( newmap->flags & M_INTERPOLATE_FROM) + || ! (newmap->flags & M_REGEX) ) { + newmap->from.c = from ; + newmap->to = to ; } else { - regflags + newmap->regflags = REGFLAG(AP_REG_EXTENDED,flags,'x') | REGFLAG(AP_REG_ICASE,flags,'i') | REGFLAG(AP_REG_NOSUB,flags,'n') | REGFLAG(AP_REG_NEWLINE,flags,'s') ; - newmap->from.r = ap_pregcomp(cmd->pool, from, regflags) ; - newmap->to = apr_pstrdup(cmd->pool, to) ; + newmap->from.r = ap_pregcomp(pool, from, newmap->regflags) ; + newmap->to = to ; + } + if (cond != NULL) { + char* cond_copy; + newmap->cond = apr_pcalloc(pool, sizeof(rewritecond)); + if (cond[0] == '!') { + newmap->cond->rel = -1; + newmap->cond->env = cond_copy = apr_pstrdup(pool, cond+1); + } else { + newmap->cond->rel = 1; + newmap->cond->env = cond_copy = apr_pstrdup(pool, cond); + } + eq = ap_strchr(++cond_copy, '='); + if (eq) { + *eq = 0; + newmap->cond->val = eq+1; + } + } else { + newmap->cond = NULL; } - return NULL ; } +static const char* set_urlmap(cmd_parms* cmd, void* CFG, const char* args) { + proxy_html_conf* cfg = (proxy_html_conf*)CFG ; + urlmap* map ; + apr_pool_t* pool = cmd->pool; + urlmap* newmap ; + const char* usage = + "Usage: ProxyHTMLURLMap from-pattern 
to-pattern [flags] [cond]"; + const char* from; + const char* to; + const char* flags; + const char* cond = NULL; + + if (from = ap_getword_conf(cmd->pool, &args), !from) + return usage; + if (to = ap_getword_conf(cmd->pool, &args), !to) + return usage; + flags = ap_getword_conf(cmd->pool, &args); + if (flags && *flags) + cond = ap_getword_conf(cmd->pool, &args); + if (cond && !*cond) + cond = NULL; + + /* the args look OK, so let's use them */ + newmap = apr_palloc(pool, sizeof(urlmap) ) ; + newmap->next = NULL; + if ( cfg->map ) { + for ( map = cfg->map ; map->next ; map = map->next ) ; + map->next = newmap ; + } else + cfg->map = newmap ; + + comp_urlmap(cmd->pool, newmap, from, to, flags, cond); + return NULL; +} + static const char* set_doctype(cmd_parms* cmd, void* CFG, const char* t, const char* l) { proxy_html_conf* cfg = (proxy_html_conf*)CFG ; @@ -976,7 +1121,8 @@ static const char* set_doctype(cmd_parms* cmd, void* CFG, const char* t, } return NULL ; } -static void set_param(proxy_html_conf* cfg, const char* arg) { +static const char* set_flags(cmd_parms* cmd, void* CFG, const char* arg) { + proxy_html_conf* cfg = CFG; if ( arg && *arg ) { if ( !strcmp(arg, "lowercase") ) cfg->flags |= NORM_LC ; @@ -985,24 +1131,53 @@ static void set_param(proxy_html_conf* cfg, const char* arg) { else if ( !strcmp(arg, "reset") ) cfg->flags |= NORM_RESET ; } + return NULL ; +} +static const char* set_events(cmd_parms* cmd, void* CFG, const char* arg) { + tattr* attr; + proxy_html_conf* cfg = CFG; + if (cfg->events == NULL) + cfg->events = apr_array_make(cmd->pool, 20, sizeof(tattr)); + attr = apr_array_push(cfg->events) ; + attr->val = arg; + return NULL ; } -static const char* set_flags(cmd_parms* cmd, void* CFG, const char* arg1, - const char* arg2, const char* arg3) { - set_param( (proxy_html_conf*)CFG, arg1) ; - set_param( (proxy_html_conf*)CFG, arg2) ; - set_param( (proxy_html_conf*)CFG, arg3) ; +static const char* set_links(cmd_parms* cmd, void* CFG, + const char* 
elt, const char* att) { + apr_array_header_t* attrs; + tattr* attr ; + proxy_html_conf* cfg = CFG; + + if (cfg->links == NULL) + cfg->links = apr_hash_make(cmd->pool); + + attrs = apr_hash_get(cfg->links, elt, APR_HASH_KEY_STRING) ; + if (!attrs) { + attrs = apr_array_make(cmd->pool, 2, sizeof(tattr*)) ; + apr_hash_set(cfg->links, elt, APR_HASH_KEY_STRING, attrs) ; + } + attr = apr_array_push(attrs) ; + attr->val = att ; return NULL ; } static const command_rec proxy_html_cmds[] = { - AP_INIT_TAKE23("ProxyHTMLURLMap", set_urlmap, NULL, + AP_INIT_ITERATE("ProxyHTMLEvents", set_events, NULL, + RSRC_CONF|ACCESS_CONF, "Strings to be treated as scripting events"), + AP_INIT_ITERATE2("ProxyHTMLLinks", set_links, NULL, + RSRC_CONF|ACCESS_CONF, "Declare HTML Attributes"), + AP_INIT_RAW_ARGS("ProxyHTMLURLMap", set_urlmap, NULL, RSRC_CONF|ACCESS_CONF, "Map URL From To" ) , AP_INIT_TAKE12("ProxyHTMLDoctype", set_doctype, NULL, RSRC_CONF|ACCESS_CONF, "(HTML|XHTML) [Legacy]" ) , - AP_INIT_TAKE123("ProxyHTMLFixups", set_flags, NULL, + AP_INIT_ITERATE("ProxyHTMLFixups", set_flags, NULL, RSRC_CONF|ACCESS_CONF, "Options are lowercase, dospath" ) , AP_INIT_FLAG("ProxyHTMLMeta", ap_set_flag_slot, (void*)APR_OFFSETOF(proxy_html_conf, metafix), RSRC_CONF|ACCESS_CONF, "Fix META http-equiv elements" ) , + AP_INIT_FLAG("ProxyHTMLInterp", ap_set_flag_slot, + (void*)APR_OFFSETOF(proxy_html_conf, interp), + RSRC_CONF|ACCESS_CONF, + "Support interpolation and conditions in URLMaps" ) , AP_INIT_FLAG("ProxyHTMLExtended", ap_set_flag_slot, (void*)APR_OFFSETOF(proxy_html_conf, extfix), RSRC_CONF|ACCESS_CONF, "Map URLs in Javascript and CSS" ) , @@ -1017,18 +1192,52 @@ static const command_rec proxy_html_cmds[] = { AP_INIT_TAKE1("ProxyHTMLBufSize", ap_set_int_slot, (void*)APR_OFFSETOF(proxy_html_conf, bufsz), RSRC_CONF|ACCESS_CONF, "Buffer size" ) , + AP_INIT_TAKE1("ProxyHTMLCharsetOut", ap_set_string_slot, + (void*)APR_OFFSETOF(proxy_html_conf, charset_out), + RSRC_CONF|ACCESS_CONF, "Usage: 
ProxyHTMLCharsetOut charset" ) , + AP_INIT_FLAG("ProxyHTMLEnable", ap_set_flag_slot, + (void*)APR_OFFSETOF(proxy_html_conf, enabled), + RSRC_CONF|ACCESS_CONF, "Enable proxy-html and xml2enc filters" ) , { NULL } } ; static int mod_proxy_html(apr_pool_t* p, apr_pool_t* p1, apr_pool_t* p2, server_rec* s) { ap_add_version_component(p, VERSION_STRING) ; + seek_meta = ap_pregcomp(p, "]*(http-equiv)[^>]*>", + AP_REG_EXTENDED|AP_REG_ICASE) ; + seek_content = apr_strmatch_precompile(p, "content", 0); + memset(&sax, 0, sizeof(htmlSAXHandler)); + sax.startElement = pstartElement ; + sax.endElement = pendElement ; + sax.characters = pcharacters ; + sax.comment = pcomment ; + sax.cdataBlock = pcdata ; + xml2enc_charset = APR_RETRIEVE_OPTIONAL_FN(xml2enc_charset); + xml2enc_filter = APR_RETRIEVE_OPTIONAL_FN(xml2enc_filter); + if (!xml2enc_charset) { + ap_log_perror(APLOG_MARK, APLOG_NOTICE, 0, p2, + "I18n support in mod_proxy_html requires mod_xml2enc. " + "Without it, non-ASCII characters in proxied pages are " + "likely to display incorrectly."); + } return OK ; } +static void proxy_html_insert(request_rec* r) { + proxy_html_conf* cfg + = ap_get_module_config(r->per_dir_config, &proxy_html_module); + if (cfg->enabled) { + if (xml2enc_filter) + xml2enc_filter(r, NULL, ENCIO_INPUT_CHECKS); + ap_add_output_filter("proxy-html", NULL, r, r->connection); + } +} static void proxy_html_hooks(apr_pool_t* p) { - ap_register_output_filter("proxy-html", proxy_html_filter, - NULL, AP_FTYPE_RESOURCE) ; + static const char* aszSucc[] = { "mod_filter.c", NULL }; + ap_register_output_filter_protocol("proxy-html", proxy_html_filter, + NULL, AP_FTYPE_RESOURCE, + AP_FILTER_PROTO_CHANGE|AP_FILTER_PROTO_CHANGE_LENGTH) ; ap_hook_post_config(mod_proxy_html, NULL, NULL, APR_HOOK_MIDDLE) ; - ap_hook_child_init(proxy_html_child_init, NULL, NULL, APR_HOOK_MIDDLE) ; + ap_hook_insert_filter(proxy_html_insert, NULL, aszSucc, APR_HOOK_MIDDLE) ; } module AP_MODULE_DECLARE_DATA proxy_html_module = { 
STANDARD20_MODULE_STUFF,