/********************************************************************
- Copyright (c) 2003-4, WebThing Ltd
+ Copyright (c) 2003-9, WebThing Ltd
Author: Nick Kew <nick@webthing.com>
This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
+it under the terms of the GNU General Public License Version 2,
+as published by the Free Software Foundation.
+
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
-*********************************************************************/
-
+You can obtain a copy of the GNU General Public License Version 2
+from http://www.gnu.org/licenses/old-licenses/gpl-2.0.html or
+http://apache.webthing.com/COPYING.txt
-/********************************************************************
- Note to Users
-
- You are requested to register as a user, at
- http://apache.webthing.com/registration.html
-
- This entitles you to support from the developer.
- I'm unlikely to reply to help/support requests from
- non-registered users, unless you're paying and/or offering
- constructive feedback such as bug reports or sensible
- suggestions for further development.
-
- It also makes a small contribution to the effort
- that's gone into developing this work.
*********************************************************************/
+/**** NOTICE TO PACKAGERS
+ *
+ * This module now relies on mod_xml2enc for i18n support.
+ * You should make mod_xml2enc a dependency in your packages.
+ */
+
/* End of Notices */
#ifdef GO_FASTER
#define VERBOSE(x)
+#define VERBOSEB(x)
#else
-#define VERBOSE(x) if ( verbose ) x
+#define VERBOSE(x) if (verbose) x
+#define VERBOSEB(x) if (verbose) {x}
#endif
-#define VERSION_STRING "proxy_html/2.4"
+/* 3.1.2 - trivial changes to fix compile on Windows */
+#define VERSION_STRING "proxy_html/3.1.2"
#include <ctype.h>
-/* libxml */
+/* libxml2 */
#include <libxml/HTMLparser.h>
/* apache */
#include <http_config.h>
#include <http_log.h>
#include <apr_strings.h>
+#include <apr_hash.h>
+#include <apr_strmatch.h>
+
+#include <apr_optional.h>
+#include <mod_xml2enc.h>
+#include <http_request.h>
+
+/* To support Apache 2.1/2.2, we need the ap_ forms of the
+ * regexp stuff, and they're now used in the code.
+ * To support 2.0 in the same compile, we #define the
+ * AP_ versions if necessary.
+ */
+#ifndef AP_REG_ICASE
+/* it's 2.0, so we #define the ap_ versions */
+#define ap_regex_t regex_t
+#define ap_regmatch_t regmatch_t
+#define AP_REG_EXTENDED REG_EXTENDED
+#define AP_REG_ICASE REG_ICASE
+#define AP_REG_NOSUB REG_NOSUB
+#define AP_REG_NEWLINE REG_NEWLINE
+#define APACHE20
+#define ap_register_output_filter_protocol(a,b,c,d,e) ap_register_output_filter(a,b,c,d)
+#else
+#define APACHE22
+#endif
+
+/* globals set once at startup */
+static ap_regex_t* seek_meta ;
+static const apr_strmatch_pattern* seek_content ;
+static apr_status_t (*xml2enc_charset)(request_rec*, xmlCharEncoding*, const char**) = NULL;
+static apr_status_t (*xml2enc_filter)(request_rec*, const char*, unsigned int) = NULL;
module AP_MODULE_DECLARE_DATA proxy_html_module ;
-#define M_HTML 0x01
-#define M_EVENTS 0x02
-#define M_CDATA 0x04
-#define M_REGEX 0x08
-#define M_ATSTART 0x10
-#define M_ATEND 0x20
-#define M_LAST 0x40
+#define M_HTML 0x01
+#define M_EVENTS 0x02
+#define M_CDATA 0x04
+#define M_REGEX 0x08
+#define M_ATSTART 0x10
+#define M_ATEND 0x20
+#define M_LAST 0x40
+#define M_NOTLAST 0x80
+#define M_INTERPOLATE_TO 0x100
+#define M_INTERPOLATE_FROM 0x200
typedef struct {
+ const char* val;
+} tattr;
+typedef struct {
unsigned int start ;
unsigned int end ;
} meta ;
+typedef struct {
+ const char* env;
+ const char* val;
+ int rel;
+} rewritecond;
typedef struct urlmap {
struct urlmap* next ;
unsigned int flags ;
+ unsigned int regflags ;
union {
const char* c ;
ap_regex_t* r ;
} from ;
const char* to ;
+ rewritecond* cond;
} urlmap ;
typedef struct {
urlmap* map ;
const char* doctype ;
const char* etag ;
unsigned int flags ;
+ size_t bufsz ;
+ apr_hash_t* links;
+ apr_array_header_t* events;
+ const char* charset_out;
int extfix ;
int metafix ;
int strip_comments ;
+ int interp;
+ int enabled;
#ifndef GO_FASTER
int verbose ;
#endif
- size_t bufsz ;
} proxy_html_conf ;
typedef struct {
- htmlSAXHandlerPtr sax ;
ap_filter_t* f ;
proxy_html_conf* cfg ;
htmlParserCtxtPtr parser ;
char* buf ;
size_t offset ;
size_t avail ;
+ const char* encoding;
+ urlmap* map;
} saxctxt ;
-static int is_empty_elt(const char* name) {
- const char** p ;
- static const char* empty_elts[] = {
- "br" ,
- "link" ,
- "img" ,
- "hr" ,
- "input" ,
- "meta" ,
- "base" ,
- "area" ,
- "param" ,
- "col" ,
- "frame" ,
- "isindex" ,
- "basefont" ,
- NULL
- } ;
- for ( p = empty_elts ; *p ; ++p )
- if ( !strcmp( *p, name) )
- return 1 ;
- return 0 ;
-}
-
-typedef struct {
- const char* name ;
- const char** attrs ;
-} elt_t ;
#define NORM_LC 0x1
#define NORM_MSSLASH 0x2
#define NORM_RESET 0x4
+static htmlSAXHandler sax ;
typedef enum { ATTR_IGNORE, ATTR_URI, ATTR_EVENT } rewrite_t ;
+static const char* const fpi_html =
+ "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\">\n" ;
+static const char* const fpi_html_legacy =
+ "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n" ;
+static const char* const fpi_xhtml =
+ "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n" ;
+static const char* const fpi_xhtml_legacy =
+ "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n" ;
+static const char* const html_etag = ">" ;
+static const char* const xhtml_etag = " />" ;
+/*#define DEFAULT_DOCTYPE fpi_html */
+static const char* const DEFAULT_DOCTYPE = "" ;
+#define DEFAULT_ETAG html_etag
+
static void normalise(unsigned int flags, char* str) {
- xmlChar* p ;
+ char* p ;
if ( flags & NORM_LC )
for ( p = str ; *p ; ++p )
if ( isupper(*p) )
*p = tolower(*p) ;
if ( flags & NORM_MSSLASH )
- for ( p = strchr(str, '\\') ; p ; p = strchr(p+1, '\\') )
+ for ( p = ap_strchr(str, '\\') ; p ; p = ap_strchr(p+1, '\\') )
*p = '/' ;
}
+#define consume_buffer(ctx,inbuf,bytes,flag) \
+ htmlParseChunk(ctx->parser, inbuf, bytes, flag)
+
+#define AP_fwrite(ctx,inbuf,bytes,flush) \
+ ap_fwrite(ctx->f->next, ctx->bb, inbuf, bytes);
-#define FLUSH ap_fwrite(ctx->f->next, ctx->bb, (chars+begin), (i-begin)) ; begin = i+1
-static void pcharacters(void* ctxt, const xmlChar *chars, int length) {
+/* This is always utf-8 on entry. We can convert charset within FLUSH */
+#define FLUSH AP_fwrite(ctx, (chars+begin), (i-begin), 0) ; begin = i+1
+static void pcharacters(void* ctxt, const xmlChar *uchars, int length) {
+ const char* chars = (const char*) uchars;
saxctxt* ctx = (saxctxt*) ctxt ;
int i ;
int begin ;
newbuf = realloc(ctx->buf, ctx->avail) ;
if ( newbuf != ctx->buf ) {
if ( ctx->buf )
- apr_pool_cleanup_kill(ctx->f->r->pool, ctx->buf, (void*)free) ;
+ apr_pool_cleanup_kill(ctx->f->r->pool, ctx->buf, (int(*)(void*))free);
apr_pool_cleanup_register(ctx->f->r->pool, newbuf,
- (void*)free, apr_pool_cleanup_null);
+ (int(*)(void*))free, apr_pool_cleanup_null);
ctx->buf = newbuf ;
}
}
ap_regmatch_t pmatch[10] ;
char* subs ;
size_t len, offs ;
+ urlmap* themap = ctx->map;
#ifndef GO_FASTER
int verbose = ctx->cfg->verbose ;
#endif
pappend(ctx, &c, 1) ; /* append null byte */
/* parse the text for URLs */
- for ( m = ctx->cfg->map ; m ; m = m->next ) {
+ for ( m = themap ; m ; m = m->next ) {
if ( ! ( m->flags & M_CDATA ) )
continue ;
if ( m->flags & M_REGEX ) {
s_to = strlen(subs) ;
len = strlen(ctx->buf) ;
offs += match ;
- VERBOSE( {
+ VERBOSEB(
const char* f = apr_pstrndup(ctx->f->r->pool,
ctx->buf + offs , s_from ) ;
ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, ctx->f->r,
"C/RX: match at %s, substituting %s", f, subs) ;
- } )
+ )
if ( s_to > s_from) {
preserve(ctx, s_to - s_from) ;
memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from,
}
}
}
- ap_fputs(ctx->f->next, ctx->bb, ctx->buf) ;
+ AP_fwrite(ctx, ctx->buf, strlen(ctx->buf), 1) ;
}
-static void pcdata(void* ctxt, const xmlChar *chars, int length) {
+static void pcdata(void* ctxt, const xmlChar *uchars, int length) {
+ const char* chars = (const char*) uchars;
saxctxt* ctx = (saxctxt*) ctxt ;
if ( ctx->cfg->extfix ) {
pappend(ctx, chars, length) ;
} else {
- ap_fwrite(ctx->f->next, ctx->bb, chars, length) ;
+ /* not sure if this should force-flush
+ * (i.e. can one cdata section come in multiple calls?)
+ */
+ AP_fwrite(ctx, chars, length, 0) ;
}
}
-static void pcomment(void* ctxt, const xmlChar *chars) {
+static void pcomment(void* ctxt, const xmlChar *uchars) {
+ const char* chars = (const char*) uchars;
saxctxt* ctx = (saxctxt*) ctxt ;
if ( ctx->cfg->strip_comments )
return ;
pappend(ctx, chars, strlen(chars) ) ;
pappend(ctx, "-->", 3) ;
} else {
- ap_fputstrs(ctx->f->next, ctx->bb, "<!--", chars, "-->", NULL) ;
+ ap_fputs(ctx->f->next, ctx->bb, "<!--") ;
+ AP_fwrite(ctx, chars, strlen(chars), 1) ;
+ ap_fputs(ctx->f->next, ctx->bb, "-->") ;
}
}
-static void pendElement(void* ctxt, const xmlChar* name) {
+/* SAX endElement callback.
+ * Flushes any buffered text through dump_content(), then writes the
+ * closing tag unless libxml2 describes the element as empty.
+ * With a strict doctype (fpi_html / fpi_xhtml) unknown and deprecated
+ * elements are dropped; with a legacy doctype (fpi_html_legacy /
+ * fpi_xhtml_legacy) only unknown elements are dropped.  These tests must
+ * stay in step with pstartElement so start and end tags are dropped together.
+ */
+static void pendElement(void* ctxt, const xmlChar* uname) {
saxctxt* ctx = (saxctxt*) ctxt ;
+ const char* name = (const char*) uname;
+ const htmlElemDesc* desc = htmlTagLookup(uname);
+
+ if ((ctx->cfg->doctype == fpi_html) || (ctx->cfg->doctype == fpi_xhtml)) {
+ /* enforce html */
+ if (!desc || desc->depr)
+ return;
+
+ } else if ((ctx->cfg->doctype == fpi_html_legacy)
+ || (ctx->cfg->doctype == fpi_xhtml_legacy)) {
+ /* enforce html legacy */
+ if (!desc)
+ return;
+ }
+ /* TODO - implement HTML "allowed here" using the stack */
+ /* nah. Keeping the stack is too much overhead */
+
if ( ctx->offset > 0 ) {
dump_content(ctx) ;
ctx->offset = 0 ; /* having dumped it, we can re-use the memory */
}
- if ( ! is_empty_elt(name) )
+ if ( !desc || ! desc->empty ) {
ap_fprintf(ctx->f->next, ctx->bb, "</%s>", name) ;
+ }
}
-static void pstartElement(void* ctxt, const xmlChar* name,
- const xmlChar** attrs ) {
+static void pstartElement(void* ctxt, const xmlChar* uname,
+ const xmlChar** uattrs ) {
+ int required_attrs ;
int num_match ;
size_t offs, len ;
char* subs ;
rewrite_t is_uri ;
- const char** linkattrs ;
- const xmlChar** a ;
- const elt_t* elt ;
- const char** linkattr ;
+ const char** a ;
urlmap* m ;
size_t s_to, s_from, match ;
char* found ;
#ifndef GO_FASTER
int verbose = ctx->cfg->verbose ;
#endif
-
- static const char* href[] = { "href", NULL } ;
- static const char* cite[] = { "cite", NULL } ;
- static const char* action[] = { "action", NULL } ;
- static const char* imgattr[] = { "src", "longdesc", "usemap", NULL } ;
- static const char* inputattr[] = { "src", "usemap", NULL } ;
- static const char* scriptattr[] = { "src", "for", NULL } ;
- static const char* frameattr[] = { "src", "longdesc", NULL } ;
- static const char* objattr[] = { "classid", "codebase", "data", "usemap", NULL } ;
- static const char* profile[] = { "profile", NULL } ;
- static const char* background[] = { "background", NULL } ;
- static const char* codebase[] = { "codebase", NULL } ;
-
- static const elt_t linked_elts[] = {
- { "a" , href } ,
- { "img" , imgattr } ,
- { "form", action } ,
- { "link" , href } ,
- { "script" , scriptattr } ,
- { "base" , href } ,
- { "area" , href } ,
- { "input" , inputattr } ,
- { "frame", frameattr } ,
- { "iframe", frameattr } ,
- { "object", objattr } ,
- { "q" , cite } ,
- { "blockquote" , cite } ,
- { "ins" , cite } ,
- { "del" , cite } ,
- { "head" , profile } ,
- { "body" , background } ,
- { "applet", codebase } ,
- { NULL, NULL }
- } ;
- static const char* events[] = {
- "onclick" ,
- "ondblclick" ,
- "onmousedown" ,
- "onmouseup" ,
- "onmouseover" ,
- "onmousemove" ,
- "onmouseout" ,
- "onkeypress" ,
- "onkeydown" ,
- "onkeyup" ,
- "onfocus" ,
- "onblur" ,
- "onload" ,
- "onunload" ,
- "onsubmit" ,
- "onreset" ,
- "onselect" ,
- "onchange" ,
- NULL
- } ;
+ apr_array_header_t *linkattrs;
+ int i;
+ const char* name = (const char*) uname;
+ const char** attrs = (const char**) uattrs;
+ const htmlElemDesc* desc = htmlTagLookup(uname);
+ urlmap* themap = ctx->map;
+#ifdef HAVE_STACK
+ const void** descp;
+#endif
+ /* enforce: 0 = no checking, 1 = legacy doctype (drop unknown elements),
+ * 2 = strict doctype (drop unknown and deprecated elements).
+ * The level is only recorded here; the two checks below do the dropping
+ * so that their debug messages are actually logged.
+ */
+ int enforce = 0;
+ if ((ctx->cfg->doctype == fpi_html) || (ctx->cfg->doctype == fpi_xhtml)) {
+ /* enforce html */
+ enforce = 2;
+ } else if ((ctx->cfg->doctype == fpi_html_legacy)
+ || (ctx->cfg->doctype == fpi_xhtml_legacy)) {
+ /* enforce html legacy */
+ enforce = 1;
+ }
+ if (!desc && enforce) {
+ ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r,
+ "Bogus HTML element %s dropped", name) ;
+ return;
+ }
+ if (desc && desc->depr && (enforce == 2) ) {
+ ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r,
+ "Deprecated HTML element %s dropped", name) ;
+ return;
+ }
+#ifdef HAVE_STACK
+ descp = apr_array_push(ctx->stack);
+ *descp = desc;
+ /* TODO - implement HTML "allowed here" */
+#endif
ap_fputc(ctx->f->next, ctx->bb, '<') ;
ap_fputs(ctx->f->next, ctx->bb, name) ;
+ required_attrs = 0;
+ if ((enforce > 0) && (desc != NULL) && (desc->attrs_req != NULL))
+ for (a = desc->attrs_req; *a; a++)
+ ++required_attrs;
+
if ( attrs ) {
- linkattrs = 0 ;
- for ( elt = linked_elts; elt->name != NULL ; ++elt )
- if ( !strcmp(elt->name, name) ) {
- linkattrs = elt->attrs ;
- break ;
- }
+ linkattrs = apr_hash_get(ctx->cfg->links, name, APR_HASH_KEY_STRING) ;
for ( a = attrs ; *a ; a += 2 ) {
+ if (desc && enforce > 0) {
+ switch (htmlAttrAllowed(desc, (xmlChar*)*a, 2-enforce)) {
+ case HTML_INVALID:
+ ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r,
+ "Bogus HTML attribute %s of %s dropped", *a, name);
+ continue;
+ case HTML_DEPRECATED:
+ ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r,
+ "Deprecated HTML attribute %s of %s dropped", *a, name);
+ continue;
+ case HTML_REQUIRED:
+ required_attrs--; /* cross off the number still needed */
+ /* fallthrough - required implies valid */
+ default:
+ break;
+ }
+ }
ctx->offset = 0 ;
if ( a[1] ) {
pappend(ctx, a[1], strlen(a[1])+1) ;
is_uri = ATTR_IGNORE ;
if ( linkattrs ) {
- for ( linkattr = linkattrs ; *linkattr ; ++linkattr) {
- if ( !strcmp(*linkattr, *a) ) {
+ tattr* attrs = (tattr*) linkattrs->elts;
+ for (i=0; i < linkattrs->nelts; ++i) {
+ if ( !strcmp(*a, attrs[i].val)) {
is_uri = ATTR_URI ;
break ;
}
}
}
- if ( (is_uri == ATTR_IGNORE) && ctx->cfg->extfix ) {
- for ( linkattr = events; *linkattr; ++linkattr ) {
- if ( !strcmp(*linkattr, *a) ) {
+ if ( (is_uri == ATTR_IGNORE) && ctx->cfg->extfix
+ && (ctx->cfg->events != NULL) ) {
+ for (i=0; i < ctx->cfg->events->nelts; ++i) {
+ tattr* attrs = (tattr*) ctx->cfg->events->elts;
+ if ( !strcmp(*a, attrs[i].val)) {
is_uri = ATTR_EVENT ;
break ;
}
switch ( is_uri ) {
case ATTR_URI:
num_match = 0 ;
- for ( m = ctx->cfg->map ; m ; m = m->next ) {
+ for ( m = themap ; m ; m = m->next ) {
if ( ! ( m->flags & M_HTML ) )
continue ;
if ( m->flags & M_REGEX ) {
++num_match ;
offs = match = pmatch[0].rm_so ;
s_from = pmatch[0].rm_eo - match ;
- subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf+offs,
+ subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf,
nmatch, pmatch) ;
VERBOSE( {
const char* f = apr_pstrndup(ctx->f->r->pool,
break ;
}
}
- if ( num_match > 0 ) /* URIs only want one match */
+ /* URIs only want one match unless overridden in the config */
+ if ( (num_match > 0) && !( m->flags & M_NOTLAST ) )
break ;
}
break ;
case ATTR_EVENT:
- for ( m = ctx->cfg->map ; m ; m = m->next ) {
+ for ( m = themap ; m ; m = m->next ) {
num_match = 0 ; /* reset here since we're working per-rule */
if ( ! ( m->flags & M_EVENTS ) )
continue ;
anything that needs it in the value.
*/
ap_fputstrs(ctx->f->next, ctx->bb, " ", a[0], "=\"", NULL) ;
- pcharacters(ctx, ctx->buf, strlen(ctx->buf)) ;
+ pcharacters(ctx, (const xmlChar*)ctx->buf, strlen(ctx->buf)) ;
ap_fputc(ctx->f->next, ctx->bb, '"') ;
}
}
}
ctx->offset = 0 ;
- if ( is_empty_elt(name) )
+ if ( desc && desc->empty )
ap_fputs(ctx->f->next, ctx->bb, ctx->cfg->etag) ;
else
ap_fputc(ctx->f->next, ctx->bb, '>') ;
-}
-static htmlSAXHandlerPtr setupSAX(apr_pool_t* pool) {
- htmlSAXHandlerPtr sax = apr_pcalloc(pool, sizeof(htmlSAXHandler) ) ;
- sax->startDocument = NULL ;
- sax->endDocument = NULL ;
- sax->startElement = pstartElement ;
- sax->endElement = pendElement ;
- sax->characters = pcharacters ;
- sax->comment = pcomment ;
- sax->cdataBlock = pcdata ;
- return sax ;
-}
-
-static ap_regex_t* seek_meta_ctype ;
-static ap_regex_t* seek_charset ;
-static ap_regex_t* seek_meta ;
-
-static void proxy_html_child_init(apr_pool_t* pool, server_rec* s) {
- seek_meta_ctype = ap_pregcomp(pool,
- "(<meta[^>]*http-equiv[ \t\r\n='\"]*content-type[^>]*>)",
- AP_REG_EXTENDED|AP_REG_ICASE) ;
- seek_charset = ap_pregcomp(pool, "charset=([A-Za-z0-9_-]+)",
- AP_REG_EXTENDED|AP_REG_ICASE) ;
- seek_meta = ap_pregcomp(pool, "<meta[^>]*(http-equiv)[^>]*>",
- AP_REG_EXTENDED|AP_REG_ICASE) ;
-}
-
-static xmlCharEncoding sniff_encoding(request_rec* r, const char* cbuf, size_t bytes
-#ifndef GO_FASTER
- , int verbose
-#endif
- ) {
- xmlCharEncoding ret ;
- char* encoding = NULL ;
- char* p ;
- char* q ;
- ap_regmatch_t match[2] ;
- unsigned char* buf = (unsigned char*)cbuf ;
-
- VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
- "Content-Type is %s", r->content_type) ) ;
-
-/* If we've got it in the HTTP headers, there's nothing to do */
- if ( r->content_type &&
- ( p = ap_strcasestr(r->content_type, "charset=") , p > 0 ) ) {
- p += 8 ;
- if ( encoding = apr_pstrndup(r->pool, p, strcspn(p, " ;") ) , encoding ) {
- if ( ret = xmlParseCharEncoding(encoding),
- ret != XML_CHAR_ENCODING_ERROR ) {
- VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
- "Got charset %s from HTTP headers", encoding) ) ;
- return ret ;
- } else {
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "Unsupported charset %s in HTTP headers", encoding) ;
- encoding = NULL ;
- }
- }
- }
-/* to sniff, first we look for BOM */
- if ( ret = xmlDetectCharEncoding(buf, bytes),
- ret != XML_CHAR_ENCODING_NONE ) {
- VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
- "Got charset from XML rules.") ) ;
- return ret ;
+ if ((enforce > 0) && (required_attrs > 0)) {
+ /* if there are more required attributes than we found then complain */
+ ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r,
+ "HTML element %s is missing %d required attributes",
+ name, required_attrs);
}
-
-/* If none of the above, look for a META-thingey */
- encoding = NULL ;
- if ( ap_regexec(seek_meta_ctype, buf, 1, match, 0) == 0 ) {
- p = apr_pstrndup(r->pool, buf + match[0].rm_so,
- match[0].rm_eo - match[0].rm_so) ;
- if ( ap_regexec(seek_charset, p, 2, match, 0) == 0 )
- encoding = apr_pstrndup(r->pool, p+match[1].rm_so,
- match[1].rm_eo - match[1].rm_so) ;
- }
-
-/* either it's set to something we found or it's still the default */
- if ( encoding )
- if ( ret = xmlParseCharEncoding(encoding),
- ret != XML_CHAR_ENCODING_ERROR ) {
- VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
- "Got charset %s from HTML META", encoding) ) ;
- return ret ;
- } else {
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "Unsupported charset %s in HTML META", encoding) ;
- }
-
-/* the old HTTP default is a last resort */
- ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r,
- "No usable charset information: using old HTTP default LATIN1") ;
- return XML_CHAR_ENCODING_8859_1 ;
}
+
static meta* metafix(request_rec* r, const char* buf /*, size_t bytes*/
#ifndef GO_FASTER
, int verbose
header = apr_pstrndup(r->pool, p, q-p) ;
if ( strncasecmp(header, "Content-", 8) ) {
/* find content=... string */
- for ( p = strstr(buf+offs+pmatch[0].rm_so, "content") ; *p ; ) {
- p += 7 ;
- while ( *p && isspace(*p) )
- ++p ;
- if ( *p != '=' )
- continue ;
- while ( *p && isspace(*++p) ) ;
- if ( ( *p == '\'' ) || ( *p == '"' ) ) {
- delim = *p++ ;
- for ( q = p ; *q != delim ; ++q ) ;
- } else {
- for ( q = p ; *q && !isspace(*q) && (*q != '>') ; ++q ) ;
- }
- content = apr_pstrndup(r->pool, p, q-p) ;
- break ;
+ p = apr_strmatch(seek_content, buf+offs+pmatch[0].rm_so,
+ pmatch[0].rm_eo - pmatch[0].rm_so);
+ /* if it doesn't contain "content", ignore, don't crash! */
+ if (p != NULL) {
+ while (*p) {
+ p += 7 ;
+ while ( *p && isspace(*p) )
+ ++p ;
+ /* Not followed by '=': give up on this tag.  Going round the
+ * loop again would advance p by another 7 bytes without
+ * re-matching, which can step past the terminating NUL.
+ */
+ if ( *p != '=' )
+ break ;
+ while ( *p && isspace(*++p) ) ;
+ if ( ( *p == '\'' ) || ( *p == '"' ) ) {
+ delim = *p++ ;
+ /* stop at end of string if the closing quote is missing */
+ for ( q = p ; *q && (*q != delim) ; ++q ) ;
+ if ( *q != delim )
+ break ; /* unterminated value: ignore it */
+ } else {
+ for ( q = p ; *q && !isspace(*q) && (*q != '>') ; ++q ) ;
+ }
+ content = apr_pstrndup(r->pool, p, q-p) ;
+ break ;
+ }
}
} else if ( !strncasecmp(header, "Content-Type", 12) ) {
ret = apr_palloc(r->pool, sizeof(meta) ) ;
return ret ;
}
-static int proxy_html_filter_init(ap_filter_t* f) {
- const char* env ;
- saxctxt* fctx ;
+/* Expand ${VAR} and ${VAR|default} references in str.
+ * VAR is looked up in r->subprocess_env; when it is unset the text after
+ * '|' is used, or the empty string if no default was given.
+ * Each substitution builds a new string in r->pool and the result is
+ * scanned again from its start, so replacement text is itself expanded.
+ * Returns str unchanged when it contains no complete ${...} reference.
+ */
+static const char* interpolate_vars(request_rec* r, const char* str) {
+ const char* start;
+ const char* end;
+ const char* delim;
+ const char* before;
+ const char* after;
+ const char* replacement;
+ const char* var;
+ for (;;) {
+ start = str ;
+ if (start = ap_strstr_c(start, "${"), start == NULL)
+ break;
-#if 0
-/* remove content-length filter */
- ap_filter_rec_t* clf = ap_get_output_filter_handle("CONTENT_LENGTH") ;
- ap_filter_t* ff = f->next ;
-
- do {
- ap_filter_t* fnext = ff->next ;
- if ( ff->frec == clf )
- ap_remove_output_filter(ff) ;
- ff = fnext ;
- } while ( ff ) ;
-#endif
+ if (end = ap_strchr_c(start+2, '}'), end == NULL)
+ break;
- fctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(saxctxt)) ;
- fctx->sax = setupSAX(f->r->pool) ;
- fctx->f = f ;
- fctx->bb = apr_brigade_create(f->r->pool, f->r->connection->bucket_alloc) ;
- fctx->cfg = ap_get_module_config(f->r->per_dir_config,&proxy_html_module);
-
- if ( f->r->proto_num >= 1001 ) {
- if ( ! f->r->main && ! f->r->prev ) {
- env = apr_table_get(f->r->subprocess_env, "force-response-1.0") ;
- if ( !env )
- f->r->chunked = 1 ;
+ delim = ap_strchr_c(start, '|');
+ /* A '|' beyond the closing brace belongs to the rest of the string,
+ * not to this reference; treating it as the separator would give a
+ * bogus variable name and a negative default length.
+ */
+ if (delim && delim >= end)
+ delim = NULL;
+ before = apr_pstrndup(r->pool, str, start-str);
+ after = end+1;
+ if (delim) {
+ var = apr_pstrndup(r->pool, start+2, delim-start-2) ;
+ } else {
+ var = apr_pstrndup(r->pool, start+2, end-start-2) ;
}
+ replacement = apr_table_get(r->subprocess_env, var) ;
+ if (!replacement) {
+ if (delim)
+ replacement = apr_pstrndup(r->pool, delim+1, end-delim-1);
+ else
+ replacement = "";
+ }
+ str = apr_pstrcat(r->pool, before, replacement, after, NULL);
+ ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
+ "Interpolating %s => %s", var, replacement) ;
}
-
- apr_table_unset(f->r->headers_out, "Content-Length") ;
- apr_table_unset(f->r->headers_out, "ETag") ;
- return OK ;
+ return str;
}
-static saxctxt* check_filter_init (ap_filter_t* f) {
+/* Build the per-request rule list ctx->map from the configured ctx->cfg->map.
+ * A rule with a condition is kept only if the condition on its environment
+ * variable holds (rel == 1: must be satisfied, rel == -1: must not be).
+ * Kept rules are copied into r->pool; their from/to strings are interpolated
+ * when M_INTERPOLATE_FROM / M_INTERPOLATE_TO is set, and an interpolated
+ * M_REGEX pattern is recompiled.  Rules whose interpolated pattern is empty
+ * or fails to compile are left out.
+ */
+static void fixup_rules(saxctxt* ctx) {
+ const char* thisval;
+ urlmap* newp;
+ urlmap* p;
+ urlmap* prev = NULL;
+ request_rec* r = ctx->f->r;
+ int has_cond;
+
+ for (p = ctx->cfg->map; p; p = p->next) {
+ has_cond = -1;
+ if (p->cond != NULL) {
+ thisval = apr_table_get(r->subprocess_env, p->cond->env);
+ if (!p->cond->val) {
+ /* required to be "anything" */
+ if (thisval)
+ has_cond = 1; /* satisfied */
+ else
+ has_cond = 0; /* unsatisfied */
+ } else {
+ if (thisval && !strcasecmp(p->cond->val, thisval)) {
+ has_cond = 1; /* satisfied */
+ } else {
+ has_cond = 0; /* unsatisfied */
+ }
+ }
+ if (((has_cond == 0) && (p->cond->rel ==1 ))
+ || ((has_cond == 1) && (p->cond->rel == -1))) {
+ continue; /* condition is unsatisfied */
+ }
+ }
- const char* errmsg = NULL ;
- if ( ! f->r->proxyreq ) {
- errmsg = "Non-proxy request; not inserting proxy-html filter" ;
- } else if ( ! f->r->content_type ) {
- errmsg = "No content-type; bailing out of proxy-html filter" ;
- } else if ( strncasecmp(f->r->content_type, "text/html", 9) &&
- strncasecmp(f->r->content_type, "application/xhtml+xml", 21) ) {
- errmsg = "Non-HTML content; not inserting proxy-html filter" ;
+ newp = apr_pmemdup(r->pool, p, sizeof(urlmap));
+
+ if (newp->flags & M_INTERPOLATE_FROM) {
+ newp->from.c = interpolate_vars(r, newp->from.c);
+ if (!newp->from.c || !*newp->from.c)
+ continue; /* don't use empty from-pattern */
+ if (newp->flags & M_REGEX) {
+ newp->from.r = ap_pregcomp(r->pool, newp->from.c, newp->regflags) ;
+ /* ap_pregcomp returns NULL when the pattern does not compile;
+ * a NULL regex must not be linked into the map.
+ */
+ if (!newp->from.r) {
+ ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
+ "Failed to compile interpolated pattern for %s; rule skipped",
+ p->from.c) ;
+ continue;
+ }
+ }
+ }
+ if (newp->flags & M_INTERPOLATE_TO) {
+ newp->to = interpolate_vars(r, newp->to);
+ }
+ /* evaluate p->cond; continue if unsatisfied */
+ /* create new urlmap with memcpy and append to map */
+ /* interpolate from if flagged to do so */
+ /* interpolate to if flagged to do so */
+
+ if (prev != NULL)
+ prev->next = newp ;
+ else
+ ctx->map = newp ;
+ prev = newp ;
}
- if ( errmsg ) {
-#ifndef GO_FASTER
+ if (prev)
+ prev->next = NULL;
+}
+static saxctxt* check_filter_init (ap_filter_t* f) {
+ saxctxt* fctx ;
+ if ( ! f->ctx) {
proxy_html_conf* cfg
= ap_get_module_config(f->r->per_dir_config, &proxy_html_module);
- if ( cfg->verbose ) {
- ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, f->r, errmsg) ;
+ const char* force = apr_table_get(f->r->subprocess_env, "PROXY_HTML_FORCE");
+
+ const char* errmsg = NULL ;
+ if ( !force ) {
+ if ( ! f->r->proxyreq ) {
+ errmsg = "Non-proxy request; not inserting proxy-html filter" ;
+ } else if ( ! f->r->content_type ) {
+ errmsg = "No content-type; bailing out of proxy-html filter" ;
+ } else if ( strncasecmp(f->r->content_type, "text/html", 9) &&
+ strncasecmp(f->r->content_type, "application/xhtml+xml", 21) ) {
+ errmsg = "Non-HTML content; not inserting proxy-html filter" ;
+ }
+ }
+ if (!cfg->links) {
+ errmsg = "No links configured: nothing for proxy-html filter to do";
}
+
+ if ( errmsg ) {
+#ifndef GO_FASTER
+ if ( cfg->verbose ) {
+ ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, f->r, "%s", errmsg) ;
+ }
#endif
- ap_remove_output_filter(f) ;
- return NULL ;
+ ap_remove_output_filter(f) ;
+ return NULL ;
+ }
+
+ fctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(saxctxt)) ;
+ fctx->f = f ;
+ fctx->bb = apr_brigade_create(f->r->pool, f->r->connection->bucket_alloc) ;
+ fctx->cfg = cfg;
+ apr_table_unset(f->r->headers_out, "Content-Length") ;
+
+ if (cfg->interp)
+ fixup_rules(fctx);
+ else
+ fctx->map = cfg->map;
+ /* defer dealing with charset_out until after sniffing charset_in
+ * so we can support setting one to t'other.
+ */
}
- if ( ! f->ctx )
- proxy_html_filter_init(f) ;
return f->ctx ;
}
static int proxy_html_filter(ap_filter_t* f, apr_bucket_brigade* bb) {
xmlCharEncoding enc ;
const char* buf = 0 ;
apr_size_t bytes = 0 ;
+#ifndef USE_OLD_LIBXML2
int xmlopts = XML_PARSE_RECOVER | XML_PARSE_NONET |
XML_PARSE_NOBLANKS | XML_PARSE_NOERROR | XML_PARSE_NOWARNING ;
+#endif
saxctxt* ctxt = check_filter_init(f) ;
+#ifndef GO_FASTER
+ int verbose;
+#endif
if ( ! ctxt )
return ap_pass_brigade(f->next, bb) ;
+#ifndef GO_FASTER
+ verbose = ctxt->cfg->verbose;
+#endif
for ( b = APR_BRIGADE_FIRST(bb) ;
b != APR_BRIGADE_SENTINEL(bb) ;
b = APR_BUCKET_NEXT(b) ) {
- if ( APR_BUCKET_IS_EOS(b) ) {
- if ( ctxt->parser != NULL ) {
- htmlParseChunk(ctxt->parser, buf, 0, 1) ;
+ if ( APR_BUCKET_IS_METADATA(b) ) {
+ if ( APR_BUCKET_IS_EOS(b) ) {
+ if ( ctxt->parser != NULL ) {
+ consume_buffer(ctxt, buf, 0, 1);
+ }
+ APR_BRIGADE_INSERT_TAIL(ctxt->bb,
+ apr_bucket_eos_create(ctxt->bb->bucket_alloc) ) ;
+ ap_pass_brigade(ctxt->f->next, ctxt->bb) ;
+ } else if ( APR_BUCKET_IS_FLUSH(b) ) {
+ /* pass on flush, except at start where it would cause
+ * headers to be sent before doc sniffing
+ */
+ if ( ctxt->parser != NULL ) {
+ ap_fflush(ctxt->f->next, ctxt->bb) ;
+ }
}
- APR_BRIGADE_INSERT_TAIL(ctxt->bb,
- apr_bucket_eos_create(ctxt->bb->bucket_alloc) ) ;
- ap_pass_brigade(ctxt->f->next, ctxt->bb) ;
} else if ( apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ)
== APR_SUCCESS ) {
if ( ctxt->parser == NULL ) {
- if ( buf[bytes] != 0 ) {
- /* make a string for parse routines to play with */
- char* buf1 = apr_palloc(f->r->pool, bytes+1) ;
- memcpy(buf1, buf, bytes) ;
- buf1[bytes] = 0 ;
- buf = buf1 ;
+ const char* cenc;
+ if (!xml2enc_charset ||
+ (xml2enc_charset(f->r, &enc, &cenc) != APR_SUCCESS)) {
+ if (!xml2enc_charset)
+ ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,
+ "No i18n support found. Install mod_xml2enc if required") ;
+ enc = XML_CHAR_ENCODING_NONE;
+ ap_set_content_type(f->r, "text/html;charset=utf-8") ;
+ } else {
+ /* if we wanted a non-default charset_out, insert the
+ * xml2enc filter now that we've sniffed it
+ */
+ if (ctxt->cfg->charset_out && xml2enc_filter) {
+ if (*ctxt->cfg->charset_out != '*')
+ cenc = ctxt->cfg->charset_out;
+ xml2enc_filter(f->r, cenc, ENCIO_OUTPUT);
+ ap_set_content_type(f->r,
+ apr_pstrcat(f->r->pool, "text/html;charset=", cenc, NULL)) ;
+ } else /* Normal case, everything worked, utf-8 output */
+ ap_set_content_type(f->r, "text/html;charset=utf-8") ;
}
-#ifndef GO_FASTER
- enc = sniff_encoding(f->r, buf, bytes, ctxt->cfg->verbose) ;
+
+ ap_fputs(f->next, ctxt->bb, ctxt->cfg->doctype) ;
+ ctxt->parser = htmlCreatePushParserCtxt(&sax, ctxt, buf, 4, 0, enc) ;
+ buf += 4;
+ bytes -= 4;
+ if (ctxt->parser == NULL) {
+ apr_status_t rv = ap_pass_brigade(f->next, bb) ;
+ ap_remove_output_filter(f) ;
+ return rv;
+ }
+ apr_pool_cleanup_register(f->r->pool, ctxt->parser,
+ (int(*)(void*))htmlFreeParserCtxt, apr_pool_cleanup_null) ;
+#ifndef USE_OLD_LIBXML2
+ if ( xmlopts = xmlCtxtUseOptions(ctxt->parser, xmlopts ), xmlopts )
+ ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,
+ "Unsupported parser opts %x", xmlopts) ;
+#endif
if ( ctxt->cfg->metafix )
+#ifndef GO_FASTER
m = metafix(f->r, buf, ctxt->cfg->verbose) ;
#else
- enc = sniff_encoding(f->r, buf, bytes) ;
- if ( ctxt->cfg->metafix )
m = metafix(f->r, buf) ;
#endif
- ap_set_content_type(f->r, "text/html;charset=utf-8") ;
- ap_fputs(f->next, ctxt->bb, ctxt->cfg->doctype) ;
if ( m ) {
- ctxt->parser = htmlCreatePushParserCtxt(ctxt->sax, ctxt,
- buf, m->start, 0, enc ) ;
- htmlParseChunk(ctxt->parser, buf+m->end, bytes-m->end, 0) ;
+ consume_buffer(ctxt, buf, m->start, 0) ;
+ consume_buffer(ctxt, buf+m->end, bytes-m->end, 0) ;
} else {
- ctxt->parser = htmlCreatePushParserCtxt(ctxt->sax, ctxt,
- buf, bytes, 0, enc ) ;
+ consume_buffer(ctxt, buf, bytes, 0) ;
}
- apr_pool_cleanup_register(f->r->pool, ctxt->parser,
- (void*)htmlFreeParserCtxt, apr_pool_cleanup_null) ;
- if ( xmlopts = xmlCtxtUseOptions(ctxt->parser, xmlopts ), xmlopts )
- ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,
- "Unsupported parser opts %x", xmlopts) ;
} else {
- htmlParseChunk(ctxt->parser, buf, bytes, 0) ;
+ consume_buffer(ctxt, buf, bytes, 0) ;
}
} else {
ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r, "Error in bucket read") ;
apr_brigade_cleanup(bb) ;
return APR_SUCCESS ;
}
-static const char* fpi_html =
- "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\">\n" ;
-static const char* fpi_html_legacy =
- "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n" ;
-static const char* fpi_xhtml =
- "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n" ;
-static const char* fpi_xhtml_legacy =
- "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n" ;
-static const char* html_etag = ">" ;
-static const char* xhtml_etag = " />" ;
-/*#define DEFAULT_DOCTYPE fpi_html */
-static const char* DEFAULT_DOCTYPE = "" ;
-#define DEFAULT_ETAG html_etag
static void* proxy_html_config(apr_pool_t* pool, char* x) {
proxy_html_conf* ret = apr_pcalloc(pool, sizeof(proxy_html_conf) ) ;
ret->doctype = DEFAULT_DOCTYPE ;
ret->etag = DEFAULT_ETAG ;
ret->bufsz = 8192 ;
+ /* ret->interp = 1; */
+ /* don't initialise links and events until they get set/used */
return ret ;
}
static void* proxy_html_merge(apr_pool_t* pool, void* BASE, void* ADD) {
proxy_html_conf* add = (proxy_html_conf*) ADD ;
proxy_html_conf* conf = apr_palloc(pool, sizeof(proxy_html_conf)) ;
+ /* don't merge declarations - just use the most specific */
+ conf->links = (add->links == NULL) ? base->links : add->links;
+ conf->events = (add->events == NULL) ? base->events : add->events;
+
+ conf->charset_out = (add->charset_out == NULL)
+ ? base->charset_out : add->charset_out ;
+
if ( add->map && base->map ) {
urlmap* a ;
conf->map = NULL ;
conf->flags = add->flags ^ NORM_RESET ;
conf->metafix = add->metafix ;
conf->extfix = add->extfix ;
+ conf->interp = add->interp ;
conf->strip_comments = add->strip_comments ;
+ conf->enabled = add->enabled;
#ifndef GO_FASTER
conf->verbose = add->verbose ;
#endif
conf->flags = base->flags | add->flags ;
conf->metafix = base->metafix | add->metafix ;
conf->extfix = base->extfix | add->extfix ;
+ conf->interp = base->interp | add->interp ;
conf->strip_comments = base->strip_comments | add->strip_comments ;
+ conf->enabled = add->enabled | base->enabled;
#ifndef GO_FASTER
conf->verbose = base->verbose | add->verbose ;
#endif
}
return conf ;
}
-#define REGFLAG(n,s,c) ( (s&&(strchr((s),(c))!=NULL)) ? (n) : 0 )
-#define XREGFLAG(n,s,c) ( (!s||(strchr((s),(c))==NULL)) ? (n) : 0 )
-static const char* set_urlmap(cmd_parms* cmd, void* CFG,
- const char* from, const char* to, const char* flags) {
- int regflags ;
- proxy_html_conf* cfg = (proxy_html_conf*)CFG ;
- urlmap* map ;
- urlmap* newmap = apr_palloc(cmd->pool, sizeof(urlmap) ) ;
-
- newmap->next = NULL ;
+/*
+ * REGFLAG yields n when flag character c occurs in the flag string s;
+ * XREGFLAG yields n when c does NOT occur in s, or when s is NULL.
+ * The s argument is parenthesised so any expression may be passed.
+ */
+#define REGFLAG(n,s,c) ( ((s)&&(ap_strchr_c((s),(c))!=NULL)) ? (n) : 0 )
+#define XREGFLAG(n,s,c) ( (!(s)||(ap_strchr_c((s),(c))==NULL)) ? (n) : 0 )
+/*
+ * Fill in one urlmap rule.
+ *
+ * pool   - pool used for the compiled regex and the condition record
+ * newmap - rule to populate; its flags, regflags, from, to and cond
+ *          members are written here (next is left alone)
+ * from   - pattern; stored by reference as a literal, or compiled as a
+ *          regex when 'R' is given without 'v'
+ * to     - replacement; stored by reference, not copied
+ * flags  - flag letters, may be NULL: h, e (clear M_HTML / M_EVENTS),
+ *          $, R, L, l, V, v, and the regex options x, i, n, s
+ * cond   - optional condition "[!]name[=value]", or NULL for none
+ *
+ * from and to are not duplicated, so the caller must keep them alive
+ * for as long as newmap is in use.
+ * NOTE(review): the result of ap_pregcomp() is stored unchecked; this
+ * function returns void, so a failed compile cannot be reported here.
+ */
+static void comp_urlmap(apr_pool_t* pool, urlmap* newmap,
+ const char* from, const char* to, const char* flags, const char* cond) {
+ char* eq;
newmap->flags
= XREGFLAG(M_HTML,flags,'h')
| XREGFLAG(M_EVENTS,flags,'e')
| REGFLAG(M_ATEND,flags,'$')
| REGFLAG(M_REGEX,flags,'R')
| REGFLAG(M_LAST,flags,'L')
+ | REGFLAG(M_NOTLAST,flags,'l')
+ | REGFLAG(M_INTERPOLATE_TO,flags,'V')
+ | REGFLAG(M_INTERPOLATE_FROM,flags,'v')
;
-
- if ( cfg->map ) {
- for ( map = cfg->map ; map->next ; map = map->next ) ;
- map->next = newmap ;
- } else
- cfg->map = newmap ;
-
- if ( ! (newmap->flags & M_REGEX) ) {
- newmap->from.c = apr_pstrdup(cmd->pool, from) ;
- newmap->to = apr_pstrdup(cmd->pool, to) ;
+ /* Always record the regex options. The rule may come from
+ * uninitialised memory, and a pattern whose compilation is deferred
+ * ('v' with 'R') would otherwise carry an indeterminate regflags. */
+ newmap->regflags
+ = REGFLAG(AP_REG_EXTENDED,flags,'x')
+ | REGFLAG(AP_REG_ICASE,flags,'i')
+ | REGFLAG(AP_REG_NOSUB,flags,'n')
+ | REGFLAG(AP_REG_NEWLINE,flags,'s')
+ ;
+ if ( ( newmap->flags & M_INTERPOLATE_FROM)
+ || ! (newmap->flags & M_REGEX) ) {
+ newmap->from.c = from ;
+ newmap->to = to ;
} else {
- regflags
- = REGFLAG(AP_REG_EXTENDED,flags,'x')
- | REGFLAG(AP_REG_ICASE,flags,'i')
- | REGFLAG(AP_REG_NOSUB,flags,'n')
- | REGFLAG(AP_REG_NEWLINE,flags,'s')
- ;
- newmap->from.r = ap_pregcomp(cmd->pool, from, regflags) ;
- newmap->to = apr_pstrdup(cmd->pool, to) ;
+ newmap->from.r = ap_pregcomp(pool, from, newmap->regflags) ;
+ newmap->to = to ;
+ }
+ if (cond != NULL) {
+ char* cond_copy;
+ newmap->cond = apr_pcalloc(pool, sizeof(rewritecond));
+ if (cond[0] == '!') {
+ newmap->cond->rel = -1;
+ newmap->cond->env = cond_copy = apr_pstrdup(pool, cond+1);
+ } else {
+ newmap->cond->rel = 1;
+ newmap->cond->env = cond_copy = apr_pstrdup(pool, cond);
+ }
+ /* The search for '=' starts at the second character, so a leading
+ * '=' stays part of the name. An empty name (cond was just "!")
+ * has no second character, so nothing is searched. */
+ eq = (*cond_copy != '\0') ? ap_strchr(cond_copy+1, '=') : NULL;
+ if (eq) {
+ *eq = 0;
+ newmap->cond->val = eq+1;
+ }
+ } else {
+ newmap->cond = NULL;
}
- return NULL ;
}
+/*
+ * Handler for the ProxyHTMLURLMap directive (raw-argument form).
+ *
+ * Splits args into from-pattern, to-pattern, optional flags and an
+ * optional condition, appends a new rule to the end of cfg->map and
+ * hands the words to comp_urlmap().
+ *
+ * Returns NULL on success, or the usage string when no from-pattern
+ * was supplied.
+ */
+static const char* set_urlmap(cmd_parms* cmd, void* CFG, const char* args) {
+ proxy_html_conf* cfg = (proxy_html_conf*)CFG ;
+ urlmap* map ;
+ apr_pool_t* pool = cmd->pool;
+ urlmap* newmap ;
+ const char* usage =
+ "Usage: ProxyHTMLURLMap from-pattern to-pattern [flags] [cond]";
+ const char* from;
+ const char* to;
+ const char* flags;
+ const char* cond = NULL;
+
+ /* ap_getword_conf() hands back an empty string, not NULL, once the
+ * line is exhausted, so a missing pattern must be detected by
+ * testing for emptiness. */
+ from = ap_getword_conf(pool, &args);
+ if (!from || !*from)
+ return usage;
+ /* An empty replacement cannot be told apart from a quoted "" here,
+ * so only a NULL result is rejected. */
+ to = ap_getword_conf(pool, &args);
+ if (!to)
+ return usage;
+ flags = ap_getword_conf(pool, &args);
+ if (flags && *flags)
+ cond = ap_getword_conf(pool, &args);
+ if (cond && !*cond)
+ cond = NULL;
+
+ /* the args look OK, so let's use them */
+ /* zero-filled, so no member of the rule is ever read uninitialised */
+ newmap = apr_pcalloc(pool, sizeof(urlmap) ) ;
+ newmap->next = NULL;
+ if ( cfg->map ) {
+ for ( map = cfg->map ; map->next ; map = map->next ) ;
+ map->next = newmap ;
+ } else
+ cfg->map = newmap ;
+
+ comp_urlmap(pool, newmap, from, to, flags, cond);
+ return NULL;
+}
+
static const char* set_doctype(cmd_parms* cmd, void* CFG, const char* t,
const char* l) {
proxy_html_conf* cfg = (proxy_html_conf*)CFG ;
}
return NULL ;
}
-static void set_param(proxy_html_conf* cfg, const char* arg) {
+/*
+ * Directive handler taking a single option word in arg.
+ * A recognised word ORs the corresponding NORM_* bit into cfg->flags;
+ * a NULL or empty arg is ignored, and the if/else-if chain has no final
+ * else, so an unrecognised word is ignored too.
+ * Always returns NULL, i.e. no error is ever reported.
+ */
+static const char* set_flags(cmd_parms* cmd, void* CFG, const char* arg) {
+ proxy_html_conf* cfg = CFG;
if ( arg && *arg ) {
if ( !strcmp(arg, "lowercase") )
cfg->flags |= NORM_LC ;
else if ( !strcmp(arg, "reset") )
cfg->flags |= NORM_RESET ;
}
+ return NULL ;
+}
+/*
+ * Directive handler: append one event-attribute name to cfg->events.
+ * The array is created on first use from cmd->pool. arg is stored by
+ * reference. Always returns NULL.
+ */
+static const char* set_events(cmd_parms* cmd, void* CFG, const char* arg) {
+ proxy_html_conf* conf = CFG;
+ tattr* slot;
+
+ if (!conf->events) {
+ conf->events = apr_array_make(cmd->pool, 20, sizeof(tattr));
+ }
+ slot = apr_array_push(conf->events) ;
+ slot->val = arg;
+ return NULL ;
}
-static const char* set_flags(cmd_parms* cmd, void* CFG, const char* arg1,
- const char* arg2, const char* arg3) {
- set_param( (proxy_html_conf*)CFG, arg1) ;
- set_param( (proxy_html_conf*)CFG, arg2) ;
- set_param( (proxy_html_conf*)CFG, arg3) ;
+/*
+ * Directive handler: record that attribute att of element elt holds
+ * a link.
+ *
+ * cfg->links is a hash, created on first use, that maps an element name
+ * to an array of tattr records; att is appended to the array for elt.
+ * elt and att are stored by reference. Always returns NULL.
+ */
+static const char* set_links(cmd_parms* cmd, void* CFG,
+ const char* elt, const char* att) {
+ apr_array_header_t* attrs;
+ tattr* attr ;
+ proxy_html_conf* cfg = CFG;
+
+ if (cfg->links == NULL)
+ cfg->links = apr_hash_make(cmd->pool);
+
+ attrs = apr_hash_get(cfg->links, elt, APR_HASH_KEY_STRING) ;
+ if (!attrs) {
+ /* The elements are tattr structs (written through attr->val
+ * below), so the element size must be sizeof(tattr), not the
+ * size of a pointer to one. */
+ attrs = apr_array_make(cmd->pool, 2, sizeof(tattr)) ;
+ apr_hash_set(cfg->links, elt, APR_HASH_KEY_STRING, attrs) ;
+ }
+ attr = apr_array_push(attrs) ;
+ attr->val = att ;
return NULL ;
}
+/*
+ * Configuration directive table. Every entry is registered with
+ * RSRC_CONF|ACCESS_CONF; the list is terminated by the { NULL } entry.
+ */
static const command_rec proxy_html_cmds[] = {
- AP_INIT_TAKE23("ProxyHTMLURLMap", set_urlmap, NULL,
+ AP_INIT_ITERATE("ProxyHTMLEvents", set_events, NULL,
+ RSRC_CONF|ACCESS_CONF, "Strings to be treated as scripting events"),
+ AP_INIT_ITERATE2("ProxyHTMLLinks", set_links, NULL,
+ RSRC_CONF|ACCESS_CONF, "Declare HTML Attributes"),
+ /* raw args: set_urlmap splits the line into words itself */
+ AP_INIT_RAW_ARGS("ProxyHTMLURLMap", set_urlmap, NULL,
RSRC_CONF|ACCESS_CONF, "Map URL From To" ) ,
AP_INIT_TAKE12("ProxyHTMLDoctype", set_doctype, NULL,
RSRC_CONF|ACCESS_CONF, "(HTML|XHTML) [Legacy]" ) ,
- AP_INIT_TAKE123("ProxyHTMLFixups", set_flags, NULL,
+ AP_INIT_ITERATE("ProxyHTMLFixups", set_flags, NULL,
RSRC_CONF|ACCESS_CONF, "Options are lowercase, dospath" ) ,
AP_INIT_FLAG("ProxyHTMLMeta", ap_set_flag_slot,
(void*)APR_OFFSETOF(proxy_html_conf, metafix),
RSRC_CONF|ACCESS_CONF, "Fix META http-equiv elements" ) ,
+ AP_INIT_FLAG("ProxyHTMLInterp", ap_set_flag_slot,
+ (void*)APR_OFFSETOF(proxy_html_conf, interp),
+ RSRC_CONF|ACCESS_CONF,
+ "Support interpolation and conditions in URLMaps" ) ,
AP_INIT_FLAG("ProxyHTMLExtended", ap_set_flag_slot,
(void*)APR_OFFSETOF(proxy_html_conf, extfix),
RSRC_CONF|ACCESS_CONF, "Map URLs in Javascript and CSS" ) ,
AP_INIT_TAKE1("ProxyHTMLBufSize", ap_set_int_slot,
(void*)APR_OFFSETOF(proxy_html_conf, bufsz),
RSRC_CONF|ACCESS_CONF, "Buffer size" ) ,
+ AP_INIT_TAKE1("ProxyHTMLCharsetOut", ap_set_string_slot,
+ (void*)APR_OFFSETOF(proxy_html_conf, charset_out),
+ RSRC_CONF|ACCESS_CONF, "Usage: ProxyHTMLCharsetOut charset" ) ,
+ AP_INIT_FLAG("ProxyHTMLEnable", ap_set_flag_slot,
+ (void*)APR_OFFSETOF(proxy_html_conf, enabled),
+ RSRC_CONF|ACCESS_CONF, "Enable proxy-html and xml2enc filters" ) ,
{ NULL }
} ;
+/*
+ * Start-up initialisation: adds the version component, precompiles
+ * the matchers, fills in the SAX handler table and looks up the optional
+ * functions xml2enc_charset and xml2enc_filter. Always returns OK.
+ */
static int mod_proxy_html(apr_pool_t* p, apr_pool_t* p1, apr_pool_t* p2,
server_rec* s) {
ap_add_version_component(p, VERSION_STRING) ;
+ /* matchers for <meta ... http-equiv ...> elements and the word "content" */
+ seek_meta = ap_pregcomp(p, "<meta[^>]*(http-equiv)[^>]*>",
+ AP_REG_EXTENDED|AP_REG_ICASE) ;
+ seek_content = apr_strmatch_precompile(p, "content", 0);
+ /* clear every SAX callback, then install only the ones we handle */
+ memset(&sax, 0, sizeof(htmlSAXHandler));
+ sax.startElement = pstartElement ;
+ sax.endElement = pendElement ;
+ sax.characters = pcharacters ;
+ sax.comment = pcomment ;
+ sax.cdataBlock = pcdata ;
+ /* optional functions: a lookup may yield NULL, hence the check below */
+ xml2enc_charset = APR_RETRIEVE_OPTIONAL_FN(xml2enc_charset);
+ xml2enc_filter = APR_RETRIEVE_OPTIONAL_FN(xml2enc_filter);
+ if (!xml2enc_charset) {
+ ap_log_perror(APLOG_MARK, APLOG_NOTICE, 0, p2,
+ "I18n support in mod_proxy_html requires mod_xml2enc. "
+ "Without it, non-ASCII characters in proxied pages are "
+ "likely to display incorrectly.");
+ }
return OK ;
}
+/*
+ * insert_filter hook: when the request's configuration has "enabled"
+ * set, call xml2enc_filter (if that optional function was found) and
+ * then add the "proxy-html" output filter to the request.
+ */
+static void proxy_html_insert(request_rec* r) {
+ proxy_html_conf* conf
+ = ap_get_module_config(r->per_dir_config, &proxy_html_module);
+
+ if (!conf->enabled) {
+ return;
+ }
+ if (xml2enc_filter != NULL) {
+ xml2enc_filter(r, NULL, ENCIO_INPUT_CHECKS);
+ }
+ ap_add_output_filter("proxy-html", NULL, r, r->connection);
+}
+/*
+ * Hook registration: registers the "proxy-html" output filter and hooks
+ * mod_proxy_html() into post_config and proxy_html_insert() into
+ * insert_filter.
+ */
static void proxy_html_hooks(apr_pool_t* p) {
- ap_register_output_filter("proxy-html", proxy_html_filter,
- NULL, AP_FTYPE_RESOURCE) ;
+ /* successor list handed to the insert_filter hook registration below */
+ static const char* aszSucc[] = { "mod_filter.c", NULL };
+ /* protocol flags: the filter changes the content and its length */
+ ap_register_output_filter_protocol("proxy-html", proxy_html_filter,
+ NULL, AP_FTYPE_RESOURCE,
+ AP_FILTER_PROTO_CHANGE|AP_FILTER_PROTO_CHANGE_LENGTH) ;
ap_hook_post_config(mod_proxy_html, NULL, NULL, APR_HOOK_MIDDLE) ;
- ap_hook_child_init(proxy_html_child_init, NULL, NULL, APR_HOOK_MIDDLE) ;
+ ap_hook_insert_filter(proxy_html_insert, NULL, aszSucc, APR_HOOK_MIDDLE) ;
}
module AP_MODULE_DECLARE_DATA proxy_html_module = {
STANDARD20_MODULE_STUFF,