/********************************************************************
- Copyright (c) 2003-4, WebThing Ltd
- Author: Nick Kew <nick@webthing.com>
+ Copyright (c) 2003-5, WebThing Ltd
+ Author: Nick Kew <nick@webthing.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
/********************************************************************
- Note to Users
+ Note to Users
- You are requested to register as a user, at
- http://apache.webthing.com/registration.html
+ You are requested to register as a user, at
+ http://apache.webthing.com/registration.html
- This entitles you to support from the developer.
- I'm unlikely to reply to help/support requests from
- non-registered users, unless you're paying and/or offering
- constructive feedback such as bug reports or sensible
- suggestions for further development.
+ This entitles you to support from the developer.
+ I'm unlikely to reply to help/support requests from
+ non-registered users, unless you're paying and/or offering
+ constructive feedback such as bug reports or sensible
+ suggestions for further development.
- It also makes a small contribution to the effort
- that's gone into developing this work.
+ It also makes a small contribution to the effort
+ that's gone into developing this work.
*********************************************************************/
/* End of Notices */
+/* GO_FASTER
+ You can #define GO_FASTER to disable informational logging.
+ This disables the ProxyHTMLLogVerbose option altogether.
-/* GO_FASTER
-
- You can #define GO_FASTER to disable informational logging.
- This disables the ProxyHTMLLogVerbose option altogether.
-
- Default is to leave it undefined, and enable verbose logging
- as a configuration option. Binaries are supplied with verbose
- logging enabled.
+ Default is to leave it undefined, and enable verbose logging
+ as a configuration option. Binaries are supplied with verbose
+ logging enabled.
*/
#ifdef GO_FASTER
#define VERBOSE(x) if ( verbose ) x
#endif
-#define VERSION_STRING "proxy_html/2.4"
+#define VERSION_STRING "proxy_html/2.5"
#include <ctype.h>
#include <http_log.h>
#include <apr_strings.h>
+/* To support Apache 2.1/2.2, we need the ap_ forms of the
+ * regexp stuff, and they're now used in the code.
+ * To support 2.0 in the same compile, we #define the
+ * AP_ versions if necessary.
+ */
+#ifndef AP_REG_ICASE
+/* it's 2.0, so we #define the ap_ versions */
+#define ap_regex_t regex_t
+#define ap_regmatch_t regmatch_t
+#define AP_REG_EXTENDED REG_EXTENDED
+#define AP_REG_ICASE REG_ICASE
+#define AP_REG_NOSUB REG_NOSUB
+#define AP_REG_NEWLINE REG_NEWLINE
+#endif
+
module AP_MODULE_DECLARE_DATA proxy_html_module ;
-#define M_HTML 0x01
-#define M_EVENTS 0x02
-#define M_CDATA 0x04
-#define M_REGEX 0x08
-#define M_ATSTART 0x10
-#define M_ATEND 0x20
-#define M_LAST 0x40
+#define M_HTML 0x01
+#define M_EVENTS 0x02
+#define M_CDATA 0x04
+#define M_REGEX 0x08
+#define M_ATSTART 0x10
+#define M_ATEND 0x20
+#define M_LAST 0x40
typedef struct {
unsigned int start ;
}
typedef struct {
- const char* name ;
- const char** attrs ;
+ const char* name ;
+ const char** attrs ;
} elt_t ;
#define NORM_LC 0x1
if ( flags & NORM_LC )
for ( p = str ; *p ; ++p )
if ( isupper(*p) )
- *p = tolower(*p) ;
+ *p = tolower(*p) ;
if ( flags & NORM_MSSLASH )
for ( p = strchr(str, '\\') ; p ; p = strchr(p+1, '\\') )
newbuf = realloc(ctx->buf, ctx->avail) ;
if ( newbuf != ctx->buf ) {
if ( ctx->buf )
- apr_pool_cleanup_kill(ctx->f->r->pool, ctx->buf, (void*)free) ;
+ apr_pool_cleanup_kill(ctx->f->r->pool, ctx->buf, (void*)free) ;
apr_pool_cleanup_register(ctx->f->r->pool, newbuf,
- (void*)free, apr_pool_cleanup_null);
+ (void*)free, apr_pool_cleanup_null);
ctx->buf = newbuf ;
}
}
int verbose = ctx->cfg->verbose ;
#endif
- pappend(ctx, &c, 1) ; /* append null byte */
- /* parse the text for URLs */
+ pappend(ctx, &c, 1) ; /* append null byte */
+ /* parse the text for URLs */
for ( m = ctx->cfg->map ; m ; m = m->next ) {
if ( ! ( m->flags & M_CDATA ) )
- continue ;
+ continue ;
if ( m->flags & M_REGEX ) {
nmatch = 10 ;
offs = 0 ;
while ( ! ap_regexec(m->from.r, ctx->buf+offs, nmatch, pmatch, 0) ) {
- match = pmatch[0].rm_so ;
- s_from = pmatch[0].rm_eo - match ;
- subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf+offs,
- nmatch, pmatch) ;
- s_to = strlen(subs) ;
- len = strlen(ctx->buf) ;
- offs += match ;
- VERBOSE( {
- const char* f = apr_pstrndup(ctx->f->r->pool,
- ctx->buf + offs , s_from ) ;
- ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, ctx->f->r,
- "C/RX: match at %s, substituting %s", f, subs) ;
- } )
- if ( s_to > s_from) {
- preserve(ctx, s_to - s_from) ;
- memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from,
- len + 1 - s_from - offs) ;
- memcpy(ctx->buf+offs, subs, s_to) ;
- } else {
- memcpy(ctx->buf + offs, subs, s_to) ;
- memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from,
- len + 1 - s_from - offs) ;
- }
- offs += s_to ;
+ match = pmatch[0].rm_so ;
+ s_from = pmatch[0].rm_eo - match ;
+ subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf+offs,
+ nmatch, pmatch) ;
+ s_to = strlen(subs) ;
+ len = strlen(ctx->buf) ;
+ offs += match ;
+ VERBOSE( {
+ const char* f = apr_pstrndup(ctx->f->r->pool,
+ ctx->buf + offs , s_from ) ;
+ ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, ctx->f->r,
+ "C/RX: match at %s, substituting %s", f, subs) ;
+ } )
+ if ( s_to > s_from) {
+ preserve(ctx, s_to - s_from) ;
+ memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from,
+ len + 1 - s_from - offs) ;
+ memcpy(ctx->buf+offs, subs, s_to) ;
+ } else {
+ memcpy(ctx->buf + offs, subs, s_to) ;
+ memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from,
+ len + 1 - s_from - offs) ;
+ }
+ offs += s_to ;
}
} else {
s_from = strlen(m->from.c) ;
s_to = strlen(m->to) ;
for ( found = strstr(ctx->buf, m->from.c) ; found ;
- found = strstr(ctx->buf+match+s_to, m->from.c) ) {
- match = found - ctx->buf ;
- if ( ( m->flags & M_ATSTART ) && ( match != 0) )
- break ;
- len = strlen(ctx->buf) ;
- if ( ( m->flags & M_ATEND ) && ( match < (len - s_from) ) )
- continue ;
- VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, ctx->f->r,
- "C: matched %s, substituting %s", m->from.c, m->to) ) ;
- if ( s_to > s_from ) {
- preserve(ctx, s_to - s_from) ;
- memmove(ctx->buf+match+s_to, ctx->buf+match+s_from,
- len + 1 - s_from - match) ;
- memcpy(ctx->buf+match, m->to, s_to) ;
- } else {
- memcpy(ctx->buf+match, m->to, s_to) ;
- memmove(ctx->buf+match+s_to, ctx->buf+match+s_from,
- len + 1 - s_from - match) ;
- }
+ found = strstr(ctx->buf+match+s_to, m->from.c) ) {
+ match = found - ctx->buf ;
+ if ( ( m->flags & M_ATSTART ) && ( match != 0) )
+ break ;
+ len = strlen(ctx->buf) ;
+ if ( ( m->flags & M_ATEND ) && ( match < (len - s_from) ) )
+ continue ;
+ VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, ctx->f->r,
+ "C: matched %s, substituting %s", m->from.c, m->to) ) ;
+ if ( s_to > s_from ) {
+ preserve(ctx, s_to - s_from) ;
+ memmove(ctx->buf+match+s_to, ctx->buf+match+s_from,
+ len + 1 - s_from - match) ;
+ memcpy(ctx->buf+match, m->to, s_to) ;
+ } else {
+ memcpy(ctx->buf+match, m->to, s_to) ;
+ memmove(ctx->buf+match+s_to, ctx->buf+match+s_from,
+ len + 1 - s_from - match) ;
+ }
}
}
}
saxctxt* ctx = (saxctxt*) ctxt ;
if ( ctx->offset > 0 ) {
dump_content(ctx) ;
- ctx->offset = 0 ; /* having dumped it, we can re-use the memory */
+ ctx->offset = 0 ; /* having dumped it, we can re-use the memory */
}
if ( ! is_empty_elt(name) )
ap_fprintf(ctx->f->next, ctx->bb, "</%s>", name) ;
}
static void pstartElement(void* ctxt, const xmlChar* name,
- const xmlChar** attrs ) {
+ const xmlChar** attrs ) {
int num_match ;
size_t offs, len ;
static const char* inputattr[] = { "src", "usemap", NULL } ;
static const char* scriptattr[] = { "src", "for", NULL } ;
static const char* frameattr[] = { "src", "longdesc", NULL } ;
- static const char* objattr[] = { "classid", "codebase", "data", "usemap", NULL } ;
+ static const char* objattr[] =
+ { "classid", "codebase", "data", "usemap", NULL } ;
static const char* profile[] = { "profile", NULL } ;
static const char* background[] = { "background", NULL } ;
static const char* codebase[] = { "codebase", NULL } ;
{ NULL, NULL }
} ;
static const char* events[] = {
- "onclick" ,
- "ondblclick" ,
- "onmousedown" ,
- "onmouseup" ,
- "onmouseover" ,
- "onmousemove" ,
- "onmouseout" ,
- "onkeypress" ,
- "onkeydown" ,
- "onkeyup" ,
- "onfocus" ,
- "onblur" ,
- "onload" ,
- "onunload" ,
- "onsubmit" ,
- "onreset" ,
- "onselect" ,
- "onchange" ,
- NULL
+ "onclick" ,
+ "ondblclick" ,
+ "onmousedown" ,
+ "onmouseup" ,
+ "onmouseover" ,
+ "onmousemove" ,
+ "onmouseout" ,
+ "onkeypress" ,
+ "onkeydown" ,
+ "onkeyup" ,
+ "onfocus" ,
+ "onblur" ,
+ "onload" ,
+ "onunload" ,
+ "onsubmit" ,
+ "onreset" ,
+ "onselect" ,
+ "onchange" ,
+ NULL
} ;
ap_fputc(ctx->f->next, ctx->bb, '<') ;
linkattrs = 0 ;
for ( elt = linked_elts; elt->name != NULL ; ++elt )
if ( !strcmp(elt->name, name) ) {
- linkattrs = elt->attrs ;
- break ;
+ linkattrs = elt->attrs ;
+ break ;
}
for ( a = attrs ; *a ; a += 2 ) {
ctx->offset = 0 ;
if ( a[1] ) {
- pappend(ctx, a[1], strlen(a[1])+1) ;
- is_uri = ATTR_IGNORE ;
- if ( linkattrs ) {
- for ( linkattr = linkattrs ; *linkattr ; ++linkattr) {
- if ( !strcmp(*linkattr, *a) ) {
- is_uri = ATTR_URI ;
- break ;
- }
- }
- }
- if ( (is_uri == ATTR_IGNORE) && ctx->cfg->extfix ) {
- for ( linkattr = events; *linkattr; ++linkattr ) {
- if ( !strcmp(*linkattr, *a) ) {
- is_uri = ATTR_EVENT ;
- break ;
- }
- }
- }
- switch ( is_uri ) {
- case ATTR_URI:
- num_match = 0 ;
- for ( m = ctx->cfg->map ; m ; m = m->next ) {
- if ( ! ( m->flags & M_HTML ) )
- continue ;
- if ( m->flags & M_REGEX ) {
- nmatch = 10 ;
- if ( ! ap_regexec(m->from.r, ctx->buf, nmatch, pmatch, 0) ) {
- ++num_match ;
- offs = match = pmatch[0].rm_so ;
- s_from = pmatch[0].rm_eo - match ;
- subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf+offs,
- nmatch, pmatch) ;
- VERBOSE( {
- const char* f = apr_pstrndup(ctx->f->r->pool,
- ctx->buf + offs , s_from ) ;
- ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, ctx->f->r,
- "H/RX: match at %s, substituting %s", f, subs) ;
- } )
- s_to = strlen(subs) ;
- len = strlen(ctx->buf) ;
- if ( s_to > s_from) {
- preserve(ctx, s_to - s_from) ;
- memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from,
- len + 1 - s_from - offs) ;
- memcpy(ctx->buf+offs, subs, s_to) ;
- } else {
- memcpy(ctx->buf + offs, subs, s_to) ;
- memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from,
- len + 1 - s_from - offs) ;
- }
- }
- } else {
- s_from = strlen(m->from.c) ;
- if ( ! strncasecmp(ctx->buf, m->from.c, s_from ) ) {
- ++num_match ;
- s_to = strlen(m->to) ;
- len = strlen(ctx->buf) ;
- VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, ctx->f->r,
- "H: matched %s, substituting %s", m->from.c, m->to) ) ;
- if ( s_to > s_from ) {
- preserve(ctx, s_to - s_from) ;
- memmove(ctx->buf+s_to, ctx->buf+s_from,
- len + 1 - s_from ) ;
- memcpy(ctx->buf, m->to, s_to) ;
- } else { /* it fits in the existing space */
- memcpy(ctx->buf, m->to, s_to) ;
- memmove(ctx->buf+s_to, ctx->buf+s_from,
- len + 1 - s_from) ;
- }
- break ;
- }
- }
- if ( num_match > 0 ) /* URIs only want one match */
- break ;
- }
- break ;
- case ATTR_EVENT:
- for ( m = ctx->cfg->map ; m ; m = m->next ) {
- num_match = 0 ; /* reset here since we're working per-rule */
- if ( ! ( m->flags & M_EVENTS ) )
- continue ;
- if ( m->flags & M_REGEX ) {
- nmatch = 10 ;
- offs = 0 ;
- while ( ! ap_regexec(m->from.r, ctx->buf+offs,
- nmatch, pmatch, 0) ) {
- match = pmatch[0].rm_so ;
- s_from = pmatch[0].rm_eo - match ;
- subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf+offs,
- nmatch, pmatch) ;
- VERBOSE( {
- const char* f = apr_pstrndup(ctx->f->r->pool,
- ctx->buf + offs , s_from ) ;
- ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, ctx->f->r,
- "E/RX: match at %s, substituting %s", f, subs) ;
- } )
- s_to = strlen(subs) ;
- offs += match ;
- len = strlen(ctx->buf) ;
- if ( s_to > s_from) {
- preserve(ctx, s_to - s_from) ;
- memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from,
- len + 1 - s_from - offs) ;
- memcpy(ctx->buf+offs, subs, s_to) ;
- } else {
- memcpy(ctx->buf + offs, subs, s_to) ;
- memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from,
- len + 1 - s_from - offs) ;
- }
- offs += s_to ;
- ++num_match ;
- }
- } else {
- found = strstr(ctx->buf, m->from.c) ;
- if ( (m->flags & M_ATSTART) && ( found != ctx->buf) )
- continue ;
- while ( found ) {
- s_from = strlen(m->from.c) ;
- s_to = strlen(m->to) ;
- match = found - ctx->buf ;
- if ( ( s_from < strlen(found) ) && (m->flags & M_ATEND ) ) {
- found = strstr(ctx->buf+match+s_from, m->from.c) ;
- continue ;
- } else {
- found = strstr(ctx->buf+match+s_to, m->from.c) ;
- }
- VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, ctx->f->r,
- "E: matched %s, substituting %s", m->from.c, m->to) ) ;
- len = strlen(ctx->buf) ;
- if ( s_to > s_from ) {
- preserve(ctx, s_to - s_from) ;
- memmove(ctx->buf+match+s_to, ctx->buf+match+s_from,
- len + 1 - s_from - match) ;
- memcpy(ctx->buf+match, m->to, s_to) ;
- } else {
- memcpy(ctx->buf+match, m->to, s_to) ;
- memmove(ctx->buf+match+s_to, ctx->buf+match+s_from,
- len + 1 - s_from - match) ;
- }
- ++num_match ;
- }
- }
- if ( num_match && ( m->flags & M_LAST ) )
- break ;
- }
- break ;
- case ATTR_IGNORE:
- break ;
- }
+ pappend(ctx, a[1], strlen(a[1])+1) ;
+ is_uri = ATTR_IGNORE ;
+ if ( linkattrs ) {
+ for ( linkattr = linkattrs ; *linkattr ; ++linkattr) {
+ if ( !strcmp(*linkattr, *a) ) {
+ is_uri = ATTR_URI ;
+ break ;
+ }
+ }
+ }
+ if ( (is_uri == ATTR_IGNORE) && ctx->cfg->extfix ) {
+ for ( linkattr = events; *linkattr; ++linkattr ) {
+ if ( !strcmp(*linkattr, *a) ) {
+ is_uri = ATTR_EVENT ;
+ break ;
+ }
+ }
+ }
+ switch ( is_uri ) {
+ case ATTR_URI:
+ num_match = 0 ;
+ for ( m = ctx->cfg->map ; m ; m = m->next ) {
+ if ( ! ( m->flags & M_HTML ) )
+ continue ;
+ if ( m->flags & M_REGEX ) {
+ nmatch = 10 ;
+ if ( ! ap_regexec(m->from.r, ctx->buf, nmatch, pmatch, 0) ) {
+ ++num_match ;
+ offs = match = pmatch[0].rm_so ;
+ s_from = pmatch[0].rm_eo - match ;
+ subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf+offs,
+ nmatch, pmatch) ;
+ VERBOSE( {
+ const char* f = apr_pstrndup(ctx->f->r->pool,
+ ctx->buf + offs , s_from ) ;
+ ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, ctx->f->r,
+ "H/RX: match at %s, substituting %s", f, subs) ;
+ } )
+ s_to = strlen(subs) ;
+ len = strlen(ctx->buf) ;
+ if ( s_to > s_from) {
+ preserve(ctx, s_to - s_from) ;
+ memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from,
+ len + 1 - s_from - offs) ;
+ memcpy(ctx->buf+offs, subs, s_to) ;
+ } else {
+ memcpy(ctx->buf + offs, subs, s_to) ;
+ memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from,
+ len + 1 - s_from - offs) ;
+ }
+ }
+ } else {
+ s_from = strlen(m->from.c) ;
+ if ( ! strncasecmp(ctx->buf, m->from.c, s_from ) ) {
+ ++num_match ;
+ s_to = strlen(m->to) ;
+ len = strlen(ctx->buf) ;
+ VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, ctx->f->r,
+ "H: matched %s, substituting %s", m->from.c, m->to) ) ;
+ if ( s_to > s_from ) {
+ preserve(ctx, s_to - s_from) ;
+ memmove(ctx->buf+s_to, ctx->buf+s_from,
+ len + 1 - s_from ) ;
+ memcpy(ctx->buf, m->to, s_to) ;
+ } else { /* it fits in the existing space */
+ memcpy(ctx->buf, m->to, s_to) ;
+ memmove(ctx->buf+s_to, ctx->buf+s_from,
+ len + 1 - s_from) ;
+ }
+ break ;
+ }
+ }
+ if ( num_match > 0 ) /* URIs only want one match */
+ break ;
+ }
+ break ;
+ case ATTR_EVENT:
+ for ( m = ctx->cfg->map ; m ; m = m->next ) {
+ num_match = 0 ; /* reset here since we're working per-rule */
+ if ( ! ( m->flags & M_EVENTS ) )
+ continue ;
+ if ( m->flags & M_REGEX ) {
+ nmatch = 10 ;
+ offs = 0 ;
+ while ( ! ap_regexec(m->from.r, ctx->buf+offs,
+ nmatch, pmatch, 0) ) {
+ match = pmatch[0].rm_so ;
+ s_from = pmatch[0].rm_eo - match ;
+ subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf+offs,
+ nmatch, pmatch) ;
+ VERBOSE( {
+ const char* f = apr_pstrndup(ctx->f->r->pool,
+ ctx->buf + offs , s_from ) ;
+ ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, ctx->f->r,
+ "E/RX: match at %s, substituting %s", f, subs) ;
+ } )
+ s_to = strlen(subs) ;
+ offs += match ;
+ len = strlen(ctx->buf) ;
+ if ( s_to > s_from) {
+ preserve(ctx, s_to - s_from) ;
+ memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from,
+ len + 1 - s_from - offs) ;
+ memcpy(ctx->buf+offs, subs, s_to) ;
+ } else {
+ memcpy(ctx->buf + offs, subs, s_to) ;
+ memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from,
+ len + 1 - s_from - offs) ;
+ }
+ offs += s_to ;
+ ++num_match ;
+ }
+ } else {
+ found = strstr(ctx->buf, m->from.c) ;
+ if ( (m->flags & M_ATSTART) && ( found != ctx->buf) )
+ continue ;
+ while ( found ) {
+ s_from = strlen(m->from.c) ;
+ s_to = strlen(m->to) ;
+ match = found - ctx->buf ;
+ if ( ( s_from < strlen(found) ) && (m->flags & M_ATEND ) ) {
+ found = strstr(ctx->buf+match+s_from, m->from.c) ;
+ continue ;
+ } else {
+ found = strstr(ctx->buf+match+s_to, m->from.c) ;
+ }
+ VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, ctx->f->r,
+ "E: matched %s, substituting %s", m->from.c, m->to) ) ;
+ len = strlen(ctx->buf) ;
+ if ( s_to > s_from ) {
+ preserve(ctx, s_to - s_from) ;
+ memmove(ctx->buf+match+s_to, ctx->buf+match+s_from,
+ len + 1 - s_from - match) ;
+ memcpy(ctx->buf+match, m->to, s_to) ;
+ } else {
+ memcpy(ctx->buf+match, m->to, s_to) ;
+ memmove(ctx->buf+match+s_to, ctx->buf+match+s_from,
+ len + 1 - s_from - match) ;
+ }
+ ++num_match ;
+ }
+ }
+ if ( num_match && ( m->flags & M_LAST ) )
+ break ;
+ }
+ break ;
+ case ATTR_IGNORE:
+ break ;
+ }
}
if ( ! a[1] )
- ap_fputstrs(ctx->f->next, ctx->bb, " ", a[0], NULL) ;
+ ap_fputstrs(ctx->f->next, ctx->bb, " ", a[0], NULL) ;
else {
- if ( ctx->cfg->flags != 0 )
- normalise(ctx->cfg->flags, ctx->buf) ;
+ if ( ctx->cfg->flags != 0 )
+ normalise(ctx->cfg->flags, ctx->buf) ;
- /* write the attribute, using pcharacters to html-escape
- anything that needs it in the value.
- */
- ap_fputstrs(ctx->f->next, ctx->bb, " ", a[0], "=\"", NULL) ;
- pcharacters(ctx, ctx->buf, strlen(ctx->buf)) ;
- ap_fputc(ctx->f->next, ctx->bb, '"') ;
+ /* write the attribute, using pcharacters to html-escape
+ anything that needs it in the value.
+ */
+ ap_fputstrs(ctx->f->next, ctx->bb, " ", a[0], "=\"", NULL) ;
+ pcharacters(ctx, ctx->buf, strlen(ctx->buf)) ;
+ ap_fputc(ctx->f->next, ctx->bb, '"') ;
}
}
}
static void proxy_html_child_init(apr_pool_t* pool, server_rec* s) {
seek_meta_ctype = ap_pregcomp(pool,
- "(<meta[^>]*http-equiv[ \t\r\n='\"]*content-type[^>]*>)",
- AP_REG_EXTENDED|AP_REG_ICASE) ;
+ "(<meta[^>]*http-equiv[ \t\r\n='\"]*content-type[^>]*>)",
+ AP_REG_EXTENDED|AP_REG_ICASE) ;
seek_charset = ap_pregcomp(pool, "charset=([A-Za-z0-9_-]+)",
- AP_REG_EXTENDED|AP_REG_ICASE) ;
+ AP_REG_EXTENDED|AP_REG_ICASE) ;
seek_meta = ap_pregcomp(pool, "<meta[^>]*(http-equiv)[^>]*>",
- AP_REG_EXTENDED|AP_REG_ICASE) ;
+ AP_REG_EXTENDED|AP_REG_ICASE) ;
}
-static xmlCharEncoding sniff_encoding(request_rec* r, const char* cbuf, size_t bytes
+static xmlCharEncoding sniff_encoding(
+ request_rec* r, const char* cbuf, size_t bytes
#ifndef GO_FASTER
- , int verbose
+ , int verbose
#endif
- ) {
+ ) {
xmlCharEncoding ret ;
char* encoding = NULL ;
char* p ;
- char* q ;
ap_regmatch_t match[2] ;
unsigned char* buf = (unsigned char*)cbuf ;
VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
- "Content-Type is %s", r->content_type) ) ;
+ "Content-Type is %s", r->content_type) ) ;
/* If we've got it in the HTTP headers, there's nothing to do */
if ( r->content_type &&
- ( p = ap_strcasestr(r->content_type, "charset=") , p > 0 ) ) {
+ ( p = ap_strcasestr(r->content_type, "charset=") , p > 0 ) ) {
p += 8 ;
if ( encoding = apr_pstrndup(r->pool, p, strcspn(p, " ;") ) , encoding ) {
if ( ret = xmlParseCharEncoding(encoding),
- ret != XML_CHAR_ENCODING_ERROR ) {
- VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
- "Got charset %s from HTTP headers", encoding) ) ;
- return ret ;
+ ret != XML_CHAR_ENCODING_ERROR ) {
+ VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
+ "Got charset %s from HTTP headers", encoding) ) ;
+ return ret ;
} else {
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "Unsupported charset %s in HTTP headers", encoding) ;
- encoding = NULL ;
+ ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
+ "Unsupported charset %s in HTTP headers", encoding) ;
+ encoding = NULL ;
}
}
}
/* to sniff, first we look for BOM */
if ( ret = xmlDetectCharEncoding(buf, bytes),
- ret != XML_CHAR_ENCODING_NONE ) {
+ ret != XML_CHAR_ENCODING_NONE ) {
VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
- "Got charset from XML rules.") ) ;
+ "Got charset from XML rules.") ) ;
return ret ;
}
encoding = NULL ;
if ( ap_regexec(seek_meta_ctype, buf, 1, match, 0) == 0 ) {
p = apr_pstrndup(r->pool, buf + match[0].rm_so,
- match[0].rm_eo - match[0].rm_so) ;
+ match[0].rm_eo - match[0].rm_so) ;
if ( ap_regexec(seek_charset, p, 2, match, 0) == 0 )
encoding = apr_pstrndup(r->pool, p+match[1].rm_so,
- match[1].rm_eo - match[1].rm_so) ;
+ match[1].rm_eo - match[1].rm_so) ;
}
/* either it's set to something we found or it's still the default */
- if ( encoding )
+ if ( encoding ) {
if ( ret = xmlParseCharEncoding(encoding),
- ret != XML_CHAR_ENCODING_ERROR ) {
+ ret != XML_CHAR_ENCODING_ERROR ) {
VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
- "Got charset %s from HTML META", encoding) ) ;
+ "Got charset %s from HTML META", encoding) ) ;
return ret ;
} else {
ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "Unsupported charset %s in HTML META", encoding) ;
+ "Unsupported charset %s in HTML META", encoding) ;
}
-
+ }
/* the old HTTP default is a last resort */
ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r,
- "No usable charset information: using old HTTP default LATIN1") ;
+ "No usable charset information: using old HTTP default LATIN1") ;
return XML_CHAR_ENCODING_8859_1 ;
}
static meta* metafix(request_rec* r, const char* buf /*, size_t bytes*/
#ifndef GO_FASTER
- , int verbose
+ , int verbose
#endif
- ) {
+ ) {
meta* ret = NULL ;
size_t offs = 0 ;
const char* p ;
header = apr_pstrndup(r->pool, p, q-p) ;
if ( strncasecmp(header, "Content-", 8) ) {
/* find content=... string */
- for ( p = strstr(buf+offs+pmatch[0].rm_so, "content") ; *p ; ) {
- p += 7 ;
- while ( *p && isspace(*p) )
- ++p ;
- if ( *p != '=' )
- continue ;
- while ( *p && isspace(*++p) ) ;
- if ( ( *p == '\'' ) || ( *p == '"' ) ) {
- delim = *p++ ;
- for ( q = p ; *q != delim ; ++q ) ;
- } else {
- for ( q = p ; *q && !isspace(*q) && (*q != '>') ; ++q ) ;
- }
- content = apr_pstrndup(r->pool, p, q-p) ;
- break ;
+ for ( p = ap_strstr((char*)buf+offs+pmatch[0].rm_so, "content") ; *p ; ) {
+ p += 7 ;
+ while ( *p && isspace(*p) )
+ ++p ;
+ if ( *p != '=' )
+ continue ;
+ while ( *p && isspace(*++p) ) ;
+ if ( ( *p == '\'' ) || ( *p == '"' ) ) {
+ delim = *p++ ;
+ for ( q = p ; *q != delim ; ++q ) ;
+ } else {
+ for ( q = p ; *q && !isspace(*q) && (*q != '>') ; ++q ) ;
+ }
+ content = apr_pstrndup(r->pool, p, q-p) ;
+ break ;
}
} else if ( !strncasecmp(header, "Content-Type", 12) ) {
ret = apr_palloc(r->pool, sizeof(meta) ) ;
}
if ( header && content ) {
VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
- "Adding header [%s: %s] from HTML META", header, content) ) ;
+ "Adding header [%s: %s] from HTML META", header, content) ) ;
apr_table_setn(r->headers_out, header, content) ;
}
offs += pmatch[0].rm_eo ;
if ( ! f->r->main && ! f->r->prev ) {
env = apr_table_get(f->r->subprocess_env, "force-response-1.0") ;
if ( !env )
- f->r->chunked = 1 ;
+ f->r->chunked = 1 ;
}
}
} else if ( ! f->r->content_type ) {
errmsg = "No content-type; bailing out of proxy-html filter" ;
} else if ( strncasecmp(f->r->content_type, "text/html", 9) &&
- strncasecmp(f->r->content_type, "application/xhtml+xml", 21) ) {
+ strncasecmp(f->r->content_type, "application/xhtml+xml", 21) ) {
errmsg = "Non-HTML content; not inserting proxy-html filter" ;
}
if ( errmsg ) {
#ifndef GO_FASTER
proxy_html_conf* cfg
- = ap_get_module_config(f->r->per_dir_config, &proxy_html_module);
+ = ap_get_module_config(f->r->per_dir_config, &proxy_html_module);
if ( cfg->verbose ) {
ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, f->r, errmsg) ;
}
xmlCharEncoding enc ;
const char* buf = 0 ;
apr_size_t bytes = 0 ;
+#ifndef USE_OLD_LIBXML2
int xmlopts = XML_PARSE_RECOVER | XML_PARSE_NONET |
- XML_PARSE_NOBLANKS | XML_PARSE_NOERROR | XML_PARSE_NOWARNING ;
+ XML_PARSE_NOBLANKS | XML_PARSE_NOERROR | XML_PARSE_NOWARNING ;
+#endif
saxctxt* ctxt = check_filter_init(f) ;
if ( ! ctxt )
return ap_pass_brigade(f->next, bb) ;
for ( b = APR_BRIGADE_FIRST(bb) ;
- b != APR_BRIGADE_SENTINEL(bb) ;
- b = APR_BUCKET_NEXT(b) ) {
+ b != APR_BRIGADE_SENTINEL(bb) ;
+ b = APR_BUCKET_NEXT(b) ) {
if ( APR_BUCKET_IS_EOS(b) ) {
if ( ctxt->parser != NULL ) {
- htmlParseChunk(ctxt->parser, buf, 0, 1) ;
+ htmlParseChunk(ctxt->parser, buf, 0, 1) ;
}
APR_BRIGADE_INSERT_TAIL(ctxt->bb,
- apr_bucket_eos_create(ctxt->bb->bucket_alloc) ) ;
+ apr_bucket_eos_create(ctxt->bb->bucket_alloc) ) ;
ap_pass_brigade(ctxt->f->next, ctxt->bb) ;
- } else if ( apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ)
- == APR_SUCCESS ) {
+ } else if ( ! APR_BUCKET_IS_METADATA(b) &&
+ apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ)
+ == APR_SUCCESS ) {
if ( ctxt->parser == NULL ) {
- if ( buf[bytes] != 0 ) {
- /* make a string for parse routines to play with */
- char* buf1 = apr_palloc(f->r->pool, bytes+1) ;
- memcpy(buf1, buf, bytes) ;
- buf1[bytes] = 0 ;
- buf = buf1 ;
- }
+ if ( buf && buf[bytes] != 0 ) {
+ /* make a string for parse routines to play with */
+ char* buf1 = apr_palloc(f->r->pool, bytes+1) ;
+ memcpy(buf1, buf, bytes) ;
+ buf1[bytes] = 0 ;
+ buf = buf1 ;
+ }
#ifndef GO_FASTER
- enc = sniff_encoding(f->r, buf, bytes, ctxt->cfg->verbose) ;
- if ( ctxt->cfg->metafix )
- m = metafix(f->r, buf, ctxt->cfg->verbose) ;
+ enc = sniff_encoding(f->r, buf, bytes, ctxt->cfg->verbose) ;
+ if ( ctxt->cfg->metafix )
+ m = metafix(f->r, buf, ctxt->cfg->verbose) ;
#else
- enc = sniff_encoding(f->r, buf, bytes) ;
- if ( ctxt->cfg->metafix )
- m = metafix(f->r, buf) ;
+ enc = sniff_encoding(f->r, buf, bytes) ;
+ if ( ctxt->cfg->metafix )
+ m = metafix(f->r, buf) ;
+#endif
+ ap_set_content_type(f->r, "text/html;charset=utf-8") ;
+ ap_fputs(f->next, ctxt->bb, ctxt->cfg->doctype) ;
+ if ( m ) {
+ ctxt->parser = htmlCreatePushParserCtxt(ctxt->sax, ctxt,
+ buf, m->start, 0, enc ) ;
+ htmlParseChunk(ctxt->parser, buf+m->end, bytes-m->end, 0) ;
+ } else {
+ ctxt->parser = htmlCreatePushParserCtxt(ctxt->sax, ctxt,
+ buf, bytes, 0, enc ) ;
+ }
+ apr_pool_cleanup_register(f->r->pool, ctxt->parser,
+ (void*)htmlFreeParserCtxt, apr_pool_cleanup_null) ;
+#ifndef USE_OLD_LIBXML2
+ if ( xmlopts = xmlCtxtUseOptions(ctxt->parser, xmlopts ), xmlopts )
+ ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,
+ "Unsupported parser opts %x", xmlopts) ;
#endif
- ap_set_content_type(f->r, "text/html;charset=utf-8") ;
- ap_fputs(f->next, ctxt->bb, ctxt->cfg->doctype) ;
- if ( m ) {
- ctxt->parser = htmlCreatePushParserCtxt(ctxt->sax, ctxt,
- buf, m->start, 0, enc ) ;
- htmlParseChunk(ctxt->parser, buf+m->end, bytes-m->end, 0) ;
- } else {
- ctxt->parser = htmlCreatePushParserCtxt(ctxt->sax, ctxt,
- buf, bytes, 0, enc ) ;
- }
- apr_pool_cleanup_register(f->r->pool, ctxt->parser,
- (void*)htmlFreeParserCtxt, apr_pool_cleanup_null) ;
- if ( xmlopts = xmlCtxtUseOptions(ctxt->parser, xmlopts ), xmlopts )
- ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r,
- "Unsupported parser opts %x", xmlopts) ;
} else {
- htmlParseChunk(ctxt->parser, buf, bytes, 0) ;
+ htmlParseChunk(ctxt->parser, buf, bytes, 0) ;
}
} else {
ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r, "Error in bucket read") ;
}
}
- /*ap_fflush(ctxt->f->next, ctxt->bb) ; // uncomment for debug */
+ /*ap_fflush(ctxt->f->next, ctxt->bb) ; // uncomment for debug */
apr_brigade_cleanup(bb) ;
return APR_SUCCESS ;
}
static const char* fpi_html =
- "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\">\n" ;
+ "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\">\n" ;
static const char* fpi_html_legacy =
- "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n" ;
+ "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n" ;
static const char* fpi_xhtml =
- "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n" ;
+ "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n" ;
static const char* fpi_xhtml_legacy =
- "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n" ;
+ "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n" ;
static const char* html_etag = ">" ;
static const char* xhtml_etag = " />" ;
/*#define DEFAULT_DOCTYPE fpi_html */
conf->map = add->map ? add->map : base->map ;
conf->doctype = ( add->doctype == DEFAULT_DOCTYPE )
- ? base->doctype : add->doctype ;
+ ? base->doctype : add->doctype ;
conf->etag = ( add->etag == DEFAULT_ETAG ) ? base->etag : add->etag ;
conf->bufsz = add->bufsz ;
if ( add->flags & NORM_RESET ) {
}
return conf ;
}
-#define REGFLAG(n,s,c) ( (s&&(strchr((s),(c))!=NULL)) ? (n) : 0 )
-#define XREGFLAG(n,s,c) ( (!s||(strchr((s),(c))==NULL)) ? (n) : 0 )
+#define REGFLAG(n,s,c) ( (s&&(ap_strchr((char*)(s),(c))!=NULL)) ? (n) : 0 )
+#define XREGFLAG(n,s,c) ( (!s||(ap_strchr((char*)(s),(c))==NULL)) ? (n) : 0 )
static const char* set_urlmap(cmd_parms* cmd, void* CFG,
- const char* from, const char* to, const char* flags) {
+ const char* from, const char* to, const char* flags) {
int regflags ;
proxy_html_conf* cfg = (proxy_html_conf*)CFG ;
urlmap* map ;
newmap->next = NULL ;
newmap->flags
- = XREGFLAG(M_HTML,flags,'h')
- | XREGFLAG(M_EVENTS,flags,'e')
- | XREGFLAG(M_CDATA,flags,'c')
- | REGFLAG(M_ATSTART,flags,'^')
- | REGFLAG(M_ATEND,flags,'$')
- | REGFLAG(M_REGEX,flags,'R')
- | REGFLAG(M_LAST,flags,'L')
+ = XREGFLAG(M_HTML,flags,'h')
+ | XREGFLAG(M_EVENTS,flags,'e')
+ | XREGFLAG(M_CDATA,flags,'c')
+ | REGFLAG(M_ATSTART,flags,'^')
+ | REGFLAG(M_ATEND,flags,'$')
+ | REGFLAG(M_REGEX,flags,'R')
+ | REGFLAG(M_LAST,flags,'L')
;
if ( cfg->map ) {
newmap->to = apr_pstrdup(cmd->pool, to) ;
} else {
regflags
- = REGFLAG(AP_REG_EXTENDED,flags,'x')
- | REGFLAG(AP_REG_ICASE,flags,'i')
- | REGFLAG(AP_REG_NOSUB,flags,'n')
- | REGFLAG(AP_REG_NEWLINE,flags,'s')
+ = REGFLAG(AP_REG_EXTENDED,flags,'x')
+ | REGFLAG(AP_REG_ICASE,flags,'i')
+ | REGFLAG(AP_REG_NOSUB,flags,'n')
+ | REGFLAG(AP_REG_NEWLINE,flags,'s')
;
newmap->from.r = ap_pregcomp(cmd->pool, from, regflags) ;
newmap->to = apr_pstrdup(cmd->pool, to) ;
return NULL ;
}
static const char* set_doctype(cmd_parms* cmd, void* CFG, const char* t,
- const char* l) {
+ const char* l) {
proxy_html_conf* cfg = (proxy_html_conf*)CFG ;
if ( !strcasecmp(t, "xhtml") ) {
cfg->etag = xhtml_etag ;
}
}
static const char* set_flags(cmd_parms* cmd, void* CFG, const char* arg1,
- const char* arg2, const char* arg3) {
+ const char* arg2, const char* arg3) {
set_param( (proxy_html_conf*)CFG, arg1) ;
set_param( (proxy_html_conf*)CFG, arg2) ;
set_param( (proxy_html_conf*)CFG, arg3) ;
}
static const command_rec proxy_html_cmds[] = {
AP_INIT_TAKE23("ProxyHTMLURLMap", set_urlmap, NULL,
- RSRC_CONF|ACCESS_CONF, "Map URL From To" ) ,
+ RSRC_CONF|ACCESS_CONF, "Map URL From To" ) ,
AP_INIT_TAKE12("ProxyHTMLDoctype", set_doctype, NULL,
- RSRC_CONF|ACCESS_CONF, "(HTML|XHTML) [Legacy]" ) ,
+ RSRC_CONF|ACCESS_CONF, "(HTML|XHTML) [Legacy]" ) ,
AP_INIT_TAKE123("ProxyHTMLFixups", set_flags, NULL,
- RSRC_CONF|ACCESS_CONF, "Options are lowercase, dospath" ) ,
+ RSRC_CONF|ACCESS_CONF, "Options are lowercase, dospath" ) ,
AP_INIT_FLAG("ProxyHTMLMeta", ap_set_flag_slot,
- (void*)APR_OFFSETOF(proxy_html_conf, metafix),
- RSRC_CONF|ACCESS_CONF, "Fix META http-equiv elements" ) ,
+ (void*)APR_OFFSETOF(proxy_html_conf, metafix),
+ RSRC_CONF|ACCESS_CONF, "Fix META http-equiv elements" ) ,
AP_INIT_FLAG("ProxyHTMLExtended", ap_set_flag_slot,
- (void*)APR_OFFSETOF(proxy_html_conf, extfix),
- RSRC_CONF|ACCESS_CONF, "Map URLs in Javascript and CSS" ) ,
+ (void*)APR_OFFSETOF(proxy_html_conf, extfix),
+ RSRC_CONF|ACCESS_CONF, "Map URLs in Javascript and CSS" ) ,
AP_INIT_FLAG("ProxyHTMLStripComments", ap_set_flag_slot,
- (void*)APR_OFFSETOF(proxy_html_conf, strip_comments),
- RSRC_CONF|ACCESS_CONF, "Strip out comments" ) ,
+ (void*)APR_OFFSETOF(proxy_html_conf, strip_comments),
+ RSRC_CONF|ACCESS_CONF, "Strip out comments" ) ,
#ifndef GO_FASTER
AP_INIT_FLAG("ProxyHTMLLogVerbose", ap_set_flag_slot,
- (void*)APR_OFFSETOF(proxy_html_conf, verbose),
- RSRC_CONF|ACCESS_CONF, "Verbose Logging (use with LogLevel Info)" ) ,
+ (void*)APR_OFFSETOF(proxy_html_conf, verbose),
+ RSRC_CONF|ACCESS_CONF, "Verbose Logging (use with LogLevel Info)" ) ,
#endif
AP_INIT_TAKE1("ProxyHTMLBufSize", ap_set_int_slot,
- (void*)APR_OFFSETOF(proxy_html_conf, bufsz),
- RSRC_CONF|ACCESS_CONF, "Buffer size" ) ,
+ (void*)APR_OFFSETOF(proxy_html_conf, bufsz),
+ RSRC_CONF|ACCESS_CONF, "Buffer size" ) ,
{ NULL }
} ;
static int mod_proxy_html(apr_pool_t* p, apr_pool_t* p1, apr_pool_t* p2,
- server_rec* s) {
+ server_rec* s) {
ap_add_version_component(p, VERSION_STRING) ;
return OK ;
}
static void proxy_html_hooks(apr_pool_t* p) {
ap_register_output_filter("proxy-html", proxy_html_filter,
- NULL, AP_FTYPE_RESOURCE) ;
+ NULL, AP_FTYPE_RESOURCE) ;
ap_hook_post_config(mod_proxy_html, NULL, NULL, APR_HOOK_MIDDLE) ;
ap_hook_child_init(proxy_html_child_init, NULL, NULL, APR_HOOK_MIDDLE) ;
}
module AP_MODULE_DECLARE_DATA proxy_html_module = {
- STANDARD20_MODULE_STUFF,
- proxy_html_config,
- proxy_html_merge,
- NULL,
- NULL,
- proxy_html_cmds,
- proxy_html_hooks
+ STANDARD20_MODULE_STUFF,
+ proxy_html_config,
+ proxy_html_merge,
+ NULL,
+ NULL,
+ proxy_html_cmds,
+ proxy_html_hooks
} ;
+