-/* globals set once at startup */
-static ap_regex_t* seek_meta_ctype ;
-static ap_regex_t* seek_charset ;
-static ap_regex_t* seek_meta ;
-
-static xmlCharEncoding sniff_encoding(saxctxt* ctx, const char* cbuf,
- size_t bytes) {
-#ifndef GO_FASTER
- int verbose = ctx->cfg->verbose;
-#endif
- request_rec* r = ctx->f->r ;
- proxy_html_conf* cfg = ctx->cfg ;
- xmlCharEncoding ret ;
- char* p ;
- ap_regmatch_t match[2] ;
- char* buf = (char*)cbuf ;
- apr_xlate_t* convset;
-
- VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
- "Content-Type is %s", r->content_type) ) ;
-
-/* If we've got it in the HTTP headers, there's nothing to do */
- if ( r->content_type &&
- ( p = ap_strcasestr(r->content_type, "charset=") , p > 0 ) ) {
- p += 8 ;
- if ( ctx->encoding = apr_pstrndup(r->pool, p, strcspn(p, " ;") ) ,
- ctx->encoding ) {
- VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
- "Got charset %s from HTTP headers", ctx->encoding) ) ;
- if ( ret = xmlParseCharEncoding(ctx->encoding),
- ((ret != XML_CHAR_ENCODING_ERROR )
- && (ret != XML_CHAR_ENCODING_NONE))) {
- return ret ;
- }
- }
- }
-
-/* to sniff, first we look for BOM */
- if (ctx->encoding == NULL) {
- if ( ret = xmlDetectCharEncoding((const xmlChar*)buf, bytes),
- ret != XML_CHAR_ENCODING_NONE ) {
- VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
- "Got charset from XML rules.") ) ;
- return ret ;
- }
-
-/* If none of the above, look for a META-thingey */
- if ( ap_regexec(seek_meta_ctype, buf, 1, match, 0) == 0 ) {
- p = apr_pstrndup(r->pool, buf + match[0].rm_so,
- match[0].rm_eo - match[0].rm_so) ;
- if ( ap_regexec(seek_charset, p, 2, match, 0) == 0 )
- ctx->encoding = apr_pstrndup(r->pool, p+match[1].rm_so,
- match[1].rm_eo - match[1].rm_so) ;
- }
- }
-
-/* either it's set to something we found or it's still the default */
- if ( ctx->encoding ) {
- VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
- "Got charset %s from HTML META", ctx->encoding) ) ;
- if ( ret = xmlParseCharEncoding(ctx->encoding),
- ((ret != XML_CHAR_ENCODING_ERROR )
- && (ret != XML_CHAR_ENCODING_NONE))) {
- return ret ;
- }
-/* Unsupported charset. Can we get (iconv) support through apr_xlate? */
-/* Aaargh! libxml2 has undocumented <META-crap> support. So this fails
- * if metafix is not active. Have to make it conditional.
- */
- if (cfg->metafix) {
- VERBOSE( ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
- "Charset %s not supported by libxml2; trying apr_xlate", ctx->encoding) ) ;
- if (apr_xlate_open(&convset, "UTF-8", ctx->encoding, r->pool) == APR_SUCCESS) {
- ctx->conv_in = apr_pcalloc(r->pool, sizeof(conv_t));
- ctx->conv_in->convset = convset ;
- return XML_CHAR_ENCODING_UTF8 ;
- } else {
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "Charset %s not supported. Consider aliasing it?", ctx->encoding) ;
- }
- } else {
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "Charset %s not supported. Consider aliasing it or use metafix?",
- ctx->encoding) ;
- }
- }
-
-
-/* Use configuration default as a last resort */
- ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r,
- "No usable charset information; using configuration default") ;
- return (cfg->default_encoding == XML_CHAR_ENCODING_NONE)
- ? XML_CHAR_ENCODING_8859_1 : cfg->default_encoding ;
-}