diff --git a/configure.ac b/configure.ac index 425b477..6482af0 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -dnl $Id: configure.ac,v 2.64 2003-10-17 16:10:59 rjkaes Exp $ +dnl $Id: configure.ac,v 2.65 2004-01-26 19:11:52 rjkaes Exp $ dnl Devlopers, please strive to achieve this order: dnl @@ -127,6 +127,17 @@ if test x"$upstream_enabled" = x"yes"; then AC_DEFINE(UPSTREAM_SUPPORT) fi +dnl Include support for reverse proxy? +AH_TEMPLATE([REVERSE_SUPPORT], + [Include support for reverse proxy.]) +AC_ARG_ENABLE(reverse, + [AC_HELP_STRING([--enable-reverse], + [Enable support for reverse proxy (default is NO)])], + reverse_enabled=$enableval, reverse_enabled=no) +if test x"$reverse_enabled" = x"yes"; then + AC_DEFINE(REVERSE_SUPPORT) +fi + dnl Include the transparent proxy support AH_TEMPLATE([TRANSPARENT_PROXY], [Include support for using tinyproxy as a transparent proxy.]) diff --git a/doc/tinyproxy.conf b/doc/tinyproxy.conf index 981cd26..d8af086 100644 --- a/doc/tinyproxy.conf +++ b/doc/tinyproxy.conf @@ -255,3 +255,42 @@ ViaProxyName "tinyproxy" # ConnectPort 443 ConnectPort 563 + +# +# Configure one or more ReversePath directives to enable reverse proxy +# support. With reverse proxying it's possible to make a number of +# sites appear as if they were part of a single site. +# +# If you uncomment the following two directives and run tinyproxy +# on your own computer at port 8888, you can access Google using +# http://localhost:8888/google/ and Wired News using +# http://localhost:8888/wired/news/. Neither will actually work +# until you uncomment ReverseMagic as they use absolute linking. +# +#ReversePath "/google/" "http://www.google.com/" +#ReversePath "/wired/" "http://www.wired.com/" + +# +# When using tinyproxy as a reverse proxy, it is STRONGLY recommended +# that the normal proxy is turned off by uncommenting the next directive. +# +#ReverseOnly Yes + +# +# Use a cookie to track reverse proxy mappings. If you need to reverse +# proxy sites which have absolute links you must uncomment this. +# +#ReverseMagic Yes + +# +# The URL that's used to access this reverse proxy. The URL is used to +# rewrite HTTP redirects so that they won't escape the proxy. If you +# have a chain of reverse proxies, you'll need to put the outermost +# URL here (the address which the end user types into his/her browser). +# +# If not set then no rewriting occurs. +# +#ReverseBaseURL "http://localhost:8888/" + + + diff --git a/src/conns.c b/src/conns.c index 6a320ac..4420721 100644 --- a/src/conns.c +++ b/src/conns.c @@ -1,4 +1,4 @@ -/* $Id: conns.c,v 1.19 2003-08-01 00:14:34 rjkaes Exp $ +/* $Id: conns.c,v 1.20 2004-01-26 19:11:51 rjkaes Exp $ * * Create and free the connection structure. One day there could be * other connection related tasks put here, but for now the header @@ -76,6 +76,10 @@ initialize_conn(int client_fd, const char* ipaddr, const char* string_addr) update_stats(STAT_OPEN); +#ifdef REVERSE_SUPPORT + connptr->reversepath = NULL; +#endif + return connptr; error_exit: @@ -123,6 +127,11 @@ destroy_conn(struct conn_s *connptr) if (connptr->client_string_addr) safefree(connptr->client_string_addr); +#ifdef REVERSE_SUPPORT + if (connptr->reversepath) + safefree(connptr->reversepath); +#endif + safefree(connptr); update_stats(STAT_CLOSE); diff --git a/src/conns.h b/src/conns.h index c5ad9a7..5d0422a 100644 --- a/src/conns.h +++ b/src/conns.h @@ -1,4 +1,4 @@ -/* $Id: conns.h,v 1.15 2003-08-01 00:14:34 rjkaes Exp $ +/* $Id: conns.h,v 1.16 2004-01-26 19:11:51 rjkaes Exp $ * * See 'conns.c' for a detailed description. * @@ -66,6 +66,13 @@ struct conn_s { unsigned int major; unsigned int minor; } protocol; + +#ifdef REVERSE_SUPPORT + /* + * Place to store the current per-connection reverse proxy path + */ + char* reversepath; +#endif }; /* diff --git a/src/grammar.y b/src/grammar.y index 00596dd..9f3a74c 100644 --- a/src/grammar.y +++ b/src/grammar.y @@ -1,4 +1,4 @@ -/* $Id: grammar.y,v 1.23 2003-06-26 18:17:09 rjkaes Exp $ +/* $Id: grammar.y,v 1.24 2004-01-26 19:11:51 rjkaes Exp $ * * This is the grammar for tinyproxy's configuration file. It needs to be * in sync with scanner.l. If you know more about yacc and lex than I do @@ -50,6 +50,7 @@ int yylex(void); %token KW_FILTER KW_FILTERURLS KW_FILTEREXTENDED KW_FILTER_DEFAULT_DENY %token KW_FILTER_CASESENSITIVE %token KW_UPSTREAM +%token KW_REVERSEPATH KW_REVERSEONLY KW_REVERSEMAGIC KW_REVERSEBASEURL %token KW_CONNECTPORT KW_BIND %token KW_STATHOST %token KW_ALLOW KW_DENY @@ -165,6 +166,46 @@ statement config.my_domain = $2; #else log_message(LOG_WARNING, "X-Tinyproxy header support was not compiled in."); +#endif + } + | KW_REVERSEPATH string + { +#ifdef REVERSE_SUPPORT + reversepath_add(NULL, $2); +#else + log_message(LOG_WARNING, "Reverse proxy support was not compiled in."); +#endif + } + | KW_REVERSEPATH string string + { +#ifdef REVERSE_SUPPORT + reversepath_add($2, $3); +#else + log_message(LOG_WARNING, "Reverse proxy support was not compiled in."); +#endif + } + | KW_REVERSEONLY yesno + { +#ifdef REVERSE_SUPPORT + config.reverseonly = $2; +#else + log_message(LOG_WARNING, "Reverse proxy support was not compiled in."); +#endif + } + | KW_REVERSEMAGIC yesno + { +#ifdef REVERSE_SUPPORT + config.reversemagic = $2; +#else + log_message(LOG_WARNING, "Reverse proxy support was not compiled in."); +#endif + } + | KW_REVERSEBASEURL string + { +#ifdef REVERSE_SUPPORT + config.reversebaseurl = $2; +#else + log_message(LOG_WARNING, "Reverse proxy support was not compiled in."); #endif } | KW_UPSTREAM unique_address ':' NUMBER diff --git a/src/reqs.c b/src/reqs.c index 3f33a7f..f00fee9 100644 --- a/src/reqs.c +++ b/src/reqs.c @@ -1,4 +1,4 @@ -/* $Id: reqs.c,v 1.108 2003-08-07 16:32:12 rjkaes Exp $ +/* $Id: reqs.c,v 1.109 2004-01-26 19:11:51 rjkaes Exp $ * * This is where all the work in tinyproxy is actually done. Incoming * connections have a new child created for them. The child then @@ -330,7 +330,7 @@ upstream_add(const char *host, int port, const char *domain) if (domain == NULL) { if (!host || host[0] == '\0' || port < 1) { - log_message(LOG_WARNING, "Nonsence upstream rule: invalid host or port"); + log_message(LOG_WARNING, "Nonsense upstream rule: invalid host or port"); goto upstream_cleanup; } @@ -404,7 +404,7 @@ upstream_add(const char *host, int port, const char *domain) return; - upstream_cleanup: +upstream_cleanup: safefree(up->host); safefree(up->domain); safefree(up); @@ -465,6 +465,68 @@ upstream_get(char *host) } #endif +#ifdef REVERSE_SUPPORT +/* + * Add entry to the reversepath list + */ +void +reversepath_add(const char *path, const char *url) +{ + struct reversepath *reverse; + + if (url == NULL) { + log_message(LOG_WARNING, "Illegal reverse proxy rule: missing url"); + return; + } + + if (!strstr(url, "://")) { + log_message(LOG_WARNING, + "Skipping reverse proxy rule: '%s' is not a valid url", url); + return; + } + + if (path && *path != '/') { + log_message(LOG_WARNING, + "Skipping reverse proxy rule: path '%s' doesn't start with a /", path); + return; + } + + if (!(reverse = safemalloc(sizeof (struct reversepath)))) { + log_message(LOG_ERR, "Unable to allocate memory in reversepath_add()"); + return; + } + + if (!path) reverse->path = safestrdup("/"); + else reverse->path = safestrdup(path); + + reverse->url = safestrdup(url); + + reverse->next = config.reversepath_list; + config.reversepath_list = reverse; + + log_message(LOG_INFO, + "Added reverse proxy rule: %s -> %s", reverse->path, reverse->url); +} + +/* + * Check if a request url is in the reversepath list + */ +static struct reversepath * +reversepath_get(char *url) +{ + struct reversepath *reverse = config.reversepath_list; + + while (reverse) { + if (strstr(url, reverse->path) == url) + return reverse; + + reverse = reverse->next; + } + + return NULL; +} +#endif + /* * Create a connection for HTTP connections. */ @@ -488,7 +550,7 @@ establish_http_connection(struct conn_s *connptr, struct request_s *request) } /* - * These two defines are for the SSL tunneling. + * These two defines are for the SSL tunnelling. */ #define SSL_CONNECTION_RESPONSE "HTTP/1.0 200 Connection established" #define PROXY_AGENT "Proxy-agent: " PACKAGE "/" VERSION @@ -517,6 +579,13 @@ process_request(struct conn_s *connptr, hashmap_t hashofheaders) char *url; struct request_s *request; +#ifdef REVERSE_SUPPORT + char *rewrite_url = NULL; + char *cookie = NULL; + char *cookieval; + struct reversepath *reverse; +#endif + int ret; size_t request_len; @@ -577,6 +646,66 @@ process_request(struct conn_s *connptr, hashmap_t hashofheaders) return NULL; } +#ifdef REVERSE_SUPPORT + /* + * Reverse proxy URL rewriting. + */ + if (config.reversepath_list != NULL) { + /* Reverse requests always start with a slash */ + if (*url == '/') { + /* First try locating the reverse mapping by request url */ + reverse = reversepath_get(url); + if (reverse) { + rewrite_url = safemalloc(strlen(url) + + strlen(reverse->url) + 1); + strcpy(rewrite_url, reverse->url); + strcat(rewrite_url, url + strlen(reverse->path)); + } else if (config.reversemagic && + hashmap_entry_by_key(hashofheaders, "cookie", + (void **)&cookie) > 0) { + + /* No match - try the magical tracking cookie next */ + if ((cookieval = strstr(cookie, REVERSE_COOKIE "=")) && + (reverse = reversepath_get(cookieval + + strlen(REVERSE_COOKIE) + 1))) { + + rewrite_url = safemalloc(strlen(url) + + strlen(reverse->url) + 1); + strcpy(rewrite_url, reverse->url); + strcat(rewrite_url, url + 1); + + log_message(LOG_INFO, + "Magical tracking cookie says: %s", + reverse->path); + } + } + } + + /* Forward proxy support off and no reverse path match found */ + if (config.reverseonly && !rewrite_url) { + log_message(LOG_ERR, "Bad request"); + indicate_http_error(connptr, 400, "Bad Request", + "detail", "Request has an invalid URL", + "url", url, + NULL); + + safefree(url); + free_request_struct(request); + + return NULL; + } + + log_message(LOG_CONN, "Rewriting URL: %s -> %s", + url, rewrite_url); + + safefree(url); + url = rewrite_url; + + /* Store reverse path so that the magical tracking cookie can be set */ + if (config.reversemagic) connptr->reversepath = safestrdup(reverse->path); + } +#endif + if (strncasecmp(url, "http://", 7) == 0 || (UPSTREAM_CONFIGURED() && strncasecmp(url, "ftp://", 6) == 0)) { char *skipped_type = strstr(url, "//") + 2; @@ -726,7 +855,7 @@ process_request(struct conn_s *connptr, hashmap_t hashofheaders) request->host); indicate_http_error(connptr, 403, "Filtered", - "detail", "The request you made has been filted", + "detail", "The request you made has been filtered", "url", url, NULL); @@ -1178,6 +1307,10 @@ process_server_headers(struct conn_s *connptr) int i; int ret; +#ifdef REVERSE_SUPPORT + struct reversepath *reverse = config.reversepath_list; +#endif + /* FIXME: Remember to handle a "simple_req" type */ /* Get the response line from the remote server. */ @@ -1251,6 +1384,41 @@ process_server_headers(struct conn_s *connptr) if (ret < 0) goto ERROR_EXIT; +#ifdef REVERSE_SUPPORT + /* Write tracking cookie for the magical reverse proxy path hack */ + if (config.reversemagic && connptr->reversepath) { + ret = write_message(connptr->client_fd, + "Set-Cookie: " REVERSE_COOKIE "=%s; path=/\r\n", + connptr->reversepath); + if (ret < 0) goto ERROR_EXIT; + } + + /* Rewrite the HTTP redirect if needed */ + if (config.reversebaseurl && + hashmap_entry_by_key(hashofheaders, "location", (void **)&header) > 0) { + + /* Look for a matching entry in the reversepath list */ + while (reverse) { + if (strncasecmp(header, + reverse->url, + (len = strlen(reverse->url))) == 0) break; + reverse = reverse->next; + } + + if (reverse) { + ret = write_message(connptr->client_fd, "Location: %s%s%s\r\n", + config.reversebaseurl, (reverse->path + 1), + (header + len)); + if (ret < 0) goto ERROR_EXIT; + + log_message(LOG_INFO, + "Rewriting HTTP redirect: %s -> %s%s%s", header, + config.reversebaseurl, (reverse->path + 1), (header + len)); + hashmap_remove(hashofheaders, "location"); + } + } +#endif + /* * All right, output all the remaining headers to the client. */ diff --git a/src/reqs.h b/src/reqs.h index b324858..d1dac79 100644 --- a/src/reqs.h +++ b/src/reqs.h @@ -1,4 +1,4 @@ -/* $Id: reqs.h,v 1.4 2003-05-29 19:43:57 rjkaes Exp $ +/* $Id: reqs.h,v 1.5 2004-01-26 19:11:51 rjkaes Exp $ * * See 'reqs.c' for a detailed description. * @@ -22,5 +22,6 @@ extern void handle_connection(int fd); extern void add_connect_port_allowed(int port); extern void upstream_add(const char *host, int port, const char *domain); +extern void reversepath_add(const char *path, const char *url); #endif diff --git a/src/scanner.l b/src/scanner.l index 54e01a4..8418ca0 100644 --- a/src/scanner.l +++ b/src/scanner.l @@ -1,4 +1,4 @@ -/* $Id: scanner.l,v 1.22 2003-06-26 18:26:10 rjkaes Exp $ +/* $Id: scanner.l,v 1.23 2004-01-26 19:11:51 rjkaes Exp $ * * This builds the scanner for the tinyproxy configuration file. This * file needs to stay in sync with grammar.y. If someone knows lex and yacc @@ -24,12 +24,12 @@ struct keyword { char *kw_name; - int kw_token; + int kw_token; }; static struct keyword keywords[] = { - /* statements */ - { "port", KW_PORT }, + /* statements */ + { "port", KW_PORT }, { "logfile", KW_LOGFILE }, { "syslog", KW_SYSLOG }, { "maxclients", KW_MAXCLIENTS }, @@ -44,24 +44,28 @@ static struct keyword keywords[] = { { "group", KW_GROUP }, { "anonymous", KW_ANONYMOUS }, { "filter", KW_FILTER }, - { "filterurls", KW_FILTERURLS }, - { "filterextended", KW_FILTEREXTENDED }, - { "filterdefaultdeny", KW_FILTER_DEFAULT_DENY }, - { "filtercasesensitive", KW_FILTER_CASESENSITIVE }, + { "filterurls", KW_FILTERURLS }, + { "filterextended", KW_FILTEREXTENDED }, + { "filterdefaultdeny", KW_FILTER_DEFAULT_DENY }, + { "filtercasesensitive", KW_FILTER_CASESENSITIVE }, { "xtinyproxy", KW_XTINYPROXY }, - { "upstream", KW_UPSTREAM }, + { "reversepath", KW_REVERSEPATH }, + { "reverseonly", KW_REVERSEONLY }, + { "reversemagic", KW_REVERSEMAGIC }, + { "reversebaseurl", KW_REVERSEBASEURL }, + { "upstream", KW_UPSTREAM }, { "allow", KW_ALLOW }, - { "deny", KW_DENY }, - { "connectport", KW_CONNECTPORT }, - { "bind", KW_BIND }, - { "viaproxyname", KW_VIA_PROXY_NAME }, - { "stathost", KW_STATHOST }, + { "deny", KW_DENY }, + { "connectport", KW_CONNECTPORT }, + { "bind", KW_BIND }, + { "viaproxyname", KW_VIA_PROXY_NAME }, + { "stathost", KW_STATHOST }, { "errorfile", KW_ERRORPAGE }, { "defaulterrorfile", KW_DEFAULT_ERRORPAGE }, { "statfile", KW_STATPAGE }, - /* loglevel and the settings */ - { "loglevel", KW_LOGLEVEL }, + /* loglevel and the settings */ + { "loglevel", KW_LOGLEVEL }, { "critical", KW_LOG_CRITICAL }, { "error", KW_LOG_ERROR }, { "warning", KW_LOG_WARNING }, diff --git a/src/tinyproxy.c b/src/tinyproxy.c index 76ff993..578a10a 100644 --- a/src/tinyproxy.c +++ b/src/tinyproxy.c @@ -1,4 +1,4 @@ -/* $Id: tinyproxy.c,v 1.46 2003-03-17 04:24:19 rjkaes Exp $ +/* $Id: tinyproxy.c,v 1.47 2004-01-26 19:11:51 rjkaes Exp $ * * The initialize routine. Basically sets up all the initial stuff (logfile, * listening socket, config options, etc.) and then sits there and loops @@ -130,9 +130,9 @@ Options:\n\ -v Display the version number.\n"); /* Display the modes compiled into tinyproxy */ - printf("\nFeatures Compiled In:\n"); + printf("\nFeatures compiled in:\n"); #ifdef XTINYPROXY_ENABLE - printf(" XTinyproxy Header\n"); + printf(" XTinyproxy header\n"); #endif /* XTINYPROXY */ #ifdef FILTER_ENABLE printf(" Filtering\n"); @@ -141,8 +141,11 @@ Options:\n\ printf(" Debugging code\n"); #endif /* NDEBUG */ #ifdef TRANSPARENT_PROXY - printf(" Transparent Proxy Support\n"); + printf(" Transparent proxy support\n"); #endif /* TRANSPARENT_PROXY */ +#ifdef REVERSE_SUPPORT + printf(" Reverse proxy support\n"); +#endif /* REVERSE_SUPPORT */ } int diff --git a/src/tinyproxy.h b/src/tinyproxy.h index 0c4e461..ad02120 100644 --- a/src/tinyproxy.h +++ b/src/tinyproxy.h @@ -1,4 +1,4 @@ -/* $Id: tinyproxy.h,v 1.42 2003-08-01 00:14:34 rjkaes Exp $ +/* $Id: tinyproxy.h,v 1.43 2004-01-26 19:11:51 rjkaes Exp $ * * See 'tinyproxy.c' for a detailed description. * @@ -36,6 +36,16 @@ struct upstream { }; #endif +#ifdef REVERSE_SUPPORT +struct reversepath { + struct reversepath *next; + char *path; + char *url; +}; + +#define REVERSE_COOKIE "yummy_magical_cookie" +#endif + /* * Hold all the configuration time information. */ @@ -58,6 +68,12 @@ struct config_s { #ifdef XTINYPROXY_ENABLE char *my_domain; #endif +#ifdef REVERSE_SUPPORT + struct reversepath *reversepath_list; + unsigned int reverseonly; /* boolean */ + unsigned int reversemagic; /* boolean */ + char *reversebaseurl; +#endif #ifdef UPSTREAM_SUPPORT struct upstream *upstream_list; #endif /* UPSTREAM_SUPPORT */