Added reverse proxy support from Kim Holviala. His comments regarding

this addition follow:

  The patch implements a simple reverse proxy (with one funky extra
  feature). It has all the regular features: mapping remote servers to local
  namespace (ReversePath), disabling forward proxying (ReverseOnly) and HTTP
  redirect rewriting (ReverseBaseURL).

  The funky feature is this: You map Google to /google/ and the Google front
  page opens up fine. Type in stuff and click "Google Search" and you'll get
  an error from tinyproxy. Reason for this is that Google's form submits to
  "/search" which unfortunately bypasses our /google/ mapping (if they'd
  submit to "search" without the slash it would have worked ok). Turn on
  ReverseMagic and it starts working....

  ReverseMagic "hijacks" one cookie which it sends to the client browser.
  This cookie contains the current reverse proxy path mapping (in the above
  case /google/) so that even if the site uses absolute links the reverse
  proxy still knows where to map the request.

  And yes, it works. No, I've never seen this done before - I couldn't find
  _any_ working OSS reverse proxies, and the commercial ones I've seen try
  to parse the page and fix all links (in the above case changing "/search"
  to "/google/search"). The problem with modifying the html is that it might
  not be parsable (very common) or it might be encoded so that the proxy
  can't read it (mod_gzip or the like).

  Hope you like that patch. One caveat - I haven't coded in C in like
  three years so my code might be a bit messy.... There shouldn't be any
  security problems though, but you never know. I did all the stuff out of my
  memory without reading any RFCs, but I tested everything with Moz, Konq,
  IE6, Links and Lynx and they all worked fine.
This commit is contained in:
Robert James Kaes 2004-01-26 19:11:52 +00:00
parent f2d846d057
commit 0a8e4e4d8d
10 changed files with 330 additions and 31 deletions

View File

@ -1,4 +1,4 @@
dnl $Id: configure.ac,v 2.64 2003-10-17 16:10:59 rjkaes Exp $ dnl $Id: configure.ac,v 2.65 2004-01-26 19:11:52 rjkaes Exp $
dnl Devlopers, please strive to achieve this order: dnl Devlopers, please strive to achieve this order:
dnl dnl
@ -127,6 +127,17 @@ if test x"$upstream_enabled" = x"yes"; then
AC_DEFINE(UPSTREAM_SUPPORT) AC_DEFINE(UPSTREAM_SUPPORT)
fi fi
dnl Include support for reverse proxy?
AH_TEMPLATE([REVERSE_SUPPORT],
[Include support for reverse proxy.])
AC_ARG_ENABLE(reverse,
[AC_HELP_STRING([--enable-reverse],
[Enable support for reverse proxy (default is NO)])],
reverse_enabled=$enableval, reverse_enabled=no)
if test x"$reverse_enabled" = x"yes"; then
AC_DEFINE(REVERSE_SUPPORT)
fi
dnl Include the transparent proxy support dnl Include the transparent proxy support
AH_TEMPLATE([TRANSPARENT_PROXY], AH_TEMPLATE([TRANSPARENT_PROXY],
[Include support for using tinyproxy as a transparent proxy.]) [Include support for using tinyproxy as a transparent proxy.])

View File

@ -255,3 +255,42 @@ ViaProxyName "tinyproxy"
# #
ConnectPort 443 ConnectPort 443
ConnectPort 563 ConnectPort 563
#
# Configure one or more ReversePath directives to enable reverse proxy
# support. With reverse proxying it's possible to make a number of
# sites appear as if they were part of a single site.
#
# If you uncomment the following two directives and run tinyproxy
# on your own computer at port 8888, you can access Google using
# http://localhost:8888/google/ and Wired News using
# http://localhost:8888/wired/news/. Neither will actually work
# until you uncomment ReverseMagic as they use absolute linking.
#
#ReversePath "/google/" "http://www.google.com/"
#ReversePath "/wired/" "http://www.wired.com/"
#
# When using tinyproxy as a reverse proxy, it is STRONGLY recommended
# that the normal proxy is turned off by uncommenting the next directive.
#
#ReverseOnly Yes
#
# Use a cookie to track reverse proxy mappings. If you need to reverse
# proxy sites which have absolute links you must uncomment this.
#
#ReverseMagic Yes
#
# The URL that's used to access this reverse proxy. The URL is used to
# rewrite HTTP redirects so that they won't escape the proxy. If you
# have a chain of reverse proxies, you'll need to put the outermost
# URL here (the address which the end user types into his/her browser).
#
# If not set then no rewriting occurs.
#
#ReverseBaseURL "http://localhost:8888/"

View File

@ -1,4 +1,4 @@
/* $Id: conns.c,v 1.19 2003-08-01 00:14:34 rjkaes Exp $ /* $Id: conns.c,v 1.20 2004-01-26 19:11:51 rjkaes Exp $
* *
* Create and free the connection structure. One day there could be * Create and free the connection structure. One day there could be
* other connection related tasks put here, but for now the header * other connection related tasks put here, but for now the header
@ -76,6 +76,10 @@ initialize_conn(int client_fd, const char* ipaddr, const char* string_addr)
update_stats(STAT_OPEN); update_stats(STAT_OPEN);
#ifdef REVERSE_SUPPORT
connptr->reversepath = NULL;
#endif
return connptr; return connptr;
error_exit: error_exit:
@ -123,6 +127,11 @@ destroy_conn(struct conn_s *connptr)
if (connptr->client_string_addr) if (connptr->client_string_addr)
safefree(connptr->client_string_addr); safefree(connptr->client_string_addr);
#ifdef REVERSE_SUPPORT
if (connptr->reversepath)
safefree(connptr->reversepath);
#endif
safefree(connptr); safefree(connptr);
update_stats(STAT_CLOSE); update_stats(STAT_CLOSE);

View File

@ -1,4 +1,4 @@
/* $Id: conns.h,v 1.15 2003-08-01 00:14:34 rjkaes Exp $ /* $Id: conns.h,v 1.16 2004-01-26 19:11:51 rjkaes Exp $
* *
* See 'conns.c' for a detailed description. * See 'conns.c' for a detailed description.
* *
@ -66,6 +66,13 @@ struct conn_s {
unsigned int major; unsigned int major;
unsigned int minor; unsigned int minor;
} protocol; } protocol;
#ifdef REVERSE_SUPPORT
/*
* Place to store the current per-connection reverse proxy path
*/
char* reversepath;
#endif
}; };
/* /*

View File

@ -1,4 +1,4 @@
/* $Id: grammar.y,v 1.23 2003-06-26 18:17:09 rjkaes Exp $ /* $Id: grammar.y,v 1.24 2004-01-26 19:11:51 rjkaes Exp $
* *
* This is the grammar for tinyproxy's configuration file. It needs to be * This is the grammar for tinyproxy's configuration file. It needs to be
* in sync with scanner.l. If you know more about yacc and lex than I do * in sync with scanner.l. If you know more about yacc and lex than I do
@ -50,6 +50,7 @@ int yylex(void);
%token KW_FILTER KW_FILTERURLS KW_FILTEREXTENDED KW_FILTER_DEFAULT_DENY %token KW_FILTER KW_FILTERURLS KW_FILTEREXTENDED KW_FILTER_DEFAULT_DENY
%token KW_FILTER_CASESENSITIVE %token KW_FILTER_CASESENSITIVE
%token KW_UPSTREAM %token KW_UPSTREAM
%token KW_REVERSEPATH KW_REVERSEONLY KW_REVERSEMAGIC KW_REVERSEBASEURL
%token KW_CONNECTPORT KW_BIND %token KW_CONNECTPORT KW_BIND
%token KW_STATHOST %token KW_STATHOST
%token KW_ALLOW KW_DENY %token KW_ALLOW KW_DENY
@ -165,6 +166,46 @@ statement
config.my_domain = $2; config.my_domain = $2;
#else #else
log_message(LOG_WARNING, "X-Tinyproxy header support was not compiled in."); log_message(LOG_WARNING, "X-Tinyproxy header support was not compiled in.");
#endif
}
| KW_REVERSEPATH string
{
#ifdef REVERSE_SUPPORT
reversepath_add(NULL, $2);
#else
log_message(LOG_WARNING, "Reverse proxy support was not compiled in.");
#endif
}
| KW_REVERSEPATH string string
{
#ifdef REVERSE_SUPPORT
reversepath_add($2, $3);
#else
log_message(LOG_WARNING, "Reverse proxy support was not compiled in.");
#endif
}
| KW_REVERSEONLY yesno
{
#ifdef REVERSE_SUPPORT
config.reverseonly = $2;
#else
log_message(LOG_WARNING, "Reverse proxy support was not compiled in.");
#endif
}
| KW_REVERSEMAGIC yesno
{
#ifdef REVERSE_SUPPORT
config.reversemagic = $2;
#else
log_message(LOG_WARNING, "Reverse proxy support was not compiled in.");
#endif
}
| KW_REVERSEBASEURL string
{
#ifdef REVERSE_SUPPORT
config.reversebaseurl = $2;
#else
log_message(LOG_WARNING, "Reverse proxy support was not compiled in.");
#endif #endif
} }
| KW_UPSTREAM unique_address ':' NUMBER | KW_UPSTREAM unique_address ':' NUMBER

View File

@ -1,4 +1,4 @@
/* $Id: reqs.c,v 1.108 2003-08-07 16:32:12 rjkaes Exp $ /* $Id: reqs.c,v 1.109 2004-01-26 19:11:51 rjkaes Exp $
* *
* This is where all the work in tinyproxy is actually done. Incoming * This is where all the work in tinyproxy is actually done. Incoming
* connections have a new child created for them. The child then * connections have a new child created for them. The child then
@ -330,7 +330,7 @@ upstream_add(const char *host, int port, const char *domain)
if (domain == NULL) { if (domain == NULL) {
if (!host || host[0] == '\0' || port < 1) { if (!host || host[0] == '\0' || port < 1) {
log_message(LOG_WARNING, "Nonsence upstream rule: invalid host or port"); log_message(LOG_WARNING, "Nonsense upstream rule: invalid host or port");
goto upstream_cleanup; goto upstream_cleanup;
} }
@ -404,7 +404,7 @@ upstream_add(const char *host, int port, const char *domain)
return; return;
upstream_cleanup: upstream_cleanup:
safefree(up->host); safefree(up->host);
safefree(up->domain); safefree(up->domain);
safefree(up); safefree(up);
@ -465,6 +465,68 @@ upstream_get(char *host)
} }
#endif #endif
#ifdef REVERSE_SUPPORT
/*
 * Add an entry to the head of the global reverse path list
 * (config.reversepath_list).
 *
 *   path - local URL prefix to map; must begin with '/'.  NULL means the
 *          whole local namespace and is stored as "/".
 *   url  - remote base URL the prefix maps to; must contain "://".
 *
 * Both strings are duplicated, so the caller keeps ownership of its
 * arguments.  Invalid rules and allocation failures are logged and leave
 * the list untouched.
 */
void
reversepath_add(const char *path, const char *url)
{
	struct reversepath *reverse;

	if (url == NULL) {
		log_message(LOG_WARNING, "Illegal reverse proxy rule: missing url");
		return;
	}

	if (!strstr(url, "://")) {
		log_message(LOG_WARNING,
			    "Skipping reverse proxy rule: '%s' is not a valid url", url);
		return;
	}

	if (path && *path != '/') {
		log_message(LOG_WARNING,
			    "Skipping reverse proxy rule: path '%s' doesn't start with a /", path);
		return;
	}

	reverse = safemalloc(sizeof *reverse);
	if (!reverse) {
		log_message(LOG_ERR, "Unable to allocate memory in reversepath_add()");
		return;
	}

	reverse->path = safestrdup(path ? path : "/");
	reverse->url = safestrdup(url);

	/*
	 * Never link a half-built rule into the list: the lookup and
	 * redirect-rewriting code hand path/url straight to string
	 * functions and would dereference NULL.
	 */
	if (!reverse->path || !reverse->url) {
		log_message(LOG_ERR, "Unable to allocate memory in reversepath_add()");
		if (reverse->path) {
			safefree(reverse->path);
		}
		if (reverse->url) {
			safefree(reverse->url);
		}
		safefree(reverse);
		return;
	}

	reverse->next = config.reversepath_list;
	config.reversepath_list = reverse;

	log_message(LOG_INFO,
		    "Added reverse proxy rule: %s -> %s", reverse->path, reverse->url);
}
/*
 * Find the reverse path rule whose local path is a prefix of `url`.
 *
 * The list is walked from its head (config.reversepath_list) and the first
 * rule that matches is returned; NULL is returned when no rule matches.
 * `url` does not have to end where the prefix ends, so the function can
 * also be handed a cookie value that is followed by other text.
 */
static struct reversepath *
reversepath_get(char *url)
{
	struct reversepath *reverse;

	for (reverse = config.reversepath_list; reverse; reverse = reverse->next) {
		/*
		 * Anchored prefix compare.  strstr(url, path) == url gives
		 * the same answer but keeps scanning the whole request
		 * string whenever the prefix does not match at offset 0.
		 */
		if (reverse->path &&
		    strncmp(url, reverse->path, strlen(reverse->path)) == 0)
			return reverse;
	}

	return NULL;
}
#endif
/* /*
* Create a connection for HTTP connections. * Create a connection for HTTP connections.
*/ */
@ -488,7 +550,7 @@ establish_http_connection(struct conn_s *connptr, struct request_s *request)
} }
/* /*
* These two defines are for the SSL tunneling. * These two defines are for the SSL tunnelling.
*/ */
#define SSL_CONNECTION_RESPONSE "HTTP/1.0 200 Connection established" #define SSL_CONNECTION_RESPONSE "HTTP/1.0 200 Connection established"
#define PROXY_AGENT "Proxy-agent: " PACKAGE "/" VERSION #define PROXY_AGENT "Proxy-agent: " PACKAGE "/" VERSION
@ -517,6 +579,13 @@ process_request(struct conn_s *connptr, hashmap_t hashofheaders)
char *url; char *url;
struct request_s *request; struct request_s *request;
#ifdef REVERSE_SUPPORT
char *rewrite_url = NULL;
char *cookie = NULL;
char *cookieval;
struct reversepath *reverse;
#endif
int ret; int ret;
size_t request_len; size_t request_len;
@ -577,6 +646,66 @@ process_request(struct conn_s *connptr, hashmap_t hashofheaders)
return NULL; return NULL;
} }
#ifdef REVERSE_SUPPORT
	/*
	 * Reverse proxy URL rewriting.
	 *
	 * If the request path matches a ReversePath rule (directly, or via
	 * the tracking cookie when ReverseMagic is on), `url` is replaced by
	 * the mapped remote URL.  If nothing matches, `url` is left untouched
	 * so normal forward proxying can continue, unless ReverseOnly is set,
	 * in which case the request is rejected.
	 */
	if (config.reversepath_list != NULL) {
		reverse = NULL;

		/* Reverse requests always start with a slash */
		if (*url == '/') {
			/* First try locating the reverse mapping by request url */
			reverse = reversepath_get(url);
			if (reverse) {
				rewrite_url = safemalloc(strlen(url) +
							 strlen(reverse->url) + 1);
				if (rewrite_url) {
					strcpy(rewrite_url, reverse->url);
					/* Drop the local prefix, keep the rest */
					strcat(rewrite_url, url + strlen(reverse->path));
				}
			} else if (config.reversemagic &&
				   hashmap_entry_by_key(hashofheaders, "cookie",
							(void **)&cookie) > 0) {
				/* No match - try the magical tracking cookie next */
				if ((cookieval = strstr(cookie, REVERSE_COOKIE "=")) &&
				    (reverse = reversepath_get(cookieval +
							       strlen(REVERSE_COOKIE) + 1))) {
					rewrite_url = safemalloc(strlen(url) +
								 strlen(reverse->url) + 1);
					if (rewrite_url) {
						strcpy(rewrite_url, reverse->url);
						/* Skip the leading '/' of the request */
						strcat(rewrite_url, url + 1);
						log_message(LOG_INFO,
							    "Magical tracking cookie says: %s",
							    reverse->path);
					}
				}
			}
		}

		if (rewrite_url) {
			/*
			 * rewrite_url is only ever set together with a
			 * non-NULL `reverse`, so reverse->path is safe here.
			 */
			log_message(LOG_CONN, "Rewriting URL: %s -> %s",
				    url, rewrite_url);
			safefree(url);
			url = rewrite_url;

			/* Store reverse path so that the magical tracking cookie can be set */
			if (config.reversemagic)
				connptr->reversepath = safestrdup(reverse->path);
		} else if (config.reverseonly) {
			/* Forward proxy support off and no reverse path match found */
			log_message(LOG_ERR, "Bad request");
			indicate_http_error(connptr, 400, "Bad Request",
					    "detail", "Request has an invalid URL",
					    "url", url,
					    NULL);

			safefree(url);
			free_request_struct(request);

			return NULL;
		}
		/*
		 * Otherwise: no mapping matched and forward proxying is
		 * allowed, so fall through with the original url.  Freeing it
		 * and continuing with NULL (as before) crashed in the
		 * strncasecmp() that follows.
		 */
	}
#endif
if (strncasecmp(url, "http://", 7) == 0 if (strncasecmp(url, "http://", 7) == 0
|| (UPSTREAM_CONFIGURED() && strncasecmp(url, "ftp://", 6) == 0)) { || (UPSTREAM_CONFIGURED() && strncasecmp(url, "ftp://", 6) == 0)) {
char *skipped_type = strstr(url, "//") + 2; char *skipped_type = strstr(url, "//") + 2;
@ -726,7 +855,7 @@ process_request(struct conn_s *connptr, hashmap_t hashofheaders)
request->host); request->host);
indicate_http_error(connptr, 403, "Filtered", indicate_http_error(connptr, 403, "Filtered",
"detail", "The request you made has been filted", "detail", "The request you made has been filtered",
"url", url, "url", url,
NULL); NULL);
@ -1178,6 +1307,10 @@ process_server_headers(struct conn_s *connptr)
int i; int i;
int ret; int ret;
#ifdef REVERSE_SUPPORT
struct reversepath *reverse = config.reversepath_list;
#endif
/* FIXME: Remember to handle a "simple_req" type */ /* FIXME: Remember to handle a "simple_req" type */
/* Get the response line from the remote server. */ /* Get the response line from the remote server. */
@ -1251,6 +1384,41 @@ process_server_headers(struct conn_s *connptr)
if (ret < 0) if (ret < 0)
goto ERROR_EXIT; goto ERROR_EXIT;
#ifdef REVERSE_SUPPORT
/*
 * Reverse proxy additions to the response headers sent to the client:
 * an optional tracking cookie and an optional rewrite of the Location
 * header.
 */
/* Write tracking cookie for the magical reverse proxy path hack */
/* Only sent when a reverse path was recorded on this connection. */
if (config.reversemagic && connptr->reversepath) {
ret = write_message(connptr->client_fd,
"Set-Cookie: " REVERSE_COOKIE "=%s; path=/\r\n",
connptr->reversepath);
if (ret < 0) goto ERROR_EXIT;
}
/* Rewrite the HTTP redirect if needed */
/* Nothing is rewritten unless ReverseBaseURL is configured. */
if (config.reversebaseurl &&
hashmap_entry_by_key(hashofheaders, "location", (void **)&header) > 0) {
/* Look for a matching entry in the reversepath list */
/*
 * `reverse` starts at config.reversepath_list (set at its declaration).
 * A rule matches when its url is a case-insensitive prefix of the
 * Location value; `len` keeps the length of the url last compared.
 */
while (reverse) {
if (strncasecmp(header,
reverse->url,
(len = strlen(reverse->url))) == 0) break;
reverse = reverse->next;
}
if (reverse) {
/*
 * New Location = base URL + mapped path without its leading '/' +
 * the remainder of the original Location after the matched url.
 * NOTE(review): this presumably expects reversebaseurl to end with
 * a '/' (as in the sample configuration) - confirm.
 */
ret = write_message(connptr->client_fd, "Location: %s%s%s\r\n",
config.reversebaseurl, (reverse->path + 1),
(header + len));
if (ret < 0) goto ERROR_EXIT;
log_message(LOG_INFO,
"Rewriting HTTP redirect: %s -> %s%s%s", header,
config.reversebaseurl, (reverse->path + 1), (header + len));
/*
 * Remove the original header from the map; presumably so the code
 * that outputs the remaining headers does not send it a second time.
 */
hashmap_remove(hashofheaders, "location");
}
}
#endif
/* /*
* All right, output all the remaining headers to the client. * All right, output all the remaining headers to the client.
*/ */

View File

@ -1,4 +1,4 @@
/* $Id: reqs.h,v 1.4 2003-05-29 19:43:57 rjkaes Exp $ /* $Id: reqs.h,v 1.5 2004-01-26 19:11:51 rjkaes Exp $
* *
* See 'reqs.c' for a detailed description. * See 'reqs.c' for a detailed description.
* *
@ -22,5 +22,6 @@
extern void handle_connection(int fd); extern void handle_connection(int fd);
extern void add_connect_port_allowed(int port); extern void add_connect_port_allowed(int port);
extern void upstream_add(const char *host, int port, const char *domain); extern void upstream_add(const char *host, int port, const char *domain);
extern void reversepath_add(const char *path, const char *url);
#endif #endif

View File

@ -1,4 +1,4 @@
/* $Id: scanner.l,v 1.22 2003-06-26 18:26:10 rjkaes Exp $ /* $Id: scanner.l,v 1.23 2004-01-26 19:11:51 rjkaes Exp $
* *
* This builds the scanner for the tinyproxy configuration file. This * This builds the scanner for the tinyproxy configuration file. This
* file needs to stay in sync with grammar.y. If someone knows lex and yacc * file needs to stay in sync with grammar.y. If someone knows lex and yacc
@ -24,12 +24,12 @@
struct keyword { struct keyword {
char *kw_name; char *kw_name;
int kw_token; int kw_token;
}; };
static struct keyword keywords[] = { static struct keyword keywords[] = {
/* statements */ /* statements */
{ "port", KW_PORT }, { "port", KW_PORT },
{ "logfile", KW_LOGFILE }, { "logfile", KW_LOGFILE },
{ "syslog", KW_SYSLOG }, { "syslog", KW_SYSLOG },
{ "maxclients", KW_MAXCLIENTS }, { "maxclients", KW_MAXCLIENTS },
@ -44,24 +44,28 @@ static struct keyword keywords[] = {
{ "group", KW_GROUP }, { "group", KW_GROUP },
{ "anonymous", KW_ANONYMOUS }, { "anonymous", KW_ANONYMOUS },
{ "filter", KW_FILTER }, { "filter", KW_FILTER },
{ "filterurls", KW_FILTERURLS }, { "filterurls", KW_FILTERURLS },
{ "filterextended", KW_FILTEREXTENDED }, { "filterextended", KW_FILTEREXTENDED },
{ "filterdefaultdeny", KW_FILTER_DEFAULT_DENY }, { "filterdefaultdeny", KW_FILTER_DEFAULT_DENY },
{ "filtercasesensitive", KW_FILTER_CASESENSITIVE }, { "filtercasesensitive", KW_FILTER_CASESENSITIVE },
{ "xtinyproxy", KW_XTINYPROXY }, { "xtinyproxy", KW_XTINYPROXY },
{ "upstream", KW_UPSTREAM }, { "reversepath", KW_REVERSEPATH },
{ "reverseonly", KW_REVERSEONLY },
{ "reversemagic", KW_REVERSEMAGIC },
{ "reversebaseurl", KW_REVERSEBASEURL },
{ "upstream", KW_UPSTREAM },
{ "allow", KW_ALLOW }, { "allow", KW_ALLOW },
{ "deny", KW_DENY }, { "deny", KW_DENY },
{ "connectport", KW_CONNECTPORT }, { "connectport", KW_CONNECTPORT },
{ "bind", KW_BIND }, { "bind", KW_BIND },
{ "viaproxyname", KW_VIA_PROXY_NAME }, { "viaproxyname", KW_VIA_PROXY_NAME },
{ "stathost", KW_STATHOST }, { "stathost", KW_STATHOST },
{ "errorfile", KW_ERRORPAGE }, { "errorfile", KW_ERRORPAGE },
{ "defaulterrorfile", KW_DEFAULT_ERRORPAGE }, { "defaulterrorfile", KW_DEFAULT_ERRORPAGE },
{ "statfile", KW_STATPAGE }, { "statfile", KW_STATPAGE },
/* loglevel and the settings */ /* loglevel and the settings */
{ "loglevel", KW_LOGLEVEL }, { "loglevel", KW_LOGLEVEL },
{ "critical", KW_LOG_CRITICAL }, { "critical", KW_LOG_CRITICAL },
{ "error", KW_LOG_ERROR }, { "error", KW_LOG_ERROR },
{ "warning", KW_LOG_WARNING }, { "warning", KW_LOG_WARNING },

View File

@ -1,4 +1,4 @@
/* $Id: tinyproxy.c,v 1.46 2003-03-17 04:24:19 rjkaes Exp $ /* $Id: tinyproxy.c,v 1.47 2004-01-26 19:11:51 rjkaes Exp $
* *
* The initialize routine. Basically sets up all the initial stuff (logfile, * The initialize routine. Basically sets up all the initial stuff (logfile,
* listening socket, config options, etc.) and then sits there and loops * listening socket, config options, etc.) and then sits there and loops
@ -130,9 +130,9 @@ Options:\n\
-v Display the version number.\n"); -v Display the version number.\n");
/* Display the modes compiled into tinyproxy */ /* Display the modes compiled into tinyproxy */
printf("\nFeatures Compiled In:\n"); printf("\nFeatures compiled in:\n");
#ifdef XTINYPROXY_ENABLE #ifdef XTINYPROXY_ENABLE
printf(" XTinyproxy Header\n"); printf(" XTinyproxy header\n");
#endif /* XTINYPROXY */ #endif /* XTINYPROXY */
#ifdef FILTER_ENABLE #ifdef FILTER_ENABLE
printf(" Filtering\n"); printf(" Filtering\n");
@ -141,8 +141,11 @@ Options:\n\
printf(" Debugging code\n"); printf(" Debugging code\n");
#endif /* NDEBUG */ #endif /* NDEBUG */
#ifdef TRANSPARENT_PROXY #ifdef TRANSPARENT_PROXY
printf(" Transparent Proxy Support\n"); printf(" Transparent proxy support\n");
#endif /* TRANSPARENT_PROXY */ #endif /* TRANSPARENT_PROXY */
#ifdef REVERSE_SUPPORT
printf(" Reverse proxy support\n");
#endif /* REVERSE_SUPPORT */
} }
int int

View File

@ -1,4 +1,4 @@
/* $Id: tinyproxy.h,v 1.42 2003-08-01 00:14:34 rjkaes Exp $ /* $Id: tinyproxy.h,v 1.43 2004-01-26 19:11:51 rjkaes Exp $
* *
* See 'tinyproxy.c' for a detailed description. * See 'tinyproxy.c' for a detailed description.
* *
@ -36,6 +36,16 @@ struct upstream {
}; };
#endif #endif
#ifdef REVERSE_SUPPORT
/*
 * One reverse proxy mapping (a ReversePath rule).  Rules form a singly
 * linked list.
 */
struct reversepath {
struct reversepath *next;	/* next rule in the list, or NULL at the end */
char *path;			/* local path prefix; begins with '/' */
char *url;			/* remote URL the prefix is mapped to */
};
/* Name of the cookie that carries the current reverse path (ReverseMagic). */
#define REVERSE_COOKIE "yummy_magical_cookie"
#endif
/* /*
* Hold all the configuration time information. * Hold all the configuration time information.
*/ */
@ -58,6 +68,12 @@ struct config_s {
#ifdef XTINYPROXY_ENABLE #ifdef XTINYPROXY_ENABLE
char *my_domain; char *my_domain;
#endif #endif
#ifdef REVERSE_SUPPORT
struct reversepath *reversepath_list;
unsigned int reverseonly; /* boolean */
unsigned int reversemagic; /* boolean */
char *reversebaseurl;
#endif
#ifdef UPSTREAM_SUPPORT #ifdef UPSTREAM_SUPPORT
struct upstream *upstream_list; struct upstream *upstream_list;
#endif /* UPSTREAM_SUPPORT */ #endif /* UPSTREAM_SUPPORT */