tinyproxy/src/main.c

427 lines
12 KiB
C
Raw Normal View History

/* tinyproxy - A fast light-weight HTTP proxy
*
* Copyright (C) 1998 Steven Young <sdyoung@miranda.org>
* Copyright (C) 1998-2002 Robert James Kaes <rjkaes@users.sourceforge.net>
* Copyright (C) 2000 Chris Lightfoot <chris@ex-parrot.com>
* Copyright (C) 2009-2010 Mukund Sivaraman <muks@banu.com>
* Copyright (C) 2009-2010 Michael Adam <obnox@samba.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
/* The initialization routine. It sets up all the initial state (logfile,
* listening socket, config options, etc.) and then loops over the new
* connections until the daemon is closed. It also contains additional
* functions that handle the "user friendly" aspects of the program (usage,
* stats, etc.). Like any good program, most of the work is actually done
* elsewhere.
*/
#include "main.h"

#include <limits.h>

#include "anonymous.h"
#include "buffer.h"
#include "conf.h"
#include "daemon.h"
#include "heap.h"
#include "filter.h"
#include "child.h"
#include "log.h"
#include "reqs.h"
#include "sock.h"
#include "stats.h"
#include "utils.h"
/*
* Global Structures
*/
struct config_s *config;
static struct config_s configs[2];
static const char* config_file;
2008-12-08 21:39:44 +08:00
unsigned int received_sighup = FALSE; /* boolean */
/*
 * Return the configuration slot that `config` is NOT currently pointing
 * at: configs[1] when configs[0] is active, configs[0] in every other
 * case (including when `config` points at neither slot).
 */
static struct config_s *
get_next_config (void)
{
        return (config == &configs[0]) ? &configs[1] : &configs[0];
}
/*
* Handle a signal
*/
static void
2009-09-15 06:23:35 +08:00
takesig (int sig)
{
pid_t pid;
int status;
switch (sig) {
case SIGHUP:
received_sighup = TRUE;
break;
simplify codebase by using one thread/conn, instead of preforked procs the existing codebase used an elaborate and complex approach for its parallelism: 5 different config file options, namely - MaxClients - MinSpareServers - MaxSpareServers - StartServers - MaxRequestsPerChild were used to steer how (and how many) parallel processes tinyproxy would spin up at start, how many processes at each point needed to be idle, etc. it seems all preforked processes would listen on the server port and compete with each other about who would get assigned the new incoming connections. since some data needs to be shared across those processes, a half- baked "shared memory" implementation was provided for this purpose. that implementation used to use files in the filesystem, and since it had a big FIXME comment, the author was well aware of how hackish that approach was. this entire complexity is now removed. the main thread enters a loop which polls on the listening fds, then spins up a new thread per connection, until the maximum number of connections (MaxClients) is hit. this is the only of the 5 config options left after this cleanup. since threads share the same address space, the code necessary for shared memory access has been removed. this means that the other 4 mentioned config option will now produce a parse error, when encountered. currently each thread uses a hardcoded default of 256KB per thread for the thread stack size, which is quite lavish and should be sufficient for even the worst C libraries, but people may want to tweak this value to the bare minimum, thus we may provide a new config option for this purpose in the future. i suspect that on heavily optimized C libraries such a musl, a stack size of 8-16 KB per thread could be sufficient. 
since the existing list implementation in vector.c did not provide a way to remove a single item from an existing list, i added my own list implementation from my libulz library which offers this functionality, rather than trying to add an ad-hoc, and perhaps buggy implementation to the vector_t list code. the sblist code is contained in an 80 line C file and as simple as it can get, while offering good performance and is proven bugfree due to years of use in other projects.
2018-12-17 08:23:09 +08:00
case SIGINT:
case SIGTERM:
config->quit = TRUE;
break;
case SIGCHLD:
while ((pid = waitpid (-1, &status, WNOHANG)) > 0) ;
break;
}
return;
}
/*
* Display the version information for the user.
*/
2009-09-15 06:23:35 +08:00
static void
display_version (void)
{
printf ("%s %s\n", PACKAGE, VERSION);
}
/*
* Display usage to the user.
*/
2009-09-15 06:23:35 +08:00
static void
display_usage (void)
{
int features = 0;
printf ("Usage: %s [options]\n", PACKAGE);
printf ("\n"
"Options are:\n"
" -d Do not daemonize (run in foreground).\n"
" -c FILE Use an alternate configuration file.\n"
" -h Display this usage information.\n"
" -v Display version information.\n");
/* Display the modes compiled into tinyproxy */
printf ("\nFeatures compiled in:\n");
#ifdef XTINYPROXY_ENABLE
printf (" XTinyproxy header\n");
features++;
#endif /* XTINYPROXY */
#ifdef FILTER_ENABLE
printf (" Filtering\n");
features++;
#endif /* FILTER_ENABLE */
#ifndef NDEBUG
printf (" Debugging code\n");
features++;
#endif /* NDEBUG */
#ifdef TRANSPARENT_PROXY
printf (" Transparent proxy support\n");
features++;
#endif /* TRANSPARENT_PROXY */
Added reverse proxy support from Kim Holviala. His comments regarding this addition follow: The patch implements a simple reverse proxy (with one funky extra feature). It has all the regular features: mapping remote servers to local namespace (ReversePath), disabling forward proxying (ReverseOnly) and HTTP redirect rewriting (ReverseBaseURL). The funky feature is this: You map Google to /google/ and the Google front page opens up fine. Type in stuff and click "Google Search" and you'll get an error from tinyproxy. Reason for this is that Google's form submits to "/search" which unfortunately bypasses our /google/ mapping (if they'd submit to "search" without the slash it would have worked ok). Turn on ReverseMagic and it starts working.... ReverseMagic "hijacks" one cookie which it sends to the client browser. This cookie contains the current reverse proxy path mapping (in the above case /google/) so that even if the site uses absolute links the reverse proxy still knows where to map the request. And yes, it works. No, I've never seen this done before - I couldn't find _any_ working OSS reverse proxies, and the commercial ones I've seen try to parse the page and fix all links (in the above case changing "/search" to "/google/search"). The problem with modifying the html is that it might not be parsable (very common) or it might be encoded so that the proxy can't read it (mod_gzip or likes). Hope you like that patch. One caveat - I haven't coded with C in like three years so my code might be a bit messy.... There shouldn't be any security problems thou, but you never know. I did all the stuff out of my memory without reading any RFC's, but I tested everything with Moz, Konq, IE6, Links and Lynx and they all worked fine.
2004-01-27 03:11:52 +08:00
#ifdef REVERSE_SUPPORT
printf (" Reverse proxy support\n");
features++;
#endif /* REVERSE_SUPPORT */
#ifdef UPSTREAM_SUPPORT
printf (" Upstream proxy support\n");
features++;
#endif /* UPSTREAM_SUPPORT */
if (0 == features)
printf (" None\n");
printf ("\n"
2010-02-17 02:59:29 +08:00
"For support and bug reporting instructions, please visit\n"
2016-01-04 05:16:21 +08:00
"<https://tinyproxy.github.io/>.\n");
}
2009-09-15 06:23:35 +08:00
/*
 * Parse a numeric user or group ID.
 *
 * @str: candidate string; may be NULL.
 *
 * Returns the decimal value of @str when it consists solely of one or
 * more ASCII digits and the value fits in an int.  Returns -1 for NULL,
 * for the empty string, for any string containing a non-digit character
 * (including a sign or whitespace), and for values larger than INT_MAX.
 * change_user() treats -1 as "not numeric" and looks the string up as a
 * name instead.
 */
static int
get_id (const char *str)
{
        const char *p;
        int id = 0;

        /* An empty string is not an ID; do not let it silently map to 0. */
        if (str == NULL || *str == '\0')
                return -1;

        for (p = str; *p != '\0'; p++) {
                int digit;

                /* <ctype.h> functions require an unsigned char value. */
                if (!isdigit ((unsigned char) *p))
                        return -1;

                digit = *p - '0';

                /* Reject the value before id * 10 + digit can overflow. */
                if (id > (INT_MAX - digit) / 10)
                        return -1;

                id = id * 10 + digit;
        }

        return id;
}
2009-09-15 04:30:20 +08:00
/*
 * Switch the process to the group named by config->group, which may be a
 * numeric GID or a group name, and (when HAVE_SETGROUPS is defined) drop
 * all supplementary groups.  Prints a diagnostic to stderr and exits on
 * any failure.
 */
static void
change_group_id (const char *program)
{
        struct group *grp_entry;
        int gid;

        gid = get_id (config->group);
        if (gid < 0) {
                /* Not numeric: resolve it as a group name. */
                grp_entry = getgrnam (config->group);
                if (grp_entry == NULL) {
                        fprintf (stderr,
                                 "%s: Unable to find group \"%s\".\n",
                                 program, config->group);
                        exit (EX_NOUSER);
                }
                gid = grp_entry->gr_gid;
        }

        if (setgid (gid) < 0) {
                fprintf (stderr,
                         "%s: Unable to change to group \"%s\".\n",
                         program, config->group);
                exit (EX_NOPERM);
        }

#ifdef HAVE_SETGROUPS
        /* Drop all supplementary groups, otherwise these are inherited
         * from the calling process */
        if (setgroups (0, NULL) < 0) {
                fprintf (stderr,
                         "%s: Unable to drop supplementary groups.\n",
                         program);
                exit (EX_NOPERM);
        }
#endif

        log_message (LOG_INFO, "Now running as group \"%s\".",
                     config->group);
}

/*
 * Switch the process to the user named by config->user, which may be a
 * numeric UID or a user name.  Prints a diagnostic to stderr and exits on
 * any failure.
 */
static void
change_user_id (const char *program)
{
        struct passwd *pw_entry;
        int uid;

        uid = get_id (config->user);
        if (uid < 0) {
                /* Not numeric: resolve it as a user name. */
                pw_entry = getpwnam (config->user);
                if (pw_entry == NULL) {
                        fprintf (stderr,
                                 "%s: Unable to find user \"%s\".\n",
                                 program, config->user);
                        exit (EX_NOUSER);
                }
                uid = pw_entry->pw_uid;
        }

        if (setuid (uid) < 0) {
                fprintf (stderr,
                         "%s: Unable to change to user \"%s\".\n",
                         program, config->user);
                exit (EX_NOPERM);
        }

        log_message (LOG_INFO, "Now running as user \"%s\".",
                     config->user);
}

/**
 * change_user:
 * @program: The name of the program. Pass argv[0] here; it is only used
 *           as the prefix of error messages.
 *
 * Change GID and then UID to the ones specified in the config file.  A
 * setting that is NULL or empty is skipped.  The group is changed first,
 * before the user.  main() calls this during initialization when the
 * effective user is root.
 *
 * Does not return on failure: the process exits with EX_NOUSER when a
 * name cannot be resolved and EX_NOPERM when the change itself fails.
 **/
static void
change_user (const char *program)
{
        if (config->group != NULL && config->group[0] != '\0')
                change_group_id (program);

        if (config->user != NULL && config->user[0] != '\0')
                change_user_id (program);
}
/**
 * reload_config:
 * @reload_logging: when non-zero, logging is shut down before the
 *                  configuration file is read and set up again afterwards.
 *
 * Convenience wrapper around reload_config_file() that also
 * re-initializes logging.  The file named by config_file is loaded into
 * the inactive configuration slot; only if that succeeds does the global
 * `config` pointer switch over to it.
 *
 * When @reload_logging is set, logging is set up again even if loading
 * the file failed, so that a bad configuration file does not leave the
 * process with logging torn down; in that case `config` still points at
 * the previous configuration.
 *
 * Returns: 0 on success.  Otherwise the non-zero result of
 * reload_config_file(), or, if the file loaded fine, the result of
 * setup_logging().
 **/
int
reload_config (int reload_logging)
{
        struct config_s *c_next = get_next_config ();
        int ret;

        if (reload_logging)
                shutdown_logging ();

        ret = reload_config_file (config_file, c_next);
        if (ret == 0)
                config = c_next;

        if (reload_logging) {
                int log_ret = setup_logging ();

                /* A config error takes precedence over a logging error. */
                if (ret == 0)
                        ret = log_ret;
        }

        return ret;
}
int
main (int argc, char **argv)
{
int opt, daemonized = TRUE;
2020-01-15 22:45:23 +08:00
simplify codebase by using one thread/conn, instead of preforked procs the existing codebase used an elaborate and complex approach for its parallelism: 5 different config file options, namely - MaxClients - MinSpareServers - MaxSpareServers - StartServers - MaxRequestsPerChild were used to steer how (and how many) parallel processes tinyproxy would spin up at start, how many processes at each point needed to be idle, etc. it seems all preforked processes would listen on the server port and compete with each other about who would get assigned the new incoming connections. since some data needs to be shared across those processes, a half- baked "shared memory" implementation was provided for this purpose. that implementation used to use files in the filesystem, and since it had a big FIXME comment, the author was well aware of how hackish that approach was. this entire complexity is now removed. the main thread enters a loop which polls on the listening fds, then spins up a new thread per connection, until the maximum number of connections (MaxClients) is hit. this is the only of the 5 config options left after this cleanup. since threads share the same address space, the code necessary for shared memory access has been removed. this means that the other 4 mentioned config option will now produce a parse error, when encountered. currently each thread uses a hardcoded default of 256KB per thread for the thread stack size, which is quite lavish and should be sufficient for even the worst C libraries, but people may want to tweak this value to the bare minimum, thus we may provide a new config option for this purpose in the future. i suspect that on heavily optimized C libraries such a musl, a stack size of 8-16 KB per thread could be sufficient. 
since the existing list implementation in vector.c did not provide a way to remove a single item from an existing list, i added my own list implementation from my libulz library which offers this functionality, rather than trying to add an ad-hoc, and perhaps buggy implementation to the vector_t list code. the sblist code is contained in an 80 line C file and as simple as it can get, while offering good performance and is proven bugfree due to years of use in other projects.
2018-12-17 08:23:09 +08:00
srand(time(NULL)); /* for hashmap seeds */
/* Only allow u+rw bits. This may be required for some versions
* of glibc so that mkstemp() doesn't make us vulnerable.
*/
umask (0177);
2010-03-03 05:05:28 +08:00
log_message (LOG_INFO, "Initializing " PACKAGE " ...");
2010-06-02 12:41:10 +08:00
if (config_compile_regex()) {
exit (EX_SOFTWARE);
}
config_file = SYSCONFDIR "/tinyproxy.conf";
2020-01-15 22:45:23 +08:00
while ((opt = getopt (argc, argv, "c:vdh")) != EOF) {
switch (opt) {
case 'v':
display_version ();
exit (EX_OK);
case 'd':
daemonized = FALSE;
2020-01-15 22:45:23 +08:00
break;
case 'c':
config_file = optarg;
2020-01-15 22:45:23 +08:00
break;
case 'h':
display_usage ();
exit (EX_OK);
default:
display_usage ();
exit (EX_USAGE);
}
}
if (reload_config(0)) {
exit (EX_SOFTWARE);
}
init_stats ();
/* If ANONYMOUS is turned on, make sure that Content-Length is
* in the list of allowed headers, since it is required in a
* HTTP/1.0 request. Also add the Content-Type header since it
* goes hand in hand with Content-Length. */
if (is_anonymous_enabled (config)) {
anonymous_insert (config, "Content-Length");
anonymous_insert (config, "Content-Type");
}
if (daemonized == TRUE) {
if (!config->syslog && config->logf_name == NULL)
fprintf(stderr, "WARNING: logging deactivated "
"(can't log to stdout when daemonized)\n");
makedaemon ();
}
if (set_signal_handler (SIGPIPE, SIG_IGN) == SIG_ERR) {
fprintf (stderr, "%s: Could not set the \"SIGPIPE\" signal.\n",
argv[0]);
exit (EX_OSERR);
2008-12-08 21:39:44 +08:00
}
#ifdef FILTER_ENABLE
if (config->filter)
filter_init ();
#endif /* FILTER_ENABLE */
/* Start listening on the selected port. */
if (child_listening_sockets(config->listen_addrs, config->port) < 0) {
fprintf (stderr, "%s: Could not create listening sockets.\n",
argv[0]);
exit (EX_OSERR);
2008-12-08 21:39:44 +08:00
}
/* Create pid file before we drop privileges */
if (config->pidpath) {
if (pidfile_create (config->pidpath) < 0) {
fprintf (stderr, "%s: Could not create PID file.\n",
argv[0]);
exit (EX_OSERR);
}
}
/* Switch to a different user if we're running as root */
if (geteuid () == 0)
change_user (argv[0]);
else
log_message (LOG_WARNING,
"Not running as root, so not changing UID/GID.");
/* Create log file after we drop privileges */
if (setup_logging ()) {
exit (EX_SOFTWARE);
}
/* These signals are only for the parent process. */
log_message (LOG_INFO, "Setting the various signals.");
if (set_signal_handler (SIGCHLD, takesig) == SIG_ERR) {
fprintf (stderr, "%s: Could not set the \"SIGCHLD\" signal.\n",
argv[0]);
exit (EX_OSERR);
}
if (set_signal_handler (SIGTERM, takesig) == SIG_ERR) {
fprintf (stderr, "%s: Could not set the \"SIGTERM\" signal.\n",
argv[0]);
exit (EX_OSERR);
}
if (daemonized && set_signal_handler (SIGHUP, takesig) == SIG_ERR) {
fprintf (stderr, "%s: Could not set the \"SIGHUP\" signal.\n",
argv[0]);
exit (EX_OSERR);
}
/* Start the main loop */
log_message (LOG_INFO, "Starting main loop. Accepting connections.");
child_main_loop ();
log_message (LOG_INFO, "Shutting down.");
child_kill_children (SIGTERM);
child_close_sock ();
/* Remove the PID file */
if (config->pidpath != NULL && unlink (config->pidpath) < 0) {
log_message (LOG_WARNING,
"Could not remove PID file \"%s\": %s.",
config->pidpath, strerror (errno));
2008-12-08 21:39:44 +08:00
}
#ifdef FILTER_ENABLE
if (config->filter)
filter_destroy ();
#endif /* FILTER_ENABLE */
shutdown_logging ();
return EXIT_SUCCESS;
}