From 235b1c10a76a02f4837febcf84efd9b7edcd020e Mon Sep 17 00:00:00 2001 From: rofl0r Date: Mon, 10 May 2021 00:28:54 +0100 Subject: [PATCH] implement filtertype keyword and fnmatch-based filtering as suggested in #212, it seems the majority of people don't understand that input was expected to be in regex format and people were using filter lists containing plain hostnames, e.g. `www.google.com`. apart from that, using fnmatch() for matching is actually a lot less computationally expensive and allows to use big blacklists without incurring a huge performance hit. the config file now understands a new option `FilterType` which can be one of `bre`, `ere` and `fnmatch`. The `FilterExtended` option was deprecated in favor of it. It still works, but will be removed in the release after the next. --- docs/man5/tinyproxy.conf.txt.in | 23 +++++++++++-- etc/tinyproxy.conf.in | 5 ++- src/conf-tokens.c | 1 + src/conf-tokens.gperf | 1 + src/conf-tokens.h | 1 + src/conf.c | 46 +++++++++++++++++++++---- src/conf.h | 4 +-- src/filter.c | 59 ++++++++++++++++++--------------- src/filter.h | 18 ++++++---- src/reqs.c | 18 ++++------ 10 files changed, 117 insertions(+), 59 deletions(-) diff --git a/docs/man5/tinyproxy.conf.txt.in b/docs/man5/tinyproxy.conf.txt.in index 758382c..1e1ee3e 100644 --- a/docs/man5/tinyproxy.conf.txt.in +++ b/docs/man5/tinyproxy.conf.txt.in @@ -267,7 +267,7 @@ domains. This option specifies the location of the file containing the filter rules, one rule per line. Rules are specified as POSIX basic regular expressions (BRE), unless -FilterExtended is activated. +another FilterType is specified. Comment lines start with a `#` character. Example filter file contents: @@ -287,6 +287,20 @@ Example filter file contents: # filter any domain that starts with adserver ^adserver +=item B<FilterType> + +This option can be set to one of `bre`, `ere`, or `fnmatch`. 
+If `bre` is set, the rules specified in the filter file are matched +using POSIX basic regular expressions, when set to `ere`, using +POSIX extended regular expressions, and when set to `fnmatch` using +the `fnmatch` function as specified in the manpage `man 3p fnmatch`. +`fnmatch` matching is identical to what's used in the shell to match +filenames, so for example `*.google.com` matches everything that +ends with `.google.com`. +If you don't know what regular expressions are or you're using filter +lists from 3rd party sources, `fnmatch` is probably what you want. +It's also the fastest matching method of the three. + =item B<FilterURLs> If this boolean option is set to `Yes` or `On`, filtering is @@ -300,6 +314,7 @@ recommended not to use this option. =item B<FilterExtended> +Deprecated. Use `FilterType ere` instead. If this boolean option is set to `Yes`, then extended POSIX regular expressions are used for matching the filter rules. The default is to use basic POSIX regular expressions. @@ -308,7 +323,11 @@ The default is to use basic POSIX regular expressions. If this boolean option is set to `Yes`, then the filter rules are matched in a case sensitive manner. The default is to -match case-insensitively. +match case-insensitively, unfortunately. +If you set this to `Yes`, then your matching will be almost +twice as fast. +This setting affects only `bre` and `ere` FilterTypes, fnmatch +is always case sensitive. =item B<FilterDefaultDeny> diff --git a/etc/tinyproxy.conf.in b/etc/tinyproxy.conf.in index ce27f7e..d268709 100644 --- a/etc/tinyproxy.conf.in +++ b/etc/tinyproxy.conf.in @@ -240,10 +240,9 @@ ViaProxyName "tinyproxy" #FilterURLs On # -# FilterExtended: Use POSIX Extended regular expressions rather than -# basic. +# FilterType: Use bre (default), ere, or fnmatch for filtering. # -#FilterExtended On +#FilterType fnmatch # # FilterCaseSensitive: Use case sensitive regular expressions. 
diff --git a/src/conf-tokens.c b/src/conf-tokens.c index bad7013..2a1ddbe 100644 --- a/src/conf-tokens.c +++ b/src/conf-tokens.c @@ -34,6 +34,7 @@ config_directive_find (register const char *str, register size_t len) {"defaulterrorfile", CD_defaulterrorfile}, {"startservers", CD_startservers}, {"filtercasesensitive", CD_filtercasesensitive}, + {"filtertype", CD_filtertype}, {"filterurls", CD_filterurls}, {"filter", CD_filter}, {"reversemagic", CD_reversemagic}, diff --git a/src/conf-tokens.gperf b/src/conf-tokens.gperf index ef93245..f027a23 100644 --- a/src/conf-tokens.gperf +++ b/src/conf-tokens.gperf @@ -51,6 +51,7 @@ filterurls, CD_filterurls filterextended, CD_filterextended filterdefaultdeny, CD_filterdefaultdeny filtercasesensitive, CD_filtercasesensitive +filtertype, CD_filtertype reversebaseurl, CD_reversebaseurl reverseonly, CD_reverseonly reversemagic, CD_reversemagic diff --git a/src/conf-tokens.h b/src/conf-tokens.h index d9f03cd..a6338f8 100644 --- a/src/conf-tokens.h +++ b/src/conf-tokens.h @@ -33,6 +33,7 @@ CD_errorfile, CD_addheader, CD_filter, CD_filterurls, +CD_filtertype, CD_filterextended, CD_filterdefaultdeny, CD_filtercasesensitive, diff --git a/src/conf.c b/src/conf.c index 7033661..7f94b65 100644 --- a/src/conf.c +++ b/src/conf.c @@ -135,6 +135,7 @@ static HANDLE_FUNC (handle_filtercasesensitive); static HANDLE_FUNC (handle_filterdefaultdeny); static HANDLE_FUNC (handle_filterextended); static HANDLE_FUNC (handle_filterurls); +static HANDLE_FUNC (handle_filtertype); #endif static HANDLE_FUNC (handle_group); static HANDLE_FUNC (handle_listen); @@ -234,6 +235,7 @@ struct { STDCONF (filterextended, BOOL, handle_filterextended), STDCONF (filterdefaultdeny, BOOL, handle_filterdefaultdeny), STDCONF (filtercasesensitive, BOOL, handle_filtercasesensitive), + STDCONF (filtertype, "(bre|ere|fnmatch)", handle_filtertype), #endif #ifdef REVERSE_SUPPORT /* Reverse proxy arguments */ @@ -952,6 +954,11 @@ static HANDLE_FUNC (handle_basicauth) } #ifdef 
FILTER_ENABLE + +static void warn_deprecated(const char *arg, unsigned long lineno) { + CP_WARN ("deprecated option %s", arg); +} + static HANDLE_FUNC (handle_filter) { return set_string_arg (&conf->filter, line, &match[2]); @@ -959,26 +966,53 @@ static HANDLE_FUNC (handle_filter) static HANDLE_FUNC (handle_filterurls) { - return set_bool_arg (&conf->filter_url, line, &match[2]); + conf->filter_opts |= + get_bool_arg (line, &match[2]) * FILTER_OPT_URL; + return 0; } static HANDLE_FUNC (handle_filterextended) { - return set_bool_arg (&conf->filter_extended, line, &match[2]); + warn_deprecated("FilterExtended, use FilterType", lineno); + conf->filter_opts |= + get_bool_arg (line, &match[2]) * FILTER_OPT_TYPE_ERE; + return 0; } static HANDLE_FUNC (handle_filterdefaultdeny) { assert (match[2].rm_so != -1); - - if (get_bool_arg (line, &match[2])) - filter_set_default_policy (FILTER_DEFAULT_DENY); + conf->filter_opts |= + get_bool_arg (line, &match[2]) * FILTER_OPT_DEFAULT_DENY; return 0; } static HANDLE_FUNC (handle_filtercasesensitive) { - return set_bool_arg (&conf->filter_casesensitive, line, &match[2]); + conf->filter_opts |= + get_bool_arg (line, &match[2]) * FILTER_OPT_CASESENSITIVE; + return 0; +} + +static HANDLE_FUNC (handle_filtertype) +{ + static const struct { unsigned short flag; char type[8]; } + ftmap[] = { + {FILTER_OPT_TYPE_ERE, "ere"}, + {FILTER_OPT_TYPE_BRE, "bre"}, + {FILTER_OPT_TYPE_FNMATCH, "fnmatch"}, + }; + char *type; + unsigned i; + type = get_string_arg(line, &match[2]); + if (!type) return -1; + + for(i=0;i<sizeof(ftmap)/sizeof(ftmap[0]);++i) + if(!strcasecmp(ftmap[i].type, type)) + conf->filter_opts |= ftmap[i].flag; + + safefree (type); + return 0; } #endif diff --git a/src/conf.h b/src/conf.h index 9914049..0a0f06f 100644 --- a/src/conf.h +++ b/src/conf.h @@ -50,9 +50,7 @@ struct config_s { sblist *listen_addrs; #ifdef FILTER_ENABLE char *filter; - unsigned int filter_url; /* boolean */ - unsigned int filter_extended; /* boolean */ - unsigned int filter_casesensitive; /* boolean */ + unsigned int filter_opts; /* enum 
filter_options */ #endif /* FILTER_ENABLE */ #ifdef XTINYPROXY_ENABLE unsigned int add_xtinyproxy; /* boolean */ diff --git a/src/filter.c b/src/filter.c index b9b5066..0dbc93d 100644 --- a/src/filter.c +++ b/src/filter.c @@ -25,6 +25,7 @@ #include "main.h" #include <regex.h> +#include <fnmatch.h> #include "filter.h" #include "heap.h" #include "log.h" @@ -37,15 +38,17 @@ static int err; struct filter_list { - regex_t cpatb; + union { + regex_t cpatb; + char *pattern; + } u; }; static sblist *fl = NULL; static int already_init = 0; -static filter_policy_t default_policy = FILTER_DEFAULT_ALLOW; /* - * Initializes a linked list of strings containing hosts/urls to be filtered + * Initializes a list of strings containing hosts/urls to be filtered */ void filter_init (void) { @@ -66,10 +69,8 @@ void filter_init (void) } cflags = REG_NEWLINE | REG_NOSUB; - if (config->filter_extended) - cflags |= REG_EXTENDED; - if (!config->filter_casesensitive) - cflags |= REG_ICASE; + cflags |= (REG_EXTENDED * !!(config->filter_opts & FILTER_OPT_TYPE_ERE)); + cflags |= (REG_ICASE * !(config->filter_opts & FILTER_OPT_CASESENSITIVE)); while (fgets (buf, FILTER_BUFFER_LEN, fd)) { ++lineno; @@ -107,13 +108,19 @@ void filter_init (void) if (!fl) fl = sblist_new(sizeof(struct filter_list), 4096/sizeof(struct filter_list)); - err = regcomp (&fe.cpatb, s, cflags); - if (err != 0) { - if (err == REG_ESPACE) goto oom; - fprintf (stderr, - "Bad regex in %s: line %d - %s\n", - config->filter, lineno, s); - exit (EX_DATAERR); + if (config->filter_opts & FILTER_OPT_TYPE_FNMATCH) { + fe.u.pattern = safestrdup(s); + if (!fe.u.pattern) goto oom; + } else { + + err = regcomp (&fe.u.cpatb, s, cflags); + if (err != 0) { + if (err == REG_ESPACE) goto oom; + fprintf (stderr, + "Bad regex in %s: line %d - %s\n", + config->filter, lineno, s); + exit (EX_DATAERR); + } } if (!sblist_add(fl, &fe)) { oom:; @@ -142,7 +149,10 @@ void filter_destroy (void) if (fl) { for (i = 0; i < sblist_getsize(fl); ++i) { p = sblist_get(fl, i); - 
regfree (&p->cpatb); + if (config->filter_opts & FILTER_OPT_TYPE_FNMATCH) + safefree(p->u.pattern); + else + regfree (&p->u.cpatb); } sblist_free(fl); } @@ -175,11 +185,14 @@ int filter_run (const char *str) for (i = 0; i < sblist_getsize(fl); ++i) { p = sblist_get(fl, i); - result = - regexec (&p->cpatb, str, (size_t) 0, (regmatch_t *) 0, 0); + if (config->filter_opts & FILTER_OPT_TYPE_FNMATCH) + result = fnmatch (p->u.pattern, str, 0); + else + result = + regexec (&p->u.cpatb, str, (size_t) 0, (regmatch_t *) 0, 0); if (result == 0) { - if (default_policy == FILTER_DEFAULT_ALLOW) + if (!(config->filter_opts & FILTER_OPT_DEFAULT_DENY)) return 1; else return 0; @@ -187,16 +200,8 @@ int filter_run (const char *str) } COMMON_EXIT: - if (default_policy == FILTER_DEFAULT_ALLOW) + if (!(config->filter_opts & FILTER_OPT_DEFAULT_DENY)) return 0; else return 1; } - -/* - * Set the default filtering policy - */ -void filter_set_default_policy (filter_policy_t policy) -{ - default_policy = policy; -} diff --git a/src/filter.h b/src/filter.h index 8a7575b..e5f3468 100644 --- a/src/filter.h +++ b/src/filter.h @@ -21,16 +21,22 @@ #ifndef _TINYPROXY_FILTER_H_ #define _TINYPROXY_FILTER_H_ -typedef enum { - FILTER_DEFAULT_ALLOW, - FILTER_DEFAULT_DENY -} filter_policy_t; +enum filter_options { + FILTER_OPT_CASESENSITIVE = 1 << 0, + FILTER_OPT_URL = 1 << 1, + FILTER_OPT_DEFAULT_DENY = 1 << 2, + + FILTER_OPT_TYPE_BRE = 1 << 8, + FILTER_OPT_TYPE_ERE = 1 << 9, + FILTER_OPT_TYPE_FNMATCH = 1 << 10, +}; + +#define FILTER_TYPE_MASK \ + (FILTER_OPT_TYPE_BRE | FILTER_OPT_TYPE_ERE | FILTER_OPT_TYPE_FNMATCH) extern void filter_init (void); extern void filter_destroy (void); extern void filter_reload (void); extern int filter_run (const char *str); -extern void filter_set_default_policy (filter_policy_t policy); - #endif diff --git a/src/reqs.c b/src/reqs.c index 50f6914..a6289a4 100644 --- a/src/reqs.c +++ b/src/reqs.c @@ -471,22 +471,16 @@ BAD_REQUEST_ERROR: * Filter restricted domains/urls */ 
if (config->filter) { - if (config->filter_url) - ret = filter_run (url); - else - ret = filter_run (request->host); + int fu = config->filter_opts & FILTER_OPT_URL; + ret = filter_run (fu ? url : request->host); if (ret) { update_stats (STAT_DENIED); - if (config->filter_url) - log_message (LOG_NOTICE, - "Proxying refused on filtered url \"%s\"", - url); - else - log_message (LOG_NOTICE, - "Proxying refused on filtered domain \"%s\"", - request->host); + log_message (LOG_NOTICE, + "Proxying refused on filtered %s \"%s\"", + fu ? "url" : "domain", + fu ? url : request->host); indicate_http_error (connptr, 403, "Filtered", "detail",