implement filtertype keyword and fnmatch-based filtering
As suggested in #212, it seems the majority of people didn't understand that filter input was expected to be in regex format, and were using filter lists containing plain hostnames, e.g. `www.google.com`. Apart from that, using fnmatch() for matching is actually a lot less computationally expensive and allows the use of big blacklists without incurring a huge performance hit. The config file now understands a new option `FilterType`, which can be one of `bre`, `ere` and `fnmatch`. The `FilterExtended` option is deprecated in favor of it. It still works, but will be removed in the release after the next.
This commit is contained in:
parent
26db3f6cc9
commit
235b1c10a7
@ -267,7 +267,7 @@ domains. This option specifies the location of the file
|
||||
containing the filter rules, one rule per line.
|
||||
|
||||
Rules are specified as POSIX basic regular expressions (BRE), unless
|
||||
FilterExtended is activated.
|
||||
another FilterType is specified.
|
||||
Comment lines start with a `#` character.
|
||||
|
||||
Example filter file contents:
|
||||
@ -287,6 +287,20 @@ Example filter file contents:
|
||||
# filter any domain that starts with adserver
|
||||
^adserver
|
||||
|
||||
=item B<FilterType>
|
||||
|
||||
This option can be set to one of `bre`, `ere`, or `fnmatch`.
|
||||
If `bre` is set, the rules specified in the filter file are matched
|
||||
using POSIX basic regular expressions; when set to `ere`, using
|
||||
POSIX extended regular expressions, and when set to `fnmatch` using
|
||||
the `fnmatch` function as specified in the manpage `man 3p fnmatch`.
|
||||
`fnmatch` matching is identical to what's used in the shell to match
|
||||
filenames, so for example `*.google.com` matches everything that
|
||||
ends with `.google.com`.
|
||||
If you don't know what regular expressions are or you're using filter
|
||||
lists from 3rd party sources, `fnmatch` is probably what you want.
|
||||
It's also the fastest matching method of the three.
|
||||
|
||||
=item B<FilterURLs>
|
||||
|
||||
If this boolean option is set to `Yes` or `On`, filtering is
|
||||
@ -300,6 +314,7 @@ recommended not to use this option.
|
||||
|
||||
=item B<FilterExtended>
|
||||
|
||||
Deprecated. Use `FilterType ere` instead.
|
||||
If this boolean option is set to `Yes`, then extended POSIX
|
||||
regular expressions are used for matching the filter rules.
|
||||
The default is to use basic POSIX regular expressions.
|
||||
@ -308,7 +323,11 @@ The default is to use basic POSIX regular expressions.
|
||||
|
||||
If this boolean option is set to `Yes`, then the filter rules
|
||||
are matched in a case sensitive manner. The default is to
|
||||
match case-insensitively.
|
||||
match case-insensitively, unfortunately.
|
||||
If you set this to `Yes`, then your matching will be almost
|
||||
twice as fast.
|
||||
This setting affects only the `bre` and `ere` FilterTypes; fnmatch
|
||||
is always case sensitive.
|
||||
|
||||
=item B<FilterDefaultDeny>
|
||||
|
||||
|
@ -240,10 +240,9 @@ ViaProxyName "tinyproxy"
|
||||
#FilterURLs On
|
||||
|
||||
#
|
||||
# FilterExtended: Use POSIX Extended regular expressions rather than
|
||||
# basic.
|
||||
# FilterType: Use bre (default), ere, or fnmatch for filtering.
|
||||
#
|
||||
#FilterExtended On
|
||||
#FilterType fnmatch
|
||||
|
||||
#
|
||||
# FilterCaseSensitive: Use case sensitive regular expressions.
|
||||
|
@ -34,6 +34,7 @@ config_directive_find (register const char *str, register size_t len)
|
||||
{"defaulterrorfile", CD_defaulterrorfile},
|
||||
{"startservers", CD_startservers},
|
||||
{"filtercasesensitive", CD_filtercasesensitive},
|
||||
{"filtertype", CD_filtertype},
|
||||
{"filterurls", CD_filterurls},
|
||||
{"filter", CD_filter},
|
||||
{"reversemagic", CD_reversemagic},
|
||||
|
@ -51,6 +51,7 @@ filterurls, CD_filterurls
|
||||
filterextended, CD_filterextended
|
||||
filterdefaultdeny, CD_filterdefaultdeny
|
||||
filtercasesensitive, CD_filtercasesensitive
|
||||
filtertype, CD_filtertype
|
||||
reversebaseurl, CD_reversebaseurl
|
||||
reverseonly, CD_reverseonly
|
||||
reversemagic, CD_reversemagic
|
||||
|
@ -33,6 +33,7 @@ CD_errorfile,
|
||||
CD_addheader,
|
||||
CD_filter,
|
||||
CD_filterurls,
|
||||
CD_filtertype,
|
||||
CD_filterextended,
|
||||
CD_filterdefaultdeny,
|
||||
CD_filtercasesensitive,
|
||||
|
46
src/conf.c
46
src/conf.c
@ -135,6 +135,7 @@ static HANDLE_FUNC (handle_filtercasesensitive);
|
||||
static HANDLE_FUNC (handle_filterdefaultdeny);
|
||||
static HANDLE_FUNC (handle_filterextended);
|
||||
static HANDLE_FUNC (handle_filterurls);
|
||||
static HANDLE_FUNC (handle_filtertype);
|
||||
#endif
|
||||
static HANDLE_FUNC (handle_group);
|
||||
static HANDLE_FUNC (handle_listen);
|
||||
@ -234,6 +235,7 @@ struct {
|
||||
STDCONF (filterextended, BOOL, handle_filterextended),
|
||||
STDCONF (filterdefaultdeny, BOOL, handle_filterdefaultdeny),
|
||||
STDCONF (filtercasesensitive, BOOL, handle_filtercasesensitive),
|
||||
STDCONF (filtertype, "(bre|ere|fnmatch)", handle_filtertype),
|
||||
#endif
|
||||
#ifdef REVERSE_SUPPORT
|
||||
/* Reverse proxy arguments */
|
||||
@ -952,6 +954,11 @@ static HANDLE_FUNC (handle_basicauth)
|
||||
}
|
||||
|
||||
#ifdef FILTER_ENABLE
|
||||
|
||||
/*
 * Emit a config-parser warning saying that a deprecated option was used.
 *
 * arg    - text naming the deprecated option (may include a hint about
 *          its replacement); printed after "deprecated option ".
 * lineno - not referenced directly in this body; presumably picked up by
 *          the CP_WARN macro to report the config line -- TODO confirm.
 */
static void
warn_deprecated (const char *arg, unsigned long lineno)
{
        CP_WARN ("deprecated option %s", arg);
}
|
||||
|
||||
static HANDLE_FUNC (handle_filter)
|
||||
{
|
||||
return set_string_arg (&conf->filter, line, &match[2]);
|
||||
@ -959,26 +966,53 @@ static HANDLE_FUNC (handle_filter)
|
||||
|
||||
static HANDLE_FUNC (handle_filterurls)
|
||||
{
|
||||
return set_bool_arg (&conf->filter_url, line, &match[2]);
|
||||
conf->filter_opts |=
|
||||
get_bool_arg (line, &match[2]) * FILTER_OPT_URL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static HANDLE_FUNC (handle_filterextended)
|
||||
{
|
||||
return set_bool_arg (&conf->filter_extended, line, &match[2]);
|
||||
warn_deprecated("FilterExtended, use FilterType", lineno);
|
||||
conf->filter_opts |=
|
||||
get_bool_arg (line, &match[2]) * FILTER_OPT_TYPE_ERE;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static HANDLE_FUNC (handle_filterdefaultdeny)
|
||||
{
|
||||
assert (match[2].rm_so != -1);
|
||||
|
||||
if (get_bool_arg (line, &match[2]))
|
||||
filter_set_default_policy (FILTER_DEFAULT_DENY);
|
||||
conf->filter_opts |=
|
||||
get_bool_arg (line, &match[2]) * FILTER_OPT_DEFAULT_DENY;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static HANDLE_FUNC (handle_filtercasesensitive)
|
||||
{
|
||||
return set_bool_arg (&conf->filter_casesensitive, line, &match[2]);
|
||||
conf->filter_opts |=
|
||||
get_bool_arg (line, &match[2]) * FILTER_OPT_CASESENSITIVE;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static HANDLE_FUNC (handle_filtertype)
|
||||
{
|
||||
static const struct { unsigned short flag; char type[8]; }
|
||||
ftmap[] = {
|
||||
{FILTER_OPT_TYPE_ERE, "ere"},
|
||||
{FILTER_OPT_TYPE_BRE, "bre"},
|
||||
{FILTER_OPT_TYPE_FNMATCH, "fnmatch"},
|
||||
};
|
||||
char *type;
|
||||
unsigned i;
|
||||
type = get_string_arg(line, &match[2]);
|
||||
if (!type) return -1;
|
||||
|
||||
for(i=0;i<sizeof(ftmap)/sizeof(ftmap[0]);++i)
|
||||
if(!strcmp(ftmap[i].type, type))
|
||||
conf->filter_opts |= ftmap[i].flag;
|
||||
|
||||
safefree (type);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -50,9 +50,7 @@ struct config_s {
|
||||
sblist *listen_addrs;
|
||||
#ifdef FILTER_ENABLE
|
||||
char *filter;
|
||||
unsigned int filter_url; /* boolean */
|
||||
unsigned int filter_extended; /* boolean */
|
||||
unsigned int filter_casesensitive; /* boolean */
|
||||
unsigned int filter_opts; /* enum filter_options */
|
||||
#endif /* FILTER_ENABLE */
|
||||
#ifdef XTINYPROXY_ENABLE
|
||||
unsigned int add_xtinyproxy; /* boolean */
|
||||
|
43
src/filter.c
43
src/filter.c
@ -25,6 +25,7 @@
|
||||
#include "main.h"
|
||||
|
||||
#include <regex.h>
|
||||
#include <fnmatch.h>
|
||||
#include "filter.h"
|
||||
#include "heap.h"
|
||||
#include "log.h"
|
||||
@ -37,15 +38,17 @@
|
||||
static int err;
|
||||
|
||||
/*
 * One filter rule as stored in the filter list.
 *
 * Only one union member is ever live for a given entry; which one is
 * decided by whether FILTER_OPT_TYPE_FNMATCH is set in the configuration
 * when the rule is loaded, and the same flag is consulted again when the
 * rule is matched or freed.
 */
struct filter_list {
        union {
                /* compiled regular expression (regcomp/regexec/regfree) */
                regex_t cpatb;
                /* heap-allocated copy of the raw pattern, used with fnmatch */
                char *pattern;
        } u;
};
|
||||
|
||||
static sblist *fl = NULL;
|
||||
static int already_init = 0;
|
||||
static filter_policy_t default_policy = FILTER_DEFAULT_ALLOW;
|
||||
|
||||
/*
|
||||
* Initializes a linked list of strings containing hosts/urls to be filtered
|
||||
* Initializes a list of strings containing hosts/urls to be filtered
|
||||
*/
|
||||
void filter_init (void)
|
||||
{
|
||||
@ -66,10 +69,8 @@ void filter_init (void)
|
||||
}
|
||||
|
||||
cflags = REG_NEWLINE | REG_NOSUB;
|
||||
if (config->filter_extended)
|
||||
cflags |= REG_EXTENDED;
|
||||
if (!config->filter_casesensitive)
|
||||
cflags |= REG_ICASE;
|
||||
cflags |= (REG_EXTENDED * !!(config->filter_opts & FILTER_OPT_TYPE_ERE));
|
||||
cflags |= (REG_ICASE * !(config->filter_opts & FILTER_OPT_CASESENSITIVE));
|
||||
|
||||
while (fgets (buf, FILTER_BUFFER_LEN, fd)) {
|
||||
++lineno;
|
||||
@ -107,7 +108,12 @@ void filter_init (void)
|
||||
if (!fl) fl = sblist_new(sizeof(struct filter_list),
|
||||
4096/sizeof(struct filter_list));
|
||||
|
||||
err = regcomp (&fe.cpatb, s, cflags);
|
||||
if (config->filter_opts & FILTER_OPT_TYPE_FNMATCH) {
|
||||
fe.u.pattern = safestrdup(s);
|
||||
if (!fe.u.pattern) goto oom;
|
||||
} else {
|
||||
|
||||
err = regcomp (&fe.u.cpatb, s, cflags);
|
||||
if (err != 0) {
|
||||
if (err == REG_ESPACE) goto oom;
|
||||
fprintf (stderr,
|
||||
@ -115,6 +121,7 @@ void filter_init (void)
|
||||
config->filter, lineno, s);
|
||||
exit (EX_DATAERR);
|
||||
}
|
||||
}
|
||||
if (!sblist_add(fl, &fe)) {
|
||||
oom:;
|
||||
fprintf (stderr,
|
||||
@ -142,7 +149,10 @@ void filter_destroy (void)
|
||||
if (fl) {
|
||||
for (i = 0; i < sblist_getsize(fl); ++i) {
|
||||
p = sblist_get(fl, i);
|
||||
regfree (&p->cpatb);
|
||||
if (config->filter_opts & FILTER_OPT_TYPE_FNMATCH)
|
||||
safefree(p->u.pattern);
|
||||
else
|
||||
regfree (&p->u.cpatb);
|
||||
}
|
||||
sblist_free(fl);
|
||||
}
|
||||
@ -175,11 +185,14 @@ int filter_run (const char *str)
|
||||
|
||||
for (i = 0; i < sblist_getsize(fl); ++i) {
|
||||
p = sblist_get(fl, i);
|
||||
if (config->filter_opts & FILTER_OPT_TYPE_FNMATCH)
|
||||
result = fnmatch (p->u.pattern, str, 0);
|
||||
else
|
||||
result =
|
||||
regexec (&p->cpatb, str, (size_t) 0, (regmatch_t *) 0, 0);
|
||||
regexec (&p->u.cpatb, str, (size_t) 0, (regmatch_t *) 0, 0);
|
||||
|
||||
if (result == 0) {
|
||||
if (default_policy == FILTER_DEFAULT_ALLOW)
|
||||
if (!(config->filter_opts & FILTER_OPT_DEFAULT_DENY))
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
@ -187,16 +200,8 @@ int filter_run (const char *str)
|
||||
}
|
||||
|
||||
COMMON_EXIT:
|
||||
if (default_policy == FILTER_DEFAULT_ALLOW)
|
||||
if (!(config->filter_opts & FILTER_OPT_DEFAULT_DENY))
|
||||
return 0;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set the default filtering policy
|
||||
*/
|
||||
void filter_set_default_policy (filter_policy_t policy)
|
||||
{
|
||||
default_policy = policy;
|
||||
}
|
||||
|
18
src/filter.h
18
src/filter.h
@ -21,16 +21,22 @@
|
||||
#ifndef _TINYPROXY_FILTER_H_
|
||||
#define _TINYPROXY_FILTER_H_
|
||||
|
||||
typedef enum {
|
||||
FILTER_DEFAULT_ALLOW,
|
||||
FILTER_DEFAULT_DENY
|
||||
} filter_policy_t;
|
||||
/*
 * Bit flags combined into the configuration's filter_opts field.
 *
 * The low bits are independent on/off switches; bits 8 and up select the
 * pattern-matching engine.
 */
enum filter_options {
        /* when clear, regex rules are compiled with REG_ICASE */
        FILTER_OPT_CASESENSITIVE = 0x0001,
        /* match against the full URL instead of the request host */
        FILTER_OPT_URL = 0x0002,
        /* invert the verdict: a matching rule allows, no match denies */
        FILTER_OPT_DEFAULT_DENY = 0x0004,

        /* POSIX basic regular expressions */
        FILTER_OPT_TYPE_BRE = 0x0100,
        /* POSIX extended regular expressions (REG_EXTENDED) */
        FILTER_OPT_TYPE_ERE = 0x0200,
        /* shell-style patterns matched with fnmatch() */
        FILTER_OPT_TYPE_FNMATCH = 0x0400,
};
|
||||
|
||||
#define FILTER_TYPE_MASK \
|
||||
(FILTER_OPT_TYPE_BRE | FILTER_OPT_TYPE_ERE | FILTER_OPT_TYPE_FNMATCH)
|
||||
|
||||
extern void filter_init (void);
|
||||
extern void filter_destroy (void);
|
||||
extern void filter_reload (void);
|
||||
extern int filter_run (const char *str);
|
||||
|
||||
extern void filter_set_default_policy (filter_policy_t policy);
|
||||
|
||||
#endif
|
||||
|
16
src/reqs.c
16
src/reqs.c
@ -471,22 +471,16 @@ BAD_REQUEST_ERROR:
|
||||
* Filter restricted domains/urls
|
||||
*/
|
||||
if (config->filter) {
|
||||
if (config->filter_url)
|
||||
ret = filter_run (url);
|
||||
else
|
||||
ret = filter_run (request->host);
|
||||
int fu = config->filter_opts & FILTER_OPT_URL;
|
||||
ret = filter_run (fu ? url : request->host);
|
||||
|
||||
if (ret) {
|
||||
update_stats (STAT_DENIED);
|
||||
|
||||
if (config->filter_url)
|
||||
log_message (LOG_NOTICE,
|
||||
"Proxying refused on filtered url \"%s\"",
|
||||
url);
|
||||
else
|
||||
log_message (LOG_NOTICE,
|
||||
"Proxying refused on filtered domain \"%s\"",
|
||||
request->host);
|
||||
"Proxying refused on filtered %s \"%s\"",
|
||||
fu ? "url" : "domain",
|
||||
fu ? url : request->host);
|
||||
|
||||
indicate_http_error (connptr, 403, "Filtered",
|
||||
"detail",
|
||||
|
Loading…
Reference in New Issue
Block a user