implement filtertype keyword and fnmatch-based filtering
As suggested in #212, it seems the majority of users did not realize that filter rules were expected to be in regex format, and were using filter lists containing plain hostnames, e.g. `www.google.com`. Apart from that, using fnmatch() for matching is actually a lot less computationally expensive and allows the use of big blacklists without incurring a huge performance hit. The config file now understands a new option, `FilterType`, which can be one of `bre`, `ere`, or `fnmatch`. The `FilterExtended` option is deprecated in favor of it; it still works, but will be removed in the release after the next.
This commit is contained in:
parent
26db3f6cc9
commit
235b1c10a7
@ -267,7 +267,7 @@ domains. This option specifies the location of the file
|
|||||||
containing the filter rules, one rule per line.
|
containing the filter rules, one rule per line.
|
||||||
|
|
||||||
Rules are specified as POSIX basic regular expressions (BRE), unless
|
Rules are specified as POSIX basic regular expressions (BRE), unless
|
||||||
FilterExtended is activated.
|
another FilterType is specified.
|
||||||
Comment lines start with a `#` character.
|
Comment lines start with a `#` character.
|
||||||
|
|
||||||
Example filter file contents:
|
Example filter file contents:
|
||||||
@ -287,6 +287,20 @@ Example filter file contents:
|
|||||||
# filter any domain that starts with adserver
|
# filter any domain that starts with adserver
|
||||||
^adserver
|
^adserver
|
||||||
|
|
||||||
|
=item B<FilterType>
|
||||||
|
|
||||||
|
This option can be set to one of `bre`, `ere`, or `fnmatch`.
|
||||||
|
If `bre` is set, the rules specified in the filter file are matched
|
||||||
|
using POSIX basic regular expressions; when set to `ere`, using
|
||||||
|
POSIX extended regular expressions; and when set to `fnmatch`, using
|
||||||
|
the `fnmatch` function as specified in the manpage `man 3p fnmatch`.
|
||||||
|
`fnmatch` matching is identical to what's used in the shell to match
|
||||||
|
filenames, so for example `*.google.com` matches everything that
|
||||||
|
ends with `.google.com`.
|
||||||
|
If you don't know what regular expressions are or you're using filter
|
||||||
|
lists from 3rd party sources, `fnmatch` is probably what you want.
|
||||||
|
It's also the fastest matching method of the three.
|
||||||
|
|
||||||
=item B<FilterURLs>
|
=item B<FilterURLs>
|
||||||
|
|
||||||
If this boolean option is set to `Yes` or `On`, filtering is
|
If this boolean option is set to `Yes` or `On`, filtering is
|
||||||
@ -300,6 +314,7 @@ recommended not to use this option.
|
|||||||
|
|
||||||
=item B<FilterExtended>
|
=item B<FilterExtended>
|
||||||
|
|
||||||
|
Deprecated. Use `FilterType ere` instead.
|
||||||
If this boolean option is set to `Yes`, then extended POSIX
|
If this boolean option is set to `Yes`, then extended POSIX
|
||||||
regular expressions are used for matching the filter rules.
|
regular expressions are used for matching the filter rules.
|
||||||
The default is to use basic POSIX regular expressions.
|
The default is to use basic POSIX regular expressions.
|
||||||
@ -308,7 +323,11 @@ The default is to use basic POSIX regular expressions.
|
|||||||
|
|
||||||
If this boolean option is set to `Yes`, then the filter rules
|
If this boolean option is set to `Yes`, then the filter rules
|
||||||
are matched in a case sensitive manner. The default is to
|
are matched in a case sensitive manner. The default is to
|
||||||
match case-insensitively.
|
match case-insensitively, unfortunately.
|
||||||
|
If you set this to `Yes`, then your matching will be almost
|
||||||
|
twice as fast.
|
||||||
|
This setting affects only `bre` and `ere` FilterTypes; `fnmatch`
|
||||||
|
is always case sensitive.
|
||||||
|
|
||||||
=item B<FilterDefaultDeny>
|
=item B<FilterDefaultDeny>
|
||||||
|
|
||||||
|
@ -240,10 +240,9 @@ ViaProxyName "tinyproxy"
|
|||||||
#FilterURLs On
|
#FilterURLs On
|
||||||
|
|
||||||
#
|
#
|
||||||
# FilterExtended: Use POSIX Extended regular expressions rather than
|
# FilterType: Use bre (default), ere, or fnmatch for filtering.
|
||||||
# basic.
|
|
||||||
#
|
#
|
||||||
#FilterExtended On
|
#FilterType fnmatch
|
||||||
|
|
||||||
#
|
#
|
||||||
# FilterCaseSensitive: Use case sensitive regular expressions.
|
# FilterCaseSensitive: Use case sensitive regular expressions.
|
||||||
|
@ -34,6 +34,7 @@ config_directive_find (register const char *str, register size_t len)
|
|||||||
{"defaulterrorfile", CD_defaulterrorfile},
|
{"defaulterrorfile", CD_defaulterrorfile},
|
||||||
{"startservers", CD_startservers},
|
{"startservers", CD_startservers},
|
||||||
{"filtercasesensitive", CD_filtercasesensitive},
|
{"filtercasesensitive", CD_filtercasesensitive},
|
||||||
|
{"filtertype", CD_filtertype},
|
||||||
{"filterurls", CD_filterurls},
|
{"filterurls", CD_filterurls},
|
||||||
{"filter", CD_filter},
|
{"filter", CD_filter},
|
||||||
{"reversemagic", CD_reversemagic},
|
{"reversemagic", CD_reversemagic},
|
||||||
|
@ -51,6 +51,7 @@ filterurls, CD_filterurls
|
|||||||
filterextended, CD_filterextended
|
filterextended, CD_filterextended
|
||||||
filterdefaultdeny, CD_filterdefaultdeny
|
filterdefaultdeny, CD_filterdefaultdeny
|
||||||
filtercasesensitive, CD_filtercasesensitive
|
filtercasesensitive, CD_filtercasesensitive
|
||||||
|
filtertype, CD_filtertype
|
||||||
reversebaseurl, CD_reversebaseurl
|
reversebaseurl, CD_reversebaseurl
|
||||||
reverseonly, CD_reverseonly
|
reverseonly, CD_reverseonly
|
||||||
reversemagic, CD_reversemagic
|
reversemagic, CD_reversemagic
|
||||||
|
@ -33,6 +33,7 @@ CD_errorfile,
|
|||||||
CD_addheader,
|
CD_addheader,
|
||||||
CD_filter,
|
CD_filter,
|
||||||
CD_filterurls,
|
CD_filterurls,
|
||||||
|
CD_filtertype,
|
||||||
CD_filterextended,
|
CD_filterextended,
|
||||||
CD_filterdefaultdeny,
|
CD_filterdefaultdeny,
|
||||||
CD_filtercasesensitive,
|
CD_filtercasesensitive,
|
||||||
|
46
src/conf.c
46
src/conf.c
@ -135,6 +135,7 @@ static HANDLE_FUNC (handle_filtercasesensitive);
|
|||||||
static HANDLE_FUNC (handle_filterdefaultdeny);
|
static HANDLE_FUNC (handle_filterdefaultdeny);
|
||||||
static HANDLE_FUNC (handle_filterextended);
|
static HANDLE_FUNC (handle_filterextended);
|
||||||
static HANDLE_FUNC (handle_filterurls);
|
static HANDLE_FUNC (handle_filterurls);
|
||||||
|
static HANDLE_FUNC (handle_filtertype);
|
||||||
#endif
|
#endif
|
||||||
static HANDLE_FUNC (handle_group);
|
static HANDLE_FUNC (handle_group);
|
||||||
static HANDLE_FUNC (handle_listen);
|
static HANDLE_FUNC (handle_listen);
|
||||||
@ -234,6 +235,7 @@ struct {
|
|||||||
STDCONF (filterextended, BOOL, handle_filterextended),
|
STDCONF (filterextended, BOOL, handle_filterextended),
|
||||||
STDCONF (filterdefaultdeny, BOOL, handle_filterdefaultdeny),
|
STDCONF (filterdefaultdeny, BOOL, handle_filterdefaultdeny),
|
||||||
STDCONF (filtercasesensitive, BOOL, handle_filtercasesensitive),
|
STDCONF (filtercasesensitive, BOOL, handle_filtercasesensitive),
|
||||||
|
STDCONF (filtertype, "(bre|ere|fnmatch)", handle_filtertype),
|
||||||
#endif
|
#endif
|
||||||
#ifdef REVERSE_SUPPORT
|
#ifdef REVERSE_SUPPORT
|
||||||
/* Reverse proxy arguments */
|
/* Reverse proxy arguments */
|
||||||
@ -952,6 +954,11 @@ static HANDLE_FUNC (handle_basicauth)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef FILTER_ENABLE
|
#ifdef FILTER_ENABLE
|
||||||
|
|
||||||
|
static void warn_deprecated(const char *arg, unsigned long lineno) {
|
||||||
|
CP_WARN ("deprecated option %s", arg);
|
||||||
|
}
|
||||||
|
|
||||||
static HANDLE_FUNC (handle_filter)
|
static HANDLE_FUNC (handle_filter)
|
||||||
{
|
{
|
||||||
return set_string_arg (&conf->filter, line, &match[2]);
|
return set_string_arg (&conf->filter, line, &match[2]);
|
||||||
@ -959,26 +966,53 @@ static HANDLE_FUNC (handle_filter)
|
|||||||
|
|
||||||
static HANDLE_FUNC (handle_filterurls)
|
static HANDLE_FUNC (handle_filterurls)
|
||||||
{
|
{
|
||||||
return set_bool_arg (&conf->filter_url, line, &match[2]);
|
conf->filter_opts |=
|
||||||
|
get_bool_arg (line, &match[2]) * FILTER_OPT_URL;
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static HANDLE_FUNC (handle_filterextended)
|
static HANDLE_FUNC (handle_filterextended)
|
||||||
{
|
{
|
||||||
return set_bool_arg (&conf->filter_extended, line, &match[2]);
|
warn_deprecated("FilterExtended, use FilterType", lineno);
|
||||||
|
conf->filter_opts |=
|
||||||
|
get_bool_arg (line, &match[2]) * FILTER_OPT_TYPE_ERE;
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static HANDLE_FUNC (handle_filterdefaultdeny)
|
static HANDLE_FUNC (handle_filterdefaultdeny)
|
||||||
{
|
{
|
||||||
assert (match[2].rm_so != -1);
|
assert (match[2].rm_so != -1);
|
||||||
|
conf->filter_opts |=
|
||||||
if (get_bool_arg (line, &match[2]))
|
get_bool_arg (line, &match[2]) * FILTER_OPT_DEFAULT_DENY;
|
||||||
filter_set_default_policy (FILTER_DEFAULT_DENY);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static HANDLE_FUNC (handle_filtercasesensitive)
|
static HANDLE_FUNC (handle_filtercasesensitive)
|
||||||
{
|
{
|
||||||
return set_bool_arg (&conf->filter_casesensitive, line, &match[2]);
|
conf->filter_opts |=
|
||||||
|
get_bool_arg (line, &match[2]) * FILTER_OPT_CASESENSITIVE;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static HANDLE_FUNC (handle_filtertype)
|
||||||
|
{
|
||||||
|
static const struct { unsigned short flag; char type[8]; }
|
||||||
|
ftmap[] = {
|
||||||
|
{FILTER_OPT_TYPE_ERE, "ere"},
|
||||||
|
{FILTER_OPT_TYPE_BRE, "bre"},
|
||||||
|
{FILTER_OPT_TYPE_FNMATCH, "fnmatch"},
|
||||||
|
};
|
||||||
|
char *type;
|
||||||
|
unsigned i;
|
||||||
|
type = get_string_arg(line, &match[2]);
|
||||||
|
if (!type) return -1;
|
||||||
|
|
||||||
|
for(i=0;i<sizeof(ftmap)/sizeof(ftmap[0]);++i)
|
||||||
|
if(!strcmp(ftmap[i].type, type))
|
||||||
|
conf->filter_opts |= ftmap[i].flag;
|
||||||
|
|
||||||
|
safefree (type);
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -50,9 +50,7 @@ struct config_s {
|
|||||||
sblist *listen_addrs;
|
sblist *listen_addrs;
|
||||||
#ifdef FILTER_ENABLE
|
#ifdef FILTER_ENABLE
|
||||||
char *filter;
|
char *filter;
|
||||||
unsigned int filter_url; /* boolean */
|
unsigned int filter_opts; /* enum filter_options */
|
||||||
unsigned int filter_extended; /* boolean */
|
|
||||||
unsigned int filter_casesensitive; /* boolean */
|
|
||||||
#endif /* FILTER_ENABLE */
|
#endif /* FILTER_ENABLE */
|
||||||
#ifdef XTINYPROXY_ENABLE
|
#ifdef XTINYPROXY_ENABLE
|
||||||
unsigned int add_xtinyproxy; /* boolean */
|
unsigned int add_xtinyproxy; /* boolean */
|
||||||
|
43
src/filter.c
43
src/filter.c
@ -25,6 +25,7 @@
|
|||||||
#include "main.h"
|
#include "main.h"
|
||||||
|
|
||||||
#include <regex.h>
|
#include <regex.h>
|
||||||
|
#include <fnmatch.h>
|
||||||
#include "filter.h"
|
#include "filter.h"
|
||||||
#include "heap.h"
|
#include "heap.h"
|
||||||
#include "log.h"
|
#include "log.h"
|
||||||
@ -37,15 +38,17 @@
|
|||||||
static int err;
|
static int err;
|
||||||
|
|
||||||
struct filter_list {
|
struct filter_list {
|
||||||
|
union {
|
||||||
regex_t cpatb;
|
regex_t cpatb;
|
||||||
|
char *pattern;
|
||||||
|
} u;
|
||||||
};
|
};
|
||||||
|
|
||||||
static sblist *fl = NULL;
|
static sblist *fl = NULL;
|
||||||
static int already_init = 0;
|
static int already_init = 0;
|
||||||
static filter_policy_t default_policy = FILTER_DEFAULT_ALLOW;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Initializes a linked list of strings containing hosts/urls to be filtered
|
* Initializes a list of strings containing hosts/urls to be filtered
|
||||||
*/
|
*/
|
||||||
void filter_init (void)
|
void filter_init (void)
|
||||||
{
|
{
|
||||||
@ -66,10 +69,8 @@ void filter_init (void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
cflags = REG_NEWLINE | REG_NOSUB;
|
cflags = REG_NEWLINE | REG_NOSUB;
|
||||||
if (config->filter_extended)
|
cflags |= (REG_EXTENDED * !!(config->filter_opts & FILTER_OPT_TYPE_ERE));
|
||||||
cflags |= REG_EXTENDED;
|
cflags |= (REG_ICASE * !(config->filter_opts & FILTER_OPT_CASESENSITIVE));
|
||||||
if (!config->filter_casesensitive)
|
|
||||||
cflags |= REG_ICASE;
|
|
||||||
|
|
||||||
while (fgets (buf, FILTER_BUFFER_LEN, fd)) {
|
while (fgets (buf, FILTER_BUFFER_LEN, fd)) {
|
||||||
++lineno;
|
++lineno;
|
||||||
@ -107,7 +108,12 @@ void filter_init (void)
|
|||||||
if (!fl) fl = sblist_new(sizeof(struct filter_list),
|
if (!fl) fl = sblist_new(sizeof(struct filter_list),
|
||||||
4096/sizeof(struct filter_list));
|
4096/sizeof(struct filter_list));
|
||||||
|
|
||||||
err = regcomp (&fe.cpatb, s, cflags);
|
if (config->filter_opts & FILTER_OPT_TYPE_FNMATCH) {
|
||||||
|
fe.u.pattern = safestrdup(s);
|
||||||
|
if (!fe.u.pattern) goto oom;
|
||||||
|
} else {
|
||||||
|
|
||||||
|
err = regcomp (&fe.u.cpatb, s, cflags);
|
||||||
if (err != 0) {
|
if (err != 0) {
|
||||||
if (err == REG_ESPACE) goto oom;
|
if (err == REG_ESPACE) goto oom;
|
||||||
fprintf (stderr,
|
fprintf (stderr,
|
||||||
@ -115,6 +121,7 @@ void filter_init (void)
|
|||||||
config->filter, lineno, s);
|
config->filter, lineno, s);
|
||||||
exit (EX_DATAERR);
|
exit (EX_DATAERR);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
if (!sblist_add(fl, &fe)) {
|
if (!sblist_add(fl, &fe)) {
|
||||||
oom:;
|
oom:;
|
||||||
fprintf (stderr,
|
fprintf (stderr,
|
||||||
@ -142,7 +149,10 @@ void filter_destroy (void)
|
|||||||
if (fl) {
|
if (fl) {
|
||||||
for (i = 0; i < sblist_getsize(fl); ++i) {
|
for (i = 0; i < sblist_getsize(fl); ++i) {
|
||||||
p = sblist_get(fl, i);
|
p = sblist_get(fl, i);
|
||||||
regfree (&p->cpatb);
|
if (config->filter_opts & FILTER_OPT_TYPE_FNMATCH)
|
||||||
|
safefree(p->u.pattern);
|
||||||
|
else
|
||||||
|
regfree (&p->u.cpatb);
|
||||||
}
|
}
|
||||||
sblist_free(fl);
|
sblist_free(fl);
|
||||||
}
|
}
|
||||||
@ -175,11 +185,14 @@ int filter_run (const char *str)
|
|||||||
|
|
||||||
for (i = 0; i < sblist_getsize(fl); ++i) {
|
for (i = 0; i < sblist_getsize(fl); ++i) {
|
||||||
p = sblist_get(fl, i);
|
p = sblist_get(fl, i);
|
||||||
|
if (config->filter_opts & FILTER_OPT_TYPE_FNMATCH)
|
||||||
|
result = fnmatch (p->u.pattern, str, 0);
|
||||||
|
else
|
||||||
result =
|
result =
|
||||||
regexec (&p->cpatb, str, (size_t) 0, (regmatch_t *) 0, 0);
|
regexec (&p->u.cpatb, str, (size_t) 0, (regmatch_t *) 0, 0);
|
||||||
|
|
||||||
if (result == 0) {
|
if (result == 0) {
|
||||||
if (default_policy == FILTER_DEFAULT_ALLOW)
|
if (!(config->filter_opts & FILTER_OPT_DEFAULT_DENY))
|
||||||
return 1;
|
return 1;
|
||||||
else
|
else
|
||||||
return 0;
|
return 0;
|
||||||
@ -187,16 +200,8 @@ int filter_run (const char *str)
|
|||||||
}
|
}
|
||||||
|
|
||||||
COMMON_EXIT:
|
COMMON_EXIT:
|
||||||
if (default_policy == FILTER_DEFAULT_ALLOW)
|
if (!(config->filter_opts & FILTER_OPT_DEFAULT_DENY))
|
||||||
return 0;
|
return 0;
|
||||||
else
|
else
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Set the default filtering policy
|
|
||||||
*/
|
|
||||||
void filter_set_default_policy (filter_policy_t policy)
|
|
||||||
{
|
|
||||||
default_policy = policy;
|
|
||||||
}
|
|
||||||
|
18
src/filter.h
18
src/filter.h
@ -21,16 +21,22 @@
|
|||||||
#ifndef _TINYPROXY_FILTER_H_
|
#ifndef _TINYPROXY_FILTER_H_
|
||||||
#define _TINYPROXY_FILTER_H_
|
#define _TINYPROXY_FILTER_H_
|
||||||
|
|
||||||
typedef enum {
|
enum filter_options {
|
||||||
FILTER_DEFAULT_ALLOW,
|
FILTER_OPT_CASESENSITIVE = 1 << 0,
|
||||||
FILTER_DEFAULT_DENY
|
FILTER_OPT_URL = 1 << 1,
|
||||||
} filter_policy_t;
|
FILTER_OPT_DEFAULT_DENY = 1 << 2,
|
||||||
|
|
||||||
|
FILTER_OPT_TYPE_BRE = 1 << 8,
|
||||||
|
FILTER_OPT_TYPE_ERE = 1 << 9,
|
||||||
|
FILTER_OPT_TYPE_FNMATCH = 1 << 10,
|
||||||
|
};
|
||||||
|
|
||||||
|
#define FILTER_TYPE_MASK \
|
||||||
|
(FILTER_OPT_TYPE_BRE | FILTER_OPT_TYPE_ERE | FILTER_OPT_TYPE_FNMATCH)
|
||||||
|
|
||||||
extern void filter_init (void);
|
extern void filter_init (void);
|
||||||
extern void filter_destroy (void);
|
extern void filter_destroy (void);
|
||||||
extern void filter_reload (void);
|
extern void filter_reload (void);
|
||||||
extern int filter_run (const char *str);
|
extern int filter_run (const char *str);
|
||||||
|
|
||||||
extern void filter_set_default_policy (filter_policy_t policy);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
16
src/reqs.c
16
src/reqs.c
@ -471,22 +471,16 @@ BAD_REQUEST_ERROR:
|
|||||||
* Filter restricted domains/urls
|
* Filter restricted domains/urls
|
||||||
*/
|
*/
|
||||||
if (config->filter) {
|
if (config->filter) {
|
||||||
if (config->filter_url)
|
int fu = config->filter_opts & FILTER_OPT_URL;
|
||||||
ret = filter_run (url);
|
ret = filter_run (fu ? url : request->host);
|
||||||
else
|
|
||||||
ret = filter_run (request->host);
|
|
||||||
|
|
||||||
if (ret) {
|
if (ret) {
|
||||||
update_stats (STAT_DENIED);
|
update_stats (STAT_DENIED);
|
||||||
|
|
||||||
if (config->filter_url)
|
|
||||||
log_message (LOG_NOTICE,
|
log_message (LOG_NOTICE,
|
||||||
"Proxying refused on filtered url \"%s\"",
|
"Proxying refused on filtered %s \"%s\"",
|
||||||
url);
|
fu ? "url" : "domain",
|
||||||
else
|
fu ? url : request->host);
|
||||||
log_message (LOG_NOTICE,
|
|
||||||
"Proxying refused on filtered domain \"%s\"",
|
|
||||||
request->host);
|
|
||||||
|
|
||||||
indicate_http_error (connptr, 403, "Filtered",
|
indicate_http_error (connptr, 403, "Filtered",
|
||||||
"detail",
|
"detail",
|
||||||
|
Loading…
Reference in New Issue
Block a user