filter: reduce memory usage, fix OOM crashes

* check return values of memory allocation and abort gracefully
  in out-of-memory situations

* use sblist (linear dynamic array) instead of linked list
  - this removes one pointer per filter rule
  - removes need to manually allocate/free every single list item
    (instead block allocation is used)
  - simplifies code

* remove storage of (unused) input rule
  - removes one char* pointer per filter rule
  - removes storage of the raw bytes of each filter rule

* display the line number in out-of-memory and invalid-regex error messages

* replace duplicate filter_domain()/filter_url() code with a single
  function filter_run()
  - reduces code size and management effort

with these improvements, >1 million regex rules can be loaded with
4 GB of RAM, whereas previously loading crashed at about 950K rules.

the list for testing was assembled from
http://www.shallalist.de/Downloads/shallalist.tar.gz

closes #20
This commit is contained in:
rofl0r 2020-09-05 19:32:21 +01:00
parent c4dc3ba007
commit 233ce6de3b
3 changed files with 35 additions and 65 deletions

View File

@ -29,18 +29,17 @@
#include "log.h" #include "log.h"
#include "reqs.h" #include "reqs.h"
#include "conf.h" #include "conf.h"
#include "sblist.h"
#define FILTER_BUFFER_LEN (512) #define FILTER_BUFFER_LEN (512)
static int err; static int err;
struct filter_list { struct filter_list {
struct filter_list *next; regex_t cpatb;
char *pat;
regex_t *cpat;
}; };
static struct filter_list *fl = NULL; static sblist *fl = NULL;
static int already_init = 0; static int already_init = 0;
static filter_policy_t default_policy = FILTER_DEFAULT_ALLOW; static filter_policy_t default_policy = FILTER_DEFAULT_ALLOW;
@ -50,10 +49,10 @@ static filter_policy_t default_policy = FILTER_DEFAULT_ALLOW;
void filter_init (void) void filter_init (void)
{ {
FILE *fd; FILE *fd;
struct filter_list *p; struct filter_list fe;
char buf[FILTER_BUFFER_LEN]; char buf[FILTER_BUFFER_LEN];
char *s, *start; char *s, *start;
int cflags; int cflags, lineno = 0;
if (fl || already_init) { if (fl || already_init) {
return; return;
@ -64,8 +63,6 @@ void filter_init (void)
return; return;
} }
p = NULL;
cflags = REG_NEWLINE | REG_NOSUB; cflags = REG_NEWLINE | REG_NOSUB;
if (config->filter_extended) if (config->filter_extended)
cflags |= REG_EXTENDED; cflags |= REG_EXTENDED;
@ -73,6 +70,7 @@ void filter_init (void)
cflags |= REG_ICASE; cflags |= REG_ICASE;
while (fgets (buf, FILTER_BUFFER_LEN, fd)) { while (fgets (buf, FILTER_BUFFER_LEN, fd)) {
++lineno;
/* skip leading whitespace */ /* skip leading whitespace */
s = buf; s = buf;
while (*s && isspace ((unsigned char) *s)) while (*s && isspace ((unsigned char) *s))
@ -104,24 +102,22 @@ void filter_init (void)
if (*s == '\0') if (*s == '\0')
continue; continue;
if (!p) /* head of list */ if (!fl) fl = sblist_new(sizeof(struct filter_list),
fl = p = 4096/sizeof(struct filter_list));
(struct filter_list *)
safecalloc (1, sizeof (struct filter_list));
else { /* next entry */
p->next =
(struct filter_list *)
safecalloc (1, sizeof (struct filter_list));
p = p->next;
}
p->pat = safestrdup (s); err = regcomp (&fe.cpatb, s, cflags);
p->cpat = (regex_t *) safemalloc (sizeof (regex_t));
err = regcomp (p->cpat, p->pat, cflags);
if (err != 0) { if (err != 0) {
if (err == REG_ESPACE) goto oom;
fprintf (stderr, fprintf (stderr,
"Bad regex in %s: %s\n", "Bad regex in %s: line %d - %s\n",
config->filter, p->pat); config->filter, lineno, s);
exit (EX_DATAERR);
}
if (!sblist_add(fl, &fe)) {
oom:;
fprintf (stderr,
"out of memory parsing filter file %s: line %d\n",
config->filter, lineno);
exit (EX_DATAERR); exit (EX_DATAERR);
} }
} }
@ -137,15 +133,16 @@ void filter_init (void)
/* unlink the list */ /* unlink the list */
void filter_destroy (void) void filter_destroy (void)
{ {
struct filter_list *p, *q; struct filter_list *p;
size_t i;
if (already_init) { if (already_init) {
for (p = q = fl; p; p = q) { if (fl) {
regfree (p->cpat); for (i = 0; i < sblist_getsize(fl); ++i) {
safefree (p->cpat); p = sblist_get(fl, i);
safefree (p->pat); regfree (&p->cpatb);
q = p->next; }
safefree (p); sblist_free(fl);
} }
fl = NULL; fl = NULL;
already_init = 0; already_init = 0;
@ -165,45 +162,19 @@ void filter_reload (void)
} }
/* Return 0 to allow, non-zero to block */ /* Return 0 to allow, non-zero to block */
int filter_domain (const char *host) int filter_run (const char *str)
{ {
struct filter_list *p; struct filter_list *p;
size_t i;
int result; int result;
if (!fl || !already_init) if (!fl || !already_init)
goto COMMON_EXIT; goto COMMON_EXIT;
for (p = fl; p; p = p->next) { for (i = 0; i < sblist_getsize(fl); ++i) {
p = sblist_get(fl, i);
result = result =
regexec (p->cpat, host, (size_t) 0, (regmatch_t *) 0, 0); regexec (&p->cpatb, str, (size_t) 0, (regmatch_t *) 0, 0);
if (result == 0) {
if (default_policy == FILTER_DEFAULT_ALLOW)
return 1;
else
return 0;
}
}
COMMON_EXIT:
if (default_policy == FILTER_DEFAULT_ALLOW)
return 0;
else
return 1;
}
/* returns 0 to allow, non-zero to block */
int filter_url (const char *url)
{
struct filter_list *p;
int result;
if (!fl || !already_init)
goto COMMON_EXIT;
for (p = fl; p; p = p->next) {
result =
regexec (p->cpat, url, (size_t) 0, (regmatch_t *) 0, 0);
if (result == 0) { if (result == 0) {
if (default_policy == FILTER_DEFAULT_ALLOW) if (default_policy == FILTER_DEFAULT_ALLOW)

View File

@ -29,8 +29,7 @@ typedef enum {
extern void filter_init (void); extern void filter_init (void);
extern void filter_destroy (void); extern void filter_destroy (void);
extern void filter_reload (void); extern void filter_reload (void);
extern int filter_domain (const char *host); extern int filter_run (const char *str);
extern int filter_url (const char *url);
extern void filter_set_default_policy (filter_policy_t policy); extern void filter_set_default_policy (filter_policy_t policy);

View File

@ -457,9 +457,9 @@ BAD_REQUEST_ERROR:
*/ */
if (config->filter) { if (config->filter) {
if (config->filter_url) if (config->filter_url)
ret = filter_url (url); ret = filter_run (url);
else else
ret = filter_domain (request->host); ret = filter_run (request->host);
if (ret) { if (ret) {
update_stats (STAT_DENIED); update_stats (STAT_DENIED);