Added a copyright for James E. Flemer since these are his changes.

(filter_init): Added code to handle both hosts and URLs.  Also included code to use extended regular expressions.
(filter_domain): The old filter_url function has been renamed filter_domain().
(filter_url): This function now actually filters complete URLs.
This commit is contained in:
Robert James Kaes 2002-05-27 01:56:22 +00:00
parent 026c7d9a3d
commit b11015c2e1
2 changed files with 61 additions and 17 deletions

View File

@ -1,6 +1,7 @@
/* $Id: filter.c,v 1.10 2002-05-23 18:20:27 rjkaes Exp $ /* $Id: filter.c,v 1.11 2002-05-27 01:56:22 rjkaes Exp $
* *
* Copyright (c) 1999 George Talusan (gstalusan@uwaterloo.ca) * Copyright (c) 1999 George Talusan (gstalusan@uwaterloo.ca)
* Copyright (c) 2002 James E. Flemer (jflemer@acm.jhu.edu)
* *
* A substring of the domain to be filtered goes into the file * A substring of the domain to be filtered goes into the file
* pointed at by DEFAULT_FILTER. * pointed at by DEFAULT_FILTER.
@ -21,6 +22,9 @@
#include "filter.h" #include "filter.h"
#include "heap.h" #include "heap.h"
#include "regexp.h" #include "regexp.h"
#include "reqs.h"
#define FILTER_BUFFER_LEN (512)
static int err; static int err;
@ -33,21 +37,28 @@ struct filter_list {
static struct filter_list *fl = NULL; static struct filter_list *fl = NULL;
static int already_init = 0; static int already_init = 0;
/* initializes a linked list of strings containing hosts to be filtered */ /*
* Initializes a linked list of strings containing hosts/urls to be filtered
*/
void void
filter_init(void) filter_init(void)
{ {
FILE *fd; FILE *fd;
struct filter_list *p; struct filter_list *p;
char buf[255]; char buf[FILTER_BUFFER_LEN];
char *s; char *s, *t;
int cflags;
if (!fl && !already_init) { if (!fl && !already_init) {
fd = fopen(config.filter, "r"); fd = fopen(config.filter, "r");
if (fd) { if (fd) {
p = NULL; p = NULL;
while (fgets(buf, 255, fd)) { cflags = REG_NEWLINE | REG_NOSUB;
if (config.filter_extended)
cflags |= REG_EXTENDED;
while (fgets(buf, FILTER_BUFFER_LEN, fd)) {
s = buf; s = buf;
if (!p) /* head of list */ if (!p) /* head of list */
fl = p = fl = p =
@ -62,23 +73,38 @@ filter_init(void)
p = p->next; p = p->next;
} }
/* replace first whitespace with \0 */ /* strip trailing whitespace & comments */
while (*s++) t = s;
if (isspace((unsigned char) *s)) while (*s && *s != '#') {
*s = '\0'; if (!isspace((unsigned char)*(s++)))
t = s;
}
*t = '\0';
p->pat = safestrdup(buf); /* skip leading whitespace */
s = buf;
while (*s && isspace((unsigned char)*s))
s++;
/* skip blank lines and comments */
if (*s == '\0')
continue;
p->pat = safestrdup(s);
p->cpat = safemalloc(sizeof(regex_t)); p->cpat = safemalloc(sizeof(regex_t));
if ((err = if ((err = regcomp(p->cpat, p->pat, cflags)) != 0) {
regcomp(p->cpat, p->pat,
REG_NEWLINE | REG_NOSUB)) != 0) {
fprintf(stderr, "Bad regex in %s: %s\n", fprintf(stderr, "Bad regex in %s: %s\n",
config.filter, p->pat); config.filter, p->pat);
exit(EX_DATAERR); exit(EX_DATAERR);
} }
} }
already_init = 1; if (ferror(fd)) {
perror("fgets");
exit(EX_DATAERR);
}
fclose(fd); fclose(fd);
already_init = 1;
} }
} }
} }
@ -104,7 +130,7 @@ filter_destroy(void)
/* returns 0 if host is not an element of filter list, non-zero otherwise */ /* returns 0 if host is not an element of filter list, non-zero otherwise */
int int
filter_url(char *host) filter_domain(const char *host)
{ {
struct filter_list *p; struct filter_list *p;
char *s, *port; char *s, *port;
@ -130,3 +156,20 @@ filter_url(char *host)
safefree(s); safefree(s);
return (result); return (result);
} }
/*
 * Tests a complete URL against each compiled pattern in the filter list.
 *
 * Returns 0 if the filter list is empty or has not been initialized, or if
 * no pattern matches the URL; returns 1 as soon as one pattern matches.
 */
int
filter_url(const char *url)
{
	struct filter_list *entry;
	int matched = 0;

	if (fl && already_init) {
		entry = fl;
		while (entry && !matched) {
			/* regexec() yields 0 on a match; no match offsets are requested */
			if (regexec(entry->cpat, url, (size_t) 0, (regmatch_t *) 0, 0) == 0)
				matched = 1;
			entry = entry->next;
		}
	}

	return matched;
}

View File

@ -1,4 +1,4 @@
/* $Id: filter.h,v 1.3 2000-11-23 04:46:25 rjkaes Exp $ /* $Id: filter.h,v 1.4 2002-05-27 01:56:22 rjkaes Exp $
* *
* See 'filter.c' for a detailed description. * See 'filter.c' for a detailed description.
* *
@ -20,6 +20,7 @@
extern void filter_init(void); extern void filter_init(void);
extern void filter_destroy(void); extern void filter_destroy(void);
extern int filter_url(char *host); extern int filter_domain(const char *host);
extern int filter_url(const char *url);
#endif #endif