Added a copyright for James E. Flemer since these are his changes.
(filter_init): Added code to handle both hosts and URLs. Also included code to use extended regular expressions. (filter_domain): The old filter_url function has been renamed to filter_domain(). (filter_url): This function now actually filters complete URLs.
This commit is contained in:
parent
026c7d9a3d
commit
b11015c2e1
73
src/filter.c
73
src/filter.c
@ -1,6 +1,7 @@
|
|||||||
/* $Id: filter.c,v 1.10 2002-05-23 18:20:27 rjkaes Exp $
|
/* $Id: filter.c,v 1.11 2002-05-27 01:56:22 rjkaes Exp $
|
||||||
*
|
*
|
||||||
* Copyright (c) 1999 George Talusan (gstalusan@uwaterloo.ca)
|
* Copyright (c) 1999 George Talusan (gstalusan@uwaterloo.ca)
|
||||||
|
* Copyright (c) 2002 James E. Flemer (jflemer@acm.jhu.edu)
|
||||||
*
|
*
|
||||||
* A substring of the domain to be filtered goes into the file
|
* A substring of the domain to be filtered goes into the file
|
||||||
* pointed at by DEFAULT_FILTER.
|
* pointed at by DEFAULT_FILTER.
|
||||||
@ -21,6 +22,9 @@
|
|||||||
#include "filter.h"
|
#include "filter.h"
|
||||||
#include "heap.h"
|
#include "heap.h"
|
||||||
#include "regexp.h"
|
#include "regexp.h"
|
||||||
|
#include "reqs.h"
|
||||||
|
|
||||||
|
#define FILTER_BUFFER_LEN (512)
|
||||||
|
|
||||||
static int err;
|
static int err;
|
||||||
|
|
||||||
@ -33,21 +37,28 @@ struct filter_list {
|
|||||||
static struct filter_list *fl = NULL;
|
static struct filter_list *fl = NULL;
|
||||||
static int already_init = 0;
|
static int already_init = 0;
|
||||||
|
|
||||||
/* initializes a linked list of strings containing hosts to be filtered */
|
/*
|
||||||
|
* Initializes a linked list of strings containing hosts/urls to be filtered
|
||||||
|
*/
|
||||||
void
|
void
|
||||||
filter_init(void)
|
filter_init(void)
|
||||||
{
|
{
|
||||||
FILE *fd;
|
FILE *fd;
|
||||||
struct filter_list *p;
|
struct filter_list *p;
|
||||||
char buf[255];
|
char buf[FILTER_BUFFER_LEN];
|
||||||
char *s;
|
char *s, *t;
|
||||||
|
int cflags;
|
||||||
|
|
||||||
if (!fl && !already_init) {
|
if (!fl && !already_init) {
|
||||||
fd = fopen(config.filter, "r");
|
fd = fopen(config.filter, "r");
|
||||||
if (fd) {
|
if (fd) {
|
||||||
p = NULL;
|
p = NULL;
|
||||||
|
|
||||||
while (fgets(buf, 255, fd)) {
|
cflags = REG_NEWLINE | REG_NOSUB;
|
||||||
|
if (config.filter_extended)
|
||||||
|
cflags |= REG_EXTENDED;
|
||||||
|
|
||||||
|
while (fgets(buf, FILTER_BUFFER_LEN, fd)) {
|
||||||
s = buf;
|
s = buf;
|
||||||
if (!p) /* head of list */
|
if (!p) /* head of list */
|
||||||
fl = p =
|
fl = p =
|
||||||
@ -62,23 +73,38 @@ filter_init(void)
|
|||||||
p = p->next;
|
p = p->next;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* replace first whitespace with \0 */
|
/* strip trailing whitespace & comments */
|
||||||
while (*s++)
|
t = s;
|
||||||
if (isspace((unsigned char) *s))
|
while (*s && *s != '#') {
|
||||||
*s = '\0';
|
if (!isspace((unsigned char)*(s++)))
|
||||||
|
t = s;
|
||||||
|
}
|
||||||
|
*t = '\0';
|
||||||
|
|
||||||
p->pat = safestrdup(buf);
|
/* skip leading whitespace */
|
||||||
|
s = buf;
|
||||||
|
while (*s && isspace((unsigned char)*s))
|
||||||
|
s++;
|
||||||
|
|
||||||
|
/* skip blank lines and comments */
|
||||||
|
if (*s == '\0')
|
||||||
|
continue;
|
||||||
|
|
||||||
|
p->pat = safestrdup(s);
|
||||||
p->cpat = safemalloc(sizeof(regex_t));
|
p->cpat = safemalloc(sizeof(regex_t));
|
||||||
if ((err =
|
if ((err = regcomp(p->cpat, p->pat, cflags)) != 0) {
|
||||||
regcomp(p->cpat, p->pat,
|
|
||||||
REG_NEWLINE | REG_NOSUB)) != 0) {
|
|
||||||
fprintf(stderr, "Bad regex in %s: %s\n",
|
fprintf(stderr, "Bad regex in %s: %s\n",
|
||||||
config.filter, p->pat);
|
config.filter, p->pat);
|
||||||
exit(EX_DATAERR);
|
exit(EX_DATAERR);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
already_init = 1;
|
if (ferror(fd)) {
|
||||||
|
perror("fgets");
|
||||||
|
exit(EX_DATAERR);
|
||||||
|
}
|
||||||
fclose(fd);
|
fclose(fd);
|
||||||
|
|
||||||
|
already_init = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -104,7 +130,7 @@ filter_destroy(void)
|
|||||||
|
|
||||||
/* returns 0 if host is not an element of filter list, non-zero otherwise */
|
/* returns 0 if host is not an element of filter list, non-zero otherwise */
|
||||||
int
|
int
|
||||||
filter_url(char *host)
|
filter_domain(const char *host)
|
||||||
{
|
{
|
||||||
struct filter_list *p;
|
struct filter_list *p;
|
||||||
char *s, *port;
|
char *s, *port;
|
||||||
@ -130,3 +156,20 @@ filter_url(char *host)
|
|||||||
safefree(s);
|
safefree(s);
|
||||||
return (result);
|
return (result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* returns 0 if url is not an element of filter list, non-zero otherwise */
|
||||||
|
int
|
||||||
|
filter_url(const char *url)
|
||||||
|
{
|
||||||
|
struct filter_list *p;
|
||||||
|
|
||||||
|
if (!fl || !already_init)
|
||||||
|
return (0);
|
||||||
|
|
||||||
|
for (p = fl; p; p = p->next) {
|
||||||
|
if (!regexec(p->cpat, url, (size_t) 0, (regmatch_t *) 0, 0)) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/* $Id: filter.h,v 1.3 2000-11-23 04:46:25 rjkaes Exp $
|
/* $Id: filter.h,v 1.4 2002-05-27 01:56:22 rjkaes Exp $
|
||||||
*
|
*
|
||||||
* See 'filter.c' for a detailed description.
|
* See 'filter.c' for a detailed description.
|
||||||
*
|
*
|
||||||
@ -20,6 +20,7 @@
|
|||||||
|
|
||||||
extern void filter_init(void);
|
extern void filter_init(void);
|
||||||
extern void filter_destroy(void);
|
extern void filter_destroy(void);
|
||||||
extern int filter_url(char *host);
|
extern int filter_domain(const char *host);
|
||||||
|
extern int filter_url(const char *url);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
Reference in New Issue
Block a user