2002-05-27 09:56:22 +08:00
|
|
|
/* $Id: filter.c,v 1.11 2002-05-27 01:56:22 rjkaes Exp $
|
2000-02-17 01:32:49 +08:00
|
|
|
*
|
|
|
|
* Copyright (c) 1999 George Talusan (gstalusan@uwaterloo.ca)
|
2002-05-27 09:56:22 +08:00
|
|
|
* Copyright (c) 2002 James E. Flemer (jflemer@acm.jhu.edu)
|
2000-02-17 01:32:49 +08:00
|
|
|
*
|
|
|
|
* A substring of the domain to be filtered goes into the file
|
|
|
|
* pointed at by DEFAULT_FILTER.
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify it
|
|
|
|
* under the terms of the GNU General Public License as published by the
|
|
|
|
* Free Software Foundation; either version 2, or (at your option) any
|
|
|
|
* later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful, but
|
|
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* General Public License for more details.
|
|
|
|
*/
|
|
|
|
|
2000-09-12 07:43:59 +08:00
|
|
|
#include "tinyproxy.h"
|
|
|
|
|
2000-02-17 01:32:49 +08:00
|
|
|
#include "filter.h"
|
2002-05-24 02:20:27 +08:00
|
|
|
#include "heap.h"
|
2000-02-17 01:32:49 +08:00
|
|
|
#include "regexp.h"
|
2002-05-27 09:56:22 +08:00
|
|
|
#include "reqs.h"
|
|
|
|
|
|
|
|
/* Size in bytes of the buffer used to read one line of the filter file. */
#define FILTER_BUFFER_LEN (512)
|
2000-02-17 01:32:49 +08:00
|
|
|
|
|
|
|
/* Return code of the most recent regcomp() call made by filter_init(). */
static int err;
|
|
|
|
|
|
|
|
/*
 * One entry of the filter list: a pattern as read from the filter file
 * together with its compiled regular expression.
 */
struct filter_list {
	struct filter_list *next;	/* following entry, NULL at the tail */
	char *pat;			/* pattern text */
	regex_t *cpat;			/* pat compiled with regcomp() */
};
|
|
|
|
|
|
|
|
/* Head of the filter list; NULL while no entries are loaded. */
static struct filter_list *fl = NULL;
|
|
|
|
/* Set to 1 once filter_init() has read the filter file; reset by filter_destroy(). */
static int already_init = 0;
|
|
|
|
|
2002-05-27 09:56:22 +08:00
|
|
|
/*
|
|
|
|
* Initializes a linked list of strings containing hosts/urls to be filtered
|
|
|
|
*/
|
2001-11-22 08:31:10 +08:00
|
|
|
void
|
|
|
|
filter_init(void)
|
2000-02-17 01:32:49 +08:00
|
|
|
{
|
|
|
|
FILE *fd;
|
|
|
|
struct filter_list *p;
|
2002-05-27 09:56:22 +08:00
|
|
|
char buf[FILTER_BUFFER_LEN];
|
|
|
|
char *s, *t;
|
|
|
|
int cflags;
|
2000-02-17 01:32:49 +08:00
|
|
|
|
|
|
|
if (!fl && !already_init) {
|
|
|
|
fd = fopen(config.filter, "r");
|
|
|
|
if (fd) {
|
|
|
|
p = NULL;
|
|
|
|
|
2002-05-27 09:56:22 +08:00
|
|
|
cflags = REG_NEWLINE | REG_NOSUB;
|
|
|
|
if (config.filter_extended)
|
|
|
|
cflags |= REG_EXTENDED;
|
|
|
|
|
|
|
|
while (fgets(buf, FILTER_BUFFER_LEN, fd)) {
|
2000-02-17 01:32:49 +08:00
|
|
|
s = buf;
|
|
|
|
if (!p) /* head of list */
|
2001-11-22 08:31:10 +08:00
|
|
|
fl = p =
|
|
|
|
safecalloc(1,
|
|
|
|
sizeof(struct
|
|
|
|
filter_list));
|
2000-02-17 01:32:49 +08:00
|
|
|
else { /* next entry */
|
2001-11-22 08:31:10 +08:00
|
|
|
p->next =
|
|
|
|
safecalloc(1,
|
|
|
|
sizeof(struct
|
|
|
|
filter_list));
|
2000-02-17 01:32:49 +08:00
|
|
|
p = p->next;
|
|
|
|
}
|
|
|
|
|
2002-05-27 09:56:22 +08:00
|
|
|
/* strip trailing whitespace & comments */
|
|
|
|
t = s;
|
|
|
|
while (*s && *s != '#') {
|
|
|
|
if (!isspace((unsigned char)*(s++)))
|
|
|
|
t = s;
|
|
|
|
}
|
|
|
|
*t = '\0';
|
|
|
|
|
|
|
|
/* skip leading whitespace */
|
|
|
|
s = buf;
|
|
|
|
while (*s && isspace((unsigned char)*s))
|
|
|
|
s++;
|
|
|
|
|
|
|
|
/* skip blank lines and comments */
|
|
|
|
if (*s == '\0')
|
|
|
|
continue;
|
2000-02-17 01:32:49 +08:00
|
|
|
|
2002-05-27 09:56:22 +08:00
|
|
|
p->pat = safestrdup(s);
|
2001-09-09 02:58:37 +08:00
|
|
|
p->cpat = safemalloc(sizeof(regex_t));
|
2002-05-27 09:56:22 +08:00
|
|
|
if ((err = regcomp(p->cpat, p->pat, cflags)) != 0) {
|
2001-11-22 08:31:10 +08:00
|
|
|
fprintf(stderr, "Bad regex in %s: %s\n",
|
2000-02-17 01:32:49 +08:00
|
|
|
config.filter, p->pat);
|
|
|
|
exit(EX_DATAERR);
|
|
|
|
}
|
|
|
|
}
|
2002-05-27 09:56:22 +08:00
|
|
|
if (ferror(fd)) {
|
|
|
|
perror("fgets");
|
|
|
|
exit(EX_DATAERR);
|
|
|
|
}
|
2000-02-17 01:32:49 +08:00
|
|
|
fclose(fd);
|
2002-05-27 09:56:22 +08:00
|
|
|
|
|
|
|
already_init = 1;
|
2000-02-17 01:32:49 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* unlink the list */
|
2001-11-22 08:31:10 +08:00
|
|
|
void
|
|
|
|
filter_destroy(void)
|
2000-02-17 01:32:49 +08:00
|
|
|
{
|
|
|
|
struct filter_list *p, *q;
|
|
|
|
|
|
|
|
if (already_init) {
|
|
|
|
for (p = q = fl; p; p = q) {
|
|
|
|
regfree(p->cpat);
|
|
|
|
safefree(p->cpat);
|
|
|
|
safefree(p->pat);
|
|
|
|
q = p->next;
|
|
|
|
safefree(p);
|
|
|
|
}
|
|
|
|
fl = NULL;
|
|
|
|
already_init = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* returns 0 if host is not an element of filter list, non-zero otherwise */
|
2001-11-22 08:31:10 +08:00
|
|
|
int
|
2002-05-27 09:56:22 +08:00
|
|
|
filter_domain(const char *host)
|
2000-02-17 01:32:49 +08:00
|
|
|
{
|
|
|
|
struct filter_list *p;
|
|
|
|
char *s, *port;
|
|
|
|
int result;
|
|
|
|
|
|
|
|
if (!fl || !already_init)
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
/* strip off the port number */
|
2002-04-19 01:59:21 +08:00
|
|
|
s = safestrdup(host);
|
2000-02-17 01:32:49 +08:00
|
|
|
port = strchr(s, ':');
|
|
|
|
if (port)
|
|
|
|
*port = '\0';
|
|
|
|
|
|
|
|
result = 0;
|
|
|
|
|
|
|
|
for (p = fl; p; p = p->next) {
|
|
|
|
result = !regexec(p->cpat, s, (size_t) 0, (regmatch_t *) 0, 0);
|
|
|
|
|
|
|
|
if (result)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
safefree(s);
|
|
|
|
return (result);
|
|
|
|
}
|
2002-05-27 09:56:22 +08:00
|
|
|
|
|
|
|
/* returns 0 if url is not an element of filter list, non-zero otherwise */
|
|
|
|
int
|
|
|
|
filter_url(const char *url)
|
|
|
|
{
|
|
|
|
struct filter_list *p;
|
|
|
|
|
|
|
|
if (!fl || !already_init)
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
for (p = fl; p; p = p->next) {
|
|
|
|
if (!regexec(p->cpat, url, (size_t) 0, (regmatch_t *) 0, 0)) {
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|