Compare commits

...

13 Commits

Author SHA1 Message Date
rofl0r
417c258d14 conf: speed up parsing 10x by using ragel if available
conf_regex.rl is generated from the output of conf_regex_print.c using
re2r (https://github.com/rofl0r/re2r).
if ragel is available on the build host, it is used to generate
finite state machines from the regexes used by the config file parser
for an impressive speed boost, while adding only moderately to binary
size.
a stripped x86_64 tinyproxy binary compiled with -O2 is still only ~100KB.
2020-10-16 12:40:56 +01:00
rofl0r
3a920b7163 conf: add tool to print regex name/regex pairs as re2r input
this is currently not included in the build system and needs to be
compiled by hand.
2020-10-16 12:03:28 +01:00
rofl0r
42bb446c96 conf: shrink back RE_MAX_MATCHES to 16
with the IPv4 regex simplification from 22f059dc5e
we're back to max 15 match groups according to re2r analysis
(the most elaborate regex is the upstream one).
2020-10-16 11:58:48 +01:00
rofl0r
dabfd1ad6c conf: remove pointless assert() statement 2020-10-15 22:39:46 +01:00
rofl0r
ae4cbcabd1 conf: remove trailing whitespace via C code, not regex 2020-10-15 22:36:10 +01:00
rofl0r
22f059dc5e conf: simplify ipv4 regex
use one matching group rather than 3.
2020-10-12 20:05:06 +01:00
rofl0r
86379b4b66 conf: parse regexes case-sensitive
rather than treating everything as case insensitive, we explicitly
allow upper/lowercase where it makes sense.
2020-10-09 01:43:46 +01:00
rofl0r
57f932a33b conf: skip leading whitespace instead of adding it to each regex 2020-10-09 01:26:50 +01:00
rofl0r
173c5b66a7 conf: remove obsolete whitespace from regex start
we already deal with leading whitespace before a command in a manual
way before comparing keywords.
2020-10-09 01:04:44 +01:00
rofl0r
393e51ba45 conf: remove second instance of empty parens ERE group
likewise
2020-10-09 01:00:56 +01:00
rofl0r
b07f7a8422 conf: remove empty parens group from regex
using an empty group () is not defined in the posix spec, and as such
"undefined behaviour", even though it happened to work with both GLIBC
and MUSL libc, as well as with oniguruma's POSIX compatibility API.

we used this idiom as a trick when refactoring the regex parsing,
in order not to change the match indices of all the handler functions,
unaware that this is not explicitly allowed by the spec.

to make future refactoring easier, we introduce a MGROUP1 macro that's
added to each match group index, so we have only a single knob to turn
in case a similar change becomes necessary again.
2020-10-09 00:38:13 +01:00
rofl0r
3eb238634a conf: properly escape tab in whitespace class 2020-10-09 00:23:47 +01:00
rofl0r
f1f3994d09 conf: factor out list of regex into separate header
this allows including the regexes in another file in order to apply
transformations and run experiments.
2020-10-09 00:22:14 +01:00
6 changed files with 727 additions and 157 deletions

View File

@ -213,6 +213,16 @@ if test "x$GPERF" != "x" -a "x$GPERF" != "xno" ; then
AC_DEFINE(HAVE_GPERF) AC_DEFINE(HAVE_GPERF)
fi fi
AC_PATH_PROG(RAGEL, ragel, no)
AM_CONDITIONAL(HAVE_RAGEL, test "x$RAGEL" != "x" -a "x$RAGEL" != "xno")
AH_TEMPLATE([HAVE_RAGEL],
[Whether you have ragel installed for faster config parsing.])
if test "x$RAGEL" != "x" -a "x$RAGEL" != "xno" ; then
AC_DEFINE(HAVE_RAGEL)
fi
AC_CONFIG_FILES([ AC_CONFIG_FILES([
Makefile Makefile
src/Makefile src/Makefile

View File

@ -60,11 +60,22 @@ EXTRA_tinyproxy_SOURCES = filter.c filter.h \
tinyproxy_DEPENDENCIES = @ADDITIONAL_OBJECTS@ tinyproxy_DEPENDENCIES = @ADDITIONAL_OBJECTS@
tinyproxy_LDADD = @ADDITIONAL_OBJECTS@ -lpthread tinyproxy_LDADD = @ADDITIONAL_OBJECTS@ -lpthread
CLEANFILES =
if HAVE_GPERF if HAVE_GPERF
conf-tokens.c: conf-tokens-gperf.inc conf-tokens.c: conf-tokens-gperf.inc
conf-tokens-gperf.inc: conf-tokens.gperf conf-tokens-gperf.inc: conf-tokens.gperf
$(GPERF) $< > $@ $(GPERF) $< > $@
endif endif
EXTRA_DIST = conf-tokens.gperf if HAVE_RAGEL
conf.c: conf_regex.inc
conf_regex.inc: conf_regex.rl
$(RAGEL) $(RAGEL_FLAGS) -o $@ $<
CLEANFILES += conf_regex.inc
endif
EXTRA_DIST = conf-tokens.gperf conf_regex.rl

View File

@ -40,41 +40,12 @@
#include "basicauth.h" #include "basicauth.h"
#include "conf-tokens.h" #include "conf-tokens.h"
/*
* The configuration directives are defined in the structure below. Each
* directive requires a regular expression to match against, and a
* function to call when the regex is matched.
*
* Below are defined certain constant regular expression strings that
* can (and likely should) be used when building the regex for the
* given directive.
*/
#define DIGIT "[0-9]"
#define SPACE "[ \t]"
#define WS SPACE "+"
#define STR "\"([^\"]+)\""
#define BOOL "(yes|on|no|off)"
#define INT "(()" DIGIT "+)"
#define ALNUM "([-a-z0-9._]+)"
#define USERNAME "([^:]*)"
#define PASSWORD "([^@]*)"
#define IP "((([0-9]{1,3})\\.){3}[0-9]{1,3})"
#define IPMASK "(" IP "(/" DIGIT "+)?)"
#define IPV6 "(" \
"(([0-9a-f:]{2,39}))|" \
"(([0-9a-f:]{0,29}:" IP "))" \
")"
#define IPV6MASK "(" IPV6 "(/" DIGIT "+)?)"
#define BEGIN "^" SPACE "*"
#define END SPACE "*$"
/* /*
* Limit the maximum number of substring matches to a reasonably high * Limit the maximum number of substring matches to a reasonably high
* number. Given the usual structure of the configuration file, sixteen * number. Given the usual structure of the configuration file, sixteen
* substring matches should be plenty. * substring matches should be plenty.
*/ */
#define RE_MAX_MATCHES 24 #define RE_MAX_MATCHES 16
#define CP_WARN(FMT, ...) \ #define CP_WARN(FMT, ...) \
log_message (LOG_WARNING, "line %lu: " FMT, lineno, __VA_ARGS__) log_message (LOG_WARNING, "line %lu: " FMT, lineno, __VA_ARGS__)
@ -171,7 +142,14 @@ static void config_free_regex (void);
* do not follow the pattern above. This macro is for convenience * do not follow the pattern above. This macro is for convenience
* only. * only.
*/ */
#define STDCONF(d, re, func) [CD_ ## d] = { BEGIN "()" WS re END, func, NULL } #ifdef HAVE_RAGEL
#define RE2R_EXPORT static
#include "conf_regex.inc"
typedef int (*matchfunc)(const char*, const char*, size_t, regmatch_t[]);
#define STDCONF(d, re, func) [CD_ ## d] = { func, re2r_match_ ## d }
#else
#define STDCONF(d, re, func) [CD_ ## d] = { func, BEGIN re END, NULL }
#endif
/* /*
* Holds the regular expression used to match the configuration directive, * Holds the regular expression used to match the configuration directive,
@ -180,73 +158,15 @@ static void config_free_regex (void);
* to be compiled one. * to be compiled one.
*/ */
struct { struct {
const char *re;
CONFFILE_HANDLER handler; CONFFILE_HANDLER handler;
#ifndef HAVE_RAGEL
const char *re;
regex_t *cre; regex_t *cre;
#else
matchfunc mf;
#endif
} directives[] = { } directives[] = {
/* string arguments */ #include "conf_regex.h"
STDCONF (logfile, STR, handle_logfile),
STDCONF (pidfile, STR, handle_pidfile),
STDCONF (anonymous, STR, handle_anonymous),
STDCONF (viaproxyname, STR, handle_viaproxyname),
STDCONF (defaulterrorfile, STR, handle_defaulterrorfile),
STDCONF (statfile, STR, handle_statfile),
STDCONF (stathost, STR, handle_stathost),
STDCONF (xtinyproxy, BOOL, handle_xtinyproxy),
/* boolean arguments */
STDCONF (syslog, BOOL, handle_syslog),
STDCONF (bindsame, BOOL, handle_bindsame),
STDCONF (disableviaheader, BOOL, handle_disableviaheader),
/* integer arguments */
STDCONF (port, INT, handle_port),
STDCONF (maxclients, INT, handle_maxclients),
STDCONF (maxspareservers, INT, handle_obsolete),
STDCONF (minspareservers, INT, handle_obsolete),
STDCONF (startservers, INT, handle_obsolete),
STDCONF (maxrequestsperchild, INT, handle_obsolete),
STDCONF (timeout, INT, handle_timeout),
STDCONF (connectport, INT, handle_connectport),
/* alphanumeric arguments */
STDCONF (user, ALNUM, handle_user),
STDCONF (group, ALNUM, handle_group),
/* ip arguments */
STDCONF (listen, "(" IP "|" IPV6 ")", handle_listen),
STDCONF (allow, "(" "(" IPMASK "|" IPV6MASK ")" "|" ALNUM ")",
handle_allow),
STDCONF (deny, "(" "(" IPMASK "|" IPV6MASK ")" "|" ALNUM ")",
handle_deny),
STDCONF (bind, "(" IP "|" IPV6 ")", handle_bind),
/* other */
STDCONF (basicauth, ALNUM WS ALNUM, handle_basicauth),
STDCONF (errorfile, INT WS STR, handle_errorfile),
STDCONF (addheader, STR WS STR, handle_addheader),
#ifdef FILTER_ENABLE
/* filtering */
STDCONF (filter, STR, handle_filter),
STDCONF (filterurls, BOOL, handle_filterurls),
STDCONF (filterextended, BOOL, handle_filterextended),
STDCONF (filterdefaultdeny, BOOL, handle_filterdefaultdeny),
STDCONF (filtercasesensitive, BOOL, handle_filtercasesensitive),
#endif
#ifdef REVERSE_SUPPORT
/* Reverse proxy arguments */
STDCONF (reversebaseurl, STR, handle_reversebaseurl),
STDCONF (reverseonly, BOOL, handle_reverseonly),
STDCONF (reversemagic, BOOL, handle_reversemagic),
STDCONF (reversepath, STR "(" WS STR ")?", handle_reversepath),
#endif
#ifdef UPSTREAM_SUPPORT
STDCONF (upstream,
"(" "(none)" WS STR ")|" \
"(" "(http|socks4|socks5)" WS \
"(" USERNAME /*username*/ ":" PASSWORD /*password*/ "@" ")?"
"(" IP "|" ALNUM ")"
":" INT "(" WS STR ")?" ")", handle_upstream),
#endif
/* loglevel */
STDCONF (loglevel, "(critical|error|warning|notice|connect|info)",
handle_loglevel)
}; };
const unsigned int ndirectives = sizeof (directives) / sizeof (directives[0]); const unsigned int ndirectives = sizeof (directives) / sizeof (directives[0]);
@ -339,23 +259,26 @@ config_init (void)
{ {
unsigned int i, r; unsigned int i, r;
(void) r;
for (i = 0; i != ndirectives; ++i) { for (i = 0; i != ndirectives; ++i) {
assert (!directives[i].cre);
if (!directives[i].handler) { if (!directives[i].handler) {
directives[i].handler = handle_disabled_feature; directives[i].handler = handle_disabled_feature;
continue; continue;
} }
#ifndef HAVE_RAGEL
directives[i].cre = (regex_t *) safemalloc (sizeof (regex_t)); directives[i].cre = (regex_t *) safemalloc (sizeof (regex_t));
if (!directives[i].cre) if (!directives[i].cre)
return -1; return -1;
r = regcomp (directives[i].cre, r = regcomp (directives[i].cre,
directives[i].re, directives[i].re,
REG_EXTENDED | REG_ICASE | REG_NEWLINE); REG_EXTENDED | REG_NEWLINE);
if (r) if (r)
return r; return r;
#endif
} }
atexit (config_free_regex); atexit (config_free_regex);
@ -370,6 +293,7 @@ config_init (void)
static void static void
config_free_regex (void) config_free_regex (void)
{ {
#ifndef HAVE_RAGEL
unsigned int i; unsigned int i;
for (i = 0; i < ndirectives; i++) { for (i = 0; i < ndirectives; i++) {
@ -379,6 +303,7 @@ config_free_regex (void)
directives[i].cre = NULL; directives[i].cre = NULL;
} }
} }
#endif
} }
/* /*
@ -389,18 +314,25 @@ config_free_regex (void)
* Returns 0 if a match was found and successfully processed; otherwise, * Returns 0 if a match was found and successfully processed; otherwise,
* a negative number is returned. * a negative number is returned.
*/ */
static int check_match (struct config_s *conf, const char *line, static int check_match (struct config_s *conf,
const char *line, const char* lineend,
unsigned long lineno, enum config_directive cd) unsigned long lineno, enum config_directive cd)
{ {
regmatch_t match[RE_MAX_MATCHES]; regmatch_t match[RE_MAX_MATCHES];
unsigned int i = cd; unsigned int i = cd;
#ifndef HAVE_RAGEL
(void) lineend;
if (!directives[i].cre) if (!directives[i].cre)
return (*directives[i].handler) (conf, line, lineno, match); return (*directives[i].handler) (conf, line, lineno, match);
if (!regexec if (!regexec
(directives[i].cre, line, RE_MAX_MATCHES, match, 0)) (directives[i].cre, line, RE_MAX_MATCHES, match, 0))
return (*directives[i].handler) (conf, line, lineno, match); return (*directives[i].handler) (conf, line, lineno, match);
#else
if (!directives[i].mf(line, lineend, RE_MAX_MATCHES, match))
return (*directives[i].handler) (conf, line, lineno, match);
#endif
return -1; return -1;
} }
@ -409,7 +341,7 @@ static int check_match (struct config_s *conf, const char *line,
*/ */
static int config_parse (struct config_s *conf, FILE * f) static int config_parse (struct config_s *conf, FILE * f)
{ {
char buffer[LINE_MAX], *p, *q, c; char buffer[LINE_MAX], *p, *q;
const struct config_directive_entry *e; const struct config_directive_entry *e;
unsigned long lineno = 1; unsigned long lineno = 1;
@ -420,11 +352,14 @@ static int config_parse (struct config_s *conf, FILE * f)
if(!*p) continue; if(!*p) continue;
q = p; q = p;
while(!isspace(*q))q++; while(!isspace(*q))q++;
c = *q;
*q = 0; *q = 0;
e = config_directive_find(p, strlen(p)); e = config_directive_find(p, strlen(p));
*q = c; ++q;
if (!e || e->value == CD_NIL || check_match (conf, q, lineno, e->value)) { while(isspace(*q))++q;
p = q;
while(*p && *p != '\n') ++p;
while(isspace(*p)) *(p--) = 0;
if (!e || e->value == CD_NIL || check_match (conf, q, ++p, lineno, e->value)) {
fprintf (stderr, "ERROR: Syntax error on line %lu\n", lineno); fprintf (stderr, "ERROR: Syntax error on line %lu\n", lineno);
return 1; return 1;
} }
@ -625,19 +560,21 @@ set_int_arg (unsigned int *var, const char *line, regmatch_t * match)
* *
***********************************************************************/ ***********************************************************************/
#define MGROUP1 -1
static HANDLE_FUNC (handle_logfile) static HANDLE_FUNC (handle_logfile)
{ {
return set_string_arg (&conf->logf_name, line, &match[2]); return set_string_arg (&conf->logf_name, line, &match[MGROUP1+2]);
} }
static HANDLE_FUNC (handle_pidfile) static HANDLE_FUNC (handle_pidfile)
{ {
return set_string_arg (&conf->pidpath, line, &match[2]); return set_string_arg (&conf->pidpath, line, &match[MGROUP1+2]);
} }
static HANDLE_FUNC (handle_anonymous) static HANDLE_FUNC (handle_anonymous)
{ {
char *arg = get_string_arg (line, &match[2]); char *arg = get_string_arg (line, &match[MGROUP1+2]);
if (!arg) if (!arg)
return -1; return -1;
@ -653,7 +590,7 @@ static HANDLE_FUNC (handle_anonymous)
static HANDLE_FUNC (handle_viaproxyname) static HANDLE_FUNC (handle_viaproxyname)
{ {
int r = set_string_arg (&conf->via_proxy_name, line, &match[2]); int r = set_string_arg (&conf->via_proxy_name, line, &match[MGROUP1+2]);
if (r) if (r)
return r; return r;
@ -665,7 +602,7 @@ static HANDLE_FUNC (handle_viaproxyname)
static HANDLE_FUNC (handle_disableviaheader) static HANDLE_FUNC (handle_disableviaheader)
{ {
int r = set_bool_arg (&conf->disable_viaheader, line, &match[2]); int r = set_bool_arg (&conf->disable_viaheader, line, &match[MGROUP1+2]);
if (r) { if (r) {
return r; return r;
@ -678,17 +615,17 @@ static HANDLE_FUNC (handle_disableviaheader)
static HANDLE_FUNC (handle_defaulterrorfile) static HANDLE_FUNC (handle_defaulterrorfile)
{ {
return set_string_arg (&conf->errorpage_undef, line, &match[2]); return set_string_arg (&conf->errorpage_undef, line, &match[MGROUP1+2]);
} }
static HANDLE_FUNC (handle_statfile) static HANDLE_FUNC (handle_statfile)
{ {
return set_string_arg (&conf->statpage, line, &match[2]); return set_string_arg (&conf->statpage, line, &match[MGROUP1+2]);
} }
static HANDLE_FUNC (handle_stathost) static HANDLE_FUNC (handle_stathost)
{ {
int r = set_string_arg (&conf->stathost, line, &match[2]); int r = set_string_arg (&conf->stathost, line, &match[MGROUP1+2]);
if (r) if (r)
return r; return r;
@ -699,7 +636,7 @@ static HANDLE_FUNC (handle_stathost)
static HANDLE_FUNC (handle_xtinyproxy) static HANDLE_FUNC (handle_xtinyproxy)
{ {
#ifdef XTINYPROXY_ENABLE #ifdef XTINYPROXY_ENABLE
return set_bool_arg (&conf->add_xtinyproxy, line, &match[2]); return set_bool_arg (&conf->add_xtinyproxy, line, &match[MGROUP1+2]);
#else #else
fprintf (stderr, fprintf (stderr,
"XTinyproxy NOT Enabled! Recompile with --enable-xtinyproxy\n"); "XTinyproxy NOT Enabled! Recompile with --enable-xtinyproxy\n");
@ -709,12 +646,12 @@ static HANDLE_FUNC (handle_xtinyproxy)
static HANDLE_FUNC (handle_syslog) static HANDLE_FUNC (handle_syslog)
{ {
return set_bool_arg (&conf->syslog, line, &match[2]); return set_bool_arg (&conf->syslog, line, &match[MGROUP1+2]);
} }
static HANDLE_FUNC (handle_bindsame) static HANDLE_FUNC (handle_bindsame)
{ {
int r = set_bool_arg (&conf->bindsame, line, &match[2]); int r = set_bool_arg (&conf->bindsame, line, &match[MGROUP1+2]);
if (r) if (r)
return r; return r;
@ -724,7 +661,7 @@ static HANDLE_FUNC (handle_bindsame)
static HANDLE_FUNC (handle_port) static HANDLE_FUNC (handle_port)
{ {
set_int_arg (&conf->port, line, &match[2]); set_int_arg (&conf->port, line, &match[MGROUP1+2]);
if (conf->port > 65535) { if (conf->port > 65535) {
fprintf (stderr, "Bad port number (%d) supplied for Port.\n", fprintf (stderr, "Bad port number (%d) supplied for Port.\n",
@ -737,7 +674,7 @@ static HANDLE_FUNC (handle_port)
static HANDLE_FUNC (handle_maxclients) static HANDLE_FUNC (handle_maxclients)
{ {
set_int_arg (&conf->maxclients, line, &match[2]); set_int_arg (&conf->maxclients, line, &match[MGROUP1+2]);
return 0; return 0;
} }
@ -750,24 +687,24 @@ static HANDLE_FUNC (handle_obsolete)
static HANDLE_FUNC (handle_timeout) static HANDLE_FUNC (handle_timeout)
{ {
return set_int_arg (&conf->idletimeout, line, &match[2]); return set_int_arg (&conf->idletimeout, line, &match[MGROUP1+2]);
} }
static HANDLE_FUNC (handle_connectport) static HANDLE_FUNC (handle_connectport)
{ {
add_connect_port_allowed (get_long_arg (line, &match[2]), add_connect_port_allowed (get_long_arg (line, &match[MGROUP1+2]),
&conf->connect_ports); &conf->connect_ports);
return 0; return 0;
} }
static HANDLE_FUNC (handle_user) static HANDLE_FUNC (handle_user)
{ {
return set_string_arg (&conf->user, line, &match[2]); return set_string_arg (&conf->user, line, &match[MGROUP1+2]);
} }
static HANDLE_FUNC (handle_group) static HANDLE_FUNC (handle_group)
{ {
return set_string_arg (&conf->group, line, &match[2]); return set_string_arg (&conf->group, line, &match[MGROUP1+2]);
} }
static void warn_invalid_address(char *arg, unsigned long lineno) { static void warn_invalid_address(char *arg, unsigned long lineno) {
@ -776,7 +713,7 @@ static void warn_invalid_address(char *arg, unsigned long lineno) {
static HANDLE_FUNC (handle_allow) static HANDLE_FUNC (handle_allow)
{ {
char *arg = get_string_arg (line, &match[2]); char *arg = get_string_arg (line, &match[MGROUP1+2]);
if(insert_acl (arg, ACL_ALLOW, &conf->access_list) < 0) if(insert_acl (arg, ACL_ALLOW, &conf->access_list) < 0)
warn_invalid_address (arg, lineno); warn_invalid_address (arg, lineno);
@ -786,7 +723,7 @@ static HANDLE_FUNC (handle_allow)
static HANDLE_FUNC (handle_deny) static HANDLE_FUNC (handle_deny)
{ {
char *arg = get_string_arg (line, &match[2]); char *arg = get_string_arg (line, &match[MGROUP1+2]);
if(insert_acl (arg, ACL_DENY, &conf->access_list) < 0) if(insert_acl (arg, ACL_DENY, &conf->access_list) < 0)
warn_invalid_address (arg, lineno); warn_invalid_address (arg, lineno);
@ -796,7 +733,7 @@ static HANDLE_FUNC (handle_deny)
static HANDLE_FUNC (handle_bind) static HANDLE_FUNC (handle_bind)
{ {
int r = set_string_arg (&conf->bind_address, line, &match[2]); int r = set_string_arg (&conf->bind_address, line, &match[MGROUP1+2]);
if (r) if (r)
return r; return r;
@ -807,7 +744,7 @@ static HANDLE_FUNC (handle_bind)
static HANDLE_FUNC (handle_listen) static HANDLE_FUNC (handle_listen)
{ {
char *arg = get_string_arg (line, &match[2]); char *arg = get_string_arg (line, &match[MGROUP1+2]);
if (arg == NULL) { if (arg == NULL) {
return -1; return -1;
@ -832,15 +769,8 @@ static HANDLE_FUNC (handle_listen)
static HANDLE_FUNC (handle_errorfile) static HANDLE_FUNC (handle_errorfile)
{ {
/* unsigned long int err = get_long_arg (line, &match[MGROUP1+2]);
* Because an integer is defined as ((0x)?[[:digit:]]+) _two_ char *page = get_string_arg (line, &match[MGROUP1+3]);
* match places are used. match[2] matches the full digit
* string, while match[3] matches only the "0x" part if
* present. This is why the "string" is located at
* match[4] (rather than the more intuitive match[3].
*/
unsigned long int err = get_long_arg (line, &match[2]);
char *page = get_string_arg (line, &match[4]);
if(add_new_errorpage (conf, page, err) < 0) { if(add_new_errorpage (conf, page, err) < 0) {
CP_WARN ("add_new_errorpage() failed: '%s'", page); CP_WARN ("add_new_errorpage() failed: '%s'", page);
@ -851,8 +781,8 @@ static HANDLE_FUNC (handle_errorfile)
static HANDLE_FUNC (handle_addheader) static HANDLE_FUNC (handle_addheader)
{ {
char *name = get_string_arg (line, &match[2]); char *name = get_string_arg (line, &match[MGROUP1+2]);
char *value = get_string_arg (line, &match[3]); char *value = get_string_arg (line, &match[MGROUP1+3]);
http_header_t header; http_header_t header;
if (!conf->add_headers) { if (!conf->add_headers) {
@ -893,7 +823,7 @@ static HANDLE_FUNC (handle_loglevel)
sizeof (log_levels) / sizeof (log_levels[0]); sizeof (log_levels) / sizeof (log_levels[0]);
unsigned int i; unsigned int i;
char *arg = get_string_arg (line, &match[2]); char *arg = get_string_arg (line, &match[MGROUP1+2]);
for (i = 0; i != nlevels; ++i) { for (i = 0; i != nlevels; ++i) {
if (!strcasecmp (arg, log_levels[i].string)) { if (!strcasecmp (arg, log_levels[i].string)) {
@ -910,10 +840,10 @@ static HANDLE_FUNC (handle_loglevel)
static HANDLE_FUNC (handle_basicauth) static HANDLE_FUNC (handle_basicauth)
{ {
char *user, *pass; char *user, *pass;
user = get_string_arg(line, &match[2]); user = get_string_arg(line, &match[MGROUP1+2]);
if (!user) if (!user)
return -1; return -1;
pass = get_string_arg(line, &match[3]); pass = get_string_arg(line, &match[MGROUP1+3]);
if (!pass) { if (!pass) {
safefree (user); safefree (user);
return -1; return -1;
@ -931,48 +861,48 @@ static HANDLE_FUNC (handle_basicauth)
#ifdef FILTER_ENABLE #ifdef FILTER_ENABLE
static HANDLE_FUNC (handle_filter) static HANDLE_FUNC (handle_filter)
{ {
return set_string_arg (&conf->filter, line, &match[2]); return set_string_arg (&conf->filter, line, &match[MGROUP1+2]);
} }
static HANDLE_FUNC (handle_filterurls) static HANDLE_FUNC (handle_filterurls)
{ {
return set_bool_arg (&conf->filter_url, line, &match[2]); return set_bool_arg (&conf->filter_url, line, &match[MGROUP1+2]);
} }
static HANDLE_FUNC (handle_filterextended) static HANDLE_FUNC (handle_filterextended)
{ {
return set_bool_arg (&conf->filter_extended, line, &match[2]); return set_bool_arg (&conf->filter_extended, line, &match[MGROUP1+2]);
} }
static HANDLE_FUNC (handle_filterdefaultdeny) static HANDLE_FUNC (handle_filterdefaultdeny)
{ {
assert (match[2].rm_so != -1); assert (match[MGROUP1+2].rm_so != -1);
if (get_bool_arg (line, &match[2])) if (get_bool_arg (line, &match[MGROUP1+2]))
filter_set_default_policy (FILTER_DEFAULT_DENY); filter_set_default_policy (FILTER_DEFAULT_DENY);
return 0; return 0;
} }
static HANDLE_FUNC (handle_filtercasesensitive) static HANDLE_FUNC (handle_filtercasesensitive)
{ {
return set_bool_arg (&conf->filter_casesensitive, line, &match[2]); return set_bool_arg (&conf->filter_casesensitive, line, &match[MGROUP1+2]);
} }
#endif #endif
#ifdef REVERSE_SUPPORT #ifdef REVERSE_SUPPORT
static HANDLE_FUNC (handle_reverseonly) static HANDLE_FUNC (handle_reverseonly)
{ {
return set_bool_arg (&conf->reverseonly, line, &match[2]); return set_bool_arg (&conf->reverseonly, line, &match[MGROUP1+2]);
} }
static HANDLE_FUNC (handle_reversemagic) static HANDLE_FUNC (handle_reversemagic)
{ {
return set_bool_arg (&conf->reversemagic, line, &match[2]); return set_bool_arg (&conf->reversemagic, line, &match[MGROUP1+2]);
} }
static HANDLE_FUNC (handle_reversebaseurl) static HANDLE_FUNC (handle_reversebaseurl)
{ {
return set_string_arg (&conf->reversebaseurl, line, &match[2]); return set_string_arg (&conf->reversebaseurl, line, &match[MGROUP1+2]);
} }
static HANDLE_FUNC (handle_reversepath) static HANDLE_FUNC (handle_reversepath)
@ -982,12 +912,12 @@ static HANDLE_FUNC (handle_reversepath)
*/ */
char *arg1, *arg2; char *arg1, *arg2;
arg1 = get_string_arg (line, &match[2]); arg1 = get_string_arg (line, &match[MGROUP1+2]);
if (!arg1) if (!arg1)
return -1; return -1;
if (match[4].rm_so != -1) { if (match[MGROUP1+4].rm_so != -1) {
arg2 = get_string_arg (line, &match[4]); arg2 = get_string_arg (line, &match[MGROUP1+4]);
if (!arg2) { if (!arg2) {
safefree (arg1); safefree (arg1);
return -1; return -1;
@ -1028,12 +958,12 @@ static HANDLE_FUNC (handle_upstream)
enum proxy_type pt; enum proxy_type pt;
enum upstream_build_error ube; enum upstream_build_error ube;
if (match[3].rm_so != -1) { if (match[MGROUP1+3].rm_so != -1) {
tmp = get_string_arg (line, &match[3]); tmp = get_string_arg (line, &match[MGROUP1+3]);
if(!strcmp(tmp, "none")) { if(!strcmp(tmp, "none")) {
safefree(tmp); safefree(tmp);
if (match[4].rm_so == -1) return -1; if (match[MGROUP1+4].rm_so == -1) return -1;
domain = get_string_arg (line, &match[4]); domain = get_string_arg (line, &match[MGROUP1+4]);
if (!domain) if (!domain)
return -1; return -1;
ube = upstream_add (NULL, 0, domain, 0, 0, PT_NONE, &conf->upstream_list); ube = upstream_add (NULL, 0, domain, 0, 0, PT_NONE, &conf->upstream_list);
@ -1042,7 +972,7 @@ static HANDLE_FUNC (handle_upstream)
} }
} }
mi = 6; mi = MGROUP1+6;
tmp = get_string_arg (line, &match[mi]); tmp = get_string_arg (line, &match[mi]);
pt = pt_from_string(tmp); pt = pt_from_string(tmp);
@ -1060,10 +990,10 @@ static HANDLE_FUNC (handle_upstream)
ip = get_string_arg (line, &match[mi]); ip = get_string_arg (line, &match[mi]);
if (!ip) if (!ip)
return -1; return -1;
mi += 5; mi += 3;
port = (int) get_long_arg (line, &match[mi]); port = (int) get_long_arg (line, &match[mi]);
mi += 3; mi += 2;
if (match[mi].rm_so != -1) if (match[mi].rm_so != -1)
domain = get_string_arg (line, &match[mi]); domain = get_string_arg (line, &match[mi]);

93
src/conf_regex.h Normal file
View File

@ -0,0 +1,93 @@
/*
* The configuration directives are defined in the structure below. Each
* directive requires a regular expression to match against, and a
* function to call when the regex is matched.
*
* Below are defined certain constant regular expression strings that
* can (and likely should) be used when building the regex for the
* given directive.
*/
#define DIGIT "[0-9]"
#define SPACE "[ \\t]"
#define WS SPACE "+"
#define STR "\"([^\"]+)\""
#define BOOL "([Yy][Ee][Ss]|[Oo][Nn]|[Nn][Oo]|[Oo][Ff][Ff])"
#define INT "(" DIGIT "+)"
#define ALNUM "([-A-Za-z0-9._]+)"
#define USERNAME "([^:]*)"
#define PASSWORD "([^@]*)"
#define IP "([0-9]+[.][0-9]+[.][0-9]+[.][0-9]+)"
#define IPMASK "(" IP "(/" DIGIT "+)?)"
#define IPV6 "(" \
"(([0-9a-fA-F:]{2,39}))|" \
"(([0-9a-fA-F:]{0,29}:" IP "))" \
")"
#define IPV6MASK "(" IPV6 "(/" DIGIT "+)?)"
#define BEGIN "^"
#define END "$"
/*
 * Directive list. Every entry is STDCONF (name, regex, handler); the
 * STDCONF macro itself is not defined in this file and has to be provided
 * by whoever includes it. The entries are comma separated and the last
 * one has no trailing comma, so the list can be dropped into an
 * initializer.
 */
/* string arguments (note: xtinyproxy at the end of this group takes a BOOL) */
STDCONF (logfile, STR, handle_logfile),
STDCONF (pidfile, STR, handle_pidfile),
STDCONF (anonymous, STR, handle_anonymous),
STDCONF (viaproxyname, STR, handle_viaproxyname),
STDCONF (defaulterrorfile, STR, handle_defaulterrorfile),
STDCONF (statfile, STR, handle_statfile),
STDCONF (stathost, STR, handle_stathost),
STDCONF (xtinyproxy, BOOL, handle_xtinyproxy),
/* boolean arguments */
STDCONF (syslog, BOOL, handle_syslog),
STDCONF (bindsame, BOOL, handle_bindsame),
STDCONF (disableviaheader, BOOL, handle_disableviaheader),
/* integer arguments; several of these are routed to handle_obsolete */
STDCONF (port, INT, handle_port),
STDCONF (maxclients, INT, handle_maxclients),
STDCONF (maxspareservers, INT, handle_obsolete),
STDCONF (minspareservers, INT, handle_obsolete),
STDCONF (startservers, INT, handle_obsolete),
STDCONF (maxrequestsperchild, INT, handle_obsolete),
STDCONF (timeout, INT, handle_timeout),
STDCONF (connectport, INT, handle_connectport),
/* alphanumeric arguments */
STDCONF (user, ALNUM, handle_user),
STDCONF (group, ALNUM, handle_group),
/* ip arguments; allow and deny also accept an optional mask or an ALNUM name */
STDCONF (listen, "(" IP "|" IPV6 ")", handle_listen),
STDCONF (allow, "(" "(" IPMASK "|" IPV6MASK ")" "|" ALNUM ")",
handle_allow),
STDCONF (deny, "(" "(" IPMASK "|" IPV6MASK ")" "|" ALNUM ")",
handle_deny),
STDCONF (bind, "(" IP "|" IPV6 ")", handle_bind),
/* other: directives taking two whitespace-separated arguments */
STDCONF (basicauth, ALNUM WS ALNUM, handle_basicauth),
STDCONF (errorfile, INT WS STR, handle_errorfile),
STDCONF (addheader, STR WS STR, handle_addheader),
#ifdef FILTER_ENABLE
/* filtering */
STDCONF (filter, STR, handle_filter),
STDCONF (filterurls, BOOL, handle_filterurls),
STDCONF (filterextended, BOOL, handle_filterextended),
STDCONF (filterdefaultdeny, BOOL, handle_filterdefaultdeny),
STDCONF (filtercasesensitive, BOOL, handle_filtercasesensitive),
#endif
#ifdef REVERSE_SUPPORT
/* Reverse proxy arguments; reversepath takes one string and an optional second one */
STDCONF (reversebaseurl, STR, handle_reversebaseurl),
STDCONF (reverseonly, BOOL, handle_reverseonly),
STDCONF (reversemagic, BOOL, handle_reversemagic),
STDCONF (reversepath, STR "(" WS STR ")?", handle_reversepath),
#endif
#ifdef UPSTREAM_SUPPORT
/*
 * upstream accepts one of two shapes:
 *   none "string"
 *   http|socks4|socks5 [username:password@]host:port ["string"]
 * where host is an IP or an ALNUM name and port is an INT.
 */
STDCONF (upstream,
"(" "(none)" WS STR ")|" \
"(" "(http|socks4|socks5)" WS \
"(" USERNAME /*username*/ ":" PASSWORD /*password*/ "@" ")?"
"(" IP "|" ALNUM ")"
":" INT "(" WS STR ")?" ")", handle_upstream),
#endif
/* loglevel: one of six level names; only the first letter may be upper or lower case */
STDCONF (loglevel, "([Cc]ritical|[Ee]rror|[Ww]arning|[Nn]otice|[Cc]onnect|[Ii]nfo)",
handle_loglevel)

512
src/conf_regex.rl Normal file
View File

@ -0,0 +1,512 @@
/* automatically generated with re2r by rofl0r */
/*
 * State machine and matcher for a double-quoted, non-empty string.
 * Capture group 1 records the text between the quotes: action A1 stores
 * its start offset and action E1 its end offset, both relative to the
 * beginning of the input.
 */
%%{
machine logfile;
action A1 { matches[1].rm_so = p-start; }
action E1 { matches[1].rm_eo = p-start; }
main := '"'([^"]+) >A1 %E1 '"' ;
}%%
/*
 * Run the logfile machine over the input delimited by p and pe.
 *
 * p       start of the input; offsets in matches[] are relative to it.
 * pe      end of the input; the end offset of group 0 is computed as pe - p.
 * nmatch  number of entries in matches[] that get reset and post-processed.
 * matches receives the group offsets, -1/-1 for groups that did not match.
 *
 * Returns 0 when the machine ends in a final state, -1 otherwise.
 *
 * NOTE(review): matches[0] and matches[1] are written without consulting
 * nmatch, so this assumes nmatch >= 2 - confirm against callers.
 */
RE2R_EXPORT int re2r_match_logfile(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
size_t i, cs;
int par;
/* presumably maps each group to the group it is nested in; 0 ends the walk below */
static const unsigned char parents[] = {[0]=0,[1]=0,};
const char *start = p, *eof = pe;
%% write data nofinal noerror noentry;
/* start with every group marked as unmatched */
for(i=0;i<nmatch;++i) matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1};
%% write init;
%% write exec;
/* the machine stopped before reaching a final state: no match */
if(cs < %%{ write first_final; }%% ) return -1;
/* group 0 always covers the whole input */
matches[0] = (regmatch_t){.rm_so = 0, .rm_eo = eof-start};
/*
 * Clean up the remaining groups: a group that was entered but never left,
 * a group with an empty span, and a group with an ancestor that has no
 * end offset are all reported as unmatched.
 */
for(i=1;i<nmatch;++i) if(matches[i].rm_eo == -1) matches[i].rm_so = -1;
else if(matches[i].rm_so == matches[i].rm_eo) matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1};
else { par = i; while((par = parents[par])) if(matches[par].rm_eo == -1) { matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1}; break; }}
return 0;
}
/* pidfile takes a single quoted string, so the logfile matcher is reused as is. */
RE2R_EXPORT int re2r_match_pidfile(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
	const int status = re2r_match_logfile(p, pe, nmatch, matches);

	return status;
}
/* anonymous takes a single quoted string, so the logfile matcher is reused as is. */
RE2R_EXPORT int re2r_match_anonymous(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
	const int status = re2r_match_logfile(p, pe, nmatch, matches);

	return status;
}
/* viaproxyname takes a single quoted string, so the logfile matcher is reused as is. */
RE2R_EXPORT int re2r_match_viaproxyname(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
	const int status = re2r_match_logfile(p, pe, nmatch, matches);

	return status;
}
/* defaulterrorfile takes a single quoted string, so the logfile matcher is reused as is. */
RE2R_EXPORT int re2r_match_defaulterrorfile(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
	const int status = re2r_match_logfile(p, pe, nmatch, matches);

	return status;
}
/* statfile takes a single quoted string, so the logfile matcher is reused as is. */
RE2R_EXPORT int re2r_match_statfile(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
	const int status = re2r_match_logfile(p, pe, nmatch, matches);

	return status;
}
/* stathost takes a single quoted string, so the logfile matcher is reused as is. */
RE2R_EXPORT int re2r_match_stathost(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
	const int status = re2r_match_logfile(p, pe, nmatch, matches);

	return status;
}
/*
 * State machine and matcher for a boolean word: yes, on, no or off, with
 * every letter accepted in either case. Capture group 1 spans the word:
 * action A1 stores its start offset and action E1 its end offset, both
 * relative to the beginning of the input.
 */
%%{
machine xtinyproxy;
action A1 { matches[1].rm_so = p-start; }
action E1 { matches[1].rm_eo = p-start; }
main := ([Yy][Ee][Ss]|[Oo][Nn]|[Nn][Oo]|[Oo][Ff][Ff]) >A1 %E1 ;
}%%
/*
 * Run the xtinyproxy machine over the input delimited by p and pe.
 *
 * p       start of the input; offsets in matches[] are relative to it.
 * pe      end of the input; the end offset of group 0 is computed as pe - p.
 * nmatch  number of entries in matches[] that get reset and post-processed.
 * matches receives the group offsets, -1/-1 for groups that did not match.
 *
 * Returns 0 when the machine ends in a final state, -1 otherwise.
 *
 * NOTE(review): matches[0] and matches[1] are written without consulting
 * nmatch, so this assumes nmatch >= 2 - confirm against callers.
 */
RE2R_EXPORT int re2r_match_xtinyproxy(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
size_t i, cs;
int par;
/* presumably maps each group to the group it is nested in; 0 ends the walk below */
static const unsigned char parents[] = {[0]=0,[1]=0,};
const char *start = p, *eof = pe;
%% write data nofinal noerror noentry;
/* start with every group marked as unmatched */
for(i=0;i<nmatch;++i) matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1};
%% write init;
%% write exec;
/* the machine stopped before reaching a final state: no match */
if(cs < %%{ write first_final; }%% ) return -1;
/* group 0 always covers the whole input */
matches[0] = (regmatch_t){.rm_so = 0, .rm_eo = eof-start};
/*
 * Clean up the remaining groups: a group that was entered but never left,
 * a group with an empty span, and a group with an ancestor that has no
 * end offset are all reported as unmatched.
 */
for(i=1;i<nmatch;++i) if(matches[i].rm_eo == -1) matches[i].rm_so = -1;
else if(matches[i].rm_so == matches[i].rm_eo) matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1};
else { par = i; while((par = parents[par])) if(matches[par].rm_eo == -1) { matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1}; break; }}
return 0;
}
/* syslog matcher: forwards p, pe, nmatch and matches unchanged to
 * re2r_match_xtinyproxy() and returns its result.
 * NOTE(review): presumably an alias because both directives share one
 * pattern -- confirm against the re2r input. */
RE2R_EXPORT int re2r_match_syslog(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
return re2r_match_xtinyproxy(p, pe, nmatch, matches);
}
/* bindsame matcher: forwards p, pe, nmatch and matches unchanged to
 * re2r_match_xtinyproxy() and returns its result.
 * NOTE(review): presumably an alias because both directives share one
 * pattern -- confirm against the re2r input. */
RE2R_EXPORT int re2r_match_bindsame(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
return re2r_match_xtinyproxy(p, pe, nmatch, matches);
}
/* disableviaheader matcher: forwards p, pe, nmatch and matches unchanged to
 * re2r_match_xtinyproxy() and returns its result.
 * NOTE(review): presumably an alias because both directives share one
 * pattern -- confirm against the re2r input. */
RE2R_EXPORT int re2r_match_disableviaheader(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
return re2r_match_xtinyproxy(p, pe, nmatch, matches);
}
/*
 * Ragel machine port and its matcher re2r_match_port().
 *
 * Pattern: one or more decimal digits.  Group 1 is the digit run.
 *
 * The matcher scans the bytes from p up to pe.  It returns 0 when the machine
 * stops in a final state and -1 otherwise.  On success matches[0] spans the
 * whole input and matches[i] for i >= 1 holds the byte offsets recorded for
 * group i, or (-1, -1) when the group is reported as unset.
 *
 * NOTE(review): the actions store into matches[1] and the function stores
 * into matches[0] without checking nmatch, so this appears to rely on the
 * caller passing at least 2 entries -- confirm against callers.
 */
%%{
machine port;
action A1 { matches[1].rm_so = p-start; }
action E1 { matches[1].rm_eo = p-start; }
main := ([0-9]+) >A1 %E1 ;
}%%
RE2R_EXPORT int re2r_match_port(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
size_t i, cs;
int par;
/* parents[i] is the number of the group that encloses group i; 0 means none. */
static const unsigned char parents[] = {[0]=0,[1]=0,};
/* The generated scanner advances p, so keep the origin for offset arithmetic. */
const char *start = p, *eof = pe;
%% write data nofinal noerror noentry;
/* Mark every caller-visible slot as unset before scanning. */
for(i=0;i<nmatch;++i) matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1};
%% write init;
%% write exec;
/* A state below first_final is not accepting: report no match. */
if(cs < %%{ write first_final; }%% ) return -1;
/* Group 0 always covers the whole input. */
matches[0] = (regmatch_t){.rm_so = 0, .rm_eo = eof-start};
/* Normalise groups: a group with no recorded end, an empty group, or a group
   with any enclosing group that recorded no end is reported as (-1, -1). */
for(i=1;i<nmatch;++i) if(matches[i].rm_eo == -1) matches[i].rm_so = -1;
else if(matches[i].rm_so == matches[i].rm_eo) matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1};
else { par = i; while((par = parents[par])) if(matches[par].rm_eo == -1) { matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1}; break; }}
return 0;
}
/* maxclients matcher: forwards p, pe, nmatch and matches unchanged to
 * re2r_match_port() and returns its result.
 * NOTE(review): presumably an alias because both directives share one
 * pattern -- confirm against the re2r input. */
RE2R_EXPORT int re2r_match_maxclients(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
return re2r_match_port(p, pe, nmatch, matches);
}
/* maxspareservers matcher: forwards p, pe, nmatch and matches unchanged to
 * re2r_match_port() and returns its result.
 * NOTE(review): presumably an alias because both directives share one
 * pattern -- confirm against the re2r input. */
RE2R_EXPORT int re2r_match_maxspareservers(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
return re2r_match_port(p, pe, nmatch, matches);
}
/* minspareservers matcher: forwards p, pe, nmatch and matches unchanged to
 * re2r_match_port() and returns its result.
 * NOTE(review): presumably an alias because both directives share one
 * pattern -- confirm against the re2r input. */
RE2R_EXPORT int re2r_match_minspareservers(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
return re2r_match_port(p, pe, nmatch, matches);
}
/* startservers matcher: forwards p, pe, nmatch and matches unchanged to
 * re2r_match_port() and returns its result.
 * NOTE(review): presumably an alias because both directives share one
 * pattern -- confirm against the re2r input. */
RE2R_EXPORT int re2r_match_startservers(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
return re2r_match_port(p, pe, nmatch, matches);
}
/* maxrequestsperchild matcher: forwards p, pe, nmatch and matches unchanged
 * to re2r_match_port() and returns its result.
 * NOTE(review): presumably an alias because both directives share one
 * pattern -- confirm against the re2r input. */
RE2R_EXPORT int re2r_match_maxrequestsperchild(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
return re2r_match_port(p, pe, nmatch, matches);
}
/* timeout matcher: forwards p, pe, nmatch and matches unchanged to
 * re2r_match_port() and returns its result.
 * NOTE(review): presumably an alias because both directives share one
 * pattern -- confirm against the re2r input. */
RE2R_EXPORT int re2r_match_timeout(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
return re2r_match_port(p, pe, nmatch, matches);
}
/* connectport matcher: forwards p, pe, nmatch and matches unchanged to
 * re2r_match_port() and returns its result.
 * NOTE(review): presumably an alias because both directives share one
 * pattern -- confirm against the re2r input. */
RE2R_EXPORT int re2r_match_connectport(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
return re2r_match_port(p, pe, nmatch, matches);
}
/*
 * Ragel machine user and its matcher re2r_match_user().
 *
 * Pattern: one or more characters drawn from hyphen, ASCII letters, digits,
 * dot and underscore.  Group 1 is the whole run.
 *
 * The matcher scans the bytes from p up to pe.  It returns 0 when the machine
 * stops in a final state and -1 otherwise.  On success matches[0] spans the
 * whole input and matches[i] for i >= 1 holds the byte offsets recorded for
 * group i, or (-1, -1) when the group is reported as unset.
 *
 * NOTE(review): the actions store into matches[1] and the function stores
 * into matches[0] without checking nmatch, so this appears to rely on the
 * caller passing at least 2 entries -- confirm against callers.
 */
%%{
machine user;
action A1 { matches[1].rm_so = p-start; }
action E1 { matches[1].rm_eo = p-start; }
main := (('-'|[A-Za-z0-9._])+) >A1 %E1 ;
}%%
RE2R_EXPORT int re2r_match_user(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
size_t i, cs;
int par;
/* parents[i] is the number of the group that encloses group i; 0 means none. */
static const unsigned char parents[] = {[0]=0,[1]=0,};
/* The generated scanner advances p, so keep the origin for offset arithmetic. */
const char *start = p, *eof = pe;
%% write data nofinal noerror noentry;
/* Mark every caller-visible slot as unset before scanning. */
for(i=0;i<nmatch;++i) matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1};
%% write init;
%% write exec;
/* A state below first_final is not accepting: report no match. */
if(cs < %%{ write first_final; }%% ) return -1;
/* Group 0 always covers the whole input. */
matches[0] = (regmatch_t){.rm_so = 0, .rm_eo = eof-start};
/* Normalise groups: a group with no recorded end, an empty group, or a group
   with any enclosing group that recorded no end is reported as (-1, -1). */
for(i=1;i<nmatch;++i) if(matches[i].rm_eo == -1) matches[i].rm_so = -1;
else if(matches[i].rm_so == matches[i].rm_eo) matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1};
else { par = i; while((par = parents[par])) if(matches[par].rm_eo == -1) { matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1}; break; }}
return 0;
}
/* group matcher: forwards p, pe, nmatch and matches unchanged to
 * re2r_match_user() and returns its result.
 * NOTE(review): presumably an alias because both directives share one
 * pattern -- confirm against the re2r input. */
RE2R_EXPORT int re2r_match_group(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
return re2r_match_user(p, pe, nmatch, matches);
}
/*
 * Ragel machine listen and its matcher re2r_match_listen().
 *
 * Pattern: an address in one of three textual forms.
 *   group 1      the whole address
 *   group 2      four dot-separated decimal digit runs (IPv4-style)
 *   group 3      either of the two hex-and-colon forms below (IPv6-style)
 *   groups 4, 5  a run of 2 to 39 hex digits and colons
 *   groups 6, 7  up to 29 hex digits and colons, a colon, then a dotted quad
 *   group 8      the dotted quad embedded in the form of groups 6 and 7
 *
 * The matcher scans the bytes from p up to pe.  It returns 0 when the machine
 * stops in a final state and -1 otherwise.  On success matches[0] spans the
 * whole input and matches[i] for i >= 1 holds the byte offsets recorded for
 * group i, or (-1, -1) when the group is reported as unset.
 *
 * NOTE(review): the actions store into matches[1] through matches[8] and the
 * function stores into matches[0] without checking nmatch, so this appears to
 * rely on the caller passing at least 9 entries -- confirm against callers.
 */
%%{
machine listen;
action A1 { matches[1].rm_so = p-start; }
action A2 { matches[2].rm_so = p-start; }
action A3 { matches[3].rm_so = p-start; }
action A4 { matches[4].rm_so = p-start; }
action A5 { matches[5].rm_so = p-start; }
action A6 { matches[6].rm_so = p-start; }
action A7 { matches[7].rm_so = p-start; }
action A8 { matches[8].rm_so = p-start; }
action E1 { matches[1].rm_eo = p-start; }
action E2 { matches[2].rm_eo = p-start; }
action E3 { matches[3].rm_eo = p-start; }
action E4 { matches[4].rm_eo = p-start; }
action E5 { matches[5].rm_eo = p-start; }
action E6 { matches[6].rm_eo = p-start; }
action E7 { matches[7].rm_eo = p-start; }
action E8 { matches[8].rm_eo = p-start; }
main := (([0-9]+[.][0-9]+[.][0-9]+[.][0-9]+) >A2 %E2 |((([0-9a-fA-F:]{2,39}) >A5 %E5 ) >A4 %E4 |(([0-9a-fA-F:]{0,29} ":" ([0-9]+[.][0-9]+[.][0-9]+[.][0-9]+) >A8 %E8 ) >A7 %E7 ) >A6 %E6 ) >A3 %E3 ) >A1 %E1 ;
}%%
RE2R_EXPORT int re2r_match_listen(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
size_t i, cs;
int par;
/* parents[i] is the number of the group that encloses group i; 0 means none. */
static const unsigned char parents[] = {[0]=0,[1]=0,[2]=1,[3]=1,[4]=3,[5]=4,[6]=3,[7]=6,[8]=7,};
/* The generated scanner advances p, so keep the origin for offset arithmetic. */
const char *start = p, *eof = pe;
%% write data nofinal noerror noentry;
/* Mark every caller-visible slot as unset before scanning. */
for(i=0;i<nmatch;++i) matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1};
%% write init;
%% write exec;
/* A state below first_final is not accepting: report no match. */
if(cs < %%{ write first_final; }%% ) return -1;
/* Group 0 always covers the whole input. */
matches[0] = (regmatch_t){.rm_so = 0, .rm_eo = eof-start};
/* Normalise groups: a group with no recorded end, an empty group, or a group
   with any enclosing group that recorded no end is reported as (-1, -1).
   The last rule discards offsets left behind by an alternative that was
   entered but not the one that finally matched. */
for(i=1;i<nmatch;++i) if(matches[i].rm_eo == -1) matches[i].rm_so = -1;
else if(matches[i].rm_so == matches[i].rm_eo) matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1};
else { par = i; while((par = parents[par])) if(matches[par].rm_eo == -1) { matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1}; break; }}
return 0;
}
/*
 * Ragel machine allow and its matcher re2r_match_allow().
 *
 * Pattern: a numeric address with an optional slash-and-digits suffix, or a
 * name.
 *   group 1        the whole argument
 *   group 2        the numeric-address alternative (group 3 or group 6)
 *   group 3        dotted quad plus optional suffix
 *   group 4        four dot-separated decimal digit runs (IPv4-style)
 *   group 5        optional slash followed by digits, after group 4
 *   group 6        hex-and-colon address plus optional suffix (IPv6-style)
 *   group 7        either of the two hex-and-colon forms below
 *   groups 8, 9    a run of 2 to 39 hex digits and colons
 *   groups 10, 11  up to 29 hex digits and colons, a colon, then a dotted quad
 *   group 12       the dotted quad embedded in the form of groups 10 and 11
 *   group 13       optional slash followed by digits, after group 7
 *   group 14       one or more of hyphen, ASCII letters, digits, dot and
 *                  underscore
 *
 * The matcher scans the bytes from p up to pe.  It returns 0 when the machine
 * stops in a final state and -1 otherwise.  On success matches[0] spans the
 * whole input and matches[i] for i >= 1 holds the byte offsets recorded for
 * group i, or (-1, -1) when the group is reported as unset.
 *
 * NOTE(review): the actions store into matches[1] through matches[14] and the
 * function stores into matches[0] without checking nmatch, so this appears to
 * rely on the caller passing at least 15 entries -- confirm against callers.
 */
%%{
machine allow;
action A1 { matches[1].rm_so = p-start; }
action A2 { matches[2].rm_so = p-start; }
action A3 { matches[3].rm_so = p-start; }
action A4 { matches[4].rm_so = p-start; }
action A5 { matches[5].rm_so = p-start; }
action A6 { matches[6].rm_so = p-start; }
action A7 { matches[7].rm_so = p-start; }
action A8 { matches[8].rm_so = p-start; }
action A9 { matches[9].rm_so = p-start; }
action A10 { matches[10].rm_so = p-start; }
action A11 { matches[11].rm_so = p-start; }
action A12 { matches[12].rm_so = p-start; }
action A13 { matches[13].rm_so = p-start; }
action A14 { matches[14].rm_so = p-start; }
action E1 { matches[1].rm_eo = p-start; }
action E2 { matches[2].rm_eo = p-start; }
action E3 { matches[3].rm_eo = p-start; }
action E4 { matches[4].rm_eo = p-start; }
action E5 { matches[5].rm_eo = p-start; }
action E6 { matches[6].rm_eo = p-start; }
action E7 { matches[7].rm_eo = p-start; }
action E8 { matches[8].rm_eo = p-start; }
action E9 { matches[9].rm_eo = p-start; }
action E10 { matches[10].rm_eo = p-start; }
action E11 { matches[11].rm_eo = p-start; }
action E12 { matches[12].rm_eo = p-start; }
action E13 { matches[13].rm_eo = p-start; }
action E14 { matches[14].rm_eo = p-start; }
main := (((([0-9]+[.][0-9]+[.][0-9]+[.][0-9]+) >A4 %E4 ( "/" [0-9]+)? >A5 %E5 ) >A3 %E3 |(((([0-9a-fA-F:]{2,39}) >A9 %E9 ) >A8 %E8 |(([0-9a-fA-F:]{0,29} ":" ([0-9]+[.][0-9]+[.][0-9]+[.][0-9]+) >A12 %E12 ) >A11 %E11 ) >A10 %E10 ) >A7 %E7 ( "/" [0-9]+)? >A13 %E13 ) >A6 %E6 ) >A2 %E2 |(('-'|[A-Za-z0-9._])+) >A14 %E14 ) >A1 %E1 ;
}%%
RE2R_EXPORT int re2r_match_allow(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
size_t i, cs;
int par;
/* parents[i] is the number of the group that encloses group i; 0 means none. */
static const unsigned char parents[] = {[0]=0,[1]=0,[2]=1,[3]=2,[4]=3,[5]=3,[6]=2,[7]=6,[8]=7,[9]=8,[10]=7,[11]=10,[12]=11,[13]=6,[14]=1,};
/* The generated scanner advances p, so keep the origin for offset arithmetic. */
const char *start = p, *eof = pe;
%% write data nofinal noerror noentry;
/* Mark every caller-visible slot as unset before scanning. */
for(i=0;i<nmatch;++i) matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1};
%% write init;
%% write exec;
/* A state below first_final is not accepting: report no match. */
if(cs < %%{ write first_final; }%% ) return -1;
/* Group 0 always covers the whole input. */
matches[0] = (regmatch_t){.rm_so = 0, .rm_eo = eof-start};
/* Normalise groups: a group with no recorded end, an empty group, or a group
   with any enclosing group that recorded no end is reported as (-1, -1).
   The last rule discards offsets left behind by an alternative that was
   entered but not the one that finally matched. */
for(i=1;i<nmatch;++i) if(matches[i].rm_eo == -1) matches[i].rm_so = -1;
else if(matches[i].rm_so == matches[i].rm_eo) matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1};
else { par = i; while((par = parents[par])) if(matches[par].rm_eo == -1) { matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1}; break; }}
return 0;
}
/* deny matcher: forwards p, pe, nmatch and matches unchanged to
 * re2r_match_allow() and returns its result.
 * NOTE(review): presumably an alias because both directives share one
 * pattern -- confirm against the re2r input. */
RE2R_EXPORT int re2r_match_deny(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
return re2r_match_allow(p, pe, nmatch, matches);
}
/* bind matcher: forwards p, pe, nmatch and matches unchanged to
 * re2r_match_listen() and returns its result.
 * NOTE(review): presumably an alias because both directives share one
 * pattern -- confirm against the re2r input. */
RE2R_EXPORT int re2r_match_bind(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
return re2r_match_listen(p, pe, nmatch, matches);
}
/*
 * Ragel machine basicauth and its matcher re2r_match_basicauth().
 *
 * Pattern: two words separated by one or more spaces or tabs.  Each word is
 * one or more of hyphen, ASCII letters, digits, dot and underscore.
 * Group 1 is the first word and group 2 the second.
 * NOTE(review): presumably user name and password -- confirm against the
 * directive handler.
 *
 * The matcher scans the bytes from p up to pe.  It returns 0 when the machine
 * stops in a final state and -1 otherwise.  On success matches[0] spans the
 * whole input and matches[i] for i >= 1 holds the byte offsets recorded for
 * group i, or (-1, -1) when the group is reported as unset.
 *
 * NOTE(review): the actions store into matches[1] and matches[2] and the
 * function stores into matches[0] without checking nmatch, so this appears to
 * rely on the caller passing at least 3 entries -- confirm against callers.
 */
%%{
machine basicauth;
action A1 { matches[1].rm_so = p-start; }
action A2 { matches[2].rm_so = p-start; }
action E1 { matches[1].rm_eo = p-start; }
action E2 { matches[2].rm_eo = p-start; }
main := (('-'|[A-Za-z0-9._])+) >A1 %E1 [ \t]+(('-'|[A-Za-z0-9._])+) >A2 %E2 ;
}%%
RE2R_EXPORT int re2r_match_basicauth(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
size_t i, cs;
int par;
/* parents[i] is the number of the group that encloses group i; 0 means none. */
static const unsigned char parents[] = {[0]=0,[1]=0,[2]=0,};
/* The generated scanner advances p, so keep the origin for offset arithmetic. */
const char *start = p, *eof = pe;
%% write data nofinal noerror noentry;
/* Mark every caller-visible slot as unset before scanning. */
for(i=0;i<nmatch;++i) matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1};
%% write init;
%% write exec;
/* A state below first_final is not accepting: report no match. */
if(cs < %%{ write first_final; }%% ) return -1;
/* Group 0 always covers the whole input. */
matches[0] = (regmatch_t){.rm_so = 0, .rm_eo = eof-start};
/* Normalise groups: a group with no recorded end, an empty group, or a group
   with any enclosing group that recorded no end is reported as (-1, -1). */
for(i=1;i<nmatch;++i) if(matches[i].rm_eo == -1) matches[i].rm_so = -1;
else if(matches[i].rm_so == matches[i].rm_eo) matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1};
else { par = i; while((par = parents[par])) if(matches[par].rm_eo == -1) { matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1}; break; }}
return 0;
}
/*
 * Ragel machine errorfile and its matcher re2r_match_errorfile().
 *
 * Pattern: a run of decimal digits, one or more spaces or tabs, then a
 * non-empty double-quoted string that contains no double quote.
 * Group 1 is the digit run and group 2 the text between the quotes.
 *
 * The matcher scans the bytes from p up to pe.  It returns 0 when the machine
 * stops in a final state and -1 otherwise.  On success matches[0] spans the
 * whole input and matches[i] for i >= 1 holds the byte offsets recorded for
 * group i, or (-1, -1) when the group is reported as unset.
 *
 * NOTE(review): the actions store into matches[1] and matches[2] and the
 * function stores into matches[0] without checking nmatch, so this appears to
 * rely on the caller passing at least 3 entries -- confirm against callers.
 */
%%{
machine errorfile;
action A1 { matches[1].rm_so = p-start; }
action A2 { matches[2].rm_so = p-start; }
action E1 { matches[1].rm_eo = p-start; }
action E2 { matches[2].rm_eo = p-start; }
main := ([0-9]+) >A1 %E1 [ \t]+'"'([^"]+) >A2 %E2 '"' ;
}%%
RE2R_EXPORT int re2r_match_errorfile(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
size_t i, cs;
int par;
/* parents[i] is the number of the group that encloses group i; 0 means none. */
static const unsigned char parents[] = {[0]=0,[1]=0,[2]=0,};
/* The generated scanner advances p, so keep the origin for offset arithmetic. */
const char *start = p, *eof = pe;
%% write data nofinal noerror noentry;
/* Mark every caller-visible slot as unset before scanning. */
for(i=0;i<nmatch;++i) matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1};
%% write init;
%% write exec;
/* A state below first_final is not accepting: report no match. */
if(cs < %%{ write first_final; }%% ) return -1;
/* Group 0 always covers the whole input. */
matches[0] = (regmatch_t){.rm_so = 0, .rm_eo = eof-start};
/* Normalise groups: a group with no recorded end, an empty group, or a group
   with any enclosing group that recorded no end is reported as (-1, -1). */
for(i=1;i<nmatch;++i) if(matches[i].rm_eo == -1) matches[i].rm_so = -1;
else if(matches[i].rm_so == matches[i].rm_eo) matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1};
else { par = i; while((par = parents[par])) if(matches[par].rm_eo == -1) { matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1}; break; }}
return 0;
}
/*
 * Ragel machine addheader and its matcher re2r_match_addheader().
 *
 * Pattern: two non-empty double-quoted strings separated by one or more
 * spaces or tabs; neither string may contain a double quote.
 * Group 1 is the text inside the first pair of quotes and group 2 the text
 * inside the second pair.
 *
 * The matcher scans the bytes from p up to pe.  It returns 0 when the machine
 * stops in a final state and -1 otherwise.  On success matches[0] spans the
 * whole input and matches[i] for i >= 1 holds the byte offsets recorded for
 * group i, or (-1, -1) when the group is reported as unset.
 *
 * NOTE(review): the actions store into matches[1] and matches[2] and the
 * function stores into matches[0] without checking nmatch, so this appears to
 * rely on the caller passing at least 3 entries -- confirm against callers.
 */
%%{
machine addheader;
action A1 { matches[1].rm_so = p-start; }
action A2 { matches[2].rm_so = p-start; }
action E1 { matches[1].rm_eo = p-start; }
action E2 { matches[2].rm_eo = p-start; }
main := '"'([^"]+) >A1 %E1 '"'[ \t]+'"'([^"]+) >A2 %E2 '"' ;
}%%
RE2R_EXPORT int re2r_match_addheader(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
size_t i, cs;
int par;
/* parents[i] is the number of the group that encloses group i; 0 means none. */
static const unsigned char parents[] = {[0]=0,[1]=0,[2]=0,};
/* The generated scanner advances p, so keep the origin for offset arithmetic. */
const char *start = p, *eof = pe;
%% write data nofinal noerror noentry;
/* Mark every caller-visible slot as unset before scanning. */
for(i=0;i<nmatch;++i) matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1};
%% write init;
%% write exec;
/* A state below first_final is not accepting: report no match. */
if(cs < %%{ write first_final; }%% ) return -1;
/* Group 0 always covers the whole input. */
matches[0] = (regmatch_t){.rm_so = 0, .rm_eo = eof-start};
/* Normalise groups: a group with no recorded end, an empty group, or a group
   with any enclosing group that recorded no end is reported as (-1, -1). */
for(i=1;i<nmatch;++i) if(matches[i].rm_eo == -1) matches[i].rm_so = -1;
else if(matches[i].rm_so == matches[i].rm_eo) matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1};
else { par = i; while((par = parents[par])) if(matches[par].rm_eo == -1) { matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1}; break; }}
return 0;
}
/* filter matcher: forwards p, pe, nmatch and matches unchanged to
 * re2r_match_logfile() and returns its result.
 * NOTE(review): presumably an alias because both directives share one
 * pattern -- confirm against the re2r input. */
RE2R_EXPORT int re2r_match_filter(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
return re2r_match_logfile(p, pe, nmatch, matches);
}
/* filterurls matcher: forwards p, pe, nmatch and matches unchanged to
 * re2r_match_xtinyproxy() and returns its result.
 * NOTE(review): presumably an alias because both directives share one
 * pattern -- confirm against the re2r input. */
RE2R_EXPORT int re2r_match_filterurls(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
return re2r_match_xtinyproxy(p, pe, nmatch, matches);
}
/* filterextended matcher: forwards p, pe, nmatch and matches unchanged to
 * re2r_match_xtinyproxy() and returns its result.
 * NOTE(review): presumably an alias because both directives share one
 * pattern -- confirm against the re2r input. */
RE2R_EXPORT int re2r_match_filterextended(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
return re2r_match_xtinyproxy(p, pe, nmatch, matches);
}
/* filterdefaultdeny matcher: forwards p, pe, nmatch and matches unchanged to
 * re2r_match_xtinyproxy() and returns its result.
 * NOTE(review): presumably an alias because both directives share one
 * pattern -- confirm against the re2r input. */
RE2R_EXPORT int re2r_match_filterdefaultdeny(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
return re2r_match_xtinyproxy(p, pe, nmatch, matches);
}
/* filtercasesensitive matcher: forwards p, pe, nmatch and matches unchanged
 * to re2r_match_xtinyproxy() and returns its result.
 * NOTE(review): presumably an alias because both directives share one
 * pattern -- confirm against the re2r input. */
RE2R_EXPORT int re2r_match_filtercasesensitive(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
return re2r_match_xtinyproxy(p, pe, nmatch, matches);
}
/* reversebaseurl matcher: forwards p, pe, nmatch and matches unchanged to
 * re2r_match_logfile() and returns its result.
 * NOTE(review): presumably an alias because both directives share one
 * pattern -- confirm against the re2r input. */
RE2R_EXPORT int re2r_match_reversebaseurl(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
return re2r_match_logfile(p, pe, nmatch, matches);
}
/* reverseonly matcher: forwards p, pe, nmatch and matches unchanged to
 * re2r_match_xtinyproxy() and returns its result.
 * NOTE(review): presumably an alias because both directives share one
 * pattern -- confirm against the re2r input. */
RE2R_EXPORT int re2r_match_reverseonly(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
return re2r_match_xtinyproxy(p, pe, nmatch, matches);
}
/* reversemagic matcher: forwards p, pe, nmatch and matches unchanged to
 * re2r_match_xtinyproxy() and returns its result.
 * NOTE(review): presumably an alias because both directives share one
 * pattern -- confirm against the re2r input. */
RE2R_EXPORT int re2r_match_reversemagic(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
return re2r_match_xtinyproxy(p, pe, nmatch, matches);
}
/*
 * Ragel machine reversepath and its matcher re2r_match_reversepath().
 *
 * Pattern: a non-empty double-quoted string, optionally followed by one or
 * more spaces or tabs and a second non-empty double-quoted string.
 *   group 1  text inside the first pair of quotes
 *   group 2  the whole optional tail (whitespace plus second quoted string)
 *   group 3  text inside the second pair of quotes
 *
 * The matcher scans the bytes from p up to pe.  It returns 0 when the machine
 * stops in a final state and -1 otherwise.  On success matches[0] spans the
 * whole input and matches[i] for i >= 1 holds the byte offsets recorded for
 * group i, or (-1, -1) when the group is reported as unset.
 *
 * NOTE(review): the actions store into matches[1] through matches[3] and the
 * function stores into matches[0] without checking nmatch, so this appears to
 * rely on the caller passing at least 4 entries -- confirm against callers.
 */
%%{
machine reversepath;
action A1 { matches[1].rm_so = p-start; }
action A2 { matches[2].rm_so = p-start; }
action A3 { matches[3].rm_so = p-start; }
action E1 { matches[1].rm_eo = p-start; }
action E2 { matches[2].rm_eo = p-start; }
action E3 { matches[3].rm_eo = p-start; }
main := '"'([^"]+) >A1 %E1 '"'([ \t]+'"'([^"]+) >A3 %E3 '"')? >A2 %E2 ;
}%%
RE2R_EXPORT int re2r_match_reversepath(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
size_t i, cs;
int par;
/* parents[i] is the number of the group that encloses group i; 0 means none. */
static const unsigned char parents[] = {[0]=0,[1]=0,[2]=0,[3]=2,};
/* The generated scanner advances p, so keep the origin for offset arithmetic. */
const char *start = p, *eof = pe;
%% write data nofinal noerror noentry;
/* Mark every caller-visible slot as unset before scanning. */
for(i=0;i<nmatch;++i) matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1};
%% write init;
%% write exec;
/* A state below first_final is not accepting: report no match. */
if(cs < %%{ write first_final; }%% ) return -1;
/* Group 0 always covers the whole input. */
matches[0] = (regmatch_t){.rm_so = 0, .rm_eo = eof-start};
/* Normalise groups: a group with no recorded end, an empty group, or a group
   with any enclosing group that recorded no end is reported as (-1, -1). */
for(i=1;i<nmatch;++i) if(matches[i].rm_eo == -1) matches[i].rm_so = -1;
else if(matches[i].rm_so == matches[i].rm_eo) matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1};
else { par = i; while((par = parents[par])) if(matches[par].rm_eo == -1) { matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1}; break; }}
return 0;
}
/*
 * Ragel machine upstream and its matcher re2r_match_upstream().
 *
 * Pattern: one of two alternatives.
 *   group 1   first alternative: the word none, whitespace, a quoted string
 *   group 2   the word none
 *   group 3   text inside the quotes of the first alternative
 *   group 4   second alternative: type, optional credentials, host, colon,
 *             port, optional quoted string
 *   group 5   the type word: http, socks4 or socks5
 *   group 6   optional credentials part, ending in an at sign
 *   group 7   text before the colon inside group 6
 *   group 8   text between the colon and the at sign inside group 6
 *   group 9   host: group 10 or group 11
 *   group 10  four dot-separated decimal digit runs (IPv4-style)
 *   group 11  one or more of hyphen, ASCII letters, digits, dot, underscore
 *   group 12  decimal digits after the colon that follows the host
 *   group 13  optional tail: whitespace plus a quoted string
 *   group 14  text inside the quotes of group 13
 * NOTE(review): groups 7 and 8 are presumably user name and password, and
 * groups 3 and 14 presumably a target domain -- confirm against the handler.
 *
 * The matcher scans the bytes from p up to pe.  It returns 0 when the machine
 * stops in a final state and -1 otherwise.  On success matches[0] spans the
 * whole input and matches[i] for i >= 1 holds the byte offsets recorded for
 * group i, or (-1, -1) when the group is reported as unset.  Because empty
 * groups are reported as unset, an empty group 7 or 8 is indistinguishable
 * from an absent one.
 *
 * NOTE(review): the actions store into matches[1] through matches[14] and the
 * function stores into matches[0] without checking nmatch, so this appears to
 * rely on the caller passing at least 15 entries -- confirm against callers.
 */
%%{
machine upstream;
action A1 { matches[1].rm_so = p-start; }
action A2 { matches[2].rm_so = p-start; }
action A3 { matches[3].rm_so = p-start; }
action A4 { matches[4].rm_so = p-start; }
action A5 { matches[5].rm_so = p-start; }
action A6 { matches[6].rm_so = p-start; }
action A7 { matches[7].rm_so = p-start; }
action A8 { matches[8].rm_so = p-start; }
action A9 { matches[9].rm_so = p-start; }
action A10 { matches[10].rm_so = p-start; }
action A11 { matches[11].rm_so = p-start; }
action A12 { matches[12].rm_so = p-start; }
action A13 { matches[13].rm_so = p-start; }
action A14 { matches[14].rm_so = p-start; }
action E1 { matches[1].rm_eo = p-start; }
action E2 { matches[2].rm_eo = p-start; }
action E3 { matches[3].rm_eo = p-start; }
action E4 { matches[4].rm_eo = p-start; }
action E5 { matches[5].rm_eo = p-start; }
action E6 { matches[6].rm_eo = p-start; }
action E7 { matches[7].rm_eo = p-start; }
action E8 { matches[8].rm_eo = p-start; }
action E9 { matches[9].rm_eo = p-start; }
action E10 { matches[10].rm_eo = p-start; }
action E11 { matches[11].rm_eo = p-start; }
action E12 { matches[12].rm_eo = p-start; }
action E13 { matches[13].rm_eo = p-start; }
action E14 { matches[14].rm_eo = p-start; }
main := (( "none" ) >A2 %E2 [ \t]+'"'([^"]+) >A3 %E3 '"') >A1 %E1 |(( "http" | "socks4" | "socks5" ) >A5 %E5 [ \t]+(([^:]*) >A7 %E7 ":" ([^@]*) >A8 %E8 "@" )? >A6 %E6 (([0-9]+[.][0-9]+[.][0-9]+[.][0-9]+) >A10 %E10 |(('-'|[A-Za-z0-9._])+) >A11 %E11 ) >A9 %E9 ":" ([0-9]+) >A12 %E12 ([ \t]+'"'([^"]+) >A14 %E14 '"')? >A13 %E13 ) >A4 %E4 ;
}%%
RE2R_EXPORT int re2r_match_upstream(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
size_t i, cs;
int par;
/* parents[i] is the number of the group that encloses group i; 0 means none. */
static const unsigned char parents[] = {[0]=0,[1]=0,[2]=1,[3]=1,[4]=0,[5]=4,[6]=4,[7]=6,[8]=6,[9]=4,[10]=9,[11]=9,[12]=4,[13]=4,[14]=13,};
/* The generated scanner advances p, so keep the origin for offset arithmetic. */
const char *start = p, *eof = pe;
%% write data nofinal noerror noentry;
/* Mark every caller-visible slot as unset before scanning. */
for(i=0;i<nmatch;++i) matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1};
%% write init;
%% write exec;
/* A state below first_final is not accepting: report no match. */
if(cs < %%{ write first_final; }%% ) return -1;
/* Group 0 always covers the whole input. */
matches[0] = (regmatch_t){.rm_so = 0, .rm_eo = eof-start};
/* Normalise groups: a group with no recorded end, an empty group, or a group
   with any enclosing group that recorded no end is reported as (-1, -1).
   The last rule discards offsets left behind by an alternative that was
   entered but not the one that finally matched. */
for(i=1;i<nmatch;++i) if(matches[i].rm_eo == -1) matches[i].rm_so = -1;
else if(matches[i].rm_so == matches[i].rm_eo) matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1};
else { par = i; while((par = parents[par])) if(matches[par].rm_eo == -1) { matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1}; break; }}
return 0;
}
/*
 * Ragel machine loglevel and its matcher re2r_match_loglevel().
 *
 * Pattern: one of Critical, Error, Warning, Notice, Connect or Info.  Only
 * the first letter is accepted in either case; the rest must be lower case.
 * Group 1 is the whole word.
 *
 * The matcher scans the bytes from p up to pe.  It returns 0 when the machine
 * stops in a final state and -1 otherwise.  On success matches[0] spans the
 * whole input and matches[i] for i >= 1 holds the byte offsets recorded for
 * group i, or (-1, -1) when the group is reported as unset.
 *
 * NOTE(review): the actions store into matches[1] and the function stores
 * into matches[0] without checking nmatch, so this appears to rely on the
 * caller passing at least 2 entries -- confirm against callers.
 */
%%{
machine loglevel;
action A1 { matches[1].rm_so = p-start; }
action E1 { matches[1].rm_eo = p-start; }
main := ([Cc] "ritical" |[Ee] "rror" |[Ww] "arning" |[Nn] "otice" |[Cc] "onnect" |[Ii] "nfo" ) >A1 %E1 ;
}%%
RE2R_EXPORT int re2r_match_loglevel(const char *p, const char* pe, size_t nmatch, regmatch_t matches[])
{
size_t i, cs;
int par;
/* parents[i] is the number of the group that encloses group i; 0 means none. */
static const unsigned char parents[] = {[0]=0,[1]=0,};
/* The generated scanner advances p, so keep the origin for offset arithmetic. */
const char *start = p, *eof = pe;
%% write data nofinal noerror noentry;
/* Mark every caller-visible slot as unset before scanning. */
for(i=0;i<nmatch;++i) matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1};
%% write init;
%% write exec;
/* A state below first_final is not accepting: report no match. */
if(cs < %%{ write first_final; }%% ) return -1;
/* Group 0 always covers the whole input. */
matches[0] = (regmatch_t){.rm_so = 0, .rm_eo = eof-start};
/* Normalise groups: a group with no recorded end, an empty group, or a group
   with any enclosing group that recorded no end is reported as (-1, -1). */
for(i=1;i<nmatch;++i) if(matches[i].rm_eo == -1) matches[i].rm_so = -1;
else if(matches[i].rm_so == matches[i].rm_eo) matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1};
else { par = i; while((par = parents[par])) if(matches[par].rm_eo == -1) { matches[i] = (regmatch_t){.rm_so = -1, .rm_eo = -1}; break; }}
return 0;
}

14
src/conf_regex_print.c Normal file
View File

@ -0,0 +1,14 @@
/* This tool prints "regex-name regex" pairs, one pair per line, as input
   for re2r. Compile it by hand with:
   gcc -I. src/conf_regex_print.c
*/
#include "config.h"
#include <stdio.h>
/* STDCONF(A, B, C) prints the stringified token A, a space, the string B and
   a newline. The third argument C is ignored. */
#define STDCONF(A, B, C) printf("%s %s\n", #A, B)
/* The body of main is whatever conf_regex.h expands to under the STDCONF
   definition above, closed by a lone semicolon.
   NOTE(review): conf_regex.h is not visible here; presumably it is a
   comma-separated list of STDCONF(name, regex, handler) entries, which would
   expand to a single comma expression of printf calls -- confirm. */
int main() {
#include "conf_regex.h"
;
}