From 417c258d145928a919e006dd90ecca58ddeed6c7 Mon Sep 17 00:00:00 2001 From: rofl0r Date: Fri, 16 Oct 2020 12:40:56 +0100 Subject: [PATCH] conf: speed up parsing 10x by using ragel if available conf_regex.rl is generated from the output of conf_regex_print.c using re2r (https://github.com/rofl0r/re2r). If ragel is available on the build host, it is used to generate finite state machines from the regexes used by the config file parser for an impressive speed boost, while only adding moderately to binary size. A stripped x86_64 tinyproxy binary compiled with -O2 is still only ~100KB. --- configure.ac | 10 + src/Makefile.am | 13 +- src/conf.c | 32 ++- src/conf_regex.rl | 512 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 562 insertions(+), 5 deletions(-) create mode 100644 src/conf_regex.rl diff --git a/configure.ac b/configure.ac index 00f7f0e..dd91279 100644 --- a/configure.ac +++ b/configure.ac @@ -213,6 +213,16 @@ if test "x$GPERF" != "x" -a "x$GPERF" != "xno" ; then AC_DEFINE(HAVE_GPERF) fi +AC_PATH_PROG(RAGEL, ragel, no) +AM_CONDITIONAL(HAVE_RAGEL, test "x$RAGEL" != "x" -a "x$RAGEL" != "xno") +AH_TEMPLATE([HAVE_RAGEL], + [Whether you have ragel installed for faster config parsing.]) + +if test "x$RAGEL" != "x" -a "x$RAGEL" != "xno" ; then + AC_DEFINE(HAVE_RAGEL) +fi + + AC_CONFIG_FILES([ Makefile src/Makefile diff --git a/src/Makefile.am b/src/Makefile.am index 6d806e0..9fc3b05 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -60,11 +60,22 @@ EXTRA_tinyproxy_SOURCES = filter.c filter.h \ tinyproxy_DEPENDENCIES = @ADDITIONAL_OBJECTS@ tinyproxy_LDADD = @ADDITIONAL_OBJECTS@ -lpthread +CLEANFILES = + if HAVE_GPERF conf-tokens.c: conf-tokens-gperf.inc conf-tokens-gperf.inc: conf-tokens.gperf $(GPERF) $< > $@ endif -EXTRA_DIST = conf-tokens.gperf +if HAVE_RAGEL +conf.c: conf_regex.inc +conf_regex.inc: conf_regex.rl + $(RAGEL) $(RAGEL_FLAGS) -o $@ $< + +CLEANFILES += conf_regex.inc +endif + + +EXTRA_DIST = conf-tokens.gperf conf_regex.rl 
diff --git a/src/conf.c b/src/conf.c index b94b71e..58c1216 100644 --- a/src/conf.c +++ b/src/conf.c @@ -142,7 +142,14 @@ static void config_free_regex (void); * do not follow the pattern above. This macro is for convenience * only. */ -#define STDCONF(d, re, func) [CD_ ## d] = { BEGIN re END, func, NULL } +#ifdef HAVE_RAGEL +#define RE2R_EXPORT static +#include "conf_regex.inc" +typedef int (*matchfunc)(const char*, const char*, size_t, regmatch_t[]); +#define STDCONF(d, re, func) [CD_ ## d] = { func, re2r_match_ ## d } +#else +#define STDCONF(d, re, func) [CD_ ## d] = { func, BEGIN re END, NULL } +#endif /* * Holds the regular expression used to match the configuration directive, @@ -151,9 +158,13 @@ static void config_free_regex (void); * to be compiled one. */ struct { - const char *re; CONFFILE_HANDLER handler; +#ifndef HAVE_RAGEL + const char *re; regex_t *cre; +#else + matchfunc mf; +#endif } directives[] = { #include "conf_regex.h" }; @@ -248,6 +259,8 @@ config_init (void) { unsigned int i, r; + (void) r; + for (i = 0; i != ndirectives; ++i) { if (!directives[i].handler) { @@ -255,6 +268,7 @@ config_init (void) continue; } +#ifndef HAVE_RAGEL directives[i].cre = (regex_t *) safemalloc (sizeof (regex_t)); if (!directives[i].cre) return -1; @@ -264,6 +278,7 @@ config_init (void) REG_EXTENDED | REG_NEWLINE); if (r) return r; +#endif } atexit (config_free_regex); @@ -278,6 +293,7 @@ config_init (void) static void config_free_regex (void) { +#ifndef HAVE_RAGEL unsigned int i; for (i = 0; i < ndirectives; i++) { @@ -287,6 +303,7 @@ config_free_regex (void) directives[i].cre = NULL; } } +#endif } /* @@ -297,18 +314,25 @@ config_free_regex (void) * Returns 0 if a match was found and successfully processed; otherwise, * a negative number is returned. 
*/ -static int check_match (struct config_s *conf, const char *line, +static int check_match (struct config_s *conf, + const char *line, const char* lineend, unsigned long lineno, enum config_directive cd) { regmatch_t match[RE_MAX_MATCHES]; unsigned int i = cd; +#ifndef HAVE_RAGEL + (void) lineend; if (!directives[i].cre) return (*directives[i].handler) (conf, line, lineno, match); if (!regexec (directives[i].cre, line, RE_MAX_MATCHES, match, 0)) return (*directives[i].handler) (conf, line, lineno, match); +#else + if (!directives[i].mf(line, lineend, RE_MAX_MATCHES, match)) + return (*directives[i].handler) (conf, line, lineno, match); +#endif return -1; } @@ -335,7 +359,7 @@ static int config_parse (struct config_s *conf, FILE * f) p = q; while(*p && *p != '\n') ++p; while(isspace(*p)) *(p--) = 0; - if (!e || e->value == CD_NIL || check_match (conf, q, lineno, e->value)) { + if (!e || e->value == CD_NIL || check_match (conf, q, ++p, lineno, e->value)) { fprintf (stderr, "ERROR: Syntax error on line %lu\n", lineno); return 1; } diff --git a/src/conf_regex.rl b/src/conf_regex.rl new file mode 100644 index 0000000..aa5c600 --- /dev/null +++ b/src/conf_regex.rl @@ -0,0 +1,512 @@ +/* automatically generated with re2r by rofl0r */ +%%{ +machine logfile; +action A1 { matches[1].rm_so = p-start; } +action E1 { matches[1].rm_eo = p-start; } +main := '"'([^"]+) >A1 %E1 '"' ; +}%% + +RE2R_EXPORT int re2r_match_logfile(const char *p, const char* pe, size_t nmatch, regmatch_t matches[]) +{ + size_t i, cs; + int par; + static const unsigned char parents[] = {[0]=0,[1]=0,}; + const char *start = p, *eof = pe; + %% write data nofinal noerror noentry; + for(i=0;iA1 %E1 ; +}%% + +RE2R_EXPORT int re2r_match_xtinyproxy(const char *p, const char* pe, size_t nmatch, regmatch_t matches[]) +{ + size_t i, cs; + int par; + static const unsigned char parents[] = {[0]=0,[1]=0,}; + const char *start = p, *eof = pe; + %% write data nofinal noerror noentry; + for(i=0;iA1 %E1 ; +}%% + 
+RE2R_EXPORT int re2r_match_port(const char *p, const char* pe, size_t nmatch, regmatch_t matches[]) +{ + size_t i, cs; + int par; + static const unsigned char parents[] = {[0]=0,[1]=0,}; + const char *start = p, *eof = pe; + %% write data nofinal noerror noentry; + for(i=0;iA1 %E1 ; +}%% + +RE2R_EXPORT int re2r_match_user(const char *p, const char* pe, size_t nmatch, regmatch_t matches[]) +{ + size_t i, cs; + int par; + static const unsigned char parents[] = {[0]=0,[1]=0,}; + const char *start = p, *eof = pe; + %% write data nofinal noerror noentry; + for(i=0;iA2 %E2 |((([0-9a-fA-F:]{2,39}) >A5 %E5 ) >A4 %E4 |(([0-9a-fA-F:]{0,29} ":" ([0-9]+[.][0-9]+[.][0-9]+[.][0-9]+) >A8 %E8 ) >A7 %E7 ) >A6 %E6 ) >A3 %E3 ) >A1 %E1 ; +}%% + +RE2R_EXPORT int re2r_match_listen(const char *p, const char* pe, size_t nmatch, regmatch_t matches[]) +{ + size_t i, cs; + int par; + static const unsigned char parents[] = {[0]=0,[1]=0,[2]=1,[3]=1,[4]=3,[5]=4,[6]=3,[7]=6,[8]=7,}; + const char *start = p, *eof = pe; + %% write data nofinal noerror noentry; + for(i=0;iA4 %E4 ( "/" [0-9]+)? >A5 %E5 ) >A3 %E3 |(((([0-9a-fA-F:]{2,39}) >A9 %E9 ) >A8 %E8 |(([0-9a-fA-F:]{0,29} ":" ([0-9]+[.][0-9]+[.][0-9]+[.][0-9]+) >A12 %E12 ) >A11 %E11 ) >A10 %E10 ) >A7 %E7 ( "/" [0-9]+)? 
>A13 %E13 ) >A6 %E6 ) >A2 %E2 |(('-'|[A-Za-z0-9._])+) >A14 %E14 ) >A1 %E1 ; +}%% + +RE2R_EXPORT int re2r_match_allow(const char *p, const char* pe, size_t nmatch, regmatch_t matches[]) +{ + size_t i, cs; + int par; + static const unsigned char parents[] = {[0]=0,[1]=0,[2]=1,[3]=2,[4]=3,[5]=3,[6]=2,[7]=6,[8]=7,[9]=8,[10]=7,[11]=10,[12]=11,[13]=6,[14]=1,}; + const char *start = p, *eof = pe; + %% write data nofinal noerror noentry; + for(i=0;iA1 %E1 [ \t]+(('-'|[A-Za-z0-9._])+) >A2 %E2 ; +}%% + +RE2R_EXPORT int re2r_match_basicauth(const char *p, const char* pe, size_t nmatch, regmatch_t matches[]) +{ + size_t i, cs; + int par; + static const unsigned char parents[] = {[0]=0,[1]=0,[2]=0,}; + const char *start = p, *eof = pe; + %% write data nofinal noerror noentry; + for(i=0;iA1 %E1 [ \t]+'"'([^"]+) >A2 %E2 '"' ; +}%% + +RE2R_EXPORT int re2r_match_errorfile(const char *p, const char* pe, size_t nmatch, regmatch_t matches[]) +{ + size_t i, cs; + int par; + static const unsigned char parents[] = {[0]=0,[1]=0,[2]=0,}; + const char *start = p, *eof = pe; + %% write data nofinal noerror noentry; + for(i=0;iA1 %E1 '"'[ \t]+'"'([^"]+) >A2 %E2 '"' ; +}%% + +RE2R_EXPORT int re2r_match_addheader(const char *p, const char* pe, size_t nmatch, regmatch_t matches[]) +{ + size_t i, cs; + int par; + static const unsigned char parents[] = {[0]=0,[1]=0,[2]=0,}; + const char *start = p, *eof = pe; + %% write data nofinal noerror noentry; + for(i=0;iA1 %E1 '"'([ \t]+'"'([^"]+) >A3 %E3 '"')? >A2 %E2 ; +}%% + +RE2R_EXPORT int re2r_match_reversepath(const char *p, const char* pe, size_t nmatch, regmatch_t matches[]) +{ + size_t i, cs; + int par; + static const unsigned char parents[] = {[0]=0,[1]=0,[2]=0,[3]=2,}; + const char *start = p, *eof = pe; + %% write data nofinal noerror noentry; + for(i=0;iA2 %E2 [ \t]+'"'([^"]+) >A3 %E3 '"') >A1 %E1 |(( "http" | "socks4" | "socks5" ) >A5 %E5 [ \t]+(([^:]*) >A7 %E7 ":" ([^@]*) >A8 %E8 "@" )? 
>A6 %E6 (([0-9]+[.][0-9]+[.][0-9]+[.][0-9]+) >A10 %E10 |(('-'|[A-Za-z0-9._])+) >A11 %E11 ) >A9 %E9 ":" ([0-9]+) >A12 %E12 ([ \t]+'"'([^"]+) >A14 %E14 '"')? >A13 %E13 ) >A4 %E4 ; +}%% + +RE2R_EXPORT int re2r_match_upstream(const char *p, const char* pe, size_t nmatch, regmatch_t matches[]) +{ + size_t i, cs; + int par; + static const unsigned char parents[] = {[0]=0,[1]=0,[2]=1,[3]=1,[4]=0,[5]=4,[6]=4,[7]=6,[8]=6,[9]=4,[10]=9,[11]=9,[12]=4,[13]=4,[14]=13,}; + const char *start = p, *eof = pe; + %% write data nofinal noerror noentry; + for(i=0;iA1 %E1 ; +}%% + +RE2R_EXPORT int re2r_match_loglevel(const char *p, const char* pe, size_t nmatch, regmatch_t matches[]) +{ + size_t i, cs; + int par; + static const unsigned char parents[] = {[0]=0,[1]=0,}; + const char *start = p, *eof = pe; + %% write data nofinal noerror noentry; + for(i=0;i