tinyproxy/src/scanner.l

174 lines
4.6 KiB
Plaintext
Raw Normal View History

/* $Id: scanner.l,v 1.24 2004-04-27 18:53:14 rjkaes Exp $
*
* This builds the scanner for the tinyproxy configuration file. This
* file needs to stay in sync with grammar.y. If someone knows lex and yacc
* better than I do, please update these files.
*
* Copyright (C) 2000 Robert James Kaes (rjkaes@flarenet.com)
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2, or (at your option) any
* later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
%{
#include "tinyproxy.h"
#include "grammar.h"
struct keyword {
char *kw_name;
Added reverse proxy support from Kim Holviala. His comments regarding this addition follow: The patch implements a simple reverse proxy (with one funky extra feature). It has all the regular features: mapping remote servers to local namespace (ReversePath), disabling forward proxying (ReverseOnly) and HTTP redirect rewriting (ReverseBaseURL). The funky feature is this: You map Google to /google/ and the Google front page opens up fine. Type in stuff and click "Google Search" and you'll get an error from tinyproxy. Reason for this is that Google's form submits to "/search" which unfortunately bypasses our /google/ mapping (if they'd submit to "search" without the slash it would have worked ok). Turn on ReverseMagic and it starts working.... ReverseMagic "hijacks" one cookie which it sends to the client browser. This cookie contains the current reverse proxy path mapping (in the above case /google/) so that even if the site uses absolute links the reverse proxy still knows where to map the request. And yes, it works. No, I've never seen this done before - I couldn't find _any_ working OSS reverse proxies, and the commercial ones I've seen try to parse the page and fix all links (in the above case changing "/search" to "/google/search"). The problem with modifying the html is that it might not be parsable (very common) or it might be encoded so that the proxy can't read it (mod_gzip or likes). Hope you like that patch. One caveat - I haven't coded with C in like three years so my code might be a bit messy.... There shouldn't be any security problems thou, but you never know. I did all the stuff out of my memory without reading any RFC's, but I tested everything with Moz, Konq, IE6, Links and Lynx and they all worked fine.
2004-01-27 03:11:52 +08:00
int kw_token;
};
static struct keyword keywords[] = {
Added reverse proxy support from Kim Holviala. His comments regarding this addition follow: The patch implements a simple reverse proxy (with one funky extra feature). It has all the regular features: mapping remote servers to local namespace (ReversePath), disabling forward proxying (ReverseOnly) and HTTP redirect rewriting (ReverseBaseURL). The funky feature is this: You map Google to /google/ and the Google front page opens up fine. Type in stuff and click "Google Search" and you'll get an error from tinyproxy. Reason for this is that Google's form submits to "/search" which unfortunately bypasses our /google/ mapping (if they'd submit to "search" without the slash it would have worked ok). Turn on ReverseMagic and it starts working.... ReverseMagic "hijacks" one cookie which it sends to the client browser. This cookie contains the current reverse proxy path mapping (in the above case /google/) so that even if the site uses absolute links the reverse proxy still knows where to map the request. And yes, it works. No, I've never seen this done before - I couldn't find _any_ working OSS reverse proxies, and the commercial ones I've seen try to parse the page and fix all links (in the above case changing "/search" to "/google/search"). The problem with modifying the html is that it might not be parsable (very common) or it might be encoded so that the proxy can't read it (mod_gzip or likes). Hope you like that patch. One caveat - I haven't coded with C in like three years so my code might be a bit messy.... There shouldn't be any security problems thou, but you never know. I did all the stuff out of my memory without reading any RFC's, but I tested everything with Moz, Konq, IE6, Links and Lynx and they all worked fine.
2004-01-27 03:11:52 +08:00
/* statements */
{ "port", KW_PORT },
{ "logfile", KW_LOGFILE },
{ "syslog", KW_SYSLOG },
{ "maxclients", KW_MAXCLIENTS },
{ "maxspareservers", KW_MAXSPARESERVERS },
{ "minspareservers", KW_MINSPARESERVERS },
{ "startservers", KW_STARTSERVERS },
{ "maxrequestsperchild", KW_MAXREQUESTSPERCHILD },
{ "pidfile", KW_PIDFILE },
{ "timeout", KW_TIMEOUT },
{ "listen", KW_LISTEN },
{ "user", KW_USER },
{ "group", KW_GROUP },
{ "anonymous", KW_ANONYMOUS },
{ "filter", KW_FILTER },
Added reverse proxy support from Kim Holviala. His comments regarding this addition follow: The patch implements a simple reverse proxy (with one funky extra feature). It has all the regular features: mapping remote servers to local namespace (ReversePath), disabling forward proxying (ReverseOnly) and HTTP redirect rewriting (ReverseBaseURL). The funky feature is this: You map Google to /google/ and the Google front page opens up fine. Type in stuff and click "Google Search" and you'll get an error from tinyproxy. Reason for this is that Google's form submits to "/search" which unfortunately bypasses our /google/ mapping (if they'd submit to "search" without the slash it would have worked ok). Turn on ReverseMagic and it starts working.... ReverseMagic "hijacks" one cookie which it sends to the client browser. This cookie contains the current reverse proxy path mapping (in the above case /google/) so that even if the site uses absolute links the reverse proxy still knows where to map the request. And yes, it works. No, I've never seen this done before - I couldn't find _any_ working OSS reverse proxies, and the commercial ones I've seen try to parse the page and fix all links (in the above case changing "/search" to "/google/search"). The problem with modifying the html is that it might not be parsable (very common) or it might be encoded so that the proxy can't read it (mod_gzip or likes). Hope you like that patch. One caveat - I haven't coded with C in like three years so my code might be a bit messy.... There shouldn't be any security problems thou, but you never know. I did all the stuff out of my memory without reading any RFC's, but I tested everything with Moz, Konq, IE6, Links and Lynx and they all worked fine.
2004-01-27 03:11:52 +08:00
{ "filterurls", KW_FILTERURLS },
{ "filterextended", KW_FILTEREXTENDED },
{ "filterdefaultdeny", KW_FILTER_DEFAULT_DENY },
{ "filtercasesensitive", KW_FILTER_CASESENSITIVE },
{ "xtinyproxy", KW_XTINYPROXY },
Added reverse proxy support from Kim Holviala. His comments regarding this addition follow: The patch implements a simple reverse proxy (with one funky extra feature). It has all the regular features: mapping remote servers to local namespace (ReversePath), disabling forward proxying (ReverseOnly) and HTTP redirect rewriting (ReverseBaseURL). The funky feature is this: You map Google to /google/ and the Google front page opens up fine. Type in stuff and click "Google Search" and you'll get an error from tinyproxy. Reason for this is that Google's form submits to "/search" which unfortunately bypasses our /google/ mapping (if they'd submit to "search" without the slash it would have worked ok). Turn on ReverseMagic and it starts working.... ReverseMagic "hijacks" one cookie which it sends to the client browser. This cookie contains the current reverse proxy path mapping (in the above case /google/) so that even if the site uses absolute links the reverse proxy still knows where to map the request. And yes, it works. No, I've never seen this done before - I couldn't find _any_ working OSS reverse proxies, and the commercial ones I've seen try to parse the page and fix all links (in the above case changing "/search" to "/google/search"). The problem with modifying the html is that it might not be parsable (very common) or it might be encoded so that the proxy can't read it (mod_gzip or likes). Hope you like that patch. One caveat - I haven't coded with C in like three years so my code might be a bit messy.... There shouldn't be any security problems thou, but you never know. I did all the stuff out of my memory without reading any RFC's, but I tested everything with Moz, Konq, IE6, Links and Lynx and they all worked fine.
2004-01-27 03:11:52 +08:00
{ "reversepath", KW_REVERSEPATH },
{ "reverseonly", KW_REVERSEONLY },
{ "reversemagic", KW_REVERSEMAGIC },
{ "reversebaseurl", KW_REVERSEBASEURL },
{ "upstream", KW_UPSTREAM },
{ "allow", KW_ALLOW },
Added reverse proxy support from Kim Holviala. His comments regarding this addition follow: The patch implements a simple reverse proxy (with one funky extra feature). It has all the regular features: mapping remote servers to local namespace (ReversePath), disabling forward proxying (ReverseOnly) and HTTP redirect rewriting (ReverseBaseURL). The funky feature is this: You map Google to /google/ and the Google front page opens up fine. Type in stuff and click "Google Search" and you'll get an error from tinyproxy. Reason for this is that Google's form submits to "/search" which unfortunately bypasses our /google/ mapping (if they'd submit to "search" without the slash it would have worked ok). Turn on ReverseMagic and it starts working.... ReverseMagic "hijacks" one cookie which it sends to the client browser. This cookie contains the current reverse proxy path mapping (in the above case /google/) so that even if the site uses absolute links the reverse proxy still knows where to map the request. And yes, it works. No, I've never seen this done before - I couldn't find _any_ working OSS reverse proxies, and the commercial ones I've seen try to parse the page and fix all links (in the above case changing "/search" to "/google/search"). The problem with modifying the html is that it might not be parsable (very common) or it might be encoded so that the proxy can't read it (mod_gzip or likes). Hope you like that patch. One caveat - I haven't coded with C in like three years so my code might be a bit messy.... There shouldn't be any security problems thou, but you never know. I did all the stuff out of my memory without reading any RFC's, but I tested everything with Moz, Konq, IE6, Links and Lynx and they all worked fine.
2004-01-27 03:11:52 +08:00
{ "deny", KW_DENY },
{ "connectport", KW_CONNECTPORT },
{ "bind", KW_BIND },
{ "bindsame", KW_BINDSAME },
Added reverse proxy support from Kim Holviala. His comments regarding this addition follow: The patch implements a simple reverse proxy (with one funky extra feature). It has all the regular features: mapping remote servers to local namespace (ReversePath), disabling forward proxying (ReverseOnly) and HTTP redirect rewriting (ReverseBaseURL). The funky feature is this: You map Google to /google/ and the Google front page opens up fine. Type in stuff and click "Google Search" and you'll get an error from tinyproxy. Reason for this is that Google's form submits to "/search" which unfortunately bypasses our /google/ mapping (if they'd submit to "search" without the slash it would have worked ok). Turn on ReverseMagic and it starts working.... ReverseMagic "hijacks" one cookie which it sends to the client browser. This cookie contains the current reverse proxy path mapping (in the above case /google/) so that even if the site uses absolute links the reverse proxy still knows where to map the request. And yes, it works. No, I've never seen this done before - I couldn't find _any_ working OSS reverse proxies, and the commercial ones I've seen try to parse the page and fix all links (in the above case changing "/search" to "/google/search"). The problem with modifying the html is that it might not be parsable (very common) or it might be encoded so that the proxy can't read it (mod_gzip or likes). Hope you like that patch. One caveat - I haven't coded with C in like three years so my code might be a bit messy.... There shouldn't be any security problems thou, but you never know. I did all the stuff out of my memory without reading any RFC's, but I tested everything with Moz, Konq, IE6, Links and Lynx and they all worked fine.
2004-01-27 03:11:52 +08:00
{ "viaproxyname", KW_VIA_PROXY_NAME },
{ "stathost", KW_STATHOST },
{ "errorfile", KW_ERRORPAGE },
{ "defaulterrorfile", KW_DEFAULT_ERRORPAGE },
{ "statfile", KW_STATPAGE },
Added reverse proxy support from Kim Holviala. His comments regarding this addition follow: The patch implements a simple reverse proxy (with one funky extra feature). It has all the regular features: mapping remote servers to local namespace (ReversePath), disabling forward proxying (ReverseOnly) and HTTP redirect rewriting (ReverseBaseURL). The funky feature is this: You map Google to /google/ and the Google front page opens up fine. Type in stuff and click "Google Search" and you'll get an error from tinyproxy. Reason for this is that Google's form submits to "/search" which unfortunately bypasses our /google/ mapping (if they'd submit to "search" without the slash it would have worked ok). Turn on ReverseMagic and it starts working.... ReverseMagic "hijacks" one cookie which it sends to the client browser. This cookie contains the current reverse proxy path mapping (in the above case /google/) so that even if the site uses absolute links the reverse proxy still knows where to map the request. And yes, it works. No, I've never seen this done before - I couldn't find _any_ working OSS reverse proxies, and the commercial ones I've seen try to parse the page and fix all links (in the above case changing "/search" to "/google/search"). The problem with modifying the html is that it might not be parsable (very common) or it might be encoded so that the proxy can't read it (mod_gzip or likes). Hope you like that patch. One caveat - I haven't coded with C in like three years so my code might be a bit messy.... There shouldn't be any security problems thou, but you never know. I did all the stuff out of my memory without reading any RFC's, but I tested everything with Moz, Konq, IE6, Links and Lynx and they all worked fine.
2004-01-27 03:11:52 +08:00
/* loglevel and the settings */
{ "loglevel", KW_LOGLEVEL },
{ "critical", KW_LOG_CRITICAL },
{ "error", KW_LOG_ERROR },
{ "warning", KW_LOG_WARNING },
{ "notice", KW_LOG_NOTICE },
2001-08-27 05:10:04 +08:00
{ "connect", KW_LOG_CONNECT },
{ "info", KW_LOG_INFO },
/* on/off switches */
{ "yes", KW_YES },
{ "on", KW_YES },
{ "no", KW_NO },
{ "off", KW_NO }
};
#define YY_NO_UNPUT 1
#define MAX_REGEXP_LEN 1024
char tiny_buf[MAX_REGEXP_LEN];
char *tiny_str;
static int check_reserved_words(char *token);
static void append_string(int length, char *str);
static void append_char(char c);
%}
2003-06-27 02:26:10 +08:00
%option noyywrap batch yylineno
white [ \t]
digit [0-9]
alpha [a-zA-Z]
alphanum [a-zA-Z0-9]
word [^ \#'"\(\)\{\}\\;\n\t,|\.]
%x string
%%
\#.*$ ;
\n { return '\n'; }
":" { return ':'; }
{white}+ ;
0x{digit}+ { yylval.num = strtol(yytext, NULL, 16); return NUMBER; }
0{digit}+ { yylval.num = strtol(yytext, NULL, 8); return NUMBER; }
{digit}+ { yylval.num = atoi(yytext); return NUMBER; }
{alpha}({alphanum}|[-._])+ { return check_reserved_words(yytext); }
\" {
tiny_str = tiny_buf;
BEGIN(string);
}
<string>\\a { append_char(7); }
<string>\\n { append_char(10); }
<string>\\r { append_char(13); }
<string>\\t { append_char(9); }
<string>\\v { append_char(11); }
<string>\\[^anrtv] { append_string(1, yytext + 1); }
<string>\" {
BEGIN(INITIAL);
yylval.cptr = strdup(tiny_buf);
return STRING;
}
<string>[^"\\]+ { append_string(strlen(yytext), yytext); }
({digit}{1,3}\.){3}{digit}{1,3} { yylval.cptr = strdup(yytext); return NUMERIC_ADDRESS; }
({digit}{1,3}\.){3}{digit}{1,3}\/{digit}+ { yylval.cptr = strdup(yytext); return NETMASK_ADDRESS; }
%%
int
check_reserved_words(char *token)
{
int i;
for (i = 0; i < (sizeof(keywords) / sizeof(struct keyword)); i++) {
if (strcasecmp(keywords[i].kw_name, token) == 0) {
return keywords[i].kw_token;
}
}
yylval.cptr = strdup(token);
return IDENTIFIER;
}
static void
append_string(int length, char *s)
{
int to_copy = min(MAX_REGEXP_LEN - (tiny_str - tiny_buf) - 1, length);
memcpy(tiny_str, s, to_copy);
tiny_str += to_copy;
*tiny_str = 0;
}
static void
append_char(char c)
{
*tiny_str = c;
tiny_str++;
*tiny_str = 0;
}