mirror of
https://github.com/3proxy/3proxy.git
synced 2025-02-24 02:55:40 +08:00
Update PCRE version
This commit is contained in:
parent
e2884b182a
commit
68deec5bcd
@ -1,5 +1,13 @@
|
|||||||
all: $(BUILDDIR)PCREPlugin$(DLSUFFICS)
|
all: $(BUILDDIR)PCREPlugin$(DLSUFFICS)
|
||||||
|
|
||||||
|
pcre_maketables$(OBJSUFFICS): pcre_maketables.c
|
||||||
|
$(CC) $(DCFLAGS) $(CFLAGS) pcre_maketables.c
|
||||||
|
|
||||||
|
pcre_table$(OBJSUFFICS): pcre_table.c
|
||||||
|
$(CC) $(DCFLAGS) $(CFLAGS) pcre_table.c
|
||||||
|
|
||||||
|
pcre_ucd$(OBJSUFFICS): pcre_ucd.c
|
||||||
|
$(CC) $(DCFLAGS) $(CFLAGS) pcre_ucd.c
|
||||||
|
|
||||||
pcre_compile$(OBJSUFFICS): pcre_compile.c
|
pcre_compile$(OBJSUFFICS): pcre_compile.c
|
||||||
$(CC) $(DCFLAGS) $(CFLAGS) pcre_compile.c
|
$(CC) $(DCFLAGS) $(CFLAGS) pcre_compile.c
|
||||||
@ -22,12 +30,6 @@ pcre_get$(OBJSUFFICS): pcre_get.c
|
|||||||
pcre_globals$(OBJSUFFICS): pcre_globals.c
|
pcre_globals$(OBJSUFFICS): pcre_globals.c
|
||||||
$(CC) $(DCFLAGS) $(CFLAGS) pcre_globals.c
|
$(CC) $(DCFLAGS) $(CFLAGS) pcre_globals.c
|
||||||
|
|
||||||
pcre_info$(OBJSUFFICS): pcre_info.c
|
|
||||||
$(CC) $(DCFLAGS) $(CFLAGS) pcre_info.c
|
|
||||||
|
|
||||||
pcre_maketables$(OBJSUFFICS): pcre_maketables.c
|
|
||||||
$(CC) $(DCFLAGS) $(CFLAGS) pcre_maketables.c
|
|
||||||
|
|
||||||
pcre_newline$(OBJSUFFICS): pcre_newline.c
|
pcre_newline$(OBJSUFFICS): pcre_newline.c
|
||||||
$(CC) $(DCFLAGS) $(CFLAGS) pcre_newline.c
|
$(CC) $(DCFLAGS) $(CFLAGS) pcre_newline.c
|
||||||
|
|
||||||
@ -43,12 +45,6 @@ pcre_study$(OBJSUFFICS): pcre_study.c
|
|||||||
pcre_tables$(OBJSUFFICS): pcre_tables.c
|
pcre_tables$(OBJSUFFICS): pcre_tables.c
|
||||||
$(CC) $(DCFLAGS) $(CFLAGS) pcre_tables.c
|
$(CC) $(DCFLAGS) $(CFLAGS) pcre_tables.c
|
||||||
|
|
||||||
pcre_try_flipped$(OBJSUFFICS): pcre_try_flipped.c
|
|
||||||
$(CC) $(DCFLAGS) $(CFLAGS) pcre_try_flipped.c
|
|
||||||
|
|
||||||
pcre_ucp_searchfuncs$(OBJSUFFICS): pcre_ucp_searchfuncs.c
|
|
||||||
$(CC) $(DCFLAGS) $(CFLAGS) pcre_ucp_searchfuncs.c
|
|
||||||
|
|
||||||
pcre_valid_utf8$(OBJSUFFICS): pcre_valid_utf8.c
|
pcre_valid_utf8$(OBJSUFFICS): pcre_valid_utf8.c
|
||||||
$(CC) $(DCFLAGS) $(CFLAGS) pcre_valid_utf8.c
|
$(CC) $(DCFLAGS) $(CFLAGS) pcre_valid_utf8.c
|
||||||
|
|
||||||
@ -58,14 +54,9 @@ pcre_version$(OBJSUFFICS): pcre_version.c
|
|||||||
pcre_xclass$(OBJSUFFICS): pcre_xclass.c
|
pcre_xclass$(OBJSUFFICS): pcre_xclass.c
|
||||||
$(CC) $(DCFLAGS) $(CFLAGS) pcre_xclass.c
|
$(CC) $(DCFLAGS) $(CFLAGS) pcre_xclass.c
|
||||||
|
|
||||||
pcre_dftables$(OBJSUFFICS): pcre_dftables.c
|
|
||||||
$(CC) $(DCFLAGS) $(CFLAGS) pcre_dftables.c
|
|
||||||
|
|
||||||
pcre_plugin$(OBJSUFFICS): pcre_plugin.c
|
pcre_plugin$(OBJSUFFICS): pcre_plugin.c
|
||||||
$(CC) $(DCFLAGS) $(CFLAGS) pcre_plugin.c
|
$(CC) $(DCFLAGS) $(CFLAGS) pcre_plugin.c
|
||||||
|
|
||||||
pcreposix$(OBJSUFFICS): pcreposix.c
|
$(BUILDDIR)PCREPlugin$(DLSUFFICS): pcre_compile$(OBJSUFFICS) pcre_config$(OBJSUFFICS) pcre_dfa_exec$(OBJSUFFICS) pcre_exec$(OBJSUFFICS) pcre_fullinfo$(OBJSUFFICS) pcre_get$(OBJSUFFICS) pcre_globals$(OBJSUFFICS) pcre_newline$(OBJSUFFICS) pcre_ord2utf8$(OBJSUFFICS) pcre_refcount$(OBJSUFFICS) pcre_study$(OBJSUFFICS) pcre_tables$(OBJSUFFICS) pcre_valid_utf8$(OBJSUFFICS) pcre_version$(OBJSUFFICS) pcre_xclass$(OBJSUFFICS) pcre_plugin$(OBJSUFFICS) pcre_maketables$(OBJSUFFICS) pcre_ucd$(OBJSUFFICS) pcre_table$(OBJSUFFICS)
|
||||||
$(CC) $(DCFLAGS) $(CFLAGS) pcreposix.c
|
$(LN) $(LNOUT)../../$(BUILDDIR)PCREPlugin$(DLSUFFICS) $(LDFLAGS) $(DLFLAGS) pcre_compile$(OBJSUFFICS) pcre_config$(OBJSUFFICS) pcre_dfa_exec$(OBJSUFFICS) pcre_exec$(OBJSUFFICS) pcre_fullinfo$(OBJSUFFICS) pcre_get$(OBJSUFFICS) pcre_globals$(OBJSUFFICS) pcre_newline$(OBJSUFFICS) pcre_ord2utf8$(OBJSUFFICS) pcre_refcount$(OBJSUFFICS) pcre_study$(OBJSUFFICS) pcre_tables$(OBJSUFFICS) pcre_valid_utf8$(OBJSUFFICS) pcre_version$(OBJSUFFICS) pcre_xclass$(OBJSUFFICS) pcre_plugin$(OBJSUFFICS) pcre_maketables$(OBJSUFFICS) pcre_ucd$(OBJSUFFICS) pcre_table$(OBJSUFFICS)
|
||||||
|
|
||||||
$(BUILDDIR)PCREPlugin$(DLSUFFICS): pcre_compile$(OBJSUFFICS) pcre_config$(OBJSUFFICS) pcre_dfa_exec$(OBJSUFFICS) pcre_exec$(OBJSUFFICS) pcre_fullinfo$(OBJSUFFICS) pcre_get$(OBJSUFFICS) pcre_globals$(OBJSUFFICS) pcre_info$(OBJSUFFICS) pcre_maketables$(OBJSUFFICS) pcre_newline$(OBJSUFFICS) pcre_ord2utf8$(OBJSUFFICS) pcre_refcount$(OBJSUFFICS) pcre_study$(OBJSUFFICS) pcre_tables$(OBJSUFFICS) pcre_try_flipped$(OBJSUFFICS) pcre_ucp_searchfuncs$(OBJSUFFICS) pcre_valid_utf8$(OBJSUFFICS) pcre_version$(OBJSUFFICS) pcre_xclass$(OBJSUFFICS) pcre_dftables$(OBJSUFFICS) pcre_plugin$(OBJSUFFICS) pcreposix$(OBJSUFFICS)
|
|
||||||
$(LN) $(LNOUT)../../$(BUILDDIR)PCREPlugin$(DLSUFFICS) $(LDFLAGS) $(DLFLAGS) pcre_compile$(OBJSUFFICS) pcre_config$(OBJSUFFICS) pcre_dfa_exec$(OBJSUFFICS) pcre_exec$(OBJSUFFICS) pcre_fullinfo$(OBJSUFFICS) pcre_get$(OBJSUFFICS) pcre_globals$(OBJSUFFICS) pcre_info$(OBJSUFFICS) pcre_maketables$(OBJSUFFICS) pcre_newline$(OBJSUFFICS) pcre_ord2utf8$(OBJSUFFICS) pcre_refcount$(OBJSUFFICS) pcre_study$(OBJSUFFICS) pcre_tables$(OBJSUFFICS) pcre_try_flipped$(OBJSUFFICS) pcre_ucp_searchfuncs$(OBJSUFFICS) pcre_valid_utf8$(OBJSUFFICS) pcre_version$(OBJSUFFICS) pcre_xclass$(OBJSUFFICS) pcre_dftables$(OBJSUFFICS) pcre_plugin$(OBJSUFFICS) pcreposix$(OBJSUFFICS)
|
|
||||||
|
@ -1,145 +1,350 @@
|
|||||||
|
#define PCRE_STATIC
|
||||||
|
/* config.h. Generated from config.h.in by configure. */
|
||||||
|
/* config.h.in. Generated from configure.ac by autoheader. */
|
||||||
|
|
||||||
/* On Unix-like systems config.in is converted by "configure" into config.h.
|
/* PCRE is written in Standard C, but there are a few non-standard things it
|
||||||
Some other environments also support the use of "configure". PCRE is written in
|
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
||||||
Standard C, but there are a few non-standard things it can cope with, allowing
|
systems.
|
||||||
it to run on SunOS4 and other "close to standard" systems.
|
|
||||||
|
|
||||||
On a non-Unix-like system you should just copy this file into config.h, and set
|
In environments that support the GNU autotools, config.h.in is converted into
|
||||||
up the macros the way you need them. You should normally change the definitions
|
config.h by the "configure" script. In environments that use CMake,
|
||||||
of HAVE_STRERROR and HAVE_MEMMOVE to 1. Unfortunately, because of the way
|
config-cmake.in is converted into config.h. If you are going to build PCRE "by
|
||||||
autoconf works, these cannot be made the defaults. If your system has bcopy()
|
hand" without using "configure" or CMake, you should copy the distributed
|
||||||
and not memmove(), change the definition of HAVE_BCOPY instead of HAVE_MEMMOVE.
|
config.h.generic to config.h, and edit the macro definitions to be the way you
|
||||||
If your system has neither bcopy() nor memmove(), leave them both as 0; an
|
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
|
||||||
emulation function will be used. */
|
so that config.h is included at the start of every source.
|
||||||
|
|
||||||
|
Alternatively, you can avoid editing by using -D on the compiler command line
|
||||||
|
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
|
||||||
|
but if you do, default values will be taken from config.h for non-boolean
|
||||||
|
macros that are not defined on the command line.
|
||||||
|
|
||||||
|
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE8 should either be defined
|
||||||
|
(conventionally to 1) for TRUE, and not defined at all for FALSE. All such
|
||||||
|
macros are listed as a commented #undef in config.h.generic. Macros such as
|
||||||
|
MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
|
||||||
|
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
|
||||||
|
|
||||||
|
PCRE uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||||
|
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
|
||||||
|
sure both macros are undefined; an emulation function will then be used. */
|
||||||
|
|
||||||
|
/* By default, the \R escape sequence matches any Unicode line ending
|
||||||
|
character or sequence of characters. If BSR_ANYCRLF is defined (to any
|
||||||
|
value), this is changed so that backslash-R matches only CR, LF, or CRLF.
|
||||||
|
The build-time default can be overridden by the user of PCRE at runtime. */
|
||||||
|
/* #undef BSR_ANYCRLF */
|
||||||
|
|
||||||
/* If you are compiling for a system that uses EBCDIC instead of ASCII
|
/* If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||||
character codes, define this macro as 1. On systems that can use "configure",
|
character codes, define this macro to any value. You must also edit the
|
||||||
this can be done via --enable-ebcdic. */
|
NEWLINE macro below to set a suitable EBCDIC newline, commonly 21 (0x15).
|
||||||
|
On systems that can use "configure" or CMake to set EBCDIC, NEWLINE is
|
||||||
|
automatically adjusted. When EBCDIC is set, PCRE assumes that all input
|
||||||
|
strings are in EBCDIC. If you do not define this macro, PCRE will assume
|
||||||
|
input strings are ASCII or UTF-8/16/32 Unicode. It is not possible to build
|
||||||
|
a version of PCRE that supports both EBCDIC and UTF-8/16/32. */
|
||||||
|
/* #undef EBCDIC */
|
||||||
|
|
||||||
#ifndef EBCDIC
|
/* In an EBCDIC environment, define this macro to any value to arrange for the
|
||||||
#define EBCDIC 0
|
NL character to be 0x25 instead of the default 0x15. NL plays the role that
|
||||||
#endif
|
LF does in an ASCII/Unicode environment. The value must also be set in the
|
||||||
|
NEWLINE macro below. On systems that can use "configure" or CMake to set
|
||||||
|
EBCDIC_NL25, the adjustment of NEWLINE is automatic. */
|
||||||
|
/* #undef EBCDIC_NL25 */
|
||||||
|
|
||||||
#define PCRE_STATIC
|
/* Define to 1 if you have the `bcopy' function. */
|
||||||
|
/* #undef HAVE_BCOPY */
|
||||||
|
|
||||||
/* If you are compiling for a system other than a Unix-like system or Win32,
|
/* Define to 1 if you have the <bits/type_traits.h> header file. */
|
||||||
and it needs some magic to be inserted before the definition of a function that
|
/* #undef HAVE_BITS_TYPE_TRAITS_H */
|
||||||
is exported by the library, define this macro to contain the relevant magic. If
|
|
||||||
you do not define this macro, it defaults to "extern" for a C compiler and
|
|
||||||
"extern C" for a C++ compiler on non-Win32 systems. This macro appears at the
|
|
||||||
start of every exported function that is part of the external API. It does not
|
|
||||||
appear on functions that are "external" in the C sense, but which are internal
|
|
||||||
to the library. */
|
|
||||||
|
|
||||||
/* #define PCRE_DATA_SCOPE */
|
/* Define to 1 if you have the <bzlib.h> header file. */
|
||||||
|
/* #undef HAVE_BZLIB_H */
|
||||||
|
|
||||||
/* Define the following macro to empty if the "const" keyword does not work. */
|
/* Define to 1 if you have the <dirent.h> header file. */
|
||||||
|
/* #undef HAVE_DIRENT_H */
|
||||||
|
|
||||||
#undef const
|
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||||
|
/* #undef HAVE_DLFCN_H */
|
||||||
|
|
||||||
/* Define the following macro to "unsigned" if <stddef.h> does not define
|
/* Define to 1 if you have the <editline/readline.h> header file. */
|
||||||
size_t. */
|
/* #undef HAVE_EDITLINE_READLINE_H */
|
||||||
|
|
||||||
#undef size_t
|
/* Define to 1 if you have the <edit/readline/readline.h> header file. */
|
||||||
|
/* #undef HAVE_EDIT_READLINE_READLINE_H */
|
||||||
|
|
||||||
/* The following two definitions are mainly for the benefit of SunOS4, which
|
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||||
does not have the strerror() or memmove() functions that should be present in
|
/* #undef HAVE_INTTYPES_H */
|
||||||
all Standard C libraries. The macros HAVE_STRERROR and HAVE_MEMMOVE should
|
|
||||||
normally be defined with the value 1 for other systems, but unfortunately we
|
|
||||||
cannot make this the default because "configure" files generated by autoconf
|
|
||||||
will only change 0 to 1; they won't change 1 to 0 if the functions are not
|
|
||||||
found. */
|
|
||||||
|
|
||||||
#define HAVE_STRERROR 0
|
/* Define to 1 if you have the <limits.h> header file. */
|
||||||
#define HAVE_MEMMOVE 0
|
/* #undef HAVE_LIMITS_H */
|
||||||
|
|
||||||
/* There are some non-Unix-like systems that don't even have bcopy(). If this
|
/* Define to 1 if the system has the type `long long'. */
|
||||||
macro is false, an emulation is used. If HAVE_MEMMOVE is set to 1, the value of
|
/* #undef HAVE_LONG_LONG */
|
||||||
HAVE_BCOPY is not relevant. */
|
|
||||||
|
|
||||||
#define HAVE_BCOPY 0
|
/* Define to 1 if you have the `memmove' function. */
|
||||||
|
/* #undef HAVE_MEMMOVE */
|
||||||
|
|
||||||
/* The value of NEWLINE determines the newline character. The default is to
|
/* Define to 1 if you have the <memory.h> header file. */
|
||||||
leave it up to the compiler, but some sites want to force a particular value.
|
/* #undef HAVE_MEMORY_H */
|
||||||
On Unix-like systems, "configure" can be used to override this default. */
|
|
||||||
|
|
||||||
#ifndef NEWLINE
|
/* Define if you have POSIX threads libraries and header files. */
|
||||||
#define NEWLINE '\n'
|
/* #undef HAVE_PTHREAD */
|
||||||
#endif
|
|
||||||
|
|
||||||
/* The value of LINK_SIZE determines the number of bytes used to store links as
|
/* Have PTHREAD_PRIO_INHERIT. */
|
||||||
offsets within the compiled regex. The default is 2, which allows for compiled
|
/* #undef HAVE_PTHREAD_PRIO_INHERIT */
|
||||||
patterns up to 64K long. This covers the vast majority of cases. However, PCRE
|
|
||||||
can also be compiled to use 3 or 4 bytes instead. This allows for longer
|
|
||||||
patterns in extreme cases. On systems that support it, "configure" can be used
|
|
||||||
to override this default. */
|
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <readline/history.h> header file. */
|
||||||
|
/* #undef HAVE_READLINE_HISTORY_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <readline/readline.h> header file. */
|
||||||
|
/* #undef HAVE_READLINE_READLINE_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <stdint.h> header file. */
|
||||||
|
/* #undef HAVE_STDINT_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||||
|
/* #undef HAVE_STDLIB_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `strerror' function. */
|
||||||
|
/* #undef HAVE_STRERROR */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <string> header file. */
|
||||||
|
/* #undef HAVE_STRING */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <strings.h> header file. */
|
||||||
|
/* #undef HAVE_STRINGS_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <string.h> header file. */
|
||||||
|
/* #undef HAVE_STRING_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have `strtoimax'. */
|
||||||
|
/* #undef HAVE_STRTOIMAX */
|
||||||
|
|
||||||
|
/* Define to 1 if you have `strtoll'. */
|
||||||
|
/* #undef HAVE_STRTOLL */
|
||||||
|
|
||||||
|
/* Define to 1 if you have `strtoq'. */
|
||||||
|
/* #undef HAVE_STRTOQ */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||||
|
/* #undef HAVE_SYS_STAT_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||||
|
/* #undef HAVE_SYS_TYPES_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <type_traits.h> header file. */
|
||||||
|
/* #undef HAVE_TYPE_TRAITS_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <unistd.h> header file. */
|
||||||
|
/* #undef HAVE_UNISTD_H */
|
||||||
|
|
||||||
|
/* Define to 1 if the system has the type `unsigned long long'. */
|
||||||
|
/* #undef HAVE_UNSIGNED_LONG_LONG */
|
||||||
|
|
||||||
|
/* Define to 1 if the compiler supports simple visibility declarations. */
|
||||||
|
/* #undef HAVE_VISIBILITY */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <windows.h> header file. */
|
||||||
|
/* #undef HAVE_WINDOWS_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <zlib.h> header file. */
|
||||||
|
/* #undef HAVE_ZLIB_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have `_strtoi64'. */
|
||||||
|
/* #undef HAVE__STRTOI64 */
|
||||||
|
|
||||||
|
/* The value of LINK_SIZE determines the number of bytes used to store links
|
||||||
|
as offsets within the compiled regex. The default is 2, which allows for
|
||||||
|
compiled patterns up to 64K long. This covers the vast majority of cases.
|
||||||
|
However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows
|
||||||
|
for longer patterns in extreme cases. */
|
||||||
#ifndef LINK_SIZE
|
#ifndef LINK_SIZE
|
||||||
#define LINK_SIZE 2
|
#define LINK_SIZE 2
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* When calling PCRE via the POSIX interface, additional working storage is
|
/* Define to the sub-directory where libtool stores uninstalled libraries. */
|
||||||
required for holding the pointers to capturing substrings because PCRE requires
|
/* This is ignored unless you are using libtool. */
|
||||||
three integers per substring, whereas the POSIX interface provides only two. If
|
#ifndef LT_OBJDIR
|
||||||
the number of expected substrings is small, the wrapper function uses space on
|
#define LT_OBJDIR ".libs/"
|
||||||
the stack, because this is faster than using malloc() for each call. The
|
|
||||||
threshold above which the stack is no longer used is defined by POSIX_MALLOC_
|
|
||||||
THRESHOLD. On systems that support it, "configure" can be used to override this
|
|
||||||
default. */
|
|
||||||
|
|
||||||
#ifndef POSIX_MALLOC_THRESHOLD
|
|
||||||
#define POSIX_MALLOC_THRESHOLD 10
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* PCRE uses recursive function calls to handle backtracking while matching.
|
/* The value of MATCH_LIMIT determines the default number of times the
|
||||||
This can sometimes be a problem on systems that have stacks of limited size.
|
internal match() function can be called during a single execution of
|
||||||
Define NO_RECURSE to get a version that doesn't use recursion in the match()
|
pcre_exec(). There is a runtime interface for setting a different limit.
|
||||||
function; instead it creates its own stack by steam using pcre_recurse_malloc()
|
The limit exists in order to catch runaway regular expressions that take
|
||||||
to obtain memory from the heap. For more detail, see the comments and other
|
for ever to determine that they do not match. The default is set very large
|
||||||
stuff just above the match() function. On systems that support it, "configure"
|
so that it does not accidentally catch legitimate cases. */
|
||||||
can be used to set this in the Makefile (use --disable-stack-for-recursion). */
|
|
||||||
|
|
||||||
/* #define NO_RECURSE */
|
|
||||||
|
|
||||||
/* The value of MATCH_LIMIT determines the default number of times the internal
|
|
||||||
match() function can be called during a single execution of pcre_exec(). There
|
|
||||||
is a runtime interface for setting a different limit. The limit exists in order
|
|
||||||
to catch runaway regular expressions that take for ever to determine that they
|
|
||||||
do not match. The default is set very large so that it does not accidentally
|
|
||||||
catch legitimate cases. On systems that support it, "configure" can be used to
|
|
||||||
override this default. */
|
|
||||||
|
|
||||||
#ifndef MATCH_LIMIT
|
#ifndef MATCH_LIMIT
|
||||||
#define MATCH_LIMIT 10000000
|
#define MATCH_LIMIT 10000000
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* The above limit applies to all calls of match(), whether or not they
|
/* The above limit applies to all calls of match(), whether or not they
|
||||||
increase the recursion depth. In some environments it is desirable to limit the
|
increase the recursion depth. In some environments it is desirable to limit
|
||||||
depth of recursive calls of match() more strictly, in order to restrict the
|
the depth of recursive calls of match() more strictly, in order to restrict
|
||||||
maximum amount of stack (or heap, if NO_RECURSE is defined) that is used. The
|
the maximum amount of stack (or heap, if NO_RECURSE is defined) that is
|
||||||
value of MATCH_LIMIT_RECURSION applies only to recursive calls of match(). To
|
used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of
|
||||||
have any useful effect, it must be less than the value of MATCH_LIMIT. There is
|
match(). To have any useful effect, it must be less than the value of
|
||||||
a runtime method for setting a different limit. On systems that support it,
|
MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is
|
||||||
"configure" can be used to override this default. */
|
a runtime method for setting a different limit. */
|
||||||
|
|
||||||
#ifndef MATCH_LIMIT_RECURSION
|
#ifndef MATCH_LIMIT_RECURSION
|
||||||
#define MATCH_LIMIT_RECURSION MATCH_LIMIT
|
#define MATCH_LIMIT_RECURSION MATCH_LIMIT
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* These three limits are parameterized just in case anybody ever wants to
|
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||||
change them. Care must be taken if they are increased, because they guard
|
Care must be taken if it is increased, because it guards against integer
|
||||||
against integer overflow caused by enormously large patterns. */
|
overflow caused by enormously large patterns. */
|
||||||
|
|
||||||
#ifndef MAX_NAME_SIZE
|
|
||||||
#define MAX_NAME_SIZE 32
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef MAX_NAME_COUNT
|
#ifndef MAX_NAME_COUNT
|
||||||
#define MAX_NAME_COUNT 10000
|
#define MAX_NAME_COUNT 10000
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef MAX_DUPLENGTH
|
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||||
#define MAX_DUPLENGTH 30000
|
Care must be taken if it is increased, because it guards against integer
|
||||||
|
overflow caused by enormously large patterns. */
|
||||||
|
#ifndef MAX_NAME_SIZE
|
||||||
|
#define MAX_NAME_SIZE 32
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* End */
|
/* The value of NEWLINE determines the default newline character sequence.
|
||||||
|
PCRE client programs can override this by selecting other values at run
|
||||||
|
time. In ASCII environments, the value can be 10 (LF), 13 (CR), or 3338
|
||||||
|
(CRLF); in EBCDIC environments the value can be 21 or 37 (LF), 13 (CR), or
|
||||||
|
3349 or 3365 (CRLF) because there are two alternative codepoints (0x15 and
|
||||||
|
0x25) that are used as the NL line terminator that is equivalent to ASCII
|
||||||
|
LF. In both ASCII and EBCDIC environments the value can also be -1 (ANY),
|
||||||
|
or -2 (ANYCRLF). */
|
||||||
|
#ifndef NEWLINE
|
||||||
|
#define NEWLINE 10
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* PCRE uses recursive function calls to handle backtracking while matching.
|
||||||
|
This can sometimes be a problem on systems that have stacks of limited
|
||||||
|
size. Define NO_RECURSE to any value to get a version that doesn't use
|
||||||
|
recursion in the match() function; instead it creates its own stack by
|
||||||
|
steam using pcre_recurse_malloc() to obtain memory from the heap. For more
|
||||||
|
detail, see the comments and other stuff just above the match() function.
|
||||||
|
*/
|
||||||
|
/* #undef NO_RECURSE */
|
||||||
|
|
||||||
|
/* Name of package */
|
||||||
|
#define PACKAGE "pcre"
|
||||||
|
|
||||||
|
/* Define to the address where bug reports for this package should be sent. */
|
||||||
|
#define PACKAGE_BUGREPORT ""
|
||||||
|
|
||||||
|
/* Define to the full name of this package. */
|
||||||
|
#define PACKAGE_NAME "PCRE"
|
||||||
|
|
||||||
|
/* Define to the full name and version of this package. */
|
||||||
|
#define PACKAGE_STRING "PCRE 8.39"
|
||||||
|
|
||||||
|
/* Define to the one symbol short name of this package. */
|
||||||
|
#define PACKAGE_TARNAME "pcre"
|
||||||
|
|
||||||
|
/* Define to the home page for this package. */
|
||||||
|
#define PACKAGE_URL ""
|
||||||
|
|
||||||
|
/* Define to the version of this package. */
|
||||||
|
#define PACKAGE_VERSION "8.39"
|
||||||
|
|
||||||
|
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||||
|
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||||
|
stack that is used while compiling a pattern. */
|
||||||
|
#ifndef PARENS_NEST_LIMIT
|
||||||
|
#define PARENS_NEST_LIMIT 250
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* The value of PCREGREP_BUFSIZE determines the size of buffer used by
|
||||||
|
pcregrep to hold parts of the file it is searching. This is also the
|
||||||
|
minimum value. The actual amount of memory used by pcregrep is three times
|
||||||
|
this number, because it allows for the buffering of "before" and "after"
|
||||||
|
lines. */
|
||||||
|
#ifndef PCREGREP_BUFSIZE
|
||||||
|
#define PCREGREP_BUFSIZE 20480
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* If you are compiling for a system other than a Unix-like system or
|
||||||
|
Win32, and it needs some magic to be inserted before the definition
|
||||||
|
of a function that is exported by the library, define this macro to
|
||||||
|
contain the relevant magic. If you do not define this macro, a suitable
|
||||||
|
__declspec value is used for Windows systems; in other environments
|
||||||
|
"extern" is used for a C compiler and "extern C" for a C++ compiler.
|
||||||
|
This macro appears at the start of every exported function that is part
|
||||||
|
of the external API. It does not appear on functions that are "external"
|
||||||
|
in the C sense, but which are internal to the library. */
|
||||||
|
/* #undef PCRE_EXP_DEFN */
|
||||||
|
|
||||||
|
/* Define to any value if linking statically (TODO: make nice with Libtool) */
|
||||||
|
/* #undef PCRE_STATIC */
|
||||||
|
|
||||||
|
/* When calling PCRE via the POSIX interface, additional working storage is
|
||||||
|
required for holding the pointers to capturing substrings because PCRE
|
||||||
|
requires three integers per substring, whereas the POSIX interface provides
|
||||||
|
only two. If the number of expected substrings is small, the wrapper
|
||||||
|
function uses space on the stack, because this is faster than using
|
||||||
|
malloc() for each call. The threshold above which the stack is no longer
|
||||||
|
used is defined by POSIX_MALLOC_THRESHOLD. */
|
||||||
|
#ifndef POSIX_MALLOC_THRESHOLD
|
||||||
|
#define POSIX_MALLOC_THRESHOLD 10
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Define to necessary symbol if this constant uses a non-standard name on
|
||||||
|
your system. */
|
||||||
|
/* #undef PTHREAD_CREATE_JOINABLE */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the ANSI C header files. */
|
||||||
|
/* #undef STDC_HEADERS */
|
||||||
|
|
||||||
|
/* Define to any value to enable support for Just-In-Time compiling. */
|
||||||
|
/* #undef SUPPORT_JIT */
|
||||||
|
|
||||||
|
/* Define to any value to allow pcregrep to be linked with libbz2, so that it
|
||||||
|
is able to handle .bz2 files. */
|
||||||
|
/* #undef SUPPORT_LIBBZ2 */
|
||||||
|
|
||||||
|
/* Define to any value to allow pcretest to be linked with libedit. */
|
||||||
|
/* #undef SUPPORT_LIBEDIT */
|
||||||
|
|
||||||
|
/* Define to any value to allow pcretest to be linked with libreadline. */
|
||||||
|
/* #undef SUPPORT_LIBREADLINE */
|
||||||
|
|
||||||
|
/* Define to any value to allow pcregrep to be linked with libz, so that it is
|
||||||
|
able to handle .gz files. */
|
||||||
|
/* #undef SUPPORT_LIBZ */
|
||||||
|
|
||||||
|
/* Define to any value to enable the 16 bit PCRE library. */
|
||||||
|
/* #undef SUPPORT_PCRE16 */
|
||||||
|
|
||||||
|
/* Define to any value to enable the 32 bit PCRE library. */
|
||||||
|
/* #undef SUPPORT_PCRE32 */
|
||||||
|
|
||||||
|
/* Define to any value to enable the 8 bit PCRE library. */
|
||||||
|
/* #undef SUPPORT_PCRE8 */
|
||||||
|
|
||||||
|
/* Define to any value to enable JIT support in pcregrep. */
|
||||||
|
/* #undef SUPPORT_PCREGREP_JIT */
|
||||||
|
|
||||||
|
/* Define to any value to enable support for Unicode properties. */
|
||||||
|
/* #undef SUPPORT_UCP */
|
||||||
|
|
||||||
|
/* Define to any value to enable support for the UTF-8/16/32 Unicode encoding.
|
||||||
|
This will work even in an EBCDIC environment, but it is incompatible with
|
||||||
|
the EBCDIC macro. That is, PCRE can support *either* EBCDIC code *or*
|
||||||
|
ASCII/UTF-8/16/32, but not both at once. */
|
||||||
|
/* #undef SUPPORT_UTF */
|
||||||
|
|
||||||
|
/* Define to any value for valgrind support to find invalid memory reads. */
|
||||||
|
/* #undef SUPPORT_VALGRIND */
|
||||||
|
|
||||||
|
/* Version number of package */
|
||||||
|
#define VERSION "8.39"
|
||||||
|
|
||||||
|
/* Define to empty if `const' does not conform to ANSI C. */
|
||||||
|
/* #undef const */
|
||||||
|
|
||||||
|
/* Define to the type of a signed integer type of width exactly 64 bits if
|
||||||
|
such a type exists and the standard includes do not define it. */
|
||||||
|
/* #undef int64_t */
|
||||||
|
|
||||||
|
/* Define to `unsigned int' if <sys/types.h> does not define. */
|
||||||
|
/* #undef size_t */
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
#include "config.h"
|
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Perl-Compatible Regular Expressions *
|
* Perl-Compatible Regular Expressions *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
@ -6,7 +5,7 @@
|
|||||||
/* This is the public header file for the PCRE library, to be #included by
|
/* This is the public header file for the PCRE library, to be #included by
|
||||||
applications that call the PCRE functions.
|
applications that call the PCRE functions.
|
||||||
|
|
||||||
Copyright (c) 1997-2007 University of Cambridge
|
Copyright (c) 1997-2014 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -42,10 +41,10 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
/* The current PCRE version information. */
|
/* The current PCRE version information. */
|
||||||
|
|
||||||
#define PCRE_MAJOR 7
|
#define PCRE_MAJOR 8
|
||||||
#define PCRE_MINOR 4
|
#define PCRE_MINOR 39
|
||||||
#define PCRE_PRERELEASE
|
#define PCRE_PRERELEASE
|
||||||
#define PCRE_DATE 2007-09-21
|
#define PCRE_DATE 2016-06-14
|
||||||
|
|
||||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||||
imported have to be identified as such. When building PCRE, the appropriate
|
imported have to be identified as such. When building PCRE, the appropriate
|
||||||
@ -96,35 +95,80 @@ it is needed here for malloc. */
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Options */
|
/* Public options. Some are compile-time only, some are run-time only, and some
|
||||||
|
are both. Most of the compile-time options are saved with the compiled regex so
|
||||||
|
that they can be inspected during studying (and therefore JIT compiling). Note
|
||||||
|
that pcre_study() has its own set of options. Originally, all the options
|
||||||
|
defined here used distinct bits. However, almost all the bits in a 32-bit word
|
||||||
|
are now used, so in order to conserve them, option bits that were previously
|
||||||
|
only recognized at matching time (i.e. by pcre_exec() or pcre_dfa_exec()) may
|
||||||
|
also be used for compile-time options that affect only compiling and are not
|
||||||
|
relevant for studying or JIT compiling.
|
||||||
|
|
||||||
#define PCRE_CASELESS 0x00000001
|
Some options for pcre_compile() change its behaviour but do not affect the
|
||||||
#define PCRE_MULTILINE 0x00000002
|
behaviour of the execution functions. Other options are passed through to the
|
||||||
#define PCRE_DOTALL 0x00000004
|
execution functions and affect their behaviour, with or without affecting the
|
||||||
#define PCRE_EXTENDED 0x00000008
|
behaviour of pcre_compile().
|
||||||
#define PCRE_ANCHORED 0x00000010
|
|
||||||
#define PCRE_DOLLAR_ENDONLY 0x00000020
|
Options that can be passed to pcre_compile() are tagged Cx below, with these
|
||||||
#define PCRE_EXTRA 0x00000040
|
variants:
|
||||||
#define PCRE_NOTBOL 0x00000080
|
|
||||||
#define PCRE_NOTEOL 0x00000100
|
C1 Affects compile only
|
||||||
#define PCRE_UNGREEDY 0x00000200
|
C2 Does not affect compile; affects exec, dfa_exec
|
||||||
#define PCRE_NOTEMPTY 0x00000400
|
C3 Affects compile, exec, dfa_exec
|
||||||
#define PCRE_UTF8 0x00000800
|
C4 Affects compile, exec, dfa_exec, study
|
||||||
#define PCRE_NO_AUTO_CAPTURE 0x00001000
|
C5 Affects compile, exec, study
|
||||||
#define PCRE_NO_UTF8_CHECK 0x00002000
|
|
||||||
#define PCRE_AUTO_CALLOUT 0x00004000
|
Options that can be set for pcre_exec() and/or pcre_dfa_exec() are flagged with
|
||||||
#define PCRE_PARTIAL 0x00008000
|
E and D, respectively. They take precedence over C3, C4, and C5 settings passed
|
||||||
#define PCRE_DFA_SHORTEST 0x00010000
|
from pcre_compile(). Those that are compatible with JIT execution are flagged
|
||||||
#define PCRE_DFA_RESTART 0x00020000
|
with J. */
|
||||||
#define PCRE_FIRSTLINE 0x00040000
|
|
||||||
#define PCRE_DUPNAMES 0x00080000
|
#define PCRE_CASELESS 0x00000001 /* C1 */
|
||||||
#define PCRE_NEWLINE_CR 0x00100000
|
#define PCRE_MULTILINE 0x00000002 /* C1 */
|
||||||
#define PCRE_NEWLINE_LF 0x00200000
|
#define PCRE_DOTALL 0x00000004 /* C1 */
|
||||||
#define PCRE_NEWLINE_CRLF 0x00300000
|
#define PCRE_EXTENDED 0x00000008 /* C1 */
|
||||||
#define PCRE_NEWLINE_ANY 0x00400000
|
#define PCRE_ANCHORED 0x00000010 /* C4 E D */
|
||||||
#define PCRE_NEWLINE_ANYCRLF 0x00500000
|
#define PCRE_DOLLAR_ENDONLY 0x00000020 /* C2 */
|
||||||
#define PCRE_BSR_ANYCRLF 0x00800000
|
#define PCRE_EXTRA 0x00000040 /* C1 */
|
||||||
#define PCRE_BSR_UNICODE 0x01000000
|
#define PCRE_NOTBOL 0x00000080 /* E D J */
|
||||||
|
#define PCRE_NOTEOL 0x00000100 /* E D J */
|
||||||
|
#define PCRE_UNGREEDY 0x00000200 /* C1 */
|
||||||
|
#define PCRE_NOTEMPTY 0x00000400 /* E D J */
|
||||||
|
#define PCRE_UTF8 0x00000800 /* C4 ) */
|
||||||
|
#define PCRE_UTF16 0x00000800 /* C4 ) Synonyms */
|
||||||
|
#define PCRE_UTF32 0x00000800 /* C4 ) */
|
||||||
|
#define PCRE_NO_AUTO_CAPTURE 0x00001000 /* C1 */
|
||||||
|
#define PCRE_NO_UTF8_CHECK 0x00002000 /* C1 E D J ) */
|
||||||
|
#define PCRE_NO_UTF16_CHECK 0x00002000 /* C1 E D J ) Synonyms */
|
||||||
|
#define PCRE_NO_UTF32_CHECK 0x00002000 /* C1 E D J ) */
|
||||||
|
#define PCRE_AUTO_CALLOUT 0x00004000 /* C1 */
|
||||||
|
#define PCRE_PARTIAL_SOFT 0x00008000 /* E D J ) Synonyms */
|
||||||
|
#define PCRE_PARTIAL 0x00008000 /* E D J ) */
|
||||||
|
|
||||||
|
/* This pair use the same bit. */
|
||||||
|
#define PCRE_NEVER_UTF 0x00010000 /* C1 ) Overlaid */
|
||||||
|
#define PCRE_DFA_SHORTEST 0x00010000 /* D ) Overlaid */
|
||||||
|
|
||||||
|
/* This pair use the same bit. */
|
||||||
|
#define PCRE_NO_AUTO_POSSESS 0x00020000 /* C1 ) Overlaid */
|
||||||
|
#define PCRE_DFA_RESTART 0x00020000 /* D ) Overlaid */
|
||||||
|
|
||||||
|
#define PCRE_FIRSTLINE 0x00040000 /* C3 */
|
||||||
|
#define PCRE_DUPNAMES 0x00080000 /* C1 */
|
||||||
|
#define PCRE_NEWLINE_CR 0x00100000 /* C3 E D */
|
||||||
|
#define PCRE_NEWLINE_LF 0x00200000 /* C3 E D */
|
||||||
|
#define PCRE_NEWLINE_CRLF 0x00300000 /* C3 E D */
|
||||||
|
#define PCRE_NEWLINE_ANY 0x00400000 /* C3 E D */
|
||||||
|
#define PCRE_NEWLINE_ANYCRLF 0x00500000 /* C3 E D */
|
||||||
|
#define PCRE_BSR_ANYCRLF 0x00800000 /* C3 E D */
|
||||||
|
#define PCRE_BSR_UNICODE 0x01000000 /* C3 E D */
|
||||||
|
#define PCRE_JAVASCRIPT_COMPAT 0x02000000 /* C5 */
|
||||||
|
#define PCRE_NO_START_OPTIMIZE 0x04000000 /* C2 E D ) Synonyms */
|
||||||
|
#define PCRE_NO_START_OPTIMISE 0x04000000 /* C2 E D ) */
|
||||||
|
#define PCRE_PARTIAL_HARD 0x08000000 /* E D J */
|
||||||
|
#define PCRE_NOTEMPTY_ATSTART 0x10000000 /* E D J */
|
||||||
|
#define PCRE_UCP 0x20000000 /* C3 */
|
||||||
|
|
||||||
/* Exec-time and get/set-time error codes */
|
/* Exec-time and get/set-time error codes */
|
||||||
|
|
||||||
@ -138,8 +182,11 @@ extern "C" {
|
|||||||
#define PCRE_ERROR_NOSUBSTRING (-7)
|
#define PCRE_ERROR_NOSUBSTRING (-7)
|
||||||
#define PCRE_ERROR_MATCHLIMIT (-8)
|
#define PCRE_ERROR_MATCHLIMIT (-8)
|
||||||
#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */
|
#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */
|
||||||
#define PCRE_ERROR_BADUTF8 (-10)
|
#define PCRE_ERROR_BADUTF8 (-10) /* Same for 8/16/32 */
|
||||||
#define PCRE_ERROR_BADUTF8_OFFSET (-11)
|
#define PCRE_ERROR_BADUTF16 (-10) /* Same for 8/16/32 */
|
||||||
|
#define PCRE_ERROR_BADUTF32 (-10) /* Same for 8/16/32 */
|
||||||
|
#define PCRE_ERROR_BADUTF8_OFFSET (-11) /* Same for 8/16 */
|
||||||
|
#define PCRE_ERROR_BADUTF16_OFFSET (-11) /* Same for 8/16 */
|
||||||
#define PCRE_ERROR_PARTIAL (-12)
|
#define PCRE_ERROR_PARTIAL (-12)
|
||||||
#define PCRE_ERROR_BADPARTIAL (-13)
|
#define PCRE_ERROR_BADPARTIAL (-13)
|
||||||
#define PCRE_ERROR_INTERNAL (-14)
|
#define PCRE_ERROR_INTERNAL (-14)
|
||||||
@ -152,6 +199,58 @@ extern "C" {
|
|||||||
#define PCRE_ERROR_RECURSIONLIMIT (-21)
|
#define PCRE_ERROR_RECURSIONLIMIT (-21)
|
||||||
#define PCRE_ERROR_NULLWSLIMIT (-22) /* No longer actually used */
|
#define PCRE_ERROR_NULLWSLIMIT (-22) /* No longer actually used */
|
||||||
#define PCRE_ERROR_BADNEWLINE (-23)
|
#define PCRE_ERROR_BADNEWLINE (-23)
|
||||||
|
#define PCRE_ERROR_BADOFFSET (-24)
|
||||||
|
#define PCRE_ERROR_SHORTUTF8 (-25)
|
||||||
|
#define PCRE_ERROR_SHORTUTF16 (-25) /* Same for 8/16 */
|
||||||
|
#define PCRE_ERROR_RECURSELOOP (-26)
|
||||||
|
#define PCRE_ERROR_JIT_STACKLIMIT (-27)
|
||||||
|
#define PCRE_ERROR_BADMODE (-28)
|
||||||
|
#define PCRE_ERROR_BADENDIANNESS (-29)
|
||||||
|
#define PCRE_ERROR_DFA_BADRESTART (-30)
|
||||||
|
#define PCRE_ERROR_JIT_BADOPTION (-31)
|
||||||
|
#define PCRE_ERROR_BADLENGTH (-32)
|
||||||
|
#define PCRE_ERROR_UNSET (-33)
|
||||||
|
|
||||||
|
/* Specific error codes for UTF-8 validity checks */
|
||||||
|
|
||||||
|
#define PCRE_UTF8_ERR0 0
|
||||||
|
#define PCRE_UTF8_ERR1 1
|
||||||
|
#define PCRE_UTF8_ERR2 2
|
||||||
|
#define PCRE_UTF8_ERR3 3
|
||||||
|
#define PCRE_UTF8_ERR4 4
|
||||||
|
#define PCRE_UTF8_ERR5 5
|
||||||
|
#define PCRE_UTF8_ERR6 6
|
||||||
|
#define PCRE_UTF8_ERR7 7
|
||||||
|
#define PCRE_UTF8_ERR8 8
|
||||||
|
#define PCRE_UTF8_ERR9 9
|
||||||
|
#define PCRE_UTF8_ERR10 10
|
||||||
|
#define PCRE_UTF8_ERR11 11
|
||||||
|
#define PCRE_UTF8_ERR12 12
|
||||||
|
#define PCRE_UTF8_ERR13 13
|
||||||
|
#define PCRE_UTF8_ERR14 14
|
||||||
|
#define PCRE_UTF8_ERR15 15
|
||||||
|
#define PCRE_UTF8_ERR16 16
|
||||||
|
#define PCRE_UTF8_ERR17 17
|
||||||
|
#define PCRE_UTF8_ERR18 18
|
||||||
|
#define PCRE_UTF8_ERR19 19
|
||||||
|
#define PCRE_UTF8_ERR20 20
|
||||||
|
#define PCRE_UTF8_ERR21 21
|
||||||
|
#define PCRE_UTF8_ERR22 22 /* Unused (was non-character) */
|
||||||
|
|
||||||
|
/* Specific error codes for UTF-16 validity checks */
|
||||||
|
|
||||||
|
#define PCRE_UTF16_ERR0 0
|
||||||
|
#define PCRE_UTF16_ERR1 1
|
||||||
|
#define PCRE_UTF16_ERR2 2
|
||||||
|
#define PCRE_UTF16_ERR3 3
|
||||||
|
#define PCRE_UTF16_ERR4 4 /* Unused (was non-character) */
|
||||||
|
|
||||||
|
/* Specific error codes for UTF-32 validity checks */
|
||||||
|
|
||||||
|
#define PCRE_UTF32_ERR0 0
|
||||||
|
#define PCRE_UTF32_ERR1 1
|
||||||
|
#define PCRE_UTF32_ERR2 2 /* Unused (was non-character) */
|
||||||
|
#define PCRE_UTF32_ERR3 3
|
||||||
|
|
||||||
/* Request types for pcre_fullinfo() */
|
/* Request types for pcre_fullinfo() */
|
||||||
|
|
||||||
@ -171,6 +270,17 @@ extern "C" {
|
|||||||
#define PCRE_INFO_OKPARTIAL 12
|
#define PCRE_INFO_OKPARTIAL 12
|
||||||
#define PCRE_INFO_JCHANGED 13
|
#define PCRE_INFO_JCHANGED 13
|
||||||
#define PCRE_INFO_HASCRORLF 14
|
#define PCRE_INFO_HASCRORLF 14
|
||||||
|
#define PCRE_INFO_MINLENGTH 15
|
||||||
|
#define PCRE_INFO_JIT 16
|
||||||
|
#define PCRE_INFO_JITSIZE 17
|
||||||
|
#define PCRE_INFO_MAXLOOKBEHIND 18
|
||||||
|
#define PCRE_INFO_FIRSTCHARACTER 19
|
||||||
|
#define PCRE_INFO_FIRSTCHARACTERFLAGS 20
|
||||||
|
#define PCRE_INFO_REQUIREDCHAR 21
|
||||||
|
#define PCRE_INFO_REQUIREDCHARFLAGS 22
|
||||||
|
#define PCRE_INFO_MATCHLIMIT 23
|
||||||
|
#define PCRE_INFO_RECURSIONLIMIT 24
|
||||||
|
#define PCRE_INFO_MATCH_EMPTY 25
|
||||||
|
|
||||||
/* Request types for pcre_config(). Do not re-arrange, in order to remain
|
/* Request types for pcre_config(). Do not re-arrange, in order to remain
|
||||||
compatible. */
|
compatible. */
|
||||||
@ -184,8 +294,21 @@ compatible. */
|
|||||||
#define PCRE_CONFIG_UNICODE_PROPERTIES 6
|
#define PCRE_CONFIG_UNICODE_PROPERTIES 6
|
||||||
#define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7
|
#define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7
|
||||||
#define PCRE_CONFIG_BSR 8
|
#define PCRE_CONFIG_BSR 8
|
||||||
|
#define PCRE_CONFIG_JIT 9
|
||||||
|
#define PCRE_CONFIG_UTF16 10
|
||||||
|
#define PCRE_CONFIG_JITTARGET 11
|
||||||
|
#define PCRE_CONFIG_UTF32 12
|
||||||
|
#define PCRE_CONFIG_PARENS_LIMIT 13
|
||||||
|
|
||||||
/* Bit flags for the pcre_extra structure. Do not re-arrange or redefine
|
/* Request types for pcre_study(). Do not re-arrange, in order to remain
|
||||||
|
compatible. */
|
||||||
|
|
||||||
|
#define PCRE_STUDY_JIT_COMPILE 0x0001
|
||||||
|
#define PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE 0x0002
|
||||||
|
#define PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE 0x0004
|
||||||
|
#define PCRE_STUDY_EXTRA_NEEDED 0x0008
|
||||||
|
|
||||||
|
/* Bit flags for the pcre[16|32]_extra structure. Do not re-arrange or redefine
|
||||||
these bits, just add new ones on the end, in order to remain compatible. */
|
these bits, just add new ones on the end, in order to remain compatible. */
|
||||||
|
|
||||||
#define PCRE_EXTRA_STUDY_DATA 0x0001
|
#define PCRE_EXTRA_STUDY_DATA 0x0001
|
||||||
@ -193,12 +316,51 @@ these bits, just add new ones on the end, in order to remain compatible. */
|
|||||||
#define PCRE_EXTRA_CALLOUT_DATA 0x0004
|
#define PCRE_EXTRA_CALLOUT_DATA 0x0004
|
||||||
#define PCRE_EXTRA_TABLES 0x0008
|
#define PCRE_EXTRA_TABLES 0x0008
|
||||||
#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0x0010
|
#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0x0010
|
||||||
|
#define PCRE_EXTRA_MARK 0x0020
|
||||||
|
#define PCRE_EXTRA_EXECUTABLE_JIT 0x0040
|
||||||
|
|
||||||
/* Types */
|
/* Types */
|
||||||
|
|
||||||
struct real_pcre; /* declaration; the definition is private */
|
struct real_pcre; /* declaration; the definition is private */
|
||||||
typedef struct real_pcre pcre;
|
typedef struct real_pcre pcre;
|
||||||
|
|
||||||
|
struct real_pcre16; /* declaration; the definition is private */
|
||||||
|
typedef struct real_pcre16 pcre16;
|
||||||
|
|
||||||
|
struct real_pcre32; /* declaration; the definition is private */
|
||||||
|
typedef struct real_pcre32 pcre32;
|
||||||
|
|
||||||
|
struct real_pcre_jit_stack; /* declaration; the definition is private */
|
||||||
|
typedef struct real_pcre_jit_stack pcre_jit_stack;
|
||||||
|
|
||||||
|
struct real_pcre16_jit_stack; /* declaration; the definition is private */
|
||||||
|
typedef struct real_pcre16_jit_stack pcre16_jit_stack;
|
||||||
|
|
||||||
|
struct real_pcre32_jit_stack; /* declaration; the definition is private */
|
||||||
|
typedef struct real_pcre32_jit_stack pcre32_jit_stack;
|
||||||
|
|
||||||
|
/* If PCRE is compiled with 16 bit character support, PCRE_UCHAR16 must contain
|
||||||
|
a 16 bit wide unsigned data type. Otherwise it can be a dummy data type since
|
||||||
|
pcre16 functions are not implemented. There is a check for this in pcre_internal.h. */
|
||||||
|
#ifndef PCRE_UCHAR16
|
||||||
|
#define PCRE_UCHAR16 unsigned short
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef PCRE_SPTR16
|
||||||
|
#define PCRE_SPTR16 const PCRE_UCHAR16 *
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* If PCRE is compiled with 32 bit character support, PCRE_UCHAR32 must contain
|
||||||
|
a 32 bit wide unsigned data type. Otherwise it can be a dummy data type since
|
||||||
|
pcre32 functions are not implemented. There is a check for this in pcre_internal.h. */
|
||||||
|
#ifndef PCRE_UCHAR32
|
||||||
|
#define PCRE_UCHAR32 unsigned int
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef PCRE_SPTR32
|
||||||
|
#define PCRE_SPTR32 const PCRE_UCHAR32 *
|
||||||
|
#endif
|
||||||
|
|
||||||
/* When PCRE is compiled as a C++ library, the subject pointer type can be
|
/* When PCRE is compiled as a C++ library, the subject pointer type can be
|
||||||
replaced with a custom type. For conventional use, the public interface is a
|
replaced with a custom type. For conventional use, the public interface is a
|
||||||
const char *. */
|
const char *. */
|
||||||
@ -218,8 +380,36 @@ typedef struct pcre_extra {
|
|||||||
void *callout_data; /* Data passed back in callouts */
|
void *callout_data; /* Data passed back in callouts */
|
||||||
const unsigned char *tables; /* Pointer to character tables */
|
const unsigned char *tables; /* Pointer to character tables */
|
||||||
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
|
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
|
||||||
|
unsigned char **mark; /* For passing back a mark pointer */
|
||||||
|
void *executable_jit; /* Contains a pointer to a compiled jit code */
|
||||||
} pcre_extra;
|
} pcre_extra;
|
||||||
|
|
||||||
|
/* Same structure as above, but with 16 bit char pointers. */
|
||||||
|
|
||||||
|
typedef struct pcre16_extra {
|
||||||
|
unsigned long int flags; /* Bits for which fields are set */
|
||||||
|
void *study_data; /* Opaque data from pcre_study() */
|
||||||
|
unsigned long int match_limit; /* Maximum number of calls to match() */
|
||||||
|
void *callout_data; /* Data passed back in callouts */
|
||||||
|
const unsigned char *tables; /* Pointer to character tables */
|
||||||
|
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
|
||||||
|
PCRE_UCHAR16 **mark; /* For passing back a mark pointer */
|
||||||
|
void *executable_jit; /* Contains a pointer to a compiled jit code */
|
||||||
|
} pcre16_extra;
|
||||||
|
|
||||||
|
/* Same structure as above, but with 32 bit char pointers. */
|
||||||
|
|
||||||
|
typedef struct pcre32_extra {
|
||||||
|
unsigned long int flags; /* Bits for which fields are set */
|
||||||
|
void *study_data; /* Opaque data from pcre_study() */
|
||||||
|
unsigned long int match_limit; /* Maximum number of calls to match() */
|
||||||
|
void *callout_data; /* Data passed back in callouts */
|
||||||
|
const unsigned char *tables; /* Pointer to character tables */
|
||||||
|
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
|
||||||
|
PCRE_UCHAR32 **mark; /* For passing back a mark pointer */
|
||||||
|
void *executable_jit; /* Contains a pointer to a compiled jit code */
|
||||||
|
} pcre32_extra;
|
||||||
|
|
||||||
/* The structure for passing out data via the pcre_callout_function. We use a
|
/* The structure for passing out data via the pcre_callout_function. We use a
|
||||||
structure so that new fields can be added on the end in future versions,
|
structure so that new fields can be added on the end in future versions,
|
||||||
without changing the API of the function, thereby allowing old clients to work
|
without changing the API of the function, thereby allowing old clients to work
|
||||||
@ -240,9 +430,55 @@ typedef struct pcre_callout_block {
|
|||||||
/* ------------------- Added for Version 1 -------------------------- */
|
/* ------------------- Added for Version 1 -------------------------- */
|
||||||
int pattern_position; /* Offset to next item in the pattern */
|
int pattern_position; /* Offset to next item in the pattern */
|
||||||
int next_item_length; /* Length of next item in the pattern */
|
int next_item_length; /* Length of next item in the pattern */
|
||||||
|
/* ------------------- Added for Version 2 -------------------------- */
|
||||||
|
const unsigned char *mark; /* Pointer to current mark or NULL */
|
||||||
/* ------------------------------------------------------------------ */
|
/* ------------------------------------------------------------------ */
|
||||||
} pcre_callout_block;
|
} pcre_callout_block;
|
||||||
|
|
||||||
|
/* Same structure as above, but with 16 bit char pointers. */
|
||||||
|
|
||||||
|
typedef struct pcre16_callout_block {
|
||||||
|
int version; /* Identifies version of block */
|
||||||
|
/* ------------------------ Version 0 ------------------------------- */
|
||||||
|
int callout_number; /* Number compiled into pattern */
|
||||||
|
int *offset_vector; /* The offset vector */
|
||||||
|
PCRE_SPTR16 subject; /* The subject being matched */
|
||||||
|
int subject_length; /* The length of the subject */
|
||||||
|
int start_match; /* Offset to start of this match attempt */
|
||||||
|
int current_position; /* Where we currently are in the subject */
|
||||||
|
int capture_top; /* Max current capture */
|
||||||
|
int capture_last; /* Most recently closed capture */
|
||||||
|
void *callout_data; /* Data passed in with the call */
|
||||||
|
/* ------------------- Added for Version 1 -------------------------- */
|
||||||
|
int pattern_position; /* Offset to next item in the pattern */
|
||||||
|
int next_item_length; /* Length of next item in the pattern */
|
||||||
|
/* ------------------- Added for Version 2 -------------------------- */
|
||||||
|
const PCRE_UCHAR16 *mark; /* Pointer to current mark or NULL */
|
||||||
|
/* ------------------------------------------------------------------ */
|
||||||
|
} pcre16_callout_block;
|
||||||
|
|
||||||
|
/* Same structure as above, but with 32 bit char pointers. */
|
||||||
|
|
||||||
|
typedef struct pcre32_callout_block {
|
||||||
|
int version; /* Identifies version of block */
|
||||||
|
/* ------------------------ Version 0 ------------------------------- */
|
||||||
|
int callout_number; /* Number compiled into pattern */
|
||||||
|
int *offset_vector; /* The offset vector */
|
||||||
|
PCRE_SPTR32 subject; /* The subject being matched */
|
||||||
|
int subject_length; /* The length of the subject */
|
||||||
|
int start_match; /* Offset to start of this match attempt */
|
||||||
|
int current_position; /* Where we currently are in the subject */
|
||||||
|
int capture_top; /* Max current capture */
|
||||||
|
int capture_last; /* Most recently closed capture */
|
||||||
|
void *callout_data; /* Data passed in with the call */
|
||||||
|
/* ------------------- Added for Version 1 -------------------------- */
|
||||||
|
int pattern_position; /* Offset to next item in the pattern */
|
||||||
|
int next_item_length; /* Length of next item in the pattern */
|
||||||
|
/* ------------------- Added for Version 2 -------------------------- */
|
||||||
|
const PCRE_UCHAR32 *mark; /* Pointer to current mark or NULL */
|
||||||
|
/* ------------------------------------------------------------------ */
|
||||||
|
} pcre32_callout_block;
|
||||||
|
|
||||||
/* Indirection for store get and free functions. These can be set to
|
/* Indirection for store get and free functions. These can be set to
|
||||||
alternative malloc/free functions if required. Special ones are used in the
|
alternative malloc/free functions if required. Special ones are used in the
|
||||||
non-recursive case for "frames". There is also an optional callout function
|
non-recursive case for "frames". There is also an optional callout function
|
||||||
@ -255,47 +491,184 @@ PCRE_EXP_DECL void (*pcre_free)(void *);
|
|||||||
PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t);
|
PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t);
|
||||||
PCRE_EXP_DECL void (*pcre_stack_free)(void *);
|
PCRE_EXP_DECL void (*pcre_stack_free)(void *);
|
||||||
PCRE_EXP_DECL int (*pcre_callout)(pcre_callout_block *);
|
PCRE_EXP_DECL int (*pcre_callout)(pcre_callout_block *);
|
||||||
|
PCRE_EXP_DECL int (*pcre_stack_guard)(void);
|
||||||
|
|
||||||
|
PCRE_EXP_DECL void *(*pcre16_malloc)(size_t);
|
||||||
|
PCRE_EXP_DECL void (*pcre16_free)(void *);
|
||||||
|
PCRE_EXP_DECL void *(*pcre16_stack_malloc)(size_t);
|
||||||
|
PCRE_EXP_DECL void (*pcre16_stack_free)(void *);
|
||||||
|
PCRE_EXP_DECL int (*pcre16_callout)(pcre16_callout_block *);
|
||||||
|
PCRE_EXP_DECL int (*pcre16_stack_guard)(void);
|
||||||
|
|
||||||
|
PCRE_EXP_DECL void *(*pcre32_malloc)(size_t);
|
||||||
|
PCRE_EXP_DECL void (*pcre32_free)(void *);
|
||||||
|
PCRE_EXP_DECL void *(*pcre32_stack_malloc)(size_t);
|
||||||
|
PCRE_EXP_DECL void (*pcre32_stack_free)(void *);
|
||||||
|
PCRE_EXP_DECL int (*pcre32_callout)(pcre32_callout_block *);
|
||||||
|
PCRE_EXP_DECL int (*pcre32_stack_guard)(void);
|
||||||
#else /* VPCOMPAT */
|
#else /* VPCOMPAT */
|
||||||
PCRE_EXP_DECL void *pcre_malloc(size_t);
|
PCRE_EXP_DECL void *pcre_malloc(size_t);
|
||||||
PCRE_EXP_DECL void pcre_free(void *);
|
PCRE_EXP_DECL void pcre_free(void *);
|
||||||
PCRE_EXP_DECL void *pcre_stack_malloc(size_t);
|
PCRE_EXP_DECL void *pcre_stack_malloc(size_t);
|
||||||
PCRE_EXP_DECL void pcre_stack_free(void *);
|
PCRE_EXP_DECL void pcre_stack_free(void *);
|
||||||
PCRE_EXP_DECL int pcre_callout(pcre_callout_block *);
|
PCRE_EXP_DECL int pcre_callout(pcre_callout_block *);
|
||||||
|
PCRE_EXP_DECL int pcre_stack_guard(void);
|
||||||
|
|
||||||
|
PCRE_EXP_DECL void *pcre16_malloc(size_t);
|
||||||
|
PCRE_EXP_DECL void pcre16_free(void *);
|
||||||
|
PCRE_EXP_DECL void *pcre16_stack_malloc(size_t);
|
||||||
|
PCRE_EXP_DECL void pcre16_stack_free(void *);
|
||||||
|
PCRE_EXP_DECL int pcre16_callout(pcre16_callout_block *);
|
||||||
|
PCRE_EXP_DECL int pcre16_stack_guard(void);
|
||||||
|
|
||||||
|
PCRE_EXP_DECL void *pcre32_malloc(size_t);
|
||||||
|
PCRE_EXP_DECL void pcre32_free(void *);
|
||||||
|
PCRE_EXP_DECL void *pcre32_stack_malloc(size_t);
|
||||||
|
PCRE_EXP_DECL void pcre32_stack_free(void *);
|
||||||
|
PCRE_EXP_DECL int pcre32_callout(pcre32_callout_block *);
|
||||||
|
PCRE_EXP_DECL int pcre32_stack_guard(void);
|
||||||
#endif /* VPCOMPAT */
|
#endif /* VPCOMPAT */
|
||||||
|
|
||||||
|
/* User defined callback which provides a stack just before the match starts. */
|
||||||
|
|
||||||
|
typedef pcre_jit_stack *(*pcre_jit_callback)(void *);
|
||||||
|
typedef pcre16_jit_stack *(*pcre16_jit_callback)(void *);
|
||||||
|
typedef pcre32_jit_stack *(*pcre32_jit_callback)(void *);
|
||||||
|
|
||||||
/* Exported PCRE functions */
|
/* Exported PCRE functions */
|
||||||
|
|
||||||
PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *,
|
PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *,
|
||||||
const unsigned char *);
|
const unsigned char *);
|
||||||
|
PCRE_EXP_DECL pcre16 *pcre16_compile(PCRE_SPTR16, int, const char **, int *,
|
||||||
|
const unsigned char *);
|
||||||
|
PCRE_EXP_DECL pcre32 *pcre32_compile(PCRE_SPTR32, int, const char **, int *,
|
||||||
|
const unsigned char *);
|
||||||
PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **,
|
PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **,
|
||||||
int *, const unsigned char *);
|
int *, const unsigned char *);
|
||||||
|
PCRE_EXP_DECL pcre16 *pcre16_compile2(PCRE_SPTR16, int, int *, const char **,
|
||||||
|
int *, const unsigned char *);
|
||||||
|
PCRE_EXP_DECL pcre32 *pcre32_compile2(PCRE_SPTR32, int, int *, const char **,
|
||||||
|
int *, const unsigned char *);
|
||||||
PCRE_EXP_DECL int pcre_config(int, void *);
|
PCRE_EXP_DECL int pcre_config(int, void *);
|
||||||
|
PCRE_EXP_DECL int pcre16_config(int, void *);
|
||||||
|
PCRE_EXP_DECL int pcre32_config(int, void *);
|
||||||
PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *,
|
PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *,
|
||||||
int *, int, const char *, char *, int);
|
int *, int, const char *, char *, int);
|
||||||
PCRE_EXP_DECL int pcre_copy_substring(const char *, int *, int, int, char *,
|
PCRE_EXP_DECL int pcre16_copy_named_substring(const pcre16 *, PCRE_SPTR16,
|
||||||
int);
|
int *, int, PCRE_SPTR16, PCRE_UCHAR16 *, int);
|
||||||
|
PCRE_EXP_DECL int pcre32_copy_named_substring(const pcre32 *, PCRE_SPTR32,
|
||||||
|
int *, int, PCRE_SPTR32, PCRE_UCHAR32 *, int);
|
||||||
|
PCRE_EXP_DECL int pcre_copy_substring(const char *, int *, int, int,
|
||||||
|
char *, int);
|
||||||
|
PCRE_EXP_DECL int pcre16_copy_substring(PCRE_SPTR16, int *, int, int,
|
||||||
|
PCRE_UCHAR16 *, int);
|
||||||
|
PCRE_EXP_DECL int pcre32_copy_substring(PCRE_SPTR32, int *, int, int,
|
||||||
|
PCRE_UCHAR32 *, int);
|
||||||
PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const pcre_extra *,
|
PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const pcre_extra *,
|
||||||
const char *, int, int, int, int *, int , int *, int);
|
const char *, int, int, int, int *, int , int *, int);
|
||||||
|
PCRE_EXP_DECL int pcre16_dfa_exec(const pcre16 *, const pcre16_extra *,
|
||||||
|
PCRE_SPTR16, int, int, int, int *, int , int *, int);
|
||||||
|
PCRE_EXP_DECL int pcre32_dfa_exec(const pcre32 *, const pcre32_extra *,
|
||||||
|
PCRE_SPTR32, int, int, int, int *, int , int *, int);
|
||||||
PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
|
PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
|
||||||
int, int, int, int *, int);
|
int, int, int, int *, int);
|
||||||
|
PCRE_EXP_DECL int pcre16_exec(const pcre16 *, const pcre16_extra *,
|
||||||
|
PCRE_SPTR16, int, int, int, int *, int);
|
||||||
|
PCRE_EXP_DECL int pcre32_exec(const pcre32 *, const pcre32_extra *,
|
||||||
|
PCRE_SPTR32, int, int, int, int *, int);
|
||||||
|
PCRE_EXP_DECL int pcre_jit_exec(const pcre *, const pcre_extra *,
|
||||||
|
PCRE_SPTR, int, int, int, int *, int,
|
||||||
|
pcre_jit_stack *);
|
||||||
|
PCRE_EXP_DECL int pcre16_jit_exec(const pcre16 *, const pcre16_extra *,
|
||||||
|
PCRE_SPTR16, int, int, int, int *, int,
|
||||||
|
pcre16_jit_stack *);
|
||||||
|
PCRE_EXP_DECL int pcre32_jit_exec(const pcre32 *, const pcre32_extra *,
|
||||||
|
PCRE_SPTR32, int, int, int, int *, int,
|
||||||
|
pcre32_jit_stack *);
|
||||||
PCRE_EXP_DECL void pcre_free_substring(const char *);
|
PCRE_EXP_DECL void pcre_free_substring(const char *);
|
||||||
|
PCRE_EXP_DECL void pcre16_free_substring(PCRE_SPTR16);
|
||||||
|
PCRE_EXP_DECL void pcre32_free_substring(PCRE_SPTR32);
|
||||||
PCRE_EXP_DECL void pcre_free_substring_list(const char **);
|
PCRE_EXP_DECL void pcre_free_substring_list(const char **);
|
||||||
|
PCRE_EXP_DECL void pcre16_free_substring_list(PCRE_SPTR16 *);
|
||||||
|
PCRE_EXP_DECL void pcre32_free_substring_list(PCRE_SPTR32 *);
|
||||||
PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int,
|
PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int,
|
||||||
void *);
|
void *);
|
||||||
|
PCRE_EXP_DECL int pcre16_fullinfo(const pcre16 *, const pcre16_extra *, int,
|
||||||
|
void *);
|
||||||
|
PCRE_EXP_DECL int pcre32_fullinfo(const pcre32 *, const pcre32_extra *, int,
|
||||||
|
void *);
|
||||||
PCRE_EXP_DECL int pcre_get_named_substring(const pcre *, const char *,
|
PCRE_EXP_DECL int pcre_get_named_substring(const pcre *, const char *,
|
||||||
int *, int, const char *, const char **);
|
int *, int, const char *, const char **);
|
||||||
|
PCRE_EXP_DECL int pcre16_get_named_substring(const pcre16 *, PCRE_SPTR16,
|
||||||
|
int *, int, PCRE_SPTR16, PCRE_SPTR16 *);
|
||||||
|
PCRE_EXP_DECL int pcre32_get_named_substring(const pcre32 *, PCRE_SPTR32,
|
||||||
|
int *, int, PCRE_SPTR32, PCRE_SPTR32 *);
|
||||||
PCRE_EXP_DECL int pcre_get_stringnumber(const pcre *, const char *);
|
PCRE_EXP_DECL int pcre_get_stringnumber(const pcre *, const char *);
|
||||||
|
PCRE_EXP_DECL int pcre16_get_stringnumber(const pcre16 *, PCRE_SPTR16);
|
||||||
|
PCRE_EXP_DECL int pcre32_get_stringnumber(const pcre32 *, PCRE_SPTR32);
|
||||||
PCRE_EXP_DECL int pcre_get_stringtable_entries(const pcre *, const char *,
|
PCRE_EXP_DECL int pcre_get_stringtable_entries(const pcre *, const char *,
|
||||||
char **, char **);
|
char **, char **);
|
||||||
|
PCRE_EXP_DECL int pcre16_get_stringtable_entries(const pcre16 *, PCRE_SPTR16,
|
||||||
|
PCRE_UCHAR16 **, PCRE_UCHAR16 **);
|
||||||
|
PCRE_EXP_DECL int pcre32_get_stringtable_entries(const pcre32 *, PCRE_SPTR32,
|
||||||
|
PCRE_UCHAR32 **, PCRE_UCHAR32 **);
|
||||||
PCRE_EXP_DECL int pcre_get_substring(const char *, int *, int, int,
|
PCRE_EXP_DECL int pcre_get_substring(const char *, int *, int, int,
|
||||||
const char **);
|
const char **);
|
||||||
|
PCRE_EXP_DECL int pcre16_get_substring(PCRE_SPTR16, int *, int, int,
|
||||||
|
PCRE_SPTR16 *);
|
||||||
|
PCRE_EXP_DECL int pcre32_get_substring(PCRE_SPTR32, int *, int, int,
|
||||||
|
PCRE_SPTR32 *);
|
||||||
PCRE_EXP_DECL int pcre_get_substring_list(const char *, int *, int,
|
PCRE_EXP_DECL int pcre_get_substring_list(const char *, int *, int,
|
||||||
const char ***);
|
const char ***);
|
||||||
PCRE_EXP_DECL int pcre_info(const pcre *, int *, int *);
|
PCRE_EXP_DECL int pcre16_get_substring_list(PCRE_SPTR16, int *, int,
|
||||||
|
PCRE_SPTR16 **);
|
||||||
|
PCRE_EXP_DECL int pcre32_get_substring_list(PCRE_SPTR32, int *, int,
|
||||||
|
PCRE_SPTR32 **);
|
||||||
PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
|
PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
|
||||||
|
PCRE_EXP_DECL const unsigned char *pcre16_maketables(void);
|
||||||
|
PCRE_EXP_DECL const unsigned char *pcre32_maketables(void);
|
||||||
PCRE_EXP_DECL int pcre_refcount(pcre *, int);
|
PCRE_EXP_DECL int pcre_refcount(pcre *, int);
|
||||||
|
PCRE_EXP_DECL int pcre16_refcount(pcre16 *, int);
|
||||||
|
PCRE_EXP_DECL int pcre32_refcount(pcre32 *, int);
|
||||||
PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **);
|
PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **);
|
||||||
|
PCRE_EXP_DECL pcre16_extra *pcre16_study(const pcre16 *, int, const char **);
|
||||||
|
PCRE_EXP_DECL pcre32_extra *pcre32_study(const pcre32 *, int, const char **);
|
||||||
|
PCRE_EXP_DECL void pcre_free_study(pcre_extra *);
|
||||||
|
PCRE_EXP_DECL void pcre16_free_study(pcre16_extra *);
|
||||||
|
PCRE_EXP_DECL void pcre32_free_study(pcre32_extra *);
|
||||||
PCRE_EXP_DECL const char *pcre_version(void);
|
PCRE_EXP_DECL const char *pcre_version(void);
|
||||||
|
PCRE_EXP_DECL const char *pcre16_version(void);
|
||||||
|
PCRE_EXP_DECL const char *pcre32_version(void);
|
||||||
|
|
||||||
|
/* Utility functions for byte order swaps. */
|
||||||
|
PCRE_EXP_DECL int pcre_pattern_to_host_byte_order(pcre *, pcre_extra *,
|
||||||
|
const unsigned char *);
|
||||||
|
PCRE_EXP_DECL int pcre16_pattern_to_host_byte_order(pcre16 *, pcre16_extra *,
|
||||||
|
const unsigned char *);
|
||||||
|
PCRE_EXP_DECL int pcre32_pattern_to_host_byte_order(pcre32 *, pcre32_extra *,
|
||||||
|
const unsigned char *);
|
||||||
|
PCRE_EXP_DECL int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *,
|
||||||
|
PCRE_SPTR16, int, int *, int);
|
||||||
|
PCRE_EXP_DECL int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *,
|
||||||
|
PCRE_SPTR32, int, int *, int);
|
||||||
|
|
||||||
|
/* JIT compiler related functions. */
|
||||||
|
|
||||||
|
PCRE_EXP_DECL pcre_jit_stack *pcre_jit_stack_alloc(int, int);
|
||||||
|
PCRE_EXP_DECL pcre16_jit_stack *pcre16_jit_stack_alloc(int, int);
|
||||||
|
PCRE_EXP_DECL pcre32_jit_stack *pcre32_jit_stack_alloc(int, int);
|
||||||
|
PCRE_EXP_DECL void pcre_jit_stack_free(pcre_jit_stack *);
|
||||||
|
PCRE_EXP_DECL void pcre16_jit_stack_free(pcre16_jit_stack *);
|
||||||
|
PCRE_EXP_DECL void pcre32_jit_stack_free(pcre32_jit_stack *);
|
||||||
|
PCRE_EXP_DECL void pcre_assign_jit_stack(pcre_extra *,
|
||||||
|
pcre_jit_callback, void *);
|
||||||
|
PCRE_EXP_DECL void pcre16_assign_jit_stack(pcre16_extra *,
|
||||||
|
pcre16_jit_callback, void *);
|
||||||
|
PCRE_EXP_DECL void pcre32_assign_jit_stack(pcre32_extra *,
|
||||||
|
pcre32_jit_callback, void *);
|
||||||
|
PCRE_EXP_DECL void pcre_jit_free_unused_memory(void);
|
||||||
|
PCRE_EXP_DECL void pcre16_jit_free_unused_memory(void);
|
||||||
|
PCRE_EXP_DECL void pcre32_jit_free_unused_memory(void);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} /* extern "C" */
|
} /* extern "C" */
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,3 +1,4 @@
|
|||||||
|
#define HAVE_CONFIG_H
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Perl-Compatible Regular Expressions *
|
* Perl-Compatible Regular Expressions *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
@ -6,7 +7,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2007 University of Cambridge
|
Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -45,6 +46,9 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||||||
#include "config.h"
|
#include "config.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Keep the original link size. */
|
||||||
|
static int real_link_size = LINK_SIZE;
|
||||||
|
|
||||||
#include "pcre_internal.h"
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
|
||||||
@ -62,18 +66,57 @@ Arguments:
|
|||||||
Returns: 0 if data returned, negative on error
|
Returns: 0 if data returned, negative on error
|
||||||
*/
|
*/
|
||||||
|
|
||||||
PCRE_EXP_DEFN int
|
#if defined COMPILE_PCRE8
|
||||||
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||||
pcre_config(int what, void *where)
|
pcre_config(int what, void *where)
|
||||||
|
#elif defined COMPILE_PCRE16
|
||||||
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||||
|
pcre16_config(int what, void *where)
|
||||||
|
#elif defined COMPILE_PCRE32
|
||||||
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||||
|
pcre32_config(int what, void *where)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
switch (what)
|
switch (what)
|
||||||
{
|
{
|
||||||
case PCRE_CONFIG_UTF8:
|
case PCRE_CONFIG_UTF8:
|
||||||
#ifdef SUPPORT_UTF8
|
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
|
||||||
|
*((int *)where) = 0;
|
||||||
|
return PCRE_ERROR_BADOPTION;
|
||||||
|
#else
|
||||||
|
#if defined SUPPORT_UTF
|
||||||
*((int *)where) = 1;
|
*((int *)where) = 1;
|
||||||
#else
|
#else
|
||||||
*((int *)where) = 0;
|
*((int *)where) = 0;
|
||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
case PCRE_CONFIG_UTF16:
|
||||||
|
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE32
|
||||||
|
*((int *)where) = 0;
|
||||||
|
return PCRE_ERROR_BADOPTION;
|
||||||
|
#else
|
||||||
|
#if defined SUPPORT_UTF
|
||||||
|
*((int *)where) = 1;
|
||||||
|
#else
|
||||||
|
*((int *)where) = 0;
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
case PCRE_CONFIG_UTF32:
|
||||||
|
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
|
||||||
|
*((int *)where) = 0;
|
||||||
|
return PCRE_ERROR_BADOPTION;
|
||||||
|
#else
|
||||||
|
#if defined SUPPORT_UTF
|
||||||
|
*((int *)where) = 1;
|
||||||
|
#else
|
||||||
|
*((int *)where) = 0;
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
|
||||||
case PCRE_CONFIG_UNICODE_PROPERTIES:
|
case PCRE_CONFIG_UNICODE_PROPERTIES:
|
||||||
#ifdef SUPPORT_UCP
|
#ifdef SUPPORT_UCP
|
||||||
@ -83,6 +126,22 @@ switch (what)
|
|||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case PCRE_CONFIG_JIT:
|
||||||
|
#ifdef SUPPORT_JIT
|
||||||
|
*((int *)where) = 1;
|
||||||
|
#else
|
||||||
|
*((int *)where) = 0;
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_CONFIG_JITTARGET:
|
||||||
|
#ifdef SUPPORT_JIT
|
||||||
|
*((const char **)where) = PRIV(jit_get_target)();
|
||||||
|
#else
|
||||||
|
*((const char **)where) = NULL;
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
|
||||||
case PCRE_CONFIG_NEWLINE:
|
case PCRE_CONFIG_NEWLINE:
|
||||||
*((int *)where) = NEWLINE;
|
*((int *)where) = NEWLINE;
|
||||||
break;
|
break;
|
||||||
@ -96,19 +155,23 @@ switch (what)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case PCRE_CONFIG_LINK_SIZE:
|
case PCRE_CONFIG_LINK_SIZE:
|
||||||
*((int *)where) = LINK_SIZE;
|
*((int *)where) = real_link_size;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PCRE_CONFIG_POSIX_MALLOC_THRESHOLD:
|
case PCRE_CONFIG_POSIX_MALLOC_THRESHOLD:
|
||||||
*((int *)where) = POSIX_MALLOC_THRESHOLD;
|
*((int *)where) = POSIX_MALLOC_THRESHOLD;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case PCRE_CONFIG_PARENS_LIMIT:
|
||||||
|
*((unsigned long int *)where) = PARENS_NEST_LIMIT;
|
||||||
|
break;
|
||||||
|
|
||||||
case PCRE_CONFIG_MATCH_LIMIT:
|
case PCRE_CONFIG_MATCH_LIMIT:
|
||||||
*((unsigned int *)where) = MATCH_LIMIT;
|
*((unsigned long int *)where) = MATCH_LIMIT;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PCRE_CONFIG_MATCH_LIMIT_RECURSION:
|
case PCRE_CONFIG_MATCH_LIMIT_RECURSION:
|
||||||
*((unsigned int *)where) = MATCH_LIMIT_RECURSION;
|
*((unsigned long int *)where) = MATCH_LIMIT_RECURSION;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PCRE_CONFIG_STACKRECURSE:
|
case PCRE_CONFIG_STACKRECURSE:
|
||||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,3 +1,4 @@
|
|||||||
|
#define HAVE_CONFIG_H
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Perl-Compatible Regular Expressions *
|
* Perl-Compatible Regular Expressions *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
@ -6,7 +7,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2007 University of Cambridge
|
Copyright (c) 1997-2013 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -65,13 +66,21 @@ Arguments:
|
|||||||
Returns: 0 if data returned, negative on error
|
Returns: 0 if data returned, negative on error
|
||||||
*/
|
*/
|
||||||
|
|
||||||
PCRE_EXP_DEFN int
|
#if defined COMPILE_PCRE8
|
||||||
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||||
void *where)
|
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data,
|
||||||
|
int what, void *where)
|
||||||
|
#elif defined COMPILE_PCRE16
|
||||||
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||||
|
pcre16_fullinfo(const pcre16 *argument_re, const pcre16_extra *extra_data,
|
||||||
|
int what, void *where)
|
||||||
|
#elif defined COMPILE_PCRE32
|
||||||
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||||
|
pcre32_fullinfo(const pcre32 *argument_re, const pcre32_extra *extra_data,
|
||||||
|
int what, void *where)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
real_pcre internal_re;
|
const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
|
||||||
pcre_study_data internal_study;
|
|
||||||
const real_pcre *re = (const real_pcre *)argument_re;
|
|
||||||
const pcre_study_data *study = NULL;
|
const pcre_study_data *study = NULL;
|
||||||
|
|
||||||
if (re == NULL || where == NULL) return PCRE_ERROR_NULL;
|
if (re == NULL || where == NULL) return PCRE_ERROR_NULL;
|
||||||
@ -79,17 +88,23 @@ if (re == NULL || where == NULL) return PCRE_ERROR_NULL;
|
|||||||
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
|
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
|
||||||
study = (const pcre_study_data *)extra_data->study_data;
|
study = (const pcre_study_data *)extra_data->study_data;
|
||||||
|
|
||||||
|
/* Check that the first field in the block is the magic number. If it is not,
|
||||||
|
return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
|
||||||
|
REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
|
||||||
|
means that the pattern is likely compiled with different endianness. */
|
||||||
|
|
||||||
if (re->magic_number != MAGIC_NUMBER)
|
if (re->magic_number != MAGIC_NUMBER)
|
||||||
{
|
return re->magic_number == REVERSED_MAGIC_NUMBER?
|
||||||
re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
|
PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
|
||||||
if (re == NULL) return PCRE_ERROR_BADMAGIC;
|
|
||||||
if (study != NULL) study = &internal_study;
|
/* Check that this pattern was compiled in the correct bit mode */
|
||||||
}
|
|
||||||
|
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
|
||||||
|
|
||||||
switch (what)
|
switch (what)
|
||||||
{
|
{
|
||||||
case PCRE_INFO_OPTIONS:
|
case PCRE_INFO_OPTIONS:
|
||||||
*((unsigned long int *)where) = re->options & PUBLIC_OPTIONS;
|
*((unsigned long int *)where) = re->options & PUBLIC_COMPILE_OPTIONS;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PCRE_INFO_SIZE:
|
case PCRE_INFO_SIZE:
|
||||||
@ -100,6 +115,18 @@ switch (what)
|
|||||||
*((size_t *)where) = (study == NULL)? 0 : study->size;
|
*((size_t *)where) = (study == NULL)? 0 : study->size;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_JITSIZE:
|
||||||
|
#ifdef SUPPORT_JIT
|
||||||
|
*((size_t *)where) =
|
||||||
|
(extra_data != NULL &&
|
||||||
|
(extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
|
||||||
|
extra_data->executable_jit != NULL)?
|
||||||
|
PRIV(jit_get_size)(extra_data->executable_jit) : 0;
|
||||||
|
#else
|
||||||
|
*((size_t *)where) = 0;
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
|
||||||
case PCRE_INFO_CAPTURECOUNT:
|
case PCRE_INFO_CAPTURECOUNT:
|
||||||
*((int *)where) = re->top_bracket;
|
*((int *)where) = re->top_bracket;
|
||||||
break;
|
break;
|
||||||
@ -110,22 +137,55 @@ switch (what)
|
|||||||
|
|
||||||
case PCRE_INFO_FIRSTBYTE:
|
case PCRE_INFO_FIRSTBYTE:
|
||||||
*((int *)where) =
|
*((int *)where) =
|
||||||
((re->flags & PCRE_FIRSTSET) != 0)? re->first_byte :
|
((re->flags & PCRE_FIRSTSET) != 0)? (int)re->first_char :
|
||||||
((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
|
((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_FIRSTCHARACTER:
|
||||||
|
*((pcre_uint32 *)where) =
|
||||||
|
(re->flags & PCRE_FIRSTSET) != 0 ? re->first_char : 0;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_FIRSTCHARACTERFLAGS:
|
||||||
|
*((int *)where) =
|
||||||
|
((re->flags & PCRE_FIRSTSET) != 0) ? 1 :
|
||||||
|
((re->flags & PCRE_STARTLINE) != 0) ? 2 : 0;
|
||||||
|
break;
|
||||||
|
|
||||||
/* Make sure we pass back the pointer to the bit vector in the external
|
/* Make sure we pass back the pointer to the bit vector in the external
|
||||||
block, not the internal copy (with flipped integer fields). */
|
block, not the internal copy (with flipped integer fields). */
|
||||||
|
|
||||||
case PCRE_INFO_FIRSTTABLE:
|
case PCRE_INFO_FIRSTTABLE:
|
||||||
*((const uschar **)where) =
|
*((const pcre_uint8 **)where) =
|
||||||
(study != NULL && (study->options & PCRE_STUDY_MAPPED) != 0)?
|
(study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)?
|
||||||
((const pcre_study_data *)extra_data->study_data)->start_bits : NULL;
|
((const pcre_study_data *)extra_data->study_data)->start_bits : NULL;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_MINLENGTH:
|
||||||
|
*((int *)where) =
|
||||||
|
(study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0)?
|
||||||
|
(int)(study->minlength) : -1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_JIT:
|
||||||
|
*((int *)where) = extra_data != NULL &&
|
||||||
|
(extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
|
||||||
|
extra_data->executable_jit != NULL;
|
||||||
|
break;
|
||||||
|
|
||||||
case PCRE_INFO_LASTLITERAL:
|
case PCRE_INFO_LASTLITERAL:
|
||||||
*((int *)where) =
|
*((int *)where) =
|
||||||
((re->flags & PCRE_REQCHSET) != 0)? re->req_byte : -1;
|
((re->flags & PCRE_REQCHSET) != 0)? (int)re->req_char : -1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_REQUIREDCHAR:
|
||||||
|
*((pcre_uint32 *)where) =
|
||||||
|
((re->flags & PCRE_REQCHSET) != 0) ? re->req_char : 0;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_REQUIREDCHARFLAGS:
|
||||||
|
*((int *)where) =
|
||||||
|
((re->flags & PCRE_REQCHSET) != 0);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PCRE_INFO_NAMEENTRYSIZE:
|
case PCRE_INFO_NAMEENTRYSIZE:
|
||||||
@ -137,13 +197,16 @@ switch (what)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case PCRE_INFO_NAMETABLE:
|
case PCRE_INFO_NAMETABLE:
|
||||||
*((const uschar **)where) = (const uschar *)re + re->name_table_offset;
|
*((const pcre_uchar **)where) = (const pcre_uchar *)re + re->name_table_offset;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PCRE_INFO_DEFAULT_TABLES:
|
case PCRE_INFO_DEFAULT_TABLES:
|
||||||
*((const uschar **)where) = (const uschar *)(_pcre_default_tables);
|
*((const pcre_uint8 **)where) = (const pcre_uint8 *)(PRIV(default_tables));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
/* From release 8.00 this will always return TRUE because NOPARTIAL is
|
||||||
|
no longer ever set (the restrictions have been removed). */
|
||||||
|
|
||||||
case PCRE_INFO_OKPARTIAL:
|
case PCRE_INFO_OKPARTIAL:
|
||||||
*((int *)where) = (re->flags & PCRE_NOPARTIAL) == 0;
|
*((int *)where) = (re->flags & PCRE_NOPARTIAL) == 0;
|
||||||
break;
|
break;
|
||||||
@ -156,6 +219,24 @@ switch (what)
|
|||||||
*((int *)where) = (re->flags & PCRE_HASCRORLF) != 0;
|
*((int *)where) = (re->flags & PCRE_HASCRORLF) != 0;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_MAXLOOKBEHIND:
|
||||||
|
*((int *)where) = re->max_lookbehind;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_MATCHLIMIT:
|
||||||
|
if ((re->flags & PCRE_MLSET) == 0) return PCRE_ERROR_UNSET;
|
||||||
|
*((pcre_uint32 *)where) = re->limit_match;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_RECURSIONLIMIT:
|
||||||
|
if ((re->flags & PCRE_RLSET) == 0) return PCRE_ERROR_UNSET;
|
||||||
|
*((pcre_uint32 *)where) = re->limit_recursion;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE_INFO_MATCH_EMPTY:
|
||||||
|
*((int *)where) = (re->flags & PCRE_MATCH_EMPTY) != 0;
|
||||||
|
break;
|
||||||
|
|
||||||
default: return PCRE_ERROR_BADOPTION;
|
default: return PCRE_ERROR_BADOPTION;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
#define HAVE_CONFIG_H
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Perl-Compatible Regular Expressions *
|
* Perl-Compatible Regular Expressions *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
@ -6,7 +7,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2007 University of Cambridge
|
Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -65,14 +66,23 @@ Returns: the number of the named parentheses, or a negative number
|
|||||||
(PCRE_ERROR_NOSUBSTRING) if not found
|
(PCRE_ERROR_NOSUBSTRING) if not found
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int
|
#if defined COMPILE_PCRE8
|
||||||
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||||
pcre_get_stringnumber(const pcre *code, const char *stringname)
|
pcre_get_stringnumber(const pcre *code, const char *stringname)
|
||||||
|
#elif defined COMPILE_PCRE16
|
||||||
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||||
|
pcre16_get_stringnumber(const pcre16 *code, PCRE_SPTR16 stringname)
|
||||||
|
#elif defined COMPILE_PCRE32
|
||||||
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||||
|
pcre32_get_stringnumber(const pcre32 *code, PCRE_SPTR32 stringname)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
int rc;
|
int rc;
|
||||||
int entrysize;
|
int entrysize;
|
||||||
int top, bot;
|
int top, bot;
|
||||||
uschar *nametable;
|
pcre_uchar *nametable;
|
||||||
|
|
||||||
|
#ifdef COMPILE_PCRE8
|
||||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||||
return rc;
|
return rc;
|
||||||
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||||
@ -81,14 +91,36 @@ if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
|||||||
return rc;
|
return rc;
|
||||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||||
return rc;
|
return rc;
|
||||||
|
#endif
|
||||||
|
#ifdef COMPILE_PCRE16
|
||||||
|
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||||
|
return rc;
|
||||||
|
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||||
|
|
||||||
|
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
||||||
|
return rc;
|
||||||
|
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||||
|
return rc;
|
||||||
|
#endif
|
||||||
|
#ifdef COMPILE_PCRE32
|
||||||
|
if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||||
|
return rc;
|
||||||
|
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||||
|
|
||||||
|
if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
||||||
|
return rc;
|
||||||
|
if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||||
|
return rc;
|
||||||
|
#endif
|
||||||
|
|
||||||
bot = 0;
|
bot = 0;
|
||||||
while (top > bot)
|
while (top > bot)
|
||||||
{
|
{
|
||||||
int mid = (top + bot) / 2;
|
int mid = (top + bot) / 2;
|
||||||
uschar *entry = nametable + entrysize*mid;
|
pcre_uchar *entry = nametable + entrysize*mid;
|
||||||
int c = strcmp(stringname, (char *)(entry + 2));
|
int c = STRCMP_UC_UC((pcre_uchar *)stringname,
|
||||||
if (c == 0) return (entry[0] << 8) + entry[1];
|
(pcre_uchar *)(entry + IMM2_SIZE));
|
||||||
|
if (c == 0) return GET2(entry, 0);
|
||||||
if (c > 0) bot = mid + 1; else top = mid;
|
if (c > 0) bot = mid + 1; else top = mid;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -114,15 +146,26 @@ Returns: the length of each entry, or a negative number
|
|||||||
(PCRE_ERROR_NOSUBSTRING) if not found
|
(PCRE_ERROR_NOSUBSTRING) if not found
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int
|
#if defined COMPILE_PCRE8
|
||||||
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||||
pcre_get_stringtable_entries(const pcre *code, const char *stringname,
|
pcre_get_stringtable_entries(const pcre *code, const char *stringname,
|
||||||
char **firstptr, char **lastptr)
|
char **firstptr, char **lastptr)
|
||||||
|
#elif defined COMPILE_PCRE16
|
||||||
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||||
|
pcre16_get_stringtable_entries(const pcre16 *code, PCRE_SPTR16 stringname,
|
||||||
|
PCRE_UCHAR16 **firstptr, PCRE_UCHAR16 **lastptr)
|
||||||
|
#elif defined COMPILE_PCRE32
|
||||||
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||||
|
pcre32_get_stringtable_entries(const pcre32 *code, PCRE_SPTR32 stringname,
|
||||||
|
PCRE_UCHAR32 **firstptr, PCRE_UCHAR32 **lastptr)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
int rc;
|
int rc;
|
||||||
int entrysize;
|
int entrysize;
|
||||||
int top, bot;
|
int top, bot;
|
||||||
uschar *nametable, *lastentry;
|
pcre_uchar *nametable, *lastentry;
|
||||||
|
|
||||||
|
#ifdef COMPILE_PCRE8
|
||||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||||
return rc;
|
return rc;
|
||||||
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||||
@ -131,30 +174,62 @@ if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
|||||||
return rc;
|
return rc;
|
||||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||||
return rc;
|
return rc;
|
||||||
|
#endif
|
||||||
|
#ifdef COMPILE_PCRE16
|
||||||
|
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||||
|
return rc;
|
||||||
|
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||||
|
|
||||||
|
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
||||||
|
return rc;
|
||||||
|
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||||
|
return rc;
|
||||||
|
#endif
|
||||||
|
#ifdef COMPILE_PCRE32
|
||||||
|
if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||||
|
return rc;
|
||||||
|
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||||
|
|
||||||
|
if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
||||||
|
return rc;
|
||||||
|
if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||||
|
return rc;
|
||||||
|
#endif
|
||||||
|
|
||||||
lastentry = nametable + entrysize * (top - 1);
|
lastentry = nametable + entrysize * (top - 1);
|
||||||
bot = 0;
|
bot = 0;
|
||||||
while (top > bot)
|
while (top > bot)
|
||||||
{
|
{
|
||||||
int mid = (top + bot) / 2;
|
int mid = (top + bot) / 2;
|
||||||
uschar *entry = nametable + entrysize*mid;
|
pcre_uchar *entry = nametable + entrysize*mid;
|
||||||
int c = strcmp(stringname, (char *)(entry + 2));
|
int c = STRCMP_UC_UC((pcre_uchar *)stringname,
|
||||||
|
(pcre_uchar *)(entry + IMM2_SIZE));
|
||||||
if (c == 0)
|
if (c == 0)
|
||||||
{
|
{
|
||||||
uschar *first = entry;
|
pcre_uchar *first = entry;
|
||||||
uschar *last = entry;
|
pcre_uchar *last = entry;
|
||||||
while (first > nametable)
|
while (first > nametable)
|
||||||
{
|
{
|
||||||
if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
|
if (STRCMP_UC_UC((pcre_uchar *)stringname,
|
||||||
|
(pcre_uchar *)(first - entrysize + IMM2_SIZE)) != 0) break;
|
||||||
first -= entrysize;
|
first -= entrysize;
|
||||||
}
|
}
|
||||||
while (last < lastentry)
|
while (last < lastentry)
|
||||||
{
|
{
|
||||||
if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
|
if (STRCMP_UC_UC((pcre_uchar *)stringname,
|
||||||
|
(pcre_uchar *)(last + entrysize + IMM2_SIZE)) != 0) break;
|
||||||
last += entrysize;
|
last += entrysize;
|
||||||
}
|
}
|
||||||
|
#if defined COMPILE_PCRE8
|
||||||
*firstptr = (char *)first;
|
*firstptr = (char *)first;
|
||||||
*lastptr = (char *)last;
|
*lastptr = (char *)last;
|
||||||
|
#elif defined COMPILE_PCRE16
|
||||||
|
*firstptr = (PCRE_UCHAR16 *)first;
|
||||||
|
*lastptr = (PCRE_UCHAR16 *)last;
|
||||||
|
#elif defined COMPILE_PCRE32
|
||||||
|
*firstptr = (PCRE_UCHAR32 *)first;
|
||||||
|
*lastptr = (PCRE_UCHAR32 *)last;
|
||||||
|
#endif
|
||||||
return entrysize;
|
return entrysize;
|
||||||
}
|
}
|
||||||
if (c > 0) bot = mid + 1; else top = mid;
|
if (c > 0) bot = mid + 1; else top = mid;
|
||||||
@ -176,29 +251,58 @@ Arguments:
|
|||||||
code the compiled regex
|
code the compiled regex
|
||||||
stringname the name of the capturing substring
|
stringname the name of the capturing substring
|
||||||
ovector the vector of matched substrings
|
ovector the vector of matched substrings
|
||||||
|
stringcount number of captured substrings
|
||||||
|
|
||||||
Returns: the number of the first that is set,
|
Returns: the number of the first that is set,
|
||||||
or the number of the last one if none are set,
|
or the number of the last one if none are set,
|
||||||
or a negative number on error
|
or a negative number on error
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#if defined COMPILE_PCRE8
|
||||||
static int
|
static int
|
||||||
get_first_set(const pcre *code, const char *stringname, int *ovector)
|
get_first_set(const pcre *code, const char *stringname, int *ovector,
|
||||||
|
int stringcount)
|
||||||
|
#elif defined COMPILE_PCRE16
|
||||||
|
static int
|
||||||
|
get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector,
|
||||||
|
int stringcount)
|
||||||
|
#elif defined COMPILE_PCRE32
|
||||||
|
static int
|
||||||
|
get_first_set(const pcre32 *code, PCRE_SPTR32 stringname, int *ovector,
|
||||||
|
int stringcount)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
const real_pcre *re = (const real_pcre *)code;
|
const REAL_PCRE *re = (const REAL_PCRE *)code;
|
||||||
int entrysize;
|
int entrysize;
|
||||||
|
pcre_uchar *entry;
|
||||||
|
#if defined COMPILE_PCRE8
|
||||||
char *first, *last;
|
char *first, *last;
|
||||||
uschar *entry;
|
#elif defined COMPILE_PCRE16
|
||||||
|
PCRE_UCHAR16 *first, *last;
|
||||||
|
#elif defined COMPILE_PCRE32
|
||||||
|
PCRE_UCHAR32 *first, *last;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined COMPILE_PCRE8
|
||||||
if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
|
if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
|
||||||
return pcre_get_stringnumber(code, stringname);
|
return pcre_get_stringnumber(code, stringname);
|
||||||
entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
|
entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
|
||||||
|
#elif defined COMPILE_PCRE16
|
||||||
|
if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
|
||||||
|
return pcre16_get_stringnumber(code, stringname);
|
||||||
|
entrysize = pcre16_get_stringtable_entries(code, stringname, &first, &last);
|
||||||
|
#elif defined COMPILE_PCRE32
|
||||||
|
if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
|
||||||
|
return pcre32_get_stringnumber(code, stringname);
|
||||||
|
entrysize = pcre32_get_stringtable_entries(code, stringname, &first, &last);
|
||||||
|
#endif
|
||||||
if (entrysize <= 0) return entrysize;
|
if (entrysize <= 0) return entrysize;
|
||||||
for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
|
for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize)
|
||||||
{
|
{
|
||||||
int n = (entry[0] << 8) + entry[1];
|
int n = GET2(entry, 0);
|
||||||
if (ovector[n*2] >= 0) return n;
|
if (n < stringcount && ovector[n*2] >= 0) return n;
|
||||||
}
|
}
|
||||||
return (first[0] << 8) + first[1];
|
return GET2(entry, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -231,9 +335,19 @@ Returns: if successful:
|
|||||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int
|
#if defined COMPILE_PCRE8
|
||||||
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||||
pcre_copy_substring(const char *subject, int *ovector, int stringcount,
|
pcre_copy_substring(const char *subject, int *ovector, int stringcount,
|
||||||
int stringnumber, char *buffer, int size)
|
int stringnumber, char *buffer, int size)
|
||||||
|
#elif defined COMPILE_PCRE16
|
||||||
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||||
|
pcre16_copy_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
|
||||||
|
int stringnumber, PCRE_UCHAR16 *buffer, int size)
|
||||||
|
#elif defined COMPILE_PCRE32
|
||||||
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||||
|
pcre32_copy_substring(PCRE_SPTR32 subject, int *ovector, int stringcount,
|
||||||
|
int stringnumber, PCRE_UCHAR32 *buffer, int size)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
int yield;
|
int yield;
|
||||||
if (stringnumber < 0 || stringnumber >= stringcount)
|
if (stringnumber < 0 || stringnumber >= stringcount)
|
||||||
@ -241,7 +355,7 @@ if (stringnumber < 0 || stringnumber >= stringcount)
|
|||||||
stringnumber *= 2;
|
stringnumber *= 2;
|
||||||
yield = ovector[stringnumber+1] - ovector[stringnumber];
|
yield = ovector[stringnumber+1] - ovector[stringnumber];
|
||||||
if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
|
if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
|
||||||
memcpy(buffer, subject + ovector[stringnumber], yield);
|
memcpy(buffer, subject + ovector[stringnumber], IN_UCHARS(yield));
|
||||||
buffer[yield] = 0;
|
buffer[yield] = 0;
|
||||||
return yield;
|
return yield;
|
||||||
}
|
}
|
||||||
@ -276,13 +390,32 @@ Returns: if successful:
|
|||||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int
|
#if defined COMPILE_PCRE8
|
||||||
pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||||
int stringcount, const char *stringname, char *buffer, int size)
|
pcre_copy_named_substring(const pcre *code, const char *subject,
|
||||||
|
int *ovector, int stringcount, const char *stringname,
|
||||||
|
char *buffer, int size)
|
||||||
|
#elif defined COMPILE_PCRE16
|
||||||
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||||
|
pcre16_copy_named_substring(const pcre16 *code, PCRE_SPTR16 subject,
|
||||||
|
int *ovector, int stringcount, PCRE_SPTR16 stringname,
|
||||||
|
PCRE_UCHAR16 *buffer, int size)
|
||||||
|
#elif defined COMPILE_PCRE32
|
||||||
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||||
|
pcre32_copy_named_substring(const pcre32 *code, PCRE_SPTR32 subject,
|
||||||
|
int *ovector, int stringcount, PCRE_SPTR32 stringname,
|
||||||
|
PCRE_UCHAR32 *buffer, int size)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
int n = get_first_set(code, stringname, ovector);
|
int n = get_first_set(code, stringname, ovector, stringcount);
|
||||||
if (n <= 0) return n;
|
if (n <= 0) return n;
|
||||||
|
#if defined COMPILE_PCRE8
|
||||||
return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
|
return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
|
||||||
|
#elif defined COMPILE_PCRE16
|
||||||
|
return pcre16_copy_substring(subject, ovector, stringcount, n, buffer, size);
|
||||||
|
#elif defined COMPILE_PCRE32
|
||||||
|
return pcre32_copy_substring(subject, ovector, stringcount, n, buffer, size);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -308,29 +441,48 @@ Returns: if successful: 0
|
|||||||
PCRE_ERROR_NOMEMORY (-6) failed to get store
|
PCRE_ERROR_NOMEMORY (-6) failed to get store
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int
|
#if defined COMPILE_PCRE8
|
||||||
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||||
pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
|
pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
|
||||||
const char ***listptr)
|
const char ***listptr)
|
||||||
|
#elif defined COMPILE_PCRE16
|
||||||
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||||
|
pcre16_get_substring_list(PCRE_SPTR16 subject, int *ovector, int stringcount,
|
||||||
|
PCRE_SPTR16 **listptr)
|
||||||
|
#elif defined COMPILE_PCRE32
|
||||||
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||||
|
pcre32_get_substring_list(PCRE_SPTR32 subject, int *ovector, int stringcount,
|
||||||
|
PCRE_SPTR32 **listptr)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
int size = sizeof(char *);
|
int size = sizeof(pcre_uchar *);
|
||||||
int double_count = stringcount * 2;
|
int double_count = stringcount * 2;
|
||||||
char **stringlist;
|
pcre_uchar **stringlist;
|
||||||
char *p;
|
pcre_uchar *p;
|
||||||
|
|
||||||
for (i = 0; i < double_count; i += 2)
|
|
||||||
size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
|
|
||||||
|
|
||||||
stringlist = (char **)(pcre_malloc)(size);
|
|
||||||
if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
|
|
||||||
|
|
||||||
*listptr = (const char **)stringlist;
|
|
||||||
p = (char *)(stringlist + stringcount + 1);
|
|
||||||
|
|
||||||
for (i = 0; i < double_count; i += 2)
|
for (i = 0; i < double_count; i += 2)
|
||||||
{
|
{
|
||||||
int len = ovector[i+1] - ovector[i];
|
size += sizeof(pcre_uchar *) + IN_UCHARS(1);
|
||||||
memcpy(p, subject + ovector[i], len);
|
if (ovector[i+1] > ovector[i]) size += IN_UCHARS(ovector[i+1] - ovector[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
stringlist = (pcre_uchar **)(PUBL(malloc))(size);
|
||||||
|
if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
|
||||||
|
|
||||||
|
#if defined COMPILE_PCRE8
|
||||||
|
*listptr = (const char **)stringlist;
|
||||||
|
#elif defined COMPILE_PCRE16
|
||||||
|
*listptr = (PCRE_SPTR16 *)stringlist;
|
||||||
|
#elif defined COMPILE_PCRE32
|
||||||
|
*listptr = (PCRE_SPTR32 *)stringlist;
|
||||||
|
#endif
|
||||||
|
p = (pcre_uchar *)(stringlist + stringcount + 1);
|
||||||
|
|
||||||
|
for (i = 0; i < double_count; i += 2)
|
||||||
|
{
|
||||||
|
int len = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;
|
||||||
|
memcpy(p, subject + ovector[i], IN_UCHARS(len));
|
||||||
*stringlist++ = p;
|
*stringlist++ = p;
|
||||||
p += len;
|
p += len;
|
||||||
*p++ = 0;
|
*p++ = 0;
|
||||||
@ -347,16 +499,25 @@ return 0;
|
|||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* This function exists for the benefit of people calling PCRE from non-C
|
/* This function exists for the benefit of people calling PCRE from non-C
|
||||||
programs that can call its functions, but not free() or (pcre_free)() directly.
|
programs that can call its functions, but not free() or (PUBL(free))()
|
||||||
|
directly.
|
||||||
|
|
||||||
Argument: the result of a previous pcre_get_substring_list()
|
Argument: the result of a previous pcre_get_substring_list()
|
||||||
Returns: nothing
|
Returns: nothing
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void
|
#if defined COMPILE_PCRE8
|
||||||
|
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
|
||||||
pcre_free_substring_list(const char **pointer)
|
pcre_free_substring_list(const char **pointer)
|
||||||
|
#elif defined COMPILE_PCRE16
|
||||||
|
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
|
||||||
|
pcre16_free_substring_list(PCRE_SPTR16 *pointer)
|
||||||
|
#elif defined COMPILE_PCRE32
|
||||||
|
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
|
||||||
|
pcre32_free_substring_list(PCRE_SPTR32 *pointer)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
(pcre_free)((void *)pointer);
|
(PUBL(free))((void *)pointer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -386,21 +547,37 @@ Returns: if successful:
|
|||||||
PCRE_ERROR_NOSUBSTRING (-7) substring not present
|
PCRE_ERROR_NOSUBSTRING (-7) substring not present
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int
|
#if defined COMPILE_PCRE8
|
||||||
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||||
pcre_get_substring(const char *subject, int *ovector, int stringcount,
|
pcre_get_substring(const char *subject, int *ovector, int stringcount,
|
||||||
int stringnumber, const char **stringptr)
|
int stringnumber, const char **stringptr)
|
||||||
|
#elif defined COMPILE_PCRE16
|
||||||
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||||
|
pcre16_get_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
|
||||||
|
int stringnumber, PCRE_SPTR16 *stringptr)
|
||||||
|
#elif defined COMPILE_PCRE32
|
||||||
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||||
|
pcre32_get_substring(PCRE_SPTR32 subject, int *ovector, int stringcount,
|
||||||
|
int stringnumber, PCRE_SPTR32 *stringptr)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
int yield;
|
int yield;
|
||||||
char *substring;
|
pcre_uchar *substring;
|
||||||
if (stringnumber < 0 || stringnumber >= stringcount)
|
if (stringnumber < 0 || stringnumber >= stringcount)
|
||||||
return PCRE_ERROR_NOSUBSTRING;
|
return PCRE_ERROR_NOSUBSTRING;
|
||||||
stringnumber *= 2;
|
stringnumber *= 2;
|
||||||
yield = ovector[stringnumber+1] - ovector[stringnumber];
|
yield = ovector[stringnumber+1] - ovector[stringnumber];
|
||||||
substring = (char *)(pcre_malloc)(yield + 1);
|
substring = (pcre_uchar *)(PUBL(malloc))(IN_UCHARS(yield + 1));
|
||||||
if (substring == NULL) return PCRE_ERROR_NOMEMORY;
|
if (substring == NULL) return PCRE_ERROR_NOMEMORY;
|
||||||
memcpy(substring, subject + ovector[stringnumber], yield);
|
memcpy(substring, subject + ovector[stringnumber], IN_UCHARS(yield));
|
||||||
substring[yield] = 0;
|
substring[yield] = 0;
|
||||||
*stringptr = substring;
|
#if defined COMPILE_PCRE8
|
||||||
|
*stringptr = (const char *)substring;
|
||||||
|
#elif defined COMPILE_PCRE16
|
||||||
|
*stringptr = (PCRE_SPTR16)substring;
|
||||||
|
#elif defined COMPILE_PCRE32
|
||||||
|
*stringptr = (PCRE_SPTR32)substring;
|
||||||
|
#endif
|
||||||
return yield;
|
return yield;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -433,13 +610,32 @@ Returns: if successful:
|
|||||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int
|
#if defined COMPILE_PCRE8
|
||||||
pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||||
int stringcount, const char *stringname, const char **stringptr)
|
pcre_get_named_substring(const pcre *code, const char *subject,
|
||||||
|
int *ovector, int stringcount, const char *stringname,
|
||||||
|
const char **stringptr)
|
||||||
|
#elif defined COMPILE_PCRE16
|
||||||
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||||
|
pcre16_get_named_substring(const pcre16 *code, PCRE_SPTR16 subject,
|
||||||
|
int *ovector, int stringcount, PCRE_SPTR16 stringname,
|
||||||
|
PCRE_SPTR16 *stringptr)
|
||||||
|
#elif defined COMPILE_PCRE32
|
||||||
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||||
|
pcre32_get_named_substring(const pcre32 *code, PCRE_SPTR32 subject,
|
||||||
|
int *ovector, int stringcount, PCRE_SPTR32 stringname,
|
||||||
|
PCRE_SPTR32 *stringptr)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
int n = get_first_set(code, stringname, ovector);
|
int n = get_first_set(code, stringname, ovector, stringcount);
|
||||||
if (n <= 0) return n;
|
if (n <= 0) return n;
|
||||||
|
#if defined COMPILE_PCRE8
|
||||||
return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
|
return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
|
||||||
|
#elif defined COMPILE_PCRE16
|
||||||
|
return pcre16_get_substring(subject, ovector, stringcount, n, stringptr);
|
||||||
|
#elif defined COMPILE_PCRE32
|
||||||
|
return pcre32_get_substring(subject, ovector, stringcount, n, stringptr);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -450,16 +646,25 @@ return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
|
|||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* This function exists for the benefit of people calling PCRE from non-C
|
/* This function exists for the benefit of people calling PCRE from non-C
|
||||||
programs that can call its functions, but not free() or (pcre_free)() directly.
|
programs that can call its functions, but not free() or (PUBL(free))()
|
||||||
|
directly.
|
||||||
|
|
||||||
Argument: the result of a previous pcre_get_substring()
|
Argument: the result of a previous pcre_get_substring()
|
||||||
Returns: nothing
|
Returns: nothing
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void
|
#if defined COMPILE_PCRE8
|
||||||
|
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
|
||||||
pcre_free_substring(const char *pointer)
|
pcre_free_substring(const char *pointer)
|
||||||
|
#elif defined COMPILE_PCRE16
|
||||||
|
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
|
||||||
|
pcre16_free_substring(PCRE_SPTR16 pointer)
|
||||||
|
#elif defined COMPILE_PCRE32
|
||||||
|
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
|
||||||
|
pcre32_free_substring(PCRE_SPTR32 pointer)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
(pcre_free)((void *)pointer);
|
(PUBL(free))((void *)pointer);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* End of pcre_get.c */
|
/* End of pcre_get.c */
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
#define HAVE_CONFIG_H
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Perl-Compatible Regular Expressions *
|
* Perl-Compatible Regular Expressions *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
@ -6,7 +7,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2007 University of Cambridge
|
Copyright (c) 1997-2014 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -43,8 +44,14 @@ PCRE is thread-clean and doesn't use any global variables in the normal sense.
|
|||||||
However, it calls memory allocation and freeing functions via the four
|
However, it calls memory allocation and freeing functions via the four
|
||||||
indirections below, and it can optionally do callouts, using the fifth
|
indirections below, and it can optionally do callouts, using the fifth
|
||||||
indirection. These values can be changed by the caller, but are shared between
|
indirection. These values can be changed by the caller, but are shared between
|
||||||
all threads. However, when compiling for Virtual Pascal, things are done
|
all threads.
|
||||||
differently, and global variables are not used (see pcre.in). */
|
|
||||||
|
For MS Visual Studio and Symbian OS, there are problems in initializing these
|
||||||
|
variables to non-local functions. In these cases, therefore, an indirection via
|
||||||
|
a local function is used.
|
||||||
|
|
||||||
|
Also, when compiling for Virtual Pascal, things are done differently, and
|
||||||
|
global variables are not used. */
|
||||||
|
|
||||||
#ifdef HAVE_CONFIG_H
|
#ifdef HAVE_CONFIG_H
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
@ -52,12 +59,29 @@ differently, and global variables are not used (see pcre.in). */
|
|||||||
|
|
||||||
#include "pcre_internal.h"
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
#ifndef VPCOMPAT
|
#if defined _MSC_VER || defined __SYMBIAN32__
|
||||||
PCRE_EXP_DATA_DEFN void *(*pcre_malloc)(size_t) = malloc;
|
static void* LocalPcreMalloc(size_t aSize)
|
||||||
PCRE_EXP_DATA_DEFN void (*pcre_free)(void *) = free;
|
{
|
||||||
PCRE_EXP_DATA_DEFN void *(*pcre_stack_malloc)(size_t) = malloc;
|
return malloc(aSize);
|
||||||
PCRE_EXP_DATA_DEFN void (*pcre_stack_free)(void *) = free;
|
}
|
||||||
PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL;
|
static void LocalPcreFree(void* aPtr)
|
||||||
|
{
|
||||||
|
free(aPtr);
|
||||||
|
}
|
||||||
|
PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = LocalPcreMalloc;
|
||||||
|
PCRE_EXP_DATA_DEFN void (*PUBL(free))(void *) = LocalPcreFree;
|
||||||
|
PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = LocalPcreMalloc;
|
||||||
|
PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = LocalPcreFree;
|
||||||
|
PCRE_EXP_DATA_DEFN int (*PUBL(callout))(PUBL(callout_block) *) = NULL;
|
||||||
|
PCRE_EXP_DATA_DEFN int (*PUBL(stack_guard))(void) = NULL;
|
||||||
|
|
||||||
|
#elif !defined VPCOMPAT
|
||||||
|
PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = malloc;
|
||||||
|
PCRE_EXP_DATA_DEFN void (*PUBL(free))(void *) = free;
|
||||||
|
PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = malloc;
|
||||||
|
PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = free;
|
||||||
|
PCRE_EXP_DATA_DEFN int (*PUBL(callout))(PUBL(callout_block) *) = NULL;
|
||||||
|
PCRE_EXP_DATA_DEFN int (*PUBL(stack_guard))(void) = NULL;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* End of pcre_globals.c */
|
/* End of pcre_globals.c */
|
||||||
|
@ -1,93 +0,0 @@
|
|||||||
/*************************************************
|
|
||||||
* Perl-Compatible Regular Expressions *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
|
||||||
and semantics are as close as possible to those of the Perl 5 language.
|
|
||||||
|
|
||||||
Written by Philip Hazel
|
|
||||||
Copyright (c) 1997-2007 University of Cambridge
|
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright notice,
|
|
||||||
this list of conditions and the following disclaimer.
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in the
|
|
||||||
documentation and/or other materials provided with the distribution.
|
|
||||||
|
|
||||||
* Neither the name of the University of Cambridge nor the names of its
|
|
||||||
contributors may be used to endorse or promote products derived from
|
|
||||||
this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
/* This module contains the external function pcre_info(), which gives some
|
|
||||||
information about a compiled pattern. However, use of this function is now
|
|
||||||
deprecated, as it has been superseded by pcre_fullinfo(). */
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef HAVE_CONFIG_H
|
|
||||||
#include "config.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "pcre_internal.h"
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* (Obsolete) Return info about compiled pattern *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* This is the original "info" function. It picks potentially useful data out
|
|
||||||
of the private structure, but its interface was too rigid. It remains for
|
|
||||||
backwards compatibility. The public options are passed back in an int - though
|
|
||||||
the re->options field has been expanded to a long int, all the public options
|
|
||||||
at the low end of it, and so even on 16-bit systems this will still be OK.
|
|
||||||
Therefore, I haven't changed the API for pcre_info().
|
|
||||||
|
|
||||||
Arguments:
|
|
||||||
argument_re points to compiled code
|
|
||||||
optptr where to pass back the options
|
|
||||||
first_byte where to pass back the first character,
|
|
||||||
or -1 if multiline and all branches start ^,
|
|
||||||
or -2 otherwise
|
|
||||||
|
|
||||||
Returns: number of capturing subpatterns
|
|
||||||
or negative values on error
|
|
||||||
*/
|
|
||||||
|
|
||||||
PCRE_EXP_DEFN int
|
|
||||||
pcre_info(const pcre *argument_re, int *optptr, int *first_byte)
|
|
||||||
{
|
|
||||||
real_pcre internal_re;
|
|
||||||
const real_pcre *re = (const real_pcre *)argument_re;
|
|
||||||
if (re == NULL) return PCRE_ERROR_NULL;
|
|
||||||
if (re->magic_number != MAGIC_NUMBER)
|
|
||||||
{
|
|
||||||
re = _pcre_try_flipped(re, &internal_re, NULL, NULL);
|
|
||||||
if (re == NULL) return PCRE_ERROR_BADMAGIC;
|
|
||||||
}
|
|
||||||
if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_OPTIONS);
|
|
||||||
if (first_byte != NULL)
|
|
||||||
*first_byte = ((re->flags & PCRE_FIRSTSET) != 0)? re->first_byte :
|
|
||||||
((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
|
|
||||||
return re->top_bracket;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* End of pcre_info.c */
|
|
File diff suppressed because it is too large
Load Diff
@ -1,3 +1,4 @@
|
|||||||
|
#define HAVE_CONFIG_H
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Perl-Compatible Regular Expressions *
|
* Perl-Compatible Regular Expressions *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
#define HAVE_CONFIG_H
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Perl-Compatible Regular Expressions *
|
* Perl-Compatible Regular Expressions *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
@ -6,7 +7,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2007 University of Cambridge
|
Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -67,23 +68,33 @@ Arguments:
|
|||||||
type the newline type
|
type the newline type
|
||||||
endptr pointer to the end of the string
|
endptr pointer to the end of the string
|
||||||
lenptr where to return the length
|
lenptr where to return the length
|
||||||
utf8 TRUE if in utf8 mode
|
utf TRUE if in utf mode
|
||||||
|
|
||||||
Returns: TRUE or FALSE
|
Returns: TRUE or FALSE
|
||||||
*/
|
*/
|
||||||
|
|
||||||
BOOL
|
BOOL
|
||||||
_pcre_is_newline(const uschar *ptr, int type, const uschar *endptr,
|
PRIV(is_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR endptr, int *lenptr,
|
||||||
int *lenptr, BOOL utf8)
|
BOOL utf)
|
||||||
{
|
{
|
||||||
int c;
|
pcre_uint32 c;
|
||||||
if (utf8) { GETCHAR(c, ptr); } else c = *ptr;
|
(void)utf;
|
||||||
|
#ifdef SUPPORT_UTF
|
||||||
|
if (utf)
|
||||||
|
{
|
||||||
|
GETCHAR(c, ptr);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif /* SUPPORT_UTF */
|
||||||
|
c = *ptr;
|
||||||
|
|
||||||
|
/* Note that this function is called only for ANY or ANYCRLF. */
|
||||||
|
|
||||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||||
{
|
{
|
||||||
case 0x000a: *lenptr = 1; return TRUE; /* LF */
|
case CHAR_LF: *lenptr = 1; return TRUE;
|
||||||
case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
|
case CHAR_CR: *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
|
||||||
return TRUE; /* CR */
|
return TRUE;
|
||||||
default: return FALSE;
|
default: return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -91,14 +102,29 @@ if (type == NLTYPE_ANYCRLF) switch(c)
|
|||||||
|
|
||||||
else switch(c)
|
else switch(c)
|
||||||
{
|
{
|
||||||
case 0x000a: /* LF */
|
#ifdef EBCDIC
|
||||||
case 0x000b: /* VT */
|
case CHAR_NEL:
|
||||||
case 0x000c: *lenptr = 1; return TRUE; /* FF */
|
#endif
|
||||||
case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
|
case CHAR_LF:
|
||||||
return TRUE; /* CR */
|
case CHAR_VT:
|
||||||
case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */
|
case CHAR_FF: *lenptr = 1; return TRUE;
|
||||||
|
|
||||||
|
case CHAR_CR:
|
||||||
|
*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
|
||||||
|
return TRUE;
|
||||||
|
|
||||||
|
#ifndef EBCDIC
|
||||||
|
#ifdef COMPILE_PCRE8
|
||||||
|
case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
|
||||||
case 0x2028: /* LS */
|
case 0x2028: /* LS */
|
||||||
case 0x2029: *lenptr = 3; return TRUE; /* PS */
|
case 0x2029: *lenptr = 3; return TRUE; /* PS */
|
||||||
|
#else /* COMPILE_PCRE16 || COMPILE_PCRE32 */
|
||||||
|
case CHAR_NEL:
|
||||||
|
case 0x2028: /* LS */
|
||||||
|
case 0x2029: *lenptr = 1; return TRUE; /* PS */
|
||||||
|
#endif /* COMPILE_PCRE8 */
|
||||||
|
#endif /* Not EBCDIC */
|
||||||
|
|
||||||
default: return FALSE;
|
default: return FALSE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -117,46 +143,67 @@ Arguments:
|
|||||||
type the newline type
|
type the newline type
|
||||||
startptr pointer to the start of the string
|
startptr pointer to the start of the string
|
||||||
lenptr where to return the length
|
lenptr where to return the length
|
||||||
utf8 TRUE if in utf8 mode
|
utf TRUE if in utf mode
|
||||||
|
|
||||||
Returns: TRUE or FALSE
|
Returns: TRUE or FALSE
|
||||||
*/
|
*/
|
||||||
|
|
||||||
BOOL
|
BOOL
|
||||||
_pcre_was_newline(const uschar *ptr, int type, const uschar *startptr,
|
PRIV(was_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR startptr, int *lenptr,
|
||||||
int *lenptr, BOOL utf8)
|
BOOL utf)
|
||||||
{
|
{
|
||||||
int c;
|
pcre_uint32 c;
|
||||||
|
(void)utf;
|
||||||
ptr--;
|
ptr--;
|
||||||
#ifdef SUPPORT_UTF8
|
#ifdef SUPPORT_UTF
|
||||||
if (utf8)
|
if (utf)
|
||||||
{
|
{
|
||||||
BACKCHAR(ptr);
|
BACKCHAR(ptr);
|
||||||
GETCHAR(c, ptr);
|
GETCHAR(c, ptr);
|
||||||
}
|
}
|
||||||
else c = *ptr;
|
else
|
||||||
#else /* no UTF-8 support */
|
#endif /* SUPPORT_UTF */
|
||||||
c = *ptr;
|
c = *ptr;
|
||||||
#endif /* SUPPORT_UTF8 */
|
|
||||||
|
/* Note that this function is called only for ANY or ANYCRLF. */
|
||||||
|
|
||||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||||
{
|
{
|
||||||
case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
|
case CHAR_LF:
|
||||||
return TRUE; /* LF */
|
*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
|
||||||
case 0x000d: *lenptr = 1; return TRUE; /* CR */
|
return TRUE;
|
||||||
|
|
||||||
|
case CHAR_CR: *lenptr = 1; return TRUE;
|
||||||
default: return FALSE;
|
default: return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* NLTYPE_ANY */
|
||||||
|
|
||||||
else switch(c)
|
else switch(c)
|
||||||
{
|
{
|
||||||
case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
|
case CHAR_LF:
|
||||||
return TRUE; /* LF */
|
*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
|
||||||
case 0x000b: /* VT */
|
return TRUE;
|
||||||
case 0x000c: /* FF */
|
|
||||||
case 0x000d: *lenptr = 1; return TRUE; /* CR */
|
#ifdef EBCDIC
|
||||||
case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */
|
case CHAR_NEL:
|
||||||
|
#endif
|
||||||
|
case CHAR_VT:
|
||||||
|
case CHAR_FF:
|
||||||
|
case CHAR_CR: *lenptr = 1; return TRUE;
|
||||||
|
|
||||||
|
#ifndef EBCDIC
|
||||||
|
#ifdef COMPILE_PCRE8
|
||||||
|
case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
|
||||||
case 0x2028: /* LS */
|
case 0x2028: /* LS */
|
||||||
case 0x2029: *lenptr = 3; return TRUE; /* PS */
|
case 0x2029: *lenptr = 3; return TRUE; /* PS */
|
||||||
|
#else /* COMPILE_PCRE16 || COMPILE_PCRE32 */
|
||||||
|
case CHAR_NEL:
|
||||||
|
case 0x2028: /* LS */
|
||||||
|
case 0x2029: *lenptr = 1; return TRUE; /* PS */
|
||||||
|
#endif /* COMPILE_PCRE8 */
|
||||||
|
#endif /* NotEBCDIC */
|
||||||
|
|
||||||
default: return FALSE;
|
default: return FALSE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
#define HAVE_CONFIG_H
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Perl-Compatible Regular Expressions *
|
* Perl-Compatible Regular Expressions *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
@ -6,7 +7,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2007 University of Cambridge
|
Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -45,41 +46,50 @@ character value into a UTF8 string. */
|
|||||||
#include "config.h"
|
#include "config.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "pcre_internal.h"
|
#define COMPILE_PCRE8
|
||||||
|
|
||||||
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Convert character value to UTF-8 *
|
* Convert character value to UTF-8 *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* This function takes an integer value in the range 0 - 0x7fffffff
|
/* This function takes an integer value in the range 0 - 0x10ffff
|
||||||
and encodes it as a UTF-8 character in 0 to 6 bytes.
|
and encodes it as a UTF-8 character in 1 to 4 pcre_uchars.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
cvalue the character value
|
cvalue the character value
|
||||||
buffer pointer to buffer for result - at least 6 bytes long
|
buffer pointer to buffer for result - at least 6 pcre_uchars long
|
||||||
|
|
||||||
Returns: number of characters placed in the buffer
|
Returns: number of characters placed in the buffer
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
unsigned
|
||||||
int
|
int
|
||||||
_pcre_ord2utf8(int cvalue, uschar *buffer)
|
PRIV(ord2utf)(pcre_uint32 cvalue, pcre_uchar *buffer)
|
||||||
{
|
{
|
||||||
#ifdef SUPPORT_UTF8
|
#ifdef SUPPORT_UTF
|
||||||
|
|
||||||
register int i, j;
|
register int i, j;
|
||||||
for (i = 0; i < _pcre_utf8_table1_size; i++)
|
|
||||||
if (cvalue <= _pcre_utf8_table1[i]) break;
|
for (i = 0; i < PRIV(utf8_table1_size); i++)
|
||||||
|
if ((int)cvalue <= PRIV(utf8_table1)[i]) break;
|
||||||
buffer += i;
|
buffer += i;
|
||||||
for (j = i; j > 0; j--)
|
for (j = i; j > 0; j--)
|
||||||
{
|
{
|
||||||
*buffer-- = 0x80 | (cvalue & 0x3f);
|
*buffer-- = 0x80 | (cvalue & 0x3f);
|
||||||
cvalue >>= 6;
|
cvalue >>= 6;
|
||||||
}
|
}
|
||||||
*buffer = _pcre_utf8_table2[i] | cvalue;
|
*buffer = PRIV(utf8_table2)[i] | cvalue;
|
||||||
return i + 1;
|
return i + 1;
|
||||||
|
|
||||||
#else
|
#else
|
||||||
return 0; /* Keep compiler happy; this function won't ever be */
|
|
||||||
#endif /* called when SUPPORT_UTF8 is not defined. */
|
(void)(cvalue); /* Keep compiler happy; this function won't ever be */
|
||||||
|
(void)(buffer); /* called when SUPPORT_UTF is not defined. */
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/* End of pcre_ord2utf8.c */
|
/* End of pcre_ord2utf8.c */
|
||||||
|
@ -9,7 +9,6 @@
|
|||||||
#include "../../structures.h"
|
#include "../../structures.h"
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include "pcre.h"
|
#include "pcre.h"
|
||||||
#include "pcreposix.h"
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
@ -349,12 +348,8 @@ static struct commands pcre_commandhandlers[] = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static struct symbol regexp_symbols[] = {
|
static struct symbol regexp_symbols[] = {
|
||||||
{regexp_symbols+1, "regcomp", (void*) regcomp},
|
{regexp_symbols+1, "pcre_compile", (void*) pcre_compile},
|
||||||
{regexp_symbols+2, "regexec", (void*) regexec},
|
{regexp_symbols+2, "pcre_exec", (void*) pcre_exec},
|
||||||
{regexp_symbols+3, "regerror", (void*) regerror},
|
|
||||||
{regexp_symbols+4, "regfree", (void*) regfree},
|
|
||||||
{regexp_symbols+5, "pcre_compile", (void*) pcre_compile},
|
|
||||||
{regexp_symbols+6, "pcre_exec", (void*) pcre_exec},
|
|
||||||
{NULL, "pcre_free", NULL},
|
{NULL, "pcre_free", NULL},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
#define HAVE_CONFIG_H
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Perl-Compatible Regular Expressions *
|
* Perl-Compatible Regular Expressions *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
@ -6,7 +7,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2007 University of Cambridge
|
Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -68,11 +69,21 @@ Returns: the (possibly updated) count value (a non-negative number), or
|
|||||||
a negative error number
|
a negative error number
|
||||||
*/
|
*/
|
||||||
|
|
||||||
PCRE_EXP_DEFN int
|
#if defined COMPILE_PCRE8
|
||||||
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||||
pcre_refcount(pcre *argument_re, int adjust)
|
pcre_refcount(pcre *argument_re, int adjust)
|
||||||
|
#elif defined COMPILE_PCRE16
|
||||||
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||||
|
pcre16_refcount(pcre16 *argument_re, int adjust)
|
||||||
|
#elif defined COMPILE_PCRE32
|
||||||
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||||
|
pcre32_refcount(pcre32 *argument_re, int adjust)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
real_pcre *re = (real_pcre *)argument_re;
|
REAL_PCRE *re = (REAL_PCRE *)argument_re;
|
||||||
if (re == NULL) return PCRE_ERROR_NULL;
|
if (re == NULL) return PCRE_ERROR_NULL;
|
||||||
|
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
|
||||||
|
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
|
||||||
re->ref_count = (-adjust > re->ref_count)? 0 :
|
re->ref_count = (-adjust > re->ref_count)? 0 :
|
||||||
(adjust + re->ref_count > 65535)? 65535 :
|
(adjust + re->ref_count > 65535)? 65535 :
|
||||||
re->ref_count + adjust;
|
re->ref_count + adjust;
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,25 +1,27 @@
|
|||||||
|
#define HAVE_CONFIG_H
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Perl-Compatible Regular Expressions *
|
* Perl-Compatible Regular Expressions *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* This file is automatically written by the dftables auxiliary
|
/* This file was automatically written by the dftables auxiliary
|
||||||
program. If you edit it by hand, you might like to edit the Makefile to
|
program. It contains character tables that are used when no external
|
||||||
prevent its ever being regenerated.
|
tables are passed to PCRE by the application that calls it. The tables
|
||||||
|
are used only for characters whose code values are less than 256.
|
||||||
|
|
||||||
This file contains the default tables for characters with codes less than
|
The following #includes are present because without them gcc 4.x may remove
|
||||||
128 (ASCII characters). These tables are used when no external tables are
|
|
||||||
passed to PCRE.
|
|
||||||
|
|
||||||
The following #include is present because without it gcc 4.x may remove
|
|
||||||
the array definition from the final binary if PCRE is built into a static
|
the array definition from the final binary if PCRE is built into a static
|
||||||
library and dead code stripping is activated. This leads to link errors.
|
library and dead code stripping is activated. This leads to link errors.
|
||||||
Pulling in the header ensures that the array gets flagged as "someone
|
Pulling in the header ensures that the array gets flagged as "someone
|
||||||
outside this compilation unit might reference this" and so it will always
|
outside this compilation unit might reference this" and so it will always
|
||||||
be supplied to the linker. */
|
be supplied to the linker. */
|
||||||
|
|
||||||
|
#ifdef HAVE_CONFIG_H
|
||||||
|
#include "config.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "pcre_internal.h"
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
const unsigned char _pcre_default_tables[] = {
|
const pcre_uint8 PRIV(default_tables)[] = {
|
||||||
|
|
||||||
/* This table is a lower casing table. */
|
/* This table is a lower casing table. */
|
||||||
|
|
||||||
@ -189,4 +191,4 @@ print, punct, and cntrl. Other classes are built from combinations. */
|
|||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
|
||||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
|
||||||
|
|
||||||
/* End of chartables.c */
|
/* End of pcre_chartables.c */
|
@ -1,3 +1,4 @@
|
|||||||
|
#define HAVE_CONFIG_H
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Perl-Compatible Regular Expressions *
|
* Perl-Compatible Regular Expressions *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
@ -6,7 +7,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2007 University of Cambridge
|
Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -37,6 +38,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#ifndef PCRE_INCLUDED
|
||||||
|
|
||||||
/* This module contains some fixed tables that are used by more than one of the
|
/* This module contains some fixed tables that are used by more than one of the
|
||||||
PCRE code modules. The tables are also #included by the pcretest program, which
|
PCRE code modules. The tables are also #included by the pcretest program, which
|
||||||
@ -50,11 +52,18 @@ clashes with the library. */
|
|||||||
|
|
||||||
#include "pcre_internal.h"
|
#include "pcre_internal.h"
|
||||||
|
|
||||||
|
#endif /* PCRE_INCLUDED */
|
||||||
|
|
||||||
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
|
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
|
||||||
the definition is next to the definition of the opcodes in pcre_internal.h. */
|
the definition is next to the definition of the opcodes in pcre_internal.h. */
|
||||||
|
|
||||||
const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
|
const pcre_uint8 PRIV(OP_lengths)[] = { OP_LENGTHS };
|
||||||
|
|
||||||
|
/* Tables of horizontal and vertical whitespace characters, suitable for
|
||||||
|
adding to classes. */
|
||||||
|
|
||||||
|
const pcre_uint32 PRIV(hspace_list)[] = { HSPACE_LIST };
|
||||||
|
const pcre_uint32 PRIV(vspace_list)[] = { VSPACE_LIST };
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -65,28 +74,123 @@ const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
|
|||||||
/* These are the breakpoints for different numbers of bytes in a UTF-8
|
/* These are the breakpoints for different numbers of bytes in a UTF-8
|
||||||
character. */
|
character. */
|
||||||
|
|
||||||
#ifdef SUPPORT_UTF8
|
#if (defined SUPPORT_UTF && defined COMPILE_PCRE8) \
|
||||||
|
|| (defined PCRE_INCLUDED && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32))
|
||||||
|
|
||||||
const int _pcre_utf8_table1[] =
|
/* These tables are also required by pcretest in 16- or 32-bit mode. */
|
||||||
|
|
||||||
|
const int PRIV(utf8_table1)[] =
|
||||||
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
|
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
|
||||||
|
|
||||||
const int _pcre_utf8_table1_size = sizeof(_pcre_utf8_table1)/sizeof(int);
|
const int PRIV(utf8_table1_size) = sizeof(PRIV(utf8_table1)) / sizeof(int);
|
||||||
|
|
||||||
/* These are the indicator bits and the mask for the data bits to set in the
|
/* These are the indicator bits and the mask for the data bits to set in the
|
||||||
first byte of a character, indexed by the number of additional bytes. */
|
first byte of a character, indexed by the number of additional bytes. */
|
||||||
|
|
||||||
const int _pcre_utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
|
const int PRIV(utf8_table2)[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
|
||||||
const int _pcre_utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
|
const int PRIV(utf8_table3)[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
|
||||||
|
|
||||||
/* Table of the number of extra bytes, indexed by the first byte masked with
|
/* Table of the number of extra bytes, indexed by the first byte masked with
|
||||||
0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */
|
0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */
|
||||||
|
|
||||||
const uschar _pcre_utf8_table4[] = {
|
const pcre_uint8 PRIV(utf8_table4)[] = {
|
||||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
||||||
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
|
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
|
||||||
|
|
||||||
|
#endif /* (SUPPORT_UTF && COMPILE_PCRE8) || (PCRE_INCLUDED && SUPPORT_PCRE[16|32])*/
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UTF
|
||||||
|
|
||||||
|
/* Table to translate from particular type value to the general value. */
|
||||||
|
|
||||||
|
const pcre_uint32 PRIV(ucp_gentype)[] = {
|
||||||
|
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
|
||||||
|
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
|
||||||
|
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
|
||||||
|
ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
|
||||||
|
ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
|
||||||
|
ucp_P, ucp_P, /* Ps, Po */
|
||||||
|
ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
|
||||||
|
ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
|
||||||
|
};
|
||||||
|
|
||||||
|
/* This table encodes the rules for finding the end of an extended grapheme
|
||||||
|
cluster. Every code point has a grapheme break property which is one of the
|
||||||
|
ucp_gbXX values defined in ucp.h. The 2-dimensional table is indexed by the
|
||||||
|
properties of two adjacent code points. The left property selects a word from
|
||||||
|
the table, and the right property selects a bit from that word like this:
|
||||||
|
|
||||||
|
ucp_gbtable[left-property] & (1 << right-property)
|
||||||
|
|
||||||
|
The value is non-zero if a grapheme break is NOT permitted between the relevant
|
||||||
|
two code points. The breaking rules are as follows:
|
||||||
|
|
||||||
|
1. Break at the start and end of text (pretty obviously).
|
||||||
|
|
||||||
|
2. Do not break between a CR and LF; otherwise, break before and after
|
||||||
|
controls.
|
||||||
|
|
||||||
|
3. Do not break Hangul syllable sequences, the rules for which are:
|
||||||
|
|
||||||
|
L may be followed by L, V, LV or LVT
|
||||||
|
LV or V may be followed by V or T
|
||||||
|
LVT or T may be followed by T
|
||||||
|
|
||||||
|
4. Do not break before extending characters.
|
||||||
|
|
||||||
|
The next two rules are only for extended grapheme clusters (but that's what we
|
||||||
|
are implementing).
|
||||||
|
|
||||||
|
5. Do not break before SpacingMarks.
|
||||||
|
|
||||||
|
6. Do not break after Prepend characters.
|
||||||
|
|
||||||
|
7. Otherwise, break everywhere.
|
||||||
|
*/
|
||||||
|
|
||||||
|
const pcre_uint32 PRIV(ucp_gbtable[]) = {
|
||||||
|
(1<<ucp_gbLF), /* 0 CR */
|
||||||
|
0, /* 1 LF */
|
||||||
|
0, /* 2 Control */
|
||||||
|
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark), /* 3 Extend */
|
||||||
|
(1<<ucp_gbExtend)|(1<<ucp_gbPrepend)| /* 4 Prepend */
|
||||||
|
(1<<ucp_gbSpacingMark)|(1<<ucp_gbL)|
|
||||||
|
(1<<ucp_gbV)|(1<<ucp_gbT)|(1<<ucp_gbLV)|
|
||||||
|
(1<<ucp_gbLVT)|(1<<ucp_gbOther),
|
||||||
|
|
||||||
|
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark), /* 5 SpacingMark */
|
||||||
|
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbL)| /* 6 L */
|
||||||
|
(1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbLV)|(1<<ucp_gbLVT),
|
||||||
|
|
||||||
|
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbV)| /* 7 V */
|
||||||
|
(1<<ucp_gbT),
|
||||||
|
|
||||||
|
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbT), /* 8 T */
|
||||||
|
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbV)| /* 9 LV */
|
||||||
|
(1<<ucp_gbT),
|
||||||
|
|
||||||
|
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbT), /* 10 LVT */
|
||||||
|
(1<<ucp_gbRegionalIndicator), /* 11 RegionalIndicator */
|
||||||
|
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark) /* 12 Other */
|
||||||
|
};
|
||||||
|
|
||||||
|
#ifdef SUPPORT_JIT
|
||||||
|
/* This table reverses PRIV(ucp_gentype). We can save the cost
|
||||||
|
of a memory load. */
|
||||||
|
|
||||||
|
const int PRIV(ucp_typerange)[] = {
|
||||||
|
ucp_Cc, ucp_Cs,
|
||||||
|
ucp_Ll, ucp_Lu,
|
||||||
|
ucp_Mc, ucp_Mn,
|
||||||
|
ucp_Nd, ucp_No,
|
||||||
|
ucp_Pc, ucp_Ps,
|
||||||
|
ucp_Sc, ucp_So,
|
||||||
|
ucp_Zl, ucp_Zs,
|
||||||
|
};
|
||||||
|
#endif /* SUPPORT_JIT */
|
||||||
|
|
||||||
/* The pcre_utt[] table below translates Unicode property names into type and
|
/* The pcre_utt[] table below translates Unicode property names into type and
|
||||||
code values. It is searched by binary chop, so must be in collating sequence of
|
code values. It is searched by binary chop, so must be in collating sequence of
|
||||||
name. Originally, the table contained pointers to the name strings in the first
|
name. Originally, the table contained pointers to the name strings in the first
|
||||||
@ -94,225 +198,531 @@ field of each entry. However, that leads to a large number of relocations when
|
|||||||
a shared library is dynamically loaded. A significant reduction is made by
|
a shared library is dynamically loaded. A significant reduction is made by
|
||||||
putting all the names into a single, large string and then using offsets in the
|
putting all the names into a single, large string and then using offsets in the
|
||||||
table itself. Maintenance is more error-prone, but frequent changes to this
|
table itself. Maintenance is more error-prone, but frequent changes to this
|
||||||
data is unlikely. */
|
data are unlikely.
|
||||||
|
|
||||||
const char _pcre_utt_names[] =
|
July 2008: There is now a script called maint/GenerateUtt.py that can be used
|
||||||
"Any\0"
|
to generate this data automatically instead of maintaining it by hand.
|
||||||
"Arabic\0"
|
|
||||||
"Armenian\0"
|
|
||||||
"Balinese\0"
|
|
||||||
"Bengali\0"
|
|
||||||
"Bopomofo\0"
|
|
||||||
"Braille\0"
|
|
||||||
"Buginese\0"
|
|
||||||
"Buhid\0"
|
|
||||||
"C\0"
|
|
||||||
"Canadian_Aboriginal\0"
|
|
||||||
"Cc\0"
|
|
||||||
"Cf\0"
|
|
||||||
"Cherokee\0"
|
|
||||||
"Cn\0"
|
|
||||||
"Co\0"
|
|
||||||
"Common\0"
|
|
||||||
"Coptic\0"
|
|
||||||
"Cs\0"
|
|
||||||
"Cuneiform\0"
|
|
||||||
"Cypriot\0"
|
|
||||||
"Cyrillic\0"
|
|
||||||
"Deseret\0"
|
|
||||||
"Devanagari\0"
|
|
||||||
"Ethiopic\0"
|
|
||||||
"Georgian\0"
|
|
||||||
"Glagolitic\0"
|
|
||||||
"Gothic\0"
|
|
||||||
"Greek\0"
|
|
||||||
"Gujarati\0"
|
|
||||||
"Gurmukhi\0"
|
|
||||||
"Han\0"
|
|
||||||
"Hangul\0"
|
|
||||||
"Hanunoo\0"
|
|
||||||
"Hebrew\0"
|
|
||||||
"Hiragana\0"
|
|
||||||
"Inherited\0"
|
|
||||||
"Kannada\0"
|
|
||||||
"Katakana\0"
|
|
||||||
"Kharoshthi\0"
|
|
||||||
"Khmer\0"
|
|
||||||
"L\0"
|
|
||||||
"L&\0"
|
|
||||||
"Lao\0"
|
|
||||||
"Latin\0"
|
|
||||||
"Limbu\0"
|
|
||||||
"Linear_B\0"
|
|
||||||
"Ll\0"
|
|
||||||
"Lm\0"
|
|
||||||
"Lo\0"
|
|
||||||
"Lt\0"
|
|
||||||
"Lu\0"
|
|
||||||
"M\0"
|
|
||||||
"Malayalam\0"
|
|
||||||
"Mc\0"
|
|
||||||
"Me\0"
|
|
||||||
"Mn\0"
|
|
||||||
"Mongolian\0"
|
|
||||||
"Myanmar\0"
|
|
||||||
"N\0"
|
|
||||||
"Nd\0"
|
|
||||||
"New_Tai_Lue\0"
|
|
||||||
"Nko\0"
|
|
||||||
"Nl\0"
|
|
||||||
"No\0"
|
|
||||||
"Ogham\0"
|
|
||||||
"Old_Italic\0"
|
|
||||||
"Old_Persian\0"
|
|
||||||
"Oriya\0"
|
|
||||||
"Osmanya\0"
|
|
||||||
"P\0"
|
|
||||||
"Pc\0"
|
|
||||||
"Pd\0"
|
|
||||||
"Pe\0"
|
|
||||||
"Pf\0"
|
|
||||||
"Phags_Pa\0"
|
|
||||||
"Phoenician\0"
|
|
||||||
"Pi\0"
|
|
||||||
"Po\0"
|
|
||||||
"Ps\0"
|
|
||||||
"Runic\0"
|
|
||||||
"S\0"
|
|
||||||
"Sc\0"
|
|
||||||
"Shavian\0"
|
|
||||||
"Sinhala\0"
|
|
||||||
"Sk\0"
|
|
||||||
"Sm\0"
|
|
||||||
"So\0"
|
|
||||||
"Syloti_Nagri\0"
|
|
||||||
"Syriac\0"
|
|
||||||
"Tagalog\0"
|
|
||||||
"Tagbanwa\0"
|
|
||||||
"Tai_Le\0"
|
|
||||||
"Tamil\0"
|
|
||||||
"Telugu\0"
|
|
||||||
"Thaana\0"
|
|
||||||
"Thai\0"
|
|
||||||
"Tibetan\0"
|
|
||||||
"Tifinagh\0"
|
|
||||||
"Ugaritic\0"
|
|
||||||
"Yi\0"
|
|
||||||
"Z\0"
|
|
||||||
"Zl\0"
|
|
||||||
"Zp\0"
|
|
||||||
"Zs\0";
|
|
||||||
|
|
||||||
const ucp_type_table _pcre_utt[] = {
|
The script was updated in March 2009 to generate a new EBCDIC-compliant
|
||||||
|
version. Like all other character and string literals that are compared against
|
||||||
|
the regular expression pattern, we must use STR_ macros instead of literal
|
||||||
|
strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||||
|
|
||||||
|
#define STRING_Any0 STR_A STR_n STR_y "\0"
|
||||||
|
#define STRING_Arabic0 STR_A STR_r STR_a STR_b STR_i STR_c "\0"
|
||||||
|
#define STRING_Armenian0 STR_A STR_r STR_m STR_e STR_n STR_i STR_a STR_n "\0"
|
||||||
|
#define STRING_Avestan0 STR_A STR_v STR_e STR_s STR_t STR_a STR_n "\0"
|
||||||
|
#define STRING_Balinese0 STR_B STR_a STR_l STR_i STR_n STR_e STR_s STR_e "\0"
|
||||||
|
#define STRING_Bamum0 STR_B STR_a STR_m STR_u STR_m "\0"
|
||||||
|
#define STRING_Bassa_Vah0 STR_B STR_a STR_s STR_s STR_a STR_UNDERSCORE STR_V STR_a STR_h "\0"
|
||||||
|
#define STRING_Batak0 STR_B STR_a STR_t STR_a STR_k "\0"
|
||||||
|
#define STRING_Bengali0 STR_B STR_e STR_n STR_g STR_a STR_l STR_i "\0"
|
||||||
|
#define STRING_Bopomofo0 STR_B STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0"
|
||||||
|
#define STRING_Brahmi0 STR_B STR_r STR_a STR_h STR_m STR_i "\0"
|
||||||
|
#define STRING_Braille0 STR_B STR_r STR_a STR_i STR_l STR_l STR_e "\0"
|
||||||
|
#define STRING_Buginese0 STR_B STR_u STR_g STR_i STR_n STR_e STR_s STR_e "\0"
|
||||||
|
#define STRING_Buhid0 STR_B STR_u STR_h STR_i STR_d "\0"
|
||||||
|
#define STRING_C0 STR_C "\0"
|
||||||
|
#define STRING_Canadian_Aboriginal0 STR_C STR_a STR_n STR_a STR_d STR_i STR_a STR_n STR_UNDERSCORE STR_A STR_b STR_o STR_r STR_i STR_g STR_i STR_n STR_a STR_l "\0"
|
||||||
|
#define STRING_Carian0 STR_C STR_a STR_r STR_i STR_a STR_n "\0"
|
||||||
|
#define STRING_Caucasian_Albanian0 STR_C STR_a STR_u STR_c STR_a STR_s STR_i STR_a STR_n STR_UNDERSCORE STR_A STR_l STR_b STR_a STR_n STR_i STR_a STR_n "\0"
|
||||||
|
#define STRING_Cc0 STR_C STR_c "\0"
|
||||||
|
#define STRING_Cf0 STR_C STR_f "\0"
|
||||||
|
#define STRING_Chakma0 STR_C STR_h STR_a STR_k STR_m STR_a "\0"
|
||||||
|
#define STRING_Cham0 STR_C STR_h STR_a STR_m "\0"
|
||||||
|
#define STRING_Cherokee0 STR_C STR_h STR_e STR_r STR_o STR_k STR_e STR_e "\0"
|
||||||
|
#define STRING_Cn0 STR_C STR_n "\0"
|
||||||
|
#define STRING_Co0 STR_C STR_o "\0"
|
||||||
|
#define STRING_Common0 STR_C STR_o STR_m STR_m STR_o STR_n "\0"
|
||||||
|
#define STRING_Coptic0 STR_C STR_o STR_p STR_t STR_i STR_c "\0"
|
||||||
|
#define STRING_Cs0 STR_C STR_s "\0"
|
||||||
|
#define STRING_Cuneiform0 STR_C STR_u STR_n STR_e STR_i STR_f STR_o STR_r STR_m "\0"
|
||||||
|
#define STRING_Cypriot0 STR_C STR_y STR_p STR_r STR_i STR_o STR_t "\0"
|
||||||
|
#define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0"
|
||||||
|
#define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0"
|
||||||
|
#define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0"
|
||||||
|
#define STRING_Duployan0 STR_D STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0"
|
||||||
|
#define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
|
||||||
|
#define STRING_Elbasan0 STR_E STR_l STR_b STR_a STR_s STR_a STR_n "\0"
|
||||||
|
#define STRING_Ethiopic0 STR_E STR_t STR_h STR_i STR_o STR_p STR_i STR_c "\0"
|
||||||
|
#define STRING_Georgian0 STR_G STR_e STR_o STR_r STR_g STR_i STR_a STR_n "\0"
|
||||||
|
#define STRING_Glagolitic0 STR_G STR_l STR_a STR_g STR_o STR_l STR_i STR_t STR_i STR_c "\0"
|
||||||
|
#define STRING_Gothic0 STR_G STR_o STR_t STR_h STR_i STR_c "\0"
|
||||||
|
#define STRING_Grantha0 STR_G STR_r STR_a STR_n STR_t STR_h STR_a "\0"
|
||||||
|
#define STRING_Greek0 STR_G STR_r STR_e STR_e STR_k "\0"
|
||||||
|
#define STRING_Gujarati0 STR_G STR_u STR_j STR_a STR_r STR_a STR_t STR_i "\0"
|
||||||
|
#define STRING_Gurmukhi0 STR_G STR_u STR_r STR_m STR_u STR_k STR_h STR_i "\0"
|
||||||
|
#define STRING_Han0 STR_H STR_a STR_n "\0"
|
||||||
|
#define STRING_Hangul0 STR_H STR_a STR_n STR_g STR_u STR_l "\0"
|
||||||
|
#define STRING_Hanunoo0 STR_H STR_a STR_n STR_u STR_n STR_o STR_o "\0"
|
||||||
|
#define STRING_Hebrew0 STR_H STR_e STR_b STR_r STR_e STR_w "\0"
|
||||||
|
#define STRING_Hiragana0 STR_H STR_i STR_r STR_a STR_g STR_a STR_n STR_a "\0"
|
||||||
|
#define STRING_Imperial_Aramaic0 STR_I STR_m STR_p STR_e STR_r STR_i STR_a STR_l STR_UNDERSCORE STR_A STR_r STR_a STR_m STR_a STR_i STR_c "\0"
|
||||||
|
#define STRING_Inherited0 STR_I STR_n STR_h STR_e STR_r STR_i STR_t STR_e STR_d "\0"
|
||||||
|
#define STRING_Inscriptional_Pahlavi0 STR_I STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_UNDERSCORE STR_P STR_a STR_h STR_l STR_a STR_v STR_i "\0"
|
||||||
|
#define STRING_Inscriptional_Parthian0 STR_I STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_UNDERSCORE STR_P STR_a STR_r STR_t STR_h STR_i STR_a STR_n "\0"
|
||||||
|
#define STRING_Javanese0 STR_J STR_a STR_v STR_a STR_n STR_e STR_s STR_e "\0"
|
||||||
|
#define STRING_Kaithi0 STR_K STR_a STR_i STR_t STR_h STR_i "\0"
|
||||||
|
#define STRING_Kannada0 STR_K STR_a STR_n STR_n STR_a STR_d STR_a "\0"
|
||||||
|
#define STRING_Katakana0 STR_K STR_a STR_t STR_a STR_k STR_a STR_n STR_a "\0"
|
||||||
|
#define STRING_Kayah_Li0 STR_K STR_a STR_y STR_a STR_h STR_UNDERSCORE STR_L STR_i "\0"
|
||||||
|
#define STRING_Kharoshthi0 STR_K STR_h STR_a STR_r STR_o STR_s STR_h STR_t STR_h STR_i "\0"
|
||||||
|
#define STRING_Khmer0 STR_K STR_h STR_m STR_e STR_r "\0"
|
||||||
|
#define STRING_Khojki0 STR_K STR_h STR_o STR_j STR_k STR_i "\0"
|
||||||
|
#define STRING_Khudawadi0 STR_K STR_h STR_u STR_d STR_a STR_w STR_a STR_d STR_i "\0"
|
||||||
|
#define STRING_L0 STR_L "\0"
|
||||||
|
#define STRING_L_AMPERSAND0 STR_L STR_AMPERSAND "\0"
|
||||||
|
#define STRING_Lao0 STR_L STR_a STR_o "\0"
|
||||||
|
#define STRING_Latin0 STR_L STR_a STR_t STR_i STR_n "\0"
|
||||||
|
#define STRING_Lepcha0 STR_L STR_e STR_p STR_c STR_h STR_a "\0"
|
||||||
|
#define STRING_Limbu0 STR_L STR_i STR_m STR_b STR_u "\0"
|
||||||
|
#define STRING_Linear_A0 STR_L STR_i STR_n STR_e STR_a STR_r STR_UNDERSCORE STR_A "\0"
|
||||||
|
#define STRING_Linear_B0 STR_L STR_i STR_n STR_e STR_a STR_r STR_UNDERSCORE STR_B "\0"
|
||||||
|
#define STRING_Lisu0 STR_L STR_i STR_s STR_u "\0"
|
||||||
|
#define STRING_Ll0 STR_L STR_l "\0"
|
||||||
|
#define STRING_Lm0 STR_L STR_m "\0"
|
||||||
|
#define STRING_Lo0 STR_L STR_o "\0"
|
||||||
|
#define STRING_Lt0 STR_L STR_t "\0"
|
||||||
|
#define STRING_Lu0 STR_L STR_u "\0"
|
||||||
|
#define STRING_Lycian0 STR_L STR_y STR_c STR_i STR_a STR_n "\0"
|
||||||
|
#define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0"
|
||||||
|
#define STRING_M0 STR_M "\0"
|
||||||
|
#define STRING_Mahajani0 STR_M STR_a STR_h STR_a STR_j STR_a STR_n STR_i "\0"
|
||||||
|
#define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0"
|
||||||
|
#define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0"
|
||||||
|
#define STRING_Manichaean0 STR_M STR_a STR_n STR_i STR_c STR_h STR_a STR_e STR_a STR_n "\0"
|
||||||
|
#define STRING_Mc0 STR_M STR_c "\0"
|
||||||
|
#define STRING_Me0 STR_M STR_e "\0"
|
||||||
|
#define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0"
|
||||||
|
#define STRING_Mende_Kikakui0 STR_M STR_e STR_n STR_d STR_e STR_UNDERSCORE STR_K STR_i STR_k STR_a STR_k STR_u STR_i "\0"
|
||||||
|
#define STRING_Meroitic_Cursive0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_C STR_u STR_r STR_s STR_i STR_v STR_e "\0"
|
||||||
|
#define STRING_Meroitic_Hieroglyphs0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
|
||||||
|
#define STRING_Miao0 STR_M STR_i STR_a STR_o "\0"
|
||||||
|
#define STRING_Mn0 STR_M STR_n "\0"
|
||||||
|
#define STRING_Modi0 STR_M STR_o STR_d STR_i "\0"
|
||||||
|
#define STRING_Mongolian0 STR_M STR_o STR_n STR_g STR_o STR_l STR_i STR_a STR_n "\0"
|
||||||
|
#define STRING_Mro0 STR_M STR_r STR_o "\0"
|
||||||
|
#define STRING_Myanmar0 STR_M STR_y STR_a STR_n STR_m STR_a STR_r "\0"
|
||||||
|
#define STRING_N0 STR_N "\0"
|
||||||
|
#define STRING_Nabataean0 STR_N STR_a STR_b STR_a STR_t STR_a STR_e STR_a STR_n "\0"
|
||||||
|
#define STRING_Nd0 STR_N STR_d "\0"
|
||||||
|
#define STRING_New_Tai_Lue0 STR_N STR_e STR_w STR_UNDERSCORE STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_u STR_e "\0"
|
||||||
|
#define STRING_Nko0 STR_N STR_k STR_o "\0"
|
||||||
|
#define STRING_Nl0 STR_N STR_l "\0"
|
||||||
|
#define STRING_No0 STR_N STR_o "\0"
|
||||||
|
#define STRING_Ogham0 STR_O STR_g STR_h STR_a STR_m "\0"
|
||||||
|
#define STRING_Ol_Chiki0 STR_O STR_l STR_UNDERSCORE STR_C STR_h STR_i STR_k STR_i "\0"
|
||||||
|
#define STRING_Old_Italic0 STR_O STR_l STR_d STR_UNDERSCORE STR_I STR_t STR_a STR_l STR_i STR_c "\0"
|
||||||
|
#define STRING_Old_North_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_N STR_o STR_r STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
|
||||||
|
#define STRING_Old_Permic0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_m STR_i STR_c "\0"
|
||||||
|
#define STRING_Old_Persian0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_s STR_i STR_a STR_n "\0"
|
||||||
|
#define STRING_Old_South_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_u STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
|
||||||
|
#define STRING_Old_Turkic0 STR_O STR_l STR_d STR_UNDERSCORE STR_T STR_u STR_r STR_k STR_i STR_c "\0"
|
||||||
|
#define STRING_Oriya0 STR_O STR_r STR_i STR_y STR_a "\0"
|
||||||
|
#define STRING_Osmanya0 STR_O STR_s STR_m STR_a STR_n STR_y STR_a "\0"
|
||||||
|
#define STRING_P0 STR_P "\0"
|
||||||
|
#define STRING_Pahawh_Hmong0 STR_P STR_a STR_h STR_a STR_w STR_h STR_UNDERSCORE STR_H STR_m STR_o STR_n STR_g "\0"
|
||||||
|
#define STRING_Palmyrene0 STR_P STR_a STR_l STR_m STR_y STR_r STR_e STR_n STR_e "\0"
|
||||||
|
#define STRING_Pau_Cin_Hau0 STR_P STR_a STR_u STR_UNDERSCORE STR_C STR_i STR_n STR_UNDERSCORE STR_H STR_a STR_u "\0"
|
||||||
|
#define STRING_Pc0 STR_P STR_c "\0"
|
||||||
|
#define STRING_Pd0 STR_P STR_d "\0"
|
||||||
|
#define STRING_Pe0 STR_P STR_e "\0"
|
||||||
|
#define STRING_Pf0 STR_P STR_f "\0"
|
||||||
|
#define STRING_Phags_Pa0 STR_P STR_h STR_a STR_g STR_s STR_UNDERSCORE STR_P STR_a "\0"
|
||||||
|
#define STRING_Phoenician0 STR_P STR_h STR_o STR_e STR_n STR_i STR_c STR_i STR_a STR_n "\0"
|
||||||
|
#define STRING_Pi0 STR_P STR_i "\0"
|
||||||
|
#define STRING_Po0 STR_P STR_o "\0"
|
||||||
|
#define STRING_Ps0 STR_P STR_s "\0"
|
||||||
|
#define STRING_Psalter_Pahlavi0 STR_P STR_s STR_a STR_l STR_t STR_e STR_r STR_UNDERSCORE STR_P STR_a STR_h STR_l STR_a STR_v STR_i "\0"
|
||||||
|
#define STRING_Rejang0 STR_R STR_e STR_j STR_a STR_n STR_g "\0"
|
||||||
|
#define STRING_Runic0 STR_R STR_u STR_n STR_i STR_c "\0"
|
||||||
|
#define STRING_S0 STR_S "\0"
|
||||||
|
#define STRING_Samaritan0 STR_S STR_a STR_m STR_a STR_r STR_i STR_t STR_a STR_n "\0"
|
||||||
|
#define STRING_Saurashtra0 STR_S STR_a STR_u STR_r STR_a STR_s STR_h STR_t STR_r STR_a "\0"
|
||||||
|
#define STRING_Sc0 STR_S STR_c "\0"
|
||||||
|
#define STRING_Sharada0 STR_S STR_h STR_a STR_r STR_a STR_d STR_a "\0"
|
||||||
|
#define STRING_Shavian0 STR_S STR_h STR_a STR_v STR_i STR_a STR_n "\0"
|
||||||
|
#define STRING_Siddham0 STR_S STR_i STR_d STR_d STR_h STR_a STR_m "\0"
|
||||||
|
#define STRING_Sinhala0 STR_S STR_i STR_n STR_h STR_a STR_l STR_a "\0"
|
||||||
|
#define STRING_Sk0 STR_S STR_k "\0"
|
||||||
|
#define STRING_Sm0 STR_S STR_m "\0"
|
||||||
|
#define STRING_So0 STR_S STR_o "\0"
|
||||||
|
#define STRING_Sora_Sompeng0 STR_S STR_o STR_r STR_a STR_UNDERSCORE STR_S STR_o STR_m STR_p STR_e STR_n STR_g "\0"
|
||||||
|
#define STRING_Sundanese0 STR_S STR_u STR_n STR_d STR_a STR_n STR_e STR_s STR_e "\0"
|
||||||
|
#define STRING_Syloti_Nagri0 STR_S STR_y STR_l STR_o STR_t STR_i STR_UNDERSCORE STR_N STR_a STR_g STR_r STR_i "\0"
|
||||||
|
#define STRING_Syriac0 STR_S STR_y STR_r STR_i STR_a STR_c "\0"
|
||||||
|
#define STRING_Tagalog0 STR_T STR_a STR_g STR_a STR_l STR_o STR_g "\0"
|
||||||
|
#define STRING_Tagbanwa0 STR_T STR_a STR_g STR_b STR_a STR_n STR_w STR_a "\0"
|
||||||
|
#define STRING_Tai_Le0 STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_e "\0"
|
||||||
|
#define STRING_Tai_Tham0 STR_T STR_a STR_i STR_UNDERSCORE STR_T STR_h STR_a STR_m "\0"
|
||||||
|
#define STRING_Tai_Viet0 STR_T STR_a STR_i STR_UNDERSCORE STR_V STR_i STR_e STR_t "\0"
|
||||||
|
#define STRING_Takri0 STR_T STR_a STR_k STR_r STR_i "\0"
|
||||||
|
#define STRING_Tamil0 STR_T STR_a STR_m STR_i STR_l "\0"
|
||||||
|
#define STRING_Telugu0 STR_T STR_e STR_l STR_u STR_g STR_u "\0"
|
||||||
|
#define STRING_Thaana0 STR_T STR_h STR_a STR_a STR_n STR_a "\0"
|
||||||
|
#define STRING_Thai0 STR_T STR_h STR_a STR_i "\0"
|
||||||
|
#define STRING_Tibetan0 STR_T STR_i STR_b STR_e STR_t STR_a STR_n "\0"
|
||||||
|
#define STRING_Tifinagh0 STR_T STR_i STR_f STR_i STR_n STR_a STR_g STR_h "\0"
|
||||||
|
#define STRING_Tirhuta0 STR_T STR_i STR_r STR_h STR_u STR_t STR_a "\0"
|
||||||
|
#define STRING_Ugaritic0 STR_U STR_g STR_a STR_r STR_i STR_t STR_i STR_c "\0"
|
||||||
|
#define STRING_Vai0 STR_V STR_a STR_i "\0"
|
||||||
|
#define STRING_Warang_Citi0 STR_W STR_a STR_r STR_a STR_n STR_g STR_UNDERSCORE STR_C STR_i STR_t STR_i "\0"
|
||||||
|
#define STRING_Xan0 STR_X STR_a STR_n "\0"
|
||||||
|
#define STRING_Xps0 STR_X STR_p STR_s "\0"
|
||||||
|
#define STRING_Xsp0 STR_X STR_s STR_p "\0"
|
||||||
|
#define STRING_Xuc0 STR_X STR_u STR_c "\0"
|
||||||
|
#define STRING_Xwd0 STR_X STR_w STR_d "\0"
|
||||||
|
#define STRING_Yi0 STR_Y STR_i "\0"
|
||||||
|
#define STRING_Z0 STR_Z "\0"
|
||||||
|
#define STRING_Zl0 STR_Z STR_l "\0"
|
||||||
|
#define STRING_Zp0 STR_Z STR_p "\0"
|
||||||
|
#define STRING_Zs0 STR_Z STR_s "\0"
|
||||||
|
|
||||||
|
const char PRIV(utt_names)[] =
|
||||||
|
STRING_Any0
|
||||||
|
STRING_Arabic0
|
||||||
|
STRING_Armenian0
|
||||||
|
STRING_Avestan0
|
||||||
|
STRING_Balinese0
|
||||||
|
STRING_Bamum0
|
||||||
|
STRING_Bassa_Vah0
|
||||||
|
STRING_Batak0
|
||||||
|
STRING_Bengali0
|
||||||
|
STRING_Bopomofo0
|
||||||
|
STRING_Brahmi0
|
||||||
|
STRING_Braille0
|
||||||
|
STRING_Buginese0
|
||||||
|
STRING_Buhid0
|
||||||
|
STRING_C0
|
||||||
|
STRING_Canadian_Aboriginal0
|
||||||
|
STRING_Carian0
|
||||||
|
STRING_Caucasian_Albanian0
|
||||||
|
STRING_Cc0
|
||||||
|
STRING_Cf0
|
||||||
|
STRING_Chakma0
|
||||||
|
STRING_Cham0
|
||||||
|
STRING_Cherokee0
|
||||||
|
STRING_Cn0
|
||||||
|
STRING_Co0
|
||||||
|
STRING_Common0
|
||||||
|
STRING_Coptic0
|
||||||
|
STRING_Cs0
|
||||||
|
STRING_Cuneiform0
|
||||||
|
STRING_Cypriot0
|
||||||
|
STRING_Cyrillic0
|
||||||
|
STRING_Deseret0
|
||||||
|
STRING_Devanagari0
|
||||||
|
STRING_Duployan0
|
||||||
|
STRING_Egyptian_Hieroglyphs0
|
||||||
|
STRING_Elbasan0
|
||||||
|
STRING_Ethiopic0
|
||||||
|
STRING_Georgian0
|
||||||
|
STRING_Glagolitic0
|
||||||
|
STRING_Gothic0
|
||||||
|
STRING_Grantha0
|
||||||
|
STRING_Greek0
|
||||||
|
STRING_Gujarati0
|
||||||
|
STRING_Gurmukhi0
|
||||||
|
STRING_Han0
|
||||||
|
STRING_Hangul0
|
||||||
|
STRING_Hanunoo0
|
||||||
|
STRING_Hebrew0
|
||||||
|
STRING_Hiragana0
|
||||||
|
STRING_Imperial_Aramaic0
|
||||||
|
STRING_Inherited0
|
||||||
|
STRING_Inscriptional_Pahlavi0
|
||||||
|
STRING_Inscriptional_Parthian0
|
||||||
|
STRING_Javanese0
|
||||||
|
STRING_Kaithi0
|
||||||
|
STRING_Kannada0
|
||||||
|
STRING_Katakana0
|
||||||
|
STRING_Kayah_Li0
|
||||||
|
STRING_Kharoshthi0
|
||||||
|
STRING_Khmer0
|
||||||
|
STRING_Khojki0
|
||||||
|
STRING_Khudawadi0
|
||||||
|
STRING_L0
|
||||||
|
STRING_L_AMPERSAND0
|
||||||
|
STRING_Lao0
|
||||||
|
STRING_Latin0
|
||||||
|
STRING_Lepcha0
|
||||||
|
STRING_Limbu0
|
||||||
|
STRING_Linear_A0
|
||||||
|
STRING_Linear_B0
|
||||||
|
STRING_Lisu0
|
||||||
|
STRING_Ll0
|
||||||
|
STRING_Lm0
|
||||||
|
STRING_Lo0
|
||||||
|
STRING_Lt0
|
||||||
|
STRING_Lu0
|
||||||
|
STRING_Lycian0
|
||||||
|
STRING_Lydian0
|
||||||
|
STRING_M0
|
||||||
|
STRING_Mahajani0
|
||||||
|
STRING_Malayalam0
|
||||||
|
STRING_Mandaic0
|
||||||
|
STRING_Manichaean0
|
||||||
|
STRING_Mc0
|
||||||
|
STRING_Me0
|
||||||
|
STRING_Meetei_Mayek0
|
||||||
|
STRING_Mende_Kikakui0
|
||||||
|
STRING_Meroitic_Cursive0
|
||||||
|
STRING_Meroitic_Hieroglyphs0
|
||||||
|
STRING_Miao0
|
||||||
|
STRING_Mn0
|
||||||
|
STRING_Modi0
|
||||||
|
STRING_Mongolian0
|
||||||
|
STRING_Mro0
|
||||||
|
STRING_Myanmar0
|
||||||
|
STRING_N0
|
||||||
|
STRING_Nabataean0
|
||||||
|
STRING_Nd0
|
||||||
|
STRING_New_Tai_Lue0
|
||||||
|
STRING_Nko0
|
||||||
|
STRING_Nl0
|
||||||
|
STRING_No0
|
||||||
|
STRING_Ogham0
|
||||||
|
STRING_Ol_Chiki0
|
||||||
|
STRING_Old_Italic0
|
||||||
|
STRING_Old_North_Arabian0
|
||||||
|
STRING_Old_Permic0
|
||||||
|
STRING_Old_Persian0
|
||||||
|
STRING_Old_South_Arabian0
|
||||||
|
STRING_Old_Turkic0
|
||||||
|
STRING_Oriya0
|
||||||
|
STRING_Osmanya0
|
||||||
|
STRING_P0
|
||||||
|
STRING_Pahawh_Hmong0
|
||||||
|
STRING_Palmyrene0
|
||||||
|
STRING_Pau_Cin_Hau0
|
||||||
|
STRING_Pc0
|
||||||
|
STRING_Pd0
|
||||||
|
STRING_Pe0
|
||||||
|
STRING_Pf0
|
||||||
|
STRING_Phags_Pa0
|
||||||
|
STRING_Phoenician0
|
||||||
|
STRING_Pi0
|
||||||
|
STRING_Po0
|
||||||
|
STRING_Ps0
|
||||||
|
STRING_Psalter_Pahlavi0
|
||||||
|
STRING_Rejang0
|
||||||
|
STRING_Runic0
|
||||||
|
STRING_S0
|
||||||
|
STRING_Samaritan0
|
||||||
|
STRING_Saurashtra0
|
||||||
|
STRING_Sc0
|
||||||
|
STRING_Sharada0
|
||||||
|
STRING_Shavian0
|
||||||
|
STRING_Siddham0
|
||||||
|
STRING_Sinhala0
|
||||||
|
STRING_Sk0
|
||||||
|
STRING_Sm0
|
||||||
|
STRING_So0
|
||||||
|
STRING_Sora_Sompeng0
|
||||||
|
STRING_Sundanese0
|
||||||
|
STRING_Syloti_Nagri0
|
||||||
|
STRING_Syriac0
|
||||||
|
STRING_Tagalog0
|
||||||
|
STRING_Tagbanwa0
|
||||||
|
STRING_Tai_Le0
|
||||||
|
STRING_Tai_Tham0
|
||||||
|
STRING_Tai_Viet0
|
||||||
|
STRING_Takri0
|
||||||
|
STRING_Tamil0
|
||||||
|
STRING_Telugu0
|
||||||
|
STRING_Thaana0
|
||||||
|
STRING_Thai0
|
||||||
|
STRING_Tibetan0
|
||||||
|
STRING_Tifinagh0
|
||||||
|
STRING_Tirhuta0
|
||||||
|
STRING_Ugaritic0
|
||||||
|
STRING_Vai0
|
||||||
|
STRING_Warang_Citi0
|
||||||
|
STRING_Xan0
|
||||||
|
STRING_Xps0
|
||||||
|
STRING_Xsp0
|
||||||
|
STRING_Xuc0
|
||||||
|
STRING_Xwd0
|
||||||
|
STRING_Yi0
|
||||||
|
STRING_Z0
|
||||||
|
STRING_Zl0
|
||||||
|
STRING_Zp0
|
||||||
|
STRING_Zs0;
|
||||||
|
|
||||||
|
const ucp_type_table PRIV(utt)[] = {
|
||||||
{ 0, PT_ANY, 0 },
|
{ 0, PT_ANY, 0 },
|
||||||
{ 4, PT_SC, ucp_Arabic },
|
{ 4, PT_SC, ucp_Arabic },
|
||||||
{ 11, PT_SC, ucp_Armenian },
|
{ 11, PT_SC, ucp_Armenian },
|
||||||
{ 20, PT_SC, ucp_Balinese },
|
{ 20, PT_SC, ucp_Avestan },
|
||||||
{ 29, PT_SC, ucp_Bengali },
|
{ 28, PT_SC, ucp_Balinese },
|
||||||
{ 37, PT_SC, ucp_Bopomofo },
|
{ 37, PT_SC, ucp_Bamum },
|
||||||
{ 46, PT_SC, ucp_Braille },
|
{ 43, PT_SC, ucp_Bassa_Vah },
|
||||||
{ 54, PT_SC, ucp_Buginese },
|
{ 53, PT_SC, ucp_Batak },
|
||||||
{ 63, PT_SC, ucp_Buhid },
|
{ 59, PT_SC, ucp_Bengali },
|
||||||
{ 69, PT_GC, ucp_C },
|
{ 67, PT_SC, ucp_Bopomofo },
|
||||||
{ 71, PT_SC, ucp_Canadian_Aboriginal },
|
{ 76, PT_SC, ucp_Brahmi },
|
||||||
{ 91, PT_PC, ucp_Cc },
|
{ 83, PT_SC, ucp_Braille },
|
||||||
{ 94, PT_PC, ucp_Cf },
|
{ 91, PT_SC, ucp_Buginese },
|
||||||
{ 97, PT_SC, ucp_Cherokee },
|
{ 100, PT_SC, ucp_Buhid },
|
||||||
{ 106, PT_PC, ucp_Cn },
|
{ 106, PT_GC, ucp_C },
|
||||||
{ 109, PT_PC, ucp_Co },
|
{ 108, PT_SC, ucp_Canadian_Aboriginal },
|
||||||
{ 112, PT_SC, ucp_Common },
|
{ 128, PT_SC, ucp_Carian },
|
||||||
{ 119, PT_SC, ucp_Coptic },
|
{ 135, PT_SC, ucp_Caucasian_Albanian },
|
||||||
{ 126, PT_PC, ucp_Cs },
|
{ 154, PT_PC, ucp_Cc },
|
||||||
{ 129, PT_SC, ucp_Cuneiform },
|
{ 157, PT_PC, ucp_Cf },
|
||||||
{ 139, PT_SC, ucp_Cypriot },
|
{ 160, PT_SC, ucp_Chakma },
|
||||||
{ 147, PT_SC, ucp_Cyrillic },
|
{ 167, PT_SC, ucp_Cham },
|
||||||
{ 156, PT_SC, ucp_Deseret },
|
{ 172, PT_SC, ucp_Cherokee },
|
||||||
{ 164, PT_SC, ucp_Devanagari },
|
{ 181, PT_PC, ucp_Cn },
|
||||||
{ 175, PT_SC, ucp_Ethiopic },
|
{ 184, PT_PC, ucp_Co },
|
||||||
{ 184, PT_SC, ucp_Georgian },
|
{ 187, PT_SC, ucp_Common },
|
||||||
{ 193, PT_SC, ucp_Glagolitic },
|
{ 194, PT_SC, ucp_Coptic },
|
||||||
{ 204, PT_SC, ucp_Gothic },
|
{ 201, PT_PC, ucp_Cs },
|
||||||
{ 211, PT_SC, ucp_Greek },
|
{ 204, PT_SC, ucp_Cuneiform },
|
||||||
{ 217, PT_SC, ucp_Gujarati },
|
{ 214, PT_SC, ucp_Cypriot },
|
||||||
{ 226, PT_SC, ucp_Gurmukhi },
|
{ 222, PT_SC, ucp_Cyrillic },
|
||||||
{ 235, PT_SC, ucp_Han },
|
{ 231, PT_SC, ucp_Deseret },
|
||||||
{ 239, PT_SC, ucp_Hangul },
|
{ 239, PT_SC, ucp_Devanagari },
|
||||||
{ 246, PT_SC, ucp_Hanunoo },
|
{ 250, PT_SC, ucp_Duployan },
|
||||||
{ 254, PT_SC, ucp_Hebrew },
|
{ 259, PT_SC, ucp_Egyptian_Hieroglyphs },
|
||||||
{ 261, PT_SC, ucp_Hiragana },
|
{ 280, PT_SC, ucp_Elbasan },
|
||||||
{ 270, PT_SC, ucp_Inherited },
|
{ 288, PT_SC, ucp_Ethiopic },
|
||||||
{ 280, PT_SC, ucp_Kannada },
|
{ 297, PT_SC, ucp_Georgian },
|
||||||
{ 288, PT_SC, ucp_Katakana },
|
{ 306, PT_SC, ucp_Glagolitic },
|
||||||
{ 297, PT_SC, ucp_Kharoshthi },
|
{ 317, PT_SC, ucp_Gothic },
|
||||||
{ 308, PT_SC, ucp_Khmer },
|
{ 324, PT_SC, ucp_Grantha },
|
||||||
{ 314, PT_GC, ucp_L },
|
{ 332, PT_SC, ucp_Greek },
|
||||||
{ 316, PT_LAMP, 0 },
|
{ 338, PT_SC, ucp_Gujarati },
|
||||||
{ 319, PT_SC, ucp_Lao },
|
{ 347, PT_SC, ucp_Gurmukhi },
|
||||||
{ 323, PT_SC, ucp_Latin },
|
{ 356, PT_SC, ucp_Han },
|
||||||
{ 329, PT_SC, ucp_Limbu },
|
{ 360, PT_SC, ucp_Hangul },
|
||||||
{ 335, PT_SC, ucp_Linear_B },
|
{ 367, PT_SC, ucp_Hanunoo },
|
||||||
{ 344, PT_PC, ucp_Ll },
|
{ 375, PT_SC, ucp_Hebrew },
|
||||||
{ 347, PT_PC, ucp_Lm },
|
{ 382, PT_SC, ucp_Hiragana },
|
||||||
{ 350, PT_PC, ucp_Lo },
|
{ 391, PT_SC, ucp_Imperial_Aramaic },
|
||||||
{ 353, PT_PC, ucp_Lt },
|
{ 408, PT_SC, ucp_Inherited },
|
||||||
{ 356, PT_PC, ucp_Lu },
|
{ 418, PT_SC, ucp_Inscriptional_Pahlavi },
|
||||||
{ 359, PT_GC, ucp_M },
|
{ 440, PT_SC, ucp_Inscriptional_Parthian },
|
||||||
{ 361, PT_SC, ucp_Malayalam },
|
{ 463, PT_SC, ucp_Javanese },
|
||||||
{ 371, PT_PC, ucp_Mc },
|
{ 472, PT_SC, ucp_Kaithi },
|
||||||
{ 374, PT_PC, ucp_Me },
|
{ 479, PT_SC, ucp_Kannada },
|
||||||
{ 377, PT_PC, ucp_Mn },
|
{ 487, PT_SC, ucp_Katakana },
|
||||||
{ 380, PT_SC, ucp_Mongolian },
|
{ 496, PT_SC, ucp_Kayah_Li },
|
||||||
{ 390, PT_SC, ucp_Myanmar },
|
{ 505, PT_SC, ucp_Kharoshthi },
|
||||||
{ 398, PT_GC, ucp_N },
|
{ 516, PT_SC, ucp_Khmer },
|
||||||
{ 400, PT_PC, ucp_Nd },
|
{ 522, PT_SC, ucp_Khojki },
|
||||||
{ 403, PT_SC, ucp_New_Tai_Lue },
|
{ 529, PT_SC, ucp_Khudawadi },
|
||||||
{ 415, PT_SC, ucp_Nko },
|
{ 539, PT_GC, ucp_L },
|
||||||
{ 419, PT_PC, ucp_Nl },
|
{ 541, PT_LAMP, 0 },
|
||||||
{ 422, PT_PC, ucp_No },
|
{ 544, PT_SC, ucp_Lao },
|
||||||
{ 425, PT_SC, ucp_Ogham },
|
{ 548, PT_SC, ucp_Latin },
|
||||||
{ 431, PT_SC, ucp_Old_Italic },
|
{ 554, PT_SC, ucp_Lepcha },
|
||||||
{ 442, PT_SC, ucp_Old_Persian },
|
{ 561, PT_SC, ucp_Limbu },
|
||||||
{ 454, PT_SC, ucp_Oriya },
|
{ 567, PT_SC, ucp_Linear_A },
|
||||||
{ 460, PT_SC, ucp_Osmanya },
|
{ 576, PT_SC, ucp_Linear_B },
|
||||||
{ 468, PT_GC, ucp_P },
|
{ 585, PT_SC, ucp_Lisu },
|
||||||
{ 470, PT_PC, ucp_Pc },
|
{ 590, PT_PC, ucp_Ll },
|
||||||
{ 473, PT_PC, ucp_Pd },
|
{ 593, PT_PC, ucp_Lm },
|
||||||
{ 476, PT_PC, ucp_Pe },
|
{ 596, PT_PC, ucp_Lo },
|
||||||
{ 479, PT_PC, ucp_Pf },
|
{ 599, PT_PC, ucp_Lt },
|
||||||
{ 482, PT_SC, ucp_Phags_Pa },
|
{ 602, PT_PC, ucp_Lu },
|
||||||
{ 491, PT_SC, ucp_Phoenician },
|
{ 605, PT_SC, ucp_Lycian },
|
||||||
{ 502, PT_PC, ucp_Pi },
|
{ 612, PT_SC, ucp_Lydian },
|
||||||
{ 505, PT_PC, ucp_Po },
|
{ 619, PT_GC, ucp_M },
|
||||||
{ 508, PT_PC, ucp_Ps },
|
{ 621, PT_SC, ucp_Mahajani },
|
||||||
{ 511, PT_SC, ucp_Runic },
|
{ 630, PT_SC, ucp_Malayalam },
|
||||||
{ 517, PT_GC, ucp_S },
|
{ 640, PT_SC, ucp_Mandaic },
|
||||||
{ 519, PT_PC, ucp_Sc },
|
{ 648, PT_SC, ucp_Manichaean },
|
||||||
{ 522, PT_SC, ucp_Shavian },
|
{ 659, PT_PC, ucp_Mc },
|
||||||
{ 530, PT_SC, ucp_Sinhala },
|
{ 662, PT_PC, ucp_Me },
|
||||||
{ 538, PT_PC, ucp_Sk },
|
{ 665, PT_SC, ucp_Meetei_Mayek },
|
||||||
{ 541, PT_PC, ucp_Sm },
|
{ 678, PT_SC, ucp_Mende_Kikakui },
|
||||||
{ 544, PT_PC, ucp_So },
|
{ 692, PT_SC, ucp_Meroitic_Cursive },
|
||||||
{ 547, PT_SC, ucp_Syloti_Nagri },
|
{ 709, PT_SC, ucp_Meroitic_Hieroglyphs },
|
||||||
{ 560, PT_SC, ucp_Syriac },
|
{ 730, PT_SC, ucp_Miao },
|
||||||
{ 567, PT_SC, ucp_Tagalog },
|
{ 735, PT_PC, ucp_Mn },
|
||||||
{ 575, PT_SC, ucp_Tagbanwa },
|
{ 738, PT_SC, ucp_Modi },
|
||||||
{ 584, PT_SC, ucp_Tai_Le },
|
{ 743, PT_SC, ucp_Mongolian },
|
||||||
{ 591, PT_SC, ucp_Tamil },
|
{ 753, PT_SC, ucp_Mro },
|
||||||
{ 597, PT_SC, ucp_Telugu },
|
{ 757, PT_SC, ucp_Myanmar },
|
||||||
{ 604, PT_SC, ucp_Thaana },
|
{ 765, PT_GC, ucp_N },
|
||||||
{ 611, PT_SC, ucp_Thai },
|
{ 767, PT_SC, ucp_Nabataean },
|
||||||
{ 616, PT_SC, ucp_Tibetan },
|
{ 777, PT_PC, ucp_Nd },
|
||||||
{ 624, PT_SC, ucp_Tifinagh },
|
{ 780, PT_SC, ucp_New_Tai_Lue },
|
||||||
{ 633, PT_SC, ucp_Ugaritic },
|
{ 792, PT_SC, ucp_Nko },
|
||||||
{ 642, PT_SC, ucp_Yi },
|
{ 796, PT_PC, ucp_Nl },
|
||||||
{ 645, PT_GC, ucp_Z },
|
{ 799, PT_PC, ucp_No },
|
||||||
{ 647, PT_PC, ucp_Zl },
|
{ 802, PT_SC, ucp_Ogham },
|
||||||
{ 650, PT_PC, ucp_Zp },
|
{ 808, PT_SC, ucp_Ol_Chiki },
|
||||||
{ 653, PT_PC, ucp_Zs }
|
{ 817, PT_SC, ucp_Old_Italic },
|
||||||
|
{ 828, PT_SC, ucp_Old_North_Arabian },
|
||||||
|
{ 846, PT_SC, ucp_Old_Permic },
|
||||||
|
{ 857, PT_SC, ucp_Old_Persian },
|
||||||
|
{ 869, PT_SC, ucp_Old_South_Arabian },
|
||||||
|
{ 887, PT_SC, ucp_Old_Turkic },
|
||||||
|
{ 898, PT_SC, ucp_Oriya },
|
||||||
|
{ 904, PT_SC, ucp_Osmanya },
|
||||||
|
{ 912, PT_GC, ucp_P },
|
||||||
|
{ 914, PT_SC, ucp_Pahawh_Hmong },
|
||||||
|
{ 927, PT_SC, ucp_Palmyrene },
|
||||||
|
{ 937, PT_SC, ucp_Pau_Cin_Hau },
|
||||||
|
{ 949, PT_PC, ucp_Pc },
|
||||||
|
{ 952, PT_PC, ucp_Pd },
|
||||||
|
{ 955, PT_PC, ucp_Pe },
|
||||||
|
{ 958, PT_PC, ucp_Pf },
|
||||||
|
{ 961, PT_SC, ucp_Phags_Pa },
|
||||||
|
{ 970, PT_SC, ucp_Phoenician },
|
||||||
|
{ 981, PT_PC, ucp_Pi },
|
||||||
|
{ 984, PT_PC, ucp_Po },
|
||||||
|
{ 987, PT_PC, ucp_Ps },
|
||||||
|
{ 990, PT_SC, ucp_Psalter_Pahlavi },
|
||||||
|
{ 1006, PT_SC, ucp_Rejang },
|
||||||
|
{ 1013, PT_SC, ucp_Runic },
|
||||||
|
{ 1019, PT_GC, ucp_S },
|
||||||
|
{ 1021, PT_SC, ucp_Samaritan },
|
||||||
|
{ 1031, PT_SC, ucp_Saurashtra },
|
||||||
|
{ 1042, PT_PC, ucp_Sc },
|
||||||
|
{ 1045, PT_SC, ucp_Sharada },
|
||||||
|
{ 1053, PT_SC, ucp_Shavian },
|
||||||
|
{ 1061, PT_SC, ucp_Siddham },
|
||||||
|
{ 1069, PT_SC, ucp_Sinhala },
|
||||||
|
{ 1077, PT_PC, ucp_Sk },
|
||||||
|
{ 1080, PT_PC, ucp_Sm },
|
||||||
|
{ 1083, PT_PC, ucp_So },
|
||||||
|
{ 1086, PT_SC, ucp_Sora_Sompeng },
|
||||||
|
{ 1099, PT_SC, ucp_Sundanese },
|
||||||
|
{ 1109, PT_SC, ucp_Syloti_Nagri },
|
||||||
|
{ 1122, PT_SC, ucp_Syriac },
|
||||||
|
{ 1129, PT_SC, ucp_Tagalog },
|
||||||
|
{ 1137, PT_SC, ucp_Tagbanwa },
|
||||||
|
{ 1146, PT_SC, ucp_Tai_Le },
|
||||||
|
{ 1153, PT_SC, ucp_Tai_Tham },
|
||||||
|
{ 1162, PT_SC, ucp_Tai_Viet },
|
||||||
|
{ 1171, PT_SC, ucp_Takri },
|
||||||
|
{ 1177, PT_SC, ucp_Tamil },
|
||||||
|
{ 1183, PT_SC, ucp_Telugu },
|
||||||
|
{ 1190, PT_SC, ucp_Thaana },
|
||||||
|
{ 1197, PT_SC, ucp_Thai },
|
||||||
|
{ 1202, PT_SC, ucp_Tibetan },
|
||||||
|
{ 1210, PT_SC, ucp_Tifinagh },
|
||||||
|
{ 1219, PT_SC, ucp_Tirhuta },
|
||||||
|
{ 1227, PT_SC, ucp_Ugaritic },
|
||||||
|
{ 1236, PT_SC, ucp_Vai },
|
||||||
|
{ 1240, PT_SC, ucp_Warang_Citi },
|
||||||
|
{ 1252, PT_ALNUM, 0 },
|
||||||
|
{ 1256, PT_PXSPACE, 0 },
|
||||||
|
{ 1260, PT_SPACE, 0 },
|
||||||
|
{ 1264, PT_UCNC, 0 },
|
||||||
|
{ 1268, PT_WORD, 0 },
|
||||||
|
{ 1272, PT_SC, ucp_Yi },
|
||||||
|
{ 1275, PT_GC, ucp_Z },
|
||||||
|
{ 1277, PT_PC, ucp_Zl },
|
||||||
|
{ 1280, PT_PC, ucp_Zp },
|
||||||
|
{ 1283, PT_PC, ucp_Zs }
|
||||||
};
|
};
|
||||||
|
|
||||||
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
|
const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
|
||||||
|
|
||||||
#endif /* SUPPORT_UTF8 */
|
#endif /* SUPPORT_UTF */
|
||||||
|
|
||||||
/* End of pcre_tables.c */
|
/* End of pcre_tables.c */
|
||||||
|
@ -1,137 +0,0 @@
|
|||||||
/*************************************************
|
|
||||||
* Perl-Compatible Regular Expressions *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
|
||||||
and semantics are as close as possible to those of the Perl 5 language.
|
|
||||||
|
|
||||||
Written by Philip Hazel
|
|
||||||
Copyright (c) 1997-2007 University of Cambridge
|
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright notice,
|
|
||||||
this list of conditions and the following disclaimer.
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in the
|
|
||||||
documentation and/or other materials provided with the distribution.
|
|
||||||
|
|
||||||
* Neither the name of the University of Cambridge nor the names of its
|
|
||||||
contributors may be used to endorse or promote products derived from
|
|
||||||
this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
/* This module contains an internal function that tests a compiled pattern to
|
|
||||||
see if it was compiled with the opposite endianness. If so, it uses an
|
|
||||||
auxiliary local function to flip the appropriate bytes. */
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef HAVE_CONFIG_H
|
|
||||||
#include "config.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "pcre_internal.h"
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Flip bytes in an integer *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* This function is called when the magic number in a regex doesn't match, in
|
|
||||||
order to flip its bytes to see if we are dealing with a pattern that was
|
|
||||||
compiled on a host of different endianness. If so, this function is used to
|
|
||||||
flip other byte values.
|
|
||||||
|
|
||||||
Arguments:
|
|
||||||
value the number to flip
|
|
||||||
n the number of bytes to flip (assumed to be 2 or 4)
|
|
||||||
|
|
||||||
Returns: the flipped value
|
|
||||||
*/
|
|
||||||
|
|
||||||
static unsigned long int
byteflip(unsigned long int value, int n)
{
  /* Pull out the two low-order bytes first; both swap widths use them. */
  const unsigned long int byte0 = value & 0xff;
  const unsigned long int byte1 = (value >> 8) & 0xff;
  unsigned long int byte2, byte3;

  /* Two-byte quantity: exchange the low and high byte. */
  if (n == 2) return (byte0 << 8) | byte1;

  /* Any other width is handled as a four-byte quantity: reverse the order
  of the four low-order bytes. Bits above the low 32 are discarded. */
  byte2 = (value >> 16) & 0xff;
  byte3 = (value >> 24) & 0xff;
  return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Test for a byte-flipped compiled regex *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* This function is called from pcre_exec(), pcre_dfa_exec(), and also from
|
|
||||||
pcre_fullinfo(). Its job is to test whether the regex is byte-flipped - that
|
|
||||||
is, it was compiled on a system of opposite endianness. The function is called
|
|
||||||
only when the native MAGIC_NUMBER test fails. If the regex is indeed flipped,
|
|
||||||
we flip all the relevant values into a different data block, and return it.
|
|
||||||
|
|
||||||
Arguments:
|
|
||||||
re points to the regex
|
|
||||||
study points to study data, or NULL
|
|
||||||
internal_re points to a new regex block
|
|
||||||
internal_study points to a new study block
|
|
||||||
|
|
||||||
Returns: the new block if it is indeed a byte-flipped regex
|
|
||||||
NULL if it is not
|
|
||||||
*/
|
|
||||||
|
|
||||||
real_pcre *
|
|
||||||
_pcre_try_flipped(const real_pcre *re, real_pcre *internal_re,
|
|
||||||
const pcre_study_data *study, pcre_study_data *internal_study)
|
|
||||||
{
|
|
||||||
if (byteflip(re->magic_number, sizeof(re->magic_number)) != MAGIC_NUMBER)
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
*internal_re = *re; /* To copy other fields */
|
|
||||||
internal_re->size = byteflip(re->size, sizeof(re->size));
|
|
||||||
internal_re->options = byteflip(re->options, sizeof(re->options));
|
|
||||||
internal_re->flags = (pcre_uint16)byteflip(re->flags, sizeof(re->flags));
|
|
||||||
internal_re->top_bracket =
|
|
||||||
(pcre_uint16)byteflip(re->top_bracket, sizeof(re->top_bracket));
|
|
||||||
internal_re->top_backref =
|
|
||||||
(pcre_uint16)byteflip(re->top_backref, sizeof(re->top_backref));
|
|
||||||
internal_re->first_byte =
|
|
||||||
(pcre_uint16)byteflip(re->first_byte, sizeof(re->first_byte));
|
|
||||||
internal_re->req_byte =
|
|
||||||
(pcre_uint16)byteflip(re->req_byte, sizeof(re->req_byte));
|
|
||||||
internal_re->name_table_offset =
|
|
||||||
(pcre_uint16)byteflip(re->name_table_offset, sizeof(re->name_table_offset));
|
|
||||||
internal_re->name_entry_size =
|
|
||||||
(pcre_uint16)byteflip(re->name_entry_size, sizeof(re->name_entry_size));
|
|
||||||
internal_re->name_count =
|
|
||||||
(pcre_uint16)byteflip(re->name_count, sizeof(re->name_count));
|
|
||||||
|
|
||||||
if (study != NULL)
|
|
||||||
{
|
|
||||||
*internal_study = *study; /* To copy other fields */
|
|
||||||
internal_study->size = byteflip(study->size, sizeof(study->size));
|
|
||||||
internal_study->options = byteflip(study->options, sizeof(study->options));
|
|
||||||
}
|
|
||||||
|
|
||||||
return internal_re;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* End of pcre_tryflipped.c */
|
|
3631
src/plugins/PCREPlugin/pcre_ucd.c
Normal file
3631
src/plugins/PCREPlugin/pcre_ucd.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,179 +0,0 @@
|
|||||||
/*************************************************
|
|
||||||
* Perl-Compatible Regular Expressions *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
|
||||||
and semantics are as close as possible to those of the Perl 5 language.
|
|
||||||
|
|
||||||
Written by Philip Hazel
|
|
||||||
Copyright (c) 1997-2007 University of Cambridge
|
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright notice,
|
|
||||||
this list of conditions and the following disclaimer.
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in the
|
|
||||||
documentation and/or other materials provided with the distribution.
|
|
||||||
|
|
||||||
* Neither the name of the University of Cambridge nor the names of its
|
|
||||||
contributors may be used to endorse or promote products derived from
|
|
||||||
this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
/* This module contains code for searching the table of Unicode character
|
|
||||||
properties. */
|
|
||||||
|
|
||||||
#ifdef HAVE_CONFIG_H
|
|
||||||
#include "config.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "pcre_internal.h"
|
|
||||||
|
|
||||||
#include "ucp.h" /* Category definitions */
|
|
||||||
#include "ucpinternal.h" /* Internal table details */
|
|
||||||
#include "ucptable.h" /* The table itself */
|
|
||||||
|
|
||||||
|
|
||||||
/* Table to translate from particular type value to the general value. */
|
|
||||||
|
|
||||||
static const int ucp_gentype[] = {
|
|
||||||
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
|
|
||||||
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
|
|
||||||
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
|
|
||||||
ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
|
|
||||||
ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
|
|
||||||
ucp_P, ucp_P, /* Ps, Po */
|
|
||||||
ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
|
|
||||||
ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Search table and return type *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* Three values are returned: the category is ucp_C, ucp_L, etc. The detailed
|
|
||||||
character type is ucp_Lu, ucp_Nd, etc. The script is ucp_Latin, etc.
|
|
||||||
|
|
||||||
Arguments:
|
|
||||||
c the character value
|
|
||||||
type_ptr the detailed character type is returned here
|
|
||||||
script_ptr the script is returned here
|
|
||||||
|
|
||||||
Returns: the character type category
|
|
||||||
*/
|
|
||||||
|
|
||||||
int
|
|
||||||
_pcre_ucp_findprop(const unsigned int c, int *type_ptr, int *script_ptr)
|
|
||||||
{
|
|
||||||
int bot = 0;
|
|
||||||
int top = sizeof(ucp_table)/sizeof(cnode);
|
|
||||||
int mid;
|
|
||||||
|
|
||||||
/* The table is searched using a binary chop. You might think that using
|
|
||||||
intermediate variables to hold some of the common expressions would speed
|
|
||||||
things up, but tests with gcc 3.4.4 on Linux showed that, on the contrary, it
|
|
||||||
makes things a lot slower. */
|
|
||||||
|
|
||||||
for (;;)
|
|
||||||
{
|
|
||||||
if (top <= bot)
|
|
||||||
{
|
|
||||||
*type_ptr = ucp_Cn;
|
|
||||||
*script_ptr = ucp_Common;
|
|
||||||
return ucp_C;
|
|
||||||
}
|
|
||||||
mid = (bot + top) >> 1;
|
|
||||||
if (c == (ucp_table[mid].f0 & f0_charmask)) break;
|
|
||||||
if (c < (ucp_table[mid].f0 & f0_charmask)) top = mid;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if ((ucp_table[mid].f0 & f0_rangeflag) != 0 &&
|
|
||||||
c <= (ucp_table[mid].f0 & f0_charmask) +
|
|
||||||
(ucp_table[mid].f1 & f1_rangemask)) break;
|
|
||||||
bot = mid + 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Found an entry in the table. Set the script and detailed type values, and
|
|
||||||
return the general type. */
|
|
||||||
|
|
||||||
*script_ptr = (ucp_table[mid].f0 & f0_scriptmask) >> f0_scriptshift;
|
|
||||||
*type_ptr = (ucp_table[mid].f1 & f1_typemask) >> f1_typeshift;
|
|
||||||
|
|
||||||
return ucp_gentype[*type_ptr];
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Search table and return other case *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* If the given character is a letter, and there is another case for the
|
|
||||||
letter, return the other case. Otherwise, return NOTACHAR.
|
|
||||||
|
|
||||||
Arguments:
|
|
||||||
c the character value
|
|
||||||
|
|
||||||
Returns: the other case or NOTACHAR if none
|
|
||||||
*/
|
|
||||||
|
|
||||||
unsigned int
|
|
||||||
_pcre_ucp_othercase(const unsigned int c)
|
|
||||||
{
|
|
||||||
int bot = 0;
|
|
||||||
int top = sizeof(ucp_table)/sizeof(cnode);
|
|
||||||
int mid, offset;
|
|
||||||
|
|
||||||
/* The table is searched using a binary chop. You might think that using
|
|
||||||
intermediate variables to hold some of the common expressions would speed
|
|
||||||
things up, but tests with gcc 3.4.4 on Linux showed that, on the contrary, it
|
|
||||||
makes things a lot slower. */
|
|
||||||
|
|
||||||
for (;;)
|
|
||||||
{
|
|
||||||
if (top <= bot) return -1;
|
|
||||||
mid = (bot + top) >> 1;
|
|
||||||
if (c == (ucp_table[mid].f0 & f0_charmask)) break;
|
|
||||||
if (c < (ucp_table[mid].f0 & f0_charmask)) top = mid;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if ((ucp_table[mid].f0 & f0_rangeflag) != 0 &&
|
|
||||||
c <= (ucp_table[mid].f0 & f0_charmask) +
|
|
||||||
(ucp_table[mid].f1 & f1_rangemask)) break;
|
|
||||||
bot = mid + 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Found an entry in the table. Return NOTACHAR for a range entry. Otherwise
|
|
||||||
return the other case if there is one, else NOTACHAR. */
|
|
||||||
|
|
||||||
if ((ucp_table[mid].f0 & f0_rangeflag) != 0) return NOTACHAR;
|
|
||||||
|
|
||||||
offset = ucp_table[mid].f1 & f1_casemask;
|
|
||||||
if ((offset & f1_caseneg) != 0) offset |= f1_caseneg;
|
|
||||||
return (offset == 0)? NOTACHAR : c + offset;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* End of pcre_ucp_searchfuncs.c */
|
|
@ -1,3 +1,4 @@
|
|||||||
|
#define HAVE_CONFIG_H
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Perl-Compatible Regular Expressions *
|
* Perl-Compatible Regular Expressions *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
@ -6,7 +7,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2007 University of Cambridge
|
Copyright (c) 1997-2013 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -54,109 +55,248 @@ strings. */
|
|||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* This function is called (optionally) at the start of compile or match, to
|
/* This function is called (optionally) at the start of compile or match, to
|
||||||
validate that a supposed UTF-8 string is actually valid. The early check means
|
check that a supposed UTF-8 string is actually valid. The early check means
|
||||||
that subsequent code can assume it is dealing with a valid string. The check
|
that subsequent code can assume it is dealing with a valid string. The check
|
||||||
can be turned off for maximum performance, but the consequences of supplying
|
can be turned off for maximum performance, but the consequences of supplying an
|
||||||
an invalid string are then undefined.
|
invalid string are then undefined.
|
||||||
|
|
||||||
Originally, this function checked according to RFC 2279, allowing for values in
|
Originally, this function checked according to RFC 2279, allowing for values in
|
||||||
the range 0 to 0x7fffffff, up to 6 bytes long, but ensuring that they were in
|
the range 0 to 0x7fffffff, up to 6 bytes long, but ensuring that they were in
|
||||||
the canonical format. Once somebody had pointed out RFC 3629 to me (it
|
the canonical format. Once somebody had pointed out RFC 3629 to me (it
|
||||||
obsoletes 2279), additional restrictions were applies. The values are now
|
obsoletes 2279), additional restrictions were applied. The values are now
|
||||||
limited to be between 0 and 0x0010ffff, no more than 4 bytes long, and the
|
limited to be between 0 and 0x0010ffff, no more than 4 bytes long, and the
|
||||||
subrange 0xd000 to 0xdfff is excluded.
|
subrange 0xd800 to 0xdfff is excluded. However, the format of 5-byte and 6-byte
|
||||||
|
characters is still checked.
|
||||||
|
|
||||||
|
From release 8.13 more information about the details of the error is passed
|
||||||
|
back in the returned value:
|
||||||
|
|
||||||
|
PCRE_UTF8_ERR0 No error
|
||||||
|
PCRE_UTF8_ERR1 Missing 1 byte at the end of the string
|
||||||
|
PCRE_UTF8_ERR2 Missing 2 bytes at the end of the string
|
||||||
|
PCRE_UTF8_ERR3 Missing 3 bytes at the end of the string
|
||||||
|
PCRE_UTF8_ERR4 Missing 4 bytes at the end of the string
|
||||||
|
PCRE_UTF8_ERR5 Missing 5 bytes at the end of the string
|
||||||
|
PCRE_UTF8_ERR6 2nd-byte's two top bits are not 0x80
|
||||||
|
PCRE_UTF8_ERR7 3rd-byte's two top bits are not 0x80
|
||||||
|
PCRE_UTF8_ERR8 4th-byte's two top bits are not 0x80
|
||||||
|
PCRE_UTF8_ERR9 5th-byte's two top bits are not 0x80
|
||||||
|
PCRE_UTF8_ERR10 6th-byte's two top bits are not 0x80
|
||||||
|
PCRE_UTF8_ERR11 5-byte character is not permitted by RFC 3629
|
||||||
|
PCRE_UTF8_ERR12 6-byte character is not permitted by RFC 3629
|
||||||
|
PCRE_UTF8_ERR13 4-byte character with value > 0x10ffff is not permitted
|
||||||
|
PCRE_UTF8_ERR14 3-byte character with value 0xd800-0xdfff is not permitted
|
||||||
|
PCRE_UTF8_ERR15 Overlong 2-byte sequence
|
||||||
|
PCRE_UTF8_ERR16 Overlong 3-byte sequence
|
||||||
|
PCRE_UTF8_ERR17 Overlong 4-byte sequence
|
||||||
|
PCRE_UTF8_ERR18 Overlong 5-byte sequence (won't ever occur)
|
||||||
|
PCRE_UTF8_ERR19 Overlong 6-byte sequence (won't ever occur)
|
||||||
|
PCRE_UTF8_ERR20 Isolated 0x80 byte (not within UTF-8 character)
|
||||||
|
PCRE_UTF8_ERR21 Byte with the illegal value 0xfe or 0xff
|
||||||
|
PCRE_UTF8_ERR22 Unused (was non-character)
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
string points to the string
|
string points to the string
|
||||||
length length of string, or -1 if the string is zero-terminated
|
length length of string, or -1 if the string is zero-terminated
|
||||||
|
errp pointer to an error position offset variable
|
||||||
|
|
||||||
Returns: < 0 if the string is a valid UTF-8 string
|
Returns: = 0 if the string is a valid UTF-8 string
|
||||||
>= 0 otherwise; the value is the offset of the bad byte
|
> 0 otherwise, setting the offset of the bad character
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int
|
int
|
||||||
_pcre_valid_utf8(const uschar *string, int length)
|
PRIV(valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset)
|
||||||
{
|
{
|
||||||
#ifdef SUPPORT_UTF8
|
#ifdef SUPPORT_UTF
|
||||||
register const uschar *p;
|
register PCRE_PUCHAR p;
|
||||||
|
|
||||||
if (length < 0)
|
if (length < 0)
|
||||||
{
|
{
|
||||||
for (p = string; *p != 0; p++);
|
for (p = string; *p != 0; p++);
|
||||||
length = p - string;
|
length = (int)(p - string);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (p = string; length-- > 0; p++)
|
for (p = string; length-- > 0; p++)
|
||||||
{
|
{
|
||||||
register int ab;
|
register pcre_uchar ab, c, d;
|
||||||
register int c = *p;
|
|
||||||
if (c < 128) continue;
|
c = *p;
|
||||||
if (c < 0xc0) return p - string;
|
if (c < 128) continue; /* ASCII character */
|
||||||
ab = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
|
|
||||||
if (length < ab || ab > 3) return p - string;
|
if (c < 0xc0) /* Isolated 10xx xxxx byte */
|
||||||
length -= ab;
|
{
|
||||||
|
*erroroffset = (int)(p - string);
|
||||||
|
return PCRE_UTF8_ERR20;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (c >= 0xfe) /* Invalid 0xfe or 0xff bytes */
|
||||||
|
{
|
||||||
|
*erroroffset = (int)(p - string);
|
||||||
|
return PCRE_UTF8_ERR21;
|
||||||
|
}
|
||||||
|
|
||||||
|
ab = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */
|
||||||
|
if (length < ab)
|
||||||
|
{
|
||||||
|
*erroroffset = (int)(p - string); /* Missing bytes */
|
||||||
|
return ab - length; /* Codes ERR1 to ERR5 */
|
||||||
|
}
|
||||||
|
length -= ab; /* Length remaining */
|
||||||
|
|
||||||
/* Check top bits in the second byte */
|
/* Check top bits in the second byte */
|
||||||
if ((*(++p) & 0xc0) != 0x80) return p - string;
|
|
||||||
|
|
||||||
/* Check for overlong sequences for each different length, and for the
|
if (((d = *(++p)) & 0xc0) != 0x80)
|
||||||
excluded range 0xd000 to 0xdfff. */
|
{
|
||||||
|
*erroroffset = (int)(p - string) - 1;
|
||||||
|
return PCRE_UTF8_ERR6;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* For each length, check that the remaining bytes start with the 0x80 bit
|
||||||
|
set and not the 0x40 bit. Then check for an overlong sequence, and for the
|
||||||
|
excluded range 0xd800 to 0xdfff. */
|
||||||
|
|
||||||
switch (ab)
|
switch (ab)
|
||||||
{
|
{
|
||||||
/* Check for xx00 000x (overlong sequence) */
|
/* 2-byte character. No further bytes to check for 0x80. Check first byte
|
||||||
|
for xx00 000x (overlong sequence). */
|
||||||
|
|
||||||
case 1:
|
case 1: if ((c & 0x3e) == 0)
|
||||||
if ((c & 0x3e) == 0) return p - string;
|
{
|
||||||
continue; /* We know there aren't any more bytes to check */
|
*erroroffset = (int)(p - string) - 1;
|
||||||
|
return PCRE_UTF8_ERR15;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
/* Check for 1110 0000, xx0x xxxx (overlong sequence) or
|
/* 3-byte character. Check third byte for 0x80. Then check first 2 bytes
|
||||||
1110 1101, 1010 xxxx (0xd000 - 0xdfff) */
|
for 1110 0000, xx0x xxxx (overlong sequence) or
|
||||||
|
1110 1101, 1010 xxxx (0xd800 - 0xdfff) */
|
||||||
|
|
||||||
case 2:
|
case 2:
|
||||||
if ((c == 0xe0 && (*p & 0x20) == 0) ||
|
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||||
(c == 0xed && *p >= 0xa0))
|
{
|
||||||
return p - string;
|
*erroroffset = (int)(p - string) - 2;
|
||||||
|
return PCRE_UTF8_ERR7;
|
||||||
|
}
|
||||||
|
if (c == 0xe0 && (d & 0x20) == 0)
|
||||||
|
{
|
||||||
|
*erroroffset = (int)(p - string) - 2;
|
||||||
|
return PCRE_UTF8_ERR16;
|
||||||
|
}
|
||||||
|
if (c == 0xed && d >= 0xa0)
|
||||||
|
{
|
||||||
|
*erroroffset = (int)(p - string) - 2;
|
||||||
|
return PCRE_UTF8_ERR14;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* Check for 1111 0000, xx00 xxxx (overlong sequence) or
|
/* 4-byte character. Check 3rd and 4th bytes for 0x80. Then check first 2
|
||||||
greater than 0x0010ffff (f4 8f bf bf) */
|
bytes for 1111 0000, xx00 xxxx (overlong sequence), then check for a
|
||||||
|
character greater than 0x0010ffff (f4 8f bf bf) */
|
||||||
|
|
||||||
case 3:
|
case 3:
|
||||||
if ((c == 0xf0 && (*p & 0x30) == 0) ||
|
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||||
(c > 0xf4 ) ||
|
|
||||||
(c == 0xf4 && *p > 0x8f))
|
|
||||||
return p - string;
|
|
||||||
break;
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
/* These cases can no longer occur, as we restrict to a maximum of four
|
|
||||||
bytes nowadays. Leave the code here in case we ever want to add an option
|
|
||||||
for longer sequences. */
|
|
||||||
|
|
||||||
/* Check for 1111 1000, xx00 0xxx */
|
|
||||||
case 4:
|
|
||||||
if (c == 0xf8 && (*p & 0x38) == 0) return p - string;
|
|
||||||
break;
|
|
||||||
|
|
||||||
/* Check for leading 0xfe or 0xff, and then for 1111 1100, xx00 00xx */
|
|
||||||
case 5:
|
|
||||||
if (c == 0xfe || c == 0xff ||
|
|
||||||
(c == 0xfc && (*p & 0x3c) == 0)) return p - string;
|
|
||||||
break;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Check for valid bytes after the 2nd, if any; all must start 10 */
|
|
||||||
while (--ab > 0)
|
|
||||||
{
|
{
|
||||||
if ((*(++p) & 0xc0) != 0x80) return p - string;
|
*erroroffset = (int)(p - string) - 2;
|
||||||
|
return PCRE_UTF8_ERR7;
|
||||||
|
}
|
||||||
|
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
|
||||||
|
{
|
||||||
|
*erroroffset = (int)(p - string) - 3;
|
||||||
|
return PCRE_UTF8_ERR8;
|
||||||
|
}
|
||||||
|
if (c == 0xf0 && (d & 0x30) == 0)
|
||||||
|
{
|
||||||
|
*erroroffset = (int)(p - string) - 3;
|
||||||
|
return PCRE_UTF8_ERR17;
|
||||||
|
}
|
||||||
|
if (c > 0xf4 || (c == 0xf4 && d > 0x8f))
|
||||||
|
{
|
||||||
|
*erroroffset = (int)(p - string) - 3;
|
||||||
|
return PCRE_UTF8_ERR13;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* 5-byte and 6-byte characters are not allowed by RFC 3629, and will be
|
||||||
|
rejected by the length test below. However, we do the appropriate tests
|
||||||
|
here so that overlong sequences get diagnosed, and also in case there is
|
||||||
|
ever an option for handling these larger code points. */
|
||||||
|
|
||||||
|
/* 5-byte character. Check 3rd, 4th, and 5th bytes for 0x80. Then check for
|
||||||
|
1111 1000, xx00 0xxx */
|
||||||
|
|
||||||
|
case 4:
|
||||||
|
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||||
|
{
|
||||||
|
*erroroffset = (int)(p - string) - 2;
|
||||||
|
return PCRE_UTF8_ERR7;
|
||||||
|
}
|
||||||
|
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
|
||||||
|
{
|
||||||
|
*erroroffset = (int)(p - string) - 3;
|
||||||
|
return PCRE_UTF8_ERR8;
|
||||||
|
}
|
||||||
|
if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */
|
||||||
|
{
|
||||||
|
*erroroffset = (int)(p - string) - 4;
|
||||||
|
return PCRE_UTF8_ERR9;
|
||||||
|
}
|
||||||
|
if (c == 0xf8 && (d & 0x38) == 0)
|
||||||
|
{
|
||||||
|
*erroroffset = (int)(p - string) - 4;
|
||||||
|
return PCRE_UTF8_ERR18;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* 6-byte character. Check 3rd-6th bytes for 0x80. Then check for
|
||||||
|
1111 1100, xx00 00xx. */
|
||||||
|
|
||||||
|
case 5:
|
||||||
|
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||||
|
{
|
||||||
|
*erroroffset = (int)(p - string) - 2;
|
||||||
|
return PCRE_UTF8_ERR7;
|
||||||
|
}
|
||||||
|
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
|
||||||
|
{
|
||||||
|
*erroroffset = (int)(p - string) - 3;
|
||||||
|
return PCRE_UTF8_ERR8;
|
||||||
|
}
|
||||||
|
if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */
|
||||||
|
{
|
||||||
|
*erroroffset = (int)(p - string) - 4;
|
||||||
|
return PCRE_UTF8_ERR9;
|
||||||
|
}
|
||||||
|
if ((*(++p) & 0xc0) != 0x80) /* Sixth byte */
|
||||||
|
{
|
||||||
|
*erroroffset = (int)(p - string) - 5;
|
||||||
|
return PCRE_UTF8_ERR10;
|
||||||
|
}
|
||||||
|
if (c == 0xfc && (d & 0x3c) == 0)
|
||||||
|
{
|
||||||
|
*erroroffset = (int)(p - string) - 5;
|
||||||
|
return PCRE_UTF8_ERR19;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Character is valid under RFC 2279, but 5-byte and 6-byte characters are
|
||||||
|
excluded by RFC 3629. The pointer p is currently at the last byte of the
|
||||||
|
character. */
|
||||||
|
|
||||||
|
if (ab > 3)
|
||||||
|
{
|
||||||
|
*erroroffset = (int)(p - string) - ab;
|
||||||
|
return (ab == 4)? PCRE_UTF8_ERR11 : PCRE_UTF8_ERR12;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#else /* Not SUPPORT_UTF */
|
||||||
|
(void)(string); /* Keep picky compilers happy */
|
||||||
|
(void)(length);
|
||||||
|
(void)(erroroffset);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return -1;
|
return PCRE_UTF8_ERR0; /* This indicates success */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* End of pcre_valid_utf8.c */
|
/* End of pcre_valid_utf8.c */
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
#define HAVE_CONFIG_H
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Perl-Compatible Regular Expressions *
|
* Perl-Compatible Regular Expressions *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
@ -6,7 +7,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2007 University of Cambridge
|
Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -79,8 +80,16 @@ I could find no way of detecting that a macro is defined as an empty string at
|
|||||||
pre-processor time. This hack uses a standard trick for avoiding calling
|
pre-processor time. This hack uses a standard trick for avoiding calling
|
||||||
the STRING macro with an empty argument when doing the test. */
|
the STRING macro with an empty argument when doing the test. */
|
||||||
|
|
||||||
PCRE_EXP_DEFN const char *
|
#if defined COMPILE_PCRE8
|
||||||
|
PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
|
||||||
pcre_version(void)
|
pcre_version(void)
|
||||||
|
#elif defined COMPILE_PCRE16
|
||||||
|
PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
|
||||||
|
pcre16_version(void)
|
||||||
|
#elif defined COMPILE_PCRE32
|
||||||
|
PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
|
||||||
|
pcre32_version(void)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
|
return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
|
||||||
XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) :
|
XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) :
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
#define HAVE_CONFIG_H
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Perl-Compatible Regular Expressions *
|
* Perl-Compatible Regular Expressions *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
@ -6,7 +7,7 @@
|
|||||||
and semantics are as close as possible to those of the Perl 5 language.
|
and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Copyright (c) 1997-2007 University of Cambridge
|
Copyright (c) 1997-2013 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -39,8 +40,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
|
|
||||||
/* This module contains an internal function that is used to match an extended
|
/* This module contains an internal function that is used to match an extended
|
||||||
class (one that contains characters whose values are > 255). It is used by both
|
class. It is used by both pcre_exec() and pcre_dfa_exec(). */
|
||||||
pcre_exec() and pcre_def_exec(). */
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef HAVE_CONFIG_H
|
#ifdef HAVE_CONFIG_H
|
||||||
@ -55,7 +55,7 @@ pcre_exec() and pcre_def_exec(). */
|
|||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* This function is called to match a character against an extended class that
|
/* This function is called to match a character against an extended class that
|
||||||
might contain values > 255.
|
might contain values > 255 and/or Unicode properties.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
c the character
|
c the character
|
||||||
@ -65,18 +65,30 @@ Returns: TRUE if character matches, else FALSE
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
BOOL
|
BOOL
|
||||||
_pcre_xclass(int c, const uschar *data)
|
PRIV(xclass)(pcre_uint32 c, const pcre_uchar *data, BOOL utf)
|
||||||
{
|
{
|
||||||
int t;
|
pcre_uchar t;
|
||||||
BOOL negated = (*data & XCL_NOT) != 0;
|
BOOL negated = (*data & XCL_NOT) != 0;
|
||||||
|
|
||||||
|
(void)utf;
|
||||||
|
#ifdef COMPILE_PCRE8
|
||||||
|
/* In 8 bit mode, this must always be TRUE. Help the compiler to know that. */
|
||||||
|
utf = TRUE;
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Character values < 256 are matched against a bitmap, if one is present. If
|
/* Character values < 256 are matched against a bitmap, if one is present. If
|
||||||
not, we still carry on, because there may be ranges that start below 256 in the
|
not, we still carry on, because there may be ranges that start below 256 in the
|
||||||
additional data. */
|
additional data. */
|
||||||
|
|
||||||
if (c < 256)
|
if (c < 256)
|
||||||
{
|
{
|
||||||
if ((*data & XCL_MAP) != 0 && (data[1 + c/8] & (1 << (c&7))) != 0)
|
if ((*data & XCL_HASPROP) == 0)
|
||||||
|
{
|
||||||
|
if ((*data & XCL_MAP) == 0) return negated;
|
||||||
|
return (((pcre_uint8 *)(data + 1))[c/8] & (1 << (c&7))) != 0;
|
||||||
|
}
|
||||||
|
if ((*data & XCL_MAP) != 0 &&
|
||||||
|
(((pcre_uint8 *)(data + 1))[c/8] & (1 << (c&7))) != 0)
|
||||||
return !negated; /* char found */
|
return !negated; /* char found */
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -84,50 +96,159 @@ if (c < 256)
|
|||||||
properties or large chars or ranges that end with a large char. We won't ever
|
properties or large chars or ranges that end with a large char. We won't ever
|
||||||
encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */
|
encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */
|
||||||
|
|
||||||
if ((*data++ & XCL_MAP) != 0) data += 32;
|
if ((*data++ & XCL_MAP) != 0) data += 32 / sizeof(pcre_uchar);
|
||||||
|
|
||||||
while ((t = *data++) != XCL_END)
|
while ((t = *data++) != XCL_END)
|
||||||
{
|
{
|
||||||
int x, y;
|
pcre_uint32 x, y;
|
||||||
if (t == XCL_SINGLE)
|
if (t == XCL_SINGLE)
|
||||||
{
|
{
|
||||||
GETCHARINC(x, data);
|
#ifdef SUPPORT_UTF
|
||||||
|
if (utf)
|
||||||
|
{
|
||||||
|
GETCHARINC(x, data); /* macro generates multiple statements */
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
x = *data++;
|
||||||
if (c == x) return !negated;
|
if (c == x) return !negated;
|
||||||
}
|
}
|
||||||
else if (t == XCL_RANGE)
|
else if (t == XCL_RANGE)
|
||||||
{
|
{
|
||||||
GETCHARINC(x, data);
|
#ifdef SUPPORT_UTF
|
||||||
GETCHARINC(y, data);
|
if (utf)
|
||||||
|
{
|
||||||
|
GETCHARINC(x, data); /* macro generates multiple statements */
|
||||||
|
GETCHARINC(y, data); /* macro generates multiple statements */
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
x = *data++;
|
||||||
|
y = *data++;
|
||||||
|
}
|
||||||
if (c >= x && c <= y) return !negated;
|
if (c >= x && c <= y) return !negated;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef SUPPORT_UCP
|
#ifdef SUPPORT_UCP
|
||||||
else /* XCL_PROP & XCL_NOTPROP */
|
else /* XCL_PROP & XCL_NOTPROP */
|
||||||
{
|
{
|
||||||
int chartype, script;
|
const ucd_record *prop = GET_UCD(c);
|
||||||
int category = _pcre_ucp_findprop(c, &chartype, &script);
|
BOOL isprop = t == XCL_PROP;
|
||||||
|
|
||||||
switch(*data)
|
switch(*data)
|
||||||
{
|
{
|
||||||
case PT_ANY:
|
case PT_ANY:
|
||||||
if (t == XCL_PROP) return !negated;
|
if (isprop) return !negated;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PT_LAMP:
|
case PT_LAMP:
|
||||||
if ((chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt) ==
|
if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
|
||||||
(t == XCL_PROP)) return !negated;
|
prop->chartype == ucp_Lt) == isprop) return !negated;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PT_GC:
|
case PT_GC:
|
||||||
if ((data[1] == category) == (t == XCL_PROP)) return !negated;
|
if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == isprop)
|
||||||
|
return !negated;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PT_PC:
|
case PT_PC:
|
||||||
if ((data[1] == chartype) == (t == XCL_PROP)) return !negated;
|
if ((data[1] == prop->chartype) == isprop) return !negated;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PT_SC:
|
case PT_SC:
|
||||||
if ((data[1] == script) == (t == XCL_PROP)) return !negated;
|
if ((data[1] == prop->script) == isprop) return !negated;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_ALNUM:
|
||||||
|
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||||
|
PRIV(ucp_gentype)[prop->chartype] == ucp_N) == isprop)
|
||||||
|
return !negated;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||||
|
which means that Perl space and POSIX space are now identical. PCRE
|
||||||
|
was changed at release 8.34. */
|
||||||
|
|
||||||
|
case PT_SPACE: /* Perl space */
|
||||||
|
case PT_PXSPACE: /* POSIX space */
|
||||||
|
switch(c)
|
||||||
|
{
|
||||||
|
HSPACE_CASES:
|
||||||
|
VSPACE_CASES:
|
||||||
|
if (isprop) return !negated;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == isprop)
|
||||||
|
return !negated;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_WORD:
|
||||||
|
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||||
|
PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
|
||||||
|
== isprop)
|
||||||
|
return !negated;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PT_UCNC:
|
||||||
|
if (c < 0xa0)
|
||||||
|
{
|
||||||
|
if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
|
||||||
|
c == CHAR_GRAVE_ACCENT) == isprop)
|
||||||
|
return !negated;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if ((c < 0xd800 || c > 0xdfff) == isprop)
|
||||||
|
return !negated;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* The following three properties can occur only in an XCLASS, as there
|
||||||
|
is no \p or \P coding for them. */
|
||||||
|
|
||||||
|
/* Graphic character. Implement this as not Z (space or separator) and
|
||||||
|
not C (other), except for Cf (format) with a few exceptions. This seems
|
||||||
|
to be what Perl does. The exceptional characters are:
|
||||||
|
|
||||||
|
U+061C Arabic Letter Mark
|
||||||
|
U+180E Mongolian Vowel Separator
|
||||||
|
U+2066 - U+2069 Various "isolate"s
|
||||||
|
*/
|
||||||
|
|
||||||
|
case PT_PXGRAPH:
|
||||||
|
if ((PRIV(ucp_gentype)[prop->chartype] != ucp_Z &&
|
||||||
|
(PRIV(ucp_gentype)[prop->chartype] != ucp_C ||
|
||||||
|
(prop->chartype == ucp_Cf &&
|
||||||
|
c != 0x061c && c != 0x180e && (c < 0x2066 || c > 0x2069))
|
||||||
|
)) == isprop)
|
||||||
|
return !negated;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Printable character: same as graphic, with the addition of Zs, i.e.
|
||||||
|
not Zl and not Zp, and U+180E. */
|
||||||
|
|
||||||
|
case PT_PXPRINT:
|
||||||
|
if ((prop->chartype != ucp_Zl &&
|
||||||
|
prop->chartype != ucp_Zp &&
|
||||||
|
(PRIV(ucp_gentype)[prop->chartype] != ucp_C ||
|
||||||
|
(prop->chartype == ucp_Cf &&
|
||||||
|
c != 0x061c && (c < 0x2066 || c > 0x2069))
|
||||||
|
)) == isprop)
|
||||||
|
return !negated;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Punctuation: all Unicode punctuation, plus ASCII characters that
|
||||||
|
Unicode treats as symbols rather than punctuation, for Perl
|
||||||
|
compatibility (these are $+<=>^`|~). */
|
||||||
|
|
||||||
|
case PT_PXPUNCT:
|
||||||
|
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_P ||
|
||||||
|
(c < 128 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop)
|
||||||
|
return !negated;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* This should never occur, but compilers may mutter if there is no
|
/* This should never occur, but compilers may mutter if there is no
|
||||||
|
@ -1,337 +0,0 @@
|
|||||||
/*************************************************
|
|
||||||
* Perl-Compatible Regular Expressions *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
|
||||||
and semantics are as close as possible to those of the Perl 5 language.
|
|
||||||
|
|
||||||
Written by Philip Hazel
|
|
||||||
Copyright (c) 1997-2007 University of Cambridge
|
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright notice,
|
|
||||||
this list of conditions and the following disclaimer.
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in the
|
|
||||||
documentation and/or other materials provided with the distribution.
|
|
||||||
|
|
||||||
* Neither the name of the University of Cambridge nor the names of its
|
|
||||||
contributors may be used to endorse or promote products derived from
|
|
||||||
this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
/* This module is a wrapper that provides a POSIX API to the underlying PCRE
|
|
||||||
functions. */
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef HAVE_CONFIG_H
|
|
||||||
#include "config.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
/* Ensure that the PCREPOSIX_EXP_xxx macros are set appropriately for
|
|
||||||
compiling these functions. This must come before including pcreposix.h, where
|
|
||||||
they are set for an application (using these functions) if they have not
|
|
||||||
previously been set. */
|
|
||||||
|
|
||||||
#if defined(_WIN32) && !defined(PCRE_STATIC)
|
|
||||||
# define PCREPOSIX_EXP_DECL extern __declspec(dllexport)
|
|
||||||
# define PCREPOSIX_EXP_DEFN __declspec(dllexport)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "pcre.h"
|
|
||||||
#include "pcre_internal.h"
|
|
||||||
#include "pcreposix.h"
|
|
||||||
|
|
||||||
|
|
||||||
/* Table to translate PCRE compile time error codes into POSIX error codes. */
|
|
||||||
|
|
||||||
static const int eint[] = {
|
|
||||||
0, /* no error */
|
|
||||||
REG_EESCAPE, /* \ at end of pattern */
|
|
||||||
REG_EESCAPE, /* \c at end of pattern */
|
|
||||||
REG_EESCAPE, /* unrecognized character follows \ */
|
|
||||||
REG_BADBR, /* numbers out of order in {} quantifier */
|
|
||||||
REG_BADBR, /* number too big in {} quantifier */
|
|
||||||
REG_EBRACK, /* missing terminating ] for character class */
|
|
||||||
REG_ECTYPE, /* invalid escape sequence in character class */
|
|
||||||
REG_ERANGE, /* range out of order in character class */
|
|
||||||
REG_BADRPT, /* nothing to repeat */
|
|
||||||
REG_BADRPT, /* operand of unlimited repeat could match the empty string */
|
|
||||||
REG_ASSERT, /* internal error: unexpected repeat */
|
|
||||||
REG_BADPAT, /* unrecognized character after (? */
|
|
||||||
REG_BADPAT, /* POSIX named classes are supported only within a class */
|
|
||||||
REG_EPAREN, /* missing ) */
|
|
||||||
REG_ESUBREG, /* reference to non-existent subpattern */
|
|
||||||
REG_INVARG, /* erroffset passed as NULL */
|
|
||||||
REG_INVARG, /* unknown option bit(s) set */
|
|
||||||
REG_EPAREN, /* missing ) after comment */
|
|
||||||
REG_ESIZE, /* parentheses nested too deeply */
|
|
||||||
REG_ESIZE, /* regular expression too large */
|
|
||||||
REG_ESPACE, /* failed to get memory */
|
|
||||||
REG_EPAREN, /* unmatched brackets */
|
|
||||||
REG_ASSERT, /* internal error: code overflow */
|
|
||||||
REG_BADPAT, /* unrecognized character after (?< */
|
|
||||||
REG_BADPAT, /* lookbehind assertion is not fixed length */
|
|
||||||
REG_BADPAT, /* malformed number or name after (?( */
|
|
||||||
REG_BADPAT, /* conditional group contains more than two branches */
|
|
||||||
REG_BADPAT, /* assertion expected after (?( */
|
|
||||||
REG_BADPAT, /* (?R or (?[+-]digits must be followed by ) */
|
|
||||||
REG_ECTYPE, /* unknown POSIX class name */
|
|
||||||
REG_BADPAT, /* POSIX collating elements are not supported */
|
|
||||||
REG_INVARG, /* this version of PCRE is not compiled with PCRE_UTF8 support */
|
|
||||||
REG_BADPAT, /* spare error */
|
|
||||||
REG_BADPAT, /* character value in \x{...} sequence is too large */
|
|
||||||
REG_BADPAT, /* invalid condition (?(0) */
|
|
||||||
REG_BADPAT, /* \C not allowed in lookbehind assertion */
|
|
||||||
REG_EESCAPE, /* PCRE does not support \L, \l, \N, \U, or \u */
|
|
||||||
REG_BADPAT, /* number after (?C is > 255 */
|
|
||||||
REG_BADPAT, /* closing ) for (?C expected */
|
|
||||||
REG_BADPAT, /* recursive call could loop indefinitely */
|
|
||||||
REG_BADPAT, /* unrecognized character after (?P */
|
|
||||||
REG_BADPAT, /* syntax error in subpattern name (missing terminator) */
|
|
||||||
REG_BADPAT, /* two named subpatterns have the same name */
|
|
||||||
REG_BADPAT, /* invalid UTF-8 string */
|
|
||||||
REG_BADPAT, /* support for \P, \p, and \X has not been compiled */
|
|
||||||
REG_BADPAT, /* malformed \P or \p sequence */
|
|
||||||
REG_BADPAT, /* unknown property name after \P or \p */
|
|
||||||
REG_BADPAT, /* subpattern name is too long (maximum 32 characters) */
|
|
||||||
REG_BADPAT, /* too many named subpatterns (maximum 10,000) */
|
|
||||||
REG_BADPAT, /* repeated subpattern is too long */
|
|
||||||
REG_BADPAT, /* octal value is greater than \377 (not in UTF-8 mode) */
|
|
||||||
REG_BADPAT, /* internal error: overran compiling workspace */
|
|
||||||
REG_BADPAT, /* internal error: previously-checked referenced subpattern not found */
|
|
||||||
REG_BADPAT, /* DEFINE group contains more than one branch */
|
|
||||||
REG_BADPAT, /* repeating a DEFINE group is not allowed */
|
|
||||||
REG_INVARG, /* inconsistent NEWLINE options */
|
|
||||||
REG_BADPAT, /* \g is not followed followed by an (optionally braced) non-zero number */
|
|
||||||
REG_BADPAT, /* (?+ or (?- must be followed by a non-zero number */
|
|
||||||
REG_BADPAT /* number is too big */
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Table of texts corresponding to POSIX error codes */
|
|
||||||
|
|
||||||
/* Message texts, indexed directly by the POSIX error code. Slot 0 is a
placeholder so that the remaining entries line up with the REG_xxx values. */

static const char *const pstring[] = {
  "",                                /*  0: dummy, no error     */
  "internal error",                  /*  1: REG_ASSERT          */
  "invalid repeat counts in {}",     /*  2: REG_BADBR           */
  "pattern error",                   /*  3: REG_BADPAT          */
  "? * + invalid",                   /*  4: REG_BADRPT          */
  "unbalanced {}",                   /*  5: REG_EBRACE          */
  "unbalanced []",                   /*  6: REG_EBRACK          */
  "collation error - not relevant",  /*  7: REG_ECOLLATE        */
  "bad class",                       /*  8: REG_ECTYPE          */
  "bad escape sequence",             /*  9: REG_EESCAPE         */
  "empty expression",                /* 10: REG_EMPTY           */
  "unbalanced ()",                   /* 11: REG_EPAREN          */
  "bad range inside []",             /* 12: REG_ERANGE          */
  "expression too big",              /* 13: REG_ESIZE           */
  "failed to get memory",            /* 14: REG_ESPACE          */
  "bad back reference",              /* 15: REG_ESUBREG         */
  "bad argument",                    /* 16: REG_INVARG          */
  "match failed"                     /* 17: REG_NOMATCH         */
};
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Translate error code to string *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
PCREPOSIX_EXP_DEFN size_t
|
|
||||||
regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
|
|
||||||
{
|
|
||||||
const char *message, *addmessage;
|
|
||||||
size_t length, addlength;
|
|
||||||
|
|
||||||
message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))?
|
|
||||||
"unknown error code" : pstring[errcode];
|
|
||||||
length = strlen(message) + 1;
|
|
||||||
|
|
||||||
addmessage = " at offset ";
|
|
||||||
addlength = (preg != NULL && (int)preg->re_erroffset != -1)?
|
|
||||||
strlen(addmessage) + 6 : 0;
|
|
||||||
|
|
||||||
if (errbuf_size > 0)
|
|
||||||
{
|
|
||||||
if (addlength > 0 && errbuf_size >= length + addlength)
|
|
||||||
sprintf(errbuf, "%s%s%-6d", message, addmessage, (int)preg->re_erroffset);
|
|
||||||
else
|
|
||||||
{
|
|
||||||
strncpy(errbuf, message, errbuf_size - 1);
|
|
||||||
errbuf[errbuf_size-1] = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return length + addlength;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Free store held by a regex *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
PCREPOSIX_EXP_DEFN void
|
|
||||||
regfree(regex_t *preg)
|
|
||||||
{
|
|
||||||
(pcre_free)(preg->re_pcre);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Compile a regular expression *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/*
|
|
||||||
Arguments:
|
|
||||||
preg points to a structure for recording the compiled expression
|
|
||||||
pattern the pattern to compile
|
|
||||||
cflags compilation flags
|
|
||||||
|
|
||||||
Returns: 0 on success
|
|
||||||
various non-zero codes on failure
|
|
||||||
*/
|
|
||||||
|
|
||||||
PCREPOSIX_EXP_DEFN int
|
|
||||||
regcomp(regex_t *preg, const char *pattern, int cflags)
|
|
||||||
{
|
|
||||||
const char *errorptr;
|
|
||||||
int erroffset;
|
|
||||||
int errorcode;
|
|
||||||
int options = 0;
|
|
||||||
|
|
||||||
if ((cflags & REG_ICASE) != 0) options |= PCRE_CASELESS;
|
|
||||||
if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE;
|
|
||||||
if ((cflags & REG_DOTALL) != 0) options |= PCRE_DOTALL;
|
|
||||||
if ((cflags & REG_NOSUB) != 0) options |= PCRE_NO_AUTO_CAPTURE;
|
|
||||||
if ((cflags & REG_UTF8) != 0) options |= PCRE_UTF8;
|
|
||||||
|
|
||||||
preg->re_pcre = pcre_compile2(pattern, options, &errorcode, &errorptr,
|
|
||||||
&erroffset, NULL);
|
|
||||||
preg->re_erroffset = erroffset;
|
|
||||||
|
|
||||||
if (preg->re_pcre == NULL) return eint[errorcode];
|
|
||||||
|
|
||||||
preg->re_nsub = pcre_info((const pcre *)preg->re_pcre, NULL, NULL);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Match a regular expression *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* Unfortunately, PCRE requires 3 ints of working space for each captured
|
|
||||||
substring, so we have to get and release working store instead of just using
|
|
||||||
the POSIX structures as was done in earlier releases when PCRE needed only 2
|
|
||||||
ints. However, if the number of possible capturing brackets is small, use a
|
|
||||||
block of store on the stack, to reduce the use of malloc/free. The threshold is
|
|
||||||
in a macro that can be changed at configure time.
|
|
||||||
|
|
||||||
If REG_NOSUB was specified at compile time, the PCRE_NO_AUTO_CAPTURE flag will
|
|
||||||
be set. When this is the case, the nmatch and pmatch arguments are ignored, and
|
|
||||||
the only result is yes/no/error. */
|
|
||||||
|
|
||||||
PCREPOSIX_EXP_DEFN int
|
|
||||||
regexec(const regex_t *preg, const char *string, size_t nmatch,
|
|
||||||
regmatch_t pmatch[], int eflags)
|
|
||||||
{
|
|
||||||
int rc;
|
|
||||||
int options = 0;
|
|
||||||
int *ovector = NULL;
|
|
||||||
int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
|
|
||||||
BOOL allocated_ovector = FALSE;
|
|
||||||
BOOL nosub =
|
|
||||||
(((const pcre *)preg->re_pcre)->options & PCRE_NO_AUTO_CAPTURE) != 0;
|
|
||||||
|
|
||||||
if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL;
|
|
||||||
if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL;
|
|
||||||
|
|
||||||
((regex_t *)preg)->re_erroffset = (size_t)(-1); /* Only has meaning after compile */
|
|
||||||
|
|
||||||
/* When no string data is being returned, ensure that nmatch is zero.
|
|
||||||
Otherwise, ensure the vector for holding the return data is large enough. */
|
|
||||||
|
|
||||||
if (nosub) nmatch = 0;
|
|
||||||
|
|
||||||
else if (nmatch > 0)
|
|
||||||
{
|
|
||||||
if (nmatch <= POSIX_MALLOC_THRESHOLD)
|
|
||||||
{
|
|
||||||
ovector = &(small_ovector[0]);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (nmatch > INT_MAX/(sizeof(int) * 3)) return REG_ESPACE;
|
|
||||||
ovector = (int *)malloc(sizeof(int) * nmatch * 3);
|
|
||||||
if (ovector == NULL) return REG_ESPACE;
|
|
||||||
allocated_ovector = TRUE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string, (int)strlen(string),
|
|
||||||
0, options, ovector, nmatch * 3);
|
|
||||||
|
|
||||||
if (rc == 0) rc = nmatch; /* All captured slots were filled in */
|
|
||||||
|
|
||||||
if (rc >= 0)
|
|
||||||
{
|
|
||||||
size_t i;
|
|
||||||
if (!nosub)
|
|
||||||
{
|
|
||||||
for (i = 0; i < (size_t)rc; i++)
|
|
||||||
{
|
|
||||||
pmatch[i].rm_so = ovector[i*2];
|
|
||||||
pmatch[i].rm_eo = ovector[i*2+1];
|
|
||||||
}
|
|
||||||
if (allocated_ovector) free(ovector);
|
|
||||||
for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (allocated_ovector) free(ovector);
|
|
||||||
switch(rc)
|
|
||||||
{
|
|
||||||
case PCRE_ERROR_NOMATCH: return REG_NOMATCH;
|
|
||||||
case PCRE_ERROR_NULL: return REG_INVARG;
|
|
||||||
case PCRE_ERROR_BADOPTION: return REG_INVARG;
|
|
||||||
case PCRE_ERROR_BADMAGIC: return REG_INVARG;
|
|
||||||
case PCRE_ERROR_UNKNOWN_NODE: return REG_ASSERT;
|
|
||||||
case PCRE_ERROR_NOMEMORY: return REG_ESPACE;
|
|
||||||
case PCRE_ERROR_MATCHLIMIT: return REG_ESPACE;
|
|
||||||
case PCRE_ERROR_BADUTF8: return REG_INVARG;
|
|
||||||
case PCRE_ERROR_BADUTF8_OFFSET: return REG_INVARG;
|
|
||||||
default: return REG_ASSERT;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* End of pcreposix.c */
|
|
@ -1,142 +0,0 @@
|
|||||||
/*************************************************
|
|
||||||
* Perl-Compatible Regular Expressions *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
#ifndef _PCREPOSIX_H
|
|
||||||
#define _PCREPOSIX_H
|
|
||||||
|
|
||||||
/* This is the header for the POSIX wrapper interface to the PCRE Perl-
|
|
||||||
Compatible Regular Expression library. It defines the things POSIX says should
|
|
||||||
be there. I hope.
|
|
||||||
|
|
||||||
Copyright (c) 1997-2007 University of Cambridge
|
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright notice,
|
|
||||||
this list of conditions and the following disclaimer.
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in the
|
|
||||||
documentation and/or other materials provided with the distribution.
|
|
||||||
|
|
||||||
* Neither the name of the University of Cambridge nor the names of its
|
|
||||||
contributors may be used to endorse or promote products derived from
|
|
||||||
this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Have to include stdlib.h in order to ensure that size_t is defined. */
|
|
||||||
|
|
||||||
#include <stdlib.h>
|
|
||||||
|
|
||||||
/* Allow for C++ users */
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Options, mostly defined by POSIX, but with a couple of extras. */
|
|
||||||
|
|
||||||
#define REG_ICASE 0x0001
|
|
||||||
#define REG_NEWLINE 0x0002
|
|
||||||
#define REG_NOTBOL 0x0004
|
|
||||||
#define REG_NOTEOL 0x0008
|
|
||||||
#define REG_DOTALL 0x0010 /* NOT defined by POSIX. */
|
|
||||||
#define REG_NOSUB 0x0020
|
|
||||||
#define REG_UTF8 0x0040 /* NOT defined by POSIX. */
|
|
||||||
|
|
||||||
/* This is not used by PCRE, but by defining it we make it easier
|
|
||||||
to slot PCRE into existing programs that make POSIX calls. */
|
|
||||||
|
|
||||||
#define REG_EXTENDED 0
|
|
||||||
|
|
||||||
/* Error values. Not all these are relevant or used by the wrapper. */
|
|
||||||
|
|
||||||
enum {
|
|
||||||
REG_ASSERT = 1, /* internal error ? */
|
|
||||||
REG_BADBR, /* invalid repeat counts in {} */
|
|
||||||
REG_BADPAT, /* pattern error */
|
|
||||||
REG_BADRPT, /* ? * + invalid */
|
|
||||||
REG_EBRACE, /* unbalanced {} */
|
|
||||||
REG_EBRACK, /* unbalanced [] */
|
|
||||||
REG_ECOLLATE, /* collation error - not relevant */
|
|
||||||
REG_ECTYPE, /* bad class */
|
|
||||||
REG_EESCAPE, /* bad escape sequence */
|
|
||||||
REG_EMPTY, /* empty expression */
|
|
||||||
REG_EPAREN, /* unbalanced () */
|
|
||||||
REG_ERANGE, /* bad range inside [] */
|
|
||||||
REG_ESIZE, /* expression too big */
|
|
||||||
REG_ESPACE, /* failed to get memory */
|
|
||||||
REG_ESUBREG, /* bad back reference */
|
|
||||||
REG_INVARG, /* bad argument */
|
|
||||||
REG_NOMATCH /* match failed */
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
/* The structure representing a compiled regular expression. */
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
void *re_pcre;
|
|
||||||
size_t re_nsub;
|
|
||||||
size_t re_erroffset;
|
|
||||||
} regex_t;
|
|
||||||
|
|
||||||
/* The structure in which a captured offset is returned. */
|
|
||||||
|
|
||||||
typedef int regoff_t;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
regoff_t rm_so;
|
|
||||||
regoff_t rm_eo;
|
|
||||||
} regmatch_t;
|
|
||||||
|
|
||||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
|
||||||
imported have to be identified as such. When building PCRE, the appropriate
|
|
||||||
export settings are needed, and are set in pcreposix.c before including this
|
|
||||||
file. */
|
|
||||||
|
|
||||||
#if defined(_WIN32) && !defined(PCRE_STATIC) && !defined(PCREPOSIX_EXP_DECL)
|
|
||||||
# define PCREPOSIX_EXP_DECL extern __declspec(dllimport)
|
|
||||||
# define PCREPOSIX_EXP_DEFN __declspec(dllimport)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* By default, we use the standard "extern" declarations. */
|
|
||||||
|
|
||||||
#ifndef PCREPOSIX_EXP_DECL
|
|
||||||
# ifdef __cplusplus
|
|
||||||
# define PCREPOSIX_EXP_DECL extern "C"
|
|
||||||
# define PCREPOSIX_EXP_DEFN extern "C"
|
|
||||||
# else
|
|
||||||
# define PCREPOSIX_EXP_DECL extern
|
|
||||||
# define PCREPOSIX_EXP_DEFN extern
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* The functions */
|
|
||||||
|
|
||||||
PCREPOSIX_EXP_DECL int regcomp(regex_t *, const char *, int);
|
|
||||||
PCREPOSIX_EXP_DECL int regexec(const regex_t *, const char *, size_t,
|
|
||||||
regmatch_t *, int);
|
|
||||||
PCREPOSIX_EXP_DECL size_t regerror(int, const regex_t *, char *, size_t);
|
|
||||||
PCREPOSIX_EXP_DECL void regfree(regex_t *);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
} /* extern "C" */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* End of pcreposix.h */
|
|
@ -6,9 +6,15 @@
|
|||||||
#define _UCP_H
|
#define _UCP_H
|
||||||
|
|
||||||
/* This file contains definitions of the property values that are returned by
|
/* This file contains definitions of the property values that are returned by
|
||||||
the function _pcre_ucp_findprop(). New values that are added for new releases
|
the UCD access macros. New values that are added for new releases of Unicode
|
||||||
of Unicode should always be at the end of each enum, for backwards
|
should always be at the end of each enum, for backwards compatibility.
|
||||||
compatibility. */
|
|
||||||
|
IMPORTANT: Note also that the specific numeric values of the enums have to be
|
||||||
|
the same as the values that are generated by the maint/MultiStage2.py script,
|
||||||
|
where the equivalent property descriptive names are listed in vectors.
|
||||||
|
|
||||||
|
ALSO: The specific values of the first two enums are assumed for the table
|
||||||
|
called catposstab in pcre_compile.c. */
|
||||||
|
|
||||||
/* These are the general character categories. */
|
/* These are the general character categories. */
|
||||||
|
|
||||||
@ -22,7 +28,7 @@ enum {
|
|||||||
ucp_Z /* Separator */
|
ucp_Z /* Separator */
|
||||||
};
|
};
|
||||||
|
|
||||||
/* These are the particular character types. */
|
/* These are the particular character categories. */
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
ucp_Cc, /* Control */
|
ucp_Cc, /* Control */
|
||||||
@ -57,6 +63,26 @@ enum {
|
|||||||
ucp_Zs /* Space separator */
|
ucp_Zs /* Space separator */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* These are grapheme break properties. Note that the code for processing them
|
||||||
|
assumes that the values are less than 16. If more values are added that take
|
||||||
|
the number to 16 or more, the code will have to be rewritten. */
|
||||||
|
|
||||||
|
enum {
|
||||||
|
ucp_gbCR, /* 0 */
|
||||||
|
ucp_gbLF, /* 1 */
|
||||||
|
ucp_gbControl, /* 2 */
|
||||||
|
ucp_gbExtend, /* 3 */
|
||||||
|
ucp_gbPrepend, /* 4 */
|
||||||
|
ucp_gbSpacingMark, /* 5 */
|
||||||
|
ucp_gbL, /* 6 Hangul syllable type L */
|
||||||
|
ucp_gbV, /* 7 Hangul syllable type V */
|
||||||
|
ucp_gbT, /* 8 Hangul syllable type T */
|
||||||
|
ucp_gbLV, /* 9 Hangul syllable type LV */
|
||||||
|
ucp_gbLVT, /* 10 Hangul syllable type LVT */
|
||||||
|
ucp_gbRegionalIndicator, /* 11 */
|
||||||
|
ucp_gbOther /* 12 */
|
||||||
|
};
|
||||||
|
|
||||||
/* These are the script identifications. */
|
/* These are the script identifications. */
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
@ -121,11 +147,76 @@ enum {
|
|||||||
ucp_Tifinagh,
|
ucp_Tifinagh,
|
||||||
ucp_Ugaritic,
|
ucp_Ugaritic,
|
||||||
ucp_Yi,
|
ucp_Yi,
|
||||||
ucp_Balinese, /* New for Unicode 5.0.0 */
|
/* New for Unicode 5.0: */
|
||||||
ucp_Cuneiform, /* New for Unicode 5.0.0 */
|
ucp_Balinese,
|
||||||
ucp_Nko, /* New for Unicode 5.0.0 */
|
ucp_Cuneiform,
|
||||||
ucp_Phags_Pa, /* New for Unicode 5.0.0 */
|
ucp_Nko,
|
||||||
ucp_Phoenician /* New for Unicode 5.0.0 */
|
ucp_Phags_Pa,
|
||||||
|
ucp_Phoenician,
|
||||||
|
/* New for Unicode 5.1: */
|
||||||
|
ucp_Carian,
|
||||||
|
ucp_Cham,
|
||||||
|
ucp_Kayah_Li,
|
||||||
|
ucp_Lepcha,
|
||||||
|
ucp_Lycian,
|
||||||
|
ucp_Lydian,
|
||||||
|
ucp_Ol_Chiki,
|
||||||
|
ucp_Rejang,
|
||||||
|
ucp_Saurashtra,
|
||||||
|
ucp_Sundanese,
|
||||||
|
ucp_Vai,
|
||||||
|
/* New for Unicode 5.2: */
|
||||||
|
ucp_Avestan,
|
||||||
|
ucp_Bamum,
|
||||||
|
ucp_Egyptian_Hieroglyphs,
|
||||||
|
ucp_Imperial_Aramaic,
|
||||||
|
ucp_Inscriptional_Pahlavi,
|
||||||
|
ucp_Inscriptional_Parthian,
|
||||||
|
ucp_Javanese,
|
||||||
|
ucp_Kaithi,
|
||||||
|
ucp_Lisu,
|
||||||
|
ucp_Meetei_Mayek,
|
||||||
|
ucp_Old_South_Arabian,
|
||||||
|
ucp_Old_Turkic,
|
||||||
|
ucp_Samaritan,
|
||||||
|
ucp_Tai_Tham,
|
||||||
|
ucp_Tai_Viet,
|
||||||
|
/* New for Unicode 6.0.0: */
|
||||||
|
ucp_Batak,
|
||||||
|
ucp_Brahmi,
|
||||||
|
ucp_Mandaic,
|
||||||
|
/* New for Unicode 6.1.0: */
|
||||||
|
ucp_Chakma,
|
||||||
|
ucp_Meroitic_Cursive,
|
||||||
|
ucp_Meroitic_Hieroglyphs,
|
||||||
|
ucp_Miao,
|
||||||
|
ucp_Sharada,
|
||||||
|
ucp_Sora_Sompeng,
|
||||||
|
ucp_Takri,
|
||||||
|
/* New for Unicode 7.0.0: */
|
||||||
|
ucp_Bassa_Vah,
|
||||||
|
ucp_Caucasian_Albanian,
|
||||||
|
ucp_Duployan,
|
||||||
|
ucp_Elbasan,
|
||||||
|
ucp_Grantha,
|
||||||
|
ucp_Khojki,
|
||||||
|
ucp_Khudawadi,
|
||||||
|
ucp_Linear_A,
|
||||||
|
ucp_Mahajani,
|
||||||
|
ucp_Manichaean,
|
||||||
|
ucp_Mende_Kikakui,
|
||||||
|
ucp_Modi,
|
||||||
|
ucp_Mro,
|
||||||
|
ucp_Nabataean,
|
||||||
|
ucp_Old_North_Arabian,
|
||||||
|
ucp_Old_Permic,
|
||||||
|
ucp_Pahawh_Hmong,
|
||||||
|
ucp_Palmyrene,
|
||||||
|
ucp_Psalter_Pahlavi,
|
||||||
|
ucp_Pau_Cin_Hau,
|
||||||
|
ucp_Siddham,
|
||||||
|
ucp_Tirhuta,
|
||||||
|
ucp_Warang_Citi
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,92 +0,0 @@
|
|||||||
/*************************************************
|
|
||||||
* Unicode Property Table handler *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
#ifndef _UCPINTERNAL_H
|
|
||||||
#define _UCPINTERNAL_H
|
|
||||||
|
|
||||||
/* Internal header file defining the layout of the bits in each pair of 32-bit
|
|
||||||
words that form a data item in the table. */
|
|
||||||
|
|
||||||
typedef struct cnode {
|
|
||||||
pcre_uint32 f0;
|
|
||||||
pcre_uint32 f1;
|
|
||||||
} cnode;
|
|
||||||
|
|
||||||
/* Things for the f0 field */
|
|
||||||
|
|
||||||
#define f0_scriptmask 0xff000000 /* Mask for script field */
|
|
||||||
#define f0_scriptshift 24 /* Shift for script value */
|
|
||||||
#define f0_rangeflag 0x00f00000 /* Flag for a range item */
|
|
||||||
#define f0_charmask 0x001fffff /* Mask for code point value */
|
|
||||||
|
|
||||||
/* Things for the f1 field */
|
|
||||||
|
|
||||||
#define f1_typemask 0xfc000000 /* Mask for char type field */
|
|
||||||
#define f1_typeshift 26 /* Shift for the type field */
|
|
||||||
#define f1_rangemask 0x0000ffff /* Mask for a range offset */
|
|
||||||
#define f1_casemask 0x0000ffff /* Mask for a case offset */
|
|
||||||
#define f1_caseneg 0xffff8000 /* Bits for negation */
|
|
||||||
|
|
||||||
/* The data consists of a vector of structures of type cnode. The two unsigned
|
|
||||||
32-bit integers are used as follows:
|
|
||||||
|
|
||||||
(f0) (1) The most significant byte holds the script number. The numbers are
|
|
||||||
defined by the enum in ucp.h.
|
|
||||||
|
|
||||||
(2) The 0x00800000 bit is set if this entry defines a range of characters.
|
|
||||||
It is not set if this entry defines a single character
|
|
||||||
|
|
||||||
(3) The 0x00600000 bits are spare.
|
|
||||||
|
|
||||||
(4) The 0x001fffff bits contain the code point. No Unicode code point will
|
|
||||||
ever be greater than 0x0010ffff, so this should be OK for ever.
|
|
||||||
|
|
||||||
(f1) (1) The 0xfc000000 bits contain the character type number. The numbers are
|
|
||||||
defined by an enum in ucp.h.
|
|
||||||
|
|
||||||
(2) The 0x03ff0000 bits are spare.
|
|
||||||
|
|
||||||
(3) The 0x0000ffff bits contain EITHER the unsigned offset to the top of
|
|
||||||
range if this entry defines a range, OR the *signed* offset to the
|
|
||||||
character's "other case" partner if this entry defines a single
|
|
||||||
character. There is no partner if the value is zero.
|
|
||||||
|
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
| script (8) |.|.|.| codepoint (21) || type (6) |.|.| spare (8) | offset (16) |
|
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
| | | | |
|
|
||||||
| | |-> spare | |-> spare
|
|
||||||
| | |
|
|
||||||
| |-> spare |-> spare
|
|
||||||
|
|
|
||||||
|-> range flag
|
|
||||||
|
|
||||||
The upper/lower casing information is set only for characters that come in
|
|
||||||
pairs. The non-one-to-one mappings in the Unicode data are ignored.
|
|
||||||
|
|
||||||
When searching the data, proceed as follows:
|
|
||||||
|
|
||||||
(1) Set up for a binary chop search.
|
|
||||||
|
|
||||||
(2) If the top is not greater than the bottom, the character is not in the
|
|
||||||
table. Its type must therefore be "Cn" ("Undefined").
|
|
||||||
|
|
||||||
(3) Find the middle vector element.
|
|
||||||
|
|
||||||
(4) Extract the code point and compare. If equal, we are done.
|
|
||||||
|
|
||||||
(5) If the test character is smaller, set the top to the current point, and
|
|
||||||
goto (2).
|
|
||||||
|
|
||||||
(6) If the current entry defines a range, compute the last character by adding
|
|
||||||
the offset, and see if the test character is within the range. If it is,
|
|
||||||
we are done.
|
|
||||||
|
|
||||||
(7) Otherwise, set the bottom to one element past the current point and goto
|
|
||||||
(2).
|
|
||||||
*/
|
|
||||||
|
|
||||||
#endif /* _UCPINTERNAL_H */
|
|
||||||
|
|
||||||
/* End of ucpinternal.h */
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,2 +1,2 @@
|
|||||||
#define VERSION "3proxy-0.9-devel"
|
#define VERSION "3proxy-0.9-devel"
|
||||||
#define BUILDDATE "161220233959"
|
#define BUILDDATE "161222213124"
|
||||||
|
Loading…
Reference in New Issue
Block a user