summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/Makefile.am16
-rw-r--r--lib/Makefile.in780
-rw-r--r--lib/acl.c407
-rw-r--r--lib/acl.h31
-rw-r--r--lib/alloca.c489
-rw-r--r--lib/alloca_.h54
-rwxr-xr-xlib/config.charset639
-rw-r--r--lib/error.c338
-rw-r--r--lib/error.h66
-rw-r--r--lib/exit.h32
-rw-r--r--lib/exitfail.c25
-rw-r--r--lib/exitfail.h20
-rw-r--r--lib/getdelim.c128
-rw-r--r--lib/getdelim.h28
-rw-r--r--lib/getline.c32
-rw-r--r--lib/getline.h28
-rw-r--r--lib/getopt.c1191
-rw-r--r--lib/getopt1.c171
-rw-r--r--lib/getopt_.h225
-rw-r--r--lib/getopt_int.h131
-rw-r--r--lib/gettext.h242
-rw-r--r--lib/gnulib.mk400
-rw-r--r--lib/localcharset.c460
-rw-r--r--lib/localcharset.h41
-rw-r--r--lib/malloc.c35
-rw-r--r--lib/mbchar.c36
-rw-r--r--lib/mbchar.h478
-rw-r--r--lib/mbuiter.h203
-rw-r--r--lib/memchr.c201
-rw-r--r--lib/memcmp.c363
-rw-r--r--lib/memmove.c26
-rw-r--r--lib/mkstemp.c40
-rw-r--r--lib/mkstemp.h30
-rw-r--r--lib/obstack.c431
-rw-r--r--lib/obstack.h508
-rw-r--r--lib/pathmax.h47
-rw-r--r--lib/quote.c41
-rw-r--r--lib/quote.h22
-rw-r--r--lib/quotearg.c688
-rw-r--r--lib/quotearg.h137
-rw-r--r--lib/ref-add.sin30
-rw-r--r--lib/ref-del.sin25
-rw-r--r--lib/regcomp.c3858
-rw-r--r--lib/regex.c71
-rw-r--r--lib/regex.h668
-rw-r--r--lib/regex_internal.c1742
-rw-r--r--lib/regex_internal.h865
-rw-r--r--lib/regexec.c4398
-rw-r--r--lib/stat-macros.h260
-rw-r--r--lib/stat_.h48
-rw-r--r--lib/stdbool_.h115
-rw-r--r--lib/stdint_.h456
-rw-r--r--lib/strcase.h48
-rw-r--r--lib/strcasecmp.c103
-rw-r--r--lib/strerror.c47
-rw-r--r--lib/strncasecmp.c63
-rw-r--r--lib/strnlen1.c36
-rw-r--r--lib/strnlen1.h40
-rw-r--r--lib/strverscmp.c131
-rw-r--r--lib/strverscmp.h24
-rw-r--r--lib/tempname.c317
-rw-r--r--lib/unlocked-io.h137
-rw-r--r--lib/verify.h141
-rw-r--r--lib/wcwidth.h77
-rw-r--r--lib/xalloc-die.c42
-rw-r--r--lib/xalloc.h79
-rw-r--r--lib/xmalloc.c240
67 files changed, 23321 insertions, 0 deletions
diff --git a/lib/Makefile.am b/lib/Makefile.am
new file mode 100644
index 0000000..6629320
--- /dev/null
+++ b/lib/Makefile.am
@@ -0,0 +1,16 @@
+## Process this file with automake to produce Makefile.in
+
+noinst_LIBRARIES =
+EXTRA_DIST =
+BUILT_SOURCES =
+CLEANFILES =
+MOSTLYCLEANFILES =
+MOSTLYCLEANDIRS =
+
+# Pacify automake
+SUFFIXES = .a .o .c .y .l .h .sh .elc .el
+
+AM_CPPFLAGS = -I$(top_srcdir)/lib -I$(top_srcdir)/intl -I$(top_srcdir) \
+ -I$(top_builddir)/lib -I$(top_builddir)/intl
+
+include gnulib.mk
diff --git a/lib/Makefile.in b/lib/Makefile.in
new file mode 100644
index 0000000..ba87d2a
--- /dev/null
+++ b/lib/Makefile.in
@@ -0,0 +1,780 @@
+# Makefile.in generated by automake 1.9.6 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005 Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright (C) 2004-2006 Free Software Foundation, Inc.
+#
+# This file is free software, distributed under the terms of the GNU
+# General Public License. As a special exception to the GNU General
+# Public License, this file may be distributed as part of a program
+# that contains a configuration script generated by Autoconf, under
+# the same distribution terms as the rest of that program.
+#
+# Generated by gnulib-tool.
+# Reproduce by: gnulib-tool --import --dir=. --lib=libsed --source-base=lib --m4-base=build-aux --doc-base=doc --aux-dir=build-aux --makefile_name=gnulib.mk --no-libtool --macro-prefix=gl acl alloca getline getopt gettext localcharset memchr memcmp memmove mkstemp obstack pathmax regex stat-macros stdbool strerror strverscmp unlocked-io verify
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+top_builddir = ..
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+INSTALL = @INSTALL@
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
+ $(srcdir)/gnulib.mk acl.c alloca.c error.c error.h exitfail.c \
+ getdelim.c getline.c getopt.c getopt1.c malloc.c mbchar.c \
+ memchr.c memcmp.c memmove.c mkstemp.c obstack.c obstack.h \
+ quote.c quotearg.c regex.c strcasecmp.c strerror.c \
+ strncasecmp.c strverscmp.c tempname.c xmalloc.c
+subdir = lib
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/build-aux/absolute-header.m4 \
+ $(top_srcdir)/build-aux/acl.m4 \
+ $(top_srcdir)/build-aux/alloca.m4 \
+ $(top_srcdir)/build-aux/codeset.m4 \
+ $(top_srcdir)/build-aux/error.m4 \
+ $(top_srcdir)/build-aux/exitfail.m4 \
+ $(top_srcdir)/build-aux/extensions.m4 \
+ $(top_srcdir)/build-aux/getdelim.m4 \
+ $(top_srcdir)/build-aux/getline.m4 \
+ $(top_srcdir)/build-aux/getopt.m4 \
+ $(top_srcdir)/build-aux/gettext.m4 \
+ $(top_srcdir)/build-aux/glibc2.m4 \
+ $(top_srcdir)/build-aux/glibc21.m4 \
+ $(top_srcdir)/build-aux/gnulib-comp.m4 \
+ $(top_srcdir)/build-aux/iconv.m4 \
+ $(top_srcdir)/build-aux/intdiv0.m4 \
+ $(top_srcdir)/build-aux/intmax.m4 \
+ $(top_srcdir)/build-aux/inttypes-pri.m4 \
+ $(top_srcdir)/build-aux/inttypes_h.m4 \
+ $(top_srcdir)/build-aux/lcmessage.m4 \
+ $(top_srcdir)/build-aux/lib-ld.m4 \
+ $(top_srcdir)/build-aux/lib-link.m4 \
+ $(top_srcdir)/build-aux/lib-prefix.m4 \
+ $(top_srcdir)/build-aux/localcharset.m4 \
+ $(top_srcdir)/build-aux/lock.m4 \
+ $(top_srcdir)/build-aux/longdouble.m4 \
+ $(top_srcdir)/build-aux/longlong.m4 \
+ $(top_srcdir)/build-aux/mbchar.m4 \
+ $(top_srcdir)/build-aux/mbiter.m4 \
+ $(top_srcdir)/build-aux/mbrtowc.m4 \
+ $(top_srcdir)/build-aux/mbstate_t.m4 \
+ $(top_srcdir)/build-aux/memchr.m4 \
+ $(top_srcdir)/build-aux/memcmp.m4 \
+ $(top_srcdir)/build-aux/memmove.m4 \
+ $(top_srcdir)/build-aux/mkstemp.m4 \
+ $(top_srcdir)/build-aux/nls.m4 \
+ $(top_srcdir)/build-aux/pathmax.m4 \
+ $(top_srcdir)/build-aux/po.m4 \
+ $(top_srcdir)/build-aux/printf-posix.m4 \
+ $(top_srcdir)/build-aux/progtest.m4 \
+ $(top_srcdir)/build-aux/quote.m4 \
+ $(top_srcdir)/build-aux/quotearg.m4 \
+ $(top_srcdir)/build-aux/regex.m4 \
+ $(top_srcdir)/build-aux/signed.m4 \
+ $(top_srcdir)/build-aux/size_max.m4 \
+ $(top_srcdir)/build-aux/ssize_t.m4 \
+ $(top_srcdir)/build-aux/stat-macros.m4 \
+ $(top_srcdir)/build-aux/stdbool.m4 \
+ $(top_srcdir)/build-aux/stdint.m4 \
+ $(top_srcdir)/build-aux/stdint_h.m4 \
+ $(top_srcdir)/build-aux/strcase.m4 \
+ $(top_srcdir)/build-aux/strerror.m4 \
+ $(top_srcdir)/build-aux/strverscmp.m4 \
+ $(top_srcdir)/build-aux/sys_stat_h.m4 \
+ $(top_srcdir)/build-aux/uintmax_t.m4 \
+ $(top_srcdir)/build-aux/ulonglong.m4 \
+ $(top_srcdir)/build-aux/unistd_h.m4 \
+ $(top_srcdir)/build-aux/unlocked-io.m4 \
+ $(top_srcdir)/build-aux/visibility.m4 \
+ $(top_srcdir)/build-aux/wchar_t.m4 \
+ $(top_srcdir)/build-aux/wcwidth.m4 \
+ $(top_srcdir)/build-aux/wint_t.m4 \
+ $(top_srcdir)/build-aux/xalloc.m4 \
+ $(top_srcdir)/build-aux/xsize.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+LIBRARIES = $(noinst_LIBRARIES)
+AR = ar
+ARFLAGS = cru
+libsed_a_AR = $(AR) $(ARFLAGS)
+am__DEPENDENCIES_1 = @LIBOBJS@
+libsed_a_DEPENDENCIES = $(am__DEPENDENCIES_1) @ALLOCA@
+am_libsed_a_OBJECTS = localcharset.$(OBJEXT) strnlen1.$(OBJEXT) \
+ xalloc-die.$(OBJEXT)
+libsed_a_OBJECTS = $(am_libsed_a_OBJECTS)
+DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir)
+depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp
+am__depfiles_maybe = depfiles
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
+SOURCES = $(libsed_a_SOURCES)
+DIST_SOURCES = $(libsed_a_SOURCES)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ABSOLUTE_STDINT_H = @ABSOLUTE_STDINT_H@
+ABSOLUTE_SYS_STAT_H = @ABSOLUTE_SYS_STAT_H@
+ACLOCAL = @ACLOCAL@
+ALLOCA = @ALLOCA@
+ALLOCA_H = @ALLOCA_H@
+AMDEP_FALSE = @AMDEP_FALSE@
+AMDEP_TRUE = @AMDEP_TRUE@
+AMTAR = @AMTAR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BITSIZEOF_PTRDIFF_T = @BITSIZEOF_PTRDIFF_T@
+BITSIZEOF_SIG_ATOMIC_T = @BITSIZEOF_SIG_ATOMIC_T@
+BITSIZEOF_SIZE_T = @BITSIZEOF_SIZE_T@
+BITSIZEOF_WCHAR_T = @BITSIZEOF_WCHAR_T@
+BITSIZEOF_WINT_T = @BITSIZEOF_WINT_T@
+BUILD_HTML_FALSE = @BUILD_HTML_FALSE@
+BUILD_HTML_TRUE = @BUILD_HTML_TRUE@
+BUILD_INCLUDED_LIBINTL = @BUILD_INCLUDED_LIBINTL@
+CATOBJEXT = @CATOBJEXT@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CFLAG_VISIBILITY = @CFLAG_VISIBILITY@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DATADIRNAME = @DATADIRNAME@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+GENCAT = @GENCAT@
+GETOPT_H = @GETOPT_H@
+GLIBC2 = @GLIBC2@
+GLIBC21 = @GLIBC21@
+GL_COND_LIBTOOL_FALSE = @GL_COND_LIBTOOL_FALSE@
+GL_COND_LIBTOOL_TRUE = @GL_COND_LIBTOOL_TRUE@
+GMSGFMT = @GMSGFMT@
+GMSGFMT_015 = @GMSGFMT_015@
+GREP = @GREP@
+HAVE_ASPRINTF = @HAVE_ASPRINTF@
+HAVE_INTTYPES_H = @HAVE_INTTYPES_H@
+HAVE_LONG_LONG_INT = @HAVE_LONG_LONG_INT@
+HAVE_POSIX_PRINTF = @HAVE_POSIX_PRINTF@
+HAVE_SIGNED_SIG_ATOMIC_T = @HAVE_SIGNED_SIG_ATOMIC_T@
+HAVE_SIGNED_WCHAR_T = @HAVE_SIGNED_WCHAR_T@
+HAVE_SIGNED_WINT_T = @HAVE_SIGNED_WINT_T@
+HAVE_SNPRINTF = @HAVE_SNPRINTF@
+HAVE_STDINT_H = @HAVE_STDINT_H@
+HAVE_SYS_BITYPES_H = @HAVE_SYS_BITYPES_H@
+HAVE_SYS_INTTYPES_H = @HAVE_SYS_INTTYPES_H@
+HAVE_SYS_TYPES_H = @HAVE_SYS_TYPES_H@
+HAVE_VISIBILITY = @HAVE_VISIBILITY@
+HAVE_WCHAR_H = @HAVE_WCHAR_H@
+HAVE_WPRINTF = @HAVE_WPRINTF@
+HAVE__BOOL = @HAVE__BOOL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+INSTOBJEXT = @INSTOBJEXT@
+INTLBISON = @INTLBISON@
+INTLLIBS = @INTLLIBS@
+INTLOBJS = @INTLOBJS@
+INTL_LIBTOOL_SUFFIX_PREFIX = @INTL_LIBTOOL_SUFFIX_PREFIX@
+INTL_MACOSX_LIBS = @INTL_MACOSX_LIBS@
+LDFLAGS = @LDFLAGS@
+LIBICONV = @LIBICONV@
+LIBINTL = @LIBINTL@
+LIBMULTITHREAD = @LIBMULTITHREAD@
+LIBOBJS = @LIBOBJS@
+LIBPTH = @LIBPTH@
+LIBS = @LIBS@
+LIBSED_LIBDEPS = @LIBSED_LIBDEPS@
+LIBSED_LTLIBDEPS = @LIBSED_LTLIBDEPS@
+LIBTHREAD = @LIBTHREAD@
+LIB_ACL = @LIB_ACL@
+LTLIBICONV = @LTLIBICONV@
+LTLIBINTL = @LTLIBINTL@
+LTLIBMULTITHREAD = @LTLIBMULTITHREAD@
+LTLIBOBJS = @LTLIBOBJS@
+LTLIBPTH = @LTLIBPTH@
+LTLIBTHREAD = @LTLIBTHREAD@
+MAKEINFO = @MAKEINFO@
+MAKEINFO_HTML_FALSE = @MAKEINFO_HTML_FALSE@
+MAKEINFO_HTML_TRUE = @MAKEINFO_HTML_TRUE@
+MSGFMT = @MSGFMT@
+MSGFMT_015 = @MSGFMT_015@
+MSGMERGE = @MSGMERGE@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+POSUB = @POSUB@
+PRI_MACROS_BROKEN = @PRI_MACROS_BROKEN@
+PTRDIFF_T_SUFFIX = @PTRDIFF_T_SUFFIX@
+RANLIB = @RANLIB@
+SED_FEATURE_VERSION = @SED_FEATURE_VERSION@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SIG_ATOMIC_T_SUFFIX = @SIG_ATOMIC_T_SUFFIX@
+SIZE_T_SUFFIX = @SIZE_T_SUFFIX@
+STDBOOL_H = @STDBOOL_H@
+STDINT_H = @STDINT_H@
+STRIP = @STRIP@
+SYS_STAT_H = @SYS_STAT_H@
+TEST_REGEX_FALSE = @TEST_REGEX_FALSE@
+TEST_REGEX_TRUE = @TEST_REGEX_TRUE@
+TEXI2HTML = @TEXI2HTML@
+TEXI2HTML_HTML_FALSE = @TEXI2HTML_HTML_FALSE@
+TEXI2HTML_HTML_TRUE = @TEXI2HTML_HTML_TRUE@
+UNISTD_H = @UNISTD_H@
+USE_INCLUDED_LIBINTL = @USE_INCLUDED_LIBINTL@
+USE_NLS = @USE_NLS@
+VERSION = @VERSION@
+WCHAR_T_SUFFIX = @WCHAR_T_SUFFIX@
+WINT_T_SUFFIX = @WINT_T_SUFFIX@
+WOE32DLL = @WOE32DLL@
+XGETTEXT = @XGETTEXT@
+XGETTEXT_015 = @XGETTEXT_015@
+ac_ct_CC = @ac_ct_CC@
+am__fastdepCC_FALSE = @am__fastdepCC_FALSE@
+am__fastdepCC_TRUE = @am__fastdepCC_TRUE@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+noinst_LIBRARIES = libsed.a
+EXTRA_DIST = acl.h alloca_.h exitfail.h getdelim.h getline.h getopt_.h \
+ getopt_int.h config.charset ref-add.sin ref-del.sin mkstemp.h \
+ pathmax.h quote.h quotearg.h regcomp.c regex.h \
+ regex_internal.c regex_internal.h regexec.c stat-macros.h \
+ stdbool_.h stdint_.h strverscmp.h stat_.h unlocked-io.h \
+ xalloc.h
+BUILT_SOURCES = $(ALLOCA_H) configmake.h $(GETOPT_H) $(STDBOOL_H) \
+ $(STDINT_H) $(SYS_STAT_H) $(UNISTD_H)
+CLEANFILES = configmake.h configmake.h-t charset.alias ref-add.sed \
+ ref-del.sed
+MOSTLYCLEANFILES = core *.stackdump alloca.h alloca.h-t getopt.h \
+ getopt.h-t stdbool.h stdbool.h-t stdint.h stdint.h-t \
+ sys/stat.h sys/stat.h-t unistd.h
+MOSTLYCLEANDIRS = sys
+
+# Pacify automake
+SUFFIXES = .a .o .c .y .l .h .sh .elc .el .sed .sin
+
+# This is for those projects which use "gettextize --intl" to put a source-code
+# copy of libintl into their package. In such projects, every Makefile.am needs
+# -I$(top_builddir)/intl, so that <libintl.h> can be found in this directory.
+# For the Makefile.ams in other directories it is the maintainer's
+# responsibility; for the one from gnulib we do it here.
+# This option has no effect when the user disables NLS (because then the intl
+# directory contains no libintl.h file) or when the project does not use
+# "gettextize --intl".
+AM_CPPFLAGS = -I$(top_srcdir)/lib -I$(top_srcdir)/intl -I$(top_srcdir) \
+ -I$(top_builddir)/lib -I$(top_builddir)/intl \
+ -I$(top_builddir)/intl
+libsed_a_SOURCES = exit.h gettext.h localcharset.h localcharset.c \
+ mbchar.h mbuiter.h strcase.h strnlen1.h strnlen1.c verify.h \
+ wcwidth.h xalloc-die.c
+libsed_a_LIBADD = $(LIBOBJS) @ALLOCA@
+charset_alias = $(DESTDIR)$(libdir)/charset.alias
+charset_tmp = $(DESTDIR)$(libdir)/charset.tmp
+all: $(BUILT_SOURCES)
+ $(MAKE) $(AM_MAKEFLAGS) all-am
+
+.SUFFIXES:
+.SUFFIXES: .a .o .c .y .l .h .sh .elc .el .sed .sin .obj
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/gnulib.mk $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
+ && exit 0; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnits lib/Makefile'; \
+ cd $(top_srcdir) && \
+ $(AUTOMAKE) --gnits lib/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+clean-noinstLIBRARIES:
+ -test -z "$(noinst_LIBRARIES)" || rm -f $(noinst_LIBRARIES)
+libsed.a: $(libsed_a_OBJECTS) $(libsed_a_DEPENDENCIES)
+ -rm -f libsed.a
+ $(libsed_a_AR) libsed.a $(libsed_a_OBJECTS) $(libsed_a_LIBADD)
+ $(RANLIB) libsed.a
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/acl.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/alloca.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/error.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/exitfail.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/getdelim.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/getline.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/getopt.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/getopt1.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/malloc.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/mbchar.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/memchr.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/memcmp.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/memmove.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/mkstemp.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/obstack.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/quote.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/quotearg.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/regex.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/strcasecmp.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/strerror.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/strncasecmp.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/strverscmp.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/tempname.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/xmalloc.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/localcharset.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/strnlen1.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xalloc-die.Po@am__quote@
+
+.c.o:
+@am__fastdepCC_TRUE@ if $(COMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ $<; \
+@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(COMPILE) -c $<
+
+.c.obj:
+@am__fastdepCC_TRUE@ if $(COMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ `$(CYGPATH_W) '$<'`; \
+@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'`
+uninstall-info-am:
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ mkid -fID $$unique
+tags: TAGS
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ tags=; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$tags $$unique; \
+ fi
+ctags: CTAGS
+CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ tags=; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ test -z "$(CTAGS_ARGS)$$tags$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$tags $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && cd $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) $$here
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's|.|.|g'`; \
+ list='$(DISTFILES)'; for file in $$list; do \
+ case $$file in \
+ $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \
+ $(top_srcdir)/*) file=`echo "$$file" | sed "s|^$$topsrcdirstrip/|$(top_builddir)/|"`;; \
+ esac; \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test "$$dir" != "$$file" && test "$$dir" != "."; then \
+ dir="/$$dir"; \
+ $(mkdir_p) "$(distdir)$$dir"; \
+ else \
+ dir=''; \
+ fi; \
+ if test -d $$d/$$file; then \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+ fi; \
+ cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+ else \
+ test -f $(distdir)/$$file \
+ || cp -p $$d/$$file $(distdir)/$$file \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: $(BUILT_SOURCES)
+ $(MAKE) $(AM_MAKEFLAGS) check-am
+all-am: Makefile $(LIBRARIES) all-local
+installdirs:
+install: $(BUILT_SOURCES)
+ $(MAKE) $(AM_MAKEFLAGS) install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+ -test -z "$(MOSTLYCLEANFILES)" || rm -f $(MOSTLYCLEANFILES)
+
+clean-generic:
+ -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+ -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES)
+clean: clean-am
+
+clean-am: clean-generic clean-noinstLIBRARIES mostlyclean-am
+
+distclean: distclean-am
+ -rm -rf $(DEPDIR) ./$(DEPDIR)
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-exec-am: install-exec-local
+
+install-info: install-info-am
+
+install-man:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -rf $(DEPDIR) ./$(DEPDIR)
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-local
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-info-am uninstall-local
+
+.PHONY: CTAGS GTAGS all all-am all-local check check-am clean \
+ clean-generic clean-noinstLIBRARIES ctags distclean \
+ distclean-compile distclean-generic distclean-tags distdir dvi \
+ dvi-am html html-am info info-am install install-am \
+ install-data install-data-am install-exec install-exec-am \
+ install-exec-local install-info install-info-am install-man \
+ install-strip installcheck installcheck-am installdirs \
+ maintainer-clean maintainer-clean-generic mostlyclean \
+ mostlyclean-compile mostlyclean-generic mostlyclean-local pdf \
+ pdf-am ps ps-am tags uninstall uninstall-am uninstall-info-am \
+ uninstall-local
+
+
+# We need the following in order to create <alloca.h> when the system
+# doesn't have one that works with the given compiler.
+alloca.h: alloca_.h
+ cp -f $(srcdir)/alloca_.h $@-t
+ mv -f $@-t $@
+
+# Retrieve values of the variables through 'configure' followed by
+# 'make', not directly through 'configure', so that a user who
+# sets some of these variables consistently on the 'make' command
+# line gets correct results.
+#
+# One advantage of this approach, compared to the classical
+# approach of adding -DLIBDIR=\"$(libdir)\" etc. to AM_CPPFLAGS,
+# is that it protects against the use of undefined variables.
+# If, say, $(libdir) is not set in the Makefile, LIBDIR is not
+# defined by this module, and code using LIBDIR gives a
+# compilation error.
+#
+# Another advantage is that 'make' output is shorter.
+#
+# Listed in the same order as the GNU makefile conventions.
+# The Automake-defined pkg* macros are appended, in the order
+# listed in the Automake documentation.
+configmake.h: Makefile
+ rm -f $@-t $@
+ ( \
+ echo '#define PREFIX "$(prefix)"'; \
+ echo '#define EXEC_PREFIX "$(exec_prefix)"'; \
+ echo '#define BINDIR "$(bindir)"'; \
+ echo '#define SBINDIR "$(sbindir)"'; \
+ echo '#define LIBEXECDIR "$(libexecdir)"'; \
+ echo '#define DATAROOTDIR "$(datarootdir)"'; \
+ echo '#define DATADIR "$(datadir)"'; \
+ echo '#define SYSCONFDIR "$(sysconfdir)"'; \
+ echo '#define SHAREDSTATEDIR "$(sharedstatedir)"'; \
+ echo '#define LOCALSTATEDIR "$(localstatedir)"'; \
+ echo '#define INCLUDEDIR "$(includedir)"'; \
+ echo '#define OLDINCLUDEDIR "$(oldincludedir)"'; \
+ echo '#define DOCDIR "$(docdir)"'; \
+ echo '#define INFODIR "$(infodir)"'; \
+ echo '#define HTMLDIR "$(htmldir)"'; \
+ echo '#define DVIDIR "$(dvidir)"'; \
+ echo '#define PDFDIR "$(pdfdir)"'; \
+ echo '#define PSDIR "$(psdir)"'; \
+ echo '#define LIBDIR "$(libdir)"'; \
+ echo '#define LISPDIR "$(lispdir)"'; \
+ echo '#define LOCALEDIR "$(localedir)"'; \
+ echo '#define MANDIR "$(mandir)"'; \
+ echo '#define MANEXT "$(manext)"'; \
+ echo '#define PKGLIBDIR "$(pkglibdir)"'; \
+ echo '#define PKGINCLUDEDIR "$(pkgincludedir)"'; \
+ echo '#define PKGDATADIR "$(pkgdatadir)"'; \
+ :) | sed '/""/d' >$@-t
+ mv $@-t $@
+
+# We need the following in order to create <getopt.h> when the system
+# doesn't have one that works with the given compiler.
+getopt.h: getopt_.h
+ cp -f $(srcdir)/getopt_.h $@-t
+ mv -f $@-t $@
+
+# We need the following in order to install a simple file in $(libdir)
+# which is shared with other installed packages. We use a list of referencing
+# packages so that "make uninstall" will remove the file if and only if it
+# is not used by another installed package.
+# On systems with glibc-2.1 or newer, the file is redundant, therefore we
+# avoid installing it.
+
+all-local: charset.alias ref-add.sed ref-del.sed
+install-exec-local: all-local
+ test $(GLIBC21) != no || $(mkinstalldirs) $(DESTDIR)$(libdir)
+ if test -f $(charset_alias); then \
+ sed -f ref-add.sed $(charset_alias) > $(charset_tmp) ; \
+ $(INSTALL_DATA) $(charset_tmp) $(charset_alias) ; \
+ rm -f $(charset_tmp) ; \
+ else \
+ if test $(GLIBC21) = no; then \
+ sed -f ref-add.sed charset.alias > $(charset_tmp) ; \
+ $(INSTALL_DATA) $(charset_tmp) $(charset_alias) ; \
+ rm -f $(charset_tmp) ; \
+ fi ; \
+ fi
+
+uninstall-local: all-local
+ if test -f $(charset_alias); then \
+ sed -f ref-del.sed $(charset_alias) > $(charset_tmp); \
+ if grep '^# Packages using this file: $$' $(charset_tmp) \
+ > /dev/null; then \
+ rm -f $(charset_alias); \
+ else \
+ $(INSTALL_DATA) $(charset_tmp) $(charset_alias); \
+ fi; \
+ rm -f $(charset_tmp); \
+ fi
+
+charset.alias: config.charset
+ rm -f t-$@ $@
+ $(SHELL) $(srcdir)/config.charset '$(host)' > t-$@
+ mv t-$@ $@
+.sin.sed:
+ rm -f t-$@ $@
+ sed -e '/^#/d' -e 's/@''PACKAGE''@/$(PACKAGE)/g' $< > t-$@
+ mv t-$@ $@
+
+# We need the following in order to create <stdbool.h> when the system
+# doesn't have one that works.
+stdbool.h: stdbool_.h
+ rm -f $@-t $@
+ sed -e 's/@''HAVE__BOOL''@/$(HAVE__BOOL)/g' < $(srcdir)/stdbool_.h > $@-t
+ mv $@-t $@
+
+# We need the following in order to create <stdint.h> when the system
+# doesn't have one that works with the given compiler.
+stdint.h: stdint_.h
+ rm -f $@-t $@
+ sed -e 's/@''HAVE_WCHAR_H''@/$(HAVE_WCHAR_H)/g' \
+ -e 's/@''HAVE_STDINT_H''@/$(HAVE_STDINT_H)/g' \
+ -e 's|@''ABSOLUTE_STDINT_H''@|$(ABSOLUTE_STDINT_H)|g' \
+ -e 's/@''HAVE_SYS_TYPES_H''@/$(HAVE_SYS_TYPES_H)/g' \
+ -e 's/@''HAVE_INTTYPES_H''@/$(HAVE_INTTYPES_H)/g' \
+ -e 's/@''HAVE_SYS_INTTYPES_H''@/$(HAVE_SYS_INTTYPES_H)/g' \
+ -e 's/@''HAVE_SYS_BITYPES_H''@/$(HAVE_SYS_BITYPES_H)/g' \
+ -e 's/@''HAVE_LONG_LONG_INT''@/$(HAVE_LONG_LONG_INT)/g' \
+ -e 's/@''BITSIZEOF_PTRDIFF_T''@/$(BITSIZEOF_PTRDIFF_T)/g' \
+ -e 's/@''PTRDIFF_T_SUFFIX''@/$(PTRDIFF_T_SUFFIX)/g' \
+ -e 's/@''BITSIZEOF_SIG_ATOMIC_T''@/$(BITSIZEOF_SIG_ATOMIC_T)/g' \
+ -e 's/@''HAVE_SIGNED_SIG_ATOMIC_T''@/$(HAVE_SIGNED_SIG_ATOMIC_T)/g' \
+ -e 's/@''SIG_ATOMIC_T_SUFFIX''@/$(SIG_ATOMIC_T_SUFFIX)/g' \
+ -e 's/@''BITSIZEOF_SIZE_T''@/$(BITSIZEOF_SIZE_T)/g' \
+ -e 's/@''SIZE_T_SUFFIX''@/$(SIZE_T_SUFFIX)/g' \
+ -e 's/@''BITSIZEOF_WCHAR_T''@/$(BITSIZEOF_WCHAR_T)/g' \
+ -e 's/@''HAVE_SIGNED_WCHAR_T''@/$(HAVE_SIGNED_WCHAR_T)/g' \
+ -e 's/@''WCHAR_T_SUFFIX''@/$(WCHAR_T_SUFFIX)/g' \
+ -e 's/@''BITSIZEOF_WINT_T''@/$(BITSIZEOF_WINT_T)/g' \
+ -e 's/@''HAVE_SIGNED_WINT_T''@/$(HAVE_SIGNED_WINT_T)/g' \
+ -e 's/@''WINT_T_SUFFIX''@/$(WINT_T_SUFFIX)/g' \
+ < $(srcdir)/stdint_.h > $@-t
+ mv $@-t $@
+
+# We need the following in order to create <sys/stat.h> when the system
+# has one that is incomplete.
+sys/stat.h: stat_.h
+ test -d sys || mkdir sys
+ rm -f $@-t $@
+ sed -e 's|@''ABSOLUTE_SYS_STAT_H''@|$(ABSOLUTE_SYS_STAT_H)|g' \
+ < $(srcdir)/stat_.h > $@-t
+ mv $@-t $@
+
+# We need the following in order to create an empty placeholder for
+# <unistd.h> when the system doesn't have one.
+unistd.h:
+ echo '/* Empty placeholder for $@. */' >$@
+
+mostlyclean-local: mostlyclean-generic
+ @test -z "$(MOSTLYCLEANDIRS)" || \
+ for dir in $(MOSTLYCLEANDIRS); do \
+ if test -d $$dir; then \
+ echo "rmdir $$dir"; rmdir $$dir; \
+ fi; \
+ done
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/lib/acl.c b/lib/acl.c
new file mode 100644
index 0000000..803be0d
--- /dev/null
+++ b/lib/acl.c
@@ -0,0 +1,407 @@
+/* acl.c - access control lists
+
+ Copyright (C) 2002, 2003, 2005, 2006 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+ Written by Paul Eggert and Andreas Gruenbacher. */
+
+#include <config.h>
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#ifndef S_ISLNK
+# define S_ISLNK(Mode) 0
+#endif
+
+#ifdef HAVE_ACL_LIBACL_H
+# include <acl/libacl.h>
+#endif
+
+#include "acl.h"
+#include "error.h"
+#include "quote.h"
+
+#include <errno.h>
+#ifndef ENOSYS
+# define ENOSYS (-1)
+#endif
+#ifndef ENOTSUP
+# define ENOTSUP (-1)
+#endif
+
+#if ENABLE_NLS
+# include <libintl.h>
+# define _(Text) gettext (Text)
+#else
+# define _(Text) Text
+#endif
+
+#ifndef HAVE_FCHMOD
+# define HAVE_FCHMOD false
+# define fchmod(fd, mode) (-1)
+#endif
+
+/* POSIX 1003.1e (draft 17) */
+#ifndef HAVE_ACL_GET_FD
+# define HAVE_ACL_GET_FD false
+# define acl_get_fd(fd) (NULL)
+#endif
+
+/* POSIX 1003.1e (draft 17) */
+#ifndef HAVE_ACL_SET_FD
+# define HAVE_ACL_SET_FD false
+# define acl_set_fd(fd, acl) (-1)
+#endif
+
+/* Linux-specific */
+#ifndef HAVE_ACL_EXTENDED_FILE
+# define HAVE_ACL_EXTENDED_FILE false
+# define acl_extended_file(name) (-1)
+#endif
+
+/* Linux-specific */
+#ifndef HAVE_ACL_FROM_MODE
+# define HAVE_ACL_FROM_MODE false
+# define acl_from_mode(mode) (NULL)
+#endif
+
+/* We detect the presence of POSIX 1003.1e (draft 17 -- abandoned) support
+ by checking for HAVE_ACL_GET_FILE, HAVE_ACL_SET_FILE, and HAVE_ACL_FREE.
+ Systems that have acl_get_file, acl_set_file, and acl_free must also
+ have acl_to_text, acl_from_text, and acl_delete_def_file (all defined
+ in the draft); systems that don't would hit #error statements here. */
+
+#if USE_ACL && HAVE_ACL_GET_FILE && !HAVE_ACL_ENTRIES
+# ifndef HAVE_ACL_TO_TEXT
+# error Must have acl_to_text (see POSIX 1003.1e draft 17).
+# endif
+
+/* Return the number of entries in ACL. Linux implements acl_entries
+ as a more efficient extension than using this workaround. */
+
+static int
+acl_entries (acl_t acl)
+{
+ char *text = acl_to_text (acl, NULL), *t;
+ int entries;
+ if (text == NULL)
+ return -1;
+ for (entries = 0, t = text; ; t++, entries++) {
+ t = strchr (t, '\n');
+ if (t == NULL)
+ break;
+ }
+ acl_free (text);
+ return entries;
+}
+#endif
+
+/* If DESC is a valid file descriptor use fchmod to change the
+ file's mode to MODE on systems that have fchown. On systems
+ that don't have fchown and if DESC is invalid, use chown on
+ NAME instead. */
+
+int
+chmod_or_fchmod (const char *name, int desc, mode_t mode)
+{
+ if (HAVE_FCHMOD && desc != -1)
+ return fchmod (desc, mode);
+ else
+ return chmod (name, mode);
+}
+
+/* Return 1 if NAME has a nontrivial access control list, 0 if
+ NAME only has no or a base access control list, and -1 on
+ error. SB must be set to the stat buffer of FILE. */
+
+int
+file_has_acl (char const *name, struct stat const *sb)
+{
+#if USE_ACL && HAVE_ACL && defined GETACLCNT
+ /* This implementation should work on recent-enough versions of HP-UX,
+ Solaris, and Unixware. */
+
+# ifndef MIN_ACL_ENTRIES
+# define MIN_ACL_ENTRIES 4
+# endif
+
+ if (! S_ISLNK (sb->st_mode))
+ {
+ int n = acl (name, GETACLCNT, 0, NULL);
+ return n < 0 ? (errno == ENOSYS ? 0 : -1) : (MIN_ACL_ENTRIES < n);
+ }
+#elif USE_ACL && HAVE_ACL_GET_FILE && HAVE_ACL_FREE
+ /* POSIX 1003.1e (draft 17 -- abandoned) specific version. */
+
+ if (! S_ISLNK (sb->st_mode))
+ {
+ int ret;
+
+ if (HAVE_ACL_EXTENDED_FILE)
+ ret = acl_extended_file (name);
+ else
+ {
+ acl_t acl = acl_get_file (name, ACL_TYPE_ACCESS);
+ if (acl)
+ {
+ ret = (3 < acl_entries (acl));
+ acl_free (acl);
+ if (ret == 0 && S_ISDIR (sb->st_mode))
+ {
+ acl = acl_get_file (name, ACL_TYPE_DEFAULT);
+ if (acl)
+ {
+ ret = (0 < acl_entries (acl));
+ acl_free (acl);
+ }
+ else
+ ret = -1;
+ }
+ }
+ else
+ ret = -1;
+ }
+ if (ret < 0)
+ return (errno == ENOSYS || errno == ENOTSUP) ? 0 : -1;
+ return ret;
+ }
+#endif
+
+ /* FIXME: Add support for AIX, Irix, and Tru64. Please see Samba's
+ source/lib/sysacls.c file for fix-related ideas. */
+
+ return 0;
+}
+
+/* Copy access control lists from one file to another. If SOURCE_DESC is
+ a valid file descriptor, use file descriptor operations, else use
+ filename based operations on SRC_NAME. Likewise for DEST_DESC and
+ DEST_NAME.
+ If access control lists are not available, fchmod the target file to
+ MODE. Also sets the non-permission bits of the destination file
+ (S_ISUID, S_ISGID, S_ISVTX) to those from MODE if any are set.
+ System call return value semantics. */
+
+int
+copy_acl (const char *src_name, int source_desc, const char *dst_name,
+ int dest_desc, mode_t mode)
+{
+ int ret;
+
+#if USE_ACL && HAVE_ACL_GET_FILE && HAVE_ACL_SET_FILE && HAVE_ACL_FREE
+ /* POSIX 1003.1e (draft 17 -- abandoned) specific version. */
+
+ acl_t acl;
+ if (HAVE_ACL_GET_FD && source_desc != -1)
+ acl = acl_get_fd (source_desc);
+ else
+ acl = acl_get_file (src_name, ACL_TYPE_ACCESS);
+ if (acl == NULL)
+ {
+ if (errno == ENOSYS || errno == ENOTSUP)
+ return set_acl (dst_name, dest_desc, mode);
+ else
+ {
+ error (0, errno, "%s", quote (src_name));
+ return -1;
+ }
+ }
+
+ if (HAVE_ACL_SET_FD && dest_desc != -1)
+ ret = acl_set_fd (dest_desc, acl);
+ else
+ ret = acl_set_file (dst_name, ACL_TYPE_ACCESS, acl);
+ if (ret != 0)
+ {
+ int saved_errno = errno;
+
+ if (errno == ENOSYS || errno == ENOTSUP)
+ {
+ int n = acl_entries (acl);
+
+ acl_free (acl);
+ if (n == 3)
+ {
+ if (chmod_or_fchmod (dst_name, dest_desc, mode) != 0)
+ saved_errno = errno;
+ else
+ return 0;
+ }
+ else
+ chmod_or_fchmod (dst_name, dest_desc, mode);
+ }
+ else
+ {
+ acl_free (acl);
+ chmod_or_fchmod (dst_name, dest_desc, mode);
+ }
+ error (0, saved_errno, _("preserving permissions for %s"),
+ quote (dst_name));
+ return -1;
+ }
+ else
+ acl_free (acl);
+
+ if (mode & (S_ISUID | S_ISGID | S_ISVTX))
+ {
+ /* We did not call chmod so far, so the special bits have not yet
+ been set. */
+
+ if (chmod_or_fchmod (dst_name, dest_desc, mode) != 0)
+ {
+ error (0, errno, _("preserving permissions for %s"),
+ quote (dst_name));
+ return -1;
+ }
+ }
+
+ if (S_ISDIR (mode))
+ {
+ acl = acl_get_file (src_name, ACL_TYPE_DEFAULT);
+ if (acl == NULL)
+ {
+ error (0, errno, "%s", quote (src_name));
+ return -1;
+ }
+
+ if (acl_set_file (dst_name, ACL_TYPE_DEFAULT, acl))
+ {
+ error (0, errno, _("preserving permissions for %s"),
+ quote (dst_name));
+ acl_free (acl);
+ return -1;
+ }
+ else
+ acl_free (acl);
+ }
+ return 0;
+#else
+ ret = chmod_or_fchmod (dst_name, dest_desc, mode);
+ if (ret != 0)
+ error (0, errno, _("preserving permissions for %s"), quote (dst_name));
+ return ret;
+#endif
+}
+
+/* Set the access control lists of a file. If DESC is a valid file
+ descriptor, use file descriptor operations where available, else use
+ filename based operations on NAME. If access control lists are not
+ available, fchmod the target file to MODE. Also sets the
+ non-permission bits of the destination file (S_ISUID, S_ISGID, S_ISVTX)
+ to those from MODE if any are set. System call return value
+ semantics. */
+
+int
+set_acl (char const *name, int desc, mode_t mode)
+{
+#if USE_ACL && HAVE_ACL_SET_FILE && HAVE_ACL_FREE
+ /* POSIX 1003.1e draft 17 (abandoned) specific version. */
+
+ /* We must also have have_acl_from_text and acl_delete_def_file.
+ (acl_delete_def_file could be emulated with acl_init followed
+ by acl_set_file, but acl_set_file with an empty acl is
+ unspecified.) */
+
+# ifndef HAVE_ACL_FROM_TEXT
+# error Must have acl_from_text (see POSIX 1003.1e draft 17).
+# endif
+# ifndef HAVE_ACL_DELETE_DEF_FILE
+# error Must have acl_delete_def_file (see POSIX 1003.1e draft 17).
+# endif
+
+ acl_t acl;
+ int ret;
+
+ if (HAVE_ACL_FROM_MODE)
+ {
+ acl = acl_from_mode (mode);
+ if (!acl)
+ {
+ error (0, errno, "%s", quote (name));
+ return -1;
+ }
+ }
+ else
+ {
+ char acl_text[] = "u::---,g::---,o::---";
+
+ if (mode & S_IRUSR) acl_text[ 3] = 'r';
+ if (mode & S_IWUSR) acl_text[ 4] = 'w';
+ if (mode & S_IXUSR) acl_text[ 5] = 'x';
+ if (mode & S_IRGRP) acl_text[10] = 'r';
+ if (mode & S_IWGRP) acl_text[11] = 'w';
+ if (mode & S_IXGRP) acl_text[12] = 'x';
+ if (mode & S_IROTH) acl_text[17] = 'r';
+ if (mode & S_IWOTH) acl_text[18] = 'w';
+ if (mode & S_IXOTH) acl_text[19] = 'x';
+
+ acl = acl_from_text (acl_text);
+ if (!acl)
+ {
+ error (0, errno, "%s", quote (name));
+ return -1;
+ }
+ }
+ if (HAVE_ACL_SET_FD && desc != -1)
+ ret = acl_set_fd (desc, acl);
+ else
+ ret = acl_set_file (name, ACL_TYPE_ACCESS, acl);
+ if (ret != 0)
+ {
+ int saved_errno = errno;
+ acl_free (acl);
+
+ if (errno == ENOTSUP || errno == ENOSYS)
+ {
+ if (chmod_or_fchmod (name, desc, mode) != 0)
+ saved_errno = errno;
+ else
+ return 0;
+ }
+ error (0, saved_errno, _("setting permissions for %s"), quote (name));
+ return -1;
+ }
+ else
+ acl_free (acl);
+
+ if (S_ISDIR (mode) && acl_delete_def_file (name))
+ {
+ error (0, errno, _("setting permissions for %s"), quote (name));
+ return -1;
+ }
+
+ if (mode & (S_ISUID | S_ISGID | S_ISVTX))
+ {
+ /* We did not call chmod so far, so the special bits have not yet
+ been set. */
+
+ if (chmod_or_fchmod (name, desc, mode))
+ {
+ error (0, errno, _("preserving permissions for %s"), quote (name));
+ return -1;
+ }
+ }
+ return 0;
+#else
+ int ret = chmod_or_fchmod (name, desc, mode);
+ if (ret)
+ error (0, errno, _("setting permissions for %s"), quote (name));
+ return ret;
+#endif
+}
diff --git a/lib/acl.h b/lib/acl.h
new file mode 100644
index 0000000..a7b4f9e
--- /dev/null
+++ b/lib/acl.h
@@ -0,0 +1,31 @@
+/* acl.c - access control lists
+
+ Copyright (C) 2002 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+ Written by Paul Eggert. */
+
+#if HAVE_SYS_ACL_H
+# include <sys/acl.h>
+#endif
+#if defined HAVE_ACL && ! defined GETACLCNT && defined ACL_CNT
+# define GETACLCNT ACL_CNT
+#endif
+
+int file_has_acl (char const *, struct stat const *);
+int copy_acl (char const *, int, char const *, int, mode_t);
+int set_acl (char const *, int, mode_t);
+int chmod_or_fchmod (char const *, int, mode_t);
diff --git a/lib/alloca.c b/lib/alloca.c
new file mode 100644
index 0000000..3a1f4e2
--- /dev/null
+++ b/lib/alloca.c
@@ -0,0 +1,489 @@
+/* alloca.c -- allocate automatically reclaimed memory
+ (Mostly) portable public-domain implementation -- D A Gwyn
+
+ This implementation of the PWB library alloca function,
+ which is used to allocate space off the run-time stack so
+ that it is automatically reclaimed upon procedure exit,
+ was inspired by discussions with J. Q. Johnson of Cornell.
+ J.Otto Tennant <jot@cray.com> contributed the Cray support.
+
+ There are some preprocessor constants that can
+ be defined when compiling for your specific system, for
+ improved efficiency; however, the defaults should be okay.
+
+ The general concept of this implementation is to keep
+ track of all alloca-allocated blocks, and reclaim any
+ that are found to be deeper in the stack than the current
+ invocation. This heuristic does not reclaim storage as
+ soon as it becomes invalid, but it will do so eventually.
+
+ As a special case, alloca(0) reclaims storage without
+ allocating any. It is a good idea to use alloca(0) in
+ your main control loop, etc. to force garbage collection. */
+
+#include <config.h>
+
+#include <alloca.h>
+
+#include <string.h>
+#include <stdlib.h>
+
+#ifdef emacs
+# include "lisp.h"
+# include "blockinput.h"
+# ifdef EMACS_FREE
+# undef free
+# define free EMACS_FREE
+# endif
+#else
+# define memory_full() abort ()
+#endif
+
+/* If compiling with GCC 2, this file's not needed. */
+#if !defined (__GNUC__) || __GNUC__ < 2
+
+/* If someone has defined alloca as a macro,
+ there must be some other way alloca is supposed to work. */
+# ifndef alloca
+
+# ifdef emacs
+# ifdef static
+/* actually, only want this if static is defined as ""
+ -- this is for usg, in which emacs must undefine static
+ in order to make unexec workable
+ */
+# ifndef STACK_DIRECTION
+you
+lose
+-- must know STACK_DIRECTION at compile-time
+/* Using #error here is not wise since this file should work for
+ old and obscure compilers. */
+# endif /* STACK_DIRECTION undefined */
+# endif /* static */
+# endif /* emacs */
+
+/* If your stack is a linked list of frames, you have to
+ provide an "address metric" ADDRESS_FUNCTION macro. */
+
+# if defined (CRAY) && defined (CRAY_STACKSEG_END)
+long i00afunc ();
+# define ADDRESS_FUNCTION(arg) (char *) i00afunc (&(arg))
+# else
+# define ADDRESS_FUNCTION(arg) &(arg)
+# endif
+
+/* Define STACK_DIRECTION if you know the direction of stack
+ growth for your system; otherwise it will be automatically
+ deduced at run-time.
+
+ STACK_DIRECTION > 0 => grows toward higher addresses
+ STACK_DIRECTION < 0 => grows toward lower addresses
+ STACK_DIRECTION = 0 => direction of growth unknown */
+
+# ifndef STACK_DIRECTION
+# define STACK_DIRECTION 0 /* Direction unknown. */
+# endif
+
+# if STACK_DIRECTION != 0
+
+# define STACK_DIR STACK_DIRECTION /* Known at compile-time. */
+
+# else /* STACK_DIRECTION == 0; need run-time code. */
+
+static int stack_dir; /* 1 or -1 once known. */
+# define STACK_DIR stack_dir
+
+static void
+find_stack_direction (void)
+{
+ static char *addr = NULL; /* Address of first `dummy', once known. */
+ auto char dummy; /* To get stack address. */
+
+ if (addr == NULL)
+ { /* Initial entry. */
+ addr = ADDRESS_FUNCTION (dummy);
+
+ find_stack_direction (); /* Recurse once. */
+ }
+ else
+ {
+ /* Second entry. */
+ if (ADDRESS_FUNCTION (dummy) > addr)
+ stack_dir = 1; /* Stack grew upward. */
+ else
+ stack_dir = -1; /* Stack grew downward. */
+ }
+}
+
+# endif /* STACK_DIRECTION == 0 */
+
+/* An "alloca header" is used to:
+ (a) chain together all alloca'ed blocks;
+ (b) keep track of stack depth.
+
+ It is very important that sizeof(header) agree with malloc
+ alignment chunk size. The following default should work okay. */
+
+# ifndef ALIGN_SIZE
+# define ALIGN_SIZE sizeof(double)
+# endif
+
+typedef union hdr
+{
+ char align[ALIGN_SIZE]; /* To force sizeof(header). */
+ struct
+ {
+ union hdr *next; /* For chaining headers. */
+ char *deep; /* For stack depth measure. */
+ } h;
+} header;
+
+static header *last_alloca_header = NULL; /* -> last alloca header. */
+
+/* Return a pointer to at least SIZE bytes of storage,
+ which will be automatically reclaimed upon exit from
+ the procedure that called alloca. Originally, this space
+ was supposed to be taken from the current stack frame of the
+ caller, but that method cannot be made to work for some
+ implementations of C, for example under Gould's UTX/32. */
+
+void *
+alloca (size_t size)
+{
+ auto char probe; /* Probes stack depth: */
+ register char *depth = ADDRESS_FUNCTION (probe);
+
+# if STACK_DIRECTION == 0
+ if (STACK_DIR == 0) /* Unknown growth direction. */
+ find_stack_direction ();
+# endif
+
+ /* Reclaim garbage, defined as all alloca'd storage that
+ was allocated from deeper in the stack than currently. */
+
+ {
+ register header *hp; /* Traverses linked list. */
+
+# ifdef emacs
+ BLOCK_INPUT;
+# endif
+
+ for (hp = last_alloca_header; hp != NULL;)
+ if ((STACK_DIR > 0 && hp->h.deep > depth)
+ || (STACK_DIR < 0 && hp->h.deep < depth))
+ {
+ register header *np = hp->h.next;
+
+ free (hp); /* Collect garbage. */
+
+ hp = np; /* -> next header. */
+ }
+ else
+ break; /* Rest are not deeper. */
+
+ last_alloca_header = hp; /* -> last valid storage. */
+
+# ifdef emacs
+ UNBLOCK_INPUT;
+# endif
+ }
+
+ if (size == 0)
+ return NULL; /* No allocation required. */
+
+ /* Allocate combined header + user data storage. */
+
+ {
+ /* Address of header. */
+ register header *new;
+
+ size_t combined_size = sizeof (header) + size;
+ if (combined_size < sizeof (header))
+ memory_full ();
+
+ new = malloc (combined_size);
+
+ if (! new)
+ memory_full ();
+
+ new->h.next = last_alloca_header;
+ new->h.deep = depth;
+
+ last_alloca_header = new;
+
+ /* User storage begins just after header. */
+
+ return (void *) (new + 1);
+ }
+}
+
+# if defined (CRAY) && defined (CRAY_STACKSEG_END)
+
+# ifdef DEBUG_I00AFUNC
+# include <stdio.h>
+# endif
+
+# ifndef CRAY_STACK
+# define CRAY_STACK
+# ifndef CRAY2
+/* Stack structures for CRAY-1, CRAY X-MP, and CRAY Y-MP */
+struct stack_control_header
+ {
+ long shgrow:32; /* Number of times stack has grown. */
+ long shaseg:32; /* Size of increments to stack. */
+ long shhwm:32; /* High water mark of stack. */
+ long shsize:32; /* Current size of stack (all segments). */
+ };
+
+/* The stack segment linkage control information occurs at
+ the high-address end of a stack segment. (The stack
+ grows from low addresses to high addresses.) The initial
+ part of the stack segment linkage control information is
+ 0200 (octal) words. This provides for register storage
+ for the routine which overflows the stack. */
+
+struct stack_segment_linkage
+ {
+ long ss[0200]; /* 0200 overflow words. */
+ long sssize:32; /* Number of words in this segment. */
+ long ssbase:32; /* Offset to stack base. */
+ long:32;
+ long sspseg:32; /* Offset to linkage control of previous
+ segment of stack. */
+ long:32;
+ long sstcpt:32; /* Pointer to task common address block. */
+ long sscsnm; /* Private control structure number for
+ microtasking. */
+ long ssusr1; /* Reserved for user. */
+ long ssusr2; /* Reserved for user. */
+ long sstpid; /* Process ID for pid based multi-tasking. */
+ long ssgvup; /* Pointer to multitasking thread giveup. */
+ long sscray[7]; /* Reserved for Cray Research. */
+ long ssa0;
+ long ssa1;
+ long ssa2;
+ long ssa3;
+ long ssa4;
+ long ssa5;
+ long ssa6;
+ long ssa7;
+ long sss0;
+ long sss1;
+ long sss2;
+ long sss3;
+ long sss4;
+ long sss5;
+ long sss6;
+ long sss7;
+ };
+
+# else /* CRAY2 */
+/* The following structure defines the vector of words
+ returned by the STKSTAT library routine. */
+struct stk_stat
+ {
+ long now; /* Current total stack size. */
+ long maxc; /* Amount of contiguous space which would
+ be required to satisfy the maximum
+ stack demand to date. */
+ long high_water; /* Stack high-water mark. */
+ long overflows; /* Number of stack overflow ($STKOFEN) calls. */
+ long hits; /* Number of internal buffer hits. */
+ long extends; /* Number of block extensions. */
+ long stko_mallocs; /* Block allocations by $STKOFEN. */
+ long underflows; /* Number of stack underflow calls ($STKRETN). */
+ long stko_free; /* Number of deallocations by $STKRETN. */
+ long stkm_free; /* Number of deallocations by $STKMRET. */
+ long segments; /* Current number of stack segments. */
+ long maxs; /* Maximum number of stack segments so far. */
+ long pad_size; /* Stack pad size. */
+ long current_address; /* Current stack segment address. */
+ long current_size; /* Current stack segment size. This
+ number is actually corrupted by STKSTAT to
+ include the fifteen word trailer area. */
+ long initial_address; /* Address of initial segment. */
+ long initial_size; /* Size of initial segment. */
+ };
+
+/* The following structure describes the data structure which trails
+ any stack segment. I think that the description in 'asdef' is
+ out of date. I only describe the parts that I am sure about. */
+
+struct stk_trailer
+ {
+ long this_address; /* Address of this block. */
+ long this_size; /* Size of this block (does not include
+ this trailer). */
+ long unknown2;
+ long unknown3;
+ long link; /* Address of trailer block of previous
+ segment. */
+ long unknown5;
+ long unknown6;
+ long unknown7;
+ long unknown8;
+ long unknown9;
+ long unknown10;
+ long unknown11;
+ long unknown12;
+ long unknown13;
+ long unknown14;
+ };
+
+# endif /* CRAY2 */
+# endif /* not CRAY_STACK */
+
+# ifdef CRAY2
+/* Determine a "stack measure" for an arbitrary ADDRESS.
+ I doubt that "lint" will like this much. */
+
+static long
+i00afunc (long *address)
+{
+ struct stk_stat status;
+ struct stk_trailer *trailer;
+ long *block, size;
+ long result = 0;
+
+ /* We want to iterate through all of the segments. The first
+ step is to get the stack status structure. We could do this
+ more quickly and more directly, perhaps, by referencing the
+ $LM00 common block, but I know that this works. */
+
+ STKSTAT (&status);
+
+ /* Set up the iteration. */
+
+ trailer = (struct stk_trailer *) (status.current_address
+ + status.current_size
+ - 15);
+
+ /* There must be at least one stack segment. Therefore it is
+ a fatal error if "trailer" is null. */
+
+ if (trailer == 0)
+ abort ();
+
+ /* Discard segments that do not contain our argument address. */
+
+ while (trailer != 0)
+ {
+ block = (long *) trailer->this_address;
+ size = trailer->this_size;
+ if (block == 0 || size == 0)
+ abort ();
+ trailer = (struct stk_trailer *) trailer->link;
+ if ((block <= address) && (address < (block + size)))
+ break;
+ }
+
+ /* Set the result to the offset in this segment and add the sizes
+ of all predecessor segments. */
+
+ result = address - block;
+
+ if (trailer == 0)
+ {
+ return result;
+ }
+
+ do
+ {
+ if (trailer->this_size <= 0)
+ abort ();
+ result += trailer->this_size;
+ trailer = (struct stk_trailer *) trailer->link;
+ }
+ while (trailer != 0);
+
+ /* We are done. Note that if you present a bogus address (one
+ not in any segment), you will get a different number back, formed
+ from subtracting the address of the first block. This is probably
+ not what you want. */
+
+ return (result);
+}
+
+# else /* not CRAY2 */
+/* Stack address function for a CRAY-1, CRAY X-MP, or CRAY Y-MP.
+ Determine the number of the cell within the stack,
+ given the address of the cell. The purpose of this
+ routine is to linearize, in some sense, stack addresses
+ for alloca. */
+
+static long
+i00afunc (long address)
+{
+ long stkl = 0;
+
+ long size, pseg, this_segment, stack;
+ long result = 0;
+
+ struct stack_segment_linkage *ssptr;
+
+ /* Register B67 contains the address of the end of the
+ current stack segment. If you (as a subprogram) store
+ your registers on the stack and find that you are past
+ the contents of B67, you have overflowed the segment.
+
+ B67 also points to the stack segment linkage control
+ area, which is what we are really interested in. */
+
+ stkl = CRAY_STACKSEG_END ();
+ ssptr = (struct stack_segment_linkage *) stkl;
+
+ /* If one subtracts 'size' from the end of the segment,
+ one has the address of the first word of the segment.
+
+ If this is not the first segment, 'pseg' will be
+ nonzero. */
+
+ pseg = ssptr->sspseg;
+ size = ssptr->sssize;
+
+ this_segment = stkl - size;
+
+ /* It is possible that calling this routine itself caused
+ a stack overflow. Discard stack segments which do not
+ contain the target address. */
+
+ while (!(this_segment <= address && address <= stkl))
+ {
+# ifdef DEBUG_I00AFUNC
+ fprintf (stderr, "%011o %011o %011o\n", this_segment, address, stkl);
+# endif
+ if (pseg == 0)
+ break;
+ stkl = stkl - pseg;
+ ssptr = (struct stack_segment_linkage *) stkl;
+ size = ssptr->sssize;
+ pseg = ssptr->sspseg;
+ this_segment = stkl - size;
+ }
+
+ result = address - this_segment;
+
+ /* If you subtract pseg from the current end of the stack,
+ you get the address of the previous stack segment's end.
+ This seems a little convoluted to me, but I'll bet you save
+ a cycle somewhere. */
+
+ while (pseg != 0)
+ {
+# ifdef DEBUG_I00AFUNC
+ fprintf (stderr, "%011o %011o\n", pseg, size);
+# endif
+ stkl = stkl - pseg;
+ ssptr = (struct stack_segment_linkage *) stkl;
+ size = ssptr->sssize;
+ pseg = ssptr->sspseg;
+ result += size;
+ }
+ return (result);
+}
+
+# endif /* not CRAY2 */
+# endif /* CRAY */
+
+# endif /* no alloca */
+#endif /* not GCC version 2 */
diff --git a/lib/alloca_.h b/lib/alloca_.h
new file mode 100644
index 0000000..dd0b3e9
--- /dev/null
+++ b/lib/alloca_.h
@@ -0,0 +1,54 @@
+/* Memory allocation on the stack.
+
+ Copyright (C) 1995, 1999, 2001, 2002, 2003, 2004, 2006 Free Software
+ Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+ USA. */
+
+/* Avoid using the symbol _ALLOCA_H here, as Bison assumes _ALLOCA_H
+ means there is a real alloca function. */
+#ifndef _GNULIB_ALLOCA_H
+# define _GNULIB_ALLOCA_H
+
+/* alloca (N) returns a pointer to N bytes of memory
+ allocated on the stack, which will last until the function returns.
+ Use of alloca should be avoided:
+ - inside arguments of function calls - undefined behaviour,
+ - in inline functions - the allocation may actually last until the
+ calling function returns,
+ - for huge N (say, N >= 65536) - you never know how large (or small)
+ the stack is, and when the stack cannot fulfill the memory allocation
+ request, the program just crashes.
+ */
+
+#ifndef alloca
+# ifdef __GNUC__
+# define alloca __builtin_alloca
+# elif defined _AIX
+# define alloca __alloca
+# elif defined _MSC_VER
+# include <malloc.h>
+# define alloca _alloca
+# else
+# include <stddef.h>
+# ifdef __cplusplus
+extern "C"
+# endif
+void *alloca (size_t);
+# endif
+#endif
+
+#endif /* _GNULIB_ALLOCA_H */
diff --git a/lib/config.charset b/lib/config.charset
new file mode 100755
index 0000000..148ea44
--- /dev/null
+++ b/lib/config.charset
@@ -0,0 +1,639 @@
+#! /bin/sh
+# Output a system dependent table of character encoding aliases.
+#
+# Copyright (C) 2000-2004, 2006 Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+# The table consists of lines of the form
+# ALIAS CANONICAL
+#
+# ALIAS is the (system dependent) result of "nl_langinfo (CODESET)".
+# ALIAS is compared in a case sensitive way.
+#
+# CANONICAL is the GNU canonical name for this character encoding.
+# It must be an encoding supported by libiconv. Support by GNU libc is
+# also desirable. CANONICAL is case insensitive. Usually an upper case
+# MIME charset name is preferred.
+# The current list of GNU canonical charset names is as follows.
+#
+# name MIME? used by which systems
+# ASCII, ANSI_X3.4-1968 glibc solaris freebsd netbsd darwin
+# ISO-8859-1 Y glibc aix hpux irix osf solaris freebsd netbsd darwin
+# ISO-8859-2 Y glibc aix hpux irix osf solaris freebsd netbsd darwin
+# ISO-8859-3 Y glibc solaris
+# ISO-8859-4 Y osf solaris freebsd netbsd darwin
+# ISO-8859-5 Y glibc aix hpux irix osf solaris freebsd netbsd darwin
+# ISO-8859-6 Y glibc aix hpux solaris
+# ISO-8859-7 Y glibc aix hpux irix osf solaris netbsd darwin
+# ISO-8859-8 Y glibc aix hpux osf solaris
+# ISO-8859-9 Y glibc aix hpux irix osf solaris darwin
+# ISO-8859-13 glibc netbsd darwin
+# ISO-8859-14 glibc
+# ISO-8859-15 glibc aix osf solaris freebsd darwin
+# KOI8-R Y glibc solaris freebsd netbsd darwin
+# KOI8-U Y glibc freebsd netbsd darwin
+# KOI8-T glibc
+# CP437 dos
+# CP775 dos
+# CP850 aix osf dos
+# CP852 dos
+# CP855 dos
+# CP856 aix
+# CP857 dos
+# CP861 dos
+# CP862 dos
+# CP864 dos
+# CP865 dos
+# CP866 freebsd netbsd darwin dos
+# CP869 dos
+# CP874 woe32 dos
+# CP922 aix
+# CP932 aix woe32 dos
+# CP943 aix
+# CP949 osf woe32 dos
+# CP950 woe32 dos
+# CP1046 aix
+# CP1124 aix
+# CP1125 dos
+# CP1129 aix
+# CP1250 woe32
+# CP1251 glibc solaris netbsd darwin woe32
+# CP1252 aix woe32
+# CP1253 woe32
+# CP1254 woe32
+# CP1255 glibc woe32
+# CP1256 woe32
+# CP1257 woe32
+# GB2312 Y glibc aix hpux irix solaris freebsd netbsd darwin
+# EUC-JP Y glibc aix hpux irix osf solaris freebsd netbsd darwin
+# EUC-KR Y glibc aix hpux irix osf solaris freebsd netbsd darwin
+# EUC-TW glibc aix hpux irix osf solaris netbsd
+# BIG5 Y glibc aix hpux osf solaris freebsd netbsd darwin
+# BIG5-HKSCS glibc solaris
+# GBK glibc aix osf solaris woe32 dos
+# GB18030 glibc solaris netbsd
+# SHIFT_JIS Y hpux osf solaris freebsd netbsd darwin
+# JOHAB glibc solaris woe32
+# TIS-620 glibc aix hpux osf solaris
+# VISCII Y glibc
+# TCVN5712-1 glibc
+# GEORGIAN-PS glibc
+# HP-ROMAN8 hpux
+# HP-ARABIC8 hpux
+# HP-GREEK8 hpux
+# HP-HEBREW8 hpux
+# HP-TURKISH8 hpux
+# HP-KANA8 hpux
+# DEC-KANJI osf
+# DEC-HANYU osf
+# UTF-8 Y glibc aix hpux osf solaris netbsd darwin
+#
+# Note: Names which are not marked as being a MIME name should not be used in
+# Internet protocols for information interchange (mail, news, etc.).
+#
+# Note: ASCII and ANSI_X3.4-1968 are synonymous canonical names. Applications
+# must understand both names and treat them as equivalent.
+#
+# The first argument passed to this file is the canonical host specification,
+# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM
+# or
+# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
+
+host="$1"
+os=`echo "$host" | sed -e 's/^[^-]*-[^-]*-\(.*\)$/\1/'`
+echo "# This file contains a table of character encoding aliases,"
+echo "# suitable for operating system '${os}'."
+echo "# It was automatically generated from config.charset."
+# List of references, updated during installation:
+echo "# Packages using this file: "
+case "$os" in
+ linux-gnulibc1*)
+ # Linux libc5 doesn't have nl_langinfo(CODESET); therefore
+ # localcharset.c falls back to using the full locale name
+ # from the environment variables.
+ echo "C ASCII"
+ echo "POSIX ASCII"
+ for l in af af_ZA ca ca_ES da da_DK de de_AT de_BE de_CH de_DE de_LU \
+ en en_AU en_BW en_CA en_DK en_GB en_IE en_NZ en_US en_ZA \
+ en_ZW es es_AR es_BO es_CL es_CO es_DO es_EC es_ES es_GT \
+ es_HN es_MX es_PA es_PE es_PY es_SV es_US es_UY es_VE et \
+ et_EE eu eu_ES fi fi_FI fo fo_FO fr fr_BE fr_CA fr_CH fr_FR \
+ fr_LU ga ga_IE gl gl_ES id id_ID in in_ID is is_IS it it_CH \
+ it_IT kl kl_GL nl nl_BE nl_NL no no_NO pt pt_BR pt_PT sv \
+ sv_FI sv_SE; do
+ echo "$l ISO-8859-1"
+ echo "$l.iso-8859-1 ISO-8859-1"
+ echo "$l.iso-8859-15 ISO-8859-15"
+ echo "$l.iso-8859-15@euro ISO-8859-15"
+ echo "$l@euro ISO-8859-15"
+ echo "$l.cp-437 CP437"
+ echo "$l.cp-850 CP850"
+ echo "$l.cp-1252 CP1252"
+ echo "$l.cp-1252@euro CP1252"
+ #echo "$l.atari-st ATARI-ST" # not a commonly used encoding
+ echo "$l.utf-8 UTF-8"
+ echo "$l.utf-8@euro UTF-8"
+ done
+ for l in cs cs_CZ hr hr_HR hu hu_HU pl pl_PL ro ro_RO sk sk_SK sl \
+ sl_SI sr sr_CS sr_YU; do
+ echo "$l ISO-8859-2"
+ echo "$l.iso-8859-2 ISO-8859-2"
+ echo "$l.cp-852 CP852"
+ echo "$l.cp-1250 CP1250"
+ echo "$l.utf-8 UTF-8"
+ done
+ for l in mk mk_MK ru ru_RU; do
+ echo "$l ISO-8859-5"
+ echo "$l.iso-8859-5 ISO-8859-5"
+ echo "$l.koi8-r KOI8-R"
+ echo "$l.cp-866 CP866"
+ echo "$l.cp-1251 CP1251"
+ echo "$l.utf-8 UTF-8"
+ done
+ for l in ar ar_SA; do
+ echo "$l ISO-8859-6"
+ echo "$l.iso-8859-6 ISO-8859-6"
+ echo "$l.cp-864 CP864"
+ #echo "$l.cp-868 CP868" # not a commonly used encoding
+ echo "$l.cp-1256 CP1256"
+ echo "$l.utf-8 UTF-8"
+ done
+ for l in el el_GR gr gr_GR; do
+ echo "$l ISO-8859-7"
+ echo "$l.iso-8859-7 ISO-8859-7"
+ echo "$l.cp-869 CP869"
+ echo "$l.cp-1253 CP1253"
+ echo "$l.cp-1253@euro CP1253"
+ echo "$l.utf-8 UTF-8"
+ echo "$l.utf-8@euro UTF-8"
+ done
+ for l in he he_IL iw iw_IL; do
+ echo "$l ISO-8859-8"
+ echo "$l.iso-8859-8 ISO-8859-8"
+ echo "$l.cp-862 CP862"
+ echo "$l.cp-1255 CP1255"
+ echo "$l.utf-8 UTF-8"
+ done
+ for l in tr tr_TR; do
+ echo "$l ISO-8859-9"
+ echo "$l.iso-8859-9 ISO-8859-9"
+ echo "$l.cp-857 CP857"
+ echo "$l.cp-1254 CP1254"
+ echo "$l.utf-8 UTF-8"
+ done
+ for l in lt lt_LT lv lv_LV; do
+ #echo "$l BALTIC" # not a commonly used encoding, wrong encoding name
+ echo "$l ISO-8859-13"
+ done
+ for l in ru_UA uk uk_UA; do
+ echo "$l KOI8-U"
+ done
+ for l in zh zh_CN; do
+ #echo "$l GB_2312-80" # not a commonly used encoding, wrong encoding name
+ echo "$l GB2312"
+ done
+ for l in ja ja_JP ja_JP.EUC; do
+ echo "$l EUC-JP"
+ done
+ for l in ko ko_KR; do
+ echo "$l EUC-KR"
+ done
+ for l in th th_TH; do
+ echo "$l TIS-620"
+ done
+ for l in fa fa_IR; do
+ #echo "$l ISIRI-3342" # a broken encoding
+ echo "$l.utf-8 UTF-8"
+ done
+ ;;
+ linux* | *-gnu*)
+ # With glibc-2.1 or newer, we don't need any canonicalization,
+ # because glibc has iconv and both glibc and libiconv support all
+ # GNU canonical names directly. Therefore, the Makefile does not
+ # need to install the alias file at all.
+ # The following applies only to glibc-2.0.x and older libcs.
+ echo "ISO_646.IRV:1983 ASCII"
+ ;;
+ aix*)
+ echo "ISO8859-1 ISO-8859-1"
+ echo "ISO8859-2 ISO-8859-2"
+ echo "ISO8859-5 ISO-8859-5"
+ echo "ISO8859-6 ISO-8859-6"
+ echo "ISO8859-7 ISO-8859-7"
+ echo "ISO8859-8 ISO-8859-8"
+ echo "ISO8859-9 ISO-8859-9"
+ echo "ISO8859-15 ISO-8859-15"
+ echo "IBM-850 CP850"
+ echo "IBM-856 CP856"
+ echo "IBM-921 ISO-8859-13"
+ echo "IBM-922 CP922"
+ echo "IBM-932 CP932"
+ echo "IBM-943 CP943"
+ echo "IBM-1046 CP1046"
+ echo "IBM-1124 CP1124"
+ echo "IBM-1129 CP1129"
+ echo "IBM-1252 CP1252"
+ echo "IBM-eucCN GB2312"
+ echo "IBM-eucJP EUC-JP"
+ echo "IBM-eucKR EUC-KR"
+ echo "IBM-eucTW EUC-TW"
+ echo "big5 BIG5"
+ echo "GBK GBK"
+ echo "TIS-620 TIS-620"
+ echo "UTF-8 UTF-8"
+ ;;
+ hpux*)
+ echo "iso88591 ISO-8859-1"
+ echo "iso88592 ISO-8859-2"
+ echo "iso88595 ISO-8859-5"
+ echo "iso88596 ISO-8859-6"
+ echo "iso88597 ISO-8859-7"
+ echo "iso88598 ISO-8859-8"
+ echo "iso88599 ISO-8859-9"
+ echo "iso885915 ISO-8859-15"
+ echo "roman8 HP-ROMAN8"
+ echo "arabic8 HP-ARABIC8"
+ echo "greek8 HP-GREEK8"
+ echo "hebrew8 HP-HEBREW8"
+ echo "turkish8 HP-TURKISH8"
+ echo "kana8 HP-KANA8"
+ echo "tis620 TIS-620"
+ echo "big5 BIG5"
+ echo "eucJP EUC-JP"
+ echo "eucKR EUC-KR"
+ echo "eucTW EUC-TW"
+ echo "hp15CN GB2312"
+ #echo "ccdc ?" # what is this?
+ echo "SJIS SHIFT_JIS"
+ echo "utf8 UTF-8"
+ ;;
+ irix*)
+ echo "ISO8859-1 ISO-8859-1"
+ echo "ISO8859-2 ISO-8859-2"
+ echo "ISO8859-5 ISO-8859-5"
+ echo "ISO8859-7 ISO-8859-7"
+ echo "ISO8859-9 ISO-8859-9"
+ echo "eucCN GB2312"
+ echo "eucJP EUC-JP"
+ echo "eucKR EUC-KR"
+ echo "eucTW EUC-TW"
+ ;;
+ osf*)
+ echo "ISO8859-1 ISO-8859-1"
+ echo "ISO8859-2 ISO-8859-2"
+ echo "ISO8859-4 ISO-8859-4"
+ echo "ISO8859-5 ISO-8859-5"
+ echo "ISO8859-7 ISO-8859-7"
+ echo "ISO8859-8 ISO-8859-8"
+ echo "ISO8859-9 ISO-8859-9"
+ echo "ISO8859-15 ISO-8859-15"
+ echo "cp850 CP850"
+ echo "big5 BIG5"
+ echo "dechanyu DEC-HANYU"
+ echo "dechanzi GB2312"
+ echo "deckanji DEC-KANJI"
+ echo "deckorean EUC-KR"
+ echo "eucJP EUC-JP"
+ echo "eucKR EUC-KR"
+ echo "eucTW EUC-TW"
+ echo "GBK GBK"
+ echo "KSC5601 CP949"
+ echo "sdeckanji EUC-JP"
+ echo "SJIS SHIFT_JIS"
+ echo "TACTIS TIS-620"
+ echo "UTF-8 UTF-8"
+ ;;
+ solaris*)
+ echo "646 ASCII"
+ echo "ISO8859-1 ISO-8859-1"
+ echo "ISO8859-2 ISO-8859-2"
+ echo "ISO8859-3 ISO-8859-3"
+ echo "ISO8859-4 ISO-8859-4"
+ echo "ISO8859-5 ISO-8859-5"
+ echo "ISO8859-6 ISO-8859-6"
+ echo "ISO8859-7 ISO-8859-7"
+ echo "ISO8859-8 ISO-8859-8"
+ echo "ISO8859-9 ISO-8859-9"
+ echo "ISO8859-15 ISO-8859-15"
+ echo "koi8-r KOI8-R"
+ echo "ansi-1251 CP1251"
+ echo "BIG5 BIG5"
+ echo "Big5-HKSCS BIG5-HKSCS"
+ echo "gb2312 GB2312"
+ echo "GBK GBK"
+ echo "GB18030 GB18030"
+ echo "cns11643 EUC-TW"
+ echo "5601 EUC-KR"
+ echo "ko_KR.johap92 JOHAB"
+ echo "eucJP EUC-JP"
+ echo "PCK SHIFT_JIS"
+ echo "TIS620.2533 TIS-620"
+ #echo "sun_eu_greek ?" # what is this?
+ echo "UTF-8 UTF-8"
+ ;;
+ freebsd* | os2*)
+ # FreeBSD 4.2 doesn't have nl_langinfo(CODESET); therefore
+ # localcharset.c falls back to using the full locale name
+ # from the environment variables.
+ # Likewise for OS/2. OS/2 has XFree86 just like FreeBSD. Just
+ # reuse FreeBSD's locale data for OS/2.
+ echo "C ASCII"
+ echo "US-ASCII ASCII"
+ for l in la_LN lt_LN; do
+ echo "$l.ASCII ASCII"
+ done
+ for l in da_DK de_AT de_CH de_DE en_AU en_CA en_GB en_US es_ES \
+ fi_FI fr_BE fr_CA fr_CH fr_FR is_IS it_CH it_IT la_LN \
+ lt_LN nl_BE nl_NL no_NO pt_PT sv_SE; do
+ echo "$l.ISO_8859-1 ISO-8859-1"
+ echo "$l.DIS_8859-15 ISO-8859-15"
+ done
+ for l in cs_CZ hr_HR hu_HU la_LN lt_LN pl_PL sl_SI; do
+ echo "$l.ISO_8859-2 ISO-8859-2"
+ done
+ for l in la_LN lt_LT; do
+ echo "$l.ISO_8859-4 ISO-8859-4"
+ done
+ for l in ru_RU ru_SU; do
+ echo "$l.KOI8-R KOI8-R"
+ echo "$l.ISO_8859-5 ISO-8859-5"
+ echo "$l.CP866 CP866"
+ done
+ echo "uk_UA.KOI8-U KOI8-U"
+ echo "zh_TW.BIG5 BIG5"
+ echo "zh_TW.Big5 BIG5"
+ echo "zh_CN.EUC GB2312"
+ echo "ja_JP.EUC EUC-JP"
+ echo "ja_JP.SJIS SHIFT_JIS"
+ echo "ja_JP.Shift_JIS SHIFT_JIS"
+ echo "ko_KR.EUC EUC-KR"
+ ;;
+ netbsd*)
+ echo "646 ASCII"
+ echo "ISO8859-1 ISO-8859-1"
+ echo "ISO8859-2 ISO-8859-2"
+ echo "ISO8859-4 ISO-8859-4"
+ echo "ISO8859-5 ISO-8859-5"
+ echo "ISO8859-7 ISO-8859-7"
+ echo "ISO8859-13 ISO-8859-13"
+ echo "ISO8859-15 ISO-8859-15"
+ echo "eucCN GB2312"
+ echo "eucJP EUC-JP"
+ echo "eucKR EUC-KR"
+ echo "eucTW EUC-TW"
+ echo "BIG5 BIG5"
+ echo "SJIS SHIFT_JIS"
+ ;;
+ darwin[56]*)
+ # Darwin 6.8 doesn't have nl_langinfo(CODESET); therefore
+ # localcharset.c falls back to using the full locale name
+ # from the environment variables.
+ echo "C ASCII"
+ for l in en_AU en_CA en_GB en_US la_LN; do
+ echo "$l.US-ASCII ASCII"
+ done
+ for l in da_DK de_AT de_CH de_DE en_AU en_CA en_GB en_US es_ES \
+ fi_FI fr_BE fr_CA fr_CH fr_FR is_IS it_CH it_IT nl_BE \
+ nl_NL no_NO pt_PT sv_SE; do
+ echo "$l ISO-8859-1"
+ echo "$l.ISO8859-1 ISO-8859-1"
+ echo "$l.ISO8859-15 ISO-8859-15"
+ done
+ for l in la_LN; do
+ echo "$l.ISO8859-1 ISO-8859-1"
+ echo "$l.ISO8859-15 ISO-8859-15"
+ done
+ for l in cs_CZ hr_HR hu_HU la_LN pl_PL sl_SI; do
+ echo "$l.ISO8859-2 ISO-8859-2"
+ done
+ for l in la_LN lt_LT; do
+ echo "$l.ISO8859-4 ISO-8859-4"
+ done
+ for l in ru_RU; do
+ echo "$l.KOI8-R KOI8-R"
+ echo "$l.ISO8859-5 ISO-8859-5"
+ echo "$l.CP866 CP866"
+ done
+ for l in bg_BG; do
+ echo "$l.CP1251 CP1251"
+ done
+ echo "uk_UA.KOI8-U KOI8-U"
+ echo "zh_TW.BIG5 BIG5"
+ echo "zh_TW.Big5 BIG5"
+ echo "zh_CN.EUC GB2312"
+ echo "ja_JP.EUC EUC-JP"
+ echo "ja_JP.SJIS SHIFT_JIS"
+ echo "ko_KR.EUC EUC-KR"
+ ;;
+ darwin*)
+ # Darwin 7.5 has nl_langinfo(CODESET), but it is useless:
+ # - It returns the empty string when LANG is set to a locale of the
+ # form ll_CC, although ll_CC/LC_CTYPE is a symlink to an UTF-8
+ # LC_CTYPE file.
+ # - The environment variables LANG, LC_CTYPE, LC_ALL are not set by
+ # the system; nl_langinfo(CODESET) returns "US-ASCII" in this case.
+ # - The documentation says:
+ # "... all code that calls BSD system routines should ensure
+ # that the const *char parameters of these routines are in UTF-8
+ # encoding. All BSD system functions expect their string
+ # parameters to be in UTF-8 encoding and nothing else."
+ # It also says
+ # "An additional caveat is that string parameters for files,
+ # paths, and other file-system entities must be in canonical
+ # UTF-8. In a canonical UTF-8 Unicode string, all decomposable
+ # characters are decomposed ..."
+ # but this is not true: You can pass non-decomposed UTF-8 strings
+ # to file system functions, and it is the OS which will convert
+ # them to decomposed UTF-8 before accessing the file system.
+ # - The Apple Terminal application displays UTF-8 by default.
+ # - However, other applications are free to use different encodings:
+ # - xterm uses ISO-8859-1 by default.
+ # - TextEdit uses MacRoman by default.
+ # We prefer UTF-8 over decomposed UTF-8-MAC because one should
+ # minimize the use of decomposed Unicode. Unfortunately, through the
+ # Darwin file system, decomposed UTF-8 strings are leaked into user
+ # space nevertheless.
+ echo "* UTF-8"
+ ;;
+ beos*)
+ # BeOS has a single locale, and it has UTF-8 encoding.
+ echo "* UTF-8"
+ ;;
+ msdosdjgpp*)
+ # DJGPP 2.03 doesn't have nl_langinfo(CODESET); therefore
+ # localcharset.c falls back to using the full locale name
+ # from the environment variables.
+ echo "#"
+ echo "# The encodings given here may not all be correct."
+ echo "# If you find that the encoding given for your language and"
+ echo "# country is not the one your DOS machine actually uses, just"
+ echo "# correct it in this file, and send a mail to"
+ echo "# Juan Manuel Guerrero <juan.guerrero@gmx.de>"
+ echo "# and Bruno Haible <bruno@clisp.org>."
+ echo "#"
+ echo "C ASCII"
+ # ISO-8859-1 languages
+ echo "ca CP850"
+ echo "ca_ES CP850"
+ echo "da CP865" # not CP850 ??
+ echo "da_DK CP865" # not CP850 ??
+ echo "de CP850"
+ echo "de_AT CP850"
+ echo "de_CH CP850"
+ echo "de_DE CP850"
+ echo "en CP850"
+ echo "en_AU CP850" # not CP437 ??
+ echo "en_CA CP850"
+ echo "en_GB CP850"
+ echo "en_NZ CP437"
+ echo "en_US CP437"
+ echo "en_ZA CP850" # not CP437 ??
+ echo "es CP850"
+ echo "es_AR CP850"
+ echo "es_BO CP850"
+ echo "es_CL CP850"
+ echo "es_CO CP850"
+ echo "es_CR CP850"
+ echo "es_CU CP850"
+ echo "es_DO CP850"
+ echo "es_EC CP850"
+ echo "es_ES CP850"
+ echo "es_GT CP850"
+ echo "es_HN CP850"
+ echo "es_MX CP850"
+ echo "es_NI CP850"
+ echo "es_PA CP850"
+ echo "es_PY CP850"
+ echo "es_PE CP850"
+ echo "es_SV CP850"
+ echo "es_UY CP850"
+ echo "es_VE CP850"
+ echo "et CP850"
+ echo "et_EE CP850"
+ echo "eu CP850"
+ echo "eu_ES CP850"
+ echo "fi CP850"
+ echo "fi_FI CP850"
+ echo "fr CP850"
+ echo "fr_BE CP850"
+ echo "fr_CA CP850"
+ echo "fr_CH CP850"
+ echo "fr_FR CP850"
+ echo "ga CP850"
+ echo "ga_IE CP850"
+ echo "gd CP850"
+ echo "gd_GB CP850"
+ echo "gl CP850"
+ echo "gl_ES CP850"
+ echo "id CP850" # not CP437 ??
+ echo "id_ID CP850" # not CP437 ??
+ echo "is CP861" # not CP850 ??
+ echo "is_IS CP861" # not CP850 ??
+ echo "it CP850"
+ echo "it_CH CP850"
+ echo "it_IT CP850"
+ echo "lt CP775"
+ echo "lt_LT CP775"
+ echo "lv CP775"
+ echo "lv_LV CP775"
+ echo "nb CP865" # not CP850 ??
+ echo "nb_NO CP865" # not CP850 ??
+ echo "nl CP850"
+ echo "nl_BE CP850"
+ echo "nl_NL CP850"
+ echo "nn CP865" # not CP850 ??
+ echo "nn_NO CP865" # not CP850 ??
+ echo "no CP865" # not CP850 ??
+ echo "no_NO CP865" # not CP850 ??
+ echo "pt CP850"
+ echo "pt_BR CP850"
+ echo "pt_PT CP850"
+ echo "sv CP850"
+ echo "sv_SE CP850"
+ # ISO-8859-2 languages
+ echo "cs CP852"
+ echo "cs_CZ CP852"
+ echo "hr CP852"
+ echo "hr_HR CP852"
+ echo "hu CP852"
+ echo "hu_HU CP852"
+ echo "pl CP852"
+ echo "pl_PL CP852"
+ echo "ro CP852"
+ echo "ro_RO CP852"
+ echo "sk CP852"
+ echo "sk_SK CP852"
+ echo "sl CP852"
+ echo "sl_SI CP852"
+ echo "sq CP852"
+ echo "sq_AL CP852"
+ echo "sr CP852" # CP852 or CP866 or CP855 ??
+ echo "sr_CS CP852" # CP852 or CP866 or CP855 ??
+ echo "sr_YU CP852" # CP852 or CP866 or CP855 ??
+ # ISO-8859-3 languages
+ echo "mt CP850"
+ echo "mt_MT CP850"
+ # ISO-8859-5 languages
+ echo "be CP866"
+ echo "be_BE CP866"
+ echo "bg CP866" # not CP855 ??
+ echo "bg_BG CP866" # not CP855 ??
+ echo "mk CP866" # not CP855 ??
+ echo "mk_MK CP866" # not CP855 ??
+ echo "ru CP866"
+ echo "ru_RU CP866"
+ echo "uk CP1125"
+ echo "uk_UA CP1125"
+ # ISO-8859-6 languages
+ echo "ar CP864"
+ echo "ar_AE CP864"
+ echo "ar_DZ CP864"
+ echo "ar_EG CP864"
+ echo "ar_IQ CP864"
+ echo "ar_IR CP864"
+ echo "ar_JO CP864"
+ echo "ar_KW CP864"
+ echo "ar_MA CP864"
+ echo "ar_OM CP864"
+ echo "ar_QA CP864"
+ echo "ar_SA CP864"
+ echo "ar_SY CP864"
+ # ISO-8859-7 languages
+ echo "el CP869"
+ echo "el_GR CP869"
+ # ISO-8859-8 languages
+ echo "he CP862"
+ echo "he_IL CP862"
+ # ISO-8859-9 languages
+ echo "tr CP857"
+ echo "tr_TR CP857"
+ # Japanese
+ echo "ja CP932"
+ echo "ja_JP CP932"
+ # Chinese
+ echo "zh_CN GBK"
+ echo "zh_TW CP950" # not CP938 ??
+ # Korean
+ echo "kr CP949" # not CP934 ??
+ echo "kr_KR CP949" # not CP934 ??
+ # Thai
+ echo "th CP874"
+ echo "th_TH CP874"
+ # Other
+ echo "eo CP850"
+ echo "eo_EO CP850"
+ ;;
+esac
diff --git a/lib/error.c b/lib/error.c
new file mode 100644
index 0000000..cf86343
--- /dev/null
+++ b/lib/error.c
@@ -0,0 +1,338 @@
+/* Error handler for noninteractive utilities
+ Copyright (C) 1990-1998, 2000-2005, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* Written by David MacKenzie <djm@gnu.ai.mit.edu>. */
+
+#if !_LIBC
+# include <config.h>
+#endif
+
+#include "error.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if !_LIBC && ENABLE_NLS
+# include "gettext.h"
+#endif
+
+#ifdef _LIBC
+# include <libintl.h>
+# include <stdbool.h>
+# include <stdint.h>
+# include <wchar.h>
+# define mbsrtowcs __mbsrtowcs
+#endif
+
+#if USE_UNLOCKED_IO
+# include "unlocked-io.h"
+#endif
+
+#ifndef _
+# define _(String) String
+#endif
+
+/* If NULL, error will flush stdout, then print on stderr the program
+ name, a colon and a space. Otherwise, error will call this
+ function without parameters instead. */
+void (*error_print_progname) (void);
+
+/* This variable is incremented each time `error' is called. */
+unsigned int error_message_count;
+
+#ifdef _LIBC
+/* In the GNU C library, there is a predefined variable for this. */
+
+# define program_name program_invocation_name
+# include <errno.h>
+# include <limits.h>
+# include <libio/libioP.h>
+
+/* In GNU libc we want do not want to use the common name `error' directly.
+ Instead make it a weak alias. */
+extern void __error (int status, int errnum, const char *message, ...)
+ __attribute__ ((__format__ (__printf__, 3, 4)));
+extern void __error_at_line (int status, int errnum, const char *file_name,
+ unsigned int line_number, const char *message,
+ ...)
+ __attribute__ ((__format__ (__printf__, 5, 6)));;
+# define error __error
+# define error_at_line __error_at_line
+
+# include <libio/iolibio.h>
+# define fflush(s) INTUSE(_IO_fflush) (s)
+# undef putc
+# define putc(c, fp) INTUSE(_IO_putc) (c, fp)
+
+# include <bits/libc-lock.h>
+
+#else /* not _LIBC */
+
+# if !HAVE_DECL_STRERROR_R && STRERROR_R_CHAR_P
+# ifndef HAVE_DECL_STRERROR_R
+"this configure-time declaration test was not run"
+# endif
+char *strerror_r ();
+# endif
+
+/* The calling program should define program_name and set it to the
+ name of the executing program. */
+extern char *program_name;
+
+# if HAVE_STRERROR_R || defined strerror_r
+# define __strerror_r strerror_r
+# endif /* HAVE_STRERROR_R || defined strerror_r */
+#endif /* not _LIBC */
+
+static void
+print_errno_message (int errnum)
+{
+ char const *s;
+
+#if defined HAVE_STRERROR_R || _LIBC
+ char errbuf[1024];
+# if STRERROR_R_CHAR_P || _LIBC
+ s = __strerror_r (errnum, errbuf, sizeof errbuf);
+# else
+ if (__strerror_r (errnum, errbuf, sizeof errbuf) == 0)
+ s = errbuf;
+ else
+ s = 0;
+# endif
+#else
+ s = strerror (errnum);
+#endif
+
+#if !_LIBC
+ if (! s)
+ s = _("Unknown system error");
+#endif
+
+#if _LIBC
+ __fxprintf (NULL, ": %s", s);
+#else
+ fprintf (stderr, ": %s", s);
+#endif
+}
+
+static void
+error_tail (int status, int errnum, const char *message, va_list args)
+{
+#if _LIBC
+ if (_IO_fwide (stderr, 0) > 0)
+ {
+# define ALLOCA_LIMIT 2000
+ size_t len = strlen (message) + 1;
+ wchar_t *wmessage = NULL;
+ mbstate_t st;
+ size_t res;
+ const char *tmp;
+ bool use_malloc = false;
+
+ while (1)
+ {
+ if (__libc_use_alloca (len * sizeof (wchar_t)))
+ wmessage = (wchar_t *) alloca (len * sizeof (wchar_t));
+ else
+ {
+ if (!use_malloc)
+ wmessage = NULL;
+
+ wchar_t *p = (wchar_t *) realloc (wmessage,
+ len * sizeof (wchar_t));
+ if (p == NULL)
+ {
+ free (wmessage);
+ fputws_unlocked (L"out of memory\n", stderr);
+ return;
+ }
+ wmessage = p;
+ use_malloc = true;
+ }
+
+ memset (&st, '\0', sizeof (st));
+ tmp = message;
+
+ res = mbsrtowcs (wmessage, &tmp, len, &st);
+ if (res != len)
+ break;
+
+ if (__builtin_expect (len >= SIZE_MAX / 2, 0))
+ {
+ /* This really should not happen if everything is fine. */
+ res = (size_t) -1;
+ break;
+ }
+
+ len *= 2;
+ }
+
+ if (res == (size_t) -1)
+ {
+ /* The string cannot be converted. */
+ if (use_malloc)
+ {
+ free (wmessage);
+ use_malloc = false;
+ }
+ wmessage = (wchar_t *) L"???";
+ }
+
+ __vfwprintf (stderr, wmessage, args);
+
+ if (use_malloc)
+ free (wmessage);
+ }
+ else
+#endif
+ vfprintf (stderr, message, args);
+ va_end (args);
+
+ ++error_message_count;
+ if (errnum)
+ print_errno_message (errnum);
+#if _LIBC
+ __fxprintf (NULL, "\n");
+#else
+ putc ('\n', stderr);
+#endif
+ fflush (stderr);
+ if (status)
+ exit (status);
+}
+
+
+/* Print the program name and error message MESSAGE, which is a printf-style
+ format string with optional args.
+ If ERRNUM is nonzero, print its corresponding system error message.
+ Exit with status STATUS if it is nonzero. */
+void
+error (int status, int errnum, const char *message, ...)
+{
+ va_list args;
+
+#if defined _LIBC && defined __libc_ptf_call
+ /* We do not want this call to be cut short by a thread
+ cancellation. Therefore disable cancellation for now. */
+ int state = PTHREAD_CANCEL_ENABLE;
+ __libc_ptf_call (pthread_setcancelstate, (PTHREAD_CANCEL_DISABLE, &state),
+ 0);
+#endif
+
+ fflush (stdout);
+#ifdef _LIBC
+ _IO_flockfile (stderr);
+#endif
+ if (error_print_progname)
+ (*error_print_progname) ();
+ else
+ {
+#if _LIBC
+ __fxprintf (NULL, "%s: ", program_name);
+#else
+ fprintf (stderr, "%s: ", program_name);
+#endif
+ }
+
+ va_start (args, message);
+ error_tail (status, errnum, message, args);
+
+#ifdef _LIBC
+ _IO_funlockfile (stderr);
+# ifdef __libc_ptf_call
+ __libc_ptf_call (pthread_setcancelstate, (state, NULL), 0);
+# endif
+#endif
+}
+
+/* Sometimes we want to have at most one error per line. This
+ variable controls whether this mode is selected or not. */
+int error_one_per_line;
+
+void
+error_at_line (int status, int errnum, const char *file_name,
+ unsigned int line_number, const char *message, ...)
+{
+ va_list args;
+
+ if (error_one_per_line)
+ {
+ static const char *old_file_name;
+ static unsigned int old_line_number;
+
+ if (old_line_number == line_number
+ && (file_name == old_file_name
+ || strcmp (old_file_name, file_name) == 0))
+ /* Simply return and print nothing. */
+ return;
+
+ old_file_name = file_name;
+ old_line_number = line_number;
+ }
+
+#if defined _LIBC && defined __libc_ptf_call
+ /* We do not want this call to be cut short by a thread
+ cancellation. Therefore disable cancellation for now. */
+ int state = PTHREAD_CANCEL_ENABLE;
+ __libc_ptf_call (pthread_setcancelstate, (PTHREAD_CANCEL_DISABLE, &state),
+ 0);
+#endif
+
+ fflush (stdout);
+#ifdef _LIBC
+ _IO_flockfile (stderr);
+#endif
+ if (error_print_progname)
+ (*error_print_progname) ();
+ else
+ {
+#if _LIBC
+ __fxprintf (NULL, "%s:", program_name);
+#else
+ fprintf (stderr, "%s:", program_name);
+#endif
+ }
+
+#if _LIBC
+ __fxprintf (NULL, file_name != NULL ? "%s:%d: " : " ",
+ file_name, line_number);
+#else
+ fprintf (stderr, file_name != NULL ? "%s:%d: " : " ",
+ file_name, line_number);
+#endif
+
+ va_start (args, message);
+ error_tail (status, errnum, message, args);
+
+#ifdef _LIBC
+ _IO_funlockfile (stderr);
+# ifdef __libc_ptf_call
+ __libc_ptf_call (pthread_setcancelstate, (state, NULL), 0);
+# endif
+#endif
+}
+
+#ifdef _LIBC
+/* Make the weak alias. */
+# undef error
+# undef error_at_line
+weak_alias (__error, error)
+weak_alias (__error_at_line, error_at_line)
+#endif
diff --git a/lib/error.h b/lib/error.h
new file mode 100644
index 0000000..5a5f247
--- /dev/null
+++ b/lib/error.h
@@ -0,0 +1,66 @@
+/* Declaration for error-reporting function
+ Copyright (C) 1995, 1996, 1997, 2003, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#ifndef _ERROR_H
+#define _ERROR_H 1
+
+#ifndef __attribute__
+/* This feature is available in gcc versions 2.5 and later. */
+# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 5) || __STRICT_ANSI__
+# define __attribute__(Spec) /* empty */
+# endif
+/* The __-protected variants of `format' and `printf' attributes
+ are accepted by gcc versions 2.6.4 (effectively 2.7) and later. */
+# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 7)
+# define __format__ format
+# define __printf__ printf
+# endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Print a message with `fprintf (stderr, FORMAT, ...)';
+ if ERRNUM is nonzero, follow it with ": " and strerror (ERRNUM).
+ If STATUS is nonzero, terminate the program with `exit (STATUS)'. */
+
+extern void error (int __status, int __errnum, const char *__format, ...)
+ __attribute__ ((__format__ (__printf__, 3, 4)));
+
+extern void error_at_line (int __status, int __errnum, const char *__fname,
+ unsigned int __lineno, const char *__format, ...)
+ __attribute__ ((__format__ (__printf__, 5, 6)));
+
+/* If NULL, error will flush stdout, then print on stderr the program
+ name, a colon and a space. Otherwise, error will call this
+ function without parameters instead. */
+extern void (*error_print_progname) (void);
+
+/* This variable is incremented each time `error' is called. */
+extern unsigned int error_message_count;
+
+/* Sometimes we want to have at most one error per line. This
+ variable controls whether this mode is selected or not. */
+extern int error_one_per_line;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* error.h */
diff --git a/lib/exit.h b/lib/exit.h
new file mode 100644
index 0000000..e8f7738
--- /dev/null
+++ b/lib/exit.h
@@ -0,0 +1,32 @@
+/* exit() function.
+ Copyright (C) 1995, 2001 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#ifndef _EXIT_H
+#define _EXIT_H
+
+/* Get exit() declaration. */
+#include <stdlib.h>
+
+/* Some systems do not define EXIT_*, despite otherwise supporting C89. */
+#ifndef EXIT_SUCCESS
+# define EXIT_SUCCESS 0
+#endif
+#ifndef EXIT_FAILURE
+# define EXIT_FAILURE 1
+#endif
+
+#endif /* _EXIT_H */
diff --git a/lib/exitfail.c b/lib/exitfail.c
new file mode 100644
index 0000000..97abc67
--- /dev/null
+++ b/lib/exitfail.c
@@ -0,0 +1,25 @@
+/* Failure exit status
+
+ Copyright (C) 2002, 2003, 2005, 2006 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING.
+ If not, write to the Free Software Foundation,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#include <config.h>
+
+#include "exitfail.h"
+#include "exit.h"
+
+int volatile exit_failure = EXIT_FAILURE;
diff --git a/lib/exitfail.h b/lib/exitfail.h
new file mode 100644
index 0000000..e46cf9c
--- /dev/null
+++ b/lib/exitfail.h
@@ -0,0 +1,20 @@
+/* Failure exit status
+
+ Copyright (C) 2002 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING.
+ If not, write to the Free Software Foundation,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+extern int volatile exit_failure;
diff --git a/lib/getdelim.c b/lib/getdelim.c
new file mode 100644
index 0000000..a131b0c
--- /dev/null
+++ b/lib/getdelim.c
@@ -0,0 +1,128 @@
+/* getdelim.c --- Implementation of replacement getdelim function.
+ Copyright (C) 1994, 1996, 1997, 1998, 2001, 2003, 2005 Free
+ Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2, or (at
+ your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+/* Ported from glibc by Simon Josefsson. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include "getdelim.h"
+
+#include <limits.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#ifndef SIZE_MAX
+# define SIZE_MAX ((size_t) -1)
+#endif
+#ifndef SSIZE_MAX
+# define SSIZE_MAX ((ssize_t) (SIZE_MAX / 2))
+#endif
+#if !HAVE_FLOCKFILE
+# undef flockfile
+# define flockfile(x) ((void) 0)
+#endif
+#if !HAVE_FUNLOCKFILE
+# undef funlockfile
+# define funlockfile(x) ((void) 0)
+#endif
+
+/* Read up to (and including) a DELIMITER from FP into *LINEPTR (and
+ NUL-terminate it). *LINEPTR is a pointer returned from malloc (or
+ NULL), pointing to *N characters of space. It is realloc'ed as
+ necessary. Returns the number of characters read (not including
+ the null terminator), or -1 on error or EOF. */
+
+ssize_t
+getdelim (char **lineptr, size_t *n, int delimiter, FILE *fp)
+{
+ ssize_t result;
+ size_t cur_len = 0;
+
+ if (lineptr == NULL || n == NULL || fp == NULL)
+ {
+ errno = EINVAL;
+ return -1;
+ }
+
+ flockfile (fp);
+
+ if (*lineptr == NULL || *n == 0)
+ {
+ *n = 120;
+ *lineptr = (char *) malloc (*n);
+ if (*lineptr == NULL)
+ {
+ result = -1;
+ goto unlock_return;
+ }
+ }
+
+ for (;;)
+ {
+ int i;
+
+ i = getc (fp);
+ if (i == EOF)
+ {
+ result = -1;
+ break;
+ }
+
+ /* Make enough space for len+1 (for final NUL) bytes. */
+ if (cur_len + 1 >= *n)
+ {
+ size_t needed_max =
+ SSIZE_MAX < SIZE_MAX ? (size_t) SSIZE_MAX + 1 : SIZE_MAX;
+ size_t needed = 2 * *n + 1; /* Be generous. */
+ char *new_lineptr;
+
+ if (needed_max < needed)
+ needed = needed_max;
+ if (cur_len + 1 >= needed)
+ {
+ result = -1;
+ goto unlock_return;
+ }
+
+ new_lineptr = (char *) realloc (*lineptr, needed);
+ if (new_lineptr == NULL)
+ {
+ result = -1;
+ goto unlock_return;
+ }
+
+ *lineptr = new_lineptr;
+ *n = needed;
+ }
+
+ (*lineptr)[cur_len] = i;
+ cur_len++;
+
+ if (i == delimiter)
+ break;
+ }
+ (*lineptr)[cur_len] = '\0';
+ result = cur_len ? cur_len : result;
+
+ unlock_return:
+ funlockfile (fp);
+ return result;
+}
diff --git a/lib/getdelim.h b/lib/getdelim.h
new file mode 100644
index 0000000..8bc6130
--- /dev/null
+++ b/lib/getdelim.h
@@ -0,0 +1,28 @@
+/* getdelim.h --- Prototype for replacement getdelim function.
+ Copyright (C) 2005 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2, or (at
+ your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+/* Written by Simon Josefsson. */
+
+/* Get size_t, FILE, ssize_t. And getdelim, if available. */
+# include <stddef.h>
+# include <stdio.h>
+# include <sys/types.h>
+
+#if !HAVE_DECL_GETDELIM
+ssize_t getdelim (char **lineptr, size_t *n, int delimiter, FILE *stream);
+#endif /* !HAVE_GETDELIM */
diff --git a/lib/getline.c b/lib/getline.c
new file mode 100644
index 0000000..ab054fb
--- /dev/null
+++ b/lib/getline.c
@@ -0,0 +1,32 @@
+/* getline.c --- Implementation of replacement getline function.
+ Copyright (C) 2005 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2, or (at
+ your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+/* Written by Simon Josefsson. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include "getdelim.h"
+#include "getline.h"
+
+ssize_t
+getline (char **lineptr, size_t *n, FILE *stream)
+{
+ return getdelim (lineptr, n, '\n', stream);
+}
diff --git a/lib/getline.h b/lib/getline.h
new file mode 100644
index 0000000..f0f61c4
--- /dev/null
+++ b/lib/getline.h
@@ -0,0 +1,28 @@
+/* getline.h --- Prototype for replacement getline function.
+ Copyright (C) 2005 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2, or (at
+ your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+/* Written by Simon Josefsson. */
+
+/* Get size_t, FILE, ssize_t. And getline, if available. */
+# include <stddef.h>
+# include <stdio.h>
+# include <sys/types.h>
+
+#if !HAVE_DECL_GETLINE
+ssize_t getline (char **lineptr, size_t *n, FILE *stream);
+#endif /* !HAVE_GETLINE */
diff --git a/lib/getopt.c b/lib/getopt.c
new file mode 100644
index 0000000..3580ad8
--- /dev/null
+++ b/lib/getopt.c
@@ -0,0 +1,1191 @@
+/* Getopt for GNU.
+ NOTE: getopt is now part of the C library, so if you don't know what
+ "Keep this file name-space clean" means, talk to drepper@gnu.org
+ before changing it!
+ Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001,2002,2003,2004,2006
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#ifndef _LIBC
+# include <config.h>
+#endif
+
+#include "getopt.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#ifdef __VMS
+# include <unixlib.h>
+#endif
+
+#ifdef _LIBC
+# include <libintl.h>
+#else
+# include "gettext.h"
+# define _(msgid) gettext (msgid)
+#endif
+
+#if defined _LIBC && defined USE_IN_LIBIO
+# include <wchar.h>
+#endif
+
+#ifndef attribute_hidden
+# define attribute_hidden
+#endif
+
+/* Unlike standard Unix `getopt', functions like `getopt_long'
+ let the user intersperse the options with the other arguments.
+
+ As `getopt_long' works, it permutes the elements of ARGV so that,
+ when it is done, all the options precede everything else. Thus
+ all application programs are extended to handle flexible argument order.
+
+ Using `getopt' or setting the environment variable POSIXLY_CORRECT
+ disables permutation.
+ Then the application's behavior is completely standard.
+
+ GNU application programs can use a third alternative mode in which
+ they can distinguish the relative order of options and other arguments. */
+
+#include "getopt_int.h"
+
+/* For communication from `getopt' to the caller.
+ When `getopt' finds an option that takes an argument,
+ the argument value is returned here.
+ Also, when `ordering' is RETURN_IN_ORDER,
+ each non-option ARGV-element is returned here. */
+
+char *optarg;
+
+/* Index in ARGV of the next element to be scanned.
+ This is used for communication to and from the caller
+ and for communication between successive calls to `getopt'.
+
+ On entry to `getopt', zero means this is the first call; initialize.
+
+ When `getopt' returns -1, this is the index of the first of the
+ non-option elements that the caller should itself scan.
+
+ Otherwise, `optind' communicates from one call to the next
+ how much of ARGV has been scanned so far. */
+
+/* 1003.2 says this must be 1 before any call. */
+int optind = 1;
+
+/* Callers store zero here to inhibit the error message
+ for unrecognized options. */
+
+int opterr = 1;
+
+/* Set to an option character which was unrecognized.
+ This must be initialized on some systems to avoid linking in the
+ system's own getopt implementation. */
+
+int optopt = '?';
+
+/* Keep a global copy of all internal members of getopt_data. */
+
+static struct _getopt_data getopt_data;
+
+
+#if defined HAVE_DECL_GETENV && !HAVE_DECL_GETENV
+extern char *getenv ();
+#endif
+
+#ifdef _LIBC
+/* Stored original parameters.
+ XXX This is no good solution. We should rather copy the args so
+ that we can compare them later. But we must not use malloc(3). */
+extern int __libc_argc;
+extern char **__libc_argv;
+
+/* Bash 2.0 gives us an environment variable containing flags
+ indicating ARGV elements that should not be considered arguments. */
+
+# ifdef USE_NONOPTION_FLAGS
+/* Defined in getopt_init.c */
+extern char *__getopt_nonoption_flags;
+# endif
+
+# ifdef USE_NONOPTION_FLAGS
+# define SWAP_FLAGS(ch1, ch2) \
+ if (d->__nonoption_flags_len > 0) \
+ { \
+ char __tmp = __getopt_nonoption_flags[ch1]; \
+ __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \
+ __getopt_nonoption_flags[ch2] = __tmp; \
+ }
+# else
+# define SWAP_FLAGS(ch1, ch2)
+# endif
+#else /* !_LIBC */
+# define SWAP_FLAGS(ch1, ch2)
+#endif /* _LIBC */
+
+/* Exchange two adjacent subsequences of ARGV.
+ One subsequence is elements [first_nonopt,last_nonopt)
+ which contains all the non-options that have been skipped so far.
+ The other is elements [last_nonopt,optind), which contains all
+ the options processed since those non-options were skipped.
+
+ `first_nonopt' and `last_nonopt' are relocated so that they describe
+ the new indices of the non-options in ARGV after they are moved. */
+
+static void
+exchange (char **argv, struct _getopt_data *d)
+{
+ int bottom = d->__first_nonopt;
+ int middle = d->__last_nonopt;
+ int top = d->optind;
+ char *tem;
+
+ /* Exchange the shorter segment with the far end of the longer segment.
+ That puts the shorter segment into the right place.
+ It leaves the longer segment in the right place overall,
+ but it consists of two parts that need to be swapped next. */
+
+#if defined _LIBC && defined USE_NONOPTION_FLAGS
+ /* First make sure the handling of the `__getopt_nonoption_flags'
+ string can work normally. Our top argument must be in the range
+ of the string. */
+ if (d->__nonoption_flags_len > 0 && top >= d->__nonoption_flags_max_len)
+ {
+ /* We must extend the array. The user plays games with us and
+ presents new arguments. */
+ char *new_str = malloc (top + 1);
+ if (new_str == NULL)
+ d->__nonoption_flags_len = d->__nonoption_flags_max_len = 0;
+ else
+ {
+ memset (__mempcpy (new_str, __getopt_nonoption_flags,
+ d->__nonoption_flags_max_len),
+ '\0', top + 1 - d->__nonoption_flags_max_len);
+ d->__nonoption_flags_max_len = top + 1;
+ __getopt_nonoption_flags = new_str;
+ }
+ }
+#endif
+
+ while (top > middle && middle > bottom)
+ {
+ if (top - middle > middle - bottom)
+ {
+ /* Bottom segment is the short one. */
+ int len = middle - bottom;
+ register int i;
+
+ /* Swap it with the top part of the top segment. */
+ for (i = 0; i < len; i++)
+ {
+ tem = argv[bottom + i];
+ argv[bottom + i] = argv[top - (middle - bottom) + i];
+ argv[top - (middle - bottom) + i] = tem;
+ SWAP_FLAGS (bottom + i, top - (middle - bottom) + i);
+ }
+ /* Exclude the moved bottom segment from further swapping. */
+ top -= len;
+ }
+ else
+ {
+ /* Top segment is the short one. */
+ int len = top - middle;
+ register int i;
+
+ /* Swap it with the bottom part of the bottom segment. */
+ for (i = 0; i < len; i++)
+ {
+ tem = argv[bottom + i];
+ argv[bottom + i] = argv[middle + i];
+ argv[middle + i] = tem;
+ SWAP_FLAGS (bottom + i, middle + i);
+ }
+ /* Exclude the moved top segment from further swapping. */
+ bottom += len;
+ }
+ }
+
+ /* Update records for the slots the non-options now occupy. */
+
+ d->__first_nonopt += (d->optind - d->__last_nonopt);
+ d->__last_nonopt = d->optind;
+}
+
+/* Initialize the internal data when the first call is made. */
+
+static const char *
+_getopt_initialize (int argc, char **argv, const char *optstring,
+ int posixly_correct, struct _getopt_data *d)
+{
+ /* Start processing options with ARGV-element 1 (since ARGV-element 0
+ is the program name); the sequence of previously skipped
+ non-option ARGV-elements is empty. */
+
+ d->__first_nonopt = d->__last_nonopt = d->optind;
+
+ d->__nextchar = NULL;
+
+ d->__posixly_correct = posixly_correct || !!getenv ("POSIXLY_CORRECT");
+
+ /* Determine how to handle the ordering of options and nonoptions. */
+
+ if (optstring[0] == '-')
+ {
+ d->__ordering = RETURN_IN_ORDER;
+ ++optstring;
+ }
+ else if (optstring[0] == '+')
+ {
+ d->__ordering = REQUIRE_ORDER;
+ ++optstring;
+ }
+ else if (d->__posixly_correct)
+ d->__ordering = REQUIRE_ORDER;
+ else
+ d->__ordering = PERMUTE;
+
+#if defined _LIBC && defined USE_NONOPTION_FLAGS
+ if (!d->__posixly_correct
+ && argc == __libc_argc && argv == __libc_argv)
+ {
+ if (d->__nonoption_flags_max_len == 0)
+ {
+ if (__getopt_nonoption_flags == NULL
+ || __getopt_nonoption_flags[0] == '\0')
+ d->__nonoption_flags_max_len = -1;
+ else
+ {
+ const char *orig_str = __getopt_nonoption_flags;
+ int len = d->__nonoption_flags_max_len = strlen (orig_str);
+ if (d->__nonoption_flags_max_len < argc)
+ d->__nonoption_flags_max_len = argc;
+ __getopt_nonoption_flags =
+ (char *) malloc (d->__nonoption_flags_max_len);
+ if (__getopt_nonoption_flags == NULL)
+ d->__nonoption_flags_max_len = -1;
+ else
+ memset (__mempcpy (__getopt_nonoption_flags, orig_str, len),
+ '\0', d->__nonoption_flags_max_len - len);
+ }
+ }
+ d->__nonoption_flags_len = d->__nonoption_flags_max_len;
+ }
+ else
+ d->__nonoption_flags_len = 0;
+#endif
+
+ return optstring;
+}
+
+/* Scan elements of ARGV (whose length is ARGC) for option characters
+ given in OPTSTRING.
+
+ If an element of ARGV starts with '-', and is not exactly "-" or "--",
+ then it is an option element. The characters of this element
+ (aside from the initial '-') are option characters. If `getopt'
+ is called repeatedly, it returns successively each of the option characters
+ from each of the option elements.
+
+ If `getopt' finds another option character, it returns that character,
+ updating `optind' and `nextchar' so that the next call to `getopt' can
+ resume the scan with the following option character or ARGV-element.
+
+ If there are no more option characters, `getopt' returns -1.
+ Then `optind' is the index in ARGV of the first ARGV-element
+ that is not an option. (The ARGV-elements have been permuted
+ so that those that are not options now come last.)
+
+ OPTSTRING is a string containing the legitimate option characters.
+ If an option character is seen that is not listed in OPTSTRING,
+ return '?' after printing an error message. If you set `opterr' to
+ zero, the error message is suppressed but we still return '?'.
+
+ If a char in OPTSTRING is followed by a colon, that means it wants an arg,
+ so the following text in the same ARGV-element, or the text of the following
+ ARGV-element, is returned in `optarg'. Two colons mean an option that
+ wants an optional arg; if there is text in the current ARGV-element,
+ it is returned in `optarg', otherwise `optarg' is set to zero.
+
+ If OPTSTRING starts with `-' or `+', it requests different methods of
+ handling the non-option ARGV-elements.
+ See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
+
+ Long-named options begin with `--' instead of `-'.
+ Their names may be abbreviated as long as the abbreviation is unique
+ or is an exact match for some defined option. If they have an
+ argument, it follows the option name in the same ARGV-element, separated
+ from the option name by a `=', or else the in next ARGV-element.
+ When `getopt' finds a long-named option, it returns 0 if that option's
+ `flag' field is nonzero, the value of the option's `val' field
+ if the `flag' field is zero.
+
+ LONGOPTS is a vector of `struct option' terminated by an
+ element containing a name which is zero.
+
+ LONGIND returns the index in LONGOPT of the long-named option found.
+ It is only valid when a long-named option has been found by the most
+ recent call.
+
+ If LONG_ONLY is nonzero, '-' as well as '--' can introduce
+ long-named options.
+
+ If POSIXLY_CORRECT is nonzero, behave as if the POSIXLY_CORRECT
+ environment variable were set. */
+
+int
+_getopt_internal_r (int argc, char **argv, const char *optstring,
+ const struct option *longopts, int *longind,
+ int long_only, int posixly_correct, struct _getopt_data *d)
+{
+ int print_errors = d->opterr;
+ if (optstring[0] == ':')
+ print_errors = 0;
+
+ if (argc < 1)
+ return -1;
+
+ d->optarg = NULL;
+
+ if (d->optind == 0 || !d->__initialized)
+ {
+ if (d->optind == 0)
+ d->optind = 1; /* Don't scan ARGV[0], the program name. */
+ optstring = _getopt_initialize (argc, argv, optstring,
+ posixly_correct, d);
+ d->__initialized = 1;
+ }
+
+ /* Test whether ARGV[optind] points to a non-option argument.
+ Either it does not have option syntax, or there is an environment flag
+ from the shell indicating it is not an option. The later information
+ is only used when the used in the GNU libc. */
+#if defined _LIBC && defined USE_NONOPTION_FLAGS
+# define NONOPTION_P (argv[d->optind][0] != '-' || argv[d->optind][1] == '\0' \
+ || (d->optind < d->__nonoption_flags_len \
+ && __getopt_nonoption_flags[d->optind] == '1'))
+#else
+# define NONOPTION_P (argv[d->optind][0] != '-' || argv[d->optind][1] == '\0')
+#endif
+
+ if (d->__nextchar == NULL || *d->__nextchar == '\0')
+ {
+ /* Advance to the next ARGV-element. */
+
+ /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been
+ moved back by the user (who may also have changed the arguments). */
+ if (d->__last_nonopt > d->optind)
+ d->__last_nonopt = d->optind;
+ if (d->__first_nonopt > d->optind)
+ d->__first_nonopt = d->optind;
+
+ if (d->__ordering == PERMUTE)
+ {
+ /* If we have just processed some options following some non-options,
+ exchange them so that the options come first. */
+
+ if (d->__first_nonopt != d->__last_nonopt
+ && d->__last_nonopt != d->optind)
+ exchange ((char **) argv, d);
+ else if (d->__last_nonopt != d->optind)
+ d->__first_nonopt = d->optind;
+
+ /* Skip any additional non-options
+ and extend the range of non-options previously skipped. */
+
+ while (d->optind < argc && NONOPTION_P)
+ d->optind++;
+ d->__last_nonopt = d->optind;
+ }
+
+ /* The special ARGV-element `--' means premature end of options.
+ Skip it like a null option,
+ then exchange with previous non-options as if it were an option,
+ then skip everything else like a non-option. */
+
+ if (d->optind != argc && !strcmp (argv[d->optind], "--"))
+ {
+ d->optind++;
+
+ if (d->__first_nonopt != d->__last_nonopt
+ && d->__last_nonopt != d->optind)
+ exchange ((char **) argv, d);
+ else if (d->__first_nonopt == d->__last_nonopt)
+ d->__first_nonopt = d->optind;
+ d->__last_nonopt = argc;
+
+ d->optind = argc;
+ }
+
+ /* If we have done all the ARGV-elements, stop the scan
+ and back over any non-options that we skipped and permuted. */
+
+ if (d->optind == argc)
+ {
+ /* Set the next-arg-index to point at the non-options
+ that we previously skipped, so the caller will digest them. */
+ if (d->__first_nonopt != d->__last_nonopt)
+ d->optind = d->__first_nonopt;
+ return -1;
+ }
+
+ /* If we have come to a non-option and did not permute it,
+ either stop the scan or describe it to the caller and pass it by. */
+
+ if (NONOPTION_P)
+ {
+ if (d->__ordering == REQUIRE_ORDER)
+ return -1;
+ d->optarg = argv[d->optind++];
+ return 1;
+ }
+
+ /* We have found another option-ARGV-element.
+ Skip the initial punctuation. */
+
+ d->__nextchar = (argv[d->optind] + 1
+ + (longopts != NULL && argv[d->optind][1] == '-'));
+ }
+
+ /* Decode the current option-ARGV-element. */
+
+ /* Check whether the ARGV-element is a long option.
+
+ If long_only and the ARGV-element has the form "-f", where f is
+ a valid short option, don't consider it an abbreviated form of
+ a long option that starts with f. Otherwise there would be no
+ way to give the -f short option.
+
+ On the other hand, if there's a long option "fubar" and
+ the ARGV-element is "-fu", do consider that an abbreviation of
+ the long option, just like "--fu", and not "-f" with arg "u".
+
+ This distinction seems to be the most useful approach. */
+
+ if (longopts != NULL
+ && (argv[d->optind][1] == '-'
+ || (long_only && (argv[d->optind][2]
+ || !strchr (optstring, argv[d->optind][1])))))
+ {
+ char *nameend;
+ const struct option *p;
+ const struct option *pfound = NULL;
+ int exact = 0;
+ int ambig = 0;
+ int indfound = -1;
+ int option_index;
+
+ for (nameend = d->__nextchar; *nameend && *nameend != '='; nameend++)
+ /* Do nothing. */ ;
+
+ /* Test all long options for either exact match
+ or abbreviated matches. */
+ for (p = longopts, option_index = 0; p->name; p++, option_index++)
+ if (!strncmp (p->name, d->__nextchar, nameend - d->__nextchar))
+ {
+ if ((unsigned int) (nameend - d->__nextchar)
+ == (unsigned int) strlen (p->name))
+ {
+ /* Exact match found. */
+ pfound = p;
+ indfound = option_index;
+ exact = 1;
+ break;
+ }
+ else if (pfound == NULL)
+ {
+ /* First nonexact match found. */
+ pfound = p;
+ indfound = option_index;
+ }
+ else if (long_only
+ || pfound->has_arg != p->has_arg
+ || pfound->flag != p->flag
+ || pfound->val != p->val)
+ /* Second or later nonexact match found. */
+ ambig = 1;
+ }
+
+ if (ambig && !exact)
+ {
+ if (print_errors)
+ {
+#if defined _LIBC && defined USE_IN_LIBIO
+ char *buf;
+
+ if (__asprintf (&buf, _("%s: option `%s' is ambiguous\n"),
+ argv[0], argv[d->optind]) >= 0)
+ {
+ _IO_flockfile (stderr);
+
+ int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+ ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+
+ __fxprintf (NULL, "%s", buf);
+
+ ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+ _IO_funlockfile (stderr);
+
+ free (buf);
+ }
+#else
+ fprintf (stderr, _("%s: option `%s' is ambiguous\n"),
+ argv[0], argv[d->optind]);
+#endif
+ }
+ d->__nextchar += strlen (d->__nextchar);
+ d->optind++;
+ d->optopt = 0;
+ return '?';
+ }
+
+ if (pfound != NULL)
+ {
+ option_index = indfound;
+ d->optind++;
+ if (*nameend)
+ {
+ /* Don't test has_arg with >, because some C compilers don't
+ allow it to be used on enums. */
+ if (pfound->has_arg)
+ d->optarg = nameend + 1;
+ else
+ {
+ if (print_errors)
+ {
+#if defined _LIBC && defined USE_IN_LIBIO
+ char *buf;
+ int n;
+#endif
+
+ if (argv[d->optind - 1][1] == '-')
+ {
+ /* --option */
+#if defined _LIBC && defined USE_IN_LIBIO
+ n = __asprintf (&buf, _("\
+%s: option `--%s' doesn't allow an argument\n"),
+ argv[0], pfound->name);
+#else
+ fprintf (stderr, _("\
+%s: option `--%s' doesn't allow an argument\n"),
+ argv[0], pfound->name);
+#endif
+ }
+ else
+ {
+ /* +option or -option */
+#if defined _LIBC && defined USE_IN_LIBIO
+ n = __asprintf (&buf, _("\
+%s: option `%c%s' doesn't allow an argument\n"),
+ argv[0], argv[d->optind - 1][0],
+ pfound->name);
+#else
+ fprintf (stderr, _("\
+%s: option `%c%s' doesn't allow an argument\n"),
+ argv[0], argv[d->optind - 1][0],
+ pfound->name);
+#endif
+ }
+
+#if defined _LIBC && defined USE_IN_LIBIO
+ if (n >= 0)
+ {
+ _IO_flockfile (stderr);
+
+ int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+ ((_IO_FILE *) stderr)->_flags2
+ |= _IO_FLAGS2_NOTCANCEL;
+
+ __fxprintf (NULL, "%s", buf);
+
+ ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+ _IO_funlockfile (stderr);
+
+ free (buf);
+ }
+#endif
+ }
+
+ d->__nextchar += strlen (d->__nextchar);
+
+ d->optopt = pfound->val;
+ return '?';
+ }
+ }
+ else if (pfound->has_arg == 1)
+ {
+ if (d->optind < argc)
+ d->optarg = argv[d->optind++];
+ else
+ {
+ if (print_errors)
+ {
+#if defined _LIBC && defined USE_IN_LIBIO
+ char *buf;
+
+ if (__asprintf (&buf, _("\
+%s: option `%s' requires an argument\n"),
+ argv[0], argv[d->optind - 1]) >= 0)
+ {
+ _IO_flockfile (stderr);
+
+ int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+ ((_IO_FILE *) stderr)->_flags2
+ |= _IO_FLAGS2_NOTCANCEL;
+
+ __fxprintf (NULL, "%s", buf);
+
+ ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+ _IO_funlockfile (stderr);
+
+ free (buf);
+ }
+#else
+ fprintf (stderr,
+ _("%s: option `%s' requires an argument\n"),
+ argv[0], argv[d->optind - 1]);
+#endif
+ }
+ d->__nextchar += strlen (d->__nextchar);
+ d->optopt = pfound->val;
+ return optstring[0] == ':' ? ':' : '?';
+ }
+ }
+ d->__nextchar += strlen (d->__nextchar);
+ if (longind != NULL)
+ *longind = option_index;
+ if (pfound->flag)
+ {
+ *(pfound->flag) = pfound->val;
+ return 0;
+ }
+ return pfound->val;
+ }
+
+ /* Can't find it as a long option. If this is not getopt_long_only,
+ or the option starts with '--' or is not a valid short
+ option, then it's an error.
+ Otherwise interpret it as a short option. */
+ if (!long_only || argv[d->optind][1] == '-'
+ || strchr (optstring, *d->__nextchar) == NULL)
+ {
+ if (print_errors)
+ {
+#if defined _LIBC && defined USE_IN_LIBIO
+ char *buf;
+ int n;
+#endif
+
+ if (argv[d->optind][1] == '-')
+ {
+ /* --option */
+#if defined _LIBC && defined USE_IN_LIBIO
+ n = __asprintf (&buf, _("%s: unrecognized option `--%s'\n"),
+ argv[0], d->__nextchar);
+#else
+ fprintf (stderr, _("%s: unrecognized option `--%s'\n"),
+ argv[0], d->__nextchar);
+#endif
+ }
+ else
+ {
+ /* +option or -option */
+#if defined _LIBC && defined USE_IN_LIBIO
+ n = __asprintf (&buf, _("%s: unrecognized option `%c%s'\n"),
+ argv[0], argv[d->optind][0], d->__nextchar);
+#else
+ fprintf (stderr, _("%s: unrecognized option `%c%s'\n"),
+ argv[0], argv[d->optind][0], d->__nextchar);
+#endif
+ }
+
+#if defined _LIBC && defined USE_IN_LIBIO
+ if (n >= 0)
+ {
+ _IO_flockfile (stderr);
+
+ int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+ ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+
+ __fxprintf (NULL, "%s", buf);
+
+ ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+ _IO_funlockfile (stderr);
+
+ free (buf);
+ }
+#endif
+ }
+ d->__nextchar = (char *) "";
+ d->optind++;
+ d->optopt = 0;
+ return '?';
+ }
+ }
+
+ /* Look at and handle the next short option-character. */
+
+ {
+ char c = *d->__nextchar++;
+ char *temp = strchr (optstring, c);
+
+ /* Increment `optind' when we start to process its last character. */
+ if (*d->__nextchar == '\0')
+ ++d->optind;
+
+ if (temp == NULL || c == ':')
+ {
+ if (print_errors)
+ {
+#if defined _LIBC && defined USE_IN_LIBIO
+ char *buf;
+ int n;
+#endif
+
+ if (d->__posixly_correct)
+ {
+ /* 1003.2 specifies the format of this message. */
+#if defined _LIBC && defined USE_IN_LIBIO
+ n = __asprintf (&buf, _("%s: illegal option -- %c\n"),
+ argv[0], c);
+#else
+ fprintf (stderr, _("%s: illegal option -- %c\n"), argv[0], c);
+#endif
+ }
+ else
+ {
+#if defined _LIBC && defined USE_IN_LIBIO
+ n = __asprintf (&buf, _("%s: invalid option -- %c\n"),
+ argv[0], c);
+#else
+ fprintf (stderr, _("%s: invalid option -- %c\n"), argv[0], c);
+#endif
+ }
+
+#if defined _LIBC && defined USE_IN_LIBIO
+ if (n >= 0)
+ {
+ _IO_flockfile (stderr);
+
+ int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+ ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+
+ __fxprintf (NULL, "%s", buf);
+
+ ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+ _IO_funlockfile (stderr);
+
+ free (buf);
+ }
+#endif
+ }
+ d->optopt = c;
+ return '?';
+ }
+ /* Convenience. Treat POSIX -W foo same as long option --foo */
+ if (temp[0] == 'W' && temp[1] == ';')
+ {
+ char *nameend;
+ const struct option *p;
+ const struct option *pfound = NULL;
+ int exact = 0;
+ int ambig = 0;
+ int indfound = 0;
+ int option_index;
+
+ /* This is an option that requires an argument. */
+ if (*d->__nextchar != '\0')
+ {
+ d->optarg = d->__nextchar;
+ /* If we end this ARGV-element by taking the rest as an arg,
+ we must advance to the next element now. */
+ d->optind++;
+ }
+ else if (d->optind == argc)
+ {
+ if (print_errors)
+ {
+ /* 1003.2 specifies the format of this message. */
+#if defined _LIBC && defined USE_IN_LIBIO
+ char *buf;
+
+ if (__asprintf (&buf,
+ _("%s: option requires an argument -- %c\n"),
+ argv[0], c) >= 0)
+ {
+ _IO_flockfile (stderr);
+
+ int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+ ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+
+ __fxprintf (NULL, "%s", buf);
+
+ ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+ _IO_funlockfile (stderr);
+
+ free (buf);
+ }
+#else
+ fprintf (stderr, _("%s: option requires an argument -- %c\n"),
+ argv[0], c);
+#endif
+ }
+ d->optopt = c;
+ if (optstring[0] == ':')
+ c = ':';
+ else
+ c = '?';
+ return c;
+ }
+ else
+ /* We already incremented `d->optind' once;
+ increment it again when taking next ARGV-elt as argument. */
+ d->optarg = argv[d->optind++];
+
+ /* optarg is now the argument, see if it's in the
+ table of longopts. */
+
+ for (d->__nextchar = nameend = d->optarg; *nameend && *nameend != '=';
+ nameend++)
+ /* Do nothing. */ ;
+
+ /* Test all long options for either exact match
+ or abbreviated matches. */
+ for (p = longopts, option_index = 0; p->name; p++, option_index++)
+ if (!strncmp (p->name, d->__nextchar, nameend - d->__nextchar))
+ {
+ if ((unsigned int) (nameend - d->__nextchar) == strlen (p->name))
+ {
+ /* Exact match found. */
+ pfound = p;
+ indfound = option_index;
+ exact = 1;
+ break;
+ }
+ else if (pfound == NULL)
+ {
+ /* First nonexact match found. */
+ pfound = p;
+ indfound = option_index;
+ }
+ else
+ /* Second or later nonexact match found. */
+ ambig = 1;
+ }
+ if (ambig && !exact)
+ {
+ if (print_errors)
+ {
+#if defined _LIBC && defined USE_IN_LIBIO
+ char *buf;
+
+ if (__asprintf (&buf, _("%s: option `-W %s' is ambiguous\n"),
+ argv[0], argv[d->optind]) >= 0)
+ {
+ _IO_flockfile (stderr);
+
+ int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+ ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+
+ __fxprintf (NULL, "%s", buf);
+
+ ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+ _IO_funlockfile (stderr);
+
+ free (buf);
+ }
+#else
+ fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"),
+ argv[0], argv[d->optind]);
+#endif
+ }
+ d->__nextchar += strlen (d->__nextchar);
+ d->optind++;
+ return '?';
+ }
+ if (pfound != NULL)
+ {
+ option_index = indfound;
+ if (*nameend)
+ {
+ /* Don't test has_arg with >, because some C compilers don't
+ allow it to be used on enums. */
+ if (pfound->has_arg)
+ d->optarg = nameend + 1;
+ else
+ {
+ if (print_errors)
+ {
+#if defined _LIBC && defined USE_IN_LIBIO
+ char *buf;
+
+ if (__asprintf (&buf, _("\
+%s: option `-W %s' doesn't allow an argument\n"),
+ argv[0], pfound->name) >= 0)
+ {
+ _IO_flockfile (stderr);
+
+ int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+ ((_IO_FILE *) stderr)->_flags2
+ |= _IO_FLAGS2_NOTCANCEL;
+
+ __fxprintf (NULL, "%s", buf);
+
+ ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+ _IO_funlockfile (stderr);
+
+ free (buf);
+ }
+#else
+ fprintf (stderr, _("\
+%s: option `-W %s' doesn't allow an argument\n"),
+ argv[0], pfound->name);
+#endif
+ }
+
+ d->__nextchar += strlen (d->__nextchar);
+ return '?';
+ }
+ }
+ else if (pfound->has_arg == 1)
+ {
+ if (d->optind < argc)
+ d->optarg = argv[d->optind++];
+ else
+ {
+ if (print_errors)
+ {
+#if defined _LIBC && defined USE_IN_LIBIO
+ char *buf;
+
+ if (__asprintf (&buf, _("\
+%s: option `%s' requires an argument\n"),
+ argv[0], argv[d->optind - 1]) >= 0)
+ {
+ _IO_flockfile (stderr);
+
+ int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+ ((_IO_FILE *) stderr)->_flags2
+ |= _IO_FLAGS2_NOTCANCEL;
+
+ __fxprintf (NULL, "%s", buf);
+
+ ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+ _IO_funlockfile (stderr);
+
+ free (buf);
+ }
+#else
+ fprintf (stderr,
+ _("%s: option `%s' requires an argument\n"),
+ argv[0], argv[d->optind - 1]);
+#endif
+ }
+ d->__nextchar += strlen (d->__nextchar);
+ return optstring[0] == ':' ? ':' : '?';
+ }
+ }
+ d->__nextchar += strlen (d->__nextchar);
+ if (longind != NULL)
+ *longind = option_index;
+ if (pfound->flag)
+ {
+ *(pfound->flag) = pfound->val;
+ return 0;
+ }
+ return pfound->val;
+ }
+ d->__nextchar = NULL;
+ return 'W'; /* Let the application handle it. */
+ }
+ if (temp[1] == ':')
+ {
+ if (temp[2] == ':')
+ {
+ /* This is an option that accepts an argument optionally. */
+ if (*d->__nextchar != '\0')
+ {
+ d->optarg = d->__nextchar;
+ d->optind++;
+ }
+ else
+ d->optarg = NULL;
+ d->__nextchar = NULL;
+ }
+ else
+ {
+ /* This is an option that requires an argument. */
+ if (*d->__nextchar != '\0')
+ {
+ d->optarg = d->__nextchar;
+ /* If we end this ARGV-element by taking the rest as an arg,
+ we must advance to the next element now. */
+ d->optind++;
+ }
+ else if (d->optind == argc)
+ {
+ if (print_errors)
+ {
+ /* 1003.2 specifies the format of this message. */
+#if defined _LIBC && defined USE_IN_LIBIO
+ char *buf;
+
+ if (__asprintf (&buf, _("\
+%s: option requires an argument -- %c\n"),
+ argv[0], c) >= 0)
+ {
+ _IO_flockfile (stderr);
+
+ int old_flags2 = ((_IO_FILE *) stderr)->_flags2;
+ ((_IO_FILE *) stderr)->_flags2 |= _IO_FLAGS2_NOTCANCEL;
+
+ __fxprintf (NULL, "%s", buf);
+
+ ((_IO_FILE *) stderr)->_flags2 = old_flags2;
+ _IO_funlockfile (stderr);
+
+ free (buf);
+ }
+#else
+ fprintf (stderr,
+ _("%s: option requires an argument -- %c\n"),
+ argv[0], c);
+#endif
+ }
+ d->optopt = c;
+ if (optstring[0] == ':')
+ c = ':';
+ else
+ c = '?';
+ }
+ else
+ /* We already incremented `optind' once;
+ increment it again when taking next ARGV-elt as argument. */
+ d->optarg = argv[d->optind++];
+ d->__nextchar = NULL;
+ }
+ }
+ return c;
+ }
+}
+
+int
+_getopt_internal (int argc, char **argv, const char *optstring,
+ const struct option *longopts, int *longind,
+ int long_only, int posixly_correct)
+{
+ int result;
+
+ getopt_data.optind = optind;
+ getopt_data.opterr = opterr;
+
+ result = _getopt_internal_r (argc, argv, optstring, longopts, longind,
+ long_only, posixly_correct, &getopt_data);
+
+ optind = getopt_data.optind;
+ optarg = getopt_data.optarg;
+ optopt = getopt_data.optopt;
+
+ return result;
+}
+
+/* glibc gets a LSB-compliant getopt.
+ Standalone applications get a POSIX-compliant getopt. */
+#if _LIBC
+enum { POSIXLY_CORRECT = 0 };
+#else
+enum { POSIXLY_CORRECT = 1 };
+#endif
+
+int
+getopt (int argc, char *const *argv, const char *optstring)
+{
+ return _getopt_internal (argc, (char **) argv, optstring, NULL, NULL, 0,
+ POSIXLY_CORRECT);
+}
+
+
+#ifdef TEST
+
+/* Compile with -DTEST to make an executable for use in testing
+ the above definition of `getopt'. */
+
+int
+main (int argc, char **argv)
+{
+ int c;
+ int digit_optind = 0;
+
+ while (1)
+ {
+ int this_option_optind = optind ? optind : 1;
+
+ c = getopt (argc, argv, "abc:d:0123456789");
+ if (c == -1)
+ break;
+
+ switch (c)
+ {
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ if (digit_optind != 0 && digit_optind != this_option_optind)
+ printf ("digits occur in two different argv-elements.\n");
+ digit_optind = this_option_optind;
+ printf ("option %c\n", c);
+ break;
+
+ case 'a':
+ printf ("option a\n");
+ break;
+
+ case 'b':
+ printf ("option b\n");
+ break;
+
+ case 'c':
+ printf ("option c with value `%s'\n", optarg);
+ break;
+
+ case '?':
+ break;
+
+ default:
+ printf ("?? getopt returned character code 0%o ??\n", c);
+ }
+ }
+
+ if (optind < argc)
+ {
+ printf ("non-option ARGV-elements: ");
+ while (optind < argc)
+ printf ("%s ", argv[optind++]);
+ printf ("\n");
+ }
+
+ exit (0);
+}
+
+#endif /* TEST */
diff --git a/lib/getopt1.c b/lib/getopt1.c
new file mode 100644
index 0000000..cc0746e
--- /dev/null
+++ b/lib/getopt1.c
@@ -0,0 +1,171 @@
+/* getopt_long and getopt_long_only entry points for GNU getopt.
+ Copyright (C) 1987,88,89,90,91,92,93,94,96,97,98,2004,2006
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#ifdef _LIBC
+# include <getopt.h>
+#else
+# include <config.h>
+# include "getopt.h"
+#endif
+#include "getopt_int.h"
+
+#include <stdio.h>
+
+/* This needs to come after some library #include
+ to get __GNU_LIBRARY__ defined. */
+#ifdef __GNU_LIBRARY__
+#include <stdlib.h>
+#endif
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+int
+getopt_long (int argc, char *__getopt_argv_const *argv, const char *options,
+ const struct option *long_options, int *opt_index)
+{
+ return _getopt_internal (argc, (char **) argv, options, long_options,
+ opt_index, 0, 0);
+}
+
+int
+_getopt_long_r (int argc, char **argv, const char *options,
+ const struct option *long_options, int *opt_index,
+ struct _getopt_data *d)
+{
+ return _getopt_internal_r (argc, argv, options, long_options, opt_index,
+ 0, 0, d);
+}
+
+/* Like getopt_long, but '-' as well as '--' can indicate a long option.
+ If an option that starts with '-' (not '--') doesn't match a long option,
+ but does match a short option, it is parsed as a short option
+ instead. */
+
+int
+getopt_long_only (int argc, char *__getopt_argv_const *argv,
+ const char *options,
+ const struct option *long_options, int *opt_index)
+{
+ return _getopt_internal (argc, (char **) argv, options, long_options,
+ opt_index, 1, 0);
+}
+
+int
+_getopt_long_only_r (int argc, char **argv, const char *options,
+ const struct option *long_options, int *opt_index,
+ struct _getopt_data *d)
+{
+ return _getopt_internal_r (argc, argv, options, long_options, opt_index,
+ 1, 0, d);
+}
+
+
+#ifdef TEST
+
+#include <stdio.h>
+
+int
+main (int argc, char **argv)
+{
+ int c;
+ int digit_optind = 0;
+
+ while (1)
+ {
+ int this_option_optind = optind ? optind : 1;
+ int option_index = 0;
+ static struct option long_options[] =
+ {
+ {"add", 1, 0, 0},
+ {"append", 0, 0, 0},
+ {"delete", 1, 0, 0},
+ {"verbose", 0, 0, 0},
+ {"create", 0, 0, 0},
+ {"file", 1, 0, 0},
+ {0, 0, 0, 0}
+ };
+
+ c = getopt_long (argc, argv, "abc:d:0123456789",
+ long_options, &option_index);
+ if (c == -1)
+ break;
+
+ switch (c)
+ {
+ case 0:
+ printf ("option %s", long_options[option_index].name);
+ if (optarg)
+ printf (" with arg %s", optarg);
+ printf ("\n");
+ break;
+
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ if (digit_optind != 0 && digit_optind != this_option_optind)
+ printf ("digits occur in two different argv-elements.\n");
+ digit_optind = this_option_optind;
+ printf ("option %c\n", c);
+ break;
+
+ case 'a':
+ printf ("option a\n");
+ break;
+
+ case 'b':
+ printf ("option b\n");
+ break;
+
+ case 'c':
+ printf ("option c with value `%s'\n", optarg);
+ break;
+
+ case 'd':
+ printf ("option d with value `%s'\n", optarg);
+ break;
+
+ case '?':
+ break;
+
+ default:
+ printf ("?? getopt returned character code 0%o ??\n", c);
+ }
+ }
+
+ if (optind < argc)
+ {
+ printf ("non-option ARGV-elements: ");
+ while (optind < argc)
+ printf ("%s ", argv[optind++]);
+ printf ("\n");
+ }
+
+ exit (0);
+}
+
+#endif /* TEST */
diff --git a/lib/getopt_.h b/lib/getopt_.h
new file mode 100644
index 0000000..3c406e5
--- /dev/null
+++ b/lib/getopt_.h
@@ -0,0 +1,225 @@
+/* Declarations for getopt.
+ Copyright (C) 1989-1994,1996-1999,2001,2003,2004,2005
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#ifndef _GETOPT_H
+
+#ifndef __need_getopt
+# define _GETOPT_H 1
+#endif
+
+/* Standalone applications should #define __GETOPT_PREFIX to an
+ identifier that prefixes the external functions and variables
+ defined in this header. When this happens, include the
+ headers that might declare getopt so that they will not cause
+ confusion if included after this file. Then systematically rename
+ identifiers so that they do not collide with the system functions
+ and variables. Renaming avoids problems with some compilers and
+ linkers. */
+#if defined __GETOPT_PREFIX && !defined __need_getopt
+# include <stdlib.h>
+# include <stdio.h>
+# include <unistd.h>
+# undef __need_getopt
+# undef getopt
+# undef getopt_long
+# undef getopt_long_only
+# undef optarg
+# undef opterr
+# undef optind
+# undef optopt
+# define __GETOPT_CONCAT(x, y) x ## y
+# define __GETOPT_XCONCAT(x, y) __GETOPT_CONCAT (x, y)
+# define __GETOPT_ID(y) __GETOPT_XCONCAT (__GETOPT_PREFIX, y)
+# define getopt __GETOPT_ID (getopt)
+# define getopt_long __GETOPT_ID (getopt_long)
+# define getopt_long_only __GETOPT_ID (getopt_long_only)
+# define optarg __GETOPT_ID (optarg)
+# define opterr __GETOPT_ID (opterr)
+# define optind __GETOPT_ID (optind)
+# define optopt __GETOPT_ID (optopt)
+#endif
+
+/* Standalone applications get correct prototypes for getopt_long and
+ getopt_long_only; they declare "char **argv". libc uses prototypes
+ with "char *const *argv" that are incorrect because getopt_long and
+ getopt_long_only can permute argv; this is required for backward
+ compatibility (e.g., for LSB 2.0.1).
+
+ This used to be `#if defined __GETOPT_PREFIX && !defined __need_getopt',
+ but it caused redefinition warnings if both unistd.h and getopt.h were
+ included, since unistd.h includes getopt.h having previously defined
+ __need_getopt.
+
+ The only place where __getopt_argv_const is used is in definitions
+ of getopt_long and getopt_long_only below, but these are visible
+ only if __need_getopt is not defined, so it is quite safe to rewrite
+ the conditional as follows:
+*/
+#if !defined __need_getopt
+# if defined __GETOPT_PREFIX
+# define __getopt_argv_const /* empty */
+# else
+# define __getopt_argv_const const
+# endif
+#endif
+
+/* If __GNU_LIBRARY__ is not already defined, either we are being used
+ standalone, or this is the first header included in the source file.
+ If we are being used with glibc, we need to include <features.h>, but
+ that does not exist if we are standalone. So: if __GNU_LIBRARY__ is
+ not defined, include <ctype.h>, which will pull in <features.h> for us
+ if it's from glibc. (Why ctype.h? It's guaranteed to exist and it
+ doesn't flood the namespace with stuff the way some other headers do.) */
+#if !defined __GNU_LIBRARY__
+# include <ctype.h>
+#endif
+
+#ifndef __THROW
+# ifndef __GNUC_PREREQ
+# define __GNUC_PREREQ(maj, min) (0)
+# endif
+# if defined __cplusplus && __GNUC_PREREQ (2,8)
+# define __THROW throw ()
+# else
+# define __THROW
+# endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* For communication from `getopt' to the caller.
+ When `getopt' finds an option that takes an argument,
+ the argument value is returned here.
+ Also, when `ordering' is RETURN_IN_ORDER,
+ each non-option ARGV-element is returned here. */
+
+extern char *optarg;
+
+/* Index in ARGV of the next element to be scanned.
+ This is used for communication to and from the caller
+ and for communication between successive calls to `getopt'.
+
+ On entry to `getopt', zero means this is the first call; initialize.
+
+ When `getopt' returns -1, this is the index of the first of the
+ non-option elements that the caller should itself scan.
+
+ Otherwise, `optind' communicates from one call to the next
+ how much of ARGV has been scanned so far. */
+
+extern int optind;
+
+/* Callers store zero here to inhibit the error message `getopt' prints
+ for unrecognized options. */
+
+extern int opterr;
+
+/* Set to an option character which was unrecognized. */
+
+extern int optopt;
+
+#ifndef __need_getopt
+/* Describe the long-named options requested by the application.
+ The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
+ of `struct option' terminated by an element containing a name which is
+ zero.
+
+ The field `has_arg' is:
+ no_argument (or 0) if the option does not take an argument,
+ required_argument (or 1) if the option requires an argument,
+ optional_argument (or 2) if the option takes an optional argument.
+
+ If the field `flag' is not NULL, it points to a variable that is set
+ to the value given in the field `val' when the option is found, but
+ left unchanged if the option is not found.
+
+ To have a long-named option do something other than set an `int' to
+ a compiled-in constant, such as set a value from `optarg', set the
+ option's `flag' field to zero and its `val' field to a nonzero
+ value (the equivalent single-letter option character, if there is
+ one). For long options that have a zero `flag' field, `getopt'
+ returns the contents of the `val' field. */
+
+struct option
+{
+ const char *name;
+ /* has_arg can't be an enum because some compilers complain about
+ type mismatches in all the code that assumes it is an int. */
+ int has_arg;
+ int *flag;
+ int val;
+};
+
+/* Names for the values of the `has_arg' field of `struct option'. */
+
+# define no_argument 0
+# define required_argument 1
+# define optional_argument 2
+#endif /* need getopt */
+
+
+/* Get definitions and prototypes for functions to process the
+ arguments in ARGV (ARGC of them, minus the program name) for
+ options given in OPTS.
+
+ Return the option character from OPTS just read. Return -1 when
+ there are no more options. For unrecognized options, or options
+ missing arguments, `optopt' is set to the option letter, and '?' is
+ returned.
+
+ The OPTS string is a list of characters which are recognized option
+ letters, optionally followed by colons, specifying that that letter
+ takes an argument, to be placed in `optarg'.
+
+ If a letter in OPTS is followed by two colons, its argument is
+ optional. This behavior is specific to the GNU `getopt'.
+
+ The argument `--' causes premature termination of argument
+ scanning, explicitly telling `getopt' that there are no more
+ options.
+
+ If OPTS begins with `--', then non-option arguments are treated as
+ arguments to the option '\0'. This behavior is specific to the GNU
+ `getopt'. */
+
+extern int getopt (int ___argc, char *const *___argv, const char *__shortopts)
+ __THROW;
+
+#ifndef __need_getopt
+extern int getopt_long (int ___argc, char *__getopt_argv_const *___argv,
+ const char *__shortopts,
+ const struct option *__longopts, int *__longind)
+ __THROW;
+extern int getopt_long_only (int ___argc, char *__getopt_argv_const *___argv,
+ const char *__shortopts,
+ const struct option *__longopts, int *__longind)
+ __THROW;
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+/* Make sure we later can get all the definitions and declarations. */
+#undef __need_getopt
+
+#endif /* getopt.h */
diff --git a/lib/getopt_int.h b/lib/getopt_int.h
new file mode 100644
index 0000000..401579f
--- /dev/null
+++ b/lib/getopt_int.h
@@ -0,0 +1,131 @@
+/* Internal declarations for getopt.
+ Copyright (C) 1989-1994,1996-1999,2001,2003,2004
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#ifndef _GETOPT_INT_H
+#define _GETOPT_INT_H 1
+
+extern int _getopt_internal (int ___argc, char **___argv,
+ const char *__shortopts,
+ const struct option *__longopts, int *__longind,
+ int __long_only, int __posixly_correct);
+
+
+/* Reentrant versions which can handle parsing multiple argument
+ vectors at the same time. */
+
+/* Data type for reentrant functions. */
+struct _getopt_data
+{
+ /* These have exactly the same meaning as the corresponding global
+ variables, except that they are used for the reentrant
+ versions of getopt. */
+ int optind;
+ int opterr;
+ int optopt;
+ char *optarg;
+
+ /* Internal members. */
+
+ /* True if the internal members have been initialized. */
+ int __initialized;
+
+ /* The next char to be scanned in the option-element
+ in which the last option character we returned was found.
+ This allows us to pick up the scan where we left off.
+
+ If this is zero, or a null string, it means resume the scan
+ by advancing to the next ARGV-element. */
+ char *__nextchar;
+
+ /* Describe how to deal with options that follow non-option ARGV-elements.
+
+ If the caller did not specify anything,
+ the default is REQUIRE_ORDER if the environment variable
+ POSIXLY_CORRECT is defined, PERMUTE otherwise.
+
+ REQUIRE_ORDER means don't recognize them as options;
+ stop option processing when the first non-option is seen.
+ This is what Unix does.
+ This mode of operation is selected by either setting the environment
+ variable POSIXLY_CORRECT, or using `+' as the first character
+ of the list of option characters, or by calling getopt.
+
+ PERMUTE is the default. We permute the contents of ARGV as we
+ scan, so that eventually all the non-options are at the end.
+ This allows options to be given in any order, even with programs
+ that were not written to expect this.
+
+ RETURN_IN_ORDER is an option available to programs that were
+ written to expect options and other ARGV-elements in any order
+ and that care about the ordering of the two. We describe each
+ non-option ARGV-element as if it were the argument of an option
+ with character code 1. Using `-' as the first character of the
+ list of option characters selects this mode of operation.
+
+ The special argument `--' forces an end of option-scanning regardless
+ of the value of `ordering'. In the case of RETURN_IN_ORDER, only
+ `--' can cause `getopt' to return -1 with `optind' != ARGC. */
+
+ enum
+ {
+ REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
+ } __ordering;
+
+ /* If the POSIXLY_CORRECT environment variable is set
+ or getopt was called. */
+ int __posixly_correct;
+
+
+ /* Handle permutation of arguments. */
+
+ /* Describe the part of ARGV that contains non-options that have
+ been skipped. `first_nonopt' is the index in ARGV of the first
+ of them; `last_nonopt' is the index after the last of them. */
+
+ int __first_nonopt;
+ int __last_nonopt;
+
+#if defined _LIBC && defined USE_NONOPTION_FLAGS
+ int __nonoption_flags_max_len;
+ int __nonoption_flags_len;
+# endif
+};
+
+/* The initializer is necessary to set OPTIND and OPTERR to their
+ default values and to clear the initialization flag. */
+#define _GETOPT_DATA_INITIALIZER { 1, 1 }
+
+extern int _getopt_internal_r (int ___argc, char **___argv,
+ const char *__shortopts,
+ const struct option *__longopts, int *__longind,
+ int __long_only, int __posixly_correct,
+ struct _getopt_data *__data);
+
+extern int _getopt_long_r (int ___argc, char **___argv,
+ const char *__shortopts,
+ const struct option *__longopts, int *__longind,
+ struct _getopt_data *__data);
+
+extern int _getopt_long_only_r (int ___argc, char **___argv,
+ const char *__shortopts,
+ const struct option *__longopts,
+ int *__longind,
+ struct _getopt_data *__data);
+
+#endif /* getopt_int.h */
diff --git a/lib/gettext.h b/lib/gettext.h
new file mode 100644
index 0000000..deeaf79
--- /dev/null
+++ b/lib/gettext.h
@@ -0,0 +1,242 @@
+/* Convenience header for conditional use of GNU <libintl.h>.
+ Copyright (C) 1995-1998, 2000-2002, 2004-2006 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#ifndef _LIBGETTEXT_H
+#define _LIBGETTEXT_H 1
+
+/* NLS can be disabled through the configure --disable-nls option. */
+#if ENABLE_NLS
+
+/* Get declarations of GNU message catalog functions. */
+# include <libintl.h>
+
+#else
+
+/* Solaris /usr/include/locale.h includes /usr/include/libintl.h, which
+ chokes if dcgettext is defined as a macro. So include it now, to make
+ later inclusions of <locale.h> a NOP. We don't include <libintl.h>
+ as well because people using "gettext.h" will not include <libintl.h>,
+ and also including <libintl.h> would fail on SunOS 4, whereas <locale.h>
+ is OK. */
+#if defined(__sun)
+# include <locale.h>
+#endif
+
+/* Many header files from the libstdc++ coming with g++ 3.3 or newer include
+ <libintl.h>, which chokes if dcgettext is defined as a macro. So include
+ it now, to make later inclusions of <libintl.h> a NOP. */
+#if defined(__cplusplus) && defined(__GNUG__) && (__GNUC__ >= 3)
+# include <cstdlib>
+# if (__GLIBC__ >= 2) || _GLIBCXX_HAVE_LIBINTL_H
+# include <libintl.h>
+# endif
+#endif
+
+/* Disabled NLS.
+ The casts to 'const char *' serve the purpose of producing warnings
+ for invalid uses of the value returned from these functions.
+ On pre-ANSI systems without 'const', the config.h file is supposed to
+ contain "#define const". */
+# define gettext(Msgid) ((const char *) (Msgid))
+# define dgettext(Domainname, Msgid) ((const char *) (Msgid))
+# define dcgettext(Domainname, Msgid, Category) ((const char *) (Msgid))
+# define ngettext(Msgid1, Msgid2, N) \
+ ((N) == 1 ? (const char *) (Msgid1) : (const char *) (Msgid2))
+# define dngettext(Domainname, Msgid1, Msgid2, N) \
+ ((N) == 1 ? (const char *) (Msgid1) : (const char *) (Msgid2))
+# define dcngettext(Domainname, Msgid1, Msgid2, N, Category) \
+ ((N) == 1 ? (const char *) (Msgid1) : (const char *) (Msgid2))
+# define textdomain(Domainname) ((const char *) (Domainname))
+# define bindtextdomain(Domainname, Dirname) ((const char *) (Dirname))
+# define bind_textdomain_codeset(Domainname, Codeset) ((const char *) (Codeset))
+
+#endif
+
+/* A pseudo function call that serves as a marker for the automated
+ extraction of messages, but does not call gettext(). The run-time
+ translation is done at a different place in the code.
+ The argument, String, should be a literal string. Concatenated strings
+ and other string expressions won't work.
+ The macro's expansion is not parenthesized, so that it is suitable as
+ initializer for static 'char[]' or 'const char[]' variables. */
+#define gettext_noop(String) String
+
+/* The separator between msgctxt and msgid in a .mo file. */
+#define GETTEXT_CONTEXT_GLUE "\004"
+
+/* Pseudo function calls, taking a MSGCTXT and a MSGID instead of just a
+ MSGID. MSGCTXT and MSGID must be string literals. MSGCTXT should be
+ short and rarely need to change.
+ The letter 'p' stands for 'particular' or 'special'. */
+#define pgettext(Msgctxt, Msgid) \
+ pgettext_aux (NULL, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, LC_MESSAGES)
+#define dpgettext(Domainname, Msgctxt, Msgid) \
+ pgettext_aux (Domainname, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, LC_MESSAGES)
+#define dcpgettext(Domainname, Msgctxt, Msgid, Category) \
+ pgettext_aux (Domainname, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, Category)
+#define npgettext(Msgctxt, Msgid, MsgidPlural, N) \
+ npgettext_aux (NULL, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, MsgidPlural, N, LC_MESSAGES)
+#define dnpgettext(Domainname, Msgctxt, Msgid, MsgidPlural, N) \
+ npgettext_aux (Domainname, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, MsgidPlural, N, LC_MESSAGES)
+#define dcnpgettext(Domainname, Msgctxt, Msgid, MsgidPlural, N, Category) \
+ npgettext_aux (Domainname, Msgctxt GETTEXT_CONTEXT_GLUE Msgid, Msgid, MsgidPlural, N, Category)
+
+#ifdef __GNUC__
+__inline
+#else
+#ifdef __cplusplus
+inline
+#endif
+#endif
+static const char *
+pgettext_aux (const char *domain,
+ const char *msg_ctxt_id, const char *msgid,
+ int category)
+{
+ const char *translation = dcgettext (domain, msg_ctxt_id, category);
+ if (translation == msg_ctxt_id)
+ return msgid;
+ else
+ return translation;
+}
+
+#ifdef __GNUC__
+__inline
+#else
+#ifdef __cplusplus
+inline
+#endif
+#endif
+static const char *
+npgettext_aux (const char *domain,
+ const char *msg_ctxt_id, const char *msgid,
+ const char *msgid_plural, unsigned long int n,
+ int category)
+{
+ const char *translation =
+ dcngettext (domain, msg_ctxt_id, msgid_plural, n, category);
+ if (translation == msg_ctxt_id || translation == msgid_plural)
+ return (n == 1 ? msgid : msgid_plural);
+ else
+ return translation;
+}
+
+/* The same thing extended for non-constant arguments. Here MSGCTXT and MSGID
+ can be arbitrary expressions. But for string literals these macros are
+ less efficient than those above. */
+
+#include <string.h>
+
+#define _LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS \
+ (__GNUC__ >= 3 || defined __cplusplus)
+
+#if !_LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS
+#include <stdlib.h>
+#endif
+
+#define pgettext_expr(Msgctxt, Msgid) \
+ dcpgettext_expr (NULL, Msgctxt, Msgid, LC_MESSAGES)
+#define dpgettext_expr(Domainname, Msgctxt, Msgid) \
+ dcpgettext_expr (Domainname, Msgctxt, Msgid, LC_MESSAGES)
+
+#ifdef __GNUC__
+__inline
+#else
+#ifdef __cplusplus
+inline
+#endif
+#endif
+static const char *
+dcpgettext_expr (const char *domain,
+ const char *msgctxt, const char *msgid,
+ int category)
+{
+ size_t msgctxt_len = strlen (msgctxt) + 1;
+ size_t msgid_len = strlen (msgid) + 1;
+ const char *translation;
+#if _LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS
+ char msg_ctxt_id[msgctxt_len + msgid_len];
+#else
+ char buf[1024];
+ char *msg_ctxt_id =
+ (msgctxt_len + msgid_len <= sizeof (buf)
+ ? buf
+ : (char *) malloc (msgctxt_len + msgid_len));
+ if (msg_ctxt_id != NULL)
+#endif
+ {
+ memcpy (msg_ctxt_id, msgctxt, msgctxt_len - 1);
+ msg_ctxt_id[msgctxt_len - 1] = '\004';
+ memcpy (msg_ctxt_id + msgctxt_len, msgid, msgid_len);
+ translation = dcgettext (domain, msg_ctxt_id, category);
+#if !_LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS
+ if (msg_ctxt_id != buf)
+ free (msg_ctxt_id);
+#endif
+ if (translation != msg_ctxt_id)
+ return translation;
+ }
+ return msgid;
+}
+
+#define npgettext_expr(Msgctxt, Msgid, MsgidPlural, N) \
+ dcnpgettext_expr (NULL, Msgctxt, Msgid, MsgidPlural, N, LC_MESSAGES)
+#define dnpgettext_expr(Domainname, Msgctxt, Msgid, MsgidPlural, N) \
+ dcnpgettext_expr (Domainname, Msgctxt, Msgid, MsgidPlural, N, LC_MESSAGES)
+
+#ifdef __GNUC__
+__inline
+#else
+#ifdef __cplusplus
+inline
+#endif
+#endif
+static const char *
+dcnpgettext_expr (const char *domain,
+ const char *msgctxt, const char *msgid,
+ const char *msgid_plural, unsigned long int n,
+ int category)
+{
+ size_t msgctxt_len = strlen (msgctxt) + 1;
+ size_t msgid_len = strlen (msgid) + 1;
+ const char *translation;
+#if _LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS
+ char msg_ctxt_id[msgctxt_len + msgid_len];
+#else
+ char buf[1024];
+ char *msg_ctxt_id =
+ (msgctxt_len + msgid_len <= sizeof (buf)
+ ? buf
+ : (char *) malloc (msgctxt_len + msgid_len));
+ if (msg_ctxt_id != NULL)
+#endif
+ {
+ memcpy (msg_ctxt_id, msgctxt, msgctxt_len - 1);
+ msg_ctxt_id[msgctxt_len - 1] = '\004';
+ memcpy (msg_ctxt_id + msgctxt_len, msgid, msgid_len);
+ translation = dcngettext (domain, msg_ctxt_id, msgid_plural, n, category);
+#if !_LIBGETTEXT_HAVE_VARIABLE_SIZE_ARRAYS
+ if (msg_ctxt_id != buf)
+ free (msg_ctxt_id);
+#endif
+ if (!(translation == msg_ctxt_id || translation == msgid_plural))
+ return translation;
+ }
+ return (n == 1 ? msgid : msgid_plural);
+}
+
+#endif /* _LIBGETTEXT_H */
diff --git a/lib/gnulib.mk b/lib/gnulib.mk
new file mode 100644
index 0000000..c10e764
--- /dev/null
+++ b/lib/gnulib.mk
@@ -0,0 +1,400 @@
+## Process this file with automake to produce Makefile.in.
+# Copyright (C) 2004-2006 Free Software Foundation, Inc.
+#
+# This file is free software, distributed under the terms of the GNU
+# General Public License. As a special exception to the GNU General
+# Public License, this file may be distributed as part of a program
+# that contains a configuration script generated by Autoconf, under
+# the same distribution terms as the rest of that program.
+#
+# Generated by gnulib-tool.
+# Reproduce by: gnulib-tool --import --dir=. --lib=libsed --source-base=lib --m4-base=build-aux --doc-base=doc --aux-dir=build-aux --makefile_name=gnulib.mk --no-libtool --macro-prefix=gl acl alloca getline getopt gettext localcharset memchr memcmp memmove mkstemp obstack pathmax regex stat-macros stdbool strerror strverscmp unlocked-io verify
+
+
+noinst_LIBRARIES += libsed.a
+
+libsed_a_SOURCES =
+libsed_a_LIBADD = $(LIBOBJS)
+MOSTLYCLEANFILES += core *.stackdump
+
+## begin gnulib module acl
+
+EXTRA_DIST += acl.h
+
+## end gnulib module acl
+
+## begin gnulib module alloca
+
+
+libsed_a_LIBADD += @ALLOCA@
+## end gnulib module alloca
+
+## begin gnulib module alloca-opt
+
+BUILT_SOURCES += $(ALLOCA_H)
+EXTRA_DIST += alloca_.h
+
+# We need the following in order to create <alloca.h> when the system
+# doesn't have one that works with the given compiler.
+alloca.h: alloca_.h
+ cp -f $(srcdir)/alloca_.h $@-t
+ mv -f $@-t $@
+MOSTLYCLEANFILES += alloca.h alloca.h-t
+
+## end gnulib module alloca-opt
+
+## begin gnulib module configmake
+
+# Retrieve values of the variables through 'configure' followed by
+# 'make', not directly through 'configure', so that a user who
+# sets some of these variables consistently on the 'make' command
+# line gets correct results.
+#
+# One advantage of this approach, compared to the classical
+# approach of adding -DLIBDIR=\"$(libdir)\" etc. to AM_CPPFLAGS,
+# is that it protects against the use of undefined variables.
+# If, say, $(libdir) is not set in the Makefile, LIBDIR is not
+# defined by this module, and code using LIBDIR gives a
+# compilation error.
+#
+# Another advantage is that 'make' output is shorter.
+#
+# Listed in the same order as the GNU makefile conventions.
+# The Automake-defined pkg* macros are appended, in the order
+# listed in the Automake documentation.
+configmake.h: Makefile
+ rm -f $@-t $@
+ ( \
+ echo '#define PREFIX "$(prefix)"'; \
+ echo '#define EXEC_PREFIX "$(exec_prefix)"'; \
+ echo '#define BINDIR "$(bindir)"'; \
+ echo '#define SBINDIR "$(sbindir)"'; \
+ echo '#define LIBEXECDIR "$(libexecdir)"'; \
+ echo '#define DATAROOTDIR "$(datarootdir)"'; \
+ echo '#define DATADIR "$(datadir)"'; \
+ echo '#define SYSCONFDIR "$(sysconfdir)"'; \
+ echo '#define SHAREDSTATEDIR "$(sharedstatedir)"'; \
+ echo '#define LOCALSTATEDIR "$(localstatedir)"'; \
+ echo '#define INCLUDEDIR "$(includedir)"'; \
+ echo '#define OLDINCLUDEDIR "$(oldincludedir)"'; \
+ echo '#define DOCDIR "$(docdir)"'; \
+ echo '#define INFODIR "$(infodir)"'; \
+ echo '#define HTMLDIR "$(htmldir)"'; \
+ echo '#define DVIDIR "$(dvidir)"'; \
+ echo '#define PDFDIR "$(pdfdir)"'; \
+ echo '#define PSDIR "$(psdir)"'; \
+ echo '#define LIBDIR "$(libdir)"'; \
+ echo '#define LISPDIR "$(lispdir)"'; \
+ echo '#define LOCALEDIR "$(localedir)"'; \
+ echo '#define MANDIR "$(mandir)"'; \
+ echo '#define MANEXT "$(manext)"'; \
+ echo '#define PKGLIBDIR "$(pkglibdir)"'; \
+ echo '#define PKGINCLUDEDIR "$(pkgincludedir)"'; \
+ echo '#define PKGDATADIR "$(pkgdatadir)"'; \
+ :) | sed '/""/d' >$@-t
+ mv $@-t $@
+BUILT_SOURCES += configmake.h
+CLEANFILES += configmake.h configmake.h-t
+
+## end gnulib module configmake
+
+## begin gnulib module exit
+
+libsed_a_SOURCES += exit.h
+
+## end gnulib module exit
+
+## begin gnulib module exitfail
+
+EXTRA_DIST += exitfail.h
+
+## end gnulib module exitfail
+
+## begin gnulib module getdelim
+
+EXTRA_DIST += getdelim.h
+
+## end gnulib module getdelim
+
+## begin gnulib module getline
+
+EXTRA_DIST += getline.h
+
+## end gnulib module getline
+
+## begin gnulib module getopt
+
+BUILT_SOURCES += $(GETOPT_H)
+EXTRA_DIST += getopt_.h getopt_int.h
+
+# We need the following in order to create <getopt.h> when the system
+# doesn't have one that works with the given compiler.
+getopt.h: getopt_.h
+ cp -f $(srcdir)/getopt_.h $@-t
+ mv -f $@-t $@
+MOSTLYCLEANFILES += getopt.h getopt.h-t
+
+## end gnulib module getopt
+
+## begin gnulib module gettext
+
+# This is for those projects which use "gettextize --intl" to put a source-code
+# copy of libintl into their package. In such projects, every Makefile.am needs
+# -I$(top_builddir)/intl, so that <libintl.h> can be found in this directory.
+# For the Makefile.ams in other directories it is the maintainer's
+# responsibility; for the one from gnulib we do it here.
+# This option has no effect when the user disables NLS (because then the intl
+# directory contains no libintl.h file) or when the project does not use
+# "gettextize --intl".
+AM_CPPFLAGS += -I$(top_builddir)/intl
+
+## end gnulib module gettext
+
+## begin gnulib module gettext-h
+
+libsed_a_SOURCES += gettext.h
+
+## end gnulib module gettext-h
+
+## begin gnulib module localcharset
+
+libsed_a_SOURCES += localcharset.h localcharset.c
+EXTRA_DIST += config.charset ref-add.sin ref-del.sin
+
+# We need the following in order to install a simple file in $(libdir)
+# which is shared with other installed packages. We use a list of referencing
+# packages so that "make uninstall" will remove the file if and only if it
+# is not used by another installed package.
+# On systems with glibc-2.1 or newer, the file is redundant, therefore we
+# avoid installing it.
+
+all-local: charset.alias ref-add.sed ref-del.sed
+
+charset_alias = $(DESTDIR)$(libdir)/charset.alias
+charset_tmp = $(DESTDIR)$(libdir)/charset.tmp
+install-exec-local: all-local
+ test $(GLIBC21) != no || $(mkinstalldirs) $(DESTDIR)$(libdir)
+ if test -f $(charset_alias); then \
+ sed -f ref-add.sed $(charset_alias) > $(charset_tmp) ; \
+ $(INSTALL_DATA) $(charset_tmp) $(charset_alias) ; \
+ rm -f $(charset_tmp) ; \
+ else \
+ if test $(GLIBC21) = no; then \
+ sed -f ref-add.sed charset.alias > $(charset_tmp) ; \
+ $(INSTALL_DATA) $(charset_tmp) $(charset_alias) ; \
+ rm -f $(charset_tmp) ; \
+ fi ; \
+ fi
+
+uninstall-local: all-local
+ if test -f $(charset_alias); then \
+ sed -f ref-del.sed $(charset_alias) > $(charset_tmp); \
+ if grep '^# Packages using this file: $$' $(charset_tmp) \
+ > /dev/null; then \
+ rm -f $(charset_alias); \
+ else \
+ $(INSTALL_DATA) $(charset_tmp) $(charset_alias); \
+ fi; \
+ rm -f $(charset_tmp); \
+ fi
+
+charset.alias: config.charset
+ rm -f t-$@ $@
+ $(SHELL) $(srcdir)/config.charset '$(host)' > t-$@
+ mv t-$@ $@
+
+SUFFIXES += .sed .sin
+.sin.sed:
+ rm -f t-$@ $@
+ sed -e '/^#/d' -e 's/@''PACKAGE''@/$(PACKAGE)/g' $< > t-$@
+ mv t-$@ $@
+
+CLEANFILES += charset.alias ref-add.sed ref-del.sed
+
+## end gnulib module localcharset
+
+## begin gnulib module mbchar
+
+libsed_a_SOURCES += mbchar.h
+
+## end gnulib module mbchar
+
+## begin gnulib module mbuiter
+
+libsed_a_SOURCES += mbuiter.h
+
+## end gnulib module mbuiter
+
+## begin gnulib module mkstemp
+
+EXTRA_DIST += mkstemp.h
+
+## end gnulib module mkstemp
+
+## begin gnulib module pathmax
+
+EXTRA_DIST += pathmax.h
+
+## end gnulib module pathmax
+
+## begin gnulib module quote
+
+EXTRA_DIST += quote.h
+
+## end gnulib module quote
+
+## begin gnulib module quotearg
+
+EXTRA_DIST += quotearg.h
+
+## end gnulib module quotearg
+
+## begin gnulib module regex
+
+EXTRA_DIST += regcomp.c regex.h regex_internal.c regex_internal.h regexec.c
+
+## end gnulib module regex
+
+## begin gnulib module stat-macros
+
+EXTRA_DIST += stat-macros.h
+
+## end gnulib module stat-macros
+
+## begin gnulib module stdbool
+
+BUILT_SOURCES += $(STDBOOL_H)
+EXTRA_DIST += stdbool_.h
+
+# We need the following in order to create <stdbool.h> when the system
+# doesn't have one that works.
+stdbool.h: stdbool_.h
+ rm -f $@-t $@
+ sed -e 's/@''HAVE__BOOL''@/$(HAVE__BOOL)/g' < $(srcdir)/stdbool_.h > $@-t
+ mv $@-t $@
+MOSTLYCLEANFILES += stdbool.h stdbool.h-t
+
+## end gnulib module stdbool
+
+## begin gnulib module stdint
+
+BUILT_SOURCES += $(STDINT_H)
+EXTRA_DIST += stdint_.h
+
+# We need the following in order to create <stdint.h> when the system
+# doesn't have one that works with the given compiler.
+stdint.h: stdint_.h
+ rm -f $@-t $@
+ sed -e 's/@''HAVE_WCHAR_H''@/$(HAVE_WCHAR_H)/g' \
+ -e 's/@''HAVE_STDINT_H''@/$(HAVE_STDINT_H)/g' \
+ -e 's|@''ABSOLUTE_STDINT_H''@|$(ABSOLUTE_STDINT_H)|g' \
+ -e 's/@''HAVE_SYS_TYPES_H''@/$(HAVE_SYS_TYPES_H)/g' \
+ -e 's/@''HAVE_INTTYPES_H''@/$(HAVE_INTTYPES_H)/g' \
+ -e 's/@''HAVE_SYS_INTTYPES_H''@/$(HAVE_SYS_INTTYPES_H)/g' \
+ -e 's/@''HAVE_SYS_BITYPES_H''@/$(HAVE_SYS_BITYPES_H)/g' \
+ -e 's/@''HAVE_LONG_LONG_INT''@/$(HAVE_LONG_LONG_INT)/g' \
+ -e 's/@''BITSIZEOF_PTRDIFF_T''@/$(BITSIZEOF_PTRDIFF_T)/g' \
+ -e 's/@''PTRDIFF_T_SUFFIX''@/$(PTRDIFF_T_SUFFIX)/g' \
+ -e 's/@''BITSIZEOF_SIG_ATOMIC_T''@/$(BITSIZEOF_SIG_ATOMIC_T)/g' \
+ -e 's/@''HAVE_SIGNED_SIG_ATOMIC_T''@/$(HAVE_SIGNED_SIG_ATOMIC_T)/g' \
+ -e 's/@''SIG_ATOMIC_T_SUFFIX''@/$(SIG_ATOMIC_T_SUFFIX)/g' \
+ -e 's/@''BITSIZEOF_SIZE_T''@/$(BITSIZEOF_SIZE_T)/g' \
+ -e 's/@''SIZE_T_SUFFIX''@/$(SIZE_T_SUFFIX)/g' \
+ -e 's/@''BITSIZEOF_WCHAR_T''@/$(BITSIZEOF_WCHAR_T)/g' \
+ -e 's/@''HAVE_SIGNED_WCHAR_T''@/$(HAVE_SIGNED_WCHAR_T)/g' \
+ -e 's/@''WCHAR_T_SUFFIX''@/$(WCHAR_T_SUFFIX)/g' \
+ -e 's/@''BITSIZEOF_WINT_T''@/$(BITSIZEOF_WINT_T)/g' \
+ -e 's/@''HAVE_SIGNED_WINT_T''@/$(HAVE_SIGNED_WINT_T)/g' \
+ -e 's/@''WINT_T_SUFFIX''@/$(WINT_T_SUFFIX)/g' \
+ < $(srcdir)/stdint_.h > $@-t
+ mv $@-t $@
+MOSTLYCLEANFILES += stdint.h stdint.h-t
+
+## end gnulib module stdint
+
+## begin gnulib module strcase
+
+libsed_a_SOURCES += strcase.h
+
+## end gnulib module strcase
+
+## begin gnulib module strnlen1
+
+libsed_a_SOURCES += strnlen1.h strnlen1.c
+
+## end gnulib module strnlen1
+
+## begin gnulib module strverscmp
+
+EXTRA_DIST += strverscmp.h
+
+## end gnulib module strverscmp
+
+## begin gnulib module sys_stat
+
+BUILT_SOURCES += $(SYS_STAT_H)
+EXTRA_DIST += stat_.h
+
+# We need the following in order to create <sys/stat.h> when the system
+# has one that is incomplete.
+sys/stat.h: stat_.h
+ test -d sys || mkdir sys
+ rm -f $@-t $@
+ sed -e 's|@''ABSOLUTE_SYS_STAT_H''@|$(ABSOLUTE_SYS_STAT_H)|g' \
+ < $(srcdir)/stat_.h > $@-t
+ mv $@-t $@
+MOSTLYCLEANFILES += sys/stat.h sys/stat.h-t
+MOSTLYCLEANDIRS += sys
+
+## end gnulib module sys_stat
+
+## begin gnulib module unistd
+
+BUILT_SOURCES += $(UNISTD_H)
+
+# We need the following in order to create an empty placeholder for
+# <unistd.h> when the system doesn't have one.
+unistd.h:
+ echo '/* Empty placeholder for $@. */' >$@
+MOSTLYCLEANFILES += unistd.h
+
+## end gnulib module unistd
+
+## begin gnulib module unlocked-io
+
+EXTRA_DIST += unlocked-io.h
+
+## end gnulib module unlocked-io
+
+## begin gnulib module verify
+
+libsed_a_SOURCES += verify.h
+
+## end gnulib module verify
+
+## begin gnulib module wcwidth
+
+libsed_a_SOURCES += wcwidth.h
+
+## end gnulib module wcwidth
+
+## begin gnulib module xalloc
+
+EXTRA_DIST += xalloc.h
+
+## end gnulib module xalloc
+
+## begin gnulib module xalloc-die
+
+libsed_a_SOURCES += xalloc-die.c
+
+## end gnulib module xalloc-die
+
+
+mostlyclean-local: mostlyclean-generic
+ @test -z "$(MOSTLYCLEANDIRS)" || \
+ for dir in $(MOSTLYCLEANDIRS); do \
+ if test -d $$dir; then \
+ echo "rmdir $$dir"; rmdir $$dir; \
+ fi; \
+ done
diff --git a/lib/localcharset.c b/lib/localcharset.c
new file mode 100644
index 0000000..a2fc5dc
--- /dev/null
+++ b/lib/localcharset.c
@@ -0,0 +1,460 @@
+/* Determine a canonical name for the current locale's character encoding.
+
+ Copyright (C) 2000-2006 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* Written by Bruno Haible <bruno@clisp.org>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "localcharset.h"
+
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#if defined _WIN32 || defined __WIN32__
+# define WIN32_NATIVE
+#endif
+
+#if defined __EMX__
+/* Assume EMX program runs on OS/2, even if compiled under DOS. */
+# define OS2
+#endif
+
+#if !defined WIN32_NATIVE
+# if HAVE_LANGINFO_CODESET
+# include <langinfo.h>
+# else
+# if HAVE_SETLOCALE
+# include <locale.h>
+# endif
+# endif
+# ifdef __CYGWIN__
+# define WIN32_LEAN_AND_MEAN
+# include <windows.h>
+# endif
+#elif defined WIN32_NATIVE
+# define WIN32_LEAN_AND_MEAN
+# include <windows.h>
+#endif
+#if defined OS2
+# define INCL_DOS
+# include <os2.h>
+#endif
+
+#if ENABLE_RELOCATABLE
+# include "relocatable.h"
+#else
+# define relocate(pathname) (pathname)
+#endif
+
+/* Get LIBDIR. */
+#ifndef LIBDIR
+# include "configmake.h"
+#endif
+
+#if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__
+ /* Win32, Cygwin, OS/2, DOS */
+# define ISSLASH(C) ((C) == '/' || (C) == '\\')
+#endif
+
+#ifndef DIRECTORY_SEPARATOR
+# define DIRECTORY_SEPARATOR '/'
+#endif
+
+#ifndef ISSLASH
+# define ISSLASH(C) ((C) == DIRECTORY_SEPARATOR)
+#endif
+
+#if HAVE_DECL_GETC_UNLOCKED
+# undef getc
+# define getc getc_unlocked
+#endif
+
+/* The following static variable is declared 'volatile' to avoid a
+ possible multithread problem in the function get_charset_aliases. If we
+ are running in a threaded environment, and if two threads initialize
+ 'charset_aliases' simultaneously, both will produce the same value,
+ and everything will be ok if the two assignments to 'charset_aliases'
+ are atomic. But I don't know what will happen if the two assignments mix. */
+#if __STDC__ != 1
+# define volatile /* empty */
+#endif
+/* Pointer to the contents of the charset.alias file, if it has already been
+ read, else NULL. Its format is:
+ ALIAS_1 '\0' CANONICAL_1 '\0' ... ALIAS_n '\0' CANONICAL_n '\0' '\0' */
+static const char * volatile charset_aliases;
+
+/* Return a pointer to the contents of the charset.alias file. */
+static const char *
+get_charset_aliases (void)
+{
+ const char *cp;
+
+ cp = charset_aliases;
+ if (cp == NULL)
+ {
+#if !(defined VMS || defined WIN32_NATIVE || defined __CYGWIN__)
+ FILE *fp;
+ const char *dir;
+ const char *base = "charset.alias";
+ char *file_name;
+
+ /* Make it possible to override the charset.alias location. This is
+ necessary for running the testsuite before "make install". */
+ dir = getenv ("CHARSETALIASDIR");
+ if (dir == NULL || dir[0] == '\0')
+ dir = relocate (LIBDIR);
+
+ /* Concatenate dir and base into freshly allocated file_name. */
+ {
+ size_t dir_len = strlen (dir);
+ size_t base_len = strlen (base);
+ int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1]));
+ file_name = (char *) malloc (dir_len + add_slash + base_len + 1);
+ if (file_name != NULL)
+ {
+ memcpy (file_name, dir, dir_len);
+ if (add_slash)
+ file_name[dir_len] = DIRECTORY_SEPARATOR;
+ memcpy (file_name + dir_len + add_slash, base, base_len + 1);
+ }
+ }
+
+ if (file_name == NULL || (fp = fopen (file_name, "r")) == NULL)
+ /* Out of memory or file not found, treat it as empty. */
+ cp = "";
+ else
+ {
+ /* Parse the file's contents. */
+ char *res_ptr = NULL;
+ size_t res_size = 0;
+
+ for (;;)
+ {
+ int c;
+ char buf1[50+1];
+ char buf2[50+1];
+ size_t l1, l2;
+ char *old_res_ptr;
+
+ c = getc (fp);
+ if (c == EOF)
+ break;
+ if (c == '\n' || c == ' ' || c == '\t')
+ continue;
+ if (c == '#')
+ {
+ /* Skip comment, to end of line. */
+ do
+ c = getc (fp);
+ while (!(c == EOF || c == '\n'));
+ if (c == EOF)
+ break;
+ continue;
+ }
+ ungetc (c, fp);
+ if (fscanf (fp, "%50s %50s", buf1, buf2) < 2)
+ break;
+ l1 = strlen (buf1);
+ l2 = strlen (buf2);
+ old_res_ptr = res_ptr;
+ if (res_size == 0)
+ {
+ res_size = l1 + 1 + l2 + 1;
+ res_ptr = (char *) malloc (res_size + 1);
+ }
+ else
+ {
+ res_size += l1 + 1 + l2 + 1;
+ res_ptr = (char *) realloc (res_ptr, res_size + 1);
+ }
+ if (res_ptr == NULL)
+ {
+ /* Out of memory. */
+ res_size = 0;
+ if (old_res_ptr != NULL)
+ free (old_res_ptr);
+ break;
+ }
+ strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1);
+ strcpy (res_ptr + res_size - (l2 + 1), buf2);
+ }
+ fclose (fp);
+ if (res_size == 0)
+ cp = "";
+ else
+ {
+ *(res_ptr + res_size) = '\0';
+ cp = res_ptr;
+ }
+ }
+
+ if (file_name != NULL)
+ free (file_name);
+
+#else
+
+# if defined VMS
+ /* To avoid the troubles of an extra file charset.alias_vms in the
+ sources of many GNU packages, simply inline the aliases here. */
+ /* The list of encodings is taken from the OpenVMS 7.3-1 documentation
+ "Compaq C Run-Time Library Reference Manual for OpenVMS systems"
+ section 10.7 "Handling Different Character Sets". */
+ cp = "ISO8859-1" "\0" "ISO-8859-1" "\0"
+ "ISO8859-2" "\0" "ISO-8859-2" "\0"
+ "ISO8859-5" "\0" "ISO-8859-5" "\0"
+ "ISO8859-7" "\0" "ISO-8859-7" "\0"
+ "ISO8859-8" "\0" "ISO-8859-8" "\0"
+ "ISO8859-9" "\0" "ISO-8859-9" "\0"
+ /* Japanese */
+ "eucJP" "\0" "EUC-JP" "\0"
+ "SJIS" "\0" "SHIFT_JIS" "\0"
+ "DECKANJI" "\0" "DEC-KANJI" "\0"
+ "SDECKANJI" "\0" "EUC-JP" "\0"
+ /* Chinese */
+ "eucTW" "\0" "EUC-TW" "\0"
+ "DECHANYU" "\0" "DEC-HANYU" "\0"
+ "DECHANZI" "\0" "GB2312" "\0"
+ /* Korean */
+ "DECKOREAN" "\0" "EUC-KR" "\0";
+# endif
+
+# if defined WIN32_NATIVE || defined __CYGWIN__
+ /* To avoid the troubles of installing a separate file in the same
+ directory as the DLL and of retrieving the DLL's directory at
+ runtime, simply inline the aliases here. */
+
+ cp = "CP936" "\0" "GBK" "\0"
+ "CP1361" "\0" "JOHAB" "\0"
+ "CP20127" "\0" "ASCII" "\0"
+ "CP20866" "\0" "KOI8-R" "\0"
+ "CP20936" "\0" "GB2312" "\0"
+ "CP21866" "\0" "KOI8-RU" "\0"
+ "CP28591" "\0" "ISO-8859-1" "\0"
+ "CP28592" "\0" "ISO-8859-2" "\0"
+ "CP28593" "\0" "ISO-8859-3" "\0"
+ "CP28594" "\0" "ISO-8859-4" "\0"
+ "CP28595" "\0" "ISO-8859-5" "\0"
+ "CP28596" "\0" "ISO-8859-6" "\0"
+ "CP28597" "\0" "ISO-8859-7" "\0"
+ "CP28598" "\0" "ISO-8859-8" "\0"
+ "CP28599" "\0" "ISO-8859-9" "\0"
+ "CP28605" "\0" "ISO-8859-15" "\0"
+ "CP38598" "\0" "ISO-8859-8" "\0"
+ "CP51932" "\0" "EUC-JP" "\0"
+ "CP51936" "\0" "GB2312" "\0"
+ "CP51949" "\0" "EUC-KR" "\0"
+ "CP51950" "\0" "EUC-TW" "\0"
+ "CP54936" "\0" "GB18030" "\0"
+ "CP65001" "\0" "UTF-8" "\0";
+# endif
+#endif
+
+ charset_aliases = cp;
+ }
+
+ return cp;
+}
+
+/* Determine the current locale's character encoding, and canonicalize it
+ into one of the canonical names listed in config.charset.
+ The result must not be freed; it is statically allocated.
+ If the canonical name cannot be determined, the result is a non-canonical
+ name. */
+
+#ifdef STATIC
+STATIC
+#endif
+const char *
+locale_charset (void)
+{
+ const char *codeset;
+ const char *aliases;
+
+#if !(defined WIN32_NATIVE || defined OS2)
+
+# if HAVE_LANGINFO_CODESET
+
+ /* Most systems support nl_langinfo (CODESET) nowadays. */
+ codeset = nl_langinfo (CODESET);
+
+# ifdef __CYGWIN__
+ /* Cygwin 2006 does not have locales. nl_langinfo (CODESET) always
+ returns "US-ASCII". As long as this is not fixed, return the suffix
+ of the locale name from the environment variables (if present) or
+ the codepage as a number. */
+ if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0)
+ {
+ const char *locale;
+ static char buf[2 + 10 + 1];
+
+ locale = getenv ("LC_ALL");
+ if (locale == NULL || locale[0] == '\0')
+ {
+ locale = getenv ("LC_CTYPE");
+ if (locale == NULL || locale[0] == '\0')
+ locale = getenv ("LANG");
+ }
+ if (locale != NULL && locale[0] != '\0')
+ {
+ /* If the locale name contains an encoding after the dot, return
+ it. */
+ const char *dot = strchr (locale, '.');
+
+ if (dot != NULL)
+ {
+ const char *modifier;
+
+ dot++;
+ /* Look for the possible @... trailer and remove it, if any. */
+ modifier = strchr (dot, '@');
+ if (modifier == NULL)
+ return dot;
+ if (modifier - dot < sizeof (buf))
+ {
+ memcpy (buf, dot, modifier - dot);
+ buf [modifier - dot] = '\0';
+ return buf;
+ }
+ }
+ }
+
+ /* Woe32 has a function returning the locale's codepage as a number. */
+ sprintf (buf, "CP%u", GetACP ());
+ codeset = buf;
+ }
+# endif
+
+# else
+
+ /* On old systems which lack it, use setlocale or getenv. */
+ const char *locale = NULL;
+
+ /* But most old systems don't have a complete set of locales. Some
+ (like SunOS 4 or DJGPP) have only the C locale. Therefore we don't
+ use setlocale here; it would return "C" when it doesn't support the
+ locale name the user has set. */
+# if HAVE_SETLOCALE && 0
+ locale = setlocale (LC_CTYPE, NULL);
+# endif
+ if (locale == NULL || locale[0] == '\0')
+ {
+ locale = getenv ("LC_ALL");
+ if (locale == NULL || locale[0] == '\0')
+ {
+ locale = getenv ("LC_CTYPE");
+ if (locale == NULL || locale[0] == '\0')
+ locale = getenv ("LANG");
+ }
+ }
+
+ /* On some old systems, one used to set locale = "iso8859_1". On others,
+ you set it to "language_COUNTRY.charset". In any case, we resolve it
+ through the charset.alias file. */
+ codeset = locale;
+
+# endif
+
+#elif defined WIN32_NATIVE
+
+ static char buf[2 + 10 + 1];
+
+ /* Woe32 has a function returning the locale's codepage as a number. */
+ sprintf (buf, "CP%u", GetACP ());
+ codeset = buf;
+
+#elif defined OS2
+
+ const char *locale;
+ static char buf[2 + 10 + 1];
+ ULONG cp[3];
+ ULONG cplen;
+
+ /* Allow user to override the codeset, as set in the operating system,
+ with standard language environment variables. */
+ locale = getenv ("LC_ALL");
+ if (locale == NULL || locale[0] == '\0')
+ {
+ locale = getenv ("LC_CTYPE");
+ if (locale == NULL || locale[0] == '\0')
+ locale = getenv ("LANG");
+ }
+ if (locale != NULL && locale[0] != '\0')
+ {
+ /* If the locale name contains an encoding after the dot, return it. */
+ const char *dot = strchr (locale, '.');
+
+ if (dot != NULL)
+ {
+ const char *modifier;
+
+ dot++;
+ /* Look for the possible @... trailer and remove it, if any. */
+ modifier = strchr (dot, '@');
+ if (modifier == NULL)
+ return dot;
+ if (modifier - dot < sizeof (buf))
+ {
+ memcpy (buf, dot, modifier - dot);
+ buf [modifier - dot] = '\0';
+ return buf;
+ }
+ }
+
+ /* Resolve through the charset.alias file. */
+ codeset = locale;
+ }
+ else
+ {
+ /* OS/2 has a function returning the locale's codepage as a number. */
+ if (DosQueryCp (sizeof (cp), cp, &cplen))
+ codeset = "";
+ else
+ {
+ sprintf (buf, "CP%u", cp[0]);
+ codeset = buf;
+ }
+ }
+
+#endif
+
+ if (codeset == NULL)
+ /* The canonical name cannot be determined. */
+ codeset = "";
+
+ /* Resolve alias. */
+ for (aliases = get_charset_aliases ();
+ *aliases != '\0';
+ aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
+ if (strcmp (codeset, aliases) == 0
+ || (aliases[0] == '*' && aliases[1] == '\0'))
+ {
+ codeset = aliases + strlen (aliases) + 1;
+ break;
+ }
+
+ /* Don't return an empty string. GNU libc and GNU libiconv interpret
+ the empty string as denoting "the locale's character encoding",
+ thus GNU libiconv would call this function a second time. */
+ if (codeset[0] == '\0')
+ codeset = "ASCII";
+
+ return codeset;
+}
diff --git a/lib/localcharset.h b/lib/localcharset.h
new file mode 100644
index 0000000..5030210
--- /dev/null
+++ b/lib/localcharset.h
@@ -0,0 +1,41 @@
+/* Determine a canonical name for the current locale's character encoding.
+ Copyright (C) 2000-2003 Free Software Foundation, Inc.
+ This file is part of the GNU CHARSET Library.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#ifndef _LOCALCHARSET_H
+#define _LOCALCHARSET_H
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* Determine the current locale's character encoding, and canonicalize it
+ into one of the canonical names listed in config.charset.
+ The result must not be freed; it is statically allocated.
+ If the canonical name cannot be determined, the result is a non-canonical
+ name. */
+extern const char * locale_charset (void);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _LOCALCHARSET_H */
diff --git a/lib/malloc.c b/lib/malloc.c
new file mode 100644
index 0000000..d4dae3e
--- /dev/null
+++ b/lib/malloc.c
@@ -0,0 +1,35 @@
+/* malloc() function that is glibc compatible.
+
+ Copyright (C) 1997, 1998, 2006 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* written by Jim Meyering */
+
+#include <config.h>
+#undef malloc
+
+#include <stdlib.h>
+
+/* Allocate an N-byte block of memory from the heap.
+ If N is zero, allocate a 1-byte block. */
+
+void *
+rpl_malloc (size_t n)
+{
+ if (n == 0)
+ n = 1;
+ return malloc (n);
+}
diff --git a/lib/mbchar.c b/lib/mbchar.c
new file mode 100644
index 0000000..590109b
--- /dev/null
+++ b/lib/mbchar.c
@@ -0,0 +1,36 @@
+/* Copyright (C) 2001, 2006 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+
+#include <config.h>
+
+#include <limits.h>
+
+#include "mbchar.h"
+
+#if IS_BASIC_ASCII
+
+/* Bit table of characters in the ISO C "basic character set". */
+unsigned int is_basic_table [UCHAR_MAX / 32 + 1] =
+{
+ 0x00001a00, /* '\t' '\v' '\f' */
+ 0xffffffef, /* ' '...'#' '%'...'?' */
+ 0xfffffffe, /* 'A'...'Z' '[' '\\' ']' '^' '_' */
+ 0x7ffffffe /* 'a'...'z' '{' '|' '}' '~' */
+ /* The remaining bits are 0. */
+};
+
+#endif /* IS_BASIC_ASCII */
diff --git a/lib/mbchar.h b/lib/mbchar.h
new file mode 100644
index 0000000..bf31027
--- /dev/null
+++ b/lib/mbchar.h
@@ -0,0 +1,478 @@
+/* Multibyte character data type.
+ Copyright (C) 2001, 2005-2006 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+/* Written by Bruno Haible <bruno@clisp.org>. */
+
+/* A multibyte character is a short subsequence of a char* string,
+ representing a single wide character.
+
+ We use multibyte characters instead of wide characters because of
+ the following goals:
+ 1) correct multibyte handling, i.e. operate according to the LC_CTYPE
+ locale,
+ 2) ease of maintenance, i.e. the maintainer needs not know all details
+ of the ISO C 99 standard,
+ 3) don't fail grossly if the input is not in the encoding set by the
+ locale, because often different encodings are in use in the same
+ countries (ISO-8859-1/UTF-8, EUC-JP/Shift_JIS, ...),
+ 4) fast in the case of ASCII characters,
+ 5) portability, i.e. don't make unportable assumptions about wchar_t.
+
+ Multibyte characters are only accessed through the mb* macros.
+
+ mb_ptr (mbc)
+ return a pointer to the beginning of the multibyte sequence.
+
+ mb_len (mbc)
+ returns the number of bytes occupied by the multibyte sequence.
+ Always > 0.
+
+ mb_iseq (mbc, sc)
+ returns true if mbc is the standard ASCII character sc.
+
+ mb_isnul (mbc)
+ returns true if mbc is the nul character.
+
+ mb_cmp (mbc1, mbc2)
+ returns a positive, zero, or negative value depending on whether mbc1
+ sorts after, same or before mbc2.
+
+ mb_casecmp (mbc1, mbc2)
+ returns a positive, zero, or negative value depending on whether mbc1
+ sorts after, same or before mbc2, modulo upper/lowercase conversion.
+
+ mb_equal (mbc1, mbc2)
+ returns true if mbc1 and mbc2 are equal.
+
+ mb_caseequal (mbc1, mbc2)
+ returns true if mbc1 and mbc2 are equal modulo upper/lowercase conversion.
+
+ mb_isalnum (mbc)
+ returns true if mbc is alphanumeric.
+
+ mb_isalpha (mbc)
+ returns true if mbc is alphabetic.
+
+ mb_isascii(mbc)
+ returns true if mbc is plain ASCII.
+
+ mb_isblank (mbc)
+ returns true if mbc is a blank.
+
+ mb_iscntrl (mbc)
+ returns true if mbc is a control character.
+
+ mb_isdigit (mbc)
+ returns true if mbc is a decimal digit.
+
+ mb_isgraph (mbc)
+ returns true if mbc is a graphic character.
+
+ mb_islower (mbc)
+ returns true if mbc is lowercase.
+
+ mb_isprint (mbc)
+ returns true if mbc is a printable character.
+
+ mb_ispunct (mbc)
+ returns true if mbc is a punctuation character.
+
+ mb_isspace (mbc)
+ returns true if mbc is a space character.
+
+ mb_isupper (mbc)
+ returns true if mbc is uppercase.
+
+ mb_isxdigit (mbc)
+ returns true if mbc is a hexadecimal digit.
+
+ mb_width (mbc)
+ returns the number of columns on the output device occupied by mbc.
+ Always >= 0.
+
+ mb_putc (mbc, stream)
+ outputs mbc on stream, a byte oriented FILE stream opened for output.
+
+ mb_setascii (&mbc, sc)
+ assigns the standard ASCII character sc to mbc.
+
+ mb_copy (&destmbc, &srcmbc)
+ copies srcmbc to destmbc.
+
+ Here are the function prototypes of the macros.
+
+ extern const char * mb_ptr (const mbchar_t mbc);
+ extern size_t mb_len (const mbchar_t mbc);
+ extern bool mb_iseq (const mbchar_t mbc, char sc);
+ extern bool mb_isnul (const mbchar_t mbc);
+ extern int mb_cmp (const mbchar_t mbc1, const mbchar_t mbc2);
+ extern int mb_casecmp (const mbchar_t mbc1, const mbchar_t mbc2);
+ extern bool mb_equal (const mbchar_t mbc1, const mbchar_t mbc2);
+ extern bool mb_caseequal (const mbchar_t mbc1, const mbchar_t mbc2);
+ extern bool mb_isalnum (const mbchar_t mbc);
+ extern bool mb_isalpha (const mbchar_t mbc);
+ extern bool mb_isascii (const mbchar_t mbc);
+ extern bool mb_isblank (const mbchar_t mbc);
+ extern bool mb_iscntrl (const mbchar_t mbc);
+ extern bool mb_isdigit (const mbchar_t mbc);
+ extern bool mb_isgraph (const mbchar_t mbc);
+ extern bool mb_islower (const mbchar_t mbc);
+ extern bool mb_isprint (const mbchar_t mbc);
+ extern bool mb_ispunct (const mbchar_t mbc);
+ extern bool mb_isspace (const mbchar_t mbc);
+ extern bool mb_isupper (const mbchar_t mbc);
+ extern bool mb_isxdigit (const mbchar_t mbc);
+ extern int mb_width (const mbchar_t mbc);
+ extern void mb_putc (const mbchar_t mbc, FILE *stream);
+ extern void mb_setascii (mbchar_t *new, char sc);
+ extern void mb_copy (mbchar_t *new, const mbchar_t *old);
+ */
+
+#ifndef _MBCHAR_H
+#define _MBCHAR_H 1
+
+#include <stdbool.h>
+#include <string.h>
+
+/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
+ <wchar.h>.
+ BSD/OS 4.1 has a bug: <stdio.h> and <time.h> must be included before
+ <wchar.h>. */
+#include <stdio.h>
+#include <time.h>
+#include <wchar.h>
+
+/* BeOS 5 has the functions but no <wctype.h>. */
+#if HAVE_WCTYPE_H
+# include <wctype.h>
+#endif
+/* FreeBSD 4.4 to 4.11 has <wctype.h> but lacks the functions.
+ Assume all 12 functions are implemented the same way, or not at all. */
+#if !defined iswalnum && !HAVE_ISWCNTRL
+static inline int
+iswalnum (wint_t wc)
+{
+ return (wc >= 0 && wc < 128
+ ? (wc >= '0' && wc <= '9') || ((wc & ~0x20) >= 'A' && (wc & ~0x20) <= 'Z')
+ : 0);
+}
+# define iswalnum iswalnum
+#endif
+#if !defined iswalpha && !HAVE_ISWCNTRL
+static inline int
+iswalpha (wint_t wc)
+{
+ return (wc >= 0 && wc < 128
+ ? (wc & ~0x20) >= 'A' && (wc & ~0x20) <= 'Z'
+ : 0);
+}
+# define iswalpha iswalpha
+#endif
+#if !defined iswblank && !HAVE_ISWCNTRL
+static inline int
+iswblank (wint_t wc)
+{
+ return (wc >= 0 && wc < 128
+ ? wc == ' ' || wc == '\t'
+ : 0);
+}
+# define iswblank iswblank
+#endif
+#if !defined iswcntrl && !HAVE_ISWCNTRL
+static inline int
+iswcntrl (wint_t wc)
+{
+ return (wc >= 0 && wc < 128
+ ? (wc & ~0x1f) == 0 || wc == 0x7f
+ : 0);
+}
+# define iswcntrl iswcntrl
+#endif
+#if !defined iswdigit && !HAVE_ISWCNTRL
+static inline int
+iswdigit (wint_t wc)
+{
+ return (wc >= '0' && wc <= '9');
+}
+# define iswdigit iswdigit
+#endif
+#if !defined iswgraph && !HAVE_ISWCNTRL
+static inline int
+iswgraph (wint_t wc)
+{
+ return (wc >= 0 && wc < 128
+ ? wc >= '!' && wc <= '~'
+ : 1);
+}
+# define iswgraph iswgraph
+#endif
+#if !defined iswlower && !HAVE_ISWCNTRL
+static inline int
+iswlower (wint_t wc)
+{
+ return (wc >= 0 && wc < 128
+ ? wc >= 'a' && wc <= 'z'
+ : 0);
+}
+# define iswlower iswlower
+#endif
+#if !defined iswprint && !HAVE_ISWCNTRL
+static inline int
+iswprint (wint_t wc)
+{
+ return (wc >= 0 && wc < 128
+ ? wc >= ' ' && wc <= '~'
+ : 1);
+}
+# define iswprint iswprint
+#endif
+#if !defined iswpunct && !HAVE_ISWCNTRL
+static inline int
+iswpunct (wint_t wc)
+{
+ return (wc >= 0 && wc < 128
+ ? wc >= '!' && wc <= '~'
+ && !((wc >= '0' && wc <= '9')
+ || ((wc & ~0x20) >= 'A' && (wc & ~0x20) <= 'Z'))
+ : 1);
+}
+# define iswpunct iswpunct
+#endif
+#if !defined iswspace && !HAVE_ISWCNTRL
+static inline int
+iswspace (wint_t wc)
+{
+ return (wc >= 0 && wc < 128
+ ? wc == ' ' || wc == '\t'
+ || wc == '\n' || wc == '\v' || wc == '\f' || wc == '\r'
+ : 0);
+}
+# define iswspace iswspace
+#endif
+#if !defined iswupper && !HAVE_ISWCNTRL
+static inline int
+iswupper (wint_t wc)
+{
+ return (wc >= 0 && wc < 128
+ ? wc >= 'A' && wc <= 'Z'
+ : 0);
+}
+# define iswupper iswupper
+#endif
+#if !defined iswxdigit && !HAVE_ISWCNTRL
+static inline int
+iswxdigit (wint_t wc)
+{
+ return (wc >= '0' && wc <= '9') || ((wc & ~0x20) >= 'A' && (wc & ~0x20) <= 'F');
+}
+# define iswxdigit iswxdigit
+#endif
+
+#include "wcwidth.h"
+
+#define MBCHAR_BUF_SIZE 24
+
+struct mbchar
+{
+ const char *ptr; /* pointer to current character */
+ size_t bytes; /* number of bytes of current character, > 0 */
+ bool wc_valid; /* true if wc is a valid wide character */
+ wchar_t wc; /* if wc_valid: the current character */
+ char buf[MBCHAR_BUF_SIZE]; /* room for the bytes, used for file input only */
+};
+
+/* EOF (not a real character) is represented with bytes = 0 and
+ wc_valid = false. */
+
+typedef struct mbchar mbchar_t;
+
+/* Access the current character. */
+#define mb_ptr(mbc) ((mbc).ptr)
+#define mb_len(mbc) ((mbc).bytes)
+
+/* Comparison of characters. */
+#define mb_iseq(mbc, sc) ((mbc).wc_valid && (mbc).wc == (sc))
+#define mb_isnul(mbc) ((mbc).wc_valid && (mbc).wc == 0)
+#define mb_cmp(mbc1, mbc2) \
+ ((mbc1).wc_valid \
+ ? ((mbc2).wc_valid \
+ ? (int) (mbc1).wc - (int) (mbc2).wc \
+ : -1) \
+ : ((mbc2).wc_valid \
+ ? 1 \
+ : (mbc1).bytes == (mbc2).bytes \
+ ? memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) \
+ : (mbc1).bytes < (mbc2).bytes \
+ ? (memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) > 0 ? 1 : -1) \
+ : (memcmp ((mbc1).ptr, (mbc2).ptr, (mbc2).bytes) >= 0 ? 1 : -1)))
+#define mb_casecmp(mbc1, mbc2) \
+ ((mbc1).wc_valid \
+ ? ((mbc2).wc_valid \
+ ? (int) towlower ((mbc1).wc) - (int) towlower ((mbc2).wc) \
+ : -1) \
+ : ((mbc2).wc_valid \
+ ? 1 \
+ : (mbc1).bytes == (mbc2).bytes \
+ ? memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) \
+ : (mbc1).bytes < (mbc2).bytes \
+ ? (memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) > 0 ? 1 : -1) \
+ : (memcmp ((mbc1).ptr, (mbc2).ptr, (mbc2).bytes) >= 0 ? 1 : -1)))
+#define mb_equal(mbc1, mbc2) \
+ ((mbc1).wc_valid && (mbc2).wc_valid \
+ ? (mbc1).wc == (mbc2).wc \
+ : (mbc1).bytes == (mbc2).bytes \
+ && memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) == 0)
+#define mb_caseequal(mbc1, mbc2) \
+ ((mbc1).wc_valid && (mbc2).wc_valid \
+ ? towlower ((mbc1).wc) == towlower ((mbc2).wc) \
+ : (mbc1).bytes == (mbc2).bytes \
+ && memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) == 0)
+
+/* <ctype.h>, <wctype.h> classification. */
+#define mb_isascii(mbc) \
+ ((mbc).wc_valid && (mbc).wc >= 0 && (mbc).wc <= 127)
+#define mb_isalnum(mbc) ((mbc).wc_valid && iswalnum ((mbc).wc))
+#define mb_isalpha(mbc) ((mbc).wc_valid && iswalpha ((mbc).wc))
+#define mb_isblank(mbc) ((mbc).wc_valid && iswblank ((mbc).wc))
+#define mb_iscntrl(mbc) ((mbc).wc_valid && iswcntrl ((mbc).wc))
+#define mb_isdigit(mbc) ((mbc).wc_valid && iswdigit ((mbc).wc))
+#define mb_isgraph(mbc) ((mbc).wc_valid && iswgraph ((mbc).wc))
+#define mb_islower(mbc) ((mbc).wc_valid && iswlower ((mbc).wc))
+#define mb_isprint(mbc) ((mbc).wc_valid && iswprint ((mbc).wc))
+#define mb_ispunct(mbc) ((mbc).wc_valid && iswpunct ((mbc).wc))
+#define mb_isspace(mbc) ((mbc).wc_valid && iswspace ((mbc).wc))
+#define mb_isupper(mbc) ((mbc).wc_valid && iswupper ((mbc).wc))
+#define mb_isxdigit(mbc) ((mbc).wc_valid && iswxdigit ((mbc).wc))
+
+/* Extra <wchar.h> function. */
+
+/* Unprintable characters appear as a small box of width 1. */
+#define MB_UNPRINTABLE_WIDTH 1
+
+static inline int
+mb_width_aux (wint_t wc)
+{
+ int w = wcwidth (wc);
+ /* For unprintable characters, arbitrarily return 0 for control characters
+ and MB_UNPRINTABLE_WIDTH otherwise. */
+ return (w >= 0 ? w : iswcntrl (wc) ? 0 : MB_UNPRINTABLE_WIDTH);
+}
+
+#define mb_width(mbc) \
+ ((mbc).wc_valid ? mb_width_aux ((mbc).wc) : MB_UNPRINTABLE_WIDTH)
+
+/* Output. */
+#define mb_putc(mbc, stream) fwrite ((mbc).ptr, 1, (mbc).bytes, (stream))
+
+/* Assignment. */
+#define mb_setascii(mbc, sc) \
+ ((mbc)->ptr = (mbc)->buf, (mbc)->bytes = 1, (mbc)->wc_valid = 1, \
+ (mbc)->wc = (mbc)->buf[0] = (sc))
+
+/* Copying a character. */
+static inline void
+mb_copy (mbchar_t *new, const mbchar_t *old)
+{
+ if (old->ptr == &old->buf[0])
+ {
+ memcpy (&new->buf[0], &old->buf[0], old->bytes);
+ new->ptr = &new->buf[0];
+ }
+ else
+ new->ptr = old->ptr;
+ new->bytes = old->bytes;
+ if ((new->wc_valid = old->wc_valid))
+ new->wc = old->wc;
+}
+
+
+/* is_basic(c) tests whether the single-byte character c is in the
+ ISO C "basic character set".
+ This is a convenience function, and is in this file only to share code
+ between mbiter_multi.h and mbfile_multi.h. */
+#if (' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
+ && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
+ && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
+ && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
+ && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
+ && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
+ && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
+ && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
+ && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
+ && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
+ && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
+ && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
+ && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
+ && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
+ && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
+ && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
+ && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
+ && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
+ && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
+ && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
+ && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
+ && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
+ && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)
+/* The character set is ISO-646, not EBCDIC. */
+# define IS_BASIC_ASCII 1
+
+extern unsigned int is_basic_table[];
+
+static inline bool
+is_basic (char c)
+{
+ return (is_basic_table [(unsigned char) c >> 5] >> ((unsigned char) c & 31))
+ & 1;
+}
+
+#else
+
+static inline bool
+is_basic (char c)
+{
+ switch (c)
+ {
+ case '\t': case '\v': case '\f':
+ case ' ': case '!': case '"': case '#': case '%':
+ case '&': case '\'': case '(': case ')': case '*':
+ case '+': case ',': case '-': case '.': case '/':
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ case ':': case ';': case '<': case '=': case '>':
+ case '?':
+ case 'A': case 'B': case 'C': case 'D': case 'E':
+ case 'F': case 'G': case 'H': case 'I': case 'J':
+ case 'K': case 'L': case 'M': case 'N': case 'O':
+ case 'P': case 'Q': case 'R': case 'S': case 'T':
+ case 'U': case 'V': case 'W': case 'X': case 'Y':
+ case 'Z':
+ case '[': case '\\': case ']': case '^': case '_':
+ case 'a': case 'b': case 'c': case 'd': case 'e':
+ case 'f': case 'g': case 'h': case 'i': case 'j':
+ case 'k': case 'l': case 'm': case 'n': case 'o':
+ case 'p': case 'q': case 'r': case 's': case 't':
+ case 'u': case 'v': case 'w': case 'x': case 'y':
+ case 'z': case '{': case '|': case '}': case '~':
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+#endif
+
+#endif /* _MBCHAR_H */
diff --git a/lib/mbuiter.h b/lib/mbuiter.h
new file mode 100644
index 0000000..9da3a6c
--- /dev/null
+++ b/lib/mbuiter.h
@@ -0,0 +1,203 @@
+/* Iterating through multibyte strings: macros for multi-byte encodings.
+ Copyright (C) 2001, 2005 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+/* Written by Bruno Haible <bruno@clisp.org>. */
+
+/* The macros in this file implement forward iteration through a
+ multi-byte string, without knowing its length a-priori.
+
+ With these macros, an iteration loop that looks like
+
+ char *iter;
+ for (iter = buf; *iter != '\0'; iter++)
+ {
+ do_something (*iter);
+ }
+
+ becomes
+
+ mbui_iterator_t iter;
+ for (mbui_init (iter, buf); mbui_avail (iter); mbui_advance (iter))
+ {
+ do_something (mbui_cur_ptr (iter), mb_len (mbui_cur (iter)));
+ }
+
+ The benefit of these macros over plain use of mbrtowc is:
+ - Handling of invalid multibyte sequences is possible without
+ making the code more complicated, while still preserving the
+ invalid multibyte sequences.
+
+ Compared to mbiter.h, the macros here don't need to know the string's
+ length a-priori. The downside is that at each step, the look-ahead
+ that guards against overrunning the terminating '\0' is more expensive.
+ The mbui_* macros are therefore suitable when there is a high probability
+ that only the first few multibyte characters need to be inspected.
+ Whereas the mbi_* macros are better if usually the iteration runs
+ through the entire string.
+
+ mbui_iterator_t
+ is a type usable for variable declarations.
+
+ mbui_init (iter, startptr)
+ initializes the iterator, starting at startptr.
+
+ mbui_avail (iter)
+ returns true if there are more multibyte chracters available before
+ the end of string is reached. In this case, mbui_cur (iter) is
+ initialized to the next multibyte chracter.
+
+ mbui_advance (iter)
+ advances the iterator by one multibyte character.
+
+ mbui_cur (iter)
+ returns the current multibyte character, of type mbchar_t. All the
+ macros defined in mbchar.h can be used on it.
+
+ mbui_cur_ptr (iter)
+ return a pointer to the beginning of the current multibyte character.
+
+ mbui_reloc (iter, ptrdiff)
+ relocates iterator when the string is moved by ptrdiff bytes.
+
+ Here are the function prototypes of the macros.
+
+ extern void mbui_init (mbui_iterator_t iter, const char *startptr);
+ extern bool mbui_avail (mbui_iterator_t iter);
+ extern void mbui_advance (mbui_iterator_t iter);
+ extern mbchar_t mbui_cur (mbui_iterator_t iter);
+ extern const char * mbui_cur_ptr (mbui_iterator_t iter);
+ extern void mbui_reloc (mbui_iterator_t iter, ptrdiff_t ptrdiff);
+ */
+
+#ifndef _MBUITER_H
+#define _MBUITER_H 1
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
+ <wchar.h>.
+ BSD/OS 4.1 has a bug: <stdio.h> and <time.h> must be included before
+ <wchar.h>. */
+#include <stdio.h>
+#include <time.h>
+#include <wchar.h>
+
+#include "mbchar.h"
+#include "strnlen1.h"
+
+struct mbuiter_multi
+{
+ bool in_shift; /* true if next byte may not be interpreted as ASCII */
+ mbstate_t state; /* if in_shift: current shift state */
+ bool next_done; /* true if mbui_avail has already filled the following */
+ struct mbchar cur; /* the current character:
+ const char *cur.ptr pointer to current character
+ The following are only valid after mbui_avail.
+ size_t cur.bytes number of bytes of current character
+ bool cur.wc_valid true if wc is a valid wide character
+ wchar_t cur.wc if wc_valid: the current character
+ */
+};
+
+static inline void
+mbuiter_multi_next (struct mbuiter_multi *iter)
+{
+ if (iter->next_done)
+ return;
+ if (iter->in_shift)
+ goto with_shift;
+ /* Handle most ASCII characters quickly, without calling mbrtowc(). */
+ if (is_basic (*iter->cur.ptr))
+ {
+ /* These characters are part of the basic character set. ISO C 99
+ guarantees that their wide character code is identical to their
+ char code. */
+ iter->cur.bytes = 1;
+ iter->cur.wc = *iter->cur.ptr;
+ iter->cur.wc_valid = true;
+ }
+ else
+ {
+ assert (mbsinit (&iter->state));
+ iter->in_shift = true;
+ with_shift:
+ iter->cur.bytes = mbrtowc (&iter->cur.wc, iter->cur.ptr,
+ strnlen1 (iter->cur.ptr, MB_CUR_MAX),
+ &iter->state);
+ if (iter->cur.bytes == (size_t) -1)
+ {
+ /* An invalid multibyte sequence was encountered. */
+ iter->cur.bytes = 1;
+ iter->cur.wc_valid = false;
+ /* Whether to set iter->in_shift = false and reset iter->state
+ or not is not very important; the string is bogus anyway. */
+ }
+ else if (iter->cur.bytes == (size_t) -2)
+ {
+ /* An incomplete multibyte character at the end. */
+ iter->cur.bytes = strlen (iter->cur.ptr);
+ iter->cur.wc_valid = false;
+ /* Whether to set iter->in_shift = false and reset iter->state
+ or not is not important; the string end is reached anyway. */
+ }
+ else
+ {
+ if (iter->cur.bytes == 0)
+ {
+ /* A null wide character was encountered. */
+ iter->cur.bytes = 1;
+ assert (*iter->cur.ptr == '\0');
+ assert (iter->cur.wc == 0);
+ }
+ iter->cur.wc_valid = true;
+
+ /* When in the initial state, we can go back treating ASCII
+ characters more quickly. */
+ if (mbsinit (&iter->state))
+ iter->in_shift = false;
+ }
+ }
+ iter->next_done = true;
+}
+
+static inline void
+mbuiter_multi_reloc (struct mbuiter_multi *iter, ptrdiff_t ptrdiff)
+{
+ iter->cur.ptr += ptrdiff;
+}
+
+/* Iteration macros. */
+typedef struct mbuiter_multi mbui_iterator_t;
+#define mbui_init(iter, startptr) \
+ ((iter).cur.ptr = (startptr), \
+ (iter).in_shift = false, memset (&(iter).state, '\0', sizeof (mbstate_t)), \
+ (iter).next_done = false)
+#define mbui_avail(iter) \
+ (mbuiter_multi_next (&(iter)), !mb_isnul ((iter).cur))
+#define mbui_advance(iter) \
+ ((iter).cur.ptr += (iter).cur.bytes, (iter).next_done = false)
+
+/* Access to the current character. */
+#define mbui_cur(iter) (iter).cur
+#define mbui_cur_ptr(iter) (iter).cur.ptr
+
+/* Relocation. */
+#define mbui_reloc(iter, ptrdiff) mbuiter_multi_reloc (&iter, ptrdiff)
+
+#endif /* _MBUITER_H */
diff --git a/lib/memchr.c b/lib/memchr.c
new file mode 100644
index 0000000..d44ad6d
--- /dev/null
+++ b/lib/memchr.c
@@ -0,0 +1,201 @@
+/* Copyright (C) 1991, 1993, 1996, 1997, 1999, 2000, 2003, 2004, 2006 Free
+ Software Foundation, Inc.
+
+ Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
+ with help from Dan Sahlin (dan@sics.se) and
+ commentary by Jim Blandy (jimb@ai.mit.edu);
+ adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
+ and implemented by Roland McGrath (roland@ai.mit.edu).
+
+NOTE: The canonical source of this file is maintained with the GNU C Library.
+Bugs can be reported to bug-glibc@prep.ai.mit.edu.
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#ifndef _LIBC
+# include <config.h>
+#endif
+
+#include <string.h>
+
+#include <stddef.h>
+
+#if defined _LIBC
+# include <memcopy.h>
+#else
+# define reg_char char
+#endif
+
+#include <limits.h>
+
+#if HAVE_BP_SYM_H || defined _LIBC
+# include <bp-sym.h>
+#else
+# define BP_SYM(sym) sym
+#endif
+
+#undef memchr
+#undef __memchr
+
+/* Search no more than N bytes of S for C. */
+void *
+__memchr (void const *s, int c_in, size_t n)
+{
+ const unsigned char *char_ptr;
+ const unsigned long int *longword_ptr;
+ unsigned long int longword, magic_bits, charmask;
+ unsigned reg_char c;
+ int i;
+
+ c = (unsigned char) c_in;
+
+ /* Handle the first few characters by reading one character at a time.
+ Do this until CHAR_PTR is aligned on a longword boundary. */
+ for (char_ptr = (const unsigned char *) s;
+ n > 0 && (size_t) char_ptr % sizeof longword != 0;
+ --n, ++char_ptr)
+ if (*char_ptr == c)
+ return (void *) char_ptr;
+
+ /* All these elucidatory comments refer to 4-byte longwords,
+ but the theory applies equally well to any size longwords. */
+
+ longword_ptr = (const unsigned long int *) char_ptr;
+
+ /* Bits 31, 24, 16, and 8 of this number are zero. Call these bits
+ the "holes." Note that there is a hole just to the left of
+ each byte, with an extra at the end:
+
+ bits: 01111110 11111110 11111110 11111111
+ bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
+
+ The 1-bits make sure that carries propagate to the next 0-bit.
+ The 0-bits provide holes for carries to fall into. */
+
+ /* Set MAGIC_BITS to be this pattern of 1 and 0 bits.
+ Set CHARMASK to be a longword, each of whose bytes is C. */
+
+ magic_bits = 0xfefefefe;
+ charmask = c | (c << 8);
+ charmask |= charmask << 16;
+#if 0xffffffffU < ULONG_MAX
+ magic_bits |= magic_bits << 32;
+ charmask |= charmask << 32;
+ if (8 < sizeof longword)
+ for (i = 64; i < sizeof longword * 8; i *= 2)
+ {
+ magic_bits |= magic_bits << i;
+ charmask |= charmask << i;
+ }
+#endif
+ magic_bits = (ULONG_MAX >> 1) & (magic_bits | 1);
+
+ /* Instead of the traditional loop which tests each character,
+ we will test a longword at a time. The tricky part is testing
+ if *any of the four* bytes in the longword in question are zero. */
+ while (n >= sizeof longword)
+ {
+ /* We tentatively exit the loop if adding MAGIC_BITS to
+ LONGWORD fails to change any of the hole bits of LONGWORD.
+
+ 1) Is this safe? Will it catch all the zero bytes?
+ Suppose there is a byte with all zeros. Any carry bits
+ propagating from its left will fall into the hole at its
+ least significant bit and stop. Since there will be no
+ carry from its most significant bit, the LSB of the
+ byte to the left will be unchanged, and the zero will be
+ detected.
+
+ 2) Is this worthwhile? Will it ignore everything except
+ zero bytes? Suppose every byte of LONGWORD has a bit set
+ somewhere. There will be a carry into bit 8. If bit 8
+ is set, this will carry into bit 16. If bit 8 is clear,
+ one of bits 9-15 must be set, so there will be a carry
+ into bit 16. Similarly, there will be a carry into bit
+ 24. If one of bits 24-30 is set, there will be a carry
+ into bit 31, so all of the hole bits will be changed.
+
+ The one misfire occurs when bits 24-30 are clear and bit
+ 31 is set; in this case, the hole at bit 31 is not
+ changed. If we had access to the processor carry flag,
+ we could close this loophole by putting the fourth hole
+ at bit 32!
+
+ So it ignores everything except 128's, when they're aligned
+ properly.
+
+ 3) But wait! Aren't we looking for C, not zero?
+ Good point. So what we do is XOR LONGWORD with a longword,
+ each of whose bytes is C. This turns each byte that is C
+ into a zero. */
+
+ longword = *longword_ptr++ ^ charmask;
+
+ /* Add MAGIC_BITS to LONGWORD. */
+ if ((((longword + magic_bits)
+
+ /* Set those bits that were unchanged by the addition. */
+ ^ ~longword)
+
+ /* Look at only the hole bits. If any of the hole bits
+ are unchanged, most likely one of the bytes was a
+ zero. */
+ & ~magic_bits) != 0)
+ {
+ /* Which of the bytes was C? If none of them were, it was
+ a misfire; continue the search. */
+
+ const unsigned char *cp = (const unsigned char *) (longword_ptr - 1);
+
+ if (cp[0] == c)
+ return (void *) cp;
+ if (cp[1] == c)
+ return (void *) &cp[1];
+ if (cp[2] == c)
+ return (void *) &cp[2];
+ if (cp[3] == c)
+ return (void *) &cp[3];
+ if (4 < sizeof longword && cp[4] == c)
+ return (void *) &cp[4];
+ if (5 < sizeof longword && cp[5] == c)
+ return (void *) &cp[5];
+ if (6 < sizeof longword && cp[6] == c)
+ return (void *) &cp[6];
+ if (7 < sizeof longword && cp[7] == c)
+ return (void *) &cp[7];
+ if (8 < sizeof longword)
+ for (i = 8; i < sizeof longword; i++)
+ if (cp[i] == c)
+ return (void *) &cp[i];
+ }
+
+ n -= sizeof longword;
+ }
+
+ char_ptr = (const unsigned char *) longword_ptr;
+
+ while (n-- > 0)
+ {
+ if (*char_ptr == c)
+ return (void *) char_ptr;
+ else
+ ++char_ptr;
+ }
+
+ return 0;
+}
+#ifdef weak_alias
+weak_alias (__memchr, BP_SYM (memchr))
+#endif
diff --git a/lib/memcmp.c b/lib/memcmp.c
new file mode 100644
index 0000000..cf98bfa
--- /dev/null
+++ b/lib/memcmp.c
@@ -0,0 +1,363 @@
+/* Copyright (C) 1991, 1993, 1995, 1997, 1998, 2003, 2006 Free Software
+ Foundation, Inc.
+
+ Contributed by Torbjorn Granlund (tege@sics.se).
+
+ NOTE: The canonical source of this file is maintained with the GNU C Library.
+ Bugs can be reported to bug-glibc@prep.ai.mit.edu.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+ USA. */
+
+#ifndef _LIBC
+# include <config.h>
+#endif
+
+#include <string.h>
+
+#undef memcmp
+
+#ifdef _LIBC
+
+# include <memcopy.h>
+# include <endian.h>
+
+# if __BYTE_ORDER == __BIG_ENDIAN
+# define WORDS_BIGENDIAN
+# endif
+
+#else /* Not in the GNU C library. */
+
+# include <sys/types.h>
+
+/* Type to use for aligned memory operations.
+ This should normally be the biggest type supported by a single load
+ and store. Must be an unsigned type. */
+# define op_t unsigned long int
+# define OPSIZ (sizeof(op_t))
+
+/* Threshold value for when to enter the unrolled loops. */
+# define OP_T_THRES 16
+
+/* Type to use for unaligned operations. */
+typedef unsigned char byte;
+
+# ifndef WORDS_BIGENDIAN
+# define MERGE(w0, sh_1, w1, sh_2) (((w0) >> (sh_1)) | ((w1) << (sh_2)))
+# else
+# define MERGE(w0, sh_1, w1, sh_2) (((w0) << (sh_1)) | ((w1) >> (sh_2)))
+# endif
+
+#endif /* In the GNU C library. */
+
+#ifdef WORDS_BIGENDIAN
+# define CMP_LT_OR_GT(a, b) ((a) > (b) ? 1 : -1)
+#else
+# define CMP_LT_OR_GT(a, b) memcmp_bytes ((a), (b))
+#endif
+
+/* BE VERY CAREFUL IF YOU CHANGE THIS CODE! */
+
+/* The strategy of this memcmp is:
+
+ 1. Compare bytes until one of the block pointers is aligned.
+
+ 2. Compare using memcmp_common_alignment or
+ memcmp_not_common_alignment, regarding the alignment of the other
+ block after the initial byte operations. The maximum number of
+ full words (of type op_t) are compared in this way.
+
+ 3. Compare the few remaining bytes. */
+
+#ifndef WORDS_BIGENDIAN
+/* memcmp_bytes -- Compare A and B bytewise in the byte order of the machine.
+ A and B are known to be different.
+ This is needed only on little-endian machines. */
+
+# ifdef __GNUC__
+__inline
+# endif
+static int
+memcmp_bytes (long unsigned int a, long unsigned int b)
+{
+ long int srcp1 = (long int) &a;
+ long int srcp2 = (long int) &b;
+ op_t a0, b0;
+
+ do
+ {
+ a0 = ((byte *) srcp1)[0];
+ b0 = ((byte *) srcp2)[0];
+ srcp1 += 1;
+ srcp2 += 1;
+ }
+ while (a0 == b0);
+ return a0 - b0;
+}
+#endif
+
+/* memcmp_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN `op_t'
+ objects (not LEN bytes!). Both SRCP1 and SRCP2 should be aligned for
+ memory operations on `op_t's. */
+#ifdef __GNUC__
+__inline
+#endif
+static int
+memcmp_common_alignment (long int srcp1, long int srcp2, size_t len)
+{
+ op_t a0, a1;
+ op_t b0, b1;
+
+ switch (len % 4)
+ {
+ default: /* Avoid warning about uninitialized local variables. */
+ case 2:
+ a0 = ((op_t *) srcp1)[0];
+ b0 = ((op_t *) srcp2)[0];
+ srcp1 -= 2 * OPSIZ;
+ srcp2 -= 2 * OPSIZ;
+ len += 2;
+ goto do1;
+ case 3:
+ a1 = ((op_t *) srcp1)[0];
+ b1 = ((op_t *) srcp2)[0];
+ srcp1 -= OPSIZ;
+ srcp2 -= OPSIZ;
+ len += 1;
+ goto do2;
+ case 0:
+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+ return 0;
+ a0 = ((op_t *) srcp1)[0];
+ b0 = ((op_t *) srcp2)[0];
+ goto do3;
+ case 1:
+ a1 = ((op_t *) srcp1)[0];
+ b1 = ((op_t *) srcp2)[0];
+ srcp1 += OPSIZ;
+ srcp2 += OPSIZ;
+ len -= 1;
+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+ goto do0;
+ /* Fall through. */
+ }
+
+ do
+ {
+ a0 = ((op_t *) srcp1)[0];
+ b0 = ((op_t *) srcp2)[0];
+ if (a1 != b1)
+ return CMP_LT_OR_GT (a1, b1);
+
+ do3:
+ a1 = ((op_t *) srcp1)[1];
+ b1 = ((op_t *) srcp2)[1];
+ if (a0 != b0)
+ return CMP_LT_OR_GT (a0, b0);
+
+ do2:
+ a0 = ((op_t *) srcp1)[2];
+ b0 = ((op_t *) srcp2)[2];
+ if (a1 != b1)
+ return CMP_LT_OR_GT (a1, b1);
+
+ do1:
+ a1 = ((op_t *) srcp1)[3];
+ b1 = ((op_t *) srcp2)[3];
+ if (a0 != b0)
+ return CMP_LT_OR_GT (a0, b0);
+
+ srcp1 += 4 * OPSIZ;
+ srcp2 += 4 * OPSIZ;
+ len -= 4;
+ }
+ while (len != 0);
+
+ /* This is the right position for do0. Please don't move
+ it into the loop. */
+ do0:
+ if (a1 != b1)
+ return CMP_LT_OR_GT (a1, b1);
+ return 0;
+}
+
+/* memcmp_not_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN
+ `op_t' objects (not LEN bytes!). SRCP2 should be aligned for memory
+ operations on `op_t', but SRCP1 *should be unaligned*. */
+#ifdef __GNUC__
+__inline
+#endif
+static int
+memcmp_not_common_alignment (long int srcp1, long int srcp2, size_t len)
+{
+ op_t a0, a1, a2, a3;
+ op_t b0, b1, b2, b3;
+ op_t x;
+ int shl, shr;
+
+ /* Calculate how to shift a word read at the memory operation
+ aligned srcp1 to make it aligned for comparison. */
+
+ shl = 8 * (srcp1 % OPSIZ);
+ shr = 8 * OPSIZ - shl;
+
+ /* Make SRCP1 aligned by rounding it down to the beginning of the `op_t'
+ it points in the middle of. */
+ srcp1 &= -OPSIZ;
+
+ switch (len % 4)
+ {
+ default: /* Avoid warning about uninitialized local variables. */
+ case 2:
+ a1 = ((op_t *) srcp1)[0];
+ a2 = ((op_t *) srcp1)[1];
+ b2 = ((op_t *) srcp2)[0];
+ srcp1 -= 1 * OPSIZ;
+ srcp2 -= 2 * OPSIZ;
+ len += 2;
+ goto do1;
+ case 3:
+ a0 = ((op_t *) srcp1)[0];
+ a1 = ((op_t *) srcp1)[1];
+ b1 = ((op_t *) srcp2)[0];
+ srcp2 -= 1 * OPSIZ;
+ len += 1;
+ goto do2;
+ case 0:
+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+ return 0;
+ a3 = ((op_t *) srcp1)[0];
+ a0 = ((op_t *) srcp1)[1];
+ b0 = ((op_t *) srcp2)[0];
+ srcp1 += 1 * OPSIZ;
+ goto do3;
+ case 1:
+ a2 = ((op_t *) srcp1)[0];
+ a3 = ((op_t *) srcp1)[1];
+ b3 = ((op_t *) srcp2)[0];
+ srcp1 += 2 * OPSIZ;
+ srcp2 += 1 * OPSIZ;
+ len -= 1;
+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+ goto do0;
+ /* Fall through. */
+ }
+
+ do
+ {
+ a0 = ((op_t *) srcp1)[0];
+ b0 = ((op_t *) srcp2)[0];
+ x = MERGE(a2, shl, a3, shr);
+ if (x != b3)
+ return CMP_LT_OR_GT (x, b3);
+
+ do3:
+ a1 = ((op_t *) srcp1)[1];
+ b1 = ((op_t *) srcp2)[1];
+ x = MERGE(a3, shl, a0, shr);
+ if (x != b0)
+ return CMP_LT_OR_GT (x, b0);
+
+ do2:
+ a2 = ((op_t *) srcp1)[2];
+ b2 = ((op_t *) srcp2)[2];
+ x = MERGE(a0, shl, a1, shr);
+ if (x != b1)
+ return CMP_LT_OR_GT (x, b1);
+
+ do1:
+ a3 = ((op_t *) srcp1)[3];
+ b3 = ((op_t *) srcp2)[3];
+ x = MERGE(a1, shl, a2, shr);
+ if (x != b2)
+ return CMP_LT_OR_GT (x, b2);
+
+ srcp1 += 4 * OPSIZ;
+ srcp2 += 4 * OPSIZ;
+ len -= 4;
+ }
+ while (len != 0);
+
+ /* This is the right position for do0. Please don't move
+ it into the loop. */
+ do0:
+ x = MERGE(a2, shl, a3, shr);
+ if (x != b3)
+ return CMP_LT_OR_GT (x, b3);
+ return 0;
+}
+
+int
+rpl_memcmp (const void *s1, const void *s2, size_t len)
+{
+ op_t a0;
+ op_t b0;
+ long int srcp1 = (long int) s1;
+ long int srcp2 = (long int) s2;
+ op_t res;
+
+ if (len >= OP_T_THRES)
+ {
+ /* There are at least some bytes to compare. No need to test
+ for LEN == 0 in this alignment loop. */
+ while (srcp2 % OPSIZ != 0)
+ {
+ a0 = ((byte *) srcp1)[0];
+ b0 = ((byte *) srcp2)[0];
+ srcp1 += 1;
+ srcp2 += 1;
+ res = a0 - b0;
+ if (res != 0)
+ return res;
+ len -= 1;
+ }
+
+ /* SRCP2 is now aligned for memory operations on `op_t'.
+ SRCP1 alignment determines if we can do a simple,
+ aligned compare or need to shuffle bits. */
+
+ if (srcp1 % OPSIZ == 0)
+ res = memcmp_common_alignment (srcp1, srcp2, len / OPSIZ);
+ else
+ res = memcmp_not_common_alignment (srcp1, srcp2, len / OPSIZ);
+ if (res != 0)
+ return res;
+
+ /* Number of bytes remaining in the interval [0..OPSIZ-1]. */
+ srcp1 += len & -OPSIZ;
+ srcp2 += len & -OPSIZ;
+ len %= OPSIZ;
+ }
+
+ /* There are just a few bytes to compare. Use byte memory operations. */
+ while (len != 0)
+ {
+ a0 = ((byte *) srcp1)[0];
+ b0 = ((byte *) srcp2)[0];
+ srcp1 += 1;
+ srcp2 += 1;
+ res = a0 - b0;
+ if (res != 0)
+ return res;
+ len -= 1;
+ }
+
+ return 0;
+}
+
+#ifdef weak_alias
+# undef bcmp
+weak_alias (memcmp, bcmp)
+#endif
diff --git a/lib/memmove.c b/lib/memmove.c
new file mode 100644
index 0000000..c5ff8b5
--- /dev/null
+++ b/lib/memmove.c
@@ -0,0 +1,26 @@
+/* memmove.c -- copy memory.
+ Copy LENGTH bytes from SOURCE to DEST. Does not null-terminate.
+ In the public domain.
+ By David MacKenzie <djm@gnu.ai.mit.edu>. */
+
+#include <config.h>
+
+#include <stddef.h>
+
+void *
+memmove (void *dest0, void const *source0, size_t length)
+{
+ char *dest = dest0;
+ char const *source = source0;
+ if (source < dest)
+ /* Moving from low mem to hi mem; start at end. */
+ for (source += length, dest += length; length; --length)
+ *--dest = *--source;
+ else if (source != dest)
+ {
+ /* Moving from hi mem to low mem; start at beginning. */
+ for (; length; --length)
+ *dest++ = *source++;
+ }
+ return dest0;
+}
diff --git a/lib/mkstemp.c b/lib/mkstemp.c
new file mode 100644
index 0000000..eabee0c
--- /dev/null
+++ b/lib/mkstemp.c
@@ -0,0 +1,40 @@
+/* Copyright (C) 1998, 1999, 2001, 2005, 2006 Free Software Foundation, Inc.
+ This file is derived from the one in the GNU C Library.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#if !_LIBC
+# include <config.h>
+# include "mkstemp.h"
+int __gen_tempname ();
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef __GT_FILE
+# define __GT_FILE 0
+#endif
+
+/* Generate a unique temporary file name from TEMPLATE.
+ The last six characters of TEMPLATE must be "XXXXXX";
+ they are replaced with a string that makes the file name unique.
+ Then open the file and return a fd. */
+int
+mkstemp (template)
+ char *template;
+{
+ return __gen_tempname (template, __GT_FILE);
+}
diff --git a/lib/mkstemp.h b/lib/mkstemp.h
new file mode 100644
index 0000000..69e6fec
--- /dev/null
+++ b/lib/mkstemp.h
@@ -0,0 +1,30 @@
+/* Create a unique temporary file.
+
+ Copyright (C) 2006 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* written by Jim Meyering */
+
+#include <stdlib.h>
+
+#ifdef __MKSTEMP_PREFIX
+# define _GL_CONCAT(x, y) x ## y
+# define _GL_XCONCAT(x, y) _GL_CONCAT (x, y)
+# define __MKSTEMP_ID(y) _GL_XCONCAT (__MKSTEMP_PREFIX, y)
+# undef mkstemp
+# define mkstemp __MKSTEMP_ID (mkstemp)
+int mkstemp (char *);
+#endif
diff --git a/lib/obstack.c b/lib/obstack.c
new file mode 100644
index 0000000..1bbf62b
--- /dev/null
+++ b/lib/obstack.c
@@ -0,0 +1,431 @@
+/* obstack.c - subroutines used implicitly by object stack macros
+
+ Copyright (C) 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1996, 1997,
+ 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software
+ Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#ifdef _LIBC
+# include <obstack.h>
+# include <shlib-compat.h>
+#else
+# include <config.h>
+# include "obstack.h"
+#endif
+
+/* NOTE BEFORE MODIFYING THIS FILE: This version number must be
+ incremented whenever callers compiled using an old obstack.h can no
+ longer properly call the functions in this obstack.c. */
+#define OBSTACK_INTERFACE_VERSION 1
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+ actually compiling the library itself, and the installed library
+ supports the same library interface we do. This code is part of the GNU
+ C Library, but also included in many other GNU distributions. Compiling
+ and linking in this code is a waste when using the GNU C library
+ (especially if it is a shared library). Rather than having every GNU
+ program understand `configure --with-gnu-libc' and omit the object
+ files, it is simpler to just do this in the source for each such file. */
+
+#include <stdio.h> /* Random thing to get __GNU_LIBRARY__. */
+#if !defined _LIBC && defined __GNU_LIBRARY__ && __GNU_LIBRARY__ > 1
+# include <gnu-versions.h>
+# if _GNU_OBSTACK_INTERFACE_VERSION == OBSTACK_INTERFACE_VERSION
+# define ELIDE_CODE
+# endif
+#endif
+
+#include <stddef.h>
+
+#ifndef ELIDE_CODE
+
+# include <stdint.h>
+
+/* Determine default alignment. */
+union fooround
+{
+ uintmax_t i;
+ long double d;
+ void *p;
+};
+struct fooalign
+{
+ char c;
+ union fooround u;
+};
+/* If malloc were really smart, it would round addresses to DEFAULT_ALIGNMENT.
+ But in fact it might be less smart and round addresses to as much as
+ DEFAULT_ROUNDING. So we prepare for it to do that. */
+enum
+ {
+ DEFAULT_ALIGNMENT = offsetof (struct fooalign, u),
+ DEFAULT_ROUNDING = sizeof (union fooround)
+ };
+
+/* When we copy a long block of data, this is the unit to do it with.
+ On some machines, copying successive ints does not work;
+ in such a case, redefine COPYING_UNIT to `long' (if that works)
+ or `char' as a last resort. */
+# ifndef COPYING_UNIT
+# define COPYING_UNIT int
+# endif
+
+
+/* The functions allocating more room by calling `obstack_chunk_alloc'
+ jump to the handler pointed to by `obstack_alloc_failed_handler'.
+ This can be set to a user defined function which should either
+ abort gracefully or use longjump - but shouldn't return. This
+ variable by default points to the internal function
+ `print_and_abort'. */
+static void print_and_abort (void);
+void (*obstack_alloc_failed_handler) (void) = print_and_abort;
+
+/* Exit value used when `print_and_abort' is used. */
+# include <stdlib.h>
+# ifdef _LIBC
+int obstack_exit_failure = EXIT_FAILURE;
+# else
+# include "exitfail.h"
+# define obstack_exit_failure exit_failure
+# endif
+
+# ifdef _LIBC
+# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
+/* A looong time ago (before 1994, anyway; we're not sure) this global variable
+ was used by non-GNU-C macros to avoid multiple evaluation. The GNU C
+ library still exports it because somebody might use it. */
+struct obstack *_obstack_compat;
+compat_symbol (libc, _obstack_compat, _obstack, GLIBC_2_0);
+# endif
+# endif
+
+/* Define a macro that either calls functions with the traditional malloc/free
+ calling interface, or calls functions with the mmalloc/mfree interface
+ (that adds an extra first argument), based on the state of use_extra_arg.
+ For free, do not use ?:, since some compilers, like the MIPS compilers,
+ do not allow (expr) ? void : void. */
+
+# define CALL_CHUNKFUN(h, size) \
+ (((h) -> use_extra_arg) \
+ ? (*(h)->chunkfun) ((h)->extra_arg, (size)) \
+ : (*(struct _obstack_chunk *(*) (long)) (h)->chunkfun) ((size)))
+
+# define CALL_FREEFUN(h, old_chunk) \
+ do { \
+ if ((h) -> use_extra_arg) \
+ (*(h)->freefun) ((h)->extra_arg, (old_chunk)); \
+ else \
+ (*(void (*) (void *)) (h)->freefun) ((old_chunk)); \
+ } while (0)
+
+
+/* Initialize an obstack H for use. Specify chunk size SIZE (0 means default).
+ Objects start on multiples of ALIGNMENT (0 means use default).
+ CHUNKFUN is the function to use to allocate chunks,
+ and FREEFUN the function to free them.
+
+ Return nonzero if successful, calls obstack_alloc_failed_handler if
+ allocation fails. */
+
+int
+_obstack_begin (struct obstack *h,
+ int size, int alignment,
+ void *(*chunkfun) (long),
+ void (*freefun) (void *))
+{
+ register struct _obstack_chunk *chunk; /* points to new chunk */
+
+ if (alignment == 0)
+ alignment = DEFAULT_ALIGNMENT;
+ if (size == 0)
+ /* Default size is what GNU malloc can fit in a 4096-byte block. */
+ {
+ /* 12 is sizeof (mhead) and 4 is EXTRA from GNU malloc.
+ Use the values for range checking, because if range checking is off,
+ the extra bytes won't be missed terribly, but if range checking is on
+ and we used a larger request, a whole extra 4096 bytes would be
+ allocated.
+
+ These number are irrelevant to the new GNU malloc. I suspect it is
+ less sensitive to the size of the request. */
+ int extra = ((((12 + DEFAULT_ROUNDING - 1) & ~(DEFAULT_ROUNDING - 1))
+ + 4 + DEFAULT_ROUNDING - 1)
+ & ~(DEFAULT_ROUNDING - 1));
+ size = 4096 - extra;
+ }
+
+ h->chunkfun = (struct _obstack_chunk * (*)(void *, long)) chunkfun;
+ h->freefun = (void (*) (void *, struct _obstack_chunk *)) freefun;
+ h->chunk_size = size;
+ h->alignment_mask = alignment - 1;
+ h->use_extra_arg = 0;
+
+ chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size);
+ if (!chunk)
+ (*obstack_alloc_failed_handler) ();
+ h->next_free = h->object_base = __PTR_ALIGN ((char *) chunk, chunk->contents,
+ alignment - 1);
+ h->chunk_limit = chunk->limit
+ = (char *) chunk + h->chunk_size;
+ chunk->prev = 0;
+ /* The initial chunk now contains no empty object. */
+ h->maybe_empty_object = 0;
+ h->alloc_failed = 0;
+ return 1;
+}
+
+int
+_obstack_begin_1 (struct obstack *h, int size, int alignment,
+ void *(*chunkfun) (void *, long),
+ void (*freefun) (void *, void *),
+ void *arg)
+{
+ register struct _obstack_chunk *chunk; /* points to new chunk */
+
+ if (alignment == 0)
+ alignment = DEFAULT_ALIGNMENT;
+ if (size == 0)
+ /* Default size is what GNU malloc can fit in a 4096-byte block. */
+ {
+ /* 12 is sizeof (mhead) and 4 is EXTRA from GNU malloc.
+ Use the values for range checking, because if range checking is off,
+ the extra bytes won't be missed terribly, but if range checking is on
+ and we used a larger request, a whole extra 4096 bytes would be
+ allocated.
+
+ These number are irrelevant to the new GNU malloc. I suspect it is
+ less sensitive to the size of the request. */
+ int extra = ((((12 + DEFAULT_ROUNDING - 1) & ~(DEFAULT_ROUNDING - 1))
+ + 4 + DEFAULT_ROUNDING - 1)
+ & ~(DEFAULT_ROUNDING - 1));
+ size = 4096 - extra;
+ }
+
+ h->chunkfun = (struct _obstack_chunk * (*)(void *,long)) chunkfun;
+ h->freefun = (void (*) (void *, struct _obstack_chunk *)) freefun;
+ h->chunk_size = size;
+ h->alignment_mask = alignment - 1;
+ h->extra_arg = arg;
+ h->use_extra_arg = 1;
+
+ chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size);
+ if (!chunk)
+ (*obstack_alloc_failed_handler) ();
+ h->next_free = h->object_base = __PTR_ALIGN ((char *) chunk, chunk->contents,
+ alignment - 1);
+ h->chunk_limit = chunk->limit
+ = (char *) chunk + h->chunk_size;
+ chunk->prev = 0;
+ /* The initial chunk now contains no empty object. */
+ h->maybe_empty_object = 0;
+ h->alloc_failed = 0;
+ return 1;
+}
+
+/* Allocate a new current chunk for the obstack *H
+ on the assumption that LENGTH bytes need to be added
+ to the current object, or a new object of length LENGTH allocated.
+ Copies any partial object from the end of the old chunk
+ to the beginning of the new one. */
+
+void
+_obstack_newchunk (struct obstack *h, int length)
+{
+ register struct _obstack_chunk *old_chunk = h->chunk;
+ register struct _obstack_chunk *new_chunk;
+ register long new_size;
+ register long obj_size = h->next_free - h->object_base;
+ register long i;
+ long already;
+ char *object_base;
+
+ /* Compute size for new chunk. */
+ new_size = (obj_size + length) + (obj_size >> 3) + h->alignment_mask + 100;
+ if (new_size < h->chunk_size)
+ new_size = h->chunk_size;
+
+ /* Allocate and initialize the new chunk. */
+ new_chunk = CALL_CHUNKFUN (h, new_size);
+ if (!new_chunk)
+ (*obstack_alloc_failed_handler) ();
+ h->chunk = new_chunk;
+ new_chunk->prev = old_chunk;
+ new_chunk->limit = h->chunk_limit = (char *) new_chunk + new_size;
+
+ /* Compute an aligned object_base in the new chunk */
+ object_base =
+ __PTR_ALIGN ((char *) new_chunk, new_chunk->contents, h->alignment_mask);
+
+ /* Move the existing object to the new chunk.
+ Word at a time is fast and is safe if the object
+ is sufficiently aligned. */
+ if (h->alignment_mask + 1 >= DEFAULT_ALIGNMENT)
+ {
+ for (i = obj_size / sizeof (COPYING_UNIT) - 1;
+ i >= 0; i--)
+ ((COPYING_UNIT *)object_base)[i]
+ = ((COPYING_UNIT *)h->object_base)[i];
+ /* We used to copy the odd few remaining bytes as one extra COPYING_UNIT,
+ but that can cross a page boundary on a machine
+ which does not do strict alignment for COPYING_UNITS. */
+ already = obj_size / sizeof (COPYING_UNIT) * sizeof (COPYING_UNIT);
+ }
+ else
+ already = 0;
+ /* Copy remaining bytes one by one. */
+ for (i = already; i < obj_size; i++)
+ object_base[i] = h->object_base[i];
+
+ /* If the object just copied was the only data in OLD_CHUNK,
+ free that chunk and remove it from the chain.
+ But not if that chunk might contain an empty object. */
+ if (! h->maybe_empty_object
+ && (h->object_base
+ == __PTR_ALIGN ((char *) old_chunk, old_chunk->contents,
+ h->alignment_mask)))
+ {
+ new_chunk->prev = old_chunk->prev;
+ CALL_FREEFUN (h, old_chunk);
+ }
+
+ h->object_base = object_base;
+ h->next_free = h->object_base + obj_size;
+ /* The new chunk certainly contains no empty object yet. */
+ h->maybe_empty_object = 0;
+}
+# ifdef _LIBC
+libc_hidden_def (_obstack_newchunk)
+# endif
+
+/* Return nonzero if object OBJ has been allocated from obstack H.
+ This is here for debugging.
+ If you use it in a program, you are probably losing. */
+
+/* Suppress -Wmissing-prototypes warning. We don't want to declare this in
+ obstack.h because it is just for debugging. */
+int _obstack_allocated_p (struct obstack *h, void *obj);
+
+int
+_obstack_allocated_p (struct obstack *h, void *obj)
+{
+ register struct _obstack_chunk *lp; /* below addr of any objects in this chunk */
+ register struct _obstack_chunk *plp; /* point to previous chunk if any */
+
+ lp = (h)->chunk;
+ /* We use >= rather than > since the object cannot be exactly at
+ the beginning of the chunk but might be an empty object exactly
+ at the end of an adjacent chunk. */
+ while (lp != 0 && ((void *) lp >= obj || (void *) (lp)->limit < obj))
+ {
+ plp = lp->prev;
+ lp = plp;
+ }
+ return lp != 0;
+}
+
+/* Free objects in obstack H, including OBJ and everything allocate
+ more recently than OBJ. If OBJ is zero, free everything in H. */
+
+# undef obstack_free
+
+void
+obstack_free (struct obstack *h, void *obj)
+{
+ register struct _obstack_chunk *lp; /* below addr of any objects in this chunk */
+ register struct _obstack_chunk *plp; /* point to previous chunk if any */
+
+ lp = h->chunk;
+ /* We use >= because there cannot be an object at the beginning of a chunk.
+ But there can be an empty object at that address
+ at the end of another chunk. */
+ while (lp != 0 && ((void *) lp >= obj || (void *) (lp)->limit < obj))
+ {
+ plp = lp->prev;
+ CALL_FREEFUN (h, lp);
+ lp = plp;
+ /* If we switch chunks, we can't tell whether the new current
+ chunk contains an empty object, so assume that it may. */
+ h->maybe_empty_object = 1;
+ }
+ if (lp)
+ {
+ h->object_base = h->next_free = (char *) (obj);
+ h->chunk_limit = lp->limit;
+ h->chunk = lp;
+ }
+ else if (obj != 0)
+ /* obj is not in any of the chunks! */
+ abort ();
+}
+
+# ifdef _LIBC
+/* Older versions of libc used a function _obstack_free intended to be
+ called by non-GCC compilers. */
+strong_alias (obstack_free, _obstack_free)
+# endif
+
+int
+_obstack_memory_used (struct obstack *h)
+{
+ register struct _obstack_chunk* lp;
+ register int nbytes = 0;
+
+ for (lp = h->chunk; lp != 0; lp = lp->prev)
+ {
+ nbytes += lp->limit - (char *) lp;
+ }
+ return nbytes;
+}
+
+/* Define the error handler. */
+# ifdef _LIBC
+# include <libintl.h>
+# else
+# include "gettext.h"
+# endif
+# ifndef _
+# define _(msgid) gettext (msgid)
+# endif
+
+# ifdef _LIBC
+# include <libio/iolibio.h>
+# endif
+
+# ifndef __attribute__
+/* This feature is available in gcc versions 2.5 and later. */
+# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 5)
+# define __attribute__(Spec) /* empty */
+# endif
+# endif
+
+static void
+__attribute__ ((noreturn))
+print_and_abort (void)
+{
+ /* Don't change any of these strings. Yes, it would be possible to add
+ the newline to the string and use fputs or so. But this must not
+ happen because the "memory exhausted" message appears in other places
+ like this and the translation should be reused instead of creating
+ a very similar string which requires a separate translation. */
+# ifdef _LIBC
+ (void) __fxprintf (NULL, "%s\n", _("memory exhausted"));
+# else
+ fprintf (stderr, "%s\n", _("memory exhausted"));
+# endif
+ exit (obstack_exit_failure);
+}
+
+#endif /* !ELIDE_CODE */
diff --git a/lib/obstack.h b/lib/obstack.h
new file mode 100644
index 0000000..95dd438
--- /dev/null
+++ b/lib/obstack.h
@@ -0,0 +1,508 @@
+/* obstack.h - object stack macros
+ Copyright (C) 1988-1994,1996-1999,2003,2004,2005
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* Summary:
+
+All the apparent functions defined here are macros. The idea
+is that you would use these pre-tested macros to solve a
+very specific set of problems, and they would run fast.
+Caution: no side-effects in arguments please!! They may be
+evaluated MANY times!!
+
+These macros operate a stack of objects. Each object starts life
+small, and may grow to maturity. (Consider building a word syllable
+by syllable.) An object can move while it is growing. Once it has
+been "finished" it never changes address again. So the "top of the
+stack" is typically an immature growing object, while the rest of the
+stack is of mature, fixed size and fixed address objects.
+
+These routines grab large chunks of memory, using a function you
+supply, called `obstack_chunk_alloc'. On occasion, they free chunks,
+by calling `obstack_chunk_free'. You must define them and declare
+them before using any obstack macros.
+
+Each independent stack is represented by a `struct obstack'.
+Each of the obstack macros expects a pointer to such a structure
+as the first argument.
+
+One motivation for this package is the problem of growing char strings
+in symbol tables. Unless you are "fascist pig with a read-only mind"
+--Gosper's immortal quote from HAKMEM item 154, out of context--you
+would not like to put any arbitrary upper limit on the length of your
+symbols.
+
+In practice this often means you will build many short symbols and a
+few long symbols. At the time you are reading a symbol you don't know
+how long it is. One traditional method is to read a symbol into a
+buffer, realloc()ating the buffer every time you try to read a symbol
+that is longer than the buffer. This is beaut, but you still will
+want to copy the symbol from the buffer to a more permanent
+symbol-table entry say about half the time.
+
+With obstacks, you can work differently. Use one obstack for all symbol
+names. As you read a symbol, grow the name in the obstack gradually.
+When the name is complete, finalize it. Then, if the symbol exists already,
+free the newly read name.
+
+The way we do this is to take a large chunk, allocating memory from
+low addresses. When you want to build a symbol in the chunk you just
+add chars above the current "high water mark" in the chunk. When you
+have finished adding chars, because you got to the end of the symbol,
+you know how long the chars are, and you can create a new object.
+Mostly the chars will not burst over the highest address of the chunk,
+because you would typically expect a chunk to be (say) 100 times as
+long as an average object.
+
+In case that isn't clear, when we have enough chars to make up
+the object, THEY ARE ALREADY CONTIGUOUS IN THE CHUNK (guaranteed)
+so we just point to it where it lies. No moving of chars is
+needed and this is the second win: potentially long strings need
+never be explicitly shuffled. Once an object is formed, it does not
+change its address during its lifetime.
+
+When the chars burst over a chunk boundary, we allocate a larger
+chunk, and then copy the partly formed object from the end of the old
+chunk to the beginning of the new larger chunk. We then carry on
+accreting characters to the end of the object as we normally would.
+
+A special macro is provided to add a single char at a time to a
+growing object. This allows the use of register variables, which
+break the ordinary 'growth' macro.
+
+Summary:
+ We allocate large chunks.
+ We carve out one object at a time from the current chunk.
+ Once carved, an object never moves.
+ We are free to append data of any size to the currently
+ growing object.
+ Exactly one object is growing in an obstack at any one time.
+ You can run one obstack per control block.
+ You may have as many control blocks as you dare.
+ Because of the way we do it, you can `unwind' an obstack
+ back to a previous state. (You may remove objects much
+ as you would with a stack.)
+*/
+
+
+/* Don't do the contents of this file more than once. */
+
+#ifndef _OBSTACK_H
+#define _OBSTACK_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* We need the type of a pointer subtraction. If __PTRDIFF_TYPE__ is
+ defined, as with GNU C, use that; that way we don't pollute the
+ namespace with <stddef.h>'s symbols. Otherwise, include <stddef.h>
+ and use ptrdiff_t. */
+
+#ifdef __PTRDIFF_TYPE__
+# define PTR_INT_TYPE __PTRDIFF_TYPE__
+#else
+# include <stddef.h>
+# define PTR_INT_TYPE ptrdiff_t
+#endif
+
+/* If B is the base of an object addressed by P, return the result of
+ aligning P to the next multiple of A + 1. B and P must be of type
+ char *. A + 1 must be a power of 2. */
+
+#define __BPTR_ALIGN(B, P, A) ((B) + (((P) - (B) + (A)) & ~(A)))
+
+/* Similiar to _BPTR_ALIGN (B, P, A), except optimize the common case
+ where pointers can be converted to integers, aligned as integers,
+ and converted back again. If PTR_INT_TYPE is narrower than a
+ pointer (e.g., the AS/400), play it safe and compute the alignment
+ relative to B. Otherwise, use the faster strategy of computing the
+ alignment relative to 0. */
+
+#define __PTR_ALIGN(B, P, A) \
+ __BPTR_ALIGN (sizeof (PTR_INT_TYPE) < sizeof (void *) ? (B) : (char *) 0, \
+ P, A)
+
+#include <string.h>
+
+struct _obstack_chunk /* Lives at front of each chunk. */
+{
+ char *limit; /* 1 past end of this chunk */
+ struct _obstack_chunk *prev; /* address of prior chunk or NULL */
+ char contents[4]; /* objects begin here */
+};
+
+struct obstack /* control current object in current chunk */
+{
+ long chunk_size; /* preferred size to allocate chunks in */
+ struct _obstack_chunk *chunk; /* address of current struct obstack_chunk */
+ char *object_base; /* address of object we are building */
+ char *next_free; /* where to add next char to current object */
+ char *chunk_limit; /* address of char after current chunk */
+ union
+ {
+ PTR_INT_TYPE tempint;
+ void *tempptr;
+ } temp; /* Temporary for some macros. */
+ int alignment_mask; /* Mask of alignment for each object. */
+ /* These prototypes vary based on `use_extra_arg', and we use
+ casts to the prototypeless function type in all assignments,
+ but having prototypes here quiets -Wstrict-prototypes. */
+ struct _obstack_chunk *(*chunkfun) (void *, long);
+ void (*freefun) (void *, struct _obstack_chunk *);
+ void *extra_arg; /* first arg for chunk alloc/dealloc funcs */
+ unsigned use_extra_arg:1; /* chunk alloc/dealloc funcs take extra arg */
+ unsigned maybe_empty_object:1;/* There is a possibility that the current
+ chunk contains a zero-length object. This
+ prevents freeing the chunk if we allocate
+ a bigger chunk to replace it. */
+ unsigned alloc_failed:1; /* No longer used, as we now call the failed
+ handler on error, but retained for binary
+ compatibility. */
+};
+
+/* Declare the external functions we use; they are in obstack.c. */
+
+extern void _obstack_newchunk (struct obstack *, int);
+extern int _obstack_begin (struct obstack *, int, int,
+ void *(*) (long), void (*) (void *));
+extern int _obstack_begin_1 (struct obstack *, int, int,
+ void *(*) (void *, long),
+ void (*) (void *, void *), void *);
+extern int _obstack_memory_used (struct obstack *);
+
+void obstack_free (struct obstack *obstack, void *block);
+
+
+/* Error handler called when `obstack_chunk_alloc' failed to allocate
+ more memory. This can be set to a user defined function which
+ should either abort gracefully or use longjump - but shouldn't
+ return. The default action is to print a message and abort. */
+extern void (*obstack_alloc_failed_handler) (void);
+
+/* Exit value used when `print_and_abort' is used. */
+extern int obstack_exit_failure;
+
+/* Pointer to beginning of object being allocated or to be allocated next.
+ Note that this might not be the final address of the object
+ because a new chunk might be needed to hold the final size. */
+
+#define obstack_base(h) ((void *) (h)->object_base)
+
+/* Size for allocating ordinary chunks. */
+
+#define obstack_chunk_size(h) ((h)->chunk_size)
+
+/* Pointer to next byte not yet allocated in current chunk. */
+
+#define obstack_next_free(h) ((h)->next_free)
+
+/* Mask specifying low bits that should be clear in address of an object. */
+
+#define obstack_alignment_mask(h) ((h)->alignment_mask)
+
+/* To prevent prototype warnings provide complete argument list. */
+#define obstack_init(h) \
+ _obstack_begin ((h), 0, 0, \
+ (void *(*) (long)) obstack_chunk_alloc, \
+ (void (*) (void *)) obstack_chunk_free)
+
+#define obstack_begin(h, size) \
+ _obstack_begin ((h), (size), 0, \
+ (void *(*) (long)) obstack_chunk_alloc, \
+ (void (*) (void *)) obstack_chunk_free)
+
+#define obstack_specify_allocation(h, size, alignment, chunkfun, freefun) \
+ _obstack_begin ((h), (size), (alignment), \
+ (void *(*) (long)) (chunkfun), \
+ (void (*) (void *)) (freefun))
+
+#define obstack_specify_allocation_with_arg(h, size, alignment, chunkfun, freefun, arg) \
+ _obstack_begin_1 ((h), (size), (alignment), \
+ (void *(*) (void *, long)) (chunkfun), \
+ (void (*) (void *, void *)) (freefun), (arg))
+
+#define obstack_chunkfun(h, newchunkfun) \
+ ((h) -> chunkfun = (struct _obstack_chunk *(*)(void *, long)) (newchunkfun))
+
+#define obstack_freefun(h, newfreefun) \
+ ((h) -> freefun = (void (*)(void *, struct _obstack_chunk *)) (newfreefun))
+
+#define obstack_1grow_fast(h,achar) (*((h)->next_free)++ = (achar))
+
+#define obstack_blank_fast(h,n) ((h)->next_free += (n))
+
+#define obstack_memory_used(h) _obstack_memory_used (h)
+
+#if defined __GNUC__ && defined __STDC__ && __STDC__
+/* NextStep 2.0 cc is really gcc 1.93 but it defines __GNUC__ = 2 and
+ does not implement __extension__. But that compiler doesn't define
+ __GNUC_MINOR__. */
+# if __GNUC__ < 2 || (__NeXT__ && !__GNUC_MINOR__)
+# define __extension__
+# endif
+
+/* For GNU C, if not -traditional,
+ we can define these macros to compute all args only once
+ without using a global variable.
+ Also, we can avoid using the `temp' slot, to make faster code. */
+
+# define obstack_object_size(OBSTACK) \
+ __extension__ \
+ ({ struct obstack const *__o = (OBSTACK); \
+ (unsigned) (__o->next_free - __o->object_base); })
+
+# define obstack_room(OBSTACK) \
+ __extension__ \
+ ({ struct obstack const *__o = (OBSTACK); \
+ (unsigned) (__o->chunk_limit - __o->next_free); })
+
+# define obstack_make_room(OBSTACK,length) \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); \
+ int __len = (length); \
+ if (__o->chunk_limit - __o->next_free < __len) \
+ _obstack_newchunk (__o, __len); \
+ (void) 0; })
+
+# define obstack_empty_p(OBSTACK) \
+ __extension__ \
+ ({ struct obstack const *__o = (OBSTACK); \
+ (__o->chunk->prev == 0 \
+ && __o->next_free == __PTR_ALIGN ((char *) __o->chunk, \
+ __o->chunk->contents, \
+ __o->alignment_mask)); })
+
+# define obstack_grow(OBSTACK,where,length) \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); \
+ int __len = (length); \
+ if (__o->next_free + __len > __o->chunk_limit) \
+ _obstack_newchunk (__o, __len); \
+ memcpy (__o->next_free, where, __len); \
+ __o->next_free += __len; \
+ (void) 0; })
+
+# define obstack_grow0(OBSTACK,where,length) \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); \
+ int __len = (length); \
+ if (__o->next_free + __len + 1 > __o->chunk_limit) \
+ _obstack_newchunk (__o, __len + 1); \
+ memcpy (__o->next_free, where, __len); \
+ __o->next_free += __len; \
+ *(__o->next_free)++ = 0; \
+ (void) 0; })
+
+# define obstack_1grow(OBSTACK,datum) \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); \
+ if (__o->next_free + 1 > __o->chunk_limit) \
+ _obstack_newchunk (__o, 1); \
+ obstack_1grow_fast (__o, datum); \
+ (void) 0; })
+
+/* These assume that the obstack alignment is good enough for pointers
+ or ints, and that the data added so far to the current object
+ shares that much alignment. */
+
+# define obstack_ptr_grow(OBSTACK,datum) \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); \
+ if (__o->next_free + sizeof (void *) > __o->chunk_limit) \
+ _obstack_newchunk (__o, sizeof (void *)); \
+ obstack_ptr_grow_fast (__o, datum); }) \
+
+# define obstack_int_grow(OBSTACK,datum) \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); \
+ if (__o->next_free + sizeof (int) > __o->chunk_limit) \
+ _obstack_newchunk (__o, sizeof (int)); \
+ obstack_int_grow_fast (__o, datum); })
+
+# define obstack_ptr_grow_fast(OBSTACK,aptr) \
+__extension__ \
+({ struct obstack *__o1 = (OBSTACK); \
+ *(const void **) __o1->next_free = (aptr); \
+ __o1->next_free += sizeof (const void *); \
+ (void) 0; })
+
+# define obstack_int_grow_fast(OBSTACK,aint) \
+__extension__ \
+({ struct obstack *__o1 = (OBSTACK); \
+ *(int *) __o1->next_free = (aint); \
+ __o1->next_free += sizeof (int); \
+ (void) 0; })
+
+# define obstack_blank(OBSTACK,length) \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); \
+ int __len = (length); \
+ if (__o->chunk_limit - __o->next_free < __len) \
+ _obstack_newchunk (__o, __len); \
+ obstack_blank_fast (__o, __len); \
+ (void) 0; })
+
+# define obstack_alloc(OBSTACK,length) \
+__extension__ \
+({ struct obstack *__h = (OBSTACK); \
+ obstack_blank (__h, (length)); \
+ obstack_finish (__h); })
+
+# define obstack_copy(OBSTACK,where,length) \
+__extension__ \
+({ struct obstack *__h = (OBSTACK); \
+ obstack_grow (__h, (where), (length)); \
+ obstack_finish (__h); })
+
+# define obstack_copy0(OBSTACK,where,length) \
+__extension__ \
+({ struct obstack *__h = (OBSTACK); \
+ obstack_grow0 (__h, (where), (length)); \
+ obstack_finish (__h); })
+
+/* The local variable is named __o1 to avoid a name conflict
+ when obstack_blank is called. */
+# define obstack_finish(OBSTACK) \
+__extension__ \
+({ struct obstack *__o1 = (OBSTACK); \
+ void *__value = (void *) __o1->object_base; \
+ if (__o1->next_free == __value) \
+ __o1->maybe_empty_object = 1; \
+ __o1->next_free \
+ = __PTR_ALIGN (__o1->object_base, __o1->next_free, \
+ __o1->alignment_mask); \
+ if (__o1->next_free - (char *)__o1->chunk \
+ > __o1->chunk_limit - (char *)__o1->chunk) \
+ __o1->next_free = __o1->chunk_limit; \
+ __o1->object_base = __o1->next_free; \
+ __value; })
+
+# define obstack_free(OBSTACK, OBJ) \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); \
+ void *__obj = (OBJ); \
+ if (__obj > (void *)__o->chunk && __obj < (void *)__o->chunk_limit) \
+ __o->next_free = __o->object_base = (char *)__obj; \
+ else (obstack_free) (__o, __obj); })
+
+#else /* not __GNUC__ or not __STDC__ */
+
+# define obstack_object_size(h) \
+ (unsigned) ((h)->next_free - (h)->object_base)
+
+# define obstack_room(h) \
+ (unsigned) ((h)->chunk_limit - (h)->next_free)
+
+# define obstack_empty_p(h) \
+ ((h)->chunk->prev == 0 \
+ && (h)->next_free == __PTR_ALIGN ((char *) (h)->chunk, \
+ (h)->chunk->contents, \
+ (h)->alignment_mask))
+
+/* Note that the call to _obstack_newchunk is enclosed in (..., 0)
+ so that we can avoid having void expressions
+ in the arms of the conditional expression.
+ Casting the third operand to void was tried before,
+ but some compilers won't accept it. */
+
+# define obstack_make_room(h,length) \
+( (h)->temp.tempint = (length), \
+ (((h)->next_free + (h)->temp.tempint > (h)->chunk_limit) \
+ ? (_obstack_newchunk ((h), (h)->temp.tempint), 0) : 0))
+
+# define obstack_grow(h,where,length) \
+( (h)->temp.tempint = (length), \
+ (((h)->next_free + (h)->temp.tempint > (h)->chunk_limit) \
+ ? (_obstack_newchunk ((h), (h)->temp.tempint), 0) : 0), \
+ memcpy ((h)->next_free, where, (h)->temp.tempint), \
+ (h)->next_free += (h)->temp.tempint)
+
+# define obstack_grow0(h,where,length) \
+( (h)->temp.tempint = (length), \
+ (((h)->next_free + (h)->temp.tempint + 1 > (h)->chunk_limit) \
+ ? (_obstack_newchunk ((h), (h)->temp.tempint + 1), 0) : 0), \
+ memcpy ((h)->next_free, where, (h)->temp.tempint), \
+ (h)->next_free += (h)->temp.tempint, \
+ *((h)->next_free)++ = 0)
+
+# define obstack_1grow(h,datum) \
+( (((h)->next_free + 1 > (h)->chunk_limit) \
+ ? (_obstack_newchunk ((h), 1), 0) : 0), \
+ obstack_1grow_fast (h, datum))
+
+# define obstack_ptr_grow(h,datum) \
+( (((h)->next_free + sizeof (char *) > (h)->chunk_limit) \
+ ? (_obstack_newchunk ((h), sizeof (char *)), 0) : 0), \
+ obstack_ptr_grow_fast (h, datum))
+
+# define obstack_int_grow(h,datum) \
+( (((h)->next_free + sizeof (int) > (h)->chunk_limit) \
+ ? (_obstack_newchunk ((h), sizeof (int)), 0) : 0), \
+ obstack_int_grow_fast (h, datum))
+
+# define obstack_ptr_grow_fast(h,aptr) \
+ (((const void **) ((h)->next_free += sizeof (void *)))[-1] = (aptr))
+
+# define obstack_int_grow_fast(h,aint) \
+ (((int *) ((h)->next_free += sizeof (int)))[-1] = (aint))
+
+# define obstack_blank(h,length) \
+( (h)->temp.tempint = (length), \
+ (((h)->chunk_limit - (h)->next_free < (h)->temp.tempint) \
+ ? (_obstack_newchunk ((h), (h)->temp.tempint), 0) : 0), \
+ obstack_blank_fast (h, (h)->temp.tempint))
+
+# define obstack_alloc(h,length) \
+ (obstack_blank ((h), (length)), obstack_finish ((h)))
+
+# define obstack_copy(h,where,length) \
+ (obstack_grow ((h), (where), (length)), obstack_finish ((h)))
+
+# define obstack_copy0(h,where,length) \
+ (obstack_grow0 ((h), (where), (length)), obstack_finish ((h)))
+
+# define obstack_finish(h) \
+( ((h)->next_free == (h)->object_base \
+ ? (((h)->maybe_empty_object = 1), 0) \
+ : 0), \
+ (h)->temp.tempptr = (h)->object_base, \
+ (h)->next_free \
+ = __PTR_ALIGN ((h)->object_base, (h)->next_free, \
+ (h)->alignment_mask), \
+ (((h)->next_free - (char *) (h)->chunk \
+ > (h)->chunk_limit - (char *) (h)->chunk) \
+ ? ((h)->next_free = (h)->chunk_limit) : 0), \
+ (h)->object_base = (h)->next_free, \
+ (h)->temp.tempptr)
+
+# define obstack_free(h,obj) \
+( (h)->temp.tempint = (char *) (obj) - (char *) (h)->chunk, \
+ ((((h)->temp.tempint > 0 \
+ && (h)->temp.tempint < (h)->chunk_limit - (char *) (h)->chunk)) \
+ ? (int) ((h)->next_free = (h)->object_base \
+ = (h)->temp.tempint + (char *) (h)->chunk) \
+ : (((obstack_free) ((h), (h)->temp.tempint + (char *) (h)->chunk), 0), 0)))
+
+#endif /* not __GNUC__ or not __STDC__ */
+
+#ifdef __cplusplus
+} /* C++ */
+#endif
+
+#endif /* obstack.h */
diff --git a/lib/pathmax.h b/lib/pathmax.h
new file mode 100644
index 0000000..6941e45
--- /dev/null
+++ b/lib/pathmax.h
@@ -0,0 +1,47 @@
+/* Define PATH_MAX somehow. Requires sys/types.h.
+ Copyright (C) 1992, 1999, 2001, 2003, 2005 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#ifndef _PATHMAX_H
+# define _PATHMAX_H
+
+# include <unistd.h>
+
+# include <limits.h>
+
+# ifndef _POSIX_PATH_MAX
+# define _POSIX_PATH_MAX 256
+# endif
+
+# if !defined PATH_MAX && defined _PC_PATH_MAX
+# define PATH_MAX (pathconf ("/", _PC_PATH_MAX) < 1 ? 1024 \
+ : pathconf ("/", _PC_PATH_MAX))
+# endif
+
+/* Don't include sys/param.h if it already has been. */
+# if defined HAVE_SYS_PARAM_H && !defined PATH_MAX && !defined MAXPATHLEN
+# include <sys/param.h>
+# endif
+
+# if !defined PATH_MAX && defined MAXPATHLEN
+# define PATH_MAX MAXPATHLEN
+# endif
+
+# ifndef PATH_MAX
+# define PATH_MAX _POSIX_PATH_MAX
+# endif
+
+#endif /* _PATHMAX_H */
diff --git a/lib/quote.c b/lib/quote.c
new file mode 100644
index 0000000..119be72
--- /dev/null
+++ b/lib/quote.c
@@ -0,0 +1,41 @@
+/* quote.c - quote arguments for output
+
+ Copyright (C) 1998, 1999, 2000, 2001, 2003, 2005, 2006 Free
+ Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* Written by Paul Eggert <eggert@twinsun.com> */
+
+#include <config.h>
+
+#include "quotearg.h"
+#include "quote.h"
+
+/* Return an unambiguous printable representation of NAME,
+ allocated in slot N, suitable for diagnostics. */
+char const *
+quote_n (int n, char const *name)
+{
+ return quotearg_n_style (n, locale_quoting_style, name);
+}
+
+/* Return an unambiguous printable representation of NAME,
+ suitable for diagnostics. */
+char const *
+quote (char const *name)
+{
+ return quote_n (0, name);
+}
diff --git a/lib/quote.h b/lib/quote.h
new file mode 100644
index 0000000..5400ead
--- /dev/null
+++ b/lib/quote.h
@@ -0,0 +1,22 @@
+/* quote.h - prototypes for quote.c
+
+ Copyright (C) 1998, 1999, 2000, 2001, 2003 Free Software
+ Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+
+char const *quote_n (int n, char const *name);
+char const *quote (char const *name);
diff --git a/lib/quotearg.c b/lib/quotearg.c
new file mode 100644
index 0000000..381f00a
--- /dev/null
+++ b/lib/quotearg.c
@@ -0,0 +1,688 @@
+/* quotearg.c - quote arguments for output
+
+ Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006 Free
+ Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* Written by Paul Eggert <eggert@twinsun.com> */
+
+#include <config.h>
+
+#include "quotearg.h"
+
+#include "xalloc.h"
+
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+#define N_(msgid) msgid
+
+#if HAVE_WCHAR_H
+
+/* BSD/OS 4.1 wchar.h requires FILE and struct tm to be declared. */
+# include <stdio.h>
+# include <time.h>
+
+# include <wchar.h>
+#endif
+
+#if !HAVE_MBRTOWC
+/* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
+ other macros are defined only for documentation and to satisfy C
+ syntax. */
+# undef MB_CUR_MAX
+# define MB_CUR_MAX 1
+# define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
+# define iswprint(wc) isprint ((unsigned char) (wc))
+# undef HAVE_MBSINIT
+#endif
+
+#if !defined mbsinit && !HAVE_MBSINIT
+# define mbsinit(ps) 1
+#endif
+
+#ifndef iswprint
+# if HAVE_WCTYPE_H
+# include <wctype.h>
+# endif
+# if !defined iswprint && !HAVE_ISWPRINT
+# define iswprint(wc) 1
+# endif
+#endif
+
+#ifndef SIZE_MAX
+# define SIZE_MAX ((size_t) -1)
+#endif
+
+#define INT_BITS (sizeof (int) * CHAR_BIT)
+
+struct quoting_options
+{
+ /* Basic quoting style. */
+ enum quoting_style style;
+
+ /* Quote the characters indicated by this bit vector even if the
+ quoting style would not normally require them to be quoted. */
+ unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
+};
+
+/* Names of quoting styles. */
+char const *const quoting_style_args[] =
+{
+ "literal",
+ "shell",
+ "shell-always",
+ "c",
+ "escape",
+ "locale",
+ "clocale",
+ 0
+};
+
+/* Correspondences to quoting style names. */
+enum quoting_style const quoting_style_vals[] =
+{
+ literal_quoting_style,
+ shell_quoting_style,
+ shell_always_quoting_style,
+ c_quoting_style,
+ escape_quoting_style,
+ locale_quoting_style,
+ clocale_quoting_style
+};
+
+/* The default quoting options. */
+static struct quoting_options default_quoting_options;
+
+/* Allocate a new set of quoting options, with contents initially identical
+ to O if O is not null, or to the default if O is null.
+ It is the caller's responsibility to free the result. */
+struct quoting_options *
+clone_quoting_options (struct quoting_options *o)
+{
+ int e = errno;
+ struct quoting_options *p = xmalloc (sizeof *p);
+ *p = *(o ? o : &default_quoting_options);
+ errno = e;
+ return p;
+}
+
+/* Get the value of O's quoting style. If O is null, use the default. */
+enum quoting_style
+get_quoting_style (struct quoting_options *o)
+{
+ return (o ? o : &default_quoting_options)->style;
+}
+
+/* In O (or in the default if O is null),
+ set the value of the quoting style to S. */
+void
+set_quoting_style (struct quoting_options *o, enum quoting_style s)
+{
+ (o ? o : &default_quoting_options)->style = s;
+}
+
+/* In O (or in the default if O is null),
+ set the value of the quoting options for character C to I.
+ Return the old value. Currently, the only values defined for I are
+ 0 (the default) and 1 (which means to quote the character even if
+ it would not otherwise be quoted). */
+int
+set_char_quoting (struct quoting_options *o, char c, int i)
+{
+ unsigned char uc = c;
+ unsigned int *p =
+ (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
+ int shift = uc % INT_BITS;
+ int r = (*p >> shift) & 1;
+ *p ^= ((i & 1) ^ r) << shift;
+ return r;
+}
+
+/* MSGID approximates a quotation mark. Return its translation if it
+ has one; otherwise, return either it or "\"", depending on S. */
+static char const *
+gettext_quote (char const *msgid, enum quoting_style s)
+{
+ char const *translation = _(msgid);
+ if (translation == msgid && s == clocale_quoting_style)
+ translation = "\"";
+ return translation;
+}
+
+/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
+ argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
+ non-quoting-style part of O to control quoting.
+ Terminate the output with a null character, and return the written
+ size of the output, not counting the terminating null.
+ If BUFFERSIZE is too small to store the output string, return the
+ value that would have been returned had BUFFERSIZE been large enough.
+ If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
+
+ This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
+ ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
+ style specified by O, and O may not be null. */
+
+static size_t
+quotearg_buffer_restyled (char *buffer, size_t buffersize,
+ char const *arg, size_t argsize,
+ enum quoting_style quoting_style,
+ struct quoting_options const *o)
+{
+ size_t i;
+ size_t len = 0;
+ char const *quote_string = 0;
+ size_t quote_string_len = 0;
+ bool backslash_escapes = false;
+ bool unibyte_locale = MB_CUR_MAX == 1;
+
+#define STORE(c) \
+ do \
+ { \
+ if (len < buffersize) \
+ buffer[len] = (c); \
+ len++; \
+ } \
+ while (0)
+
+ switch (quoting_style)
+ {
+ case c_quoting_style:
+ STORE ('"');
+ backslash_escapes = true;
+ quote_string = "\"";
+ quote_string_len = 1;
+ break;
+
+ case escape_quoting_style:
+ backslash_escapes = true;
+ break;
+
+ case locale_quoting_style:
+ case clocale_quoting_style:
+ {
+ /* TRANSLATORS:
+ Get translations for open and closing quotation marks.
+
+ The message catalog should translate "`" to a left
+ quotation mark suitable for the locale, and similarly for
+ "'". If the catalog has no translation,
+ locale_quoting_style quotes `like this', and
+ clocale_quoting_style quotes "like this".
+
+ For example, an American English Unicode locale should
+ translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
+ should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
+ MARK). A British English Unicode locale should instead
+ translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
+ U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
+
+ If you don't know what to put here, please see
+ <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs>
+ and use glyphs suitable for your language. */
+
+ char const *left = gettext_quote (N_("`"), quoting_style);
+ char const *right = gettext_quote (N_("'"), quoting_style);
+ for (quote_string = left; *quote_string; quote_string++)
+ STORE (*quote_string);
+ backslash_escapes = true;
+ quote_string = right;
+ quote_string_len = strlen (quote_string);
+ }
+ break;
+
+ case shell_always_quoting_style:
+ STORE ('\'');
+ quote_string = "'";
+ quote_string_len = 1;
+ break;
+
+ default:
+ break;
+ }
+
+ for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++)
+ {
+ unsigned char c;
+ unsigned char esc;
+
+ if (backslash_escapes
+ && quote_string_len
+ && i + quote_string_len <= argsize
+ && memcmp (arg + i, quote_string, quote_string_len) == 0)
+ STORE ('\\');
+
+ c = arg[i];
+ switch (c)
+ {
+ case '\0':
+ if (backslash_escapes)
+ {
+ STORE ('\\');
+ STORE ('0');
+ STORE ('0');
+ c = '0';
+ }
+ break;
+
+ case '?':
+ switch (quoting_style)
+ {
+ case shell_quoting_style:
+ goto use_shell_always_quoting_style;
+
+ case c_quoting_style:
+ if (i + 2 < argsize && arg[i + 1] == '?')
+ switch (arg[i + 2])
+ {
+ case '!': case '\'':
+ case '(': case ')': case '-': case '/':
+ case '<': case '=': case '>':
+ /* Escape the second '?' in what would otherwise be
+ a trigraph. */
+ c = arg[i + 2];
+ i += 2;
+ STORE ('?');
+ STORE ('\\');
+ STORE ('?');
+ break;
+
+ default:
+ break;
+ }
+ break;
+
+ default:
+ break;
+ }
+ break;
+
+ case '\a': esc = 'a'; goto c_escape;
+ case '\b': esc = 'b'; goto c_escape;
+ case '\f': esc = 'f'; goto c_escape;
+ case '\n': esc = 'n'; goto c_and_shell_escape;
+ case '\r': esc = 'r'; goto c_and_shell_escape;
+ case '\t': esc = 't'; goto c_and_shell_escape;
+ case '\v': esc = 'v'; goto c_escape;
+ case '\\': esc = c; goto c_and_shell_escape;
+
+ c_and_shell_escape:
+ if (quoting_style == shell_quoting_style)
+ goto use_shell_always_quoting_style;
+ c_escape:
+ if (backslash_escapes)
+ {
+ c = esc;
+ goto store_escape;
+ }
+ break;
+
+ case '{': case '}': /* sometimes special if isolated */
+ if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
+ break;
+ /* Fall through. */
+ case '#': case '~':
+ if (i != 0)
+ break;
+ /* Fall through. */
+ case ' ':
+ case '!': /* special in bash */
+ case '"': case '$': case '&':
+ case '(': case ')': case '*': case ';':
+ case '<':
+ case '=': /* sometimes special in 0th or (with "set -k") later args */
+ case '>': case '[':
+ case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
+ case '`': case '|':
+ /* A shell special character. In theory, '$' and '`' could
+ be the first bytes of multibyte characters, which means
+ we should check them with mbrtowc, but in practice this
+ doesn't happen so it's not worth worrying about. */
+ if (quoting_style == shell_quoting_style)
+ goto use_shell_always_quoting_style;
+ break;
+
+ case '\'':
+ switch (quoting_style)
+ {
+ case shell_quoting_style:
+ goto use_shell_always_quoting_style;
+
+ case shell_always_quoting_style:
+ STORE ('\'');
+ STORE ('\\');
+ STORE ('\'');
+ break;
+
+ default:
+ break;
+ }
+ break;
+
+ case '%': case '+': case ',': case '-': case '.': case '/':
+ case '0': case '1': case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9': case ':':
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+ case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+ case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+ case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
+ case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
+ case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
+ case 'o': case 'p': case 'q': case 'r': case 's': case 't':
+ case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
+ /* These characters don't cause problems, no matter what the
+ quoting style is. They cannot start multibyte sequences. */
+ break;
+
+ default:
+ /* If we have a multibyte sequence, copy it until we reach
+ its end, find an error, or come back to the initial shift
+ state. For C-like styles, if the sequence has
+ unprintable characters, escape the whole sequence, since
+ we can't easily escape single characters within it. */
+ {
+ /* Length of multibyte sequence found so far. */
+ size_t m;
+
+ bool printable;
+
+ if (unibyte_locale)
+ {
+ m = 1;
+ printable = isprint (c) != 0;
+ }
+ else
+ {
+ mbstate_t mbstate;
+ memset (&mbstate, 0, sizeof mbstate);
+
+ m = 0;
+ printable = true;
+ if (argsize == SIZE_MAX)
+ argsize = strlen (arg);
+
+ do
+ {
+ wchar_t w;
+ size_t bytes = mbrtowc (&w, &arg[i + m],
+ argsize - (i + m), &mbstate);
+ if (bytes == 0)
+ break;
+ else if (bytes == (size_t) -1)
+ {
+ printable = false;
+ break;
+ }
+ else if (bytes == (size_t) -2)
+ {
+ printable = false;
+ while (i + m < argsize && arg[i + m])
+ m++;
+ break;
+ }
+ else
+ {
+ /* Work around a bug with older shells that "see" a '\'
+ that is really the 2nd byte of a multibyte character.
+ In practice the problem is limited to ASCII
+ chars >= '@' that are shell special chars. */
+ if ('[' == 0x5b && quoting_style == shell_quoting_style)
+ {
+ size_t j;
+ for (j = 1; j < bytes; j++)
+ switch (arg[i + m + j])
+ {
+ case '[': case '\\': case '^':
+ case '`': case '|':
+ goto use_shell_always_quoting_style;
+
+ default:
+ break;
+ }
+ }
+
+ if (! iswprint (w))
+ printable = false;
+ m += bytes;
+ }
+ }
+ while (! mbsinit (&mbstate));
+ }
+
+ if (1 < m || (backslash_escapes && ! printable))
+ {
+ /* Output a multibyte sequence, or an escaped
+ unprintable unibyte character. */
+ size_t ilim = i + m;
+
+ for (;;)
+ {
+ if (backslash_escapes && ! printable)
+ {
+ STORE ('\\');
+ STORE ('0' + (c >> 6));
+ STORE ('0' + ((c >> 3) & 7));
+ c = '0' + (c & 7);
+ }
+ if (ilim <= i + 1)
+ break;
+ STORE (c);
+ c = arg[++i];
+ }
+
+ goto store_c;
+ }
+ }
+ }
+
+ if (! (backslash_escapes
+ && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
+ goto store_c;
+
+ store_escape:
+ STORE ('\\');
+
+ store_c:
+ STORE (c);
+ }
+
+ if (i == 0 && quoting_style == shell_quoting_style)
+ goto use_shell_always_quoting_style;
+
+ if (quote_string)
+ for (; *quote_string; quote_string++)
+ STORE (*quote_string);
+
+ if (len < buffersize)
+ buffer[len] = '\0';
+ return len;
+
+ use_shell_always_quoting_style:
+ return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
+ shell_always_quoting_style, o);
+}
+
+/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
+ argument ARG (of size ARGSIZE), using O to control quoting.
+ If O is null, use the default.
+ Terminate the output with a null character, and return the written
+ size of the output, not counting the terminating null.
+ If BUFFERSIZE is too small to store the output string, return the
+ value that would have been returned had BUFFERSIZE been large enough.
+ If ARGSIZE is SIZE_MAX, use the string length of the argument for
+ ARGSIZE. */
+size_t
+quotearg_buffer (char *buffer, size_t buffersize,
+ char const *arg, size_t argsize,
+ struct quoting_options const *o)
+{
+ struct quoting_options const *p = o ? o : &default_quoting_options;
+ int e = errno;
+ size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
+ p->style, p);
+ errno = e;
+ return r;
+}
+
+/* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly
+ allocated storage containing the quoted string. */
+char *
+quotearg_alloc (char const *arg, size_t argsize,
+ struct quoting_options const *o)
+{
+ int e = errno;
+ size_t bufsize = quotearg_buffer (0, 0, arg, argsize, o) + 1;
+ char *buf = xmalloc (bufsize);
+ quotearg_buffer (buf, bufsize, arg, argsize, o);
+ errno = e;
+ return buf;
+}
+
+/* Use storage slot N to return a quoted version of argument ARG.
+ ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a
+ null-terminated string.
+ OPTIONS specifies the quoting options.
+ The returned value points to static storage that can be
+ reused by the next call to this function with the same value of N.
+ N must be nonnegative. N is deliberately declared with type "int"
+ to allow for future extensions (using negative values). */
+static char *
+quotearg_n_options (int n, char const *arg, size_t argsize,
+ struct quoting_options const *options)
+{
+ int e = errno;
+
+ /* Preallocate a slot 0 buffer, so that the caller can always quote
+ one small component of a "memory exhausted" message in slot 0. */
+ static char slot0[256];
+ static unsigned int nslots = 1;
+ unsigned int n0 = n;
+ struct slotvec
+ {
+ size_t size;
+ char *val;
+ };
+ static struct slotvec slotvec0 = {sizeof slot0, slot0};
+ static struct slotvec *slotvec = &slotvec0;
+
+ if (n < 0)
+ abort ();
+
+ if (nslots <= n0)
+ {
+ /* FIXME: technically, the type of n1 should be `unsigned int',
+ but that evokes an unsuppressible warning from gcc-4.0.1 and
+ older. If gcc ever provides an option to suppress that warning,
+ revert to the original type, so that the test in xalloc_oversized
+ is once again performed only at compile time. */
+ size_t n1 = n0 + 1;
+
+ if (xalloc_oversized (n1, sizeof *slotvec))
+ xalloc_die ();
+
+ if (slotvec == &slotvec0)
+ {
+ slotvec = xmalloc (sizeof *slotvec);
+ *slotvec = slotvec0;
+ }
+ slotvec = xrealloc (slotvec, n1 * sizeof *slotvec);
+ memset (slotvec + nslots, 0, (n1 - nslots) * sizeof *slotvec);
+ nslots = n1;
+ }
+
+ {
+ size_t size = slotvec[n].size;
+ char *val = slotvec[n].val;
+ size_t qsize = quotearg_buffer (val, size, arg, argsize, options);
+
+ if (size <= qsize)
+ {
+ slotvec[n].size = size = qsize + 1;
+ if (val != slot0)
+ free (val);
+ slotvec[n].val = val = xmalloc (size);
+ quotearg_buffer (val, size, arg, argsize, options);
+ }
+
+ errno = e;
+ return val;
+ }
+}
+
+char *
+quotearg_n (int n, char const *arg)
+{
+ return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
+}
+
+char *
+quotearg (char const *arg)
+{
+ return quotearg_n (0, arg);
+}
+
+/* Return quoting options for STYLE, with no extra quoting. */
+static struct quoting_options
+quoting_options_from_style (enum quoting_style style)
+{
+ struct quoting_options o;
+ o.style = style;
+ memset (o.quote_these_too, 0, sizeof o.quote_these_too);
+ return o;
+}
+
+char *
+quotearg_n_style (int n, enum quoting_style s, char const *arg)
+{
+ struct quoting_options const o = quoting_options_from_style (s);
+ return quotearg_n_options (n, arg, SIZE_MAX, &o);
+}
+
+char *
+quotearg_n_style_mem (int n, enum quoting_style s,
+ char const *arg, size_t argsize)
+{
+ struct quoting_options const o = quoting_options_from_style (s);
+ return quotearg_n_options (n, arg, argsize, &o);
+}
+
+char *
+quotearg_style (enum quoting_style s, char const *arg)
+{
+ return quotearg_n_style (0, s, arg);
+}
+
+char *
+quotearg_char (char const *arg, char ch)
+{
+ struct quoting_options options;
+ options = default_quoting_options;
+ set_char_quoting (&options, ch, 1);
+ return quotearg_n_options (0, arg, SIZE_MAX, &options);
+}
+
+char *
+quotearg_colon (char const *arg)
+{
+ return quotearg_char (arg, ':');
+}
diff --git a/lib/quotearg.h b/lib/quotearg.h
new file mode 100644
index 0000000..24f26f7
--- /dev/null
+++ b/lib/quotearg.h
@@ -0,0 +1,137 @@
+/* quotearg.h - quote arguments for output
+
+ Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004 Free Software
+ Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* Written by Paul Eggert <eggert@twinsun.com> */
+
+#ifndef QUOTEARG_H_
+# define QUOTEARG_H_ 1
+
+# include <stddef.h>
+
+/* Basic quoting styles. */
+enum quoting_style
+ {
+ /* Output names as-is (ls --quoting-style=literal). */
+ literal_quoting_style,
+
+ /* Quote names for the shell if they contain shell metacharacters
+ or would cause ambiguous output (ls --quoting-style=shell). */
+ shell_quoting_style,
+
+ /* Quote names for the shell, even if they would normally not
+ require quoting (ls --quoting-style=shell-always). */
+ shell_always_quoting_style,
+
+ /* Quote names as for a C language string (ls --quoting-style=c). */
+ c_quoting_style,
+
+ /* Like c_quoting_style except omit the surrounding double-quote
+ characters (ls --quoting-style=escape). */
+ escape_quoting_style,
+
+ /* Like clocale_quoting_style, but quote `like this' instead of
+ "like this" in the default C locale (ls --quoting-style=locale). */
+ locale_quoting_style,
+
+ /* Like c_quoting_style except use quotation marks appropriate for
+ the locale (ls --quoting-style=clocale). */
+ clocale_quoting_style
+ };
+
+/* For now, --quoting-style=literal is the default, but this may change. */
+# ifndef DEFAULT_QUOTING_STYLE
+# define DEFAULT_QUOTING_STYLE literal_quoting_style
+# endif
+
+/* Names of quoting styles and their corresponding values. */
+extern char const *const quoting_style_args[];
+extern enum quoting_style const quoting_style_vals[];
+
+struct quoting_options;
+
+/* The functions listed below set and use a hidden variable
+ that contains the default quoting style options. */
+
+/* Allocate a new set of quoting options, with contents initially identical
+ to O if O is not null, or to the default if O is null.
+ It is the caller's responsibility to free the result. */
+struct quoting_options *clone_quoting_options (struct quoting_options *o);
+
+/* Get the value of O's quoting style. If O is null, use the default. */
+enum quoting_style get_quoting_style (struct quoting_options *o);
+
+/* In O (or in the default if O is null),
+ set the value of the quoting style to S. */
+void set_quoting_style (struct quoting_options *o, enum quoting_style s);
+
+/* In O (or in the default if O is null),
+ set the value of the quoting options for character C to I.
+ Return the old value. Currently, the only values defined for I are
+ 0 (the default) and 1 (which means to quote the character even if
+ it would not otherwise be quoted). */
+int set_char_quoting (struct quoting_options *o, char c, int i);
+
+/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
+ argument ARG (of size ARGSIZE), using O to control quoting.
+ If O is null, use the default.
+ Terminate the output with a null character, and return the written
+ size of the output, not counting the terminating null.
+ If BUFFERSIZE is too small to store the output string, return the
+ value that would have been returned had BUFFERSIZE been large enough.
+ If ARGSIZE is -1, use the string length of the argument for ARGSIZE. */
+size_t quotearg_buffer (char *buffer, size_t buffersize,
+ char const *arg, size_t argsize,
+ struct quoting_options const *o);
+
+/* Like quotearg_buffer, except return the result in a newly allocated
+ buffer. It is the caller's responsibility to free the result. */
+char *quotearg_alloc (char const *arg, size_t argsize,
+ struct quoting_options const *o);
+
+/* Use storage slot N to return a quoted version of the string ARG.
+ Use the default quoting options.
+ The returned value points to static storage that can be
+ reused by the next call to this function with the same value of N.
+ N must be nonnegative. */
+char *quotearg_n (int n, char const *arg);
+
+/* Equivalent to quotearg_n (0, ARG). */
+char *quotearg (char const *arg);
+
+/* Use style S and storage slot N to return a quoted version of the string ARG.
+ This is like quotearg_n (N, ARG), except that it uses S with no other
+ options to specify the quoting method. */
+char *quotearg_n_style (int n, enum quoting_style s, char const *arg);
+
+/* Use style S and storage slot N to return a quoted version of the
+ argument ARG of size ARGSIZE. This is like quotearg_n_style
+ (N, S, ARG), except it can quote null bytes. */
+char *quotearg_n_style_mem (int n, enum quoting_style s,
+ char const *arg, size_t argsize);
+
+/* Equivalent to quotearg_n_style (0, S, ARG). */
+char *quotearg_style (enum quoting_style s, char const *arg);
+
+/* Like quotearg (ARG), except also quote any instances of CH. */
+char *quotearg_char (char const *arg, char ch);
+
+/* Equivalent to quotearg_char (ARG, ':'). */
+char *quotearg_colon (char const *arg);
+
+#endif /* !QUOTEARG_H_ */
diff --git a/lib/ref-add.sin b/lib/ref-add.sin
new file mode 100644
index 0000000..bc5cc79
--- /dev/null
+++ b/lib/ref-add.sin
@@ -0,0 +1,30 @@
+# Add this package to a list of references stored in a text file.
+#
+# Copyright (C) 2000 Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+# Written by Bruno Haible <haible@clisp.cons.org>.
+#
+/^# Packages using this file: / {
+ s/# Packages using this file://
+ ta
+ :a
+ s/ @PACKAGE@ / @PACKAGE@ /
+ tb
+ s/ $/ @PACKAGE@ /
+ :b
+ s/^/# Packages using this file:/
+}
diff --git a/lib/ref-del.sin b/lib/ref-del.sin
new file mode 100644
index 0000000..e9301bf
--- /dev/null
+++ b/lib/ref-del.sin
@@ -0,0 +1,25 @@
+# Remove this package from a list of references stored in a text file.
+#
+# Copyright (C) 2000 Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+# Written by Bruno Haible <haible@clisp.cons.org>.
+#
+/^# Packages using this file: / {
+ s/# Packages using this file://
+ s/ @PACKAGE@ / /
+ s/^/# Packages using this file:/
+}
diff --git a/lib/regcomp.c b/lib/regcomp.c
new file mode 100644
index 0000000..6e317f5
--- /dev/null
+++ b/lib/regcomp.c
@@ -0,0 +1,3858 @@
+/* Extended regular expression matching and search library.
+ Copyright (C) 2002,2003,2004,2005,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
+ size_t length, reg_syntax_t syntax);
+static void re_compile_fastmap_iter (regex_t *bufp,
+ const re_dfastate_t *init_state,
+ char *fastmap);
+static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
+#ifdef RE_ENABLE_I18N
+static void free_charset (re_charset_t *cset);
+#endif /* RE_ENABLE_I18N */
+static void free_workarea_compile (regex_t *preg);
+static reg_errcode_t create_initial_state (re_dfa_t *dfa);
+#ifdef RE_ENABLE_I18N
+static void optimize_utf8 (re_dfa_t *dfa);
+#endif
+static reg_errcode_t analyze (regex_t *preg);
+static reg_errcode_t preorder (bin_tree_t *root,
+ reg_errcode_t (fn (void *, bin_tree_t *)),
+ void *extra);
+static reg_errcode_t postorder (bin_tree_t *root,
+ reg_errcode_t (fn (void *, bin_tree_t *)),
+ void *extra);
+static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node);
+static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node);
+static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg,
+ bin_tree_t *node);
+static reg_errcode_t calc_first (void *extra, bin_tree_t *node);
+static reg_errcode_t calc_next (void *extra, bin_tree_t *node);
+static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node);
+static Idx duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint);
+static Idx search_duplicated_node (const re_dfa_t *dfa, Idx org_node,
+ unsigned int constraint);
+static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
+static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
+ Idx node, bool root);
+static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
+static Idx fetch_number (re_string_t *input, re_token_t *token,
+ reg_syntax_t syntax);
+static int peek_token (re_token_t *token, re_string_t *input,
+ reg_syntax_t syntax) internal_function;
+static bin_tree_t *parse (re_string_t *regexp, regex_t *preg,
+ reg_syntax_t syntax, reg_errcode_t *err);
+static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg,
+ re_token_t *token, reg_syntax_t syntax,
+ Idx nest, reg_errcode_t *err);
+static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg,
+ re_token_t *token, reg_syntax_t syntax,
+ Idx nest, reg_errcode_t *err);
+static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg,
+ re_token_t *token, reg_syntax_t syntax,
+ Idx nest, reg_errcode_t *err);
+static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg,
+ re_token_t *token, reg_syntax_t syntax,
+ Idx nest, reg_errcode_t *err);
+static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp,
+ re_dfa_t *dfa, re_token_t *token,
+ reg_syntax_t syntax, reg_errcode_t *err);
+static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
+ re_token_t *token, reg_syntax_t syntax,
+ reg_errcode_t *err);
+static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
+ re_string_t *regexp,
+ re_token_t *token, int token_len,
+ re_dfa_t *dfa,
+ reg_syntax_t syntax,
+ bool accept_hyphen);
+static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
+ re_string_t *regexp,
+ re_token_t *token);
+#ifdef RE_ENABLE_I18N
+static reg_errcode_t build_equiv_class (bitset_t sbcset,
+ re_charset_t *mbcset,
+ Idx *equiv_class_alloc,
+ const unsigned char *name);
+static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
+ bitset_t sbcset,
+ re_charset_t *mbcset,
+ Idx *char_class_alloc,
+ const unsigned char *class_name,
+ reg_syntax_t syntax);
+#else /* not RE_ENABLE_I18N */
+static reg_errcode_t build_equiv_class (bitset_t sbcset,
+ const unsigned char *name);
+static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
+ bitset_t sbcset,
+ const unsigned char *class_name,
+ reg_syntax_t syntax);
+#endif /* not RE_ENABLE_I18N */
+static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
+ RE_TRANSLATE_TYPE trans,
+ const unsigned char *class_name,
+ const unsigned char *extra,
+ bool non_match, reg_errcode_t *err);
+static bin_tree_t *create_tree (re_dfa_t *dfa,
+ bin_tree_t *left, bin_tree_t *right,
+ re_token_type_t type);
+static bin_tree_t *create_token_tree (re_dfa_t *dfa,
+ bin_tree_t *left, bin_tree_t *right,
+ const re_token_t *token);
+static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
+static void free_token (re_token_t *node);
+static reg_errcode_t free_tree (void *extra, bin_tree_t *node);
+static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node);
+
+/* This table gives an error message for each of the error codes listed
+ in regex.h. Obviously the order here has to be same as there.
+ POSIX doesn't require that we do anything for REG_NOERROR,
+ but why not be nice? */
+
+static const char __re_error_msgid[] =
+ {
+#define REG_NOERROR_IDX 0
+ gettext_noop ("Success") /* REG_NOERROR */
+ "\0"
+#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
+ gettext_noop ("No match") /* REG_NOMATCH */
+ "\0"
+#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
+ gettext_noop ("Invalid regular expression") /* REG_BADPAT */
+ "\0"
+#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
+ gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
+ "\0"
+#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
+ gettext_noop ("Invalid character class name") /* REG_ECTYPE */
+ "\0"
+#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
+ gettext_noop ("Trailing backslash") /* REG_EESCAPE */
+ "\0"
+#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
+ gettext_noop ("Invalid back reference") /* REG_ESUBREG */
+ "\0"
+#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
+ gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
+ "\0"
+#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
+ gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
+ "\0"
+#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
+ gettext_noop ("Unmatched \\{") /* REG_EBRACE */
+ "\0"
+#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
+ gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
+ "\0"
+#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
+ gettext_noop ("Invalid range end") /* REG_ERANGE */
+ "\0"
+#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
+ gettext_noop ("Memory exhausted") /* REG_ESPACE */
+ "\0"
+#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
+ gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
+ "\0"
+#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
+ gettext_noop ("Premature end of regular expression") /* REG_EEND */
+ "\0"
+#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
+ gettext_noop ("Regular expression too big") /* REG_ESIZE */
+ "\0"
+#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
+ gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
+ };
+
+static const size_t __re_error_msgid_idx[] =
+ {
+ REG_NOERROR_IDX,
+ REG_NOMATCH_IDX,
+ REG_BADPAT_IDX,
+ REG_ECOLLATE_IDX,
+ REG_ECTYPE_IDX,
+ REG_EESCAPE_IDX,
+ REG_ESUBREG_IDX,
+ REG_EBRACK_IDX,
+ REG_EPAREN_IDX,
+ REG_EBRACE_IDX,
+ REG_BADBR_IDX,
+ REG_ERANGE_IDX,
+ REG_ESPACE_IDX,
+ REG_BADRPT_IDX,
+ REG_EEND_IDX,
+ REG_ESIZE_IDX,
+ REG_ERPAREN_IDX
+ };
+
+/* Entry points for GNU code. */
+
+/* re_compile_pattern is the GNU regular expression compiler: it
+ compiles PATTERN (of length LENGTH) and puts the result in BUFP.
+ Returns 0 if the pattern was valid, otherwise an error string.
+
+ Assumes the `allocated' (and perhaps `buffer') and `translate' fields
+ are set in BUFP on entry. */
+
+#ifdef _LIBC
+const char *
+re_compile_pattern (pattern, length, bufp)
+ const char *pattern;
+ size_t length;
+ struct re_pattern_buffer *bufp;
+#else /* size_t might promote */
+const char *
+re_compile_pattern (const char *pattern, size_t length,
+ struct re_pattern_buffer *bufp)
+#endif
+{
+ reg_errcode_t ret;
+
+ /* And GNU code determines whether or not to get register information
+ by passing null for the REGS argument to re_match, etc., not by
+ setting no_sub, unless RE_NO_SUB is set. */
+ bufp->no_sub = !!(re_syntax_options & RE_NO_SUB);
+
+ /* Match anchors at newline. */
+ bufp->newline_anchor = 1;
+
+ ret = re_compile_internal (bufp, pattern, length, re_syntax_options);
+
+ if (!ret)
+ return NULL;
+ return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
+}
+#ifdef _LIBC
+weak_alias (__re_compile_pattern, re_compile_pattern)
+#endif
+
+/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
+ also be assigned to arbitrarily: each pattern buffer stores its own
+ syntax, so it can be changed between regex compilations. */
+/* This has no initializer because initialized variables in Emacs
+ become read-only after dumping. */
+reg_syntax_t re_syntax_options;
+
+
+/* Specify the precise syntax of regexps for compilation. This provides
+ for compatibility for various utilities which historically have
+ different, incompatible syntaxes.
+
+ The argument SYNTAX is a bit mask comprised of the various bits
+ defined in regex.h. We return the old syntax. */
+
+reg_syntax_t
+re_set_syntax (syntax)
+ reg_syntax_t syntax;
+{
+ reg_syntax_t ret = re_syntax_options;
+
+ re_syntax_options = syntax;
+ return ret;
+}
+#ifdef _LIBC
+weak_alias (__re_set_syntax, re_set_syntax)
+#endif
+
+int
+re_compile_fastmap (bufp)
+ struct re_pattern_buffer *bufp;
+{
+ re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+ char *fastmap = bufp->fastmap;
+
+ memset (fastmap, '\0', sizeof (char) * SBC_MAX);
+ re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
+ if (dfa->init_state != dfa->init_state_word)
+ re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);
+ if (dfa->init_state != dfa->init_state_nl)
+ re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);
+ if (dfa->init_state != dfa->init_state_begbuf)
+ re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);
+ bufp->fastmap_accurate = 1;
+ return 0;
+}
+#ifdef _LIBC
+weak_alias (__re_compile_fastmap, re_compile_fastmap)
+#endif
+
+static inline void
+__attribute ((always_inline))
+re_set_fastmap (char *fastmap, bool icase, int ch)
+{
+ fastmap[ch] = 1;
+ if (icase)
+ fastmap[tolower (ch)] = 1;
+}
+
+/* Helper function for re_compile_fastmap.
+ Compile fastmap for the initial_state INIT_STATE. */
+
+static void
+re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
+ char *fastmap)
+{
+ re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+ Idx node_cnt;
+ bool icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
+ for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
+ {
+ Idx node = init_state->nodes.elems[node_cnt];
+ re_token_type_t type = dfa->nodes[node].type;
+
+ if (type == CHARACTER)
+ {
+ re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
+#ifdef RE_ENABLE_I18N
+ if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
+ {
+ unsigned char buf[MB_LEN_MAX];
+ unsigned char *p;
+ wchar_t wc;
+ mbstate_t state;
+
+ p = buf;
+ *p++ = dfa->nodes[node].opr.c;
+ while (++node < dfa->nodes_len
+ && dfa->nodes[node].type == CHARACTER
+ && dfa->nodes[node].mb_partial)
+ *p++ = dfa->nodes[node].opr.c;
+ memset (&state, '\0', sizeof (state));
+ if (mbrtowc (&wc, (const char *) buf, p - buf,
+ &state) == p - buf
+ && (__wcrtomb ((char *) buf, towlower (wc), &state)
+ != (size_t) -1))
+ re_set_fastmap (fastmap, false, buf[0]);
+ }
+#endif
+ }
+ else if (type == SIMPLE_BRACKET)
+ {
+ int i, ch;
+ for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
+ {
+ int j;
+ bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
+ for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
+ if (w & ((bitset_word_t) 1 << j))
+ re_set_fastmap (fastmap, icase, ch);
+ }
+ }
+#ifdef RE_ENABLE_I18N
+ else if (type == COMPLEX_BRACKET)
+ {
+ Idx i;
+ re_charset_t *cset = dfa->nodes[node].opr.mbcset;
+ if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes
+ || cset->nranges || cset->nchar_classes)
+ {
+# ifdef _LIBC
+ if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0)
+ {
+ /* In this case we want to catch the bytes which are
+ the first byte of any collation elements.
+ e.g. In da_DK, we want to catch 'a' since "aa"
+ is a valid collation element, and don't catch
+ 'b' since 'b' is the only collation element
+ which starts from 'b'. */
+ const int32_t *table = (const int32_t *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+ for (i = 0; i < SBC_MAX; ++i)
+ if (table[i] < 0)
+ re_set_fastmap (fastmap, icase, i);
+ }
+# else
+ if (dfa->mb_cur_max > 1)
+ for (i = 0; i < SBC_MAX; ++i)
+ if (__btowc (i) == WEOF)
+ re_set_fastmap (fastmap, icase, i);
+# endif /* not _LIBC */
+ }
+ for (i = 0; i < cset->nmbchars; ++i)
+ {
+ char buf[256];
+ mbstate_t state;
+ memset (&state, '\0', sizeof (state));
+ if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
+ re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
+ if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
+ {
+ if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
+ != (size_t) -1)
+ re_set_fastmap (fastmap, false, *(unsigned char *) buf);
+ }
+ }
+ }
+#endif /* RE_ENABLE_I18N */
+ else if (type == OP_PERIOD
+#ifdef RE_ENABLE_I18N
+ || type == OP_UTF8_PERIOD
+#endif /* RE_ENABLE_I18N */
+ || type == END_OF_RE)
+ {
+ memset (fastmap, '\1', sizeof (char) * SBC_MAX);
+ if (type == END_OF_RE)
+ bufp->can_be_null = 1;
+ return;
+ }
+ }
+}
+
+/* Entry point for POSIX code. */
+/* regcomp takes a regular expression as a string and compiles it.
+
+ PREG is a regex_t *. We do not expect any fields to be initialized,
+ since POSIX says we shouldn't. Thus, we set
+
+ `buffer' to the compiled pattern;
+ `used' to the length of the compiled pattern;
+ `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
+ REG_EXTENDED bit in CFLAGS is set; otherwise, to
+ RE_SYNTAX_POSIX_BASIC;
+ `newline_anchor' to REG_NEWLINE being set in CFLAGS;
+ `fastmap' to an allocated space for the fastmap;
+ `fastmap_accurate' to zero;
+ `re_nsub' to the number of subexpressions in PATTERN.
+
+ PATTERN is the address of the pattern string.
+
+ CFLAGS is a series of bits which affect compilation.
+
+ If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
+ use POSIX basic syntax.
+
+ If REG_NEWLINE is set, then . and [^...] don't match newline.
+ Also, regexec will try a match beginning after every newline.
+
+ If REG_ICASE is set, then we considers upper- and lowercase
+ versions of letters to be equivalent when matching.
+
+ If REG_NOSUB is set, then when PREG is passed to regexec, that
+ routine will report only success or failure, and nothing about the
+ registers.
+
+ It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
+ the return codes and their meanings.) */
+
+int
+regcomp (preg, pattern, cflags)
+ regex_t *__restrict preg;
+ const char *__restrict pattern;
+ int cflags;
+{
+ reg_errcode_t ret;
+ reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
+ : RE_SYNTAX_POSIX_BASIC);
+
+ preg->buffer = NULL;
+ preg->allocated = 0;
+ preg->used = 0;
+
+ /* Try to allocate space for the fastmap. */
+ preg->fastmap = re_malloc (char, SBC_MAX);
+ if (BE (preg->fastmap == NULL, 0))
+ return REG_ESPACE;
+
+ syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0;
+
+ /* If REG_NEWLINE is set, newlines are treated differently. */
+ if (cflags & REG_NEWLINE)
+ { /* REG_NEWLINE implies neither . nor [^...] match newline. */
+ syntax &= ~RE_DOT_NEWLINE;
+ syntax |= RE_HAT_LISTS_NOT_NEWLINE;
+ /* It also changes the matching behavior. */
+ preg->newline_anchor = 1;
+ }
+ else
+ preg->newline_anchor = 0;
+ preg->no_sub = !!(cflags & REG_NOSUB);
+ preg->translate = NULL;
+
+ ret = re_compile_internal (preg, pattern, strlen (pattern), syntax);
+
+ /* POSIX doesn't distinguish between an unmatched open-group and an
+ unmatched close-group: both are REG_EPAREN. */
+ if (ret == REG_ERPAREN)
+ ret = REG_EPAREN;
+
+ /* We have already checked preg->fastmap != NULL. */
+ if (BE (ret == REG_NOERROR, 1))
+ /* Compute the fastmap now, since regexec cannot modify the pattern
+ buffer. This function never fails in this implementation. */
+ (void) re_compile_fastmap (preg);
+ else
+ {
+ /* Some error occurred while compiling the expression. */
+ re_free (preg->fastmap);
+ preg->fastmap = NULL;
+ }
+
+ return (int) ret;
+}
+#ifdef _LIBC
+weak_alias (__regcomp, regcomp)
+#endif
+
+/* Returns a message corresponding to an error code, ERRCODE, returned
+ from either regcomp or regexec. We don't use PREG here. */
+
+#ifdef _LIBC
+size_t
+regerror (errcode, preg, errbuf, errbuf_size)
+ int errcode;
+ const regex_t *__restrict preg;
+ char *__restrict errbuf;
+ size_t errbuf_size;
+#else /* size_t might promote */
+size_t
+regerror (int errcode, const regex_t *__restrict preg,
+ char *__restrict errbuf, size_t errbuf_size)
+#endif
+{
+ const char *msg;
+ size_t msg_size;
+
+ if (BE (errcode < 0
+ || errcode >= (int) (sizeof (__re_error_msgid_idx)
+ / sizeof (__re_error_msgid_idx[0])), 0))
+ /* Only error codes returned by the rest of the code should be passed
+ to this routine. If we are given anything else, or if other regex
+ code generates an invalid error code, then the program has a bug.
+ Dump core so we can fix it. */
+ abort ();
+
+ msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
+
+ msg_size = strlen (msg) + 1; /* Includes the null. */
+
+ if (BE (errbuf_size != 0, 1))
+ {
+ if (BE (msg_size > errbuf_size, 0))
+ {
+#if defined HAVE_MEMPCPY || defined _LIBC
+ *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
+#else
+ memcpy (errbuf, msg, errbuf_size - 1);
+ errbuf[errbuf_size - 1] = 0;
+#endif
+ }
+ else
+ memcpy (errbuf, msg, msg_size);
+ }
+
+ return msg_size;
+}
+#ifdef _LIBC
+weak_alias (__regerror, regerror)
+#endif
+
+
+#ifdef RE_ENABLE_I18N
+/* This static array is used for the map to single-byte characters when
+ UTF-8 is used. Otherwise we would allocate memory just to initialize
+ it the same all the time. UTF-8 is the preferred encoding so this is
+ a worthwhile optimization. */
+static const bitset_t utf8_sb_map =
+{
+ /* Set the first 128 bits. */
+# if 4 * BITSET_WORD_BITS < ASCII_CHARS
+# error "bitset_word_t is narrower than 32 bits"
+# elif 3 * BITSET_WORD_BITS < ASCII_CHARS
+ BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX,
+# elif 2 * BITSET_WORD_BITS < ASCII_CHARS
+ BITSET_WORD_MAX, BITSET_WORD_MAX,
+# elif 1 * BITSET_WORD_BITS < ASCII_CHARS
+ BITSET_WORD_MAX,
+# endif
+ (BITSET_WORD_MAX
+ >> (SBC_MAX % BITSET_WORD_BITS == 0
+ ? 0
+ : BITSET_WORD_BITS - SBC_MAX % BITSET_WORD_BITS))
+};
+#endif
+
+
+static void
+free_dfa_content (re_dfa_t *dfa)
+{
+ Idx i, j;
+
+ if (dfa->nodes)
+ for (i = 0; i < dfa->nodes_len; ++i)
+ free_token (dfa->nodes + i);
+ re_free (dfa->nexts);
+ for (i = 0; i < dfa->nodes_len; ++i)
+ {
+ if (dfa->eclosures != NULL)
+ re_node_set_free (dfa->eclosures + i);
+ if (dfa->inveclosures != NULL)
+ re_node_set_free (dfa->inveclosures + i);
+ if (dfa->edests != NULL)
+ re_node_set_free (dfa->edests + i);
+ }
+ re_free (dfa->edests);
+ re_free (dfa->eclosures);
+ re_free (dfa->inveclosures);
+ re_free (dfa->nodes);
+
+ if (dfa->state_table)
+ for (i = 0; i <= dfa->state_hash_mask; ++i)
+ {
+ struct re_state_table_entry *entry = dfa->state_table + i;
+ for (j = 0; j < entry->num; ++j)
+ {
+ re_dfastate_t *state = entry->array[j];
+ free_state (state);
+ }
+ re_free (entry->array);
+ }
+ re_free (dfa->state_table);
+#ifdef RE_ENABLE_I18N
+ if (dfa->sb_char != utf8_sb_map)
+ re_free (dfa->sb_char);
+#endif
+ re_free (dfa->subexp_map);
+#ifdef DEBUG
+ re_free (dfa->re_str);
+#endif
+
+ re_free (dfa);
+}
+
+
+/* Free dynamically allocated space used by PREG. */
+
+void
+regfree (preg)
+ regex_t *preg;
+{
+ re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+ if (BE (dfa != NULL, 1))
+ free_dfa_content (dfa);
+ preg->buffer = NULL;
+ preg->allocated = 0;
+
+ re_free (preg->fastmap);
+ preg->fastmap = NULL;
+
+ re_free (preg->translate);
+ preg->translate = NULL;
+}
+#ifdef _LIBC
+weak_alias (__regfree, regfree)
+#endif
+
+/* Entry points compatible with 4.2 BSD regex library. We don't define
+ them unless specifically requested. */
+
+#if defined _REGEX_RE_COMP || defined _LIBC
+
+/* BSD has one and only one pattern buffer. */
+static struct re_pattern_buffer re_comp_buf;
+
+char *
+# ifdef _LIBC
+/* Make these definitions weak in libc, so POSIX programs can redefine
+ these names if they don't use our functions, and still use
+ regcomp/regexec above without link errors. */
+weak_function
+# endif
+re_comp (s)
+ const char *s;
+{
+ reg_errcode_t ret;
+ char *fastmap;
+
+ if (!s)
+ {
+ if (!re_comp_buf.buffer)
+ return gettext ("No previous regular expression");
+ return 0;
+ }
+
+ if (re_comp_buf.buffer)
+ {
+ fastmap = re_comp_buf.fastmap;
+ re_comp_buf.fastmap = NULL;
+ __regfree (&re_comp_buf);
+ memset (&re_comp_buf, '\0', sizeof (re_comp_buf));
+ re_comp_buf.fastmap = fastmap;
+ }
+
+ if (re_comp_buf.fastmap == NULL)
+ {
+ re_comp_buf.fastmap = (char *) malloc (SBC_MAX);
+ if (re_comp_buf.fastmap == NULL)
+ return (char *) gettext (__re_error_msgid
+ + __re_error_msgid_idx[(int) REG_ESPACE]);
+ }
+
+ /* Since `re_exec' always passes NULL for the `regs' argument, we
+ don't need to initialize the pattern buffer fields which affect it. */
+
+ /* Match anchors at newlines. */
+ re_comp_buf.newline_anchor = 1;
+
+ ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options);
+
+ if (!ret)
+ return NULL;
+
+ /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
+ return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
+}
+
+#ifdef _LIBC
+libc_freeres_fn (free_mem)
+{
+ __regfree (&re_comp_buf);
+}
+#endif
+
+#endif /* _REGEX_RE_COMP */
+
+/* Internal entry point.
+ Compile the regular expression PATTERN, whose length is LENGTH.
+ SYNTAX indicate regular expression's syntax. */
+
+static reg_errcode_t
+re_compile_internal (regex_t *preg, const char * pattern, size_t length,
+ reg_syntax_t syntax)
+{
+ reg_errcode_t err = REG_NOERROR;
+ re_dfa_t *dfa;
+ re_string_t regexp;
+
+ /* Initialize the pattern buffer. */
+ preg->fastmap_accurate = 0;
+ preg->syntax = syntax;
+ preg->not_bol = preg->not_eol = 0;
+ preg->used = 0;
+ preg->re_nsub = 0;
+ preg->can_be_null = 0;
+ preg->regs_allocated = REGS_UNALLOCATED;
+
+ /* Initialize the dfa. */
+ dfa = (re_dfa_t *) preg->buffer;
+ if (BE (preg->allocated < sizeof (re_dfa_t), 0))
+ {
+ /* If zero allocated, but buffer is non-null, try to realloc
+ enough space. This loses if buffer's address is bogus, but
+ that is the user's responsibility. If ->buffer is NULL this
+ is a simple allocation. */
+ dfa = re_realloc (preg->buffer, re_dfa_t, 1);
+ if (dfa == NULL)
+ return REG_ESPACE;
+ preg->allocated = sizeof (re_dfa_t);
+ preg->buffer = (unsigned char *) dfa;
+ }
+ preg->used = sizeof (re_dfa_t);
+
+ err = init_dfa (dfa, length);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ free_dfa_content (dfa);
+ preg->buffer = NULL;
+ preg->allocated = 0;
+ return err;
+ }
+#ifdef DEBUG
+ /* Note: length+1 will not overflow since it is checked in init_dfa. */
+ dfa->re_str = re_malloc (char, length + 1);
+ strncpy (dfa->re_str, pattern, length + 1);
+#endif
+
+ __libc_lock_init (dfa->lock);
+
+ err = re_string_construct (&regexp, pattern, length, preg->translate,
+ syntax & RE_ICASE, dfa);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_compile_internal_free_return:
+ free_workarea_compile (preg);
+ re_string_destruct (&regexp);
+ free_dfa_content (dfa);
+ preg->buffer = NULL;
+ preg->allocated = 0;
+ return err;
+ }
+
+ /* Parse the regular expression, and build a structure tree. */
+ preg->re_nsub = 0;
+ dfa->str_tree = parse (&regexp, preg, syntax, &err);
+ if (BE (dfa->str_tree == NULL, 0))
+ goto re_compile_internal_free_return;
+
+ /* Analyze the tree and create the nfa. */
+ err = analyze (preg);
+ if (BE (err != REG_NOERROR, 0))
+ goto re_compile_internal_free_return;
+
+#ifdef RE_ENABLE_I18N
+ /* If possible, do searching in single byte encoding to speed things up. */
+ if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
+ optimize_utf8 (dfa);
+#endif
+
+ /* Then create the initial state of the dfa. */
+ err = create_initial_state (dfa);
+
+ /* Release work areas. */
+ free_workarea_compile (preg);
+ re_string_destruct (&regexp);
+
+ if (BE (err != REG_NOERROR, 0))
+ {
+ free_dfa_content (dfa);
+ preg->buffer = NULL;
+ preg->allocated = 0;
+ }
+
+ return err;
+}
+
+/* Initialize DFA. We use the length of the regular expression PAT_LEN
+ as the initial length of some arrays. */
+
+static reg_errcode_t
+init_dfa (re_dfa_t *dfa, size_t pat_len)
+{
+ __re_size_t table_size;
+#ifndef _LIBC
+ char *codeset_name;
+#endif
+#ifdef RE_ENABLE_I18N
+ size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t));
+#else
+ size_t max_i18n_object_size = 0;
+#endif
+ size_t max_object_size =
+ MAX (sizeof (struct re_state_table_entry),
+ MAX (sizeof (re_token_t),
+ MAX (sizeof (re_node_set),
+ MAX (sizeof (regmatch_t),
+ max_i18n_object_size))));
+
+ memset (dfa, '\0', sizeof (re_dfa_t));
+
+ /* Force allocation of str_tree_storage the first time. */
+ dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
+
+ /* Avoid overflows. The extra "/ 2" is for the table_size doubling
+ calculation below, and for similar doubling calculations
+ elsewhere. And it's <= rather than <, because some of the
+ doubling calculations add 1 afterwards. */
+ if (BE (SIZE_MAX / max_object_size / 2 <= pat_len, 0))
+ return REG_ESPACE;
+
+ dfa->nodes_alloc = pat_len + 1;
+ dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
+
+ /* table_size = 2 ^ ceil(log pat_len) */
+ for (table_size = 1; ; table_size <<= 1)
+ if (table_size > pat_len)
+ break;
+
+ dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
+ dfa->state_hash_mask = table_size - 1;
+
+ dfa->mb_cur_max = MB_CUR_MAX;
+#ifdef _LIBC
+ if (dfa->mb_cur_max == 6
+ && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
+ dfa->is_utf8 = 1;
+ dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
+ != 0);
+#else
+# ifdef HAVE_LANGINFO_CODESET
+ codeset_name = nl_langinfo (CODESET);
+# else
+ codeset_name = getenv ("LC_ALL");
+ if (codeset_name == NULL || codeset_name[0] == '\0')
+ codeset_name = getenv ("LC_CTYPE");
+ if (codeset_name == NULL || codeset_name[0] == '\0')
+ codeset_name = getenv ("LANG");
+ if (codeset_name == NULL)
+ codeset_name = "";
+ else if (strchr (codeset_name, '.') != NULL)
+ codeset_name = strchr (codeset_name, '.') + 1;
+# endif
+
+ if (strcasecmp (codeset_name, "UTF-8") == 0
+ || strcasecmp (codeset_name, "UTF8") == 0)
+ dfa->is_utf8 = 1;
+
+ /* We check exhaustively in the loop below if this charset is a
+ superset of ASCII. */
+ dfa->map_notascii = 0;
+#endif
+
+#ifdef RE_ENABLE_I18N
+ if (dfa->mb_cur_max > 1)
+ {
+ if (dfa->is_utf8)
+ dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
+ else
+ {
+ int i, j, ch;
+
+ dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
+ if (BE (dfa->sb_char == NULL, 0))
+ return REG_ESPACE;
+
+ /* Set the bits corresponding to single byte chars. */
+ for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
+ for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
+ {
+ wint_t wch = __btowc (ch);
+ if (wch != WEOF)
+ dfa->sb_char[i] |= (bitset_word_t) 1 << j;
+# ifndef _LIBC
+ if (isascii (ch) && wch != ch)
+ dfa->map_notascii = 1;
+# endif
+ }
+ }
+ }
+#endif
+
+ if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0))
+ return REG_ESPACE;
+ return REG_NOERROR;
+}
+
+/* Initialize WORD_CHAR table, which indicate which character is
+ "word". In this case "word" means that it is the word construction
+ character used by some operators like "\<", "\>", etc. */
+
+static void
+internal_function
+init_word_char (re_dfa_t *dfa)
+{
+ int i, j, ch;
+ dfa->word_ops_used = 1;
+ for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
+ for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
+ if (isalnum (ch) || ch == '_')
+ dfa->word_char[i] |= (bitset_word_t) 1 << j;
+}
+
+/* Free the work area which are only used while compiling. */
+
+static void
+free_workarea_compile (regex_t *preg)
+{
+ re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+ bin_tree_storage_t *storage, *next;
+ for (storage = dfa->str_tree_storage; storage; storage = next)
+ {
+ next = storage->next;
+ re_free (storage);
+ }
+ dfa->str_tree_storage = NULL;
+ dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
+ dfa->str_tree = NULL;
+ re_free (dfa->org_indices);
+ dfa->org_indices = NULL;
+}
+
+/* Create initial states for all contexts. */
+
+static reg_errcode_t
+create_initial_state (re_dfa_t *dfa)
+{
+ Idx first, i;
+ reg_errcode_t err;
+ re_node_set init_nodes;
+
+ /* Initial states have the epsilon closure of the node which is
+ the first node of the regular expression. */
+ first = dfa->str_tree->first->node_idx;
+ dfa->init_node = first;
+ err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+
+ /* The back-references which are in initial states can epsilon transit,
+ since in this case all of the subexpressions can be null.
+ Then we add epsilon closures of the nodes which are the next nodes of
+ the back-references. */
+ if (dfa->nbackref > 0)
+ for (i = 0; i < init_nodes.nelem; ++i)
+ {
+ Idx node_idx = init_nodes.elems[i];
+ re_token_type_t type = dfa->nodes[node_idx].type;
+
+ Idx clexp_idx;
+ if (type != OP_BACK_REF)
+ continue;
+ for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
+ {
+ re_token_t *clexp_node;
+ clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
+ if (clexp_node->type == OP_CLOSE_SUBEXP
+ && clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx)
+ break;
+ }
+ if (clexp_idx == init_nodes.nelem)
+ continue;
+
+ if (type == OP_BACK_REF)
+ {
+ Idx dest_idx = dfa->edests[node_idx].elems[0];
+ if (!re_node_set_contains (&init_nodes, dest_idx))
+ {
+ re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
+ i = 0;
+ }
+ }
+ }
+
+ /* It must be the first time to invoke acquire_state. */
+ dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
+ /* We don't check ERR here, since the initial state must not be NULL. */
+ if (BE (dfa->init_state == NULL, 0))
+ return err;
+ if (dfa->init_state->has_constraint)
+ {
+ dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
+ CONTEXT_WORD);
+ dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
+ CONTEXT_NEWLINE);
+ dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
+ &init_nodes,
+ CONTEXT_NEWLINE
+ | CONTEXT_BEGBUF);
+ if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
+ || dfa->init_state_begbuf == NULL, 0))
+ return err;
+ }
+ else
+ dfa->init_state_word = dfa->init_state_nl
+ = dfa->init_state_begbuf = dfa->init_state;
+
+ re_node_set_free (&init_nodes);
+ return REG_NOERROR;
+}
+
+#ifdef RE_ENABLE_I18N
+/* If it is possible to do searching in single byte encoding instead of UTF-8
+ to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
+ DFA nodes where needed. */
+
+static void
+optimize_utf8 (re_dfa_t *dfa)
+{
+ Idx node;
+ int i;
+ bool mb_chars = false;
+ bool has_period = false;
+
+ for (node = 0; node < dfa->nodes_len; ++node)
+ switch (dfa->nodes[node].type)
+ {
+ case CHARACTER:
+ if (dfa->nodes[node].opr.c >= ASCII_CHARS)
+ mb_chars = true;
+ break;
+ case ANCHOR:
+ switch (dfa->nodes[node].opr.idx)
+ {
+ case LINE_FIRST:
+ case LINE_LAST:
+ case BUF_FIRST:
+ case BUF_LAST:
+ break;
+ default:
+ /* Word anchors etc. cannot be handled. */
+ return;
+ }
+ break;
+ case OP_PERIOD:
+ has_period = true;
+ break;
+ case OP_BACK_REF:
+ case OP_ALT:
+ case END_OF_RE:
+ case OP_DUP_ASTERISK:
+ case OP_OPEN_SUBEXP:
+ case OP_CLOSE_SUBEXP:
+ break;
+ case COMPLEX_BRACKET:
+ return;
+ case SIMPLE_BRACKET:
+ /* Just double check. */
+ {
+ int rshift = (ASCII_CHARS % BITSET_WORD_BITS == 0
+ ? 0
+ : BITSET_WORD_BITS - ASCII_CHARS % BITSET_WORD_BITS);
+ for (i = ASCII_CHARS / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)
+ {
+ if (dfa->nodes[node].opr.sbcset[i] >> rshift != 0)
+ return;
+ rshift = 0;
+ }
+ }
+ break;
+ default:
+ abort ();
+ }
+
+ if (mb_chars || has_period)
+ for (node = 0; node < dfa->nodes_len; ++node)
+ {
+ if (dfa->nodes[node].type == CHARACTER
+ && dfa->nodes[node].opr.c >= ASCII_CHARS)
+ dfa->nodes[node].mb_partial = 0;
+ else if (dfa->nodes[node].type == OP_PERIOD)
+ dfa->nodes[node].type = OP_UTF8_PERIOD;
+ }
+
+ /* The search can be in single byte locale. */
+ dfa->mb_cur_max = 1;
+ dfa->is_utf8 = 0;
+ dfa->has_mb_node = dfa->nbackref > 0 || has_period;
+}
+#endif
+
+/* Analyze the structure tree, and calculate "first", "next", "edest",
+ "eclosure", and "inveclosure". */
+
+static reg_errcode_t
+analyze (regex_t *preg)
+{
+ re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+ reg_errcode_t ret;
+
+ /* Allocate arrays. */
+ dfa->nexts = re_malloc (Idx, dfa->nodes_alloc);
+ dfa->org_indices = re_malloc (Idx, dfa->nodes_alloc);
+ dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
+ dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
+ if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL
+ || dfa->eclosures == NULL, 0))
+ return REG_ESPACE;
+
+ dfa->subexp_map = re_malloc (Idx, preg->re_nsub);
+ if (dfa->subexp_map != NULL)
+ {
+ Idx i;
+ for (i = 0; i < preg->re_nsub; i++)
+ dfa->subexp_map[i] = i;
+ preorder (dfa->str_tree, optimize_subexps, dfa);
+ for (i = 0; i < preg->re_nsub; i++)
+ if (dfa->subexp_map[i] != i)
+ break;
+ if (i == preg->re_nsub)
+ {
+ free (dfa->subexp_map);
+ dfa->subexp_map = NULL;
+ }
+ }
+
+ ret = postorder (dfa->str_tree, lower_subexps, preg);
+ if (BE (ret != REG_NOERROR, 0))
+ return ret;
+ ret = postorder (dfa->str_tree, calc_first, dfa);
+ if (BE (ret != REG_NOERROR, 0))
+ return ret;
+ preorder (dfa->str_tree, calc_next, dfa);
+ ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);
+ if (BE (ret != REG_NOERROR, 0))
+ return ret;
+ ret = calc_eclosure (dfa);
+ if (BE (ret != REG_NOERROR, 0))
+ return ret;
+
+ /* We only need this during the prune_impossible_nodes pass in regexec.c;
+ skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */
+ if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match)
+ || dfa->nbackref)
+ {
+ dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
+ if (BE (dfa->inveclosures == NULL, 0))
+ return REG_ESPACE;
+ ret = calc_inveclosure (dfa);
+ }
+
+ return ret;
+}
+
+/* Our parse trees are very unbalanced, so we cannot use a stack to
+ implement parse tree visits. Instead, we use parent pointers and
+ some hairy code in these two functions. */
+static reg_errcode_t
+postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
+ void *extra)
+{
+ bin_tree_t *node, *prev;
+
+ for (node = root; ; )
+ {
+ /* Descend down the tree, preferably to the left (or to the right
+ if that's the only child). */
+ while (node->left || node->right)
+ if (node->left)
+ node = node->left;
+ else
+ node = node->right;
+
+ do
+ {
+ reg_errcode_t err = fn (extra, node);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ if (node->parent == NULL)
+ return REG_NOERROR;
+ prev = node;
+ node = node->parent;
+ }
+ /* Go up while we have a node that is reached from the right. */
+ while (node->right == prev || node->right == NULL);
+ node = node->right;
+ }
+}
+
+static reg_errcode_t
+preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
+ void *extra)
+{
+ bin_tree_t *node;
+
+ for (node = root; ; )
+ {
+ reg_errcode_t err = fn (extra, node);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+
+ /* Go to the left node, or up and to the right. */
+ if (node->left)
+ node = node->left;
+ else
+ {
+ bin_tree_t *prev = NULL;
+ while (node->right == prev || node->right == NULL)
+ {
+ prev = node;
+ node = node->parent;
+ if (!node)
+ return REG_NOERROR;
+ }
+ node = node->right;
+ }
+ }
+}
+
+/* Optimization pass: if a SUBEXP is entirely contained, strip it and tell
+ re_search_internal to map the inner one's opr.idx to this one's. Adjust
+ backreferences as well. Requires a preorder visit. */
+static reg_errcode_t
+optimize_subexps (void *extra, bin_tree_t *node)
+{
+ re_dfa_t *dfa = (re_dfa_t *) extra;
+
+ if (node->token.type == OP_BACK_REF && dfa->subexp_map)
+ {
+ int idx = node->token.opr.idx;
+ node->token.opr.idx = dfa->subexp_map[idx];
+ dfa->used_bkref_map |= 1 << node->token.opr.idx;
+ }
+
+ else if (node->token.type == SUBEXP
+ && node->left && node->left->token.type == SUBEXP)
+ {
+ Idx other_idx = node->left->token.opr.idx;
+
+ node->left = node->left->left;
+ if (node->left)
+ node->left->parent = node;
+
+ dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
+ if (other_idx < BITSET_WORD_BITS)
+ dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx);
+ }
+
+ return REG_NOERROR;
+}
+
+/* Lowering pass: Turn each SUBEXP node into the appropriate concatenation
+ of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. */
+static reg_errcode_t
+lower_subexps (void *extra, bin_tree_t *node)
+{
+ regex_t *preg = (regex_t *) extra;
+ reg_errcode_t err = REG_NOERROR;
+
+ if (node->left && node->left->token.type == SUBEXP)
+ {
+ node->left = lower_subexp (&err, preg, node->left);
+ if (node->left)
+ node->left->parent = node;
+ }
+ if (node->right && node->right->token.type == SUBEXP)
+ {
+ node->right = lower_subexp (&err, preg, node->right);
+ if (node->right)
+ node->right->parent = node;
+ }
+
+ return err;
+}
+
+static bin_tree_t *
+lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node)
+{
+ re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+ bin_tree_t *body = node->left;
+ bin_tree_t *op, *cls, *tree1, *tree;
+
+ if (preg->no_sub
+ /* We do not optimize empty subexpressions, because otherwise we may
+ have bad CONCAT nodes with NULL children. This is obviously not
+ very common, so we do not lose much. An example that triggers
+ this case is the sed "script" /\(\)/x. */
+ && node->left != NULL
+ && (node->token.opr.idx >= BITSET_WORD_BITS
+ || !(dfa->used_bkref_map
+ & ((bitset_word_t) 1 << node->token.opr.idx))))
+ return node->left;
+
+ /* Convert the SUBEXP node to the concatenation of an
+ OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP. */
+ op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP);
+ cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
+ tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;
+ tree = create_tree (dfa, op, tree1, CONCAT);
+ if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+
+ op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx;
+ op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp;
+ return tree;
+}
+
+/* Pass 1 in building the NFA: compute FIRST and create unlinked automaton
+ nodes. Requires a postorder visit. */
+static reg_errcode_t
+calc_first (void *extra, bin_tree_t *node)
+{
+ re_dfa_t *dfa = (re_dfa_t *) extra;
+ if (node->token.type == CONCAT)
+ {
+ node->first = node->left->first;
+ node->node_idx = node->left->node_idx;
+ }
+ else
+ {
+ node->first = node;
+ node->node_idx = re_dfa_add_node (dfa, node->token);
+ if (BE (node->node_idx == REG_MISSING, 0))
+ return REG_ESPACE;
+ }
+ return REG_NOERROR;
+}
+
+/* Pass 2: compute NEXT on the tree. Preorder visit. */
+static reg_errcode_t
+calc_next (void *extra, bin_tree_t *node)
+{
+ switch (node->token.type)
+ {
+ case OP_DUP_ASTERISK:
+ node->left->next = node;
+ break;
+ case CONCAT:
+ node->left->next = node->right->first;
+ node->right->next = node->next;
+ break;
+ default:
+ if (node->left)
+ node->left->next = node->next;
+ if (node->right)
+ node->right->next = node->next;
+ break;
+ }
+ return REG_NOERROR;
+}
+
+/* Pass 3: link all DFA nodes to their NEXT node (any order will do). */
+static reg_errcode_t
+link_nfa_nodes (void *extra, bin_tree_t *node)
+{
+ re_dfa_t *dfa = (re_dfa_t *) extra;
+ Idx idx = node->node_idx;
+ reg_errcode_t err = REG_NOERROR;
+
+ switch (node->token.type)
+ {
+ case CONCAT:
+ break;
+
+ case END_OF_RE:
+ assert (node->next == NULL);
+ break;
+
+ case OP_DUP_ASTERISK:
+ case OP_ALT:
+ {
+ Idx left, right;
+ dfa->has_plural_match = 1;
+ if (node->left != NULL)
+ left = node->left->first->node_idx;
+ else
+ left = node->next->node_idx;
+ if (node->right != NULL)
+ right = node->right->first->node_idx;
+ else
+ right = node->next->node_idx;
+ assert (REG_VALID_INDEX (left));
+ assert (REG_VALID_INDEX (right));
+ err = re_node_set_init_2 (dfa->edests + idx, left, right);
+ }
+ break;
+
+ case ANCHOR:
+ case OP_OPEN_SUBEXP:
+ case OP_CLOSE_SUBEXP:
+ err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);
+ break;
+
+ case OP_BACK_REF:
+ dfa->nexts[idx] = node->next->node_idx;
+ if (node->token.type == OP_BACK_REF)
+ re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
+ break;
+
+ default:
+ assert (!IS_EPSILON_NODE (node->token.type));
+ dfa->nexts[idx] = node->next->node_idx;
+ break;
+ }
+
+ return err;
+}
+
+/* Duplicate the epsilon closure of the node ROOT_NODE.
+ Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
+ to their own constraint. */
+
+static reg_errcode_t
+internal_function
+duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node,
+ Idx root_node, unsigned int init_constraint)
+{
+ Idx org_node, clone_node;
+ bool ok;
+ unsigned int constraint = init_constraint;
+ for (org_node = top_org_node, clone_node = top_clone_node;;)
+ {
+ Idx org_dest, clone_dest;
+ if (dfa->nodes[org_node].type == OP_BACK_REF)
+ {
+ /* If the back reference epsilon-transit, its destination must
+ also have the constraint. Then duplicate the epsilon closure
+ of the destination of the back reference, and store it in
+ edests of the back reference. */
+ org_dest = dfa->nexts[org_node];
+ re_node_set_empty (dfa->edests + clone_node);
+ clone_dest = duplicate_node (dfa, org_dest, constraint);
+ if (BE (clone_dest == REG_MISSING, 0))
+ return REG_ESPACE;
+ dfa->nexts[clone_node] = dfa->nexts[org_node];
+ ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+ if (BE (! ok, 0))
+ return REG_ESPACE;
+ }
+ else if (dfa->edests[org_node].nelem == 0)
+ {
+ /* In case of the node can't epsilon-transit, don't duplicate the
+ destination and store the original destination as the
+ destination of the node. */
+ dfa->nexts[clone_node] = dfa->nexts[org_node];
+ break;
+ }
+ else if (dfa->edests[org_node].nelem == 1)
+ {
+ /* In case of the node can epsilon-transit, and it has only one
+ destination. */
+ org_dest = dfa->edests[org_node].elems[0];
+ re_node_set_empty (dfa->edests + clone_node);
+ if (dfa->nodes[org_node].type == ANCHOR)
+ {
+ /* In case of the node has another constraint, append it. */
+ if (org_node == root_node && clone_node != org_node)
+ {
+ /* ...but if the node is root_node itself, it means the
+ epsilon closure have a loop, then tie it to the
+ destination of the root_node. */
+ ok = re_node_set_insert (dfa->edests + clone_node, org_dest);
+ if (BE (! ok, 0))
+ return REG_ESPACE;
+ break;
+ }
+ constraint |= dfa->nodes[org_node].opr.ctx_type;
+ }
+ clone_dest = duplicate_node (dfa, org_dest, constraint);
+ if (BE (clone_dest == REG_MISSING, 0))
+ return REG_ESPACE;
+ ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+ if (BE (! ok, 0))
+ return REG_ESPACE;
+ }
+ else /* dfa->edests[org_node].nelem == 2 */
+ {
+ /* In case of the node can epsilon-transit, and it has two
+ destinations. In the bin_tree_t and DFA, that's '|' and '*'. */
+ org_dest = dfa->edests[org_node].elems[0];
+ re_node_set_empty (dfa->edests + clone_node);
+ /* Search for a duplicated node which satisfies the constraint. */
+ clone_dest = search_duplicated_node (dfa, org_dest, constraint);
+ if (clone_dest == REG_MISSING)
+ {
+ /* There are no such a duplicated node, create a new one. */
+ reg_errcode_t err;
+ clone_dest = duplicate_node (dfa, org_dest, constraint);
+ if (BE (clone_dest == REG_MISSING, 0))
+ return REG_ESPACE;
+ ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+ if (BE (! ok, 0))
+ return REG_ESPACE;
+ err = duplicate_node_closure (dfa, org_dest, clone_dest,
+ root_node, constraint);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ else
+ {
+ /* There are a duplicated node which satisfy the constraint,
+ use it to avoid infinite loop. */
+ ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+ if (BE (! ok, 0))
+ return REG_ESPACE;
+ }
+
+ org_dest = dfa->edests[org_node].elems[1];
+ clone_dest = duplicate_node (dfa, org_dest, constraint);
+ if (BE (clone_dest == REG_MISSING, 0))
+ return REG_ESPACE;
+ ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+ if (BE (! ok, 0))
+ return REG_ESPACE;
+ }
+ org_node = org_dest;
+ clone_node = clone_dest;
+ }
+ return REG_NOERROR;
+}
+
+/* Search for a node which is duplicated from the node ORG_NODE, and
+ satisfies the constraint CONSTRAINT. */
+
+static Idx
+search_duplicated_node (const re_dfa_t *dfa, Idx org_node,
+ unsigned int constraint)
+{
+ Idx idx;
+ for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx)
+ {
+ if (org_node == dfa->org_indices[idx]
+ && constraint == dfa->nodes[idx].constraint)
+ return idx; /* Found. */
+ }
+ return REG_MISSING; /* Not found. */
+}
+
+/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
+ Return the index of the new node, or REG_MISSING if insufficient storage is
+ available. */
+
+static Idx
+duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint)
+{
+ Idx dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
+ if (BE (dup_idx != REG_MISSING, 1))
+ {
+ dfa->nodes[dup_idx].constraint = constraint;
+ if (dfa->nodes[org_idx].type == ANCHOR)
+ dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type;
+ dfa->nodes[dup_idx].duplicated = 1;
+
+ /* Store the index of the original node. */
+ dfa->org_indices[dup_idx] = org_idx;
+ }
+ return dup_idx;
+}
+
+static reg_errcode_t
+calc_inveclosure (re_dfa_t *dfa)
+{
+ Idx src, idx;
+ bool ok;
+ for (idx = 0; idx < dfa->nodes_len; ++idx)
+ re_node_set_init_empty (dfa->inveclosures + idx);
+
+ for (src = 0; src < dfa->nodes_len; ++src)
+ {
+ Idx *elems = dfa->eclosures[src].elems;
+ for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
+ {
+ ok = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);
+ if (BE (! ok, 0))
+ return REG_ESPACE;
+ }
+ }
+
+ return REG_NOERROR;
+}
+
+/* Calculate "eclosure" for all the node in DFA. */
+
+static reg_errcode_t
+calc_eclosure (re_dfa_t *dfa)
+{
+ Idx node_idx;
+ bool incomplete;
+#ifdef DEBUG
+ assert (dfa->nodes_len > 0);
+#endif
+ incomplete = false;
+ /* For each nodes, calculate epsilon closure. */
+ for (node_idx = 0; ; ++node_idx)
+ {
+ reg_errcode_t err;
+ re_node_set eclosure_elem;
+ if (node_idx == dfa->nodes_len)
+ {
+ if (!incomplete)
+ break;
+ incomplete = false;
+ node_idx = 0;
+ }
+
+#ifdef DEBUG
+ assert (dfa->eclosures[node_idx].nelem != REG_MISSING);
+#endif
+
+ /* If we have already calculated, skip it. */
+ if (dfa->eclosures[node_idx].nelem != 0)
+ continue;
+ /* Calculate epsilon closure of `node_idx'. */
+ err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, true);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+
+ if (dfa->eclosures[node_idx].nelem == 0)
+ {
+ incomplete = true;
+ re_node_set_free (&eclosure_elem);
+ }
+ }
+ return REG_NOERROR;
+}
+
+/* Calculate epsilon closure of NODE. */
+
+static reg_errcode_t
+calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
+{
+ reg_errcode_t err;
+ unsigned int constraint;
+ Idx i;
+ bool incomplete;
+ bool ok;
+ re_node_set eclosure;
+ incomplete = false;
+ err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+
+ /* This indicates that we are calculating this node now.
+ We reference this value to avoid infinite loop. */
+ dfa->eclosures[node].nelem = REG_MISSING;
+
+ constraint = ((dfa->nodes[node].type == ANCHOR)
+ ? dfa->nodes[node].opr.ctx_type : 0);
+ /* If the current node has constraints, duplicate all nodes.
+ Since they must inherit the constraints. */
+ if (constraint
+ && dfa->edests[node].nelem
+ && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
+ {
+ err = duplicate_node_closure (dfa, node, node, node, constraint);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+
+ /* Expand each epsilon destination nodes. */
+ if (IS_EPSILON_NODE(dfa->nodes[node].type))
+ for (i = 0; i < dfa->edests[node].nelem; ++i)
+ {
+ re_node_set eclosure_elem;
+ Idx edest = dfa->edests[node].elems[i];
+ /* If calculating the epsilon closure of `edest' is in progress,
+ return intermediate result. */
+ if (dfa->eclosures[edest].nelem == REG_MISSING)
+ {
+ incomplete = true;
+ continue;
+ }
+ /* If we haven't calculated the epsilon closure of `edest' yet,
+ calculate now. Otherwise use calculated epsilon closure. */
+ if (dfa->eclosures[edest].nelem == 0)
+ {
+ err = calc_eclosure_iter (&eclosure_elem, dfa, edest, false);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ else
+ eclosure_elem = dfa->eclosures[edest];
+ /* Merge the epsilon closure of `edest'. */
+ re_node_set_merge (&eclosure, &eclosure_elem);
+ /* If the epsilon closure of `edest' is incomplete,
+ the epsilon closure of this node is also incomplete. */
+ if (dfa->eclosures[edest].nelem == 0)
+ {
+ incomplete = true;
+ re_node_set_free (&eclosure_elem);
+ }
+ }
+
+ /* Epsilon closures include itself. */
+ ok = re_node_set_insert (&eclosure, node);
+ if (BE (! ok, 0))
+ return REG_ESPACE;
+ if (incomplete && !root)
+ dfa->eclosures[node].nelem = 0;
+ else
+ dfa->eclosures[node] = eclosure;
+ *new_set = eclosure;
+ return REG_NOERROR;
+}
+
+/* Functions for token which are used in the parser. */
+
+/* Fetch a token from INPUT.
+ We must not use this function inside bracket expressions. */
+
+static void
+internal_function
+fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax)
+{
+ re_string_skip_bytes (input, peek_token (result, input, syntax));
+}
+
+/* Peek a token from INPUT, and return the length of the token.
+ We must not use this function inside bracket expressions. */
+
+static int
+internal_function
+peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
+{
+ unsigned char c;
+
+ if (re_string_eoi (input))
+ {
+ token->type = END_OF_RE;
+ return 0;
+ }
+
+ c = re_string_peek_byte (input, 0);
+ token->opr.c = c;
+
+ token->word_char = 0;
+#ifdef RE_ENABLE_I18N
+ token->mb_partial = 0;
+ if (input->mb_cur_max > 1 &&
+ !re_string_first_byte (input, re_string_cur_idx (input)))
+ {
+ token->type = CHARACTER;
+ token->mb_partial = 1;
+ return 1;
+ }
+#endif
+ if (c == '\\')
+ {
+ unsigned char c2;
+ if (re_string_cur_idx (input) + 1 >= re_string_length (input))
+ {
+ token->type = BACK_SLASH;
+ return 1;
+ }
+
+ c2 = re_string_peek_byte_case (input, 1);
+ token->opr.c = c2;
+ token->type = CHARACTER;
+#ifdef RE_ENABLE_I18N
+ if (input->mb_cur_max > 1)
+ {
+ wint_t wc = re_string_wchar_at (input,
+ re_string_cur_idx (input) + 1);
+ token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
+ }
+ else
+#endif
+ token->word_char = IS_WORD_CHAR (c2) != 0;
+
+ switch (c2)
+ {
+ case '|':
+ if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))
+ token->type = OP_ALT;
+ break;
+ case '1': case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9':
+ if (!(syntax & RE_NO_BK_REFS))
+ {
+ token->type = OP_BACK_REF;
+ token->opr.idx = c2 - '1';
+ }
+ break;
+ case '<':
+ if (!(syntax & RE_NO_GNU_OPS))
+ {
+ token->type = ANCHOR;
+ token->opr.ctx_type = WORD_FIRST;
+ }
+ break;
+ case '>':
+ if (!(syntax & RE_NO_GNU_OPS))
+ {
+ token->type = ANCHOR;
+ token->opr.ctx_type = WORD_LAST;
+ }
+ break;
+ case 'b':
+ if (!(syntax & RE_NO_GNU_OPS))
+ {
+ token->type = ANCHOR;
+ token->opr.ctx_type = WORD_DELIM;
+ }
+ break;
+ case 'B':
+ if (!(syntax & RE_NO_GNU_OPS))
+ {
+ token->type = ANCHOR;
+ token->opr.ctx_type = NOT_WORD_DELIM;
+ }
+ break;
+ case 'w':
+ if (!(syntax & RE_NO_GNU_OPS))
+ token->type = OP_WORD;
+ break;
+ case 'W':
+ if (!(syntax & RE_NO_GNU_OPS))
+ token->type = OP_NOTWORD;
+ break;
+ case 's':
+ if (!(syntax & RE_NO_GNU_OPS))
+ token->type = OP_SPACE;
+ break;
+ case 'S':
+ if (!(syntax & RE_NO_GNU_OPS))
+ token->type = OP_NOTSPACE;
+ break;
+ case '`':
+ if (!(syntax & RE_NO_GNU_OPS))
+ {
+ token->type = ANCHOR;
+ token->opr.ctx_type = BUF_FIRST;
+ }
+ break;
+ case '\'':
+ if (!(syntax & RE_NO_GNU_OPS))
+ {
+ token->type = ANCHOR;
+ token->opr.ctx_type = BUF_LAST;
+ }
+ break;
+ case '(':
+ if (!(syntax & RE_NO_BK_PARENS))
+ token->type = OP_OPEN_SUBEXP;
+ break;
+ case ')':
+ if (!(syntax & RE_NO_BK_PARENS))
+ token->type = OP_CLOSE_SUBEXP;
+ break;
+ case '+':
+ if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
+ token->type = OP_DUP_PLUS;
+ break;
+ case '?':
+ if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
+ token->type = OP_DUP_QUESTION;
+ break;
+ case '{':
+ if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
+ token->type = OP_OPEN_DUP_NUM;
+ break;
+ case '}':
+ if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
+ token->type = OP_CLOSE_DUP_NUM;
+ break;
+ default:
+ break;
+ }
+ return 2;
+ }
+
+ token->type = CHARACTER;
+#ifdef RE_ENABLE_I18N
+ if (input->mb_cur_max > 1)
+ {
+ wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
+ token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
+ }
+ else
+#endif
+ token->word_char = IS_WORD_CHAR (token->opr.c);
+
+ switch (c)
+ {
+ case '\n':
+ if (syntax & RE_NEWLINE_ALT)
+ token->type = OP_ALT;
+ break;
+ case '|':
+ if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
+ token->type = OP_ALT;
+ break;
+ case '*':
+ token->type = OP_DUP_ASTERISK;
+ break;
+ case '+':
+ if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
+ token->type = OP_DUP_PLUS;
+ break;
+ case '?':
+ if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
+ token->type = OP_DUP_QUESTION;
+ break;
+ case '{':
+ if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+ token->type = OP_OPEN_DUP_NUM;
+ break;
+ case '}':
+ if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+ token->type = OP_CLOSE_DUP_NUM;
+ break;
+ case '(':
+ if (syntax & RE_NO_BK_PARENS)
+ token->type = OP_OPEN_SUBEXP;
+ break;
+ case ')':
+ if (syntax & RE_NO_BK_PARENS)
+ token->type = OP_CLOSE_SUBEXP;
+ break;
+ case '[':
+ token->type = OP_OPEN_BRACKET;
+ break;
+ case '.':
+ token->type = OP_PERIOD;
+ break;
+ case '^':
+ if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) &&
+ re_string_cur_idx (input) != 0)
+ {
+ char prev = re_string_peek_byte (input, -1);
+ if (!(syntax & RE_NEWLINE_ALT) || prev != '\n')
+ break;
+ }
+ token->type = ANCHOR;
+ token->opr.ctx_type = LINE_FIRST;
+ break;
+ case '$':
+ if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
+ re_string_cur_idx (input) + 1 != re_string_length (input))
+ {
+ re_token_t next;
+ re_string_skip_bytes (input, 1);
+ peek_token (&next, input, syntax);
+ re_string_skip_bytes (input, -1);
+ if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
+ break;
+ }
+ token->type = ANCHOR;
+ token->opr.ctx_type = LINE_LAST;
+ break;
+ default:
+ break;
+ }
+ return 1;
+}
+
+/* Peek a token from INPUT, and return the length of the token.
+ We must not use this function out of bracket expressions. */
+
+static int
+internal_function
+peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
+{
+ unsigned char c;
+ if (re_string_eoi (input))
+ {
+ token->type = END_OF_RE;
+ return 0;
+ }
+ c = re_string_peek_byte (input, 0);
+ token->opr.c = c;
+
+#ifdef RE_ENABLE_I18N
+ if (input->mb_cur_max > 1 &&
+ !re_string_first_byte (input, re_string_cur_idx (input)))
+ {
+ token->type = CHARACTER;
+ return 1;
+ }
+#endif /* RE_ENABLE_I18N */
+
+ if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
+ && re_string_cur_idx (input) + 1 < re_string_length (input))
+ {
+ /* In this case, '\' escape a character. */
+ unsigned char c2;
+ re_string_skip_bytes (input, 1);
+ c2 = re_string_peek_byte (input, 0);
+ token->opr.c = c2;
+ token->type = CHARACTER;
+ return 1;
+ }
+ if (c == '[') /* '[' is a special char in a bracket exps. */
+ {
+ unsigned char c2;
+ int token_len;
+ if (re_string_cur_idx (input) + 1 < re_string_length (input))
+ c2 = re_string_peek_byte (input, 1);
+ else
+ c2 = 0;
+ token->opr.c = c2;
+ token_len = 2;
+ switch (c2)
+ {
+ case '.':
+ token->type = OP_OPEN_COLL_ELEM;
+ break;
+ case '=':
+ token->type = OP_OPEN_EQUIV_CLASS;
+ break;
+ case ':':
+ if (syntax & RE_CHAR_CLASSES)
+ {
+ token->type = OP_OPEN_CHAR_CLASS;
+ break;
+ }
+ /* else fall through. */
+ default:
+ token->type = CHARACTER;
+ token->opr.c = c;
+ token_len = 1;
+ break;
+ }
+ return token_len;
+ }
+ switch (c)
+ {
+ case '-':
+ token->type = OP_CHARSET_RANGE;
+ break;
+ case ']':
+ token->type = OP_CLOSE_BRACKET;
+ break;
+ case '^':
+ token->type = OP_NON_MATCH_LIST;
+ break;
+ default:
+ token->type = CHARACTER;
+ }
+ return 1;
+}
+
+/* Functions for parser. */
+
+/* Entry point of the parser.
+ Parse the regular expression REGEXP and return the structure tree.
+ If an error is occured, ERR is set by error code, and return NULL.
+ This function build the following tree, from regular expression <reg_exp>:
+ CAT
+ / \
+ / \
+ <reg_exp> EOR
+
+ CAT means concatenation.
+ EOR means end of regular expression. */
+
+static bin_tree_t *
+parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax,
+ reg_errcode_t *err)
+{
+ re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+ bin_tree_t *tree, *eor, *root;
+ re_token_t current_token;
+ dfa->syntax = syntax;
+ fetch_token (&current_token, regexp, syntax | RE_CARET_ANCHORS_HERE);
+ tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+ eor = create_tree (dfa, NULL, NULL, END_OF_RE);
+ if (tree != NULL)
+ root = create_tree (dfa, tree, eor, CONCAT);
+ else
+ root = eor;
+ if (BE (eor == NULL || root == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ return root;
+}
+
+/* This function build the following tree, from regular expression
+ <branch1>|<branch2>:
+ ALT
+ / \
+ / \
+ <branch1> <branch2>
+
+ ALT means alternative, which represents the operator `|'. */
+
+static bin_tree_t *
+parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
+ reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
+{
+ re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+ bin_tree_t *tree, *branch = NULL;
+ tree = parse_branch (regexp, preg, token, syntax, nest, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+
+ while (token->type == OP_ALT)
+ {
+ fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
+ if (token->type != OP_ALT && token->type != END_OF_RE
+ && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
+ {
+ branch = parse_branch (regexp, preg, token, syntax, nest, err);
+ if (BE (*err != REG_NOERROR && branch == NULL, 0))
+ return NULL;
+ }
+ else
+ branch = NULL;
+ tree = create_tree (dfa, tree, branch, OP_ALT);
+ if (BE (tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ }
+ return tree;
+}
+
+/* This function build the following tree, from regular expression
+ <exp1><exp2>:
+ CAT
+ / \
+ / \
+ <exp1> <exp2>
+
+ CAT means concatenation. */
+
+static bin_tree_t *
+parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token,
+ reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
+{
+ bin_tree_t *tree, *exp;
+ re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+ tree = parse_expression (regexp, preg, token, syntax, nest, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+
+ while (token->type != OP_ALT && token->type != END_OF_RE
+ && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
+ {
+ exp = parse_expression (regexp, preg, token, syntax, nest, err);
+ if (BE (*err != REG_NOERROR && exp == NULL, 0))
+ {
+ return NULL;
+ }
+ if (tree != NULL && exp != NULL)
+ {
+ tree = create_tree (dfa, tree, exp, CONCAT);
+ if (tree == NULL)
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ }
+ else if (tree == NULL)
+ tree = exp;
+ /* Otherwise exp == NULL, we don't need to create new tree. */
+ }
+ return tree;
+}
+
+/* This function build the following tree, from regular expression a*:
+ *
+ |
+ a
+*/
+
+static bin_tree_t *
+parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
+ reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
+{
+ re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+ bin_tree_t *tree;
+ switch (token->type)
+ {
+ case CHARACTER:
+ tree = create_token_tree (dfa, NULL, NULL, token);
+ if (BE (tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+#ifdef RE_ENABLE_I18N
+ if (dfa->mb_cur_max > 1)
+ {
+ while (!re_string_eoi (regexp)
+ && !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
+ {
+ bin_tree_t *mbc_remain;
+ fetch_token (token, regexp, syntax);
+ mbc_remain = create_token_tree (dfa, NULL, NULL, token);
+ tree = create_tree (dfa, tree, mbc_remain, CONCAT);
+ if (BE (mbc_remain == NULL || tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ }
+ }
+#endif
+ break;
+ case OP_OPEN_SUBEXP:
+ tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+ break;
+ case OP_OPEN_BRACKET:
+ tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+ break;
+ case OP_BACK_REF:
+ if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1))
+ {
+ *err = REG_ESUBREG;
+ return NULL;
+ }
+ dfa->used_bkref_map |= 1 << token->opr.idx;
+ tree = create_token_tree (dfa, NULL, NULL, token);
+ if (BE (tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ ++dfa->nbackref;
+ dfa->has_mb_node = 1;
+ break;
+ case OP_OPEN_DUP_NUM:
+ if (syntax & RE_CONTEXT_INVALID_DUP)
+ {
+ *err = REG_BADRPT;
+ return NULL;
+ }
+ /* FALLTHROUGH */
+ case OP_DUP_ASTERISK:
+ case OP_DUP_PLUS:
+ case OP_DUP_QUESTION:
+ if (syntax & RE_CONTEXT_INVALID_OPS)
+ {
+ *err = REG_BADRPT;
+ return NULL;
+ }
+ else if (syntax & RE_CONTEXT_INDEP_OPS)
+ {
+ fetch_token (token, regexp, syntax);
+ return parse_expression (regexp, preg, token, syntax, nest, err);
+ }
+ /* else fall through */
+ case OP_CLOSE_SUBEXP:
+ if ((token->type == OP_CLOSE_SUBEXP) &&
+ !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
+ {
+ *err = REG_ERPAREN;
+ return NULL;
+ }
+ /* else fall through */
+ case OP_CLOSE_DUP_NUM:
+ /* We treat it as a normal character. */
+
+ /* Then we can these characters as normal characters. */
+ token->type = CHARACTER;
+ /* mb_partial and word_char bits should be initialized already
+ by peek_token. */
+ tree = create_token_tree (dfa, NULL, NULL, token);
+ if (BE (tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ break;
+ case ANCHOR:
+ if ((token->opr.ctx_type
+ & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST))
+ && dfa->word_ops_used == 0)
+ init_word_char (dfa);
+ if (token->opr.ctx_type == WORD_DELIM
+ || token->opr.ctx_type == NOT_WORD_DELIM)
+ {
+ bin_tree_t *tree_first, *tree_last;
+ if (token->opr.ctx_type == WORD_DELIM)
+ {
+ token->opr.ctx_type = WORD_FIRST;
+ tree_first = create_token_tree (dfa, NULL, NULL, token);
+ token->opr.ctx_type = WORD_LAST;
+ }
+ else
+ {
+ token->opr.ctx_type = INSIDE_WORD;
+ tree_first = create_token_tree (dfa, NULL, NULL, token);
+ token->opr.ctx_type = INSIDE_NOTWORD;
+ }
+ tree_last = create_token_tree (dfa, NULL, NULL, token);
+ tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
+ if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ }
+ else
+ {
+ tree = create_token_tree (dfa, NULL, NULL, token);
+ if (BE (tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ }
+ /* We must return here, since ANCHORs can't be followed
+ by repetition operators.
+ eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>",
+ it must not be "<ANCHOR(^)><REPEAT(*)>". */
+ fetch_token (token, regexp, syntax);
+ return tree;
+ case OP_PERIOD:
+ tree = create_token_tree (dfa, NULL, NULL, token);
+ if (BE (tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ if (dfa->mb_cur_max > 1)
+ dfa->has_mb_node = 1;
+ break;
+ case OP_WORD:
+ case OP_NOTWORD:
+ tree = build_charclass_op (dfa, regexp->trans,
+ (const unsigned char *) "alnum",
+ (const unsigned char *) "_",
+ token->type == OP_NOTWORD, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+ break;
+ case OP_SPACE:
+ case OP_NOTSPACE:
+ tree = build_charclass_op (dfa, regexp->trans,
+ (const unsigned char *) "space",
+ (const unsigned char *) "",
+ token->type == OP_NOTSPACE, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+ break;
+ case OP_ALT:
+ case END_OF_RE:
+ return NULL;
+ case BACK_SLASH:
+ *err = REG_EESCAPE;
+ return NULL;
+ default:
+ /* Must not happen? */
+#ifdef DEBUG
+ assert (0);
+#endif
+ return NULL;
+ }
+ fetch_token (token, regexp, syntax);
+
+ while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS
+ || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)
+ {
+ tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+ /* In BRE consecutive duplications are not allowed. */
+ if ((syntax & RE_CONTEXT_INVALID_DUP)
+ && (token->type == OP_DUP_ASTERISK
+ || token->type == OP_OPEN_DUP_NUM))
+ {
+ *err = REG_BADRPT;
+ return NULL;
+ }
+ }
+
+ return tree;
+}
+
+/* This function build the following tree, from regular expression
+ (<reg_exp>):
+ SUBEXP
+ |
+ <reg_exp>
+*/
+
+static bin_tree_t *
+parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
+ reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
+{
+ re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+ bin_tree_t *tree;
+ size_t cur_nsub;
+ cur_nsub = preg->re_nsub++;
+
+ fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
+
+ /* The subexpression may be a null string. */
+ if (token->type == OP_CLOSE_SUBEXP)
+ tree = NULL;
+ else
+ {
+ tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
+ if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))
+ *err = REG_EPAREN;
+ if (BE (*err != REG_NOERROR, 0))
+ return NULL;
+ }
+
+ if (cur_nsub <= '9' - '1')
+ dfa->completed_bkref_map |= 1 << cur_nsub;
+
+ tree = create_tree (dfa, tree, NULL, SUBEXP);
+ if (BE (tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ tree->token.opr.idx = cur_nsub;
+ return tree;
+}
+
+/* This function parse repetition operators like "*", "+", "{1,3}" etc. */
+
+static bin_tree_t *
+parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
+ re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err)
+{
+ bin_tree_t *tree = NULL, *old_tree = NULL;
+ Idx i, start, end, start_idx = re_string_cur_idx (regexp);
+ re_token_t start_token = *token;
+
+ if (token->type == OP_OPEN_DUP_NUM)
+ {
+ end = 0;
+ start = fetch_number (regexp, token, syntax);
+ if (start == REG_MISSING)
+ {
+ if (token->type == CHARACTER && token->opr.c == ',')
+ start = 0; /* We treat "{,m}" as "{0,m}". */
+ else
+ {
+ *err = REG_BADBR; /* <re>{} is invalid. */
+ return NULL;
+ }
+ }
+ if (BE (start != REG_ERROR, 1))
+ {
+ /* We treat "{n}" as "{n,n}". */
+ end = ((token->type == OP_CLOSE_DUP_NUM) ? start
+ : ((token->type == CHARACTER && token->opr.c == ',')
+ ? fetch_number (regexp, token, syntax) : REG_ERROR));
+ }
+ if (BE (start == REG_ERROR || end == REG_ERROR, 0))
+ {
+ /* Invalid sequence. */
+ if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
+ {
+ if (token->type == END_OF_RE)
+ *err = REG_EBRACE;
+ else
+ *err = REG_BADBR;
+
+ return NULL;
+ }
+
+ /* If the syntax bit is set, rollback. */
+ re_string_set_index (regexp, start_idx);
+ *token = start_token;
+ token->type = CHARACTER;
+ /* mb_partial and word_char bits should be already initialized by
+ peek_token. */
+ return elem;
+ }
+
+ if (BE (end != REG_MISSING && start > end, 0))
+ {
+ /* First number greater than second. */
+ *err = REG_BADBR;
+ return NULL;
+ }
+ }
+ else
+ {
+ start = (token->type == OP_DUP_PLUS) ? 1 : 0;
+ end = (token->type == OP_DUP_QUESTION) ? 1 : REG_MISSING;
+ }
+
+ fetch_token (token, regexp, syntax);
+
+ if (BE (elem == NULL, 0))
+ return NULL;
+ if (BE (start == 0 && end == 0, 0))
+ {
+ postorder (elem, free_tree, NULL);
+ return NULL;
+ }
+
+ /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */
+ if (BE (start > 0, 0))
+ {
+ tree = elem;
+ for (i = 2; i <= start; ++i)
+ {
+ elem = duplicate_tree (elem, dfa);
+ tree = create_tree (dfa, tree, elem, CONCAT);
+ if (BE (elem == NULL || tree == NULL, 0))
+ goto parse_dup_op_espace;
+ }
+
+ if (start == end)
+ return tree;
+
+ /* Duplicate ELEM before it is marked optional. */
+ elem = duplicate_tree (elem, dfa);
+ old_tree = tree;
+ }
+ else
+ old_tree = NULL;
+
+ if (elem->token.type == SUBEXP)
+ postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx);
+
+ tree = create_tree (dfa, elem, NULL,
+ (end == REG_MISSING ? OP_DUP_ASTERISK : OP_ALT));
+ if (BE (tree == NULL, 0))
+ goto parse_dup_op_espace;
+
+ /* This loop is actually executed only when end != REG_MISSING,
+ to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have
+ already created the start+1-th copy. */
+ if ((Idx) -1 < 0 || end != REG_MISSING)
+ for (i = start + 2; i <= end; ++i)
+ {
+ elem = duplicate_tree (elem, dfa);
+ tree = create_tree (dfa, tree, elem, CONCAT);
+ if (BE (elem == NULL || tree == NULL, 0))
+ goto parse_dup_op_espace;
+
+ tree = create_tree (dfa, tree, NULL, OP_ALT);
+ if (BE (tree == NULL, 0))
+ goto parse_dup_op_espace;
+ }
+
+ if (old_tree)
+ tree = create_tree (dfa, old_tree, tree, CONCAT);
+
+ return tree;
+
+ parse_dup_op_espace:
+ *err = REG_ESPACE;
+ return NULL;
+}
+
+/* Size of the names for collating symbol/equivalence_class/character_class.
+ I'm not sure, but maybe enough. */
+#define BRACKET_NAME_BUF_SIZE 32
+
+#ifndef _LIBC
+ /* Local function for parse_bracket_exp only used in case of NOT _LIBC.
+ Build the range expression which starts from START_ELEM, and ends
+ at END_ELEM. The result are written to MBCSET and SBCSET.
+ RANGE_ALLOC is the allocated size of mbcset->range_starts, and
+ mbcset->range_ends, is a pointer argument sinse we may
+ update it. */
+
+static reg_errcode_t
+internal_function
+# ifdef RE_ENABLE_I18N
+build_range_exp (bitset_t sbcset, re_charset_t *mbcset, Idx *range_alloc,
+ bracket_elem_t *start_elem, bracket_elem_t *end_elem)
+# else /* not RE_ENABLE_I18N */
+build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
+ bracket_elem_t *end_elem)
+# endif /* not RE_ENABLE_I18N */
+{
+ unsigned int start_ch, end_ch;
+ /* Equivalence Classes and Character Classes can't be a range start/end. */
+ if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
+ || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
+ 0))
+ return REG_ERANGE;
+
+ /* We can handle no multi character collating elements without libc
+ support. */
+ if (BE ((start_elem->type == COLL_SYM
+ && strlen ((char *) start_elem->opr.name) > 1)
+ || (end_elem->type == COLL_SYM
+ && strlen ((char *) end_elem->opr.name) > 1), 0))
+ return REG_ECOLLATE;
+
+# ifdef RE_ENABLE_I18N
+ {
+ wchar_t wc;
+ wint_t start_wc;
+ wint_t end_wc;
+ wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
+
+ start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
+ : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
+ : 0));
+ end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
+ : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
+ : 0));
+ start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
+ ? __btowc (start_ch) : start_elem->opr.wch);
+ end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
+ ? __btowc (end_ch) : end_elem->opr.wch);
+ if (start_wc == WEOF || end_wc == WEOF)
+ return REG_ECOLLATE;
+ cmp_buf[0] = start_wc;
+ cmp_buf[4] = end_wc;
+ if (wcscoll (cmp_buf, cmp_buf + 4) > 0)
+ return REG_ERANGE;
+
+ /* Got valid collation sequence values, add them as a new entry.
+ However, for !_LIBC we have no collation elements: if the
+ character set is single byte, the single byte character set
+ that we build below suffices. parse_bracket_exp passes
+ no MBCSET if dfa->mb_cur_max == 1. */
+ if (mbcset)
+ {
+ /* Check the space of the arrays. */
+ if (BE (*range_alloc == mbcset->nranges, 0))
+ {
+ /* There is not enough space, need realloc. */
+ wchar_t *new_array_start, *new_array_end;
+ Idx new_nranges;
+
+ /* +1 in case of mbcset->nranges is 0. */
+ new_nranges = 2 * mbcset->nranges + 1;
+ /* Use realloc since mbcset->range_starts and mbcset->range_ends
+ are NULL if *range_alloc == 0. */
+ new_array_start = re_realloc (mbcset->range_starts, wchar_t,
+ new_nranges);
+ new_array_end = re_realloc (mbcset->range_ends, wchar_t,
+ new_nranges);
+
+ if (BE (new_array_start == NULL || new_array_end == NULL, 0))
+ return REG_ESPACE;
+
+ mbcset->range_starts = new_array_start;
+ mbcset->range_ends = new_array_end;
+ *range_alloc = new_nranges;
+ }
+
+ mbcset->range_starts[mbcset->nranges] = start_wc;
+ mbcset->range_ends[mbcset->nranges++] = end_wc;
+ }
+
+ /* Build the table for single byte characters. */
+ for (wc = 0; wc < SBC_MAX; ++wc)
+ {
+ cmp_buf[2] = wc;
+ if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
+ && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
+ bitset_set (sbcset, wc);
+ }
+ }
+# else /* not RE_ENABLE_I18N */
+ {
+ unsigned int ch;
+ start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
+ : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
+ : 0));
+ end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
+ : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
+ : 0));
+ if (start_ch > end_ch)
+ return REG_ERANGE;
+ /* Build the table for single byte characters. */
+ for (ch = 0; ch < SBC_MAX; ++ch)
+ if (start_ch <= ch && ch <= end_ch)
+ bitset_set (sbcset, ch);
+ }
+# endif /* not RE_ENABLE_I18N */
+ return REG_NOERROR;
+}
+#endif /* not _LIBC */
+
+#ifndef _LIBC
+/* Helper function for parse_bracket_exp only used in case of NOT _LIBC..
+ Build the collating element which is represented by NAME.
+ The result are written to MBCSET and SBCSET.
+ COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
+ pointer argument since we may update it. */
+
+static reg_errcode_t
+internal_function
+build_collating_symbol (bitset_t sbcset,
+# ifdef RE_ENABLE_I18N
+ re_charset_t *mbcset, Idx *coll_sym_alloc,
+# endif
+ const unsigned char *name)
+{
+ size_t name_len = strlen ((const char *) name);
+ if (BE (name_len != 1, 0))
+ return REG_ECOLLATE;
+ else
+ {
+ bitset_set (sbcset, name[0]);
+ return REG_NOERROR;
+ }
+}
+#endif /* not _LIBC */
+
+/* This function parse bracket expression like "[abc]", "[a-c]",
+ "[[.a-a.]]" etc. */
+
+static bin_tree_t *
+parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
+ reg_syntax_t syntax, reg_errcode_t *err)
+{
+#ifdef _LIBC
+ const unsigned char *collseqmb;
+ const char *collseqwc;
+ uint32_t nrules;
+ int32_t table_size;
+ const int32_t *symb_table;
+ const unsigned char *extra;
+
+ /* Local function for parse_bracket_exp used in _LIBC environement.
+ Seek the collating symbol entry correspondings to NAME.
+ Return the index of the symbol in the SYMB_TABLE. */
+
+ auto inline int32_t
+ __attribute ((always_inline))
+ seek_collating_symbol_entry (name, name_len)
+ const unsigned char *name;
+ size_t name_len;
+ {
+ int32_t hash = elem_hash ((const char *) name, name_len);
+ int32_t elem = hash % table_size;
+ if (symb_table[2 * elem] != 0)
+ {
+ int32_t second = hash % (table_size - 2) + 1;
+
+ do
+ {
+ /* First compare the hashing value. */
+ if (symb_table[2 * elem] == hash
+ /* Compare the length of the name. */
+ && name_len == extra[symb_table[2 * elem + 1]]
+ /* Compare the name. */
+ && memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
+ name_len) == 0)
+ {
+ /* Yep, this is the entry. */
+ break;
+ }
+
+ /* Next entry. */
+ elem += second;
+ }
+ while (symb_table[2 * elem] != 0);
+ }
+ return elem;
+ }
+
+ /* Local function for parse_bracket_exp used in _LIBC environement.
+ Look up the collation sequence value of BR_ELEM.
+ Return the value if succeeded, UINT_MAX otherwise. */
+
+ auto inline unsigned int
+ __attribute ((always_inline))
+ lookup_collation_sequence_value (br_elem)
+ bracket_elem_t *br_elem;
+ {
+ if (br_elem->type == SB_CHAR)
+ {
+ /*
+ if (MB_CUR_MAX == 1)
+ */
+ if (nrules == 0)
+ return collseqmb[br_elem->opr.ch];
+ else
+ {
+ wint_t wc = __btowc (br_elem->opr.ch);
+ return __collseq_table_lookup (collseqwc, wc);
+ }
+ }
+ else if (br_elem->type == MB_CHAR)
+ {
+ return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
+ }
+ else if (br_elem->type == COLL_SYM)
+ {
+ size_t sym_name_len = strlen ((char *) br_elem->opr.name);
+ if (nrules != 0)
+ {
+ int32_t elem, idx;
+ elem = seek_collating_symbol_entry (br_elem->opr.name,
+ sym_name_len);
+ if (symb_table[2 * elem] != 0)
+ {
+ /* We found the entry. */
+ idx = symb_table[2 * elem + 1];
+ /* Skip the name of collating element name. */
+ idx += 1 + extra[idx];
+ /* Skip the byte sequence of the collating element. */
+ idx += 1 + extra[idx];
+ /* Adjust for the alignment. */
+ idx = (idx + 3) & ~3;
+ /* Skip the multibyte collation sequence value. */
+ idx += sizeof (unsigned int);
+ /* Skip the wide char sequence of the collating element. */
+ idx += sizeof (unsigned int) *
+ (1 + *(unsigned int *) (extra + idx));
+ /* Return the collation sequence value. */
+ return *(unsigned int *) (extra + idx);
+ }
+ else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
+ {
+ /* No valid character. Match it as a single byte
+ character. */
+ return collseqmb[br_elem->opr.name[0]];
+ }
+ }
+ else if (sym_name_len == 1)
+ return collseqmb[br_elem->opr.name[0]];
+ }
+ return UINT_MAX;
+ }
+
+ /* Local function for parse_bracket_exp used in _LIBC environement.
+ Build the range expression which starts from START_ELEM, and ends
+ at END_ELEM. The result are written to MBCSET and SBCSET.
+ RANGE_ALLOC is the allocated size of mbcset->range_starts, and
+ mbcset->range_ends, is a pointer argument sinse we may
+ update it. */
+
+ auto inline reg_errcode_t
+ __attribute ((always_inline))
+ build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
+ re_charset_t *mbcset;
+ Idx *range_alloc;
+ bitset_t sbcset;
+ bracket_elem_t *start_elem, *end_elem;
+ {
+ unsigned int ch;
+ uint32_t start_collseq;
+ uint32_t end_collseq;
+
+ /* Equivalence Classes and Character Classes can't be a range
+ start/end. */
+ if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
+ || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
+ 0))
+ return REG_ERANGE;
+
+ start_collseq = lookup_collation_sequence_value (start_elem);
+ end_collseq = lookup_collation_sequence_value (end_elem);
+ /* Check start/end collation sequence values. */
+ if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0))
+ return REG_ECOLLATE;
+ if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
+ return REG_ERANGE;
+
+ /* Got valid collation sequence values, add them as a new entry.
+ However, if we have no collation elements, and the character set
+ is single byte, the single byte character set that we
+ build below suffices. */
+ if (nrules > 0 || dfa->mb_cur_max > 1)
+ {
+ /* Check the space of the arrays. */
+ if (BE (*range_alloc == mbcset->nranges, 0))
+ {
+ /* There is not enough space, need realloc. */
+ uint32_t *new_array_start;
+ uint32_t *new_array_end;
+ Idx new_nranges;
+
+ /* +1 in case of mbcset->nranges is 0. */
+ new_nranges = 2 * mbcset->nranges + 1;
+ new_array_start = re_realloc (mbcset->range_starts, uint32_t,
+ new_nranges);
+ new_array_end = re_realloc (mbcset->range_ends, uint32_t,
+ new_nranges);
+
+ if (BE (new_array_start == NULL || new_array_end == NULL, 0))
+ return REG_ESPACE;
+
+ mbcset->range_starts = new_array_start;
+ mbcset->range_ends = new_array_end;
+ *range_alloc = new_nranges;
+ }
+
+ mbcset->range_starts[mbcset->nranges] = start_collseq;
+ mbcset->range_ends[mbcset->nranges++] = end_collseq;
+ }
+
+ /* Build the table for single byte characters. */
+ for (ch = 0; ch < SBC_MAX; ch++)
+ {
+ uint32_t ch_collseq;
+ /*
+ if (MB_CUR_MAX == 1)
+ */
+ if (nrules == 0)
+ ch_collseq = collseqmb[ch];
+ else
+ ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
+ if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
+ bitset_set (sbcset, ch);
+ }
+ return REG_NOERROR;
+ }
+
+ /* Local function for parse_bracket_exp used in _LIBC environement.
+ Build the collating element which is represented by NAME.
+ The result are written to MBCSET and SBCSET.
+ COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
+ pointer argument sinse we may update it. */
+
+ auto inline reg_errcode_t
+ __attribute ((always_inline))
+ build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
+ re_charset_t *mbcset;
+ Idx *coll_sym_alloc;
+ bitset_t sbcset;
+ const unsigned char *name;
+ {
+ int32_t elem, idx;
+ size_t name_len = strlen ((const char *) name);
+ if (nrules != 0)
+ {
+ elem = seek_collating_symbol_entry (name, name_len);
+ if (symb_table[2 * elem] != 0)
+ {
+ /* We found the entry. */
+ idx = symb_table[2 * elem + 1];
+ /* Skip the name of collating element name. */
+ idx += 1 + extra[idx];
+ }
+ else if (symb_table[2 * elem] == 0 && name_len == 1)
+ {
+ /* No valid character, treat it as a normal
+ character. */
+ bitset_set (sbcset, name[0]);
+ return REG_NOERROR;
+ }
+ else
+ return REG_ECOLLATE;
+
+ /* Got valid collation sequence, add it as a new entry. */
+ /* Check the space of the arrays. */
+ if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))
+ {
+ /* Not enough, realloc it. */
+ /* +1 in case of mbcset->ncoll_syms is 0. */
+ Idx new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
+ /* Use realloc since mbcset->coll_syms is NULL
+ if *alloc == 0. */
+ int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
+ new_coll_sym_alloc);
+ if (BE (new_coll_syms == NULL, 0))
+ return REG_ESPACE;
+ mbcset->coll_syms = new_coll_syms;
+ *coll_sym_alloc = new_coll_sym_alloc;
+ }
+ mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
+ return REG_NOERROR;
+ }
+ else
+ {
+ if (BE (name_len != 1, 0))
+ return REG_ECOLLATE;
+ else
+ {
+ bitset_set (sbcset, name[0]);
+ return REG_NOERROR;
+ }
+ }
+ }
+#endif
+
+ re_token_t br_token;
+ re_bitset_ptr_t sbcset;
+#ifdef RE_ENABLE_I18N
+ re_charset_t *mbcset;
+ Idx coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
+ Idx equiv_class_alloc = 0, char_class_alloc = 0;
+#endif /* not RE_ENABLE_I18N */
+ bool non_match = false;
+ bin_tree_t *work_tree;
+ int token_len;
+ bool first_round = true;
+#ifdef _LIBC
+ collseqmb = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
+ nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+ if (nrules)
+ {
+ /*
+ if (MB_CUR_MAX > 1)
+ */
+ collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
+ table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
+ symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_SYMB_TABLEMB);
+ extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_SYMB_EXTRAMB);
+ }
+#endif
+ sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
+#ifdef RE_ENABLE_I18N
+ mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
+#endif /* RE_ENABLE_I18N */
+#ifdef RE_ENABLE_I18N
+ if (BE (sbcset == NULL || mbcset == NULL, 0))
+#else
+ if (BE (sbcset == NULL, 0))
+#endif /* RE_ENABLE_I18N */
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+
+ token_len = peek_token_bracket (token, regexp, syntax);
+ if (BE (token->type == END_OF_RE, 0))
+ {
+ *err = REG_BADPAT;
+ goto parse_bracket_exp_free_return;
+ }
+ if (token->type == OP_NON_MATCH_LIST)
+ {
+#ifdef RE_ENABLE_I18N
+ mbcset->non_match = 1;
+#endif /* not RE_ENABLE_I18N */
+ non_match = true;
+ if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
+ bitset_set (sbcset, '\0');
+ re_string_skip_bytes (regexp, token_len); /* Skip a token. */
+ token_len = peek_token_bracket (token, regexp, syntax);
+ if (BE (token->type == END_OF_RE, 0))
+ {
+ *err = REG_BADPAT;
+ goto parse_bracket_exp_free_return;
+ }
+ }
+
+ /* We treat the first ']' as a normal character. */
+ if (token->type == OP_CLOSE_BRACKET)
+ token->type = CHARACTER;
+
+ while (1)
+ {
+ bracket_elem_t start_elem, end_elem;
+ unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
+ unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
+ reg_errcode_t ret;
+ int token_len2 = 0;
+ bool is_range_exp = false;
+ re_token_t token2;
+
+ start_elem.opr.name = start_name_buf;
+ ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
+ syntax, first_round);
+ if (BE (ret != REG_NOERROR, 0))
+ {
+ *err = ret;
+ goto parse_bracket_exp_free_return;
+ }
+ first_round = false;
+
+ /* Get information about the next token. We need it in any case. */
+ token_len = peek_token_bracket (token, regexp, syntax);
+
+ /* Do not check for ranges if we know they are not allowed. */
+ if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)
+ {
+ if (BE (token->type == END_OF_RE, 0))
+ {
+ *err = REG_EBRACK;
+ goto parse_bracket_exp_free_return;
+ }
+ if (token->type == OP_CHARSET_RANGE)
+ {
+ re_string_skip_bytes (regexp, token_len); /* Skip '-'. */
+ token_len2 = peek_token_bracket (&token2, regexp, syntax);
+ if (BE (token2.type == END_OF_RE, 0))
+ {
+ *err = REG_EBRACK;
+ goto parse_bracket_exp_free_return;
+ }
+ if (token2.type == OP_CLOSE_BRACKET)
+ {
+ /* We treat the last '-' as a normal character. */
+ re_string_skip_bytes (regexp, -token_len);
+ token->type = CHARACTER;
+ }
+ else
+ is_range_exp = true;
+ }
+ }
+
+ if (is_range_exp == true)
+ {
+ end_elem.opr.name = end_name_buf;
+ ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
+ dfa, syntax, true);
+ if (BE (ret != REG_NOERROR, 0))
+ {
+ *err = ret;
+ goto parse_bracket_exp_free_return;
+ }
+
+ token_len = peek_token_bracket (token, regexp, syntax);
+
+#ifdef _LIBC
+ *err = build_range_exp (sbcset, mbcset, &range_alloc,
+ &start_elem, &end_elem);
+#else
+# ifdef RE_ENABLE_I18N
+ *err = build_range_exp (sbcset,
+ dfa->mb_cur_max > 1 ? mbcset : NULL,
+ &range_alloc, &start_elem, &end_elem);
+# else
+ *err = build_range_exp (sbcset, &start_elem, &end_elem);
+# endif
+#endif /* RE_ENABLE_I18N */
+ if (BE (*err != REG_NOERROR, 0))
+ goto parse_bracket_exp_free_return;
+ }
+ else
+ {
+ switch (start_elem.type)
+ {
+ case SB_CHAR:
+ bitset_set (sbcset, start_elem.opr.ch);
+ break;
+#ifdef RE_ENABLE_I18N
+ case MB_CHAR:
+ /* Check whether the array has enough space. */
+ if (BE (mbchar_alloc == mbcset->nmbchars, 0))
+ {
+ wchar_t *new_mbchars;
+ /* Not enough, realloc it. */
+ /* +1 in case of mbcset->nmbchars is 0. */
+ mbchar_alloc = 2 * mbcset->nmbchars + 1;
+ /* Use realloc since array is NULL if *alloc == 0. */
+ new_mbchars = re_realloc (mbcset->mbchars, wchar_t,
+ mbchar_alloc);
+ if (BE (new_mbchars == NULL, 0))
+ goto parse_bracket_exp_espace;
+ mbcset->mbchars = new_mbchars;
+ }
+ mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
+ break;
+#endif /* RE_ENABLE_I18N */
+ case EQUIV_CLASS:
+ *err = build_equiv_class (sbcset,
+#ifdef RE_ENABLE_I18N
+ mbcset, &equiv_class_alloc,
+#endif /* RE_ENABLE_I18N */
+ start_elem.opr.name);
+ if (BE (*err != REG_NOERROR, 0))
+ goto parse_bracket_exp_free_return;
+ break;
+ case COLL_SYM:
+ *err = build_collating_symbol (sbcset,
+#ifdef RE_ENABLE_I18N
+ mbcset, &coll_sym_alloc,
+#endif /* RE_ENABLE_I18N */
+ start_elem.opr.name);
+ if (BE (*err != REG_NOERROR, 0))
+ goto parse_bracket_exp_free_return;
+ break;
+ case CHAR_CLASS:
+ *err = build_charclass (regexp->trans, sbcset,
+#ifdef RE_ENABLE_I18N
+ mbcset, &char_class_alloc,
+#endif /* RE_ENABLE_I18N */
+ start_elem.opr.name, syntax);
+ if (BE (*err != REG_NOERROR, 0))
+ goto parse_bracket_exp_free_return;
+ break;
+ default:
+ assert (0);
+ break;
+ }
+ }
+ if (BE (token->type == END_OF_RE, 0))
+ {
+ *err = REG_EBRACK;
+ goto parse_bracket_exp_free_return;
+ }
+ if (token->type == OP_CLOSE_BRACKET)
+ break;
+ }
+
+ re_string_skip_bytes (regexp, token_len); /* Skip a token. */
+
+ /* If it is non-matching list. */
+ if (non_match)
+ bitset_not (sbcset);
+
+#ifdef RE_ENABLE_I18N
+ /* Ensure only single byte characters are set. */
+ if (dfa->mb_cur_max > 1)
+ bitset_mask (sbcset, dfa->sb_char);
+
+ if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes
+ || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
+ || mbcset->non_match)))
+ {
+ bin_tree_t *mbc_tree;
+ int sbc_idx;
+ /* Build a tree for complex bracket. */
+ dfa->has_mb_node = 1;
+ br_token.type = COMPLEX_BRACKET;
+ br_token.opr.mbcset = mbcset;
+ mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+ if (BE (mbc_tree == NULL, 0))
+ goto parse_bracket_exp_espace;
+ for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
+ if (sbcset[sbc_idx])
+ break;
+ /* If there are no bits set in sbcset, there is no point
+ of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */
+ if (sbc_idx < BITSET_WORDS)
+ {
+ /* Build a tree for simple bracket. */
+ br_token.type = SIMPLE_BRACKET;
+ br_token.opr.sbcset = sbcset;
+ work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+ if (BE (work_tree == NULL, 0))
+ goto parse_bracket_exp_espace;
+
+ /* Then join them by ALT node. */
+ work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
+ if (BE (work_tree == NULL, 0))
+ goto parse_bracket_exp_espace;
+ }
+ else
+ {
+ re_free (sbcset);
+ work_tree = mbc_tree;
+ }
+ }
+ else
+#endif /* not RE_ENABLE_I18N */
+ {
+#ifdef RE_ENABLE_I18N
+ free_charset (mbcset);
+#endif
+ /* Build a tree for simple bracket. */
+ br_token.type = SIMPLE_BRACKET;
+ br_token.opr.sbcset = sbcset;
+ work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+ if (BE (work_tree == NULL, 0))
+ goto parse_bracket_exp_espace;
+ }
+ return work_tree;
+
+ parse_bracket_exp_espace:
+ *err = REG_ESPACE;
+ parse_bracket_exp_free_return:
+ re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+ free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+ return NULL;
+}
+
+/* Parse an element in the bracket expression. */
+
+static reg_errcode_t
+parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp,
+ re_token_t *token, int token_len, re_dfa_t *dfa,
+ reg_syntax_t syntax, bool accept_hyphen)
+{
+#ifdef RE_ENABLE_I18N
+ int cur_char_size;
+ cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
+ if (cur_char_size > 1)
+ {
+ elem->type = MB_CHAR;
+ elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
+ re_string_skip_bytes (regexp, cur_char_size);
+ return REG_NOERROR;
+ }
+#endif /* RE_ENABLE_I18N */
+ re_string_skip_bytes (regexp, token_len); /* Skip a token. */
+ if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS
+ || token->type == OP_OPEN_EQUIV_CLASS)
+ return parse_bracket_symbol (elem, regexp, token);
+ if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen)
+ {
+ /* A '-' must only appear as anything but a range indicator before
+ the closing bracket. Everything else is an error. */
+ re_token_t token2;
+ (void) peek_token_bracket (&token2, regexp, syntax);
+ if (token2.type != OP_CLOSE_BRACKET)
+ /* The actual error value is not standardized since this whole
+ case is undefined. But ERANGE makes good sense. */
+ return REG_ERANGE;
+ }
+ elem->type = SB_CHAR;
+ elem->opr.ch = token->opr.c;
+ return REG_NOERROR;
+}
+
+/* Parse a bracket symbol in the bracket expression. Bracket symbols are
+ such as [:<character_class>:], [.<collating_element>.], and
+ [=<equivalent_class>=]. */
+
+static reg_errcode_t
+parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp,
+ re_token_t *token)
+{
+ unsigned char ch, delim = token->opr.c;
+ int i = 0;
+ if (re_string_eoi(regexp))
+ return REG_EBRACK;
+ for (;; ++i)
+ {
+ if (i >= BRACKET_NAME_BUF_SIZE)
+ return REG_EBRACK;
+ if (token->type == OP_OPEN_CHAR_CLASS)
+ ch = re_string_fetch_byte_case (regexp);
+ else
+ ch = re_string_fetch_byte (regexp);
+ if (re_string_eoi(regexp))
+ return REG_EBRACK;
+ if (ch == delim && re_string_peek_byte (regexp, 0) == ']')
+ break;
+ elem->opr.name[i] = ch;
+ }
+ re_string_skip_bytes (regexp, 1);
+ elem->opr.name[i] = '\0';
+ switch (token->type)
+ {
+ case OP_OPEN_COLL_ELEM:
+ elem->type = COLL_SYM;
+ break;
+ case OP_OPEN_EQUIV_CLASS:
+ elem->type = EQUIV_CLASS;
+ break;
+ case OP_OPEN_CHAR_CLASS:
+ elem->type = CHAR_CLASS;
+ break;
+ default:
+ break;
+ }
+ return REG_NOERROR;
+}
+
+ /* Helper function for parse_bracket_exp.
+ Build the equivalence class which is represented by NAME.
+ The result are written to MBCSET and SBCSET.
+ EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes,
+ is a pointer argument sinse we may update it. */
+
+static reg_errcode_t
+#ifdef RE_ENABLE_I18N
+build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
+ Idx *equiv_class_alloc, const unsigned char *name)
+#else /* not RE_ENABLE_I18N */
+build_equiv_class (bitset_t sbcset, const unsigned char *name)
+#endif /* not RE_ENABLE_I18N */
+{
+#ifdef _LIBC
+ uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+ if (nrules != 0)
+ {
+ const int32_t *table, *indirect;
+ const unsigned char *weights, *extra, *cp;
+ unsigned char char_buf[2];
+ int32_t idx1, idx2;
+ unsigned int ch;
+ size_t len;
+ /* This #include defines a local function! */
+# include <locale/weight.h>
+ /* Calculate the index for equivalence class. */
+ cp = name;
+ table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+ weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_WEIGHTMB);
+ extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_EXTRAMB);
+ indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_INDIRECTMB);
+ idx1 = findidx (&cp);
+ if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0))
+ /* This isn't a valid character. */
+ return REG_ECOLLATE;
+
+ /* Build single byte matcing table for this equivalence class. */
+ char_buf[1] = (unsigned char) '\0';
+ len = weights[idx1];
+ for (ch = 0; ch < SBC_MAX; ++ch)
+ {
+ char_buf[0] = ch;
+ cp = char_buf;
+ idx2 = findidx (&cp);
+/*
+ idx2 = table[ch];
+*/
+ if (idx2 == 0)
+ /* This isn't a valid character. */
+ continue;
+ if (len == weights[idx2])
+ {
+ int cnt = 0;
+ while (cnt <= len &&
+ weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt])
+ ++cnt;
+
+ if (cnt > len)
+ bitset_set (sbcset, ch);
+ }
+ }
+ /* Check whether the array has enough space. */
+ if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0))
+ {
+ /* Not enough, realloc it. */
+ /* +1 in case of mbcset->nequiv_classes is 0. */
+ Idx new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
+ /* Use realloc since the array is NULL if *alloc == 0. */
+ int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,
+ int32_t,
+ new_equiv_class_alloc);
+ if (BE (new_equiv_classes == NULL, 0))
+ return REG_ESPACE;
+ mbcset->equiv_classes = new_equiv_classes;
+ *equiv_class_alloc = new_equiv_class_alloc;
+ }
+ mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
+ }
+ else
+#endif /* _LIBC */
+ {
+ if (BE (strlen ((const char *) name) != 1, 0))
+ return REG_ECOLLATE;
+ bitset_set (sbcset, *name);
+ }
+ return REG_NOERROR;
+}
+
+ /* Helper function for parse_bracket_exp.
+ Build the character class which is represented by NAME.
+ The result are written to MBCSET and SBCSET.
+ CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes,
+ is a pointer argument sinse we may update it. */
+
+static reg_errcode_t
+#ifdef RE_ENABLE_I18N
+build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
+ re_charset_t *mbcset, Idx *char_class_alloc,
+ const unsigned char *class_name, reg_syntax_t syntax)
+#else /* not RE_ENABLE_I18N */
+build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
+ const unsigned char *class_name, reg_syntax_t syntax)
+#endif /* not RE_ENABLE_I18N */
+{
+ int i;
+ const char *name = (const char *) class_name;
+
+ /* In case of REG_ICASE "upper" and "lower" match the both of
+ upper and lower cases. */
+ if ((syntax & RE_ICASE)
+ && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0))
+ name = "alpha";
+
+#ifdef RE_ENABLE_I18N
+ /* Check the space of the arrays. */
+ if (BE (*char_class_alloc == mbcset->nchar_classes, 0))
+ {
+ /* Not enough, realloc it. */
+ /* +1 in case of mbcset->nchar_classes is 0. */
+ Idx new_char_class_alloc = 2 * mbcset->nchar_classes + 1;
+ /* Use realloc since array is NULL if *alloc == 0. */
+ wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,
+ new_char_class_alloc);
+ if (BE (new_char_classes == NULL, 0))
+ return REG_ESPACE;
+ mbcset->char_classes = new_char_classes;
+ *char_class_alloc = new_char_class_alloc;
+ }
+ mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
+#endif /* RE_ENABLE_I18N */
+
+#define BUILD_CHARCLASS_LOOP(ctype_func) \
+ do { \
+ if (BE (trans != NULL, 0)) \
+ { \
+ for (i = 0; i < SBC_MAX; ++i) \
+ if (ctype_func (i)) \
+ bitset_set (sbcset, trans[i]); \
+ } \
+ else \
+ { \
+ for (i = 0; i < SBC_MAX; ++i) \
+ if (ctype_func (i)) \
+ bitset_set (sbcset, i); \
+ } \
+ } while (0)
+
+ if (strcmp (name, "alnum") == 0)
+ BUILD_CHARCLASS_LOOP (isalnum);
+ else if (strcmp (name, "cntrl") == 0)
+ BUILD_CHARCLASS_LOOP (iscntrl);
+ else if (strcmp (name, "lower") == 0)
+ BUILD_CHARCLASS_LOOP (islower);
+ else if (strcmp (name, "space") == 0)
+ BUILD_CHARCLASS_LOOP (isspace);
+ else if (strcmp (name, "alpha") == 0)
+ BUILD_CHARCLASS_LOOP (isalpha);
+ else if (strcmp (name, "digit") == 0)
+ BUILD_CHARCLASS_LOOP (isdigit);
+ else if (strcmp (name, "print") == 0)
+ BUILD_CHARCLASS_LOOP (isprint);
+ else if (strcmp (name, "upper") == 0)
+ BUILD_CHARCLASS_LOOP (isupper);
+ else if (strcmp (name, "blank") == 0)
+ BUILD_CHARCLASS_LOOP (isblank);
+ else if (strcmp (name, "graph") == 0)
+ BUILD_CHARCLASS_LOOP (isgraph);
+ else if (strcmp (name, "punct") == 0)
+ BUILD_CHARCLASS_LOOP (ispunct);
+ else if (strcmp (name, "xdigit") == 0)
+ BUILD_CHARCLASS_LOOP (isxdigit);
+ else
+ return REG_ECTYPE;
+
+ return REG_NOERROR;
+}
+
+static bin_tree_t *
+build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
+ const unsigned char *class_name,
+ const unsigned char *extra, bool non_match,
+ reg_errcode_t *err)
+{
+ re_bitset_ptr_t sbcset;
+#ifdef RE_ENABLE_I18N
+ re_charset_t *mbcset;
+ Idx alloc = 0;
+#endif /* not RE_ENABLE_I18N */
+ reg_errcode_t ret;
+ re_token_t br_token;
+ bin_tree_t *tree;
+
+ sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
+#ifdef RE_ENABLE_I18N
+ mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
+#endif /* RE_ENABLE_I18N */
+
+#ifdef RE_ENABLE_I18N
+ if (BE (sbcset == NULL || mbcset == NULL, 0))
+#else /* not RE_ENABLE_I18N */
+ if (BE (sbcset == NULL, 0))
+#endif /* not RE_ENABLE_I18N */
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+
+ if (non_match)
+ {
+#ifdef RE_ENABLE_I18N
+ /*
+ if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
+ bitset_set(cset->sbcset, '\0');
+ */
+ mbcset->non_match = 1;
+#endif /* not RE_ENABLE_I18N */
+ }
+
+ /* We don't care the syntax in this case. */
+ ret = build_charclass (trans, sbcset,
+#ifdef RE_ENABLE_I18N
+ mbcset, &alloc,
+#endif /* RE_ENABLE_I18N */
+ class_name, 0);
+
+ if (BE (ret != REG_NOERROR, 0))
+ {
+ re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+ free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+ *err = ret;
+ return NULL;
+ }
+ /* \w match '_' also. */
+ for (; *extra; extra++)
+ bitset_set (sbcset, *extra);
+
+ /* If it is non-matching list. */
+ if (non_match)
+ bitset_not (sbcset);
+
+#ifdef RE_ENABLE_I18N
+ /* Ensure only single byte characters are set. */
+ if (dfa->mb_cur_max > 1)
+ bitset_mask (sbcset, dfa->sb_char);
+#endif
+
+ /* Build a tree for simple bracket. */
+ br_token.type = SIMPLE_BRACKET;
+ br_token.opr.sbcset = sbcset;
+ tree = create_token_tree (dfa, NULL, NULL, &br_token);
+ if (BE (tree == NULL, 0))
+ goto build_word_op_espace;
+
+#ifdef RE_ENABLE_I18N
+ if (dfa->mb_cur_max > 1)
+ {
+ bin_tree_t *mbc_tree;
+ /* Build a tree for complex bracket. */
+ br_token.type = COMPLEX_BRACKET;
+ br_token.opr.mbcset = mbcset;
+ dfa->has_mb_node = 1;
+ mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+ if (BE (mbc_tree == NULL, 0))
+ goto build_word_op_espace;
+ /* Then join them by ALT node. */
+ tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
+ if (BE (mbc_tree != NULL, 1))
+ return tree;
+ }
+ else
+ {
+ free_charset (mbcset);
+ return tree;
+ }
+#else /* not RE_ENABLE_I18N */
+ return tree;
+#endif /* not RE_ENABLE_I18N */
+
+ build_word_op_espace:
+ re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+ free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+ *err = REG_ESPACE;
+ return NULL;
+}
+
+/* This is intended for the expressions like "a{1,3}".
+ Fetch a number from `input', and return the number.
+ Return REG_MISSING if the number field is empty like "{,1}".
+ Return REG_ERROR if an error occurred. */
+
+static Idx
+fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)
+{
+ Idx num = REG_MISSING;
+ unsigned char c;
+ while (1)
+ {
+ fetch_token (token, input, syntax);
+ c = token->opr.c;
+ if (BE (token->type == END_OF_RE, 0))
+ return REG_ERROR;
+ if (token->type == OP_CLOSE_DUP_NUM || c == ',')
+ break;
+ num = ((token->type != CHARACTER || c < '0' || '9' < c
+ || num == REG_ERROR)
+ ? REG_ERROR
+ : ((num == REG_MISSING) ? c - '0' : num * 10 + c - '0'));
+ num = (num > RE_DUP_MAX) ? REG_ERROR : num;
+ }
+ return num;
+}
+
+#ifdef RE_ENABLE_I18N
+static void
+free_charset (re_charset_t *cset)
+{
+ re_free (cset->mbchars);
+# ifdef _LIBC
+ re_free (cset->coll_syms);
+ re_free (cset->equiv_classes);
+ re_free (cset->range_starts);
+ re_free (cset->range_ends);
+# endif
+ re_free (cset->char_classes);
+ re_free (cset);
+}
+#endif /* RE_ENABLE_I18N */
+
+/* Functions for binary tree operation. */
+
+/* Create a tree node. */
+
+static bin_tree_t *
+create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
+ re_token_type_t type)
+{
+ re_token_t t;
+ t.type = type;
+ return create_token_tree (dfa, left, right, &t);
+}
+
+static bin_tree_t *
+create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
+ const re_token_t *token)
+{
+ bin_tree_t *tree;
+ if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
+ {
+ bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1);
+
+ if (storage == NULL)
+ return NULL;
+ storage->next = dfa->str_tree_storage;
+ dfa->str_tree_storage = storage;
+ dfa->str_tree_storage_idx = 0;
+ }
+ tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++];
+
+ tree->parent = NULL;
+ tree->left = left;
+ tree->right = right;
+ tree->token = *token;
+ tree->token.duplicated = 0;
+ tree->token.opt_subexp = 0;
+ tree->first = NULL;
+ tree->next = NULL;
+ tree->node_idx = REG_MISSING;
+
+ if (left != NULL)
+ left->parent = tree;
+ if (right != NULL)
+ right->parent = tree;
+ return tree;
+}
+
+/* Mark the tree SRC as an optional subexpression.
+ To be called from preorder or postorder. */
+
+static reg_errcode_t
+mark_opt_subexp (void *extra, bin_tree_t *node)
+{
+ Idx idx = (Idx) (long) extra;
+ if (node->token.type == SUBEXP && node->token.opr.idx == idx)
+ node->token.opt_subexp = 1;
+
+ return REG_NOERROR;
+}
+
+/* Free the allocated memory inside NODE. */
+
+static void
+free_token (re_token_t *node)
+{
+#ifdef RE_ENABLE_I18N
+ if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
+ free_charset (node->opr.mbcset);
+ else
+#endif /* RE_ENABLE_I18N */
+ if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
+ re_free (node->opr.sbcset);
+}
+
+/* Worker function for tree walking. Free the allocated memory inside NODE
+ and its children. */
+
+static reg_errcode_t
+free_tree (void *extra, bin_tree_t *node)
+{
+ free_token (&node->token);
+ return REG_NOERROR;
+}
+
+
+/* Duplicate the node SRC, and return new node. This is a preorder
+ visit similar to the one implemented by the generic visitor, but
+ we need more infrastructure to maintain two parallel trees --- so,
+ it's easier to duplicate. */
+
+static bin_tree_t *
+duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa)
+{
+ const bin_tree_t *node;
+ bin_tree_t *dup_root;
+ bin_tree_t **p_new = &dup_root, *dup_node = root->parent;
+
+ for (node = root; ; )
+ {
+ /* Create a new tree and link it back to the current parent. */
+ *p_new = create_token_tree (dfa, NULL, NULL, &node->token);
+ if (*p_new == NULL)
+ return NULL;
+ (*p_new)->parent = dup_node;
+ (*p_new)->token.duplicated = 1;
+ dup_node = *p_new;
+
+ /* Go to the left node, or up and to the right. */
+ if (node->left)
+ {
+ node = node->left;
+ p_new = &dup_node->left;
+ }
+ else
+ {
+ const bin_tree_t *prev = NULL;
+ while (node->right == prev || node->right == NULL)
+ {
+ prev = node;
+ node = node->parent;
+ dup_node = dup_node->parent;
+ if (!node)
+ return dup_root;
+ }
+ node = node->right;
+ p_new = &dup_node->right;
+ }
+ }
+}
diff --git a/lib/regex.c b/lib/regex.c
new file mode 100644
index 0000000..d4eb726
--- /dev/null
+++ b/lib/regex.c
@@ -0,0 +1,71 @@
+/* Extended regular expression matching and search library.
+ Copyright (C) 2002, 2003, 2005, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#include <config.h>
+
+/* Make sure noone compiles this code with a C++ compiler. */
+#if defined __cplusplus && defined _LIBC
+# error "This is C code, use a C compiler"
+#endif
+
+#ifdef _LIBC
+/* We have to keep the namespace clean. */
+# define regfree(preg) __regfree (preg)
+# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
+# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
+# define regerror(errcode, preg, errbuf, errbuf_size) \
+ __regerror(errcode, preg, errbuf, errbuf_size)
+# define re_set_registers(bu, re, nu, st, en) \
+ __re_set_registers (bu, re, nu, st, en)
+# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
+ __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
+# define re_match(bufp, string, size, pos, regs) \
+ __re_match (bufp, string, size, pos, regs)
+# define re_search(bufp, string, size, startpos, range, regs) \
+ __re_search (bufp, string, size, startpos, range, regs)
+# define re_compile_pattern(pattern, length, bufp) \
+ __re_compile_pattern (pattern, length, bufp)
+# define re_set_syntax(syntax) __re_set_syntax (syntax)
+# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
+ __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
+# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
+
+# include "../locale/localeinfo.h"
+#endif
+
+/* On some systems, limits.h sets RE_DUP_MAX to a lower value than
+ GNU regex allows. Include it before <regex.h>, which correctly
+ #undefs RE_DUP_MAX and sets it to the right value. */
+#include <limits.h>
+
+#include <regex.h>
+#include "regex_internal.h"
+
+#include "regex_internal.c"
+#include "regcomp.c"
+#include "regexec.c"
+
+/* Binary backward compatibility. */
+#if _LIBC
+# include <shlib-compat.h>
+# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3)
+link_warning (re_max_failures, "the 're_max_failures' variable is obsolete and will go away.")
+int re_max_failures = 2000;
+# endif
+#endif
diff --git a/lib/regex.h b/lib/regex.h
new file mode 100644
index 0000000..e8748e1
--- /dev/null
+++ b/lib/regex.h
@@ -0,0 +1,668 @@
+/* Definitions for data structures and routines for the regular
+ expression library.
+ Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003,2005,2006
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#ifndef _REGEX_H
+#define _REGEX_H 1
+
+#include <sys/types.h>
+
+/* Allow the use in C++ code. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Define __USE_GNU_REGEX to declare GNU extensions that violate the
+ POSIX name space rules. */
+#undef __USE_GNU_REGEX
+#if (defined _GNU_SOURCE \
+ || (!defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE \
+ && !defined _XOPEN_SOURCE))
+# define __USE_GNU_REGEX 1
+#endif
+
+#ifdef _REGEX_LARGE_OFFSETS
+
+/* Use types and values that are wide enough to represent signed and
+ unsigned byte offsets in memory. This currently works only when
+ the regex code is used outside of the GNU C library; it is not yet
+ supported within glibc itself, and glibc users should not define
+ _REGEX_LARGE_OFFSETS. */
+
+/* The type of the offset of a byte within a string.
+ For historical reasons POSIX 1003.1-2004 requires that regoff_t be
+ at least as wide as off_t. However, many common POSIX platforms set
+ regoff_t to the more-sensible ssize_t and the Open Group has
+ signalled its intention to change the requirement to be that
+ regoff_t be at least as wide as ptrdiff_t and ssize_t; see XBD ERN
+ 60 (2005-08-25). We don't know of any hosts where ssize_t or
+ ptrdiff_t is wider than ssize_t, so ssize_t is safe. */
+typedef ssize_t regoff_t;
+
+/* The type of nonnegative object indexes. Traditionally, GNU regex
+ uses 'int' for these. Code that uses __re_idx_t should work
+ regardless of whether the type is signed. */
+typedef size_t __re_idx_t;
+
+/* The type of object sizes. */
+typedef size_t __re_size_t;
+
+/* The type of object sizes, in places where the traditional code
+ uses unsigned long int. */
+typedef size_t __re_long_size_t;
+
+#else
+
+/* Use types that are binary-compatible with the traditional GNU regex
+ implementation, which mishandles strings longer than INT_MAX. */
+
+typedef int regoff_t;
+typedef int __re_idx_t;
+typedef unsigned int __re_size_t;
+typedef unsigned long int __re_long_size_t;
+
+#endif
+
+/* The following two types have to be signed and unsigned integer type
+ wide enough to hold a value of a pointer. For most ANSI compilers
+ ptrdiff_t and size_t should be likely OK. Still size of these two
+ types is 2 for Microsoft C. Ugh... */
+typedef long int s_reg_t;
+typedef unsigned long int active_reg_t;
+
+/* The following bits are used to determine the regexp syntax we
+ recognize. The set/not-set meanings are chosen so that Emacs syntax
+ remains the value 0. The bits are given in alphabetical order, and
+ the definitions shifted by one from the previous bit; thus, when we
+ add or remove a bit, only one other definition need change. */
+typedef unsigned long int reg_syntax_t;
+
+#ifdef __USE_GNU_REGEX
+
+/* If this bit is not set, then \ inside a bracket expression is literal.
+ If set, then such a \ quotes the following character. */
+# define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
+
+/* If this bit is not set, then + and ? are operators, and \+ and \? are
+ literals.
+ If set, then \+ and \? are operators and + and ? are literals. */
+# define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
+
+/* If this bit is set, then character classes are supported. They are:
+ [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
+ [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
+ If not set, then character classes are not supported. */
+# define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
+
+/* If this bit is set, then ^ and $ are always anchors (outside bracket
+ expressions, of course).
+ If this bit is not set, then it depends:
+ ^ is an anchor if it is at the beginning of a regular
+ expression or after an open-group or an alternation operator;
+ $ is an anchor if it is at the end of a regular expression, or
+ before a close-group or an alternation operator.
+
+ This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
+ POSIX draft 11.2 says that * etc. in leading positions is undefined.
+ We already implemented a previous draft which made those constructs
+ invalid, though, so we haven't changed the code back. */
+# define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
+
+/* If this bit is set, then special characters are always special
+ regardless of where they are in the pattern.
+ If this bit is not set, then special characters are special only in
+ some contexts; otherwise they are ordinary. Specifically,
+ * + ? and intervals are only special when not after the beginning,
+ open-group, or alternation operator. */
+# define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
+
+/* If this bit is set, then *, +, ?, and { cannot be first in an re or
+ immediately after an alternation or begin-group operator. */
+# define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
+
+/* If this bit is set, then . matches newline.
+ If not set, then it doesn't. */
+# define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
+
+/* If this bit is set, then . doesn't match NUL.
+ If not set, then it does. */
+# define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
+
+/* If this bit is set, nonmatching lists [^...] do not match newline.
+ If not set, they do. */
+# define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
+
+/* If this bit is set, either \{...\} or {...} defines an
+ interval, depending on RE_NO_BK_BRACES.
+ If not set, \{, \}, {, and } are literals. */
+# define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+
+/* If this bit is set, +, ? and | aren't recognized as operators.
+ If not set, they are. */
+# define RE_LIMITED_OPS (RE_INTERVALS << 1)
+
+/* If this bit is set, newline is an alternation operator.
+ If not set, newline is literal. */
+# define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
+
+/* If this bit is set, then `{...}' defines an interval, and \{ and \}
+ are literals.
+ If not set, then `\{...\}' defines an interval. */
+# define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
+
+/* If this bit is set, (...) defines a group, and \( and \) are literals.
+ If not set, \(...\) defines a group, and ( and ) are literals. */
+# define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
+
+/* If this bit is set, then \<digit> matches <digit>.
+ If not set, then \<digit> is a back-reference. */
+# define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
+
+/* If this bit is set, then | is an alternation operator, and \| is literal.
+ If not set, then \| is an alternation operator, and | is literal. */
+# define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
+
+/* If this bit is set, then an ending range point collating higher
+ than the starting range point, as in [z-a], is invalid.
+ If not set, then when ending range point collates higher than the
+ starting range point, the range is ignored. */
+# define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
+
+/* If this bit is set, then an unmatched ) is ordinary.
+ If not set, then an unmatched ) is invalid. */
+# define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+
+/* If this bit is set, succeed as soon as we match the whole pattern,
+ without further backtracking. */
+# define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
+
+/* If this bit is set, do not process the GNU regex operators.
+ If not set, then the GNU regex operators are recognized. */
+# define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
+
+/* If this bit is set, turn on internal regex debugging.
+ If not set, and debugging was on, turn it off.
+ This only works if regex.c is compiled -DDEBUG.
+ We define this bit always, so that all that's needed to turn on
+ debugging is to recompile regex.c; the calling code can always have
+ this bit set, and it won't affect anything in the normal case. */
+# define RE_DEBUG (RE_NO_GNU_OPS << 1)
+
+/* If this bit is set, a syntactically invalid interval is treated as
+ a string of ordinary characters. For example, the ERE 'a{1' is
+ treated as 'a\{1'. */
+# define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
+
+/* If this bit is set, then ignore case when matching.
+ If not set, then case is significant. */
+# define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
+
+/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only
+ for ^, because it is difficult to scan the regex backwards to find
+ whether ^ should be special. */
+# define RE_CARET_ANCHORS_HERE (RE_ICASE << 1)
+
+/* If this bit is set, then \{ cannot be first in an bre or
+ immediately after an alternation or begin-group operator. */
+# define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1)
+
+/* If this bit is set, then no_sub will be set to 1 during
+ re_compile_pattern. */
+# define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1)
+
+#endif /* defined __USE_GNU_REGEX */
+
+/* This global variable defines the particular regexp syntax to use (for
+ some interfaces). When a regexp is compiled, the syntax used is
+ stored in the pattern buffer, so changing this does not affect
+ already-compiled regexps. */
+extern reg_syntax_t re_syntax_options;
+
+#ifdef __USE_GNU_REGEX
+/* Define combinations of the above bits for the standard possibilities.
+ (The [[[ comments delimit what gets put into the Texinfo file, so
+ don't delete them!) */
+/* [[[begin syntaxes]]] */
+# define RE_SYNTAX_EMACS 0
+
+# define RE_SYNTAX_AWK \
+ (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
+ | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
+
+# define RE_SYNTAX_GNU_AWK \
+ ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \
+ & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS \
+ | RE_CONTEXT_INVALID_OPS ))
+
+# define RE_SYNTAX_POSIX_AWK \
+ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
+ | RE_INTERVALS | RE_NO_GNU_OPS)
+
+# define RE_SYNTAX_GREP \
+ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
+ | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
+ | RE_NEWLINE_ALT)
+
+# define RE_SYNTAX_EGREP \
+ (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
+ | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
+ | RE_NO_BK_VBAR)
+
+# define RE_SYNTAX_POSIX_EGREP \
+ (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \
+ | RE_INVALID_INTERVAL_ORD)
+
+/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
+# define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
+
+# define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
+
+/* Syntax bits common to both basic and extended POSIX regex syntax. */
+# define _RE_SYNTAX_POSIX_COMMON \
+ (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
+ | RE_INTERVALS | RE_NO_EMPTY_RANGES)
+
+# define RE_SYNTAX_POSIX_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP)
+
+/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
+ RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
+ isn't minimal, since other operators, such as \`, aren't disabled. */
+# define RE_SYNTAX_POSIX_MINIMAL_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
+
+# define RE_SYNTAX_POSIX_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
+ | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
+ removed and RE_NO_BK_REFS is added. */
+# define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
+/* [[[end syntaxes]]] */
+
+#endif /* defined __USE_GNU_REGEX */
+
+#ifdef __USE_GNU_REGEX
+
+/* Maximum number of duplicates an interval can allow. POSIX-conforming
+ systems might define this in <limits.h>, but we want our
+ value, so remove any previous define. */
+# ifdef RE_DUP_MAX
+# undef RE_DUP_MAX
+# endif
+
+/* RE_DUP_MAX is 2**15 - 1 because an earlier implementation stored
+ the counter as a 2-byte signed integer. This is no longer true, so
+ RE_DUP_MAX could be increased to (INT_MAX / 10 - 1), or to
+ ((SIZE_MAX - 2) / 10 - 1) if _REGEX_LARGE_OFFSETS is defined.
+ However, there would be a huge performance problem if someone
+ actually used a pattern like a\{214748363\}, so RE_DUP_MAX retains
+ its historical value. */
+# define RE_DUP_MAX (0x7fff)
+
+#endif /* defined __USE_GNU_REGEX */
+
+
+/* POSIX `cflags' bits (i.e., information for `regcomp'). */
+
+/* If this bit is set, then use extended regular expression syntax.
+ If not set, then use basic regular expression syntax. */
+#define REG_EXTENDED 1
+
+/* If this bit is set, then ignore case when matching.
+ If not set, then case is significant. */
+#define REG_ICASE (1 << 1)
+
+/* If this bit is set, then anchors do not match at newline
+ characters in the string.
+ If not set, then anchors do match at newlines. */
+#define REG_NEWLINE (1 << 2)
+
+/* If this bit is set, then report only success or fail in regexec.
+ If not set, then returns differ between not matching and errors. */
+#define REG_NOSUB (1 << 3)
+
+
+/* POSIX `eflags' bits (i.e., information for regexec). */
+
+/* If this bit is set, then the beginning-of-line operator doesn't match
+ the beginning of the string (presumably because it's not the
+ beginning of a line).
+ If not set, then the beginning-of-line operator does match the
+ beginning of the string. */
+#define REG_NOTBOL 1
+
+/* Like REG_NOTBOL, except for the end-of-line. */
+#define REG_NOTEOL (1 << 1)
+
+/* Use PMATCH[0] to delimit the start and end of the search in the
+ buffer. */
+#define REG_STARTEND (1 << 2)
+
+
+/* If any error codes are removed, changed, or added, update the
+ `__re_error_msgid' table in regcomp.c. */
+
+typedef enum
+{
+ _REG_ENOSYS = -1, /* This will never happen for this implementation. */
+ _REG_NOERROR = 0, /* Success. */
+ _REG_NOMATCH, /* Didn't find a match (for regexec). */
+
+ /* POSIX regcomp return error codes. (In the order listed in the
+ standard.) */
+ _REG_BADPAT, /* Invalid pattern. */
+ _REG_ECOLLATE, /* Invalid collating element. */
+ _REG_ECTYPE, /* Invalid character class name. */
+ _REG_EESCAPE, /* Trailing backslash. */
+ _REG_ESUBREG, /* Invalid back reference. */
+ _REG_EBRACK, /* Unmatched left bracket. */
+ _REG_EPAREN, /* Parenthesis imbalance. */
+ _REG_EBRACE, /* Unmatched \{. */
+ _REG_BADBR, /* Invalid contents of \{\}. */
+ _REG_ERANGE, /* Invalid range end. */
+ _REG_ESPACE, /* Ran out of memory. */
+ _REG_BADRPT, /* No preceding re for repetition op. */
+
+ /* Error codes we've added. */
+ _REG_EEND, /* Premature end. */
+ _REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
+ _REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
+} reg_errcode_t;
+
+#ifdef _XOPEN_SOURCE
+# define REG_ENOSYS _REG_ENOSYS
+#endif
+#define REG_NOERROR _REG_NOERROR
+#define REG_NOMATCH _REG_NOMATCH
+#define REG_BADPAT _REG_BADPAT
+#define REG_ECOLLATE _REG_ECOLLATE
+#define REG_ECTYPE _REG_ECTYPE
+#define REG_EESCAPE _REG_EESCAPE
+#define REG_ESUBREG _REG_ESUBREG
+#define REG_EBRACK _REG_EBRACK
+#define REG_EPAREN _REG_EPAREN
+#define REG_EBRACE _REG_EBRACE
+#define REG_BADBR _REG_BADBR
+#define REG_ERANGE _REG_ERANGE
+#define REG_ESPACE _REG_ESPACE
+#define REG_BADRPT _REG_BADRPT
+#define REG_EEND _REG_EEND
+#define REG_ESIZE _REG_ESIZE
+#define REG_ERPAREN _REG_ERPAREN
+
+/* struct re_pattern_buffer normally uses member names like `buffer'
+ that POSIX does not allow. In POSIX mode these members have names
+ with leading `re_' (e.g., `re_buffer'). */
+#ifdef __USE_GNU_REGEX
+# define _REG_RE_NAME(id) id
+# define _REG_RM_NAME(id) id
+#else
+# define _REG_RE_NAME(id) re_##id
+# define _REG_RM_NAME(id) rm_##id
+#endif
+
+/* The user can specify the type of the re_translate member by
+ defining the macro RE_TRANSLATE_TYPE, which defaults to unsigned
+ char *. This pollutes the POSIX name space, so in POSIX mode just
+ use unsigned char *. */
+#ifdef __USE_GNU_REGEX
+# ifndef RE_TRANSLATE_TYPE
+# define RE_TRANSLATE_TYPE unsigned char *
+# endif
+# define REG_TRANSLATE_TYPE RE_TRANSLATE_TYPE
+#else
+# define REG_TRANSLATE_TYPE unsigned char *
+#endif
+
+/* This data structure represents a compiled pattern. Before calling
+ the pattern compiler, the fields `buffer', `allocated', `fastmap',
+ `translate', and `no_sub' can be set. After the pattern has been
+ compiled, the `re_nsub' field is available. All other fields are
+ private to the regex routines. */
+
+struct re_pattern_buffer
+{
+ /* Space that holds the compiled pattern. It is declared as
+ `unsigned char *' because its elements are sometimes used as
+ array indexes. */
+ unsigned char *_REG_RE_NAME (buffer);
+
+ /* Number of bytes to which `buffer' points. */
+ __re_long_size_t _REG_RE_NAME (allocated);
+
+ /* Number of bytes actually used in `buffer'. */
+ __re_long_size_t _REG_RE_NAME (used);
+
+ /* Syntax setting with which the pattern was compiled. */
+ reg_syntax_t _REG_RE_NAME (syntax);
+
+ /* Pointer to a fastmap, if any, otherwise zero. re_search uses the
+ fastmap, if there is one, to skip over impossible starting points
+ for matches. */
+ char *_REG_RE_NAME (fastmap);
+
+ /* Either a translate table to apply to all characters before
+ comparing them, or zero for no translation. The translation is
+ applied to a pattern when it is compiled and to a string when it
+ is matched. */
+ REG_TRANSLATE_TYPE _REG_RE_NAME (translate);
+
+ /* Number of subexpressions found by the compiler. */
+ size_t re_nsub;
+
+ /* Zero if this pattern cannot match the empty string, one else.
+ Well, in truth it's used only in `re_search_2', to see whether or
+ not we should use the fastmap, so we don't set this absolutely
+ perfectly; see `re_compile_fastmap' (the `duplicate' case). */
+ unsigned int _REG_RE_NAME (can_be_null) : 1;
+
+ /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+ for `max (RE_NREGS, re_nsub + 1)' groups.
+ If REGS_REALLOCATE, reallocate space if necessary.
+ If REGS_FIXED, use what's there. */
+#ifdef __USE_GNU_REGEX
+# define REGS_UNALLOCATED 0
+# define REGS_REALLOCATE 1
+# define REGS_FIXED 2
+#endif
+ unsigned int _REG_RE_NAME (regs_allocated) : 2;
+
+ /* Set to zero when `regex_compile' compiles a pattern; set to one
+ by `re_compile_fastmap' if it updates the fastmap. */
+ unsigned int _REG_RE_NAME (fastmap_accurate) : 1;
+
+ /* If set, `re_match_2' does not return information about
+ subexpressions. */
+ unsigned int _REG_RE_NAME (no_sub) : 1;
+
+ /* If set, a beginning-of-line anchor doesn't match at the beginning
+ of the string. */
+ unsigned int _REG_RE_NAME (not_bol) : 1;
+
+ /* Similarly for an end-of-line anchor. */
+ unsigned int _REG_RE_NAME (not_eol) : 1;
+
+ /* If true, an anchor at a newline matches. */
+ unsigned int _REG_RE_NAME (newline_anchor) : 1;
+
+/* [[[end pattern_buffer]]] */
+};
+
+typedef struct re_pattern_buffer regex_t;
+
+/* This is the structure we store register match data in. See
+ regex.texinfo for a full description of what registers match. */
+struct re_registers
+{
+ __re_size_t _REG_RM_NAME (num_regs);
+ regoff_t *_REG_RM_NAME (start);
+ regoff_t *_REG_RM_NAME (end);
+};
+
+
+/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
+ `re_match_2' returns information about at least this many registers
+ the first time a `regs' structure is passed. */
+#if !defined RE_NREGS && defined __USE_GNU_REGEX
+# define RE_NREGS 30
+#endif
+
+
+/* POSIX specification for registers. Aside from the different names than
+ `re_registers', POSIX uses an array of structures, instead of a
+ structure of arrays. */
+typedef struct
+{
+ regoff_t rm_so; /* Byte offset from string's start to substring's start. */
+ regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
+} regmatch_t;
+
+/* Declarations for routines. */
+
+/* Sets the current default syntax to SYNTAX, and return the old syntax.
+ You can also simply assign to the `re_syntax_options' variable. */
+extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax);
+
+/* Compile the regular expression PATTERN, with length LENGTH
+ and syntax given by the global `re_syntax_options', into the buffer
+ BUFFER. Return NULL if successful, and an error string if not. */
+extern const char *re_compile_pattern (const char *__pattern, size_t __length,
+ struct re_pattern_buffer *__buffer);
+
+
+/* Compile a fastmap for the compiled pattern in BUFFER; used to
+ accelerate searches. Return 0 if successful and -2 if was an
+ internal error. */
+extern int re_compile_fastmap (struct re_pattern_buffer *__buffer);
+
+
+/* Search in the string STRING (with length LENGTH) for the pattern
+ compiled into BUFFER. Start searching at position START, for RANGE
+ characters. Return the starting position of the match, -1 for no
+ match, or -2 for an internal error. Also return register
+ information in REGS (if REGS and BUFFER->no_sub are nonzero). */
+extern regoff_t re_search (struct re_pattern_buffer *__buffer,
+ const char *__string, __re_idx_t __length,
+ __re_idx_t __start, regoff_t __range,
+ struct re_registers *__regs);
+
+
+/* Like `re_search', but search in the concatenation of STRING1 and
+ STRING2. Also, stop searching at index START + STOP. */
+extern regoff_t re_search_2 (struct re_pattern_buffer *__buffer,
+ const char *__string1, __re_idx_t __length1,
+ const char *__string2, __re_idx_t __length2,
+ __re_idx_t __start, regoff_t __range,
+ struct re_registers *__regs,
+ __re_idx_t __stop);
+
+
+/* Like `re_search', but return how many characters in STRING the regexp
+ in BUFFER matched, starting at position START. */
+extern regoff_t re_match (struct re_pattern_buffer *__buffer,
+ const char *__string, __re_idx_t __length,
+ __re_idx_t __start, struct re_registers *__regs);
+
+
+/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
+extern regoff_t re_match_2 (struct re_pattern_buffer *__buffer,
+ const char *__string1, __re_idx_t __length1,
+ const char *__string2, __re_idx_t __length2,
+ __re_idx_t __start, struct re_registers *__regs,
+ __re_idx_t __stop);
+
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+ ENDS. Subsequent matches using BUFFER and REGS will use this memory
+ for recording register information. STARTS and ENDS must be
+ allocated with malloc, and must each be at least `NUM_REGS * sizeof
+ (regoff_t)' bytes long.
+
+ If NUM_REGS == 0, then subsequent matches should allocate their own
+ register data.
+
+ Unless this function is called, the first search or match using
+ PATTERN_BUFFER will allocate its own register data, without
+ freeing the old data. */
+extern void re_set_registers (struct re_pattern_buffer *__buffer,
+ struct re_registers *__regs,
+ __re_size_t __num_regs,
+ regoff_t *__starts, regoff_t *__ends);
+
+#if defined _REGEX_RE_COMP || defined _LIBC
+# ifndef _CRAY
+/* 4.2 bsd compatibility. */
+extern char *re_comp (const char *);
+extern int re_exec (const char *);
+# endif
+#endif
+
+/* GCC 2.95 and later have "__restrict"; C99 compilers have
+ "restrict", and "configure" may have defined "restrict". */
+#ifndef __restrict
+# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
+# if defined restrict || 199901L <= __STDC_VERSION__
+# define __restrict restrict
+# else
+# define __restrict
+# endif
+# endif
+#endif
+/* gcc 3.1 and up support the [restrict] syntax. */
+#ifndef __restrict_arr
+# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) \
+ && !defined __GNUG__
+# define __restrict_arr __restrict
+# else
+# define __restrict_arr
+# endif
+#endif
+
+/* POSIX compatibility. */
+extern int regcomp (regex_t *__restrict __preg,
+ const char *__restrict __pattern,
+ int __cflags);
+
+extern int regexec (const regex_t *__restrict __preg,
+ const char *__restrict __string, size_t __nmatch,
+ regmatch_t __pmatch[__restrict_arr],
+ int __eflags);
+
+extern size_t regerror (int __errcode, const regex_t *__restrict __preg,
+ char *__restrict __errbuf, size_t __errbuf_size);
+
+extern void regfree (regex_t *__preg);
+
+
+#ifdef __cplusplus
+}
+#endif /* C++ */
+
+#endif /* regex.h */
diff --git a/lib/regex_internal.c b/lib/regex_internal.c
new file mode 100644
index 0000000..78e16f3
--- /dev/null
+++ b/lib/regex_internal.c
@@ -0,0 +1,1742 @@
+/* Extended regular expression matching and search library.
+ Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+static void re_string_construct_common (const char *str, Idx len,
+ re_string_t *pstr,
+ RE_TRANSLATE_TYPE trans, bool icase,
+ const re_dfa_t *dfa) internal_function;
+static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
+ const re_node_set *nodes,
+ re_hashval_t hash) internal_function;
+static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
+ const re_node_set *nodes,
+ unsigned int context,
+ re_hashval_t hash) internal_function;
+
+/* Functions for string operation. */
+
+/* This function allocate the buffers. It is necessary to call
+ re_string_reconstruct before using the object. */
+
+static reg_errcode_t
+internal_function
+re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len,
+ RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
+{
+ reg_errcode_t ret;
+ Idx init_buf_len;
+
+ /* Ensure at least one character fits into the buffers. */
+ if (init_len < dfa->mb_cur_max)
+ init_len = dfa->mb_cur_max;
+ init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
+ re_string_construct_common (str, len, pstr, trans, icase, dfa);
+
+ ret = re_string_realloc_buffers (pstr, init_buf_len);
+ if (BE (ret != REG_NOERROR, 0))
+ return ret;
+
+ pstr->word_char = dfa->word_char;
+ pstr->word_ops_used = dfa->word_ops_used;
+ pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
+ pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
+ pstr->valid_raw_len = pstr->valid_len;
+ return REG_NOERROR;
+}
+
+/* This function allocate the buffers, and initialize them. */
+
+static reg_errcode_t
+internal_function
+re_string_construct (re_string_t *pstr, const char *str, Idx len,
+ RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
+{
+ reg_errcode_t ret;
+ memset (pstr, '\0', sizeof (re_string_t));
+ re_string_construct_common (str, len, pstr, trans, icase, dfa);
+
+ if (len > 0)
+ {
+ ret = re_string_realloc_buffers (pstr, len + 1);
+ if (BE (ret != REG_NOERROR, 0))
+ return ret;
+ }
+ pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
+
+ if (icase)
+ {
+#ifdef RE_ENABLE_I18N
+ if (dfa->mb_cur_max > 1)
+ {
+ while (1)
+ {
+ ret = build_wcs_upper_buffer (pstr);
+ if (BE (ret != REG_NOERROR, 0))
+ return ret;
+ if (pstr->valid_raw_len >= len)
+ break;
+ if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
+ break;
+ ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
+ if (BE (ret != REG_NOERROR, 0))
+ return ret;
+ }
+ }
+ else
+#endif /* RE_ENABLE_I18N */
+ build_upper_buffer (pstr);
+ }
+ else
+ {
+#ifdef RE_ENABLE_I18N
+ if (dfa->mb_cur_max > 1)
+ build_wcs_buffer (pstr);
+ else
+#endif /* RE_ENABLE_I18N */
+ {
+ if (trans != NULL)
+ re_string_translate_buffer (pstr);
+ else
+ {
+ pstr->valid_len = pstr->bufs_len;
+ pstr->valid_raw_len = pstr->bufs_len;
+ }
+ }
+ }
+
+ return REG_NOERROR;
+}
+
+/* Helper functions for re_string_allocate, and re_string_construct. */
+
+static reg_errcode_t
+internal_function
+re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
+{
+#ifdef RE_ENABLE_I18N
+ if (pstr->mb_cur_max > 1)
+ {
+ wint_t *new_wcs;
+
+ /* Avoid overflow. */
+ size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx));
+ if (BE (SIZE_MAX / max_object_size < new_buf_len, 0))
+ return REG_ESPACE;
+
+ new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
+ if (BE (new_wcs == NULL, 0))
+ return REG_ESPACE;
+ pstr->wcs = new_wcs;
+ if (pstr->offsets != NULL)
+ {
+ Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len);
+ if (BE (new_offsets == NULL, 0))
+ return REG_ESPACE;
+ pstr->offsets = new_offsets;
+ }
+ }
+#endif /* RE_ENABLE_I18N */
+ if (pstr->mbs_allocated)
+ {
+ unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
+ new_buf_len);
+ if (BE (new_mbs == NULL, 0))
+ return REG_ESPACE;
+ pstr->mbs = new_mbs;
+ }
+ pstr->bufs_len = new_buf_len;
+ return REG_NOERROR;
+}
+
+
+static void
+internal_function
+re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
+ RE_TRANSLATE_TYPE trans, bool icase,
+ const re_dfa_t *dfa)
+{
+ pstr->raw_mbs = (const unsigned char *) str;
+ pstr->len = len;
+ pstr->raw_len = len;
+ pstr->trans = trans;
+ pstr->icase = icase;
+ pstr->mbs_allocated = (trans != NULL || icase);
+ pstr->mb_cur_max = dfa->mb_cur_max;
+ pstr->is_utf8 = dfa->is_utf8;
+ pstr->map_notascii = dfa->map_notascii;
+ pstr->stop = pstr->len;
+ pstr->raw_stop = pstr->stop;
+}
+
+#ifdef RE_ENABLE_I18N
+
+/* Build wide character buffer PSTR->WCS.
+ If the byte sequence of the string are:
+ <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
+ Then wide character buffer will be:
+ <wc1> , WEOF , <wc2> , WEOF , <wc3>
+ We use WEOF for padding, they indicate that the position isn't
+ a first byte of a multibyte character.
+
+ Note that this function assumes PSTR->VALID_LEN elements are already
+ built and starts from PSTR->VALID_LEN. */
+
+static void
+internal_function
+build_wcs_buffer (re_string_t *pstr)
+{
+#ifdef _LIBC
+ unsigned char buf[MB_LEN_MAX];
+ assert (MB_LEN_MAX >= pstr->mb_cur_max);
+#else
+ unsigned char buf[64];
+#endif
+ mbstate_t prev_st;
+ Idx byte_idx, end_idx, remain_len;
+ size_t mbclen;
+
+ /* Build the buffers from pstr->valid_len to either pstr->len or
+ pstr->bufs_len. */
+ end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+ for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
+ {
+ wchar_t wc;
+ const char *p;
+
+ remain_len = end_idx - byte_idx;
+ prev_st = pstr->cur_state;
+ /* Apply the translation if we need. */
+ if (BE (pstr->trans != NULL, 0))
+ {
+ int i, ch;
+
+ for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
+ {
+ ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
+ buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
+ }
+ p = (const char *) buf;
+ }
+ else
+ p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
+ mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
+ if (BE (mbclen == (size_t) -2, 0))
+ {
+ /* The buffer doesn't have enough space, finish to build. */
+ pstr->cur_state = prev_st;
+ break;
+ }
+ else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
+ {
+ /* We treat these cases as a singlebyte character. */
+ mbclen = 1;
+ wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
+ if (BE (pstr->trans != NULL, 0))
+ wc = pstr->trans[wc];
+ pstr->cur_state = prev_st;
+ }
+
+ /* Write wide character and padding. */
+ pstr->wcs[byte_idx++] = wc;
+ /* Write paddings. */
+ for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+ pstr->wcs[byte_idx++] = WEOF;
+ }
+ pstr->valid_len = byte_idx;
+ pstr->valid_raw_len = byte_idx;
+}
+
+/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
+ but for REG_ICASE. */
+
+static reg_errcode_t
+internal_function
+build_wcs_upper_buffer (re_string_t *pstr)
+{
+ mbstate_t prev_st;
+ Idx src_idx, byte_idx, end_idx, remain_len;
+ size_t mbclen;
+#ifdef _LIBC
+ char buf[MB_LEN_MAX];
+ assert (MB_LEN_MAX >= pstr->mb_cur_max);
+#else
+ char buf[64];
+#endif
+
+ byte_idx = pstr->valid_len;
+ end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+
+ /* The following optimization assumes that ASCII characters can be
+ mapped to wide characters with a simple cast. */
+ if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
+ {
+ while (byte_idx < end_idx)
+ {
+ wchar_t wc;
+
+ if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
+ && mbsinit (&pstr->cur_state))
+ {
+ /* In case of a singlebyte character. */
+ pstr->mbs[byte_idx]
+ = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
+ /* The next step uses the assumption that wchar_t is encoded
+ ASCII-safe: all ASCII values can be converted like this. */
+ pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
+ ++byte_idx;
+ continue;
+ }
+
+ remain_len = end_idx - byte_idx;
+ prev_st = pstr->cur_state;
+ mbclen = mbrtowc (&wc,
+ ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+ + byte_idx), remain_len, &pstr->cur_state);
+ if (BE (mbclen < (size_t) -2, 1))
+ {
+ wchar_t wcu = wc;
+ if (iswlower (wc))
+ {
+ size_t mbcdlen;
+
+ wcu = towupper (wc);
+ mbcdlen = wcrtomb (buf, wcu, &prev_st);
+ if (BE (mbclen == mbcdlen, 1))
+ memcpy (pstr->mbs + byte_idx, buf, mbclen);
+ else
+ {
+ src_idx = byte_idx;
+ goto offsets_needed;
+ }
+ }
+ else
+ memcpy (pstr->mbs + byte_idx,
+ pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
+ pstr->wcs[byte_idx++] = wcu;
+ /* Write paddings. */
+ for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+ pstr->wcs[byte_idx++] = WEOF;
+ }
+ else if (mbclen == (size_t) -1 || mbclen == 0)
+ {
+ /* It is an invalid character or '\0'. Just use the byte. */
+ int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
+ pstr->mbs[byte_idx] = ch;
+ /* And also cast it to wide char. */
+ pstr->wcs[byte_idx++] = (wchar_t) ch;
+ if (BE (mbclen == (size_t) -1, 0))
+ pstr->cur_state = prev_st;
+ }
+ else
+ {
+ /* The buffer doesn't have enough space, finish to build. */
+ pstr->cur_state = prev_st;
+ break;
+ }
+ }
+ pstr->valid_len = byte_idx;
+ pstr->valid_raw_len = byte_idx;
+ return REG_NOERROR;
+ }
+ else
+ for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
+ {
+ wchar_t wc;
+ const char *p;
+ offsets_needed:
+ remain_len = end_idx - byte_idx;
+ prev_st = pstr->cur_state;
+ if (BE (pstr->trans != NULL, 0))
+ {
+ int i, ch;
+
+ for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
+ {
+ ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
+ buf[i] = pstr->trans[ch];
+ }
+ p = (const char *) buf;
+ }
+ else
+ p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
+ mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
+ if (BE (mbclen < (size_t) -2, 1))
+ {
+ wchar_t wcu = wc;
+ if (iswlower (wc))
+ {
+ size_t mbcdlen;
+
+ wcu = towupper (wc);
+ mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
+ if (BE (mbclen == mbcdlen, 1))
+ memcpy (pstr->mbs + byte_idx, buf, mbclen);
+ else if (mbcdlen != (size_t) -1)
+ {
+ size_t i;
+
+ if (byte_idx + mbcdlen > pstr->bufs_len)
+ {
+ pstr->cur_state = prev_st;
+ break;
+ }
+
+ if (pstr->offsets == NULL)
+ {
+ pstr->offsets = re_malloc (Idx, pstr->bufs_len);
+
+ if (pstr->offsets == NULL)
+ return REG_ESPACE;
+ }
+ if (!pstr->offsets_needed)
+ {
+ for (i = 0; i < (size_t) byte_idx; ++i)
+ pstr->offsets[i] = i;
+ pstr->offsets_needed = 1;
+ }
+
+ memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
+ pstr->wcs[byte_idx] = wcu;
+ pstr->offsets[byte_idx] = src_idx;
+ for (i = 1; i < mbcdlen; ++i)
+ {
+ pstr->offsets[byte_idx + i]
+ = src_idx + (i < mbclen ? i : mbclen - 1);
+ pstr->wcs[byte_idx + i] = WEOF;
+ }
+ pstr->len += mbcdlen - mbclen;
+ if (pstr->raw_stop > src_idx)
+ pstr->stop += mbcdlen - mbclen;
+ end_idx = (pstr->bufs_len > pstr->len)
+ ? pstr->len : pstr->bufs_len;
+ byte_idx += mbcdlen;
+ src_idx += mbclen;
+ continue;
+ }
+ else
+ memcpy (pstr->mbs + byte_idx, p, mbclen);
+ }
+ else
+ memcpy (pstr->mbs + byte_idx, p, mbclen);
+
+ if (BE (pstr->offsets_needed != 0, 0))
+ {
+ size_t i;
+ for (i = 0; i < mbclen; ++i)
+ pstr->offsets[byte_idx + i] = src_idx + i;
+ }
+ src_idx += mbclen;
+
+ pstr->wcs[byte_idx++] = wcu;
+ /* Write paddings. */
+ for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+ pstr->wcs[byte_idx++] = WEOF;
+ }
+ else if (mbclen == (size_t) -1 || mbclen == 0)
+ {
+ /* It is an invalid character or '\0'. Just use the byte. */
+ int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
+
+ if (BE (pstr->trans != NULL, 0))
+ ch = pstr->trans [ch];
+ pstr->mbs[byte_idx] = ch;
+
+ if (BE (pstr->offsets_needed != 0, 0))
+ pstr->offsets[byte_idx] = src_idx;
+ ++src_idx;
+
+ /* And also cast it to wide char. */
+ pstr->wcs[byte_idx++] = (wchar_t) ch;
+ if (BE (mbclen == (size_t) -1, 0))
+ pstr->cur_state = prev_st;
+ }
+ else
+ {
+ /* The buffer doesn't have enough space, finish to build. */
+ pstr->cur_state = prev_st;
+ break;
+ }
+ }
+ pstr->valid_len = byte_idx;
+ pstr->valid_raw_len = src_idx;
+ return REG_NOERROR;
+}
+
+/* Skip characters until the index becomes greater than NEW_RAW_IDX.
+ Return the index. */
+
+static Idx
+internal_function
+re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
+{
+ mbstate_t prev_st;
+ Idx rawbuf_idx;
+ size_t mbclen;
+ wint_t wc = WEOF;
+
+ /* Skip the characters which are not necessary to check. */
+ for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
+ rawbuf_idx < new_raw_idx;)
+ {
+ wchar_t wc2;
+ Idx remain_len;
+ remain_len = pstr->len - rawbuf_idx;
+ prev_st = pstr->cur_state;
+ mbclen = mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
+ remain_len, &pstr->cur_state);
+ if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
+ {
+ /* We treat these cases as a single byte character. */
+ if (mbclen == 0 || remain_len == 0)
+ wc = L'\0';
+ else
+ wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
+ mbclen = 1;
+ pstr->cur_state = prev_st;
+ }
+ else
+ wc = wc2;
+ /* Then proceed the next character. */
+ rawbuf_idx += mbclen;
+ }
+ *last_wc = wc;
+ return rawbuf_idx;
+}
+#endif /* RE_ENABLE_I18N */
+
+/* Build the buffer PSTR->MBS, and apply the translation if we need.
+ This function is used in case of REG_ICASE. */
+
+static void
+internal_function
+build_upper_buffer (re_string_t *pstr)
+{
+ Idx char_idx, end_idx;
+ end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+
+ for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
+ {
+ int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
+ if (BE (pstr->trans != NULL, 0))
+ ch = pstr->trans[ch];
+ if (islower (ch))
+ pstr->mbs[char_idx] = toupper (ch);
+ else
+ pstr->mbs[char_idx] = ch;
+ }
+ pstr->valid_len = char_idx;
+ pstr->valid_raw_len = char_idx;
+}
+
+/* Apply TRANS to the buffer in PSTR. */
+
+static void
+internal_function
+re_string_translate_buffer (re_string_t *pstr)
+{
+ Idx buf_idx, end_idx;
+ end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+
+ for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
+ {
+ int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
+ pstr->mbs[buf_idx] = pstr->trans[ch];
+ }
+
+ pstr->valid_len = buf_idx;
+ pstr->valid_raw_len = buf_idx;
+}
+
+/* This function re-construct the buffers.
+ Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
+ convert to upper case in case of REG_ICASE, apply translation. */
+
+static reg_errcode_t
+internal_function
+re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
+{
+ Idx offset;
+
+ if (BE (pstr->raw_mbs_idx <= idx, 0))
+ offset = idx - pstr->raw_mbs_idx;
+ else
+ {
+ /* Reset buffer. */
+#ifdef RE_ENABLE_I18N
+ if (pstr->mb_cur_max > 1)
+ memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
+#endif /* RE_ENABLE_I18N */
+ pstr->len = pstr->raw_len;
+ pstr->stop = pstr->raw_stop;
+ pstr->valid_len = 0;
+ pstr->raw_mbs_idx = 0;
+ pstr->valid_raw_len = 0;
+ pstr->offsets_needed = 0;
+ pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
+ : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
+ if (!pstr->mbs_allocated)
+ pstr->mbs = (unsigned char *) pstr->raw_mbs;
+ offset = idx;
+ }
+
+ if (BE (offset != 0, 1))
+ {
+ /* Should the already checked characters be kept? */
+ if (BE (offset < pstr->valid_raw_len, 1))
+ {
+ /* Yes, move them to the front of the buffer. */
+#ifdef RE_ENABLE_I18N
+ if (BE (pstr->offsets_needed, 0))
+ {
+ Idx low = 0, high = pstr->valid_len, mid;
+ do
+ {
+ mid = (high + low) / 2;
+ if (pstr->offsets[mid] > offset)
+ high = mid;
+ else if (pstr->offsets[mid] < offset)
+ low = mid + 1;
+ else
+ break;
+ }
+ while (low < high);
+ if (pstr->offsets[mid] < offset)
+ ++mid;
+ pstr->tip_context = re_string_context_at (pstr, mid - 1,
+ eflags);
+ /* This can be quite complicated, so handle specially
+ only the common and easy case where the character with
+ different length representation of lower and upper
+ case is present at or after offset. */
+ if (pstr->valid_len > offset
+ && mid == offset && pstr->offsets[mid] == offset)
+ {
+ memmove (pstr->wcs, pstr->wcs + offset,
+ (pstr->valid_len - offset) * sizeof (wint_t));
+ memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
+ pstr->valid_len -= offset;
+ pstr->valid_raw_len -= offset;
+ for (low = 0; low < pstr->valid_len; low++)
+ pstr->offsets[low] = pstr->offsets[low + offset] - offset;
+ }
+ else
+ {
+ /* Otherwise, just find out how long the partial multibyte
+ character at offset is and fill it with WEOF/255. */
+ pstr->len = pstr->raw_len - idx + offset;
+ pstr->stop = pstr->raw_stop - idx + offset;
+ pstr->offsets_needed = 0;
+ while (mid > 0 && pstr->offsets[mid - 1] == offset)
+ --mid;
+ while (mid < pstr->valid_len)
+ if (pstr->wcs[mid] != WEOF)
+ break;
+ else
+ ++mid;
+ if (mid == pstr->valid_len)
+ pstr->valid_len = 0;
+ else
+ {
+ pstr->valid_len = pstr->offsets[mid] - offset;
+ if (pstr->valid_len)
+ {
+ for (low = 0; low < pstr->valid_len; ++low)
+ pstr->wcs[low] = WEOF;
+ memset (pstr->mbs, 255, pstr->valid_len);
+ }
+ }
+ pstr->valid_raw_len = pstr->valid_len;
+ }
+ }
+ else
+#endif
+ {
+ pstr->tip_context = re_string_context_at (pstr, offset - 1,
+ eflags);
+#ifdef RE_ENABLE_I18N
+ if (pstr->mb_cur_max > 1)
+ memmove (pstr->wcs, pstr->wcs + offset,
+ (pstr->valid_len - offset) * sizeof (wint_t));
+#endif /* RE_ENABLE_I18N */
+ if (BE (pstr->mbs_allocated, 0))
+ memmove (pstr->mbs, pstr->mbs + offset,
+ pstr->valid_len - offset);
+ pstr->valid_len -= offset;
+ pstr->valid_raw_len -= offset;
+#if DEBUG
+ assert (pstr->valid_len > 0);
+#endif
+ }
+ }
+ else
+ {
+ /* No, skip all characters until IDX. */
+ Idx prev_valid_len = pstr->valid_len;
+
+#ifdef RE_ENABLE_I18N
+ if (BE (pstr->offsets_needed, 0))
+ {
+ pstr->len = pstr->raw_len - idx + offset;
+ pstr->stop = pstr->raw_stop - idx + offset;
+ pstr->offsets_needed = 0;
+ }
+#endif
+ pstr->valid_len = 0;
+#ifdef RE_ENABLE_I18N
+ if (pstr->mb_cur_max > 1)
+ {
+ Idx wcs_idx;
+ wint_t wc = WEOF;
+
+ if (pstr->is_utf8)
+ {
+ const unsigned char *raw, *p, *q, *end;
+
+ /* Special case UTF-8. Multi-byte chars start with any
+ byte other than 0x80 - 0xbf. */
+ raw = pstr->raw_mbs + pstr->raw_mbs_idx;
+ end = raw + (offset - pstr->mb_cur_max);
+ if (end < pstr->raw_mbs)
+ end = pstr->raw_mbs;
+ p = raw + offset - 1;
+#ifdef _LIBC
+ /* We know the wchar_t encoding is UCS4, so for the simple
+ case, ASCII characters, skip the conversion step. */
+ if (isascii (*p) && BE (pstr->trans == NULL, 1))
+ {
+ memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
+ /* pstr->valid_len = 0; */
+ wc = (wchar_t) *p;
+ }
+ else
+#endif
+ for (; p >= end; --p)
+ if ((*p & 0xc0) != 0x80)
+ {
+ mbstate_t cur_state;
+ wchar_t wc2;
+ Idx mlen = raw + pstr->len - p;
+ unsigned char buf[6];
+ size_t mbclen;
+
+ q = p;
+ if (BE (pstr->trans != NULL, 0))
+ {
+ int i = mlen < 6 ? mlen : 6;
+ while (--i >= 0)
+ buf[i] = pstr->trans[p[i]];
+ q = buf;
+ }
+ /* XXX Don't use mbrtowc, we know which conversion
+ to use (UTF-8 -> UCS4). */
+ memset (&cur_state, 0, sizeof (cur_state));
+ mbclen = mbrtowc (&wc2, (const char *) p, mlen,
+ &cur_state);
+ if (raw + offset - p <= mbclen
+ && mbclen < (size_t) -2)
+ {
+ memset (&pstr->cur_state, '\0',
+ sizeof (mbstate_t));
+ pstr->valid_len = mbclen - (raw + offset - p);
+ wc = wc2;
+ }
+ break;
+ }
+ }
+
+ if (wc == WEOF)
+ pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
+ if (wc == WEOF)
+ pstr->tip_context
+ = re_string_context_at (pstr, prev_valid_len - 1, eflags);
+ else
+ pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
+ && IS_WIDE_WORD_CHAR (wc))
+ ? CONTEXT_WORD
+ : ((IS_WIDE_NEWLINE (wc)
+ && pstr->newline_anchor)
+ ? CONTEXT_NEWLINE : 0));
+ if (BE (pstr->valid_len, 0))
+ {
+ for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
+ pstr->wcs[wcs_idx] = WEOF;
+ if (pstr->mbs_allocated)
+ memset (pstr->mbs, 255, pstr->valid_len);
+ }
+ pstr->valid_raw_len = pstr->valid_len;
+ }
+ else
+#endif /* RE_ENABLE_I18N */
+ {
+ int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
+ pstr->valid_raw_len = 0;
+ if (pstr->trans)
+ c = pstr->trans[c];
+ pstr->tip_context = (bitset_contain (pstr->word_char, c)
+ ? CONTEXT_WORD
+ : ((IS_NEWLINE (c) && pstr->newline_anchor)
+ ? CONTEXT_NEWLINE : 0));
+ }
+ }
+ if (!BE (pstr->mbs_allocated, 0))
+ pstr->mbs += offset;
+ }
+ pstr->raw_mbs_idx = idx;
+ pstr->len -= offset;
+ pstr->stop -= offset;
+
+ /* Then build the buffers. */
+#ifdef RE_ENABLE_I18N
+ if (pstr->mb_cur_max > 1)
+ {
+ if (pstr->icase)
+ {
+ reg_errcode_t ret = build_wcs_upper_buffer (pstr);
+ if (BE (ret != REG_NOERROR, 0))
+ return ret;
+ }
+ else
+ build_wcs_buffer (pstr);
+ }
+ else
+#endif /* RE_ENABLE_I18N */
+ if (BE (pstr->mbs_allocated, 0))
+ {
+ if (pstr->icase)
+ build_upper_buffer (pstr);
+ else if (pstr->trans != NULL)
+ re_string_translate_buffer (pstr);
+ }
+ else
+ pstr->valid_len = pstr->len;
+
+ pstr->cur_idx = 0;
+ return REG_NOERROR;
+}
+
+static unsigned char
+internal_function __attribute ((pure))
+re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
+{
+ int ch;
+ Idx off;
+
+ /* Handle the common (easiest) cases first. */
+ if (BE (!pstr->mbs_allocated, 1))
+ return re_string_peek_byte (pstr, idx);
+
+#ifdef RE_ENABLE_I18N
+ if (pstr->mb_cur_max > 1
+ && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
+ return re_string_peek_byte (pstr, idx);
+#endif
+
+ off = pstr->cur_idx + idx;
+#ifdef RE_ENABLE_I18N
+ if (pstr->offsets_needed)
+ off = pstr->offsets[off];
+#endif
+
+ ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
+
+#ifdef RE_ENABLE_I18N
+ /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
+ this function returns CAPITAL LETTER I instead of first byte of
+ DOTLESS SMALL LETTER I. The latter would confuse the parser,
+ since peek_byte_case doesn't advance cur_idx in any way. */
+ if (pstr->offsets_needed && !isascii (ch))
+ return re_string_peek_byte (pstr, idx);
+#endif
+
+ return ch;
+}
+
+static unsigned char
+internal_function __attribute ((pure))
+re_string_fetch_byte_case (re_string_t *pstr)
+{
+ if (BE (!pstr->mbs_allocated, 1))
+ return re_string_fetch_byte (pstr);
+
+#ifdef RE_ENABLE_I18N
+ if (pstr->offsets_needed)
+ {
+ Idx off;
+ int ch;
+
+ /* For tr_TR.UTF-8 [[:islower:]] there is
+ [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
+ in that case the whole multi-byte character and return
+ the original letter. On the other side, with
+ [[: DOTLESS SMALL LETTER I return [[:I, as doing
+ anything else would complicate things too much. */
+
+ if (!re_string_first_byte (pstr, pstr->cur_idx))
+ return re_string_fetch_byte (pstr);
+
+ off = pstr->offsets[pstr->cur_idx];
+ ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
+
+ if (! isascii (ch))
+ return re_string_fetch_byte (pstr);
+
+ re_string_skip_bytes (pstr,
+ re_string_char_size_at (pstr, pstr->cur_idx));
+ return ch;
+ }
+#endif
+
+ return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
+}
+
+static void
+internal_function
+re_string_destruct (re_string_t *pstr)
+{
+#ifdef RE_ENABLE_I18N
+ re_free (pstr->wcs);
+ re_free (pstr->offsets);
+#endif /* RE_ENABLE_I18N */
+ if (pstr->mbs_allocated)
+ re_free (pstr->mbs);
+}
+
+/* Return the context at IDX in INPUT. */
+
+static unsigned int
+internal_function
+re_string_context_at (const re_string_t *input, Idx idx, int eflags)
+{
+ int c;
+ if (BE (! REG_VALID_INDEX (idx), 0))
+ /* In this case, we use the value stored in input->tip_context,
+ since we can't know the character in input->mbs[-1] here. */
+ return input->tip_context;
+ if (BE (idx == input->len, 0))
+ return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
+ : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
+#ifdef RE_ENABLE_I18N
+ if (input->mb_cur_max > 1)
+ {
+ wint_t wc;
+ Idx wc_idx = idx;
+ while(input->wcs[wc_idx] == WEOF)
+ {
+#ifdef DEBUG
+ /* It must not happen. */
+ assert (REG_VALID_INDEX (wc_idx));
+#endif
+ --wc_idx;
+ if (! REG_VALID_INDEX (wc_idx))
+ return input->tip_context;
+ }
+ wc = input->wcs[wc_idx];
+ if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
+ return CONTEXT_WORD;
+ return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
+ ? CONTEXT_NEWLINE : 0);
+ }
+ else
+#endif
+ {
+ c = re_string_byte_at (input, idx);
+ if (bitset_contain (input->word_char, c))
+ return CONTEXT_WORD;
+ return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
+ }
+}
+
+/* Functions for set operation. */
+
+static reg_errcode_t
+internal_function
+re_node_set_alloc (re_node_set *set, Idx size)
+{
+ set->alloc = size;
+ set->nelem = 0;
+ set->elems = re_malloc (Idx, size);
+ if (BE (set->elems == NULL, 0))
+ return REG_ESPACE;
+ return REG_NOERROR;
+}
+
+static reg_errcode_t
+internal_function
+re_node_set_init_1 (re_node_set *set, Idx elem)
+{
+ set->alloc = 1;
+ set->nelem = 1;
+ set->elems = re_malloc (Idx, 1);
+ if (BE (set->elems == NULL, 0))
+ {
+ set->alloc = set->nelem = 0;
+ return REG_ESPACE;
+ }
+ set->elems[0] = elem;
+ return REG_NOERROR;
+}
+
+static reg_errcode_t
+internal_function
+re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
+{
+ set->alloc = 2;
+ set->elems = re_malloc (Idx, 2);
+ if (BE (set->elems == NULL, 0))
+ return REG_ESPACE;
+ if (elem1 == elem2)
+ {
+ set->nelem = 1;
+ set->elems[0] = elem1;
+ }
+ else
+ {
+ set->nelem = 2;
+ if (elem1 < elem2)
+ {
+ set->elems[0] = elem1;
+ set->elems[1] = elem2;
+ }
+ else
+ {
+ set->elems[0] = elem2;
+ set->elems[1] = elem1;
+ }
+ }
+ return REG_NOERROR;
+}
+
+static reg_errcode_t
+internal_function
+re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
+{
+ dest->nelem = src->nelem;
+ if (src->nelem > 0)
+ {
+ dest->alloc = dest->nelem;
+ dest->elems = re_malloc (Idx, dest->alloc);
+ if (BE (dest->elems == NULL, 0))
+ {
+ dest->alloc = dest->nelem = 0;
+ return REG_ESPACE;
+ }
+ memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
+ }
+ else
+ re_node_set_init_empty (dest);
+ return REG_NOERROR;
+}
+
+/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
+ DEST. Return value indicate the error code or REG_NOERROR if succeeded.
+ Note: We assume dest->elems is NULL, when dest->alloc is 0. */
+
+static reg_errcode_t
+internal_function
+re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
+ const re_node_set *src2)
+{
+ Idx i1, i2, is, id, delta, sbase;
+ if (src1->nelem == 0 || src2->nelem == 0)
+ return REG_NOERROR;
+
+ /* We need dest->nelem + 2 * elems_in_intersection; this is a
+ conservative estimate. */
+ if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
+ {
+ Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
+ Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc);
+ if (BE (new_elems == NULL, 0))
+ return REG_ESPACE;
+ dest->elems = new_elems;
+ dest->alloc = new_alloc;
+ }
+
+ /* Find the items in the intersection of SRC1 and SRC2, and copy
+ into the top of DEST those that are not already in DEST itself. */
+ sbase = dest->nelem + src1->nelem + src2->nelem;
+ i1 = src1->nelem - 1;
+ i2 = src2->nelem - 1;
+ id = dest->nelem - 1;
+ for (;;)
+ {
+ if (src1->elems[i1] == src2->elems[i2])
+ {
+ /* Try to find the item in DEST. Maybe we could binary search? */
+ while (REG_VALID_INDEX (id) && dest->elems[id] > src1->elems[i1])
+ --id;
+
+ if (! REG_VALID_INDEX (id) || dest->elems[id] != src1->elems[i1])
+ dest->elems[--sbase] = src1->elems[i1];
+
+ if (! REG_VALID_INDEX (--i1) || ! REG_VALID_INDEX (--i2))
+ break;
+ }
+
+ /* Lower the highest of the two items. */
+ else if (src1->elems[i1] < src2->elems[i2])
+ {
+ if (! REG_VALID_INDEX (--i2))
+ break;
+ }
+ else
+ {
+ if (! REG_VALID_INDEX (--i1))
+ break;
+ }
+ }
+
+ id = dest->nelem - 1;
+ is = dest->nelem + src1->nelem + src2->nelem - 1;
+ delta = is - sbase + 1;
+
+ /* Now copy. When DELTA becomes zero, the remaining
+ DEST elements are already in place; this is more or
+ less the same loop that is in re_node_set_merge. */
+ dest->nelem += delta;
+ if (delta > 0 && REG_VALID_INDEX (id))
+ for (;;)
+ {
+ if (dest->elems[is] > dest->elems[id])
+ {
+ /* Copy from the top. */
+ dest->elems[id + delta--] = dest->elems[is--];
+ if (delta == 0)
+ break;
+ }
+ else
+ {
+ /* Slide from the bottom. */
+ dest->elems[id + delta] = dest->elems[id];
+ if (! REG_VALID_INDEX (--id))
+ break;
+ }
+ }
+
+ /* Copy remaining SRC elements. */
+ memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx));
+
+ return REG_NOERROR;
+}
+
+/* Calculate the union set of the sets SRC1 and SRC2. And store it to
+ DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
+
+static reg_errcode_t
+internal_function
+re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
+ const re_node_set *src2)
+{
+ Idx i1, i2, id;
+ if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
+ {
+ dest->alloc = src1->nelem + src2->nelem;
+ dest->elems = re_malloc (Idx, dest->alloc);
+ if (BE (dest->elems == NULL, 0))
+ return REG_ESPACE;
+ }
+ else
+ {
+ if (src1 != NULL && src1->nelem > 0)
+ return re_node_set_init_copy (dest, src1);
+ else if (src2 != NULL && src2->nelem > 0)
+ return re_node_set_init_copy (dest, src2);
+ else
+ re_node_set_init_empty (dest);
+ return REG_NOERROR;
+ }
+ for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
+ {
+ if (src1->elems[i1] > src2->elems[i2])
+ {
+ dest->elems[id++] = src2->elems[i2++];
+ continue;
+ }
+ if (src1->elems[i1] == src2->elems[i2])
+ ++i2;
+ dest->elems[id++] = src1->elems[i1++];
+ }
+ if (i1 < src1->nelem)
+ {
+ memcpy (dest->elems + id, src1->elems + i1,
+ (src1->nelem - i1) * sizeof (Idx));
+ id += src1->nelem - i1;
+ }
+ else if (i2 < src2->nelem)
+ {
+ memcpy (dest->elems + id, src2->elems + i2,
+ (src2->nelem - i2) * sizeof (Idx));
+ id += src2->nelem - i2;
+ }
+ dest->nelem = id;
+ return REG_NOERROR;
+}
+
+/* Calculate the union set of the sets DEST and SRC. And store it to
+ DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
+
+static reg_errcode_t
+internal_function
+re_node_set_merge (re_node_set *dest, const re_node_set *src)
+{
+ Idx is, id, sbase, delta;
+ if (src == NULL || src->nelem == 0)
+ return REG_NOERROR;
+ if (dest->alloc < 2 * src->nelem + dest->nelem)
+ {
+ Idx new_alloc = 2 * (src->nelem + dest->alloc);
+ Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc);
+ if (BE (new_buffer == NULL, 0))
+ return REG_ESPACE;
+ dest->elems = new_buffer;
+ dest->alloc = new_alloc;
+ }
+
+ if (BE (dest->nelem == 0, 0))
+ {
+ dest->nelem = src->nelem;
+ memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
+ return REG_NOERROR;
+ }
+
+ /* Copy into the top of DEST the items of SRC that are not
+ found in DEST. Maybe we could binary search in DEST? */
+ for (sbase = dest->nelem + 2 * src->nelem,
+ is = src->nelem - 1, id = dest->nelem - 1;
+ REG_VALID_INDEX (is) && REG_VALID_INDEX (id); )
+ {
+ if (dest->elems[id] == src->elems[is])
+ is--, id--;
+ else if (dest->elems[id] < src->elems[is])
+ dest->elems[--sbase] = src->elems[is--];
+ else /* if (dest->elems[id] > src->elems[is]) */
+ --id;
+ }
+
+ if (REG_VALID_INDEX (is))
+ {
+ /* If DEST is exhausted, the remaining items of SRC must be unique. */
+ sbase -= is + 1;
+ memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (Idx));
+ }
+
+ id = dest->nelem - 1;
+ is = dest->nelem + 2 * src->nelem - 1;
+ delta = is - sbase + 1;
+ if (delta == 0)
+ return REG_NOERROR;
+
+ /* Now copy. When DELTA becomes zero, the remaining
+ DEST elements are already in place. */
+ dest->nelem += delta;
+ for (;;)
+ {
+ if (dest->elems[is] > dest->elems[id])
+ {
+ /* Copy from the top. */
+ dest->elems[id + delta--] = dest->elems[is--];
+ if (delta == 0)
+ break;
+ }
+ else
+ {
+ /* Slide from the bottom. */
+ dest->elems[id + delta] = dest->elems[id];
+ if (! REG_VALID_INDEX (--id))
+ {
+ /* Copy remaining SRC elements. */
+ memcpy (dest->elems, dest->elems + sbase,
+ delta * sizeof (Idx));
+ break;
+ }
+ }
+ }
+
+ return REG_NOERROR;
+}
+
+/* Insert the new element ELEM to the re_node_set* SET.
+ SET should not already have ELEM.
+ Return true if successful. */
+
+static bool
+internal_function
+re_node_set_insert (re_node_set *set, Idx elem)
+{
+ Idx idx;
+ /* In case the set is empty. */
+ if (set->alloc == 0)
+ return BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1);
+
+ if (BE (set->nelem, 0) == 0)
+ {
+ /* We already guaranteed above that set->alloc != 0. */
+ set->elems[0] = elem;
+ ++set->nelem;
+ return true;
+ }
+
+ /* Realloc if we need. */
+ if (set->alloc == set->nelem)
+ {
+ Idx *new_elems;
+ set->alloc = set->alloc * 2;
+ new_elems = re_realloc (set->elems, Idx, set->alloc);
+ if (BE (new_elems == NULL, 0))
+ return false;
+ set->elems = new_elems;
+ }
+
+ /* Move the elements which follows the new element. Test the
+ first element separately to skip a check in the inner loop. */
+ if (elem < set->elems[0])
+ {
+ idx = 0;
+ for (idx = set->nelem; idx > 0; idx--)
+ set->elems[idx] = set->elems[idx - 1];
+ }
+ else
+ {
+ for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
+ set->elems[idx] = set->elems[idx - 1];
+ }
+
+ /* Insert the new element. */
+ set->elems[idx] = elem;
+ ++set->nelem;
+ return true;
+}
+
+/* Insert the new element ELEM to the re_node_set* SET.
+ SET should not already have any element greater than or equal to ELEM.
+ Return true if successful. */
+
+static bool
+internal_function
+re_node_set_insert_last (re_node_set *set, Idx elem)
+{
+ /* Realloc if we need. */
+ if (set->alloc == set->nelem)
+ {
+ Idx *new_elems;
+ set->alloc = (set->alloc + 1) * 2;
+ new_elems = re_realloc (set->elems, Idx, set->alloc);
+ if (BE (new_elems == NULL, 0))
+ return false;
+ set->elems = new_elems;
+ }
+
+ /* Insert the new element. */
+ set->elems[set->nelem++] = elem;
+ return true;
+}
+
+/* Compare two node sets SET1 and SET2.
+ Return true if SET1 and SET2 are equivalent. */
+
+static bool
+internal_function __attribute ((pure))
+re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
+{
+ Idx i;
+ if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
+ return false;
+ for (i = set1->nelem ; REG_VALID_INDEX (--i) ; )
+ if (set1->elems[i] != set2->elems[i])
+ return false;
+ return true;
+}
+
+/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
+
+static Idx
+internal_function __attribute ((pure))
+re_node_set_contains (const re_node_set *set, Idx elem)
+{
+ __re_size_t idx, right, mid;
+ if (! REG_VALID_NONZERO_INDEX (set->nelem))
+ return 0;
+
+ /* Binary search the element. */
+ idx = 0;
+ right = set->nelem - 1;
+ while (idx < right)
+ {
+ mid = (idx + right) / 2;
+ if (set->elems[mid] < elem)
+ idx = mid + 1;
+ else
+ right = mid;
+ }
+ return set->elems[idx] == elem ? idx + 1 : 0;
+}
+
+static void
+internal_function
+re_node_set_remove_at (re_node_set *set, Idx idx)
+{
+ if (idx < 0 || idx >= set->nelem)
+ return;
+ --set->nelem;
+ for (; idx < set->nelem; idx++)
+ set->elems[idx] = set->elems[idx + 1];
+}
+
+
+/* Add the token TOKEN to dfa->nodes, and return the index of the token.
+ Or return REG_MISSING if an error occurred. */
+
+static Idx
+internal_function
+re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
+{
+ if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
+ {
+ size_t new_nodes_alloc = dfa->nodes_alloc * 2;
+ Idx *new_nexts, *new_indices;
+ re_node_set *new_edests, *new_eclosures;
+ re_token_t *new_nodes;
+ size_t max_object_size =
+ MAX (sizeof (re_token_t),
+ MAX (sizeof (re_node_set),
+ sizeof (Idx)));
+
+ /* Avoid overflows. */
+ if (BE (SIZE_MAX / 2 / max_object_size < dfa->nodes_alloc, 0))
+ return REG_MISSING;
+
+ new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
+ if (BE (new_nodes == NULL, 0))
+ return REG_MISSING;
+ dfa->nodes = new_nodes;
+ new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
+ new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
+ new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
+ new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
+ if (BE (new_nexts == NULL || new_indices == NULL
+ || new_edests == NULL || new_eclosures == NULL, 0))
+ return REG_MISSING;
+ dfa->nexts = new_nexts;
+ dfa->org_indices = new_indices;
+ dfa->edests = new_edests;
+ dfa->eclosures = new_eclosures;
+ dfa->nodes_alloc = new_nodes_alloc;
+ }
+ dfa->nodes[dfa->nodes_len] = token;
+ dfa->nodes[dfa->nodes_len].constraint = 0;
+#ifdef RE_ENABLE_I18N
+ {
+ int type = token.type;
+ dfa->nodes[dfa->nodes_len].accept_mb =
+ (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
+ }
+#endif
+ dfa->nexts[dfa->nodes_len] = REG_MISSING;
+ re_node_set_init_empty (dfa->edests + dfa->nodes_len);
+ re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
+ return dfa->nodes_len++;
+}
+
+static inline re_hashval_t
+internal_function
+calc_state_hash (const re_node_set *nodes, unsigned int context)
+{
+ re_hashval_t hash = nodes->nelem + context;
+ Idx i;
+ for (i = 0 ; i < nodes->nelem ; i++)
+ hash += nodes->elems[i];
+ return hash;
+}
+
+/* Search for the state whose node_set is equivalent to NODES.
+ Return the pointer to the state, if we found it in the DFA.
+ Otherwise create the new one and return it. In case of an error
+ return NULL and set the error code in ERR.
+ Note: - We assume NULL as the invalid state, then it is possible that
+ return value is NULL and ERR is REG_NOERROR.
+ - We never return non-NULL value in case of any errors, it is for
+ optimization. */
+
+static re_dfastate_t *
+internal_function
+re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
+ const re_node_set *nodes)
+{
+ re_hashval_t hash;
+ re_dfastate_t *new_state;
+ struct re_state_table_entry *spot;
+ Idx i;
+#ifdef lint
+ /* Suppress bogus uninitialized-variable warnings. */
+ *err = REG_NOERROR;
+#endif
+ if (BE (nodes->nelem == 0, 0))
+ {
+ *err = REG_NOERROR;
+ return NULL;
+ }
+ hash = calc_state_hash (nodes, 0);
+ spot = dfa->state_table + (hash & dfa->state_hash_mask);
+
+ for (i = 0 ; i < spot->num ; i++)
+ {
+ re_dfastate_t *state = spot->array[i];
+ if (hash != state->hash)
+ continue;
+ if (re_node_set_compare (&state->nodes, nodes))
+ return state;
+ }
+
+ /* There are no appropriate state in the dfa, create the new one. */
+ new_state = create_ci_newstate (dfa, nodes, hash);
+ if (BE (new_state == NULL, 0))
+ *err = REG_ESPACE;
+
+ return new_state;
+}
+
+/* Search for the state whose node_set is equivalent to NODES and
+ whose context is equivalent to CONTEXT.
+ Return the pointer to the state, if we found it in the DFA.
+ Otherwise create the new one and return it. In case of an error
+ return NULL and set the error code in ERR.
+ Note: - We assume NULL as the invalid state, then it is possible that
+ return value is NULL and ERR is REG_NOERROR.
+ - We never return non-NULL value in case of any errors, it is for
+ optimization. */
+
+static re_dfastate_t *
+internal_function
+re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
+ const re_node_set *nodes, unsigned int context)
+{
+ re_hashval_t hash;
+ re_dfastate_t *new_state;
+ struct re_state_table_entry *spot;
+ Idx i;
+#ifdef lint
+ /* Suppress bogus uninitialized-variable warnings. */
+ *err = REG_NOERROR;
+#endif
+ if (nodes->nelem == 0)
+ {
+ *err = REG_NOERROR;
+ return NULL;
+ }
+ hash = calc_state_hash (nodes, context);
+ spot = dfa->state_table + (hash & dfa->state_hash_mask);
+
+ for (i = 0 ; i < spot->num ; i++)
+ {
+ re_dfastate_t *state = spot->array[i];
+ if (state->hash == hash
+ && state->context == context
+ && re_node_set_compare (state->entrance_nodes, nodes))
+ return state;
+ }
+ /* There are no appropriate state in `dfa', create the new one. */
+ new_state = create_cd_newstate (dfa, nodes, context, hash);
+ if (BE (new_state == NULL, 0))
+ *err = REG_ESPACE;
+
+ return new_state;
+}
+
+/* Finish initialization of the new state NEWSTATE, and using its hash value
+ HASH put in the appropriate bucket of DFA's state table. Return value
+ indicates the error code if failed. */
+
+static reg_errcode_t
+register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
+ re_hashval_t hash)
+{
+ struct re_state_table_entry *spot;
+ reg_errcode_t err;
+ Idx i;
+
+ newstate->hash = hash;
+ err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
+ if (BE (err != REG_NOERROR, 0))
+ return REG_ESPACE;
+ for (i = 0; i < newstate->nodes.nelem; i++)
+ {
+ Idx elem = newstate->nodes.elems[i];
+ if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
+ if (BE (! re_node_set_insert_last (&newstate->non_eps_nodes, elem), 0))
+ return REG_ESPACE;
+ }
+
+ spot = dfa->state_table + (hash & dfa->state_hash_mask);
+ if (BE (spot->alloc <= spot->num, 0))
+ {
+ Idx new_alloc = 2 * spot->num + 2;
+ re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
+ new_alloc);
+ if (BE (new_array == NULL, 0))
+ return REG_ESPACE;
+ spot->array = new_array;
+ spot->alloc = new_alloc;
+ }
+ spot->array[spot->num++] = newstate;
+ return REG_NOERROR;
+}
+
+static void
+free_state (re_dfastate_t *state)
+{
+ re_node_set_free (&state->non_eps_nodes);
+ re_node_set_free (&state->inveclosure);
+ if (state->entrance_nodes != &state->nodes)
+ {
+ re_node_set_free (state->entrance_nodes);
+ re_free (state->entrance_nodes);
+ }
+ re_node_set_free (&state->nodes);
+ re_free (state->word_trtable);
+ re_free (state->trtable);
+ re_free (state);
+}
+
+/* Create the new state which is independ of contexts.
+ Return the new state if succeeded, otherwise return NULL. */
+
+static re_dfastate_t *
+internal_function
+create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
+ re_hashval_t hash)
+{
+ Idx i;
+ reg_errcode_t err;
+ re_dfastate_t *newstate;
+
+ newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
+ if (BE (newstate == NULL, 0))
+ return NULL;
+ err = re_node_set_init_copy (&newstate->nodes, nodes);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_free (newstate);
+ return NULL;
+ }
+
+ newstate->entrance_nodes = &newstate->nodes;
+ for (i = 0 ; i < nodes->nelem ; i++)
+ {
+ re_token_t *node = dfa->nodes + nodes->elems[i];
+ re_token_type_t type = node->type;
+ if (type == CHARACTER && !node->constraint)
+ continue;
+#ifdef RE_ENABLE_I18N
+ newstate->accept_mb |= node->accept_mb;
+#endif /* RE_ENABLE_I18N */
+
+ /* If the state has the halt node, the state is a halt state. */
+ if (type == END_OF_RE)
+ newstate->halt = 1;
+ else if (type == OP_BACK_REF)
+ newstate->has_backref = 1;
+ else if (type == ANCHOR || node->constraint)
+ newstate->has_constraint = 1;
+ }
+ err = register_state (dfa, newstate, hash);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ free_state (newstate);
+ newstate = NULL;
+ }
+ return newstate;
+}
+
+/* Create the new state which is depend on the context CONTEXT.
+ Return the new state if succeeded, otherwise return NULL. */
+
+static re_dfastate_t *
+internal_function
+create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
+ unsigned int context, re_hashval_t hash)
+{
+ Idx i, nctx_nodes = 0;
+ reg_errcode_t err;
+ re_dfastate_t *newstate;
+
+ newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
+ if (BE (newstate == NULL, 0))
+ return NULL;
+ err = re_node_set_init_copy (&newstate->nodes, nodes);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_free (newstate);
+ return NULL;
+ }
+
+ newstate->context = context;
+ newstate->entrance_nodes = &newstate->nodes;
+
+ for (i = 0 ; i < nodes->nelem ; i++)
+ {
+ unsigned int constraint = 0;
+ re_token_t *node = dfa->nodes + nodes->elems[i];
+ re_token_type_t type = node->type;
+ if (node->constraint)
+ constraint = node->constraint;
+
+ if (type == CHARACTER && !constraint)
+ continue;
+#ifdef RE_ENABLE_I18N
+ newstate->accept_mb |= node->accept_mb;
+#endif /* RE_ENABLE_I18N */
+
+ /* If the state has the halt node, the state is a halt state. */
+ if (type == END_OF_RE)
+ newstate->halt = 1;
+ else if (type == OP_BACK_REF)
+ newstate->has_backref = 1;
+ else if (type == ANCHOR)
+ constraint = node->opr.ctx_type;
+
+ if (constraint)
+ {
+ if (newstate->entrance_nodes == &newstate->nodes)
+ {
+ newstate->entrance_nodes = re_malloc (re_node_set, 1);
+ if (BE (newstate->entrance_nodes == NULL, 0))
+ {
+ free_state (newstate);
+ return NULL;
+ }
+ re_node_set_init_copy (newstate->entrance_nodes, nodes);
+ nctx_nodes = 0;
+ newstate->has_constraint = 1;
+ }
+
+ if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
+ {
+ re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
+ ++nctx_nodes;
+ }
+ }
+ }
+ err = register_state (dfa, newstate, hash);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ free_state (newstate);
+ newstate = NULL;
+ }
+ return newstate;
+}
diff --git a/lib/regex_internal.h b/lib/regex_internal.h
new file mode 100644
index 0000000..cc9b5b7
--- /dev/null
+++ b/lib/regex_internal.h
@@ -0,0 +1,865 @@
+/* Extended regular expression matching and search library.
+ Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#ifndef _REGEX_INTERNAL_H
+#define _REGEX_INTERNAL_H 1
+
+#include <assert.h>
+#include <ctype.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifndef _LIBC
+# include "strcase.h"
+#endif
+
+#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC
+# include <langinfo.h>
+#endif
+#if defined HAVE_LOCALE_H || defined _LIBC
+# include <locale.h>
+#endif
+#if defined HAVE_WCHAR_H || defined _LIBC
+# include <wchar.h>
+#endif /* HAVE_WCHAR_H || _LIBC */
+#if defined HAVE_WCTYPE_H || defined _LIBC
+# include <wctype.h>
+#endif /* HAVE_WCTYPE_H || _LIBC */
+#include <stdint.h>
+#if defined _LIBC
+# include <bits/libc-lock.h>
+#else
+# define __libc_lock_init(NAME) do { } while (0)
+# define __libc_lock_lock(NAME) do { } while (0)
+# define __libc_lock_unlock(NAME) do { } while (0)
+#endif
+
+/* In case that the system doesn't have isblank(). */
+#if !defined _LIBC && !HAVE_DECL_ISBLANK && !defined isblank
+# define isblank(ch) ((ch) == ' ' || (ch) == '\t')
+#endif
+
+#ifdef _LIBC
+# ifndef _RE_DEFINE_LOCALE_FUNCTIONS
+# define _RE_DEFINE_LOCALE_FUNCTIONS 1
+# include <locale/localeinfo.h>
+# include <locale/elem-hash.h>
+# include <locale/coll-lookup.h>
+# endif
+#endif
+
+/* This is for other GNU distributions with internationalized messages. */
+#if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
+# include <libintl.h>
+# ifdef _LIBC
+# undef gettext
+# define gettext(msgid) \
+ INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES)
+# endif
+#else
+# define gettext(msgid) (msgid)
+#endif
+
+#ifndef gettext_noop
+/* This define is so xgettext can find the internationalizable
+ strings. */
+# define gettext_noop(String) String
+#endif
+
+/* For loser systems without the definition. */
+#ifndef SIZE_MAX
+# define SIZE_MAX ((size_t) -1)
+#endif
+
+#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC
+# define RE_ENABLE_I18N
+#endif
+
+#if __GNUC__ >= 3
+# define BE(expr, val) __builtin_expect (expr, val)
+#else
+# define BE(expr, val) (expr)
+# ifdef _LIBC
+# define inline
+# endif
+#endif
+
+/* Number of ASCII characters. */
+#define ASCII_CHARS 0x80
+
+/* Number of single byte characters. */
+#define SBC_MAX (UCHAR_MAX + 1)
+
+#define COLL_ELEM_LEN_MAX 8
+
+/* The character which represents newline. */
+#define NEWLINE_CHAR '\n'
+#define WIDE_NEWLINE_CHAR L'\n'
+
+/* Rename to standard API for using out of glibc. */
+#ifndef _LIBC
+# define __wctype wctype
+# define __iswctype iswctype
+# define __btowc btowc
+# ifndef __mempcpy
+# define __mempcpy mempcpy
+# endif
+# define __wcrtomb wcrtomb
+# define __regfree regfree
+# define attribute_hidden
+#endif /* not _LIBC */
+
+#if __GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)
+# define __attribute(arg) __attribute__ (arg)
+#else
+# define __attribute(arg)
+#endif
+
+typedef __re_idx_t Idx;
+
+/* Special return value for failure to match. */
+#define REG_MISSING ((Idx) -1)
+
+/* Special return value for internal error. */
+#define REG_ERROR ((Idx) -2)
+
+/* Test whether N is a valid index, and is not one of the above. */
+#ifdef _REGEX_LARGE_OFFSETS
+# define REG_VALID_INDEX(n) ((Idx) (n) < REG_ERROR)
+#else
+# define REG_VALID_INDEX(n) (0 <= (n))
+#endif
+
+/* Test whether N is a valid nonzero index. */
+#ifdef _REGEX_LARGE_OFFSETS
+# define REG_VALID_NONZERO_INDEX(n) ((Idx) ((n) - 1) < (Idx) (REG_ERROR - 1))
+#else
+# define REG_VALID_NONZERO_INDEX(n) (0 < (n))
+#endif
+
+/* A hash value, suitable for computing hash tables. */
+typedef __re_size_t re_hashval_t;
+
+/* An integer used to represent a set of bits. It must be unsigned,
+ and must be at least as wide as unsigned int. */
+typedef unsigned long int bitset_word_t;
+/* All bits set in a bitset_word_t. */
+#define BITSET_WORD_MAX ULONG_MAX
+
+/* Number of bits in a bitset_word_t. For portability to hosts with
+ padding bits, do not use '(sizeof (bitset_word_t) * CHAR_BIT)';
+ instead, deduce it directly from BITSET_WORD_MAX. Avoid
+ greater-than-32-bit integers and unconditional shifts by more than
+ 31 bits, as they're not portable. */
+#if BITSET_WORD_MAX == 0xffffffff
+# define BITSET_WORD_BITS 32
+#elif BITSET_WORD_MAX >> 31 >> 5 == 1
+# define BITSET_WORD_BITS 36
+#elif BITSET_WORD_MAX >> 31 >> 16 == 1
+# define BITSET_WORD_BITS 48
+#elif BITSET_WORD_MAX >> 31 >> 28 == 1
+# define BITSET_WORD_BITS 60
+#elif BITSET_WORD_MAX >> 31 >> 31 >> 1 == 1
+# define BITSET_WORD_BITS 64
+#elif BITSET_WORD_MAX >> 31 >> 31 >> 9 == 1
+# define BITSET_WORD_BITS 72
+#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 3 == 1
+# define BITSET_WORD_BITS 128
+#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 7 == 1
+# define BITSET_WORD_BITS 256
+#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 7 > 1
+# define BITSET_WORD_BITS 257 /* any value > SBC_MAX will do here */
+# if BITSET_WORD_BITS <= SBC_MAX
+# error "Invalid SBC_MAX"
+# endif
+#elif BITSET_WORD_MAX == (0xffffffff + 2) * 0xffffffff
+/* Work around a bug in 64-bit PGC (before version 6.1-2), where the
+ preprocessor mishandles large unsigned values as if they were signed. */
+# define BITSET_WORD_BITS 64
+#else
+# error "Add case for new bitset_word_t size"
+#endif
+
+/* Number of bitset_word_t values in a bitset_t. */
+#define BITSET_WORDS ((SBC_MAX + BITSET_WORD_BITS - 1) / BITSET_WORD_BITS)
+
+typedef bitset_word_t bitset_t[BITSET_WORDS];
+typedef bitset_word_t *re_bitset_ptr_t;
+typedef const bitset_word_t *re_const_bitset_ptr_t;
+
+#define PREV_WORD_CONSTRAINT 0x0001
+#define PREV_NOTWORD_CONSTRAINT 0x0002
+#define NEXT_WORD_CONSTRAINT 0x0004
+#define NEXT_NOTWORD_CONSTRAINT 0x0008
+#define PREV_NEWLINE_CONSTRAINT 0x0010
+#define NEXT_NEWLINE_CONSTRAINT 0x0020
+#define PREV_BEGBUF_CONSTRAINT 0x0040
+#define NEXT_ENDBUF_CONSTRAINT 0x0080
+#define WORD_DELIM_CONSTRAINT 0x0100
+#define NOT_WORD_DELIM_CONSTRAINT 0x0200
+
+typedef enum
+{
+ INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
+ WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
+ WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
+ INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
+ LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
+ LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
+ BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
+ BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
+ WORD_DELIM = WORD_DELIM_CONSTRAINT,
+ NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT
+} re_context_type;
+
+typedef struct
+{
+ Idx alloc;
+ Idx nelem;
+ Idx *elems;
+} re_node_set;
+
+typedef enum
+{
+ NON_TYPE = 0,
+
+ /* Node type, These are used by token, node, tree. */
+ CHARACTER = 1,
+ END_OF_RE = 2,
+ SIMPLE_BRACKET = 3,
+ OP_BACK_REF = 4,
+ OP_PERIOD = 5,
+#ifdef RE_ENABLE_I18N
+ COMPLEX_BRACKET = 6,
+ OP_UTF8_PERIOD = 7,
+#endif /* RE_ENABLE_I18N */
+
+ /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used
+ when the debugger shows values of this enum type. */
+#define EPSILON_BIT 8
+ OP_OPEN_SUBEXP = EPSILON_BIT | 0,
+ OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
+ OP_ALT = EPSILON_BIT | 2,
+ OP_DUP_ASTERISK = EPSILON_BIT | 3,
+ ANCHOR = EPSILON_BIT | 4,
+
+ /* Tree type, these are used only by tree. */
+ CONCAT = 16,
+ SUBEXP = 17,
+
+ /* Token type, these are used only by token. */
+ OP_DUP_PLUS = 18,
+ OP_DUP_QUESTION,
+ OP_OPEN_BRACKET,
+ OP_CLOSE_BRACKET,
+ OP_CHARSET_RANGE,
+ OP_OPEN_DUP_NUM,
+ OP_CLOSE_DUP_NUM,
+ OP_NON_MATCH_LIST,
+ OP_OPEN_COLL_ELEM,
+ OP_CLOSE_COLL_ELEM,
+ OP_OPEN_EQUIV_CLASS,
+ OP_CLOSE_EQUIV_CLASS,
+ OP_OPEN_CHAR_CLASS,
+ OP_CLOSE_CHAR_CLASS,
+ OP_WORD,
+ OP_NOTWORD,
+ OP_SPACE,
+ OP_NOTSPACE,
+ BACK_SLASH
+
+} re_token_type_t;
+
+#ifdef RE_ENABLE_I18N
+typedef struct
+{
+ /* Multibyte characters. */
+ wchar_t *mbchars;
+
+ /* Collating symbols. */
+# ifdef _LIBC
+ int32_t *coll_syms;
+# endif
+
+ /* Equivalence classes. */
+# ifdef _LIBC
+ int32_t *equiv_classes;
+# endif
+
+ /* Range expressions. */
+# ifdef _LIBC
+ uint32_t *range_starts;
+ uint32_t *range_ends;
+# else /* not _LIBC */
+ wchar_t *range_starts;
+ wchar_t *range_ends;
+# endif /* not _LIBC */
+
+ /* Character classes. */
+ wctype_t *char_classes;
+
+ /* If this character set is the non-matching list. */
+ unsigned int non_match : 1;
+
+ /* # of multibyte characters. */
+ Idx nmbchars;
+
+ /* # of collating symbols. */
+ Idx ncoll_syms;
+
+ /* # of equivalence classes. */
+ Idx nequiv_classes;
+
+ /* # of range expressions. */
+ Idx nranges;
+
+ /* # of character classes. */
+ Idx nchar_classes;
+} re_charset_t;
+#endif /* RE_ENABLE_I18N */
+
+typedef struct
+{
+ union
+ {
+ unsigned char c; /* for CHARACTER */
+ re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */
+#ifdef RE_ENABLE_I18N
+ re_charset_t *mbcset; /* for COMPLEX_BRACKET */
+#endif /* RE_ENABLE_I18N */
+ Idx idx; /* for BACK_REF */
+ re_context_type ctx_type; /* for ANCHOR */
+ } opr;
+#if __GNUC__ >= 2
+ re_token_type_t type : 8;
+#else
+ re_token_type_t type;
+#endif
+ unsigned int constraint : 10; /* context constraint */
+ unsigned int duplicated : 1;
+ unsigned int opt_subexp : 1;
+#ifdef RE_ENABLE_I18N
+ unsigned int accept_mb : 1;
+ /* These 2 bits can be moved into the union if needed (e.g. if running out
+ of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */
+ unsigned int mb_partial : 1;
+#endif
+ unsigned int word_char : 1;
+} re_token_t;
+
+#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
+
+struct re_string_t
+{
+ /* Indicate the raw buffer which is the original string passed as an
+ argument of regexec(), re_search(), etc.. */
+ const unsigned char *raw_mbs;
+ /* Store the multibyte string. In case of "case insensitive mode" like
+ REG_ICASE, upper cases of the string are stored, otherwise MBS points
+ the same address that RAW_MBS points. */
+ unsigned char *mbs;
+#ifdef RE_ENABLE_I18N
+ /* Store the wide character string which is corresponding to MBS. */
+ wint_t *wcs;
+ Idx *offsets;
+ mbstate_t cur_state;
+#endif
+ /* Index in RAW_MBS. Each character mbs[i] corresponds to
+ raw_mbs[raw_mbs_idx + i]. */
+ Idx raw_mbs_idx;
+ /* The length of the valid characters in the buffers. */
+ Idx valid_len;
+ /* The corresponding number of bytes in raw_mbs array. */
+ Idx valid_raw_len;
+ /* The length of the buffers MBS and WCS. */
+ Idx bufs_len;
+ /* The index in MBS, which is updated by re_string_fetch_byte. */
+ Idx cur_idx;
+ /* length of RAW_MBS array. */
+ Idx raw_len;
+ /* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN. */
+ Idx len;
+ /* End of the buffer may be shorter than its length in the cases such
+ as re_match_2, re_search_2. Then, we use STOP for end of the buffer
+ instead of LEN. */
+ Idx raw_stop;
+ /* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS. */
+ Idx stop;
+
+ /* The context of mbs[0]. We store the context independently, since
+ the context of mbs[0] may be different from raw_mbs[0], which is
+ the beginning of the input string. */
+ unsigned int tip_context;
+ /* The translation passed as a part of an argument of re_compile_pattern. */
+ RE_TRANSLATE_TYPE trans;
+ /* Copy of re_dfa_t's word_char. */
+ re_const_bitset_ptr_t word_char;
+ /* true if REG_ICASE. */
+ unsigned char icase;
+ unsigned char is_utf8;
+ unsigned char map_notascii;
+ unsigned char mbs_allocated;
+ unsigned char offsets_needed;
+ unsigned char newline_anchor;
+ unsigned char word_ops_used;
+ int mb_cur_max;
+};
+typedef struct re_string_t re_string_t;
+
+
+struct re_dfa_t;
+typedef struct re_dfa_t re_dfa_t;
+
+#ifndef _LIBC
+# ifdef __i386__
+# define internal_function __attribute ((regparm (3), stdcall))
+# else
+# define internal_function
+# endif
+#endif
+
+static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
+ Idx new_buf_len)
+ internal_function;
+#ifdef RE_ENABLE_I18N
+static void build_wcs_buffer (re_string_t *pstr) internal_function;
+static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr)
+ internal_function;
+#endif /* RE_ENABLE_I18N */
+static void build_upper_buffer (re_string_t *pstr) internal_function;
+static void re_string_translate_buffer (re_string_t *pstr) internal_function;
+static unsigned int re_string_context_at (const re_string_t *input, Idx idx,
+ int eflags)
+ internal_function __attribute ((pure));
+#define re_string_peek_byte(pstr, offset) \
+ ((pstr)->mbs[(pstr)->cur_idx + offset])
+#define re_string_fetch_byte(pstr) \
+ ((pstr)->mbs[(pstr)->cur_idx++])
+#define re_string_first_byte(pstr, idx) \
+ ((idx) == (pstr)->valid_len || (pstr)->wcs[idx] != WEOF)
+#define re_string_is_single_byte_char(pstr, idx) \
+ ((pstr)->wcs[idx] != WEOF && ((pstr)->valid_len == (idx) + 1 \
+ || (pstr)->wcs[(idx) + 1] != WEOF))
+#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx)
+#define re_string_cur_idx(pstr) ((pstr)->cur_idx)
+#define re_string_get_buffer(pstr) ((pstr)->mbs)
+#define re_string_length(pstr) ((pstr)->len)
+#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx])
+#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
+#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
+
+#include <alloca.h>
+
+#ifndef _LIBC
+# if HAVE_ALLOCA
+/* The OS usually guarantees only one guard page at the bottom of the stack,
+ and a page size can be as small as 4096 bytes. So we cannot safely
+ allocate anything larger than 4096 bytes. Also care for the possibility
+ of a few compiler-allocated temporary stack slots. */
+# define __libc_use_alloca(n) ((n) < 4032)
+# else
+/* alloca is implemented with malloc, so just use malloc. */
+# define __libc_use_alloca(n) 0
+# endif
+#endif
+
+#ifndef MAX
+# define MAX(a,b) ((a) < (b) ? (b) : (a))
+#endif
+
+#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t)))
+#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t)))
+#define re_free(p) free (p)
+
+struct bin_tree_t
+{
+ struct bin_tree_t *parent;
+ struct bin_tree_t *left;
+ struct bin_tree_t *right;
+ struct bin_tree_t *first;
+ struct bin_tree_t *next;
+
+ re_token_t token;
+
+ /* `node_idx' is the index in dfa->nodes, if `type' == 0.
+ Otherwise `type' indicate the type of this node. */
+ Idx node_idx;
+};
+typedef struct bin_tree_t bin_tree_t;
+
+#define BIN_TREE_STORAGE_SIZE \
+ ((1024 - sizeof (void *)) / sizeof (bin_tree_t))
+
+struct bin_tree_storage_t
+{
+ struct bin_tree_storage_t *next;
+ bin_tree_t data[BIN_TREE_STORAGE_SIZE];
+};
+typedef struct bin_tree_storage_t bin_tree_storage_t;
+
+#define CONTEXT_WORD 1
+#define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
+#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1)
+#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1)
+
+#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD)
+#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE)
+#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF)
+#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF)
+#define IS_ORDINARY_CONTEXT(c) ((c) == 0)
+
+#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_')
+#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR)
+#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_')
+#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR)
+
+#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \
+ ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
+ || ((constraint & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
+ || ((constraint & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context))\
+ || ((constraint & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context)))
+
+#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \
+ ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
+ || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
+ || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
+ || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context)))
+
+struct re_dfastate_t
+{
+ re_hashval_t hash;
+ re_node_set nodes;
+ re_node_set non_eps_nodes;
+ re_node_set inveclosure;
+ re_node_set *entrance_nodes;
+ struct re_dfastate_t **trtable, **word_trtable;
+ unsigned int context : 4;
+ unsigned int halt : 1;
+ /* If this state can accept `multi byte'.
+ Note that we refer to multibyte characters, and multi character
+ collating elements as `multi byte'. */
+ unsigned int accept_mb : 1;
+ /* If this state has backreference node(s). */
+ unsigned int has_backref : 1;
+ unsigned int has_constraint : 1;
+};
+typedef struct re_dfastate_t re_dfastate_t;
+
+struct re_state_table_entry
+{
+ Idx num;
+ Idx alloc;
+ re_dfastate_t **array;
+};
+
+/* Array type used in re_sub_match_last_t and re_sub_match_top_t. */
+
+typedef struct
+{
+ Idx next_idx;
+ Idx alloc;
+ re_dfastate_t **array;
+} state_array_t;
+
+/* Store information about the node NODE whose type is OP_CLOSE_SUBEXP. */
+
+typedef struct
+{
+ Idx node;
+ Idx str_idx; /* The position NODE match at. */
+ state_array_t path;
+} re_sub_match_last_t;
+
+/* Store information about the node NODE whose type is OP_OPEN_SUBEXP.
+ And information about the node, whose type is OP_CLOSE_SUBEXP,
+ corresponding to NODE is stored in LASTS. */
+
+typedef struct
+{
+ Idx str_idx;
+ Idx node;
+ state_array_t *path;
+ Idx alasts; /* Allocation size of LASTS. */
+ Idx nlasts; /* The number of LASTS. */
+ re_sub_match_last_t **lasts;
+} re_sub_match_top_t;
+
+struct re_backref_cache_entry
+{
+ Idx node;
+ Idx str_idx;
+ Idx subexp_from;
+ Idx subexp_to;
+ char more;
+ char unused;
+ unsigned short int eps_reachable_subexps_map;
+};
+
+typedef struct
+{
+ /* The string object corresponding to the input string. */
+ re_string_t input;
+#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
+ const re_dfa_t *const dfa;
+#else
+ const re_dfa_t *dfa;
+#endif
+ /* EFLAGS of the argument of regexec. */
+ int eflags;
+ /* Where the matching ends. */
+ Idx match_last;
+ Idx last_node;
+ /* The state log used by the matcher. */
+ re_dfastate_t **state_log;
+ Idx state_log_top;
+ /* Back reference cache. */
+ Idx nbkref_ents;
+ Idx abkref_ents;
+ struct re_backref_cache_entry *bkref_ents;
+ int max_mb_elem_len;
+ Idx nsub_tops;
+ Idx asub_tops;
+ re_sub_match_top_t **sub_tops;
+} re_match_context_t;
+
+typedef struct
+{
+ re_dfastate_t **sifted_states;
+ re_dfastate_t **limited_states;
+ Idx last_node;
+ Idx last_str_idx;
+ re_node_set limits;
+} re_sift_context_t;
+
+struct re_fail_stack_ent_t
+{
+ Idx idx;
+ Idx node;
+ regmatch_t *regs;
+ re_node_set eps_via_nodes;
+};
+
+struct re_fail_stack_t
+{
+ Idx num;
+ Idx alloc;
+ struct re_fail_stack_ent_t *stack;
+};
+
+struct re_dfa_t
+{
+ re_token_t *nodes;
+ size_t nodes_alloc;
+ size_t nodes_len;
+ Idx *nexts;
+ Idx *org_indices;
+ re_node_set *edests;
+ re_node_set *eclosures;
+ re_node_set *inveclosures;
+ struct re_state_table_entry *state_table;
+ re_dfastate_t *init_state;
+ re_dfastate_t *init_state_word;
+ re_dfastate_t *init_state_nl;
+ re_dfastate_t *init_state_begbuf;
+ bin_tree_t *str_tree;
+ bin_tree_storage_t *str_tree_storage;
+ re_bitset_ptr_t sb_char;
+ int str_tree_storage_idx;
+
+ /* number of subexpressions `re_nsub' is in regex_t. */
+ re_hashval_t state_hash_mask;
+ Idx init_node;
+ Idx nbackref; /* The number of backreference in this dfa. */
+
+ /* Bitmap expressing which backreference is used. */
+ bitset_word_t used_bkref_map;
+ bitset_word_t completed_bkref_map;
+
+ unsigned int has_plural_match : 1;
+ /* If this dfa has "multibyte node", which is a backreference or
+ a node which can accept multibyte character or multi character
+ collating element. */
+ unsigned int has_mb_node : 1;
+ unsigned int is_utf8 : 1;
+ unsigned int map_notascii : 1;
+ unsigned int word_ops_used : 1;
+ int mb_cur_max;
+ bitset_t word_char;
+ reg_syntax_t syntax;
+ Idx *subexp_map;
+#ifdef DEBUG
+ char* re_str;
+#endif
+#ifdef _LIBC
+ __libc_lock_define (, lock)
+#endif
+};
+
+#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))
+#define re_node_set_remove(set,id) \
+ (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
+#define re_node_set_empty(p) ((p)->nelem = 0)
+#define re_node_set_free(set) re_free ((set)->elems)
+
+
+typedef enum
+{
+ SB_CHAR,
+ MB_CHAR,
+ EQUIV_CLASS,
+ COLL_SYM,
+ CHAR_CLASS
+} bracket_elem_type;
+
+typedef struct
+{
+ bracket_elem_type type;
+ union
+ {
+ unsigned char ch;
+ unsigned char *name;
+ wchar_t wch;
+ } opr;
+} bracket_elem_t;
+
+
+/* Inline functions for bitset_t operation. */
+
+static inline void
+bitset_set (bitset_t set, Idx i)
+{
+ set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS;
+}
+
+static inline void
+bitset_clear (bitset_t set, Idx i)
+{
+ set[i / BITSET_WORD_BITS] &= ~ ((bitset_word_t) 1 << i % BITSET_WORD_BITS);
+}
+
+static inline bool
+bitset_contain (const bitset_t set, Idx i)
+{
+ return (set[i / BITSET_WORD_BITS] >> i % BITSET_WORD_BITS) & 1;
+}
+
+static inline void
+bitset_empty (bitset_t set)
+{
+ memset (set, '\0', sizeof (bitset_t));
+}
+
+static inline void
+bitset_set_all (bitset_t set)
+{
+ memset (set, -1, sizeof (bitset_word_t) * (SBC_MAX / BITSET_WORD_BITS));
+ if (SBC_MAX % BITSET_WORD_BITS != 0)
+ set[BITSET_WORDS - 1] =
+ ((bitset_word_t) 1 << SBC_MAX % BITSET_WORD_BITS) - 1;
+}
+
+static inline void
+bitset_copy (bitset_t dest, const bitset_t src)
+{
+ memcpy (dest, src, sizeof (bitset_t));
+}
+
+static inline void
+bitset_not (bitset_t set)
+{
+ int bitset_i;
+ for (bitset_i = 0; bitset_i < SBC_MAX / BITSET_WORD_BITS; ++bitset_i)
+ set[bitset_i] = ~set[bitset_i];
+ if (SBC_MAX % BITSET_WORD_BITS != 0)
+ set[BITSET_WORDS - 1] =
+ ((((bitset_word_t) 1 << SBC_MAX % BITSET_WORD_BITS) - 1)
+ & ~set[BITSET_WORDS - 1]);
+}
+
+static inline void
+bitset_merge (bitset_t dest, const bitset_t src)
+{
+ int bitset_i;
+ for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
+ dest[bitset_i] |= src[bitset_i];
+}
+
+static inline void
+bitset_mask (bitset_t dest, const bitset_t src)
+{
+ int bitset_i;
+ for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
+ dest[bitset_i] &= src[bitset_i];
+}
+
+#ifdef RE_ENABLE_I18N
+/* Inline functions for re_string. */
+static inline int
+internal_function __attribute ((pure))
+re_string_char_size_at (const re_string_t *pstr, Idx idx)
+{
+ int byte_idx;
+ if (pstr->mb_cur_max == 1)
+ return 1;
+ for (byte_idx = 1; idx + byte_idx < pstr->valid_len; ++byte_idx)
+ if (pstr->wcs[idx + byte_idx] != WEOF)
+ break;
+ return byte_idx;
+}
+
+static inline wint_t
+internal_function __attribute ((pure))
+re_string_wchar_at (const re_string_t *pstr, Idx idx)
+{
+ if (pstr->mb_cur_max == 1)
+ return (wint_t) pstr->mbs[idx];
+ return (wint_t) pstr->wcs[idx];
+}
+
+static int
+internal_function __attribute ((pure))
+re_string_elem_size_at (const re_string_t *pstr, Idx idx)
+{
+# ifdef _LIBC
+ const unsigned char *p, *extra;
+ const int32_t *table, *indirect;
+ int32_t tmp;
+# include <locale/weight.h>
+ uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+
+ if (nrules != 0)
+ {
+ table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+ extra = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
+ indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_INDIRECTMB);
+ p = pstr->mbs + idx;
+ tmp = findidx (&p);
+ return p - pstr->mbs - idx;
+ }
+ else
+# endif /* _LIBC */
+ return 1;
+}
+#endif /* RE_ENABLE_I18N */
+
+#endif /* _REGEX_INTERNAL_H */
diff --git a/lib/regexec.c b/lib/regexec.c
new file mode 100644
index 0000000..84c3aed
--- /dev/null
+++ b/lib/regexec.c
@@ -0,0 +1,4398 @@
+/* Extended regular expression matching and search library.
+ Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
+ Idx n) internal_function;
+static void match_ctx_clean (re_match_context_t *mctx) internal_function;
+static void match_ctx_free (re_match_context_t *cache) internal_function;
+static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
+ Idx str_idx, Idx from, Idx to)
+ internal_function;
+static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
+ internal_function;
+static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
+ Idx str_idx) internal_function;
+static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
+ Idx node, Idx str_idx)
+ internal_function;
+static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
+ re_dfastate_t **limited_sts, Idx last_node,
+ Idx last_str_idx)
+ internal_function;
+static reg_errcode_t re_search_internal (const regex_t *preg,
+ const char *string, Idx length,
+ Idx start, Idx last_start, Idx stop,
+ size_t nmatch, regmatch_t pmatch[],
+ int eflags) internal_function;
+static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
+ const char *string1, Idx length1,
+ const char *string2, Idx length2,
+ Idx start, regoff_t range,
+ struct re_registers *regs,
+ Idx stop, bool ret_len) internal_function;
+static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
+ const char *string, Idx length, Idx start,
+ regoff_t range, Idx stop,
+ struct re_registers *regs,
+ bool ret_len) internal_function;
+static unsigned int re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
+ Idx nregs, int regs_allocated)
+ internal_function;
+static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
+ internal_function;
+static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
+ Idx *p_match_first) internal_function;
+static Idx check_halt_state_context (const re_match_context_t *mctx,
+ const re_dfastate_t *state, Idx idx)
+ internal_function;
+static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
+ regmatch_t *prev_idx_match, Idx cur_node,
+ Idx cur_idx, Idx nmatch) internal_function;
+static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
+ Idx str_idx, Idx dest_node, Idx nregs,
+ regmatch_t *regs,
+ re_node_set *eps_via_nodes)
+ internal_function;
+static reg_errcode_t set_regs (const regex_t *preg,
+ const re_match_context_t *mctx,
+ size_t nmatch, regmatch_t *pmatch,
+ bool fl_backtrack) internal_function;
+static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
+ internal_function;
+
+#ifdef RE_ENABLE_I18N
+static int sift_states_iter_mb (const re_match_context_t *mctx,
+ re_sift_context_t *sctx,
+ Idx node_idx, Idx str_idx, Idx max_str_idx)
+ internal_function;
+#endif /* RE_ENABLE_I18N */
+static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
+ re_sift_context_t *sctx)
+ internal_function;
+static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
+ re_sift_context_t *sctx, Idx str_idx,
+ re_node_set *cur_dest)
+ internal_function;
+static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
+ re_sift_context_t *sctx,
+ Idx str_idx,
+ re_node_set *dest_nodes)
+ internal_function;
+static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
+ re_node_set *dest_nodes,
+ const re_node_set *candidates)
+ internal_function;
+static bool check_dst_limits (const re_match_context_t *mctx,
+ const re_node_set *limits,
+ Idx dst_node, Idx dst_idx, Idx src_node,
+ Idx src_idx) internal_function;
+static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
+ int boundaries, Idx subexp_idx,
+ Idx from_node, Idx bkref_idx)
+ internal_function;
+static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
+ Idx limit, Idx subexp_idx,
+ Idx node, Idx str_idx,
+ Idx bkref_idx) internal_function;
+static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
+ re_node_set *dest_nodes,
+ const re_node_set *candidates,
+ re_node_set *limits,
+ struct re_backref_cache_entry *bkref_ents,
+ Idx str_idx) internal_function;
+static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
+ re_sift_context_t *sctx,
+ Idx str_idx, const re_node_set *candidates)
+ internal_function;
+static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
+ re_dfastate_t **dst,
+ re_dfastate_t **src, Idx num)
+ internal_function;
+static re_dfastate_t *find_recover_state (reg_errcode_t *err,
+ re_match_context_t *mctx) internal_function;
+static re_dfastate_t *transit_state (reg_errcode_t *err,
+ re_match_context_t *mctx,
+ re_dfastate_t *state) internal_function;
+static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
+ re_match_context_t *mctx,
+ re_dfastate_t *next_state)
+ internal_function;
+static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
+ re_node_set *cur_nodes,
+ Idx str_idx) internal_function;
+#if 0
+static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
+ re_match_context_t *mctx,
+ re_dfastate_t *pstate)
+ internal_function;
+#endif
+#ifdef RE_ENABLE_I18N
+static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
+ re_dfastate_t *pstate)
+ internal_function;
+#endif /* RE_ENABLE_I18N */
+static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
+ const re_node_set *nodes)
+ internal_function;
+static reg_errcode_t get_subexp (re_match_context_t *mctx,
+ Idx bkref_node, Idx bkref_str_idx)
+ internal_function;
+static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
+ const re_sub_match_top_t *sub_top,
+ re_sub_match_last_t *sub_last,
+ Idx bkref_node, Idx bkref_str)
+ internal_function;
+static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
+ Idx subexp_idx, int type) internal_function;
+static reg_errcode_t check_arrival (re_match_context_t *mctx,
+ state_array_t *path, Idx top_node,
+ Idx top_str, Idx last_node, Idx last_str,
+ int type) internal_function;
+static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
+ Idx str_idx,
+ re_node_set *cur_nodes,
+ re_node_set *next_nodes)
+ internal_function;
+static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
+ re_node_set *cur_nodes,
+ Idx ex_subexp, int type)
+ internal_function;
+static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
+ re_node_set *dst_nodes,
+ Idx target, Idx ex_subexp,
+ int type) internal_function;
+static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
+ re_node_set *cur_nodes, Idx cur_str,
+ Idx subexp_num, int type)
+ internal_function;
+static bool build_trtable (const re_dfa_t *dfa,
+ re_dfastate_t *state) internal_function;
+#ifdef RE_ENABLE_I18N
+static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
+ const re_string_t *input, Idx idx)
+ internal_function;
+# ifdef _LIBC
+static unsigned int find_collation_sequence_value (const unsigned char *mbs,
+ size_t name_len)
+ internal_function;
+# endif /* _LIBC */
+#endif /* RE_ENABLE_I18N */
+static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
+ const re_dfastate_t *state,
+ re_node_set *states_node,
+ bitset_t *states_ch) internal_function;
+static bool check_node_accept (const re_match_context_t *mctx,
+ const re_token_t *node, Idx idx)
+ internal_function;
+static reg_errcode_t extend_buffers (re_match_context_t *mctx)
+ internal_function;
+
+/* Entry point for POSIX code. */
+
+/* regexec searches for a given pattern, specified by PREG, in the
+ string STRING.
+
+ If NMATCH is zero or REG_NOSUB was set in the cflags argument to
+ `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
+ least NMATCH elements, and we set them to the offsets of the
+ corresponding matched substrings.
+
+ EFLAGS specifies `execution flags' which affect matching: if
+ REG_NOTBOL is set, then ^ does not match at the beginning of the
+ string; if REG_NOTEOL is set, then $ does not match at the end.
+
+ We return 0 if we find a match and REG_NOMATCH if not. */
+
+int
+regexec (preg, string, nmatch, pmatch, eflags)
+ const regex_t *__restrict preg;
+ const char *__restrict string;
+ size_t nmatch;
+ regmatch_t pmatch[];
+ int eflags;
+{
+ reg_errcode_t err;
+ Idx start, length;
+#ifdef _LIBC
+ re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+#endif
+
+ if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
+ return REG_BADPAT;
+
+ if (eflags & REG_STARTEND)
+ {
+ start = pmatch[0].rm_so;
+ length = pmatch[0].rm_eo;
+ }
+ else
+ {
+ start = 0;
+ length = strlen (string);
+ }
+
+ __libc_lock_lock (dfa->lock);
+ if (preg->no_sub)
+ err = re_search_internal (preg, string, length, start, length,
+ length, 0, NULL, eflags);
+ else
+ err = re_search_internal (preg, string, length, start, length,
+ length, nmatch, pmatch, eflags);
+ __libc_lock_unlock (dfa->lock);
+ return err != REG_NOERROR;
+}
+
+#ifdef _LIBC
+# include <shlib-compat.h>
+versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
+
+# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
+__typeof__ (__regexec) __compat_regexec;
+
+int
+attribute_compat_text_section
+__compat_regexec (const regex_t *__restrict preg,
+ const char *__restrict string, size_t nmatch,
+ regmatch_t pmatch[], int eflags)
+{
+ return regexec (preg, string, nmatch, pmatch,
+ eflags & (REG_NOTBOL | REG_NOTEOL));
+}
+compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
+# endif
+#endif
+
+/* Entry points for GNU code. */
+
+/* re_match, re_search, re_match_2, re_search_2
+
+ The former two functions operate on STRING with length LENGTH,
+ while the later two operate on concatenation of STRING1 and STRING2
+ with lengths LENGTH1 and LENGTH2, respectively.
+
+ re_match() matches the compiled pattern in BUFP against the string,
+ starting at index START.
+
+ re_search() first tries matching at index START, then it tries to match
+ starting from index START + 1, and so on. The last start position tried
+ is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
+ way as re_match().)
+
+ The parameter STOP of re_{match,search}_2 specifies that no match exceeding
+ the first STOP characters of the concatenation of the strings should be
+ concerned.
+
+ If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
+ and all groups is stored in REGS. (For the "_2" variants, the offsets are
+ computed relative to the concatenation, not relative to the individual
+ strings.)
+
+ On success, re_match* functions return the length of the match, re_search*
+ return the position of the start of the match. Return value -1 means no
+ match was found and -2 indicates an internal error. */
+
+regoff_t
+re_match (bufp, string, length, start, regs)
+ struct re_pattern_buffer *bufp;
+ const char *string;
+ Idx length, start;
+ struct re_registers *regs;
+{
+ return re_search_stub (bufp, string, length, start, 0, length, regs, true);
+}
+#ifdef _LIBC
+weak_alias (__re_match, re_match)
+#endif
+
+regoff_t
+re_search (bufp, string, length, start, range, regs)
+ struct re_pattern_buffer *bufp;
+ const char *string;
+ Idx length, start;
+ regoff_t range;
+ struct re_registers *regs;
+{
+ return re_search_stub (bufp, string, length, start, range, length, regs,
+ false);
+}
+#ifdef _LIBC
+weak_alias (__re_search, re_search)
+#endif
+
+regoff_t
+re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
+ struct re_pattern_buffer *bufp;
+ const char *string1, *string2;
+ Idx length1, length2, start, stop;
+ struct re_registers *regs;
+{
+ return re_search_2_stub (bufp, string1, length1, string2, length2,
+ start, 0, regs, stop, true);
+}
+#ifdef _LIBC
+weak_alias (__re_match_2, re_match_2)
+#endif
+
+regoff_t
+re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
+ struct re_pattern_buffer *bufp;
+ const char *string1, *string2;
+ Idx length1, length2, start, stop;
+ regoff_t range;
+ struct re_registers *regs;
+{
+ return re_search_2_stub (bufp, string1, length1, string2, length2,
+ start, range, regs, stop, false);
+}
+#ifdef _LIBC
+weak_alias (__re_search_2, re_search_2)
+#endif
+
+static regoff_t
+internal_function
+re_search_2_stub (struct re_pattern_buffer *bufp,
+ const char *string1, Idx length1,
+ const char *string2, Idx length2,
+ Idx start, regoff_t range, struct re_registers *regs,
+ Idx stop, bool ret_len)
+{
+ const char *str;
+ regoff_t rval;
+ Idx len = length1 + length2;
+ char *s = NULL;
+
+ if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0))
+ return -2;
+
+ /* Concatenate the strings. */
+ if (length2 > 0)
+ if (length1 > 0)
+ {
+ s = re_malloc (char, len);
+
+ if (BE (s == NULL, 0))
+ return -2;
+#ifdef _LIBC
+ memcpy (__mempcpy (s, string1, length1), string2, length2);
+#else
+ memcpy (s, string1, length1);
+ memcpy (s + length1, string2, length2);
+#endif
+ str = s;
+ }
+ else
+ str = string2;
+ else
+ str = string1;
+
+ rval = re_search_stub (bufp, str, len, start, range, stop, regs,
+ ret_len);
+ re_free (s);
+ return rval;
+}
+
+/* The parameters have the same meaning as those of re_search.
+ Additional parameters:
+ If RET_LEN is true the length of the match is returned (re_match style);
+ otherwise the position of the match is returned. */
+
+static regoff_t
+internal_function
+re_search_stub (struct re_pattern_buffer *bufp,
+ const char *string, Idx length,
+ Idx start, regoff_t range, Idx stop, struct re_registers *regs,
+ bool ret_len)
+{
+ reg_errcode_t result;
+ regmatch_t *pmatch;
+ Idx nregs;
+ regoff_t rval;
+ int eflags = 0;
+#ifdef _LIBC
+ re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+#endif
+ Idx last_start = start + range;
+
+ /* Check for out-of-range. */
+ if (BE (start < 0 || start > length, 0))
+ return -1;
+ if (BE (length < last_start || (0 <= range && last_start < start), 0))
+ last_start = length;
+ else if (BE (last_start < 0 || (range < 0 && start <= last_start), 0))
+ last_start = 0;
+
+ __libc_lock_lock (dfa->lock);
+
+ eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
+ eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
+
+ /* Compile fastmap if we haven't yet. */
+ if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
+ re_compile_fastmap (bufp);
+
+ if (BE (bufp->no_sub, 0))
+ regs = NULL;
+
+ /* We need at least 1 register. */
+ if (regs == NULL)
+ nregs = 1;
+ else if (BE (bufp->regs_allocated == REGS_FIXED
+ && regs->num_regs <= bufp->re_nsub, 0))
+ {
+ nregs = regs->num_regs;
+ if (BE (nregs < 1, 0))
+ {
+ /* Nothing can be copied to regs. */
+ regs = NULL;
+ nregs = 1;
+ }
+ }
+ else
+ nregs = bufp->re_nsub + 1;
+ pmatch = re_malloc (regmatch_t, nregs);
+ if (BE (pmatch == NULL, 0))
+ {
+ rval = -2;
+ goto out;
+ }
+
+ result = re_search_internal (bufp, string, length, start, last_start, stop,
+ nregs, pmatch, eflags);
+
+ rval = 0;
+
+ /* I hope we needn't fill ther regs with -1's when no match was found. */
+ if (result != REG_NOERROR)
+ rval = -1;
+ else if (regs != NULL)
+ {
+ /* If caller wants register contents data back, copy them. */
+ bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
+ bufp->regs_allocated);
+ if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
+ rval = -2;
+ }
+
+ if (BE (rval == 0, 1))
+ {
+ if (ret_len)
+ {
+ assert (pmatch[0].rm_so == start);
+ rval = pmatch[0].rm_eo - start;
+ }
+ else
+ rval = pmatch[0].rm_so;
+ }
+ re_free (pmatch);
+ out:
+ __libc_lock_unlock (dfa->lock);
+ return rval;
+}
+
+static unsigned int
+internal_function
+re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
+ int regs_allocated)
+{
+ int rval = REGS_REALLOCATE;
+ Idx i;
+ Idx need_regs = nregs + 1;
+ /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
+ uses. */
+
+ /* Have the register data arrays been allocated? */
+ if (regs_allocated == REGS_UNALLOCATED)
+ { /* No. So allocate them with malloc. */
+ regs->start = re_malloc (regoff_t, need_regs);
+ if (BE (regs->start == NULL, 0))
+ return REGS_UNALLOCATED;
+ regs->end = re_malloc (regoff_t, need_regs);
+ if (BE (regs->end == NULL, 0))
+ {
+ re_free (regs->start);
+ return REGS_UNALLOCATED;
+ }
+ regs->num_regs = need_regs;
+ }
+ else if (regs_allocated == REGS_REALLOCATE)
+ { /* Yes. If we need more elements than were already
+ allocated, reallocate them. If we need fewer, just
+ leave it alone. */
+ if (BE (need_regs > regs->num_regs, 0))
+ {
+ regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
+ regoff_t *new_end;
+ if (BE (new_start == NULL, 0))
+ return REGS_UNALLOCATED;
+ new_end = re_realloc (regs->end, regoff_t, need_regs);
+ if (BE (new_end == NULL, 0))
+ {
+ re_free (new_start);
+ return REGS_UNALLOCATED;
+ }
+ regs->start = new_start;
+ regs->end = new_end;
+ regs->num_regs = need_regs;
+ }
+ }
+ else
+ {
+ assert (regs_allocated == REGS_FIXED);
+ /* This function may not be called with REGS_FIXED and nregs too big. */
+ assert (regs->num_regs >= nregs);
+ rval = REGS_FIXED;
+ }
+
+ /* Copy the regs. */
+ for (i = 0; i < nregs; ++i)
+ {
+ regs->start[i] = pmatch[i].rm_so;
+ regs->end[i] = pmatch[i].rm_eo;
+ }
+ for ( ; i < regs->num_regs; ++i)
+ regs->start[i] = regs->end[i] = -1;
+
+ return rval;
+}
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+ ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
+ this memory for recording register information. STARTS and ENDS
+ must be allocated using the malloc library routine, and must each
+ be at least NUM_REGS * sizeof (regoff_t) bytes long.
+
+ If NUM_REGS == 0, then subsequent matches should allocate their own
+ register data.
+
+ Unless this function is called, the first search or match using
+ PATTERN_BUFFER will allocate its own register data, without
+ freeing the old data. */
+
+void
+re_set_registers (bufp, regs, num_regs, starts, ends)
+ struct re_pattern_buffer *bufp;
+ struct re_registers *regs;
+ __re_size_t num_regs;
+ regoff_t *starts, *ends;
+{
+ if (num_regs)
+ {
+ bufp->regs_allocated = REGS_REALLOCATE;
+ regs->num_regs = num_regs;
+ regs->start = starts;
+ regs->end = ends;
+ }
+ else
+ {
+ bufp->regs_allocated = REGS_UNALLOCATED;
+ regs->num_regs = 0;
+ regs->start = regs->end = NULL;
+ }
+}
+#ifdef _LIBC
+weak_alias (__re_set_registers, re_set_registers)
+#endif
+
+/* Entry points compatible with 4.2 BSD regex library. We don't define
+ them unless specifically requested. */
+
+#if defined _REGEX_RE_COMP || defined _LIBC
+int
+# ifdef _LIBC
+weak_function
+# endif
+re_exec (s)
+ const char *s;
+{
+ return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
+}
+#endif /* _REGEX_RE_COMP */
+
+/* Internal entry point. */
+
+/* Searches for a compiled pattern PREG in the string STRING, whose
+ length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
+ meaning as with regexec. LAST_START is START + RANGE, where
+ START and RANGE have the same meaning as with re_search.
+ Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
+ otherwise return the error code.
+ Note: We assume front end functions already check ranges.
+ (0 <= LAST_START && LAST_START <= LENGTH) */
+
+static reg_errcode_t
+internal_function
+re_search_internal (const regex_t *preg,
+ const char *string, Idx length,
+ Idx start, Idx last_start, Idx stop,
+ size_t nmatch, regmatch_t pmatch[],
+ int eflags)
+{
+ reg_errcode_t err;
+ const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
+ Idx left_lim, right_lim;
+ int incr;
+ bool fl_longest_match;
+ int match_kind;
+ Idx match_first;
+ Idx match_last = REG_MISSING;
+ Idx extra_nmatch;
+ bool sb;
+ int ch;
+#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
+ re_match_context_t mctx = { .dfa = dfa };
+#else
+ re_match_context_t mctx;
+#endif
+ char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
+ && start != last_start && !preg->can_be_null)
+ ? preg->fastmap : NULL);
+ RE_TRANSLATE_TYPE t = preg->translate;
+
+#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
+ memset (&mctx, '\0', sizeof (re_match_context_t));
+ mctx.dfa = dfa;
+#endif
+
+ extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
+ nmatch -= extra_nmatch;
+
+ /* Check if the DFA haven't been compiled. */
+ if (BE (preg->used == 0 || dfa->init_state == NULL
+ || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
+ || dfa->init_state_begbuf == NULL, 0))
+ return REG_NOMATCH;
+
+#ifdef DEBUG
+ /* We assume front-end functions already check them. */
+ assert (0 <= last_start && last_start <= length);
+#endif
+
+ /* If initial states with non-begbuf contexts have no elements,
+ the regex must be anchored. If preg->newline_anchor is set,
+ we'll never use init_state_nl, so do not check it. */
+ if (dfa->init_state->nodes.nelem == 0
+ && dfa->init_state_word->nodes.nelem == 0
+ && (dfa->init_state_nl->nodes.nelem == 0
+ || !preg->newline_anchor))
+ {
+ if (start != 0 && last_start != 0)
+ return REG_NOMATCH;
+ start = last_start = 0;
+ }
+
+ /* We must check the longest matching, if nmatch > 0. */
+ fl_longest_match = (nmatch != 0 || dfa->nbackref);
+
+ err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
+ preg->translate, preg->syntax & RE_ICASE, dfa);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ mctx.input.stop = stop;
+ mctx.input.raw_stop = stop;
+ mctx.input.newline_anchor = preg->newline_anchor;
+
+ err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+
+ /* We will log all the DFA states through which the dfa pass,
+ if nmatch > 1, or this dfa has "multibyte node", which is a
+ back-reference or a node which can accept multibyte character or
+ multi character collating element. */
+ if (nmatch > 1 || dfa->has_mb_node)
+ {
+ /* Avoid overflow. */
+ if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0))
+ {
+ err = REG_ESPACE;
+ goto free_return;
+ }
+
+ mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
+ if (BE (mctx.state_log == NULL, 0))
+ {
+ err = REG_ESPACE;
+ goto free_return;
+ }
+ }
+ else
+ mctx.state_log = NULL;
+
+ match_first = start;
+ mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
+ : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
+
+ /* Check incrementally whether of not the input string match. */
+ incr = (last_start < start) ? -1 : 1;
+ left_lim = (last_start < start) ? last_start : start;
+ right_lim = (last_start < start) ? start : last_start;
+ sb = dfa->mb_cur_max == 1;
+ match_kind =
+ (fastmap
+ ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
+ | (start <= last_start ? 2 : 0)
+ | (t != NULL ? 1 : 0))
+ : 8);
+
+ for (;; match_first += incr)
+ {
+ err = REG_NOMATCH;
+ if (match_first < left_lim || right_lim < match_first)
+ goto free_return;
+
+ /* Advance as rapidly as possible through the string, until we
+ find a plausible place to start matching. This may be done
+ with varying efficiency, so there are various possibilities:
+ only the most common of them are specialized, in order to
+ save on code size. We use a switch statement for speed. */
+ switch (match_kind)
+ {
+ case 8:
+ /* No fastmap. */
+ break;
+
+ case 7:
+ /* Fastmap with single-byte translation, match forward. */
+ while (BE (match_first < right_lim, 1)
+ && !fastmap[t[(unsigned char) string[match_first]]])
+ ++match_first;
+ goto forward_match_found_start_or_reached_end;
+
+ case 6:
+ /* Fastmap without translation, match forward. */
+ while (BE (match_first < right_lim, 1)
+ && !fastmap[(unsigned char) string[match_first]])
+ ++match_first;
+
+ forward_match_found_start_or_reached_end:
+ if (BE (match_first == right_lim, 0))
+ {
+ ch = match_first >= length
+ ? 0 : (unsigned char) string[match_first];
+ if (!fastmap[t ? t[ch] : ch])
+ goto free_return;
+ }
+ break;
+
+ case 4:
+ case 5:
+ /* Fastmap without multi-byte translation, match backwards. */
+ while (match_first >= left_lim)
+ {
+ ch = match_first >= length
+ ? 0 : (unsigned char) string[match_first];
+ if (fastmap[t ? t[ch] : ch])
+ break;
+ --match_first;
+ }
+ if (match_first < left_lim)
+ goto free_return;
+ break;
+
+ default:
+ /* In this case, we can't determine easily the current byte,
+ since it might be a component byte of a multibyte
+ character. Then we use the constructed buffer instead. */
+ for (;;)
+ {
+ /* If MATCH_FIRST is out of the valid range, reconstruct the
+ buffers. */
+ __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
+ if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, 0))
+ {
+ err = re_string_reconstruct (&mctx.input, match_first,
+ eflags);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+
+ offset = match_first - mctx.input.raw_mbs_idx;
+ }
+ /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
+ Note that MATCH_FIRST must not be smaller than 0. */
+ ch = (match_first >= length
+ ? 0 : re_string_byte_at (&mctx.input, offset));
+ if (fastmap[ch])
+ break;
+ match_first += incr;
+ if (match_first < left_lim || match_first > right_lim)
+ {
+ err = REG_NOMATCH;
+ goto free_return;
+ }
+ }
+ break;
+ }
+
+ /* Reconstruct the buffers so that the matcher can assume that
+ the matching starts from the beginning of the buffer. */
+ err = re_string_reconstruct (&mctx.input, match_first, eflags);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+
+#ifdef RE_ENABLE_I18N
+ /* Don't consider this char as a possible match start if it part,
+ yet isn't the head, of a multibyte character. */
+ if (!sb && !re_string_first_byte (&mctx.input, 0))
+ continue;
+#endif
+
+ /* It seems to be appropriate one, then use the matcher. */
+ /* We assume that the matching starts from 0. */
+ mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
+ match_last = check_matching (&mctx, fl_longest_match,
+ start <= last_start ? &match_first : NULL);
+ if (match_last != REG_MISSING)
+ {
+ if (BE (match_last == REG_ERROR, 0))
+ {
+ err = REG_ESPACE;
+ goto free_return;
+ }
+ else
+ {
+ mctx.match_last = match_last;
+ if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
+ {
+ re_dfastate_t *pstate = mctx.state_log[match_last];
+ mctx.last_node = check_halt_state_context (&mctx, pstate,
+ match_last);
+ }
+ if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
+ || dfa->nbackref)
+ {
+ err = prune_impossible_nodes (&mctx);
+ if (err == REG_NOERROR)
+ break;
+ if (BE (err != REG_NOMATCH, 0))
+ goto free_return;
+ match_last = REG_MISSING;
+ }
+ else
+ break; /* We found a match. */
+ }
+ }
+
+ match_ctx_clean (&mctx);
+ }
+
+#ifdef DEBUG
+ assert (match_last != REG_MISSING);
+ assert (err == REG_NOERROR);
+#endif
+
+ /* Set pmatch[] if we need. */
+ if (nmatch > 0)
+ {
+ Idx reg_idx;
+
+ /* Initialize registers. */
+ for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
+ pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
+
+ /* Set the points where matching start/end. */
+ pmatch[0].rm_so = 0;
+ pmatch[0].rm_eo = mctx.match_last;
+ /* FIXME: This function should fail if mctx.match_last exceeds
+ the maximum possible regoff_t value. We need a new error
+ code REG_OVERFLOW. */
+
+ if (!preg->no_sub && nmatch > 1)
+ {
+ err = set_regs (preg, &mctx, nmatch, pmatch,
+ dfa->has_plural_match && dfa->nbackref > 0);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ }
+
+ /* At last, add the offset to the each registers, since we slided
+ the buffers so that we could assume that the matching starts
+ from 0. */
+ for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
+ if (pmatch[reg_idx].rm_so != -1)
+ {
+#ifdef RE_ENABLE_I18N
+ if (BE (mctx.input.offsets_needed != 0, 0))
+ {
+ pmatch[reg_idx].rm_so =
+ (pmatch[reg_idx].rm_so == mctx.input.valid_len
+ ? mctx.input.valid_raw_len
+ : mctx.input.offsets[pmatch[reg_idx].rm_so]);
+ pmatch[reg_idx].rm_eo =
+ (pmatch[reg_idx].rm_eo == mctx.input.valid_len
+ ? mctx.input.valid_raw_len
+ : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
+ }
+#else
+ assert (mctx.input.offsets_needed == 0);
+#endif
+ pmatch[reg_idx].rm_so += match_first;
+ pmatch[reg_idx].rm_eo += match_first;
+ }
+ for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
+ {
+ pmatch[nmatch + reg_idx].rm_so = -1;
+ pmatch[nmatch + reg_idx].rm_eo = -1;
+ }
+
+ if (dfa->subexp_map)
+ for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
+ if (dfa->subexp_map[reg_idx] != reg_idx)
+ {
+ pmatch[reg_idx + 1].rm_so
+ = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
+ pmatch[reg_idx + 1].rm_eo
+ = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
+ }
+ }
+
+ free_return:
+ re_free (mctx.state_log);
+ if (dfa->nbackref)
+ match_ctx_free (&mctx);
+ re_string_destruct (&mctx.input);
+ return err;
+}
+
+static reg_errcode_t
+internal_function
+prune_impossible_nodes (re_match_context_t *mctx)
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ Idx halt_node, match_last;
+ reg_errcode_t ret;
+ re_dfastate_t **sifted_states;
+ re_dfastate_t **lim_states = NULL;
+ re_sift_context_t sctx;
+#ifdef DEBUG
+ assert (mctx->state_log != NULL);
+#endif
+ match_last = mctx->match_last;
+ halt_node = mctx->last_node;
+
+ /* Avoid overflow. */
+ if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0))
+ return REG_ESPACE;
+
+ sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
+ if (BE (sifted_states == NULL, 0))
+ {
+ ret = REG_ESPACE;
+ goto free_return;
+ }
+ if (dfa->nbackref)
+ {
+ lim_states = re_malloc (re_dfastate_t *, match_last + 1);
+ if (BE (lim_states == NULL, 0))
+ {
+ ret = REG_ESPACE;
+ goto free_return;
+ }
+ while (1)
+ {
+ memset (lim_states, '\0',
+ sizeof (re_dfastate_t *) * (match_last + 1));
+ sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
+ match_last);
+ ret = sift_states_backward (mctx, &sctx);
+ re_node_set_free (&sctx.limits);
+ if (BE (ret != REG_NOERROR, 0))
+ goto free_return;
+ if (sifted_states[0] != NULL || lim_states[0] != NULL)
+ break;
+ do
+ {
+ --match_last;
+ if (! REG_VALID_INDEX (match_last))
+ {
+ ret = REG_NOMATCH;
+ goto free_return;
+ }
+ } while (mctx->state_log[match_last] == NULL
+ || !mctx->state_log[match_last]->halt);
+ halt_node = check_halt_state_context (mctx,
+ mctx->state_log[match_last],
+ match_last);
+ }
+ ret = merge_state_array (dfa, sifted_states, lim_states,
+ match_last + 1);
+ re_free (lim_states);
+ lim_states = NULL;
+ if (BE (ret != REG_NOERROR, 0))
+ goto free_return;
+ }
+ else
+ {
+ sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
+ ret = sift_states_backward (mctx, &sctx);
+ re_node_set_free (&sctx.limits);
+ if (BE (ret != REG_NOERROR, 0))
+ goto free_return;
+ }
+ re_free (mctx->state_log);
+ mctx->state_log = sifted_states;
+ sifted_states = NULL;
+ mctx->last_node = halt_node;
+ mctx->match_last = match_last;
+ ret = REG_NOERROR;
+ free_return:
+ re_free (sifted_states);
+ re_free (lim_states);
+ return ret;
+}
+
+/* Acquire an initial state and return it.
+ We must select appropriate initial state depending on the context,
+ since initial states may have constraints like "\<", "^", etc.. */
+
+static inline re_dfastate_t *
+__attribute ((always_inline)) internal_function
+acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
+ Idx idx)
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ if (dfa->init_state->has_constraint)
+ {
+ unsigned int context;
+ context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
+ if (IS_WORD_CONTEXT (context))
+ return dfa->init_state_word;
+ else if (IS_ORDINARY_CONTEXT (context))
+ return dfa->init_state;
+ else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
+ return dfa->init_state_begbuf;
+ else if (IS_NEWLINE_CONTEXT (context))
+ return dfa->init_state_nl;
+ else if (IS_BEGBUF_CONTEXT (context))
+ {
+ /* It is relatively rare case, then calculate on demand. */
+ return re_acquire_state_context (err, dfa,
+ dfa->init_state->entrance_nodes,
+ context);
+ }
+ else
+ /* Must not happen? */
+ return dfa->init_state;
+ }
+ else
+ return dfa->init_state;
+}
+
+/* Check whether the regular expression match input string INPUT or not,
+ and return the index where the matching end. Return REG_MISSING if
+ there is no match, and return REG_ERROR in case of an error.
+ FL_LONGEST_MATCH means we want the POSIX longest matching.
+ If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
+ next place where we may want to try matching.
+ Note that the matcher assume that the maching starts from the current
+ index of the buffer. */
+
+static Idx
+internal_function
+check_matching (re_match_context_t *mctx, bool fl_longest_match,
+ Idx *p_match_first)
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ reg_errcode_t err;
+ Idx match = 0;
+ Idx match_last = REG_MISSING;
+ Idx cur_str_idx = re_string_cur_idx (&mctx->input);
+ re_dfastate_t *cur_state;
+ bool at_init_state = p_match_first != NULL;
+ Idx next_start_idx = cur_str_idx;
+
+ err = REG_NOERROR;
+ cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
+ /* An initial state must not be NULL (invalid). */
+ if (BE (cur_state == NULL, 0))
+ {
+ assert (err == REG_ESPACE);
+ return REG_ERROR;
+ }
+
+ if (mctx->state_log != NULL)
+ {
+ mctx->state_log[cur_str_idx] = cur_state;
+
+ /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
+ later. E.g. Processing back references. */
+ if (BE (dfa->nbackref, 0))
+ {
+ at_init_state = false;
+ err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+
+ if (cur_state->has_backref)
+ {
+ err = transit_state_bkref (mctx, &cur_state->nodes);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ }
+ }
+
+ /* If the RE accepts NULL string. */
+ if (BE (cur_state->halt, 0))
+ {
+ if (!cur_state->has_constraint
+ || check_halt_state_context (mctx, cur_state, cur_str_idx))
+ {
+ if (!fl_longest_match)
+ return cur_str_idx;
+ else
+ {
+ match_last = cur_str_idx;
+ match = 1;
+ }
+ }
+ }
+
+ while (!re_string_eoi (&mctx->input))
+ {
+ re_dfastate_t *old_state = cur_state;
+ Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
+
+ if (BE (next_char_idx >= mctx->input.bufs_len, 0)
+ || (BE (next_char_idx >= mctx->input.valid_len, 0)
+ && mctx->input.valid_len < mctx->input.len))
+ {
+ err = extend_buffers (mctx);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ assert (err == REG_ESPACE);
+ return REG_ERROR;
+ }
+ }
+
+ cur_state = transit_state (&err, mctx, cur_state);
+ if (mctx->state_log != NULL)
+ cur_state = merge_state_with_log (&err, mctx, cur_state);
+
+ if (cur_state == NULL)
+ {
+ /* Reached the invalid state or an error. Try to recover a valid
+ state using the state log, if available and if we have not
+ already found a valid (even if not the longest) match. */
+ if (BE (err != REG_NOERROR, 0))
+ return REG_ERROR;
+
+ if (mctx->state_log == NULL
+ || (match && !fl_longest_match)
+ || (cur_state = find_recover_state (&err, mctx)) == NULL)
+ break;
+ }
+
+ if (BE (at_init_state, 0))
+ {
+ if (old_state == cur_state)
+ next_start_idx = next_char_idx;
+ else
+ at_init_state = false;
+ }
+
+ if (cur_state->halt)
+ {
+ /* Reached a halt state.
+ Check the halt state can satisfy the current context. */
+ if (!cur_state->has_constraint
+ || check_halt_state_context (mctx, cur_state,
+ re_string_cur_idx (&mctx->input)))
+ {
+ /* We found an appropriate halt state. */
+ match_last = re_string_cur_idx (&mctx->input);
+ match = 1;
+
+ /* We found a match, do not modify match_first below. */
+ p_match_first = NULL;
+ if (!fl_longest_match)
+ break;
+ }
+ }
+ }
+
+ if (p_match_first)
+ *p_match_first += next_start_idx;
+
+ return match_last;
+}
+
+/* Check NODE match the current context. */
+
+static bool
+internal_function
+check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
+{
+ re_token_type_t type = dfa->nodes[node].type;
+ unsigned int constraint = dfa->nodes[node].constraint;
+ if (type != END_OF_RE)
+ return false;
+ if (!constraint)
+ return true;
+ if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
+ return false;
+ return true;
+}
+
+/* Check the halt state STATE match the current context.
+ Return 0 if not match, if the node, STATE has, is a halt node and
+ match the context, return the node. */
+
+static Idx
+internal_function
+check_halt_state_context (const re_match_context_t *mctx,
+ const re_dfastate_t *state, Idx idx)
+{
+ Idx i;
+ unsigned int context;
+#ifdef DEBUG
+ assert (state->halt);
+#endif
+ context = re_string_context_at (&mctx->input, idx, mctx->eflags);
+ for (i = 0; i < state->nodes.nelem; ++i)
+ if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
+ return state->nodes.elems[i];
+ return 0;
+}
+
+/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
+ corresponding to the DFA).
+ Return the destination node, and update EPS_VIA_NODES;
+ return REG_MISSING in case of errors. */
+
+static Idx
+internal_function
+proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
+ Idx *pidx, Idx node, re_node_set *eps_via_nodes,
+ struct re_fail_stack_t *fs)
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ Idx i;
+ bool ok;
+ if (IS_EPSILON_NODE (dfa->nodes[node].type))
+ {
+ re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
+ re_node_set *edests = &dfa->edests[node];
+ Idx dest_node;
+ ok = re_node_set_insert (eps_via_nodes, node);
+ if (BE (! ok, 0))
+ return REG_ERROR;
+ /* Pick up a valid destination, or return REG_MISSING if none
+ is found. */
+ for (dest_node = REG_MISSING, i = 0; i < edests->nelem; ++i)
+ {
+ Idx candidate = edests->elems[i];
+ if (!re_node_set_contains (cur_nodes, candidate))
+ continue;
+ if (dest_node == REG_MISSING)
+ dest_node = candidate;
+
+ else
+ {
+ /* In order to avoid infinite loop like "(a*)*", return the second
+ epsilon-transition if the first was already considered. */
+ if (re_node_set_contains (eps_via_nodes, dest_node))
+ return candidate;
+
+ /* Otherwise, push the second epsilon-transition on the fail stack. */
+ else if (fs != NULL
+ && push_fail_stack (fs, *pidx, candidate, nregs, regs,
+ eps_via_nodes))
+ return REG_ERROR;
+
+ /* We know we are going to exit. */
+ break;
+ }
+ }
+ return dest_node;
+ }
+ else
+ {
+ Idx naccepted = 0;
+ re_token_type_t type = dfa->nodes[node].type;
+
+#ifdef RE_ENABLE_I18N
+ if (dfa->nodes[node].accept_mb)
+ naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
+ else
+#endif /* RE_ENABLE_I18N */
+ if (type == OP_BACK_REF)
+ {
+ Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
+ naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
+ if (fs != NULL)
+ {
+ if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
+ return REG_MISSING;
+ else if (naccepted)
+ {
+ char *buf = (char *) re_string_get_buffer (&mctx->input);
+ if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
+ naccepted) != 0)
+ return REG_MISSING;
+ }
+ }
+
+ if (naccepted == 0)
+ {
+ Idx dest_node;
+ ok = re_node_set_insert (eps_via_nodes, node);
+ if (BE (! ok, 0))
+ return REG_ERROR;
+ dest_node = dfa->edests[node].elems[0];
+ if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
+ dest_node))
+ return dest_node;
+ }
+ }
+
+ if (naccepted != 0
+ || check_node_accept (mctx, dfa->nodes + node, *pidx))
+ {
+ Idx dest_node = dfa->nexts[node];
+ *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
+ if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
+ || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
+ dest_node)))
+ return REG_MISSING;
+ re_node_set_empty (eps_via_nodes);
+ return dest_node;
+ }
+ }
+ return REG_MISSING;
+}
+
+static reg_errcode_t
+internal_function
+push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
+ Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
+{
+ reg_errcode_t err;
+ Idx num = fs->num++;
+ if (fs->num == fs->alloc)
+ {
+ struct re_fail_stack_ent_t *new_array;
+ new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
+ * fs->alloc * 2));
+ if (new_array == NULL)
+ return REG_ESPACE;
+ fs->alloc *= 2;
+ fs->stack = new_array;
+ }
+ fs->stack[num].idx = str_idx;
+ fs->stack[num].node = dest_node;
+ fs->stack[num].regs = re_malloc (regmatch_t, nregs);
+ if (fs->stack[num].regs == NULL)
+ return REG_ESPACE;
+ memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
+ err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
+ return err;
+}
+
+static Idx
+internal_function
+pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
+ regmatch_t *regs, re_node_set *eps_via_nodes)
+{
+ Idx num = --fs->num;
+ assert (REG_VALID_INDEX (num));
+ *pidx = fs->stack[num].idx;
+ memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
+ re_node_set_free (eps_via_nodes);
+ re_free (fs->stack[num].regs);
+ *eps_via_nodes = fs->stack[num].eps_via_nodes;
+ return fs->stack[num].node;
+}
+
+/* Set the positions where the subexpressions are starts/ends to registers
+ PMATCH.
+ Note: We assume that pmatch[0] is already set, and
+ pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */
+
+static reg_errcode_t
+internal_function
+set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
+ regmatch_t *pmatch, bool fl_backtrack)
+{
+ const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
+ Idx idx, cur_node;
+ re_node_set eps_via_nodes;
+ struct re_fail_stack_t *fs;
+ struct re_fail_stack_t fs_body = { 0, 2, NULL };
+ regmatch_t *prev_idx_match;
+ bool prev_idx_match_malloced = false;
+
+#ifdef DEBUG
+ assert (nmatch > 1);
+ assert (mctx->state_log != NULL);
+#endif
+ if (fl_backtrack)
+ {
+ fs = &fs_body;
+ fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
+ if (fs->stack == NULL)
+ return REG_ESPACE;
+ }
+ else
+ fs = NULL;
+
+ cur_node = dfa->init_node;
+ re_node_set_init_empty (&eps_via_nodes);
+
+ if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
+ prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
+ else
+ {
+ prev_idx_match = re_malloc (regmatch_t, nmatch);
+ if (prev_idx_match == NULL)
+ {
+ free_fail_stack_return (fs);
+ return REG_ESPACE;
+ }
+ prev_idx_match_malloced = true;
+ }
+ memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
+
+ for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
+ {
+ update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
+
+ if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
+ {
+ Idx reg_idx;
+ if (fs)
+ {
+ for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
+ if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
+ break;
+ if (reg_idx == nmatch)
+ {
+ re_node_set_free (&eps_via_nodes);
+ if (prev_idx_match_malloced)
+ re_free (prev_idx_match);
+ return free_fail_stack_return (fs);
+ }
+ cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
+ &eps_via_nodes);
+ }
+ else
+ {
+ re_node_set_free (&eps_via_nodes);
+ if (prev_idx_match_malloced)
+ re_free (prev_idx_match);
+ return REG_NOERROR;
+ }
+ }
+
+ /* Proceed to next node. */
+ cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
+ &eps_via_nodes, fs);
+
+ if (BE (! REG_VALID_INDEX (cur_node), 0))
+ {
+ if (BE (cur_node == REG_ERROR, 0))
+ {
+ re_node_set_free (&eps_via_nodes);
+ if (prev_idx_match_malloced)
+ re_free (prev_idx_match);
+ free_fail_stack_return (fs);
+ return REG_ESPACE;
+ }
+ if (fs)
+ cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
+ &eps_via_nodes);
+ else
+ {
+ re_node_set_free (&eps_via_nodes);
+ if (prev_idx_match_malloced)
+ re_free (prev_idx_match);
+ return REG_NOMATCH;
+ }
+ }
+ }
+ re_node_set_free (&eps_via_nodes);
+ if (prev_idx_match_malloced)
+ re_free (prev_idx_match);
+ return free_fail_stack_return (fs);
+}
+
+static reg_errcode_t
+internal_function
+free_fail_stack_return (struct re_fail_stack_t *fs)
+{
+ if (fs)
+ {
+ Idx fs_idx;
+ for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
+ {
+ re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
+ re_free (fs->stack[fs_idx].regs);
+ }
+ re_free (fs->stack);
+ }
+ return REG_NOERROR;
+}
+
+static void
+internal_function
+update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
+ regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
+{
+ int type = dfa->nodes[cur_node].type;
+ if (type == OP_OPEN_SUBEXP)
+ {
+ Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
+
+ /* We are at the first node of this sub expression. */
+ if (reg_num < nmatch)
+ {
+ pmatch[reg_num].rm_so = cur_idx;
+ pmatch[reg_num].rm_eo = -1;
+ }
+ }
+ else if (type == OP_CLOSE_SUBEXP)
+ {
+ Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
+ if (reg_num < nmatch)
+ {
+ /* We are at the last node of this sub expression. */
+ if (pmatch[reg_num].rm_so < cur_idx)
+ {
+ pmatch[reg_num].rm_eo = cur_idx;
+ /* This is a non-empty match or we are not inside an optional
+ subexpression. Accept this right away. */
+ memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
+ }
+ else
+ {
+ if (dfa->nodes[cur_node].opt_subexp
+ && prev_idx_match[reg_num].rm_so != -1)
+ /* We transited through an empty match for an optional
+ subexpression, like (a?)*, and this is not the subexp's
+ first match. Copy back the old content of the registers
+ so that matches of an inner subexpression are undone as
+ well, like in ((a?))*. */
+ memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
+ else
+ /* We completed a subexpression, but it may be part of
+ an optional one, so do not update PREV_IDX_MATCH. */
+ pmatch[reg_num].rm_eo = cur_idx;
+ }
+ }
+ }
+}
+
+/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
+ and sift the nodes in each states according to the following rules.
+ Updated state_log will be wrote to STATE_LOG.
+
+ Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
+ 1. When STR_IDX == MATCH_LAST(the last index in the state_log):
+ If `a' isn't the LAST_NODE and `a' can't epsilon transit to
+ the LAST_NODE, we throw away the node `a'.
+ 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
+ string `s' and transit to `b':
+ i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
+ away the node `a'.
+ ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
+ thrown away, we throw away the node `a'.
+ 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
+ i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
+ node `a'.
+ ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
+ we throw away the node `a'. */
+
+#define STATE_NODE_CONTAINS(state,node) \
+ ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
+
+static reg_errcode_t
+internal_function
+sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
+{
+ reg_errcode_t err;
+ int null_cnt = 0;
+ Idx str_idx = sctx->last_str_idx;
+ re_node_set cur_dest;
+
+#ifdef DEBUG
+ assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
+#endif
+
+ /* Build sifted state_log[str_idx]. It has the nodes which can epsilon
+ transit to the last_node and the last_node itself. */
+ err = re_node_set_init_1 (&cur_dest, sctx->last_node);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+
+ /* Then check each states in the state_log. */
+ while (str_idx > 0)
+ {
+ /* Update counters. */
+ null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
+ if (null_cnt > mctx->max_mb_elem_len)
+ {
+ memset (sctx->sifted_states, '\0',
+ sizeof (re_dfastate_t *) * str_idx);
+ re_node_set_free (&cur_dest);
+ return REG_NOERROR;
+ }
+ re_node_set_empty (&cur_dest);
+ --str_idx;
+
+ if (mctx->state_log[str_idx])
+ {
+ err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ }
+
+ /* Add all the nodes which satisfy the following conditions:
+ - It can epsilon transit to a node in CUR_DEST.
+ - It is in CUR_SRC.
+ And update state_log. */
+ err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ err = REG_NOERROR;
+ free_return:
+ re_node_set_free (&cur_dest);
+ return err;
+}
+
+static reg_errcode_t
+internal_function
+build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
+ Idx str_idx, re_node_set *cur_dest)
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
+ Idx i;
+
+ /* Then build the next sifted state.
+ We build the next sifted state on `cur_dest', and update
+ `sifted_states[str_idx]' with `cur_dest'.
+ Note:
+ `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
+ `cur_src' points the node_set of the old `state_log[str_idx]'
+ (with the epsilon nodes pre-filtered out). */
+ for (i = 0; i < cur_src->nelem; i++)
+ {
+ Idx prev_node = cur_src->elems[i];
+ int naccepted = 0;
+ bool ok;
+
+#ifdef DEBUG
+ re_token_type_t type = dfa->nodes[prev_node].type;
+ assert (!IS_EPSILON_NODE (type));
+#endif
+#ifdef RE_ENABLE_I18N
+ /* If the node may accept `multi byte'. */
+ if (dfa->nodes[prev_node].accept_mb)
+ naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
+ str_idx, sctx->last_str_idx);
+#endif /* RE_ENABLE_I18N */
+
+ /* We don't check backreferences here.
+ See update_cur_sifted_state(). */
+ if (!naccepted
+ && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
+ && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
+ dfa->nexts[prev_node]))
+ naccepted = 1;
+
+ if (naccepted == 0)
+ continue;
+
+ if (sctx->limits.nelem)
+ {
+ Idx to_idx = str_idx + naccepted;
+ if (check_dst_limits (mctx, &sctx->limits,
+ dfa->nexts[prev_node], to_idx,
+ prev_node, str_idx))
+ continue;
+ }
+ ok = re_node_set_insert (cur_dest, prev_node);
+ if (BE (! ok, 0))
+ return REG_ESPACE;
+ }
+
+ return REG_NOERROR;
+}
+
+/* Helper functions. */
+
+static reg_errcode_t
+internal_function
+clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
+{
+ Idx top = mctx->state_log_top;
+
+ if (next_state_log_idx >= mctx->input.bufs_len
+ || (next_state_log_idx >= mctx->input.valid_len
+ && mctx->input.valid_len < mctx->input.len))
+ {
+ reg_errcode_t err;
+ err = extend_buffers (mctx);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+
+ if (top < next_state_log_idx)
+ {
+ memset (mctx->state_log + top + 1, '\0',
+ sizeof (re_dfastate_t *) * (next_state_log_idx - top));
+ mctx->state_log_top = next_state_log_idx;
+ }
+ return REG_NOERROR;
+}
+
+static reg_errcode_t
+internal_function
+merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
+ re_dfastate_t **src, Idx num)
+{
+ Idx st_idx;
+ reg_errcode_t err;
+ for (st_idx = 0; st_idx < num; ++st_idx)
+ {
+ if (dst[st_idx] == NULL)
+ dst[st_idx] = src[st_idx];
+ else if (src[st_idx] != NULL)
+ {
+ re_node_set merged_set;
+ err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
+ &src[st_idx]->nodes);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
+ re_node_set_free (&merged_set);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ }
+ return REG_NOERROR;
+}
+
+static reg_errcode_t
+internal_function
+update_cur_sifted_state (const re_match_context_t *mctx,
+ re_sift_context_t *sctx, Idx str_idx,
+ re_node_set *dest_nodes)
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ reg_errcode_t err = REG_NOERROR;
+ const re_node_set *candidates;
+ candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
+ : &mctx->state_log[str_idx]->nodes);
+
+ if (dest_nodes->nelem == 0)
+ sctx->sifted_states[str_idx] = NULL;
+ else
+ {
+ if (candidates)
+ {
+ /* At first, add the nodes which can epsilon transit to a node in
+ DEST_NODE. */
+ err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+
+ /* Then, check the limitations in the current sift_context. */
+ if (sctx->limits.nelem)
+ {
+ err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
+ mctx->bkref_ents, str_idx);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ }
+
+ sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+
+ if (candidates && mctx->state_log[str_idx]->has_backref)
+ {
+ err = sift_states_bkref (mctx, sctx, str_idx, candidates);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ return REG_NOERROR;
+}
+
+static reg_errcode_t
+internal_function
+add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
+ const re_node_set *candidates)
+{
+ reg_errcode_t err = REG_NOERROR;
+ Idx i;
+
+ re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+
+ if (!state->inveclosure.alloc)
+ {
+ err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
+ if (BE (err != REG_NOERROR, 0))
+ return REG_ESPACE;
+ for (i = 0; i < dest_nodes->nelem; i++)
+ re_node_set_merge (&state->inveclosure,
+ dfa->inveclosures + dest_nodes->elems[i]);
+ }
+ return re_node_set_add_intersect (dest_nodes, candidates,
+ &state->inveclosure);
+}
+
+static reg_errcode_t
+internal_function
+sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
+ const re_node_set *candidates)
+{
+ Idx ecl_idx;
+ reg_errcode_t err;
+ re_node_set *inv_eclosure = dfa->inveclosures + node;
+ re_node_set except_nodes;
+ re_node_set_init_empty (&except_nodes);
+ for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
+ {
+ Idx cur_node = inv_eclosure->elems[ecl_idx];
+ if (cur_node == node)
+ continue;
+ if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
+ {
+ Idx edst1 = dfa->edests[cur_node].elems[0];
+ Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
+ ? dfa->edests[cur_node].elems[1] : REG_MISSING);
+ if ((!re_node_set_contains (inv_eclosure, edst1)
+ && re_node_set_contains (dest_nodes, edst1))
+ || (REG_VALID_NONZERO_INDEX (edst2)
+ && !re_node_set_contains (inv_eclosure, edst2)
+ && re_node_set_contains (dest_nodes, edst2)))
+ {
+ err = re_node_set_add_intersect (&except_nodes, candidates,
+ dfa->inveclosures + cur_node);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&except_nodes);
+ return err;
+ }
+ }
+ }
+ }
+ for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
+ {
+ Idx cur_node = inv_eclosure->elems[ecl_idx];
+ if (!re_node_set_contains (&except_nodes, cur_node))
+ {
+ Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
+ re_node_set_remove_at (dest_nodes, idx);
+ }
+ }
+ re_node_set_free (&except_nodes);
+ return REG_NOERROR;
+}
+
+static bool
+internal_function
+check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
+ Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ Idx lim_idx, src_pos, dst_pos;
+
+ Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
+ Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
+ for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
+ {
+ Idx subexp_idx;
+ struct re_backref_cache_entry *ent;
+ ent = mctx->bkref_ents + limits->elems[lim_idx];
+ subexp_idx = dfa->nodes[ent->node].opr.idx;
+
+ dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
+ subexp_idx, dst_node, dst_idx,
+ dst_bkref_idx);
+ src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
+ subexp_idx, src_node, src_idx,
+ src_bkref_idx);
+
+ /* In case of:
+ <src> <dst> ( <subexp> )
+ ( <subexp> ) <src> <dst>
+ ( <subexp1> <src> <subexp2> <dst> <subexp3> ) */
+ if (src_pos == dst_pos)
+ continue; /* This is unrelated limitation. */
+ else
+ return true;
+ }
+ return false;
+}
+
+static int
+internal_function
+check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
+ Idx subexp_idx, Idx from_node, Idx bkref_idx)
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ const re_node_set *eclosures = dfa->eclosures + from_node;
+ Idx node_idx;
+
+ /* Else, we are on the boundary: examine the nodes on the epsilon
+ closure. */
+ for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
+ {
+ Idx node = eclosures->elems[node_idx];
+ switch (dfa->nodes[node].type)
+ {
+ case OP_BACK_REF:
+ if (bkref_idx != REG_MISSING)
+ {
+ struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
+ do
+ {
+ Idx dst;
+ int cpos;
+
+ if (ent->node != node)
+ continue;
+
+ if (subexp_idx < BITSET_WORD_BITS
+ && !(ent->eps_reachable_subexps_map
+ & ((bitset_word_t) 1 << subexp_idx)))
+ continue;
+
+ /* Recurse trying to reach the OP_OPEN_SUBEXP and
+ OP_CLOSE_SUBEXP cases below. But, if the
+ destination node is the same node as the source
+ node, don't recurse because it would cause an
+ infinite loop: a regex that exhibits this behavior
+ is ()\1*\1* */
+ dst = dfa->edests[node].elems[0];
+ if (dst == from_node)
+ {
+ if (boundaries & 1)
+ return -1;
+ else /* if (boundaries & 2) */
+ return 0;
+ }
+
+ cpos =
+ check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
+ dst, bkref_idx);
+ if (cpos == -1 /* && (boundaries & 1) */)
+ return -1;
+ if (cpos == 0 && (boundaries & 2))
+ return 0;
+
+ if (subexp_idx < BITSET_WORD_BITS)
+ ent->eps_reachable_subexps_map
+ &= ~((bitset_word_t) 1 << subexp_idx);
+ }
+ while (ent++->more);
+ }
+ break;
+
+ case OP_OPEN_SUBEXP:
+ if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
+ return -1;
+ break;
+
+ case OP_CLOSE_SUBEXP:
+ if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
+ return 0;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ return (boundaries & 2) ? 1 : 0;
+}
+
+static int
+internal_function
+check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
+ Idx subexp_idx, Idx from_node, Idx str_idx,
+ Idx bkref_idx)
+{
+ struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
+ int boundaries;
+
+ /* If we are outside the range of the subexpression, return -1 or 1. */
+ if (str_idx < lim->subexp_from)
+ return -1;
+
+ if (lim->subexp_to < str_idx)
+ return 1;
+
+ /* If we are within the subexpression, return 0. */
+ boundaries = (str_idx == lim->subexp_from);
+ boundaries |= (str_idx == lim->subexp_to) << 1;
+ if (boundaries == 0)
+ return 0;
+
+ /* Else, examine epsilon closure. */
+ return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
+ from_node, bkref_idx);
+}
+
+/* Check the limitations of sub expressions LIMITS, and remove the nodes
+ which are against limitations from DEST_NODES. */
+
+static reg_errcode_t
+internal_function
+check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
+ const re_node_set *candidates, re_node_set *limits,
+ struct re_backref_cache_entry *bkref_ents, Idx str_idx)
+{
+ reg_errcode_t err;
+ Idx node_idx, lim_idx;
+
+ for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
+ {
+ Idx subexp_idx;
+ struct re_backref_cache_entry *ent;
+ ent = bkref_ents + limits->elems[lim_idx];
+
+ if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
+ continue; /* This is unrelated limitation. */
+
+ subexp_idx = dfa->nodes[ent->node].opr.idx;
+ if (ent->subexp_to == str_idx)
+ {
+ Idx ops_node = REG_MISSING;
+ Idx cls_node = REG_MISSING;
+ for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
+ {
+ Idx node = dest_nodes->elems[node_idx];
+ re_token_type_t type = dfa->nodes[node].type;
+ if (type == OP_OPEN_SUBEXP
+ && subexp_idx == dfa->nodes[node].opr.idx)
+ ops_node = node;
+ else if (type == OP_CLOSE_SUBEXP
+ && subexp_idx == dfa->nodes[node].opr.idx)
+ cls_node = node;
+ }
+
+ /* Check the limitation of the open subexpression. */
+ /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */
+ if (REG_VALID_INDEX (ops_node))
+ {
+ err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
+ candidates);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+
+ /* Check the limitation of the close subexpression. */
+ if (REG_VALID_INDEX (cls_node))
+ for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
+ {
+ Idx node = dest_nodes->elems[node_idx];
+ if (!re_node_set_contains (dfa->inveclosures + node,
+ cls_node)
+ && !re_node_set_contains (dfa->eclosures + node,
+ cls_node))
+ {
+ /* It is against this limitation.
+ Remove it form the current sifted state. */
+ err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
+ candidates);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ --node_idx;
+ }
+ }
+ }
+ else /* (ent->subexp_to != str_idx) */
+ {
+ for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
+ {
+ Idx node = dest_nodes->elems[node_idx];
+ re_token_type_t type = dfa->nodes[node].type;
+ if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
+ {
+ if (subexp_idx != dfa->nodes[node].opr.idx)
+ continue;
+ /* It is against this limitation.
+ Remove it form the current sifted state. */
+ err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
+ candidates);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ }
+ }
+ }
+ return REG_NOERROR;
+}
+
+static reg_errcode_t
+internal_function
+sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
+ Idx str_idx, const re_node_set *candidates)
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ reg_errcode_t err;
+ Idx node_idx, node;
+ re_sift_context_t local_sctx;
+ Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
+
+ if (first_idx == REG_MISSING)
+ return REG_NOERROR;
+
+ local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */
+
+ for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
+ {
+ Idx enabled_idx;
+ re_token_type_t type;
+ struct re_backref_cache_entry *entry;
+ node = candidates->elems[node_idx];
+ type = dfa->nodes[node].type;
+ /* Avoid infinite loop for the REs like "()\1+". */
+ if (node == sctx->last_node && str_idx == sctx->last_str_idx)
+ continue;
+ if (type != OP_BACK_REF)
+ continue;
+
+ entry = mctx->bkref_ents + first_idx;
+ enabled_idx = first_idx;
+ do
+ {
+ Idx subexp_len;
+ Idx to_idx;
+ Idx dst_node;
+ bool ok;
+ re_dfastate_t *cur_state;
+
+ if (entry->node != node)
+ continue;
+ subexp_len = entry->subexp_to - entry->subexp_from;
+ to_idx = str_idx + subexp_len;
+ dst_node = (subexp_len ? dfa->nexts[node]
+ : dfa->edests[node].elems[0]);
+
+ if (to_idx > sctx->last_str_idx
+ || sctx->sifted_states[to_idx] == NULL
+ || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
+ || check_dst_limits (mctx, &sctx->limits, node,
+ str_idx, dst_node, to_idx))
+ continue;
+
+ if (local_sctx.sifted_states == NULL)
+ {
+ local_sctx = *sctx;
+ err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ local_sctx.last_node = node;
+ local_sctx.last_str_idx = str_idx;
+ ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
+ if (BE (! ok, 0))
+ {
+ err = REG_ESPACE;
+ goto free_return;
+ }
+ cur_state = local_sctx.sifted_states[str_idx];
+ err = sift_states_backward (mctx, &local_sctx);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ if (sctx->limited_states != NULL)
+ {
+ err = merge_state_array (dfa, sctx->limited_states,
+ local_sctx.sifted_states,
+ str_idx + 1);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ local_sctx.sifted_states[str_idx] = cur_state;
+ re_node_set_remove (&local_sctx.limits, enabled_idx);
+
+ /* mctx->bkref_ents may have changed, reload the pointer. */
+ entry = mctx->bkref_ents + enabled_idx;
+ }
+ while (enabled_idx++, entry++->more);
+ }
+ err = REG_NOERROR;
+ free_return:
+ if (local_sctx.sifted_states != NULL)
+ {
+ re_node_set_free (&local_sctx.limits);
+ }
+
+ return err;
+}
+
+
+#ifdef RE_ENABLE_I18N
+static int
+internal_function
+sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
+ Idx node_idx, Idx str_idx, Idx max_str_idx)
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ int naccepted;
+ /* Check the node can accept `multi byte'. */
+ naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
+ if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
+ !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
+ dfa->nexts[node_idx]))
+ /* The node can't accept the `multi byte', or the
+ destination was already thrown away, then the node
+ could't accept the current input `multi byte'. */
+ naccepted = 0;
+ /* Otherwise, it is sure that the node could accept
+ `naccepted' bytes input. */
+ return naccepted;
+}
+#endif /* RE_ENABLE_I18N */
+
+
+/* Functions for state transition. */
+
+/* Return the next state to which the current state STATE will transit by
+ accepting the current input byte, and update STATE_LOG if necessary.
+ If STATE can accept a multibyte char/collating element/back reference
+ update the destination of STATE_LOG. */
+
+static re_dfastate_t *
+internal_function
+transit_state (reg_errcode_t *err, re_match_context_t *mctx,
+ re_dfastate_t *state)
+{
+ re_dfastate_t **trtable;
+ unsigned char ch;
+
+#ifdef RE_ENABLE_I18N
+ /* If the current state can accept multibyte. */
+ if (BE (state->accept_mb, 0))
+ {
+ *err = transit_state_mb (mctx, state);
+ if (BE (*err != REG_NOERROR, 0))
+ return NULL;
+ }
+#endif /* RE_ENABLE_I18N */
+
+ /* Then decide the next state with the single byte. */
+#if 0
+ if (0)
+ /* don't use transition table */
+ return transit_state_sb (err, mctx, state);
+#endif
+
+ /* Use transition table */
+ ch = re_string_fetch_byte (&mctx->input);
+ for (;;)
+ {
+ trtable = state->trtable;
+ if (BE (trtable != NULL, 1))
+ return trtable[ch];
+
+ trtable = state->word_trtable;
+ if (BE (trtable != NULL, 1))
+ {
+ unsigned int context;
+ context
+ = re_string_context_at (&mctx->input,
+ re_string_cur_idx (&mctx->input) - 1,
+ mctx->eflags);
+ if (IS_WORD_CONTEXT (context))
+ return trtable[ch + SBC_MAX];
+ else
+ return trtable[ch];
+ }
+
+ if (!build_trtable (mctx->dfa, state))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+
+ /* Retry, we now have a transition table. */
+ }
+}
+
+/* Update the state_log if we need */
+re_dfastate_t *
+internal_function
+merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
+ re_dfastate_t *next_state)
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ Idx cur_idx = re_string_cur_idx (&mctx->input);
+
+ if (cur_idx > mctx->state_log_top)
+ {
+ mctx->state_log[cur_idx] = next_state;
+ mctx->state_log_top = cur_idx;
+ }
+ else if (mctx->state_log[cur_idx] == 0)
+ {
+ mctx->state_log[cur_idx] = next_state;
+ }
+ else
+ {
+ re_dfastate_t *pstate;
+ unsigned int context;
+ re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
+ /* If (state_log[cur_idx] != 0), it implies that cur_idx is
+ the destination of a multibyte char/collating element/
+ back reference. Then the next state is the union set of
+ these destinations and the results of the transition table. */
+ pstate = mctx->state_log[cur_idx];
+ log_nodes = pstate->entrance_nodes;
+ if (next_state != NULL)
+ {
+ table_nodes = next_state->entrance_nodes;
+ *err = re_node_set_init_union (&next_nodes, table_nodes,
+ log_nodes);
+ if (BE (*err != REG_NOERROR, 0))
+ return NULL;
+ }
+ else
+ next_nodes = *log_nodes;
+ /* Note: We already add the nodes of the initial state,
+ then we don't need to add them here. */
+
+ context = re_string_context_at (&mctx->input,
+ re_string_cur_idx (&mctx->input) - 1,
+ mctx->eflags);
+ next_state = mctx->state_log[cur_idx]
+ = re_acquire_state_context (err, dfa, &next_nodes, context);
+ /* We don't need to check errors here, since the return value of
+ this function is next_state and ERR is already set. */
+
+ if (table_nodes != NULL)
+ re_node_set_free (&next_nodes);
+ }
+
+ if (BE (dfa->nbackref, 0) && next_state != NULL)
+ {
+ /* Check OP_OPEN_SUBEXP in the current state in case that we use them
+ later. We must check them here, since the back references in the
+ next state might use them. */
+ *err = check_subexp_matching_top (mctx, &next_state->nodes,
+ cur_idx);
+ if (BE (*err != REG_NOERROR, 0))
+ return NULL;
+
+ /* If the next state has back references. */
+ if (next_state->has_backref)
+ {
+ *err = transit_state_bkref (mctx, &next_state->nodes);
+ if (BE (*err != REG_NOERROR, 0))
+ return NULL;
+ next_state = mctx->state_log[cur_idx];
+ }
+ }
+
+ return next_state;
+}
+
+/* Skip bytes in the input that correspond to part of a
+ multi-byte match, then look in the log for a state
+ from which to restart matching. */
+static re_dfastate_t *
+internal_function
+find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
+{
+ re_dfastate_t *cur_state;
+ do
+ {
+ Idx max = mctx->state_log_top;
+ Idx cur_str_idx = re_string_cur_idx (&mctx->input);
+
+ do
+ {
+ if (++cur_str_idx > max)
+ return NULL;
+ re_string_skip_bytes (&mctx->input, 1);
+ }
+ while (mctx->state_log[cur_str_idx] == NULL);
+
+ cur_state = merge_state_with_log (err, mctx, NULL);
+ }
+ while (*err == REG_NOERROR && cur_state == NULL);
+ return cur_state;
+}
+
+/* Helper functions for transit_state. */
+
+/* From the node set CUR_NODES, pick up the nodes whose types are
+ OP_OPEN_SUBEXP and which have corresponding back references in the regular
+ expression. And register them to use them later for evaluating the
+ correspoding back references. */
+
+static reg_errcode_t
+internal_function
+check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
+ Idx str_idx)
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ Idx node_idx;
+ reg_errcode_t err;
+
+ /* TODO: This isn't efficient.
+ Because there might be more than one nodes whose types are
+ OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
+ nodes.
+ E.g. RE: (a){2} */
+ for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
+ {
+ Idx node = cur_nodes->elems[node_idx];
+ if (dfa->nodes[node].type == OP_OPEN_SUBEXP
+ && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
+ && (dfa->used_bkref_map
+ & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
+ {
+ err = match_ctx_add_subtop (mctx, node, str_idx);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ }
+ return REG_NOERROR;
+}
+
+#if 0
+/* Return the next state to which the current state STATE will transit by
+ accepting the current input byte. */
+
+static re_dfastate_t *
+transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
+ re_dfastate_t *state)
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ re_node_set next_nodes;
+ re_dfastate_t *next_state;
+ Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
+ unsigned int context;
+
+ *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
+ if (BE (*err != REG_NOERROR, 0))
+ return NULL;
+ for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
+ {
+ Idx cur_node = state->nodes.elems[node_cnt];
+ if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
+ {
+ *err = re_node_set_merge (&next_nodes,
+ dfa->eclosures + dfa->nexts[cur_node]);
+ if (BE (*err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&next_nodes);
+ return NULL;
+ }
+ }
+ }
+ context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
+ next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
+ /* We don't need to check errors here, since the return value of
+ this function is next_state and ERR is already set. */
+
+ re_node_set_free (&next_nodes);
+ re_string_skip_bytes (&mctx->input, 1);
+ return next_state;
+}
+#endif
+
+#ifdef RE_ENABLE_I18N
+static reg_errcode_t
+internal_function
+transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ reg_errcode_t err;
+ Idx i;
+
+ for (i = 0; i < pstate->nodes.nelem; ++i)
+ {
+ re_node_set dest_nodes, *new_nodes;
+ Idx cur_node_idx = pstate->nodes.elems[i];
+ int naccepted;
+ Idx dest_idx;
+ unsigned int context;
+ re_dfastate_t *dest_state;
+
+ if (!dfa->nodes[cur_node_idx].accept_mb)
+ continue;
+
+ if (dfa->nodes[cur_node_idx].constraint)
+ {
+ context = re_string_context_at (&mctx->input,
+ re_string_cur_idx (&mctx->input),
+ mctx->eflags);
+ if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
+ context))
+ continue;
+ }
+
+ /* How many bytes the node can accept? */
+ naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
+ re_string_cur_idx (&mctx->input));
+ if (naccepted == 0)
+ continue;
+
+ /* The node can accepts `naccepted' bytes. */
+ dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
+ mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
+ : mctx->max_mb_elem_len);
+ err = clean_state_log_if_needed (mctx, dest_idx);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+#ifdef DEBUG
+ assert (dfa->nexts[cur_node_idx] != REG_MISSING);
+#endif
+ new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
+
+ dest_state = mctx->state_log[dest_idx];
+ if (dest_state == NULL)
+ dest_nodes = *new_nodes;
+ else
+ {
+ err = re_node_set_init_union (&dest_nodes,
+ dest_state->entrance_nodes, new_nodes);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ context = re_string_context_at (&mctx->input, dest_idx - 1,
+ mctx->eflags);
+ mctx->state_log[dest_idx]
+ = re_acquire_state_context (&err, dfa, &dest_nodes, context);
+ if (dest_state != NULL)
+ re_node_set_free (&dest_nodes);
+ if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
+ return err;
+ }
+ return REG_NOERROR;
+}
+#endif /* RE_ENABLE_I18N */
+
+static reg_errcode_t
+internal_function
+transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ reg_errcode_t err;
+ Idx i;
+ Idx cur_str_idx = re_string_cur_idx (&mctx->input);
+
+ for (i = 0; i < nodes->nelem; ++i)
+ {
+ Idx dest_str_idx, prev_nelem, bkc_idx;
+ Idx node_idx = nodes->elems[i];
+ unsigned int context;
+ const re_token_t *node = dfa->nodes + node_idx;
+ re_node_set *new_dest_nodes;
+
+ /* Check whether `node' is a backreference or not. */
+ if (node->type != OP_BACK_REF)
+ continue;
+
+ if (node->constraint)
+ {
+ context = re_string_context_at (&mctx->input, cur_str_idx,
+ mctx->eflags);
+ if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
+ continue;
+ }
+
+ /* `node' is a backreference.
+ Check the substring which the substring matched. */
+ bkc_idx = mctx->nbkref_ents;
+ err = get_subexp (mctx, node_idx, cur_str_idx);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+
+ /* And add the epsilon closures (which is `new_dest_nodes') of
+ the backreference to appropriate state_log. */
+#ifdef DEBUG
+ assert (dfa->nexts[node_idx] != REG_MISSING);
+#endif
+ for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
+ {
+ Idx subexp_len;
+ re_dfastate_t *dest_state;
+ struct re_backref_cache_entry *bkref_ent;
+ bkref_ent = mctx->bkref_ents + bkc_idx;
+ if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
+ continue;
+ subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
+ new_dest_nodes = (subexp_len == 0
+ ? dfa->eclosures + dfa->edests[node_idx].elems[0]
+ : dfa->eclosures + dfa->nexts[node_idx]);
+ dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
+ - bkref_ent->subexp_from);
+ context = re_string_context_at (&mctx->input, dest_str_idx - 1,
+ mctx->eflags);
+ dest_state = mctx->state_log[dest_str_idx];
+ prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
+ : mctx->state_log[cur_str_idx]->nodes.nelem);
+ /* Add `new_dest_node' to state_log. */
+ if (dest_state == NULL)
+ {
+ mctx->state_log[dest_str_idx]
+ = re_acquire_state_context (&err, dfa, new_dest_nodes,
+ context);
+ if (BE (mctx->state_log[dest_str_idx] == NULL
+ && err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ else
+ {
+ re_node_set dest_nodes;
+ err = re_node_set_init_union (&dest_nodes,
+ dest_state->entrance_nodes,
+ new_dest_nodes);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&dest_nodes);
+ goto free_return;
+ }
+ mctx->state_log[dest_str_idx]
+ = re_acquire_state_context (&err, dfa, &dest_nodes, context);
+ re_node_set_free (&dest_nodes);
+ if (BE (mctx->state_log[dest_str_idx] == NULL
+ && err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ /* We need to check recursively if the backreference can epsilon
+ transit. */
+ if (subexp_len == 0
+ && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
+ {
+ err = check_subexp_matching_top (mctx, new_dest_nodes,
+ cur_str_idx);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ err = transit_state_bkref (mctx, new_dest_nodes);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ }
+ }
+ err = REG_NOERROR;
+ free_return:
+ return err;
+}
+
+/* Enumerate all the candidates which the backreference BKREF_NODE can match
+ at BKREF_STR_IDX, and register them by match_ctx_add_entry().
+ Note that we might collect inappropriate candidates here.
+ However, the cost of checking them strictly here is too high, then we
+ delay these checking for prune_impossible_nodes(). */
+
+static reg_errcode_t
+internal_function
+get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ Idx subexp_num, sub_top_idx;
+ const char *buf = (const char *) re_string_get_buffer (&mctx->input);
+ /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */
+ Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
+ if (cache_idx != REG_MISSING)
+ {
+ const struct re_backref_cache_entry *entry
+ = mctx->bkref_ents + cache_idx;
+ do
+ if (entry->node == bkref_node)
+ return REG_NOERROR; /* We already checked it. */
+ while (entry++->more);
+ }
+
+ subexp_num = dfa->nodes[bkref_node].opr.idx;
+
+ /* For each sub expression */
+ for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
+ {
+ reg_errcode_t err;
+ re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
+ re_sub_match_last_t *sub_last;
+ Idx sub_last_idx, sl_str, bkref_str_off;
+
+ if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
+ continue; /* It isn't related. */
+
+ sl_str = sub_top->str_idx;
+ bkref_str_off = bkref_str_idx;
+ /* At first, check the last node of sub expressions we already
+ evaluated. */
+ for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
+ {
+ regoff_t sl_str_diff;
+ sub_last = sub_top->lasts[sub_last_idx];
+ sl_str_diff = sub_last->str_idx - sl_str;
+ /* The matched string by the sub expression match with the substring
+ at the back reference? */
+ if (sl_str_diff > 0)
+ {
+ if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
+ {
+ /* Not enough chars for a successful match. */
+ if (bkref_str_off + sl_str_diff > mctx->input.len)
+ break;
+
+ err = clean_state_log_if_needed (mctx,
+ bkref_str_off
+ + sl_str_diff);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ buf = (const char *) re_string_get_buffer (&mctx->input);
+ }
+ if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
+ /* We don't need to search this sub expression any more. */
+ break;
+ }
+ bkref_str_off += sl_str_diff;
+ sl_str += sl_str_diff;
+ err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
+ bkref_str_idx);
+
+ /* Reload buf, since the preceding call might have reallocated
+ the buffer. */
+ buf = (const char *) re_string_get_buffer (&mctx->input);
+
+ if (err == REG_NOMATCH)
+ continue;
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+
+ if (sub_last_idx < sub_top->nlasts)
+ continue;
+ if (sub_last_idx > 0)
+ ++sl_str;
+ /* Then, search for the other last nodes of the sub expression. */
+ for (; sl_str <= bkref_str_idx; ++sl_str)
+ {
+ Idx cls_node;
+ regoff_t sl_str_off;
+ const re_node_set *nodes;
+ sl_str_off = sl_str - sub_top->str_idx;
+ /* The matched string by the sub expression match with the substring
+ at the back reference? */
+ if (sl_str_off > 0)
+ {
+ if (BE (bkref_str_off >= mctx->input.valid_len, 0))
+ {
+ /* If we are at the end of the input, we cannot match. */
+ if (bkref_str_off >= mctx->input.len)
+ break;
+
+ err = extend_buffers (mctx);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+
+ buf = (const char *) re_string_get_buffer (&mctx->input);
+ }
+ if (buf [bkref_str_off++] != buf[sl_str - 1])
+ break; /* We don't need to search this sub expression
+ any more. */
+ }
+ if (mctx->state_log[sl_str] == NULL)
+ continue;
+ /* Does this state have a ')' of the sub expression? */
+ nodes = &mctx->state_log[sl_str]->nodes;
+ cls_node = find_subexp_node (dfa, nodes, subexp_num,
+ OP_CLOSE_SUBEXP);
+ if (cls_node == REG_MISSING)
+ continue; /* No. */
+ if (sub_top->path == NULL)
+ {
+ sub_top->path = calloc (sizeof (state_array_t),
+ sl_str - sub_top->str_idx + 1);
+ if (sub_top->path == NULL)
+ return REG_ESPACE;
+ }
+ /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
+ in the current context? */
+ err = check_arrival (mctx, sub_top->path, sub_top->node,
+ sub_top->str_idx, cls_node, sl_str,
+ OP_CLOSE_SUBEXP);
+ if (err == REG_NOMATCH)
+ continue;
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
+ if (BE (sub_last == NULL, 0))
+ return REG_ESPACE;
+ err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
+ bkref_str_idx);
+ if (err == REG_NOMATCH)
+ continue;
+ }
+ }
+ return REG_NOERROR;
+}
+
+/* Helper functions for get_subexp(). */
+
+/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
+ If it can arrive, register the sub expression expressed with SUB_TOP
+ and SUB_LAST. */
+
+static reg_errcode_t
+internal_function
+get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
+ re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
+{
+ reg_errcode_t err;
+ Idx to_idx;
+ /* Can the subexpression arrive the back reference? */
+ err = check_arrival (mctx, &sub_last->path, sub_last->node,
+ sub_last->str_idx, bkref_node, bkref_str,
+ OP_OPEN_SUBEXP);
+ if (err != REG_NOERROR)
+ return err;
+ err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
+ sub_last->str_idx);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
+ return clean_state_log_if_needed (mctx, to_idx);
+}
+
+/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
+ Search '(' if FL_OPEN, or search ')' otherwise.
+ TODO: This function isn't efficient...
+ Because there might be more than one nodes whose types are
+ OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
+ nodes.
+ E.g. RE: (a){2} */
+
+static Idx
+internal_function
+find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
+ Idx subexp_idx, int type)
+{
+ Idx cls_idx;
+ for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
+ {
+ Idx cls_node = nodes->elems[cls_idx];
+ const re_token_t *node = dfa->nodes + cls_node;
+ if (node->type == type
+ && node->opr.idx == subexp_idx)
+ return cls_node;
+ }
+ return REG_MISSING;
+}
+
+/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
+ LAST_NODE at LAST_STR. We record the path onto PATH since it will be
+ heavily reused.
+ Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */
+
+static reg_errcode_t
+internal_function
+check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
+ Idx top_str, Idx last_node, Idx last_str, int type)
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ reg_errcode_t err = REG_NOERROR;
+ Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
+ re_dfastate_t *cur_state = NULL;
+ re_node_set *cur_nodes, next_nodes;
+ re_dfastate_t **backup_state_log;
+ unsigned int context;
+
+ subexp_num = dfa->nodes[top_node].opr.idx;
+ /* Extend the buffer if we need. */
+ if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
+ {
+ re_dfastate_t **new_array;
+ Idx old_alloc = path->alloc;
+ Idx new_alloc = old_alloc + last_str + mctx->max_mb_elem_len + 1;
+ if (BE (new_alloc < old_alloc, 0)
+ || BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0))
+ return REG_ESPACE;
+ new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
+ if (BE (new_array == NULL, 0))
+ return REG_ESPACE;
+ path->array = new_array;
+ path->alloc = new_alloc;
+ memset (new_array + old_alloc, '\0',
+ sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
+ }
+
+ str_idx = path->next_idx ? path->next_idx : top_str;
+
+ /* Temporary modify MCTX. */
+ backup_state_log = mctx->state_log;
+ backup_cur_idx = mctx->input.cur_idx;
+ mctx->state_log = path->array;
+ mctx->input.cur_idx = str_idx;
+
+ /* Setup initial node set. */
+ context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
+ if (str_idx == top_str)
+ {
+ err = re_node_set_init_1 (&next_nodes, top_node);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&next_nodes);
+ return err;
+ }
+ }
+ else
+ {
+ cur_state = mctx->state_log[str_idx];
+ if (cur_state && cur_state->has_backref)
+ {
+ err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ else
+ re_node_set_init_empty (&next_nodes);
+ }
+ if (str_idx == top_str || (cur_state && cur_state->has_backref))
+ {
+ if (next_nodes.nelem)
+ {
+ err = expand_bkref_cache (mctx, &next_nodes, str_idx,
+ subexp_num, type);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&next_nodes);
+ return err;
+ }
+ }
+ cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
+ if (BE (cur_state == NULL && err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&next_nodes);
+ return err;
+ }
+ mctx->state_log[str_idx] = cur_state;
+ }
+
+ for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
+ {
+ re_node_set_empty (&next_nodes);
+ if (mctx->state_log[str_idx + 1])
+ {
+ err = re_node_set_merge (&next_nodes,
+ &mctx->state_log[str_idx + 1]->nodes);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&next_nodes);
+ return err;
+ }
+ }
+ if (cur_state)
+ {
+ err = check_arrival_add_next_nodes (mctx, str_idx,
+ &cur_state->non_eps_nodes,
+ &next_nodes);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&next_nodes);
+ return err;
+ }
+ }
+ ++str_idx;
+ if (next_nodes.nelem)
+ {
+ err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&next_nodes);
+ return err;
+ }
+ err = expand_bkref_cache (mctx, &next_nodes, str_idx,
+ subexp_num, type);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&next_nodes);
+ return err;
+ }
+ }
+ context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
+ cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
+ if (BE (cur_state == NULL && err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&next_nodes);
+ return err;
+ }
+ mctx->state_log[str_idx] = cur_state;
+ null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
+ }
+ re_node_set_free (&next_nodes);
+ cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
+ : &mctx->state_log[last_str]->nodes);
+ path->next_idx = str_idx;
+
+ /* Fix MCTX. */
+ mctx->state_log = backup_state_log;
+ mctx->input.cur_idx = backup_cur_idx;
+
+ /* Then check the current node set has the node LAST_NODE. */
+ if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
+ return REG_NOERROR;
+
+ return REG_NOMATCH;
+}
+
+/* Helper functions for check_arrival. */
+
+/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
+ to NEXT_NODES.
+ TODO: This function is similar to the functions transit_state*(),
+ however this function has many additional works.
+ Can't we unify them? */
+
+static reg_errcode_t
+internal_function
+check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
+ re_node_set *cur_nodes, re_node_set *next_nodes)
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ bool ok;
+ Idx cur_idx;
+ reg_errcode_t err = REG_NOERROR;
+ re_node_set union_set;
+ re_node_set_init_empty (&union_set);
+ for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
+ {
+ int naccepted = 0;
+ Idx cur_node = cur_nodes->elems[cur_idx];
+#ifdef DEBUG
+ re_token_type_t type = dfa->nodes[cur_node].type;
+ assert (!IS_EPSILON_NODE (type));
+#endif
+#ifdef RE_ENABLE_I18N
+ /* If the node may accept `multi byte'. */
+ if (dfa->nodes[cur_node].accept_mb)
+ {
+ naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
+ str_idx);
+ if (naccepted > 1)
+ {
+ re_dfastate_t *dest_state;
+ Idx next_node = dfa->nexts[cur_node];
+ Idx next_idx = str_idx + naccepted;
+ dest_state = mctx->state_log[next_idx];
+ re_node_set_empty (&union_set);
+ if (dest_state)
+ {
+ err = re_node_set_merge (&union_set, &dest_state->nodes);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&union_set);
+ return err;
+ }
+ }
+ ok = re_node_set_insert (&union_set, next_node);
+ if (BE (! ok, 0))
+ {
+ re_node_set_free (&union_set);
+ return REG_ESPACE;
+ }
+ mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
+ &union_set);
+ if (BE (mctx->state_log[next_idx] == NULL
+ && err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&union_set);
+ return err;
+ }
+ }
+ }
+#endif /* RE_ENABLE_I18N */
+ if (naccepted
+ || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
+ {
+ ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
+ if (BE (! ok, 0))
+ {
+ re_node_set_free (&union_set);
+ return REG_ESPACE;
+ }
+ }
+ }
+ re_node_set_free (&union_set);
+ return REG_NOERROR;
+}
+
+/* For all the nodes in CUR_NODES, add the epsilon closures of them to
+ CUR_NODES, however exclude the nodes which are:
+ - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
+ - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
+*/
+
+static reg_errcode_t
+internal_function
+check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
+ Idx ex_subexp, int type)
+{
+ reg_errcode_t err;
+ Idx idx, outside_node;
+ re_node_set new_nodes;
+#ifdef DEBUG
+ assert (cur_nodes->nelem);
+#endif
+ err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ /* Create a new node set NEW_NODES with the nodes which are epsilon
+ closures of the node in CUR_NODES. */
+
+ for (idx = 0; idx < cur_nodes->nelem; ++idx)
+ {
+ Idx cur_node = cur_nodes->elems[idx];
+ const re_node_set *eclosure = dfa->eclosures + cur_node;
+ outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
+ if (outside_node == REG_MISSING)
+ {
+ /* There are no problematic nodes, just merge them. */
+ err = re_node_set_merge (&new_nodes, eclosure);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&new_nodes);
+ return err;
+ }
+ }
+ else
+ {
+ /* There are problematic nodes, re-calculate incrementally. */
+ err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
+ ex_subexp, type);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&new_nodes);
+ return err;
+ }
+ }
+ }
+ re_node_set_free (cur_nodes);
+ *cur_nodes = new_nodes;
+ return REG_NOERROR;
+}
+
+/* Helper function for check_arrival_expand_ecl.
+ Check incrementally the epsilon closure of TARGET, and if it isn't
+ problematic append it to DST_NODES. */
+
+static reg_errcode_t
+internal_function
+check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
+ Idx target, Idx ex_subexp, int type)
+{
+ Idx cur_node;
+ for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
+ {
+ bool ok;
+
+ if (dfa->nodes[cur_node].type == type
+ && dfa->nodes[cur_node].opr.idx == ex_subexp)
+ {
+ if (type == OP_CLOSE_SUBEXP)
+ {
+ ok = re_node_set_insert (dst_nodes, cur_node);
+ if (BE (! ok, 0))
+ return REG_ESPACE;
+ }
+ break;
+ }
+ ok = re_node_set_insert (dst_nodes, cur_node);
+ if (BE (! ok, 0))
+ return REG_ESPACE;
+ if (dfa->edests[cur_node].nelem == 0)
+ break;
+ if (dfa->edests[cur_node].nelem == 2)
+ {
+ reg_errcode_t err;
+ err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
+ dfa->edests[cur_node].elems[1],
+ ex_subexp, type);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ cur_node = dfa->edests[cur_node].elems[0];
+ }
+ return REG_NOERROR;
+}
+
+
+/* For all the back references in the current state, calculate the
+ destination of the back references by the appropriate entry
+ in MCTX->BKREF_ENTS. */
+
+static reg_errcode_t
+internal_function
+expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
+ Idx cur_str, Idx subexp_num, int type)
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ reg_errcode_t err;
+ Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
+ struct re_backref_cache_entry *ent;
+
+ if (cache_idx_start == REG_MISSING)
+ return REG_NOERROR;
+
+ restart:
+ ent = mctx->bkref_ents + cache_idx_start;
+ do
+ {
+ Idx to_idx, next_node;
+
+ /* Is this entry ENT is appropriate? */
+ if (!re_node_set_contains (cur_nodes, ent->node))
+ continue; /* No. */
+
+ to_idx = cur_str + ent->subexp_to - ent->subexp_from;
+ /* Calculate the destination of the back reference, and append it
+ to MCTX->STATE_LOG. */
+ if (to_idx == cur_str)
+ {
+ /* The backreference did epsilon transit, we must re-check all the
+ node in the current state. */
+ re_node_set new_dests;
+ reg_errcode_t err2, err3;
+ next_node = dfa->edests[ent->node].elems[0];
+ if (re_node_set_contains (cur_nodes, next_node))
+ continue;
+ err = re_node_set_init_1 (&new_dests, next_node);
+ err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
+ err3 = re_node_set_merge (cur_nodes, &new_dests);
+ re_node_set_free (&new_dests);
+ if (BE (err != REG_NOERROR || err2 != REG_NOERROR
+ || err3 != REG_NOERROR, 0))
+ {
+ err = (err != REG_NOERROR ? err
+ : (err2 != REG_NOERROR ? err2 : err3));
+ return err;
+ }
+ /* TODO: It is still inefficient... */
+ goto restart;
+ }
+ else
+ {
+ re_node_set union_set;
+ next_node = dfa->nexts[ent->node];
+ if (mctx->state_log[to_idx])
+ {
+ bool ok;
+ if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
+ next_node))
+ continue;
+ err = re_node_set_init_copy (&union_set,
+ &mctx->state_log[to_idx]->nodes);
+ ok = re_node_set_insert (&union_set, next_node);
+ if (BE (err != REG_NOERROR || ! ok, 0))
+ {
+ re_node_set_free (&union_set);
+ err = err != REG_NOERROR ? err : REG_ESPACE;
+ return err;
+ }
+ }
+ else
+ {
+ err = re_node_set_init_1 (&union_set, next_node);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
+ re_node_set_free (&union_set);
+ if (BE (mctx->state_log[to_idx] == NULL
+ && err != REG_NOERROR, 0))
+ return err;
+ }
+ }
+ while (ent++->more);
+ return REG_NOERROR;
+}
+
+/* Build transition table for the state.
+ Return true if successful. */
+
+static bool
+internal_function
+build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
+{
+ reg_errcode_t err;
+ Idx i, j;
+ int ch;
+ bool need_word_trtable = false;
+ bitset_word_t elem, mask;
+ bool dests_node_malloced = false;
+ bool dest_states_malloced = false;
+ Idx ndests; /* Number of the destination states from `state'. */
+ re_dfastate_t **trtable;
+ re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
+ re_node_set follows, *dests_node;
+ bitset_t *dests_ch;
+ bitset_t acceptable;
+
+ struct dests_alloc
+ {
+ re_node_set dests_node[SBC_MAX];
+ bitset_t dests_ch[SBC_MAX];
+ } *dests_alloc;
+
+ /* We build DFA states which corresponds to the destination nodes
+ from `state'. `dests_node[i]' represents the nodes which i-th
+ destination state contains, and `dests_ch[i]' represents the
+ characters which i-th destination state accepts. */
+ if (__libc_use_alloca (sizeof (struct dests_alloc)))
+ dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
+ else
+ {
+ dests_alloc = re_malloc (struct dests_alloc, 1);
+ if (BE (dests_alloc == NULL, 0))
+ return false;
+ dests_node_malloced = true;
+ }
+ dests_node = dests_alloc->dests_node;
+ dests_ch = dests_alloc->dests_ch;
+
+ /* Initialize transiton table. */
+ state->word_trtable = state->trtable = NULL;
+
+ /* At first, group all nodes belonging to `state' into several
+ destinations. */
+ ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
+ if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0))
+ {
+ if (dests_node_malloced)
+ free (dests_alloc);
+ if (ndests == 0)
+ {
+ state->trtable = (re_dfastate_t **)
+ calloc (sizeof (re_dfastate_t *), SBC_MAX);
+ return true;
+ }
+ return false;
+ }
+
+ err = re_node_set_alloc (&follows, ndests + 1);
+ if (BE (err != REG_NOERROR, 0))
+ goto out_free;
+
+ /* Avoid arithmetic overflow in size calculation. */
+ if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
+ / (3 * sizeof (re_dfastate_t *)))
+ < ndests),
+ 0))
+ goto out_free;
+
+ if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
+ + ndests * 3 * sizeof (re_dfastate_t *)))
+ dest_states = (re_dfastate_t **)
+ alloca (ndests * 3 * sizeof (re_dfastate_t *));
+ else
+ {
+ dest_states = (re_dfastate_t **)
+ malloc (ndests * 3 * sizeof (re_dfastate_t *));
+ if (BE (dest_states == NULL, 0))
+ {
+out_free:
+ if (dest_states_malloced)
+ free (dest_states);
+ re_node_set_free (&follows);
+ for (i = 0; i < ndests; ++i)
+ re_node_set_free (dests_node + i);
+ if (dests_node_malloced)
+ free (dests_alloc);
+ return false;
+ }
+ dest_states_malloced = true;
+ }
+ dest_states_word = dest_states + ndests;
+ dest_states_nl = dest_states_word + ndests;
+ bitset_empty (acceptable);
+
+ /* Then build the states for all destinations. */
+ for (i = 0; i < ndests; ++i)
+ {
+ Idx next_node;
+ re_node_set_empty (&follows);
+ /* Merge the follows of this destination states. */
+ for (j = 0; j < dests_node[i].nelem; ++j)
+ {
+ next_node = dfa->nexts[dests_node[i].elems[j]];
+ if (next_node != REG_MISSING)
+ {
+ err = re_node_set_merge (&follows, dfa->eclosures + next_node);
+ if (BE (err != REG_NOERROR, 0))
+ goto out_free;
+ }
+ }
+ dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
+ if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
+ goto out_free;
+ /* If the new state has context constraint,
+ build appropriate states for these contexts. */
+ if (dest_states[i]->has_constraint)
+ {
+ dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
+ CONTEXT_WORD);
+ if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
+ goto out_free;
+
+ if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
+ need_word_trtable = true;
+
+ dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
+ CONTEXT_NEWLINE);
+ if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
+ goto out_free;
+ }
+ else
+ {
+ dest_states_word[i] = dest_states[i];
+ dest_states_nl[i] = dest_states[i];
+ }
+ bitset_merge (acceptable, dests_ch[i]);
+ }
+
+ if (!BE (need_word_trtable, 0))
+ {
+ /* We don't care about whether the following character is a word
+ character, or we are in a single-byte character set so we can
+ discern by looking at the character code: allocate a
+ 256-entry transition table. */
+ trtable = state->trtable =
+ (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
+ if (BE (trtable == NULL, 0))
+ goto out_free;
+
+ /* For all characters ch...: */
+ for (i = 0; i < BITSET_WORDS; ++i)
+ for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
+ elem;
+ mask <<= 1, elem >>= 1, ++ch)
+ if (BE (elem & 1, 0))
+ {
+ /* There must be exactly one destination which accepts
+ character ch. See group_nodes_into_DFAstates. */
+ for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
+ ;
+
+ /* j-th destination accepts the word character ch. */
+ if (dfa->word_char[i] & mask)
+ trtable[ch] = dest_states_word[j];
+ else
+ trtable[ch] = dest_states[j];
+ }
+ }
+ else
+ {
+ /* We care about whether the following character is a word
+ character, and we are in a multi-byte character set: discern
+ by looking at the character code: build two 256-entry
+ transition tables, one starting at trtable[0] and one
+ starting at trtable[SBC_MAX]. */
+ trtable = state->word_trtable =
+ (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
+ if (BE (trtable == NULL, 0))
+ goto out_free;
+
+ /* For all characters ch...: */
+ for (i = 0; i < BITSET_WORDS; ++i)
+ for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
+ elem;
+ mask <<= 1, elem >>= 1, ++ch)
+ if (BE (elem & 1, 0))
+ {
+ /* There must be exactly one destination which accepts
+ character ch. See group_nodes_into_DFAstates. */
+ for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
+ ;
+
+ /* j-th destination accepts the word character ch. */
+ trtable[ch] = dest_states[j];
+ trtable[ch + SBC_MAX] = dest_states_word[j];
+ }
+ }
+
+ /* new line */
+ if (bitset_contain (acceptable, NEWLINE_CHAR))
+ {
+ /* The current state accepts newline character. */
+ for (j = 0; j < ndests; ++j)
+ if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
+ {
+ /* k-th destination accepts newline character. */
+ trtable[NEWLINE_CHAR] = dest_states_nl[j];
+ if (need_word_trtable)
+ trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
+ /* There must be only one destination which accepts
+ newline. See group_nodes_into_DFAstates. */
+ break;
+ }
+ }
+
+ if (dest_states_malloced)
+ free (dest_states);
+
+ re_node_set_free (&follows);
+ for (i = 0; i < ndests; ++i)
+ re_node_set_free (dests_node + i);
+
+ if (dests_node_malloced)
+ free (dests_alloc);
+
+ return true;
+}
+
+/* Group all nodes belonging to STATE into several destinations.
+ Then for all destinations, set the nodes belonging to the destination
+ to DESTS_NODE[i] and set the characters accepted by the destination
+ to DEST_CH[i]. This function return the number of destinations. */
+
+static Idx
+internal_function
+group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
+ re_node_set *dests_node, bitset_t *dests_ch)
+{
+ reg_errcode_t err;
+ bool ok;
+ Idx i, j, k;
+ Idx ndests; /* Number of the destinations from `state'. */
+ bitset_t accepts; /* Characters a node can accept. */
+ const re_node_set *cur_nodes = &state->nodes;
+ bitset_empty (accepts);
+ ndests = 0;
+
+ /* For all the nodes belonging to `state', */
+ for (i = 0; i < cur_nodes->nelem; ++i)
+ {
+ re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
+ re_token_type_t type = node->type;
+ unsigned int constraint = node->constraint;
+
+ /* Enumerate all single byte character this node can accept. */
+ if (type == CHARACTER)
+ bitset_set (accepts, node->opr.c);
+ else if (type == SIMPLE_BRACKET)
+ {
+ bitset_merge (accepts, node->opr.sbcset);
+ }
+ else if (type == OP_PERIOD)
+ {
+#ifdef RE_ENABLE_I18N
+ if (dfa->mb_cur_max > 1)
+ bitset_merge (accepts, dfa->sb_char);
+ else
+#endif
+ bitset_set_all (accepts);
+ if (!(dfa->syntax & RE_DOT_NEWLINE))
+ bitset_clear (accepts, '\n');
+ if (dfa->syntax & RE_DOT_NOT_NULL)
+ bitset_clear (accepts, '\0');
+ }
+#ifdef RE_ENABLE_I18N
+ else if (type == OP_UTF8_PERIOD)
+ {
+ if (ASCII_CHARS % BITSET_WORD_BITS == 0)
+ memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
+ else
+ bitset_merge (accepts, utf8_sb_map);
+ if (!(dfa->syntax & RE_DOT_NEWLINE))
+ bitset_clear (accepts, '\n');
+ if (dfa->syntax & RE_DOT_NOT_NULL)
+ bitset_clear (accepts, '\0');
+ }
+#endif
+ else
+ continue;
+
+ /* Check the `accepts' and sift the characters which are not
+ match it the context. */
+ if (constraint)
+ {
+ if (constraint & NEXT_NEWLINE_CONSTRAINT)
+ {
+ bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
+ bitset_empty (accepts);
+ if (accepts_newline)
+ bitset_set (accepts, NEWLINE_CHAR);
+ else
+ continue;
+ }
+ if (constraint & NEXT_ENDBUF_CONSTRAINT)
+ {
+ bitset_empty (accepts);
+ continue;
+ }
+
+ if (constraint & NEXT_WORD_CONSTRAINT)
+ {
+ bitset_word_t any_set = 0;
+ if (type == CHARACTER && !node->word_char)
+ {
+ bitset_empty (accepts);
+ continue;
+ }
+#ifdef RE_ENABLE_I18N
+ if (dfa->mb_cur_max > 1)
+ for (j = 0; j < BITSET_WORDS; ++j)
+ any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
+ else
+#endif
+ for (j = 0; j < BITSET_WORDS; ++j)
+ any_set |= (accepts[j] &= dfa->word_char[j]);
+ if (!any_set)
+ continue;
+ }
+ if (constraint & NEXT_NOTWORD_CONSTRAINT)
+ {
+ bitset_word_t any_set = 0;
+ if (type == CHARACTER && node->word_char)
+ {
+ bitset_empty (accepts);
+ continue;
+ }
+#ifdef RE_ENABLE_I18N
+ if (dfa->mb_cur_max > 1)
+ for (j = 0; j < BITSET_WORDS; ++j)
+ any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
+ else
+#endif
+ for (j = 0; j < BITSET_WORDS; ++j)
+ any_set |= (accepts[j] &= ~dfa->word_char[j]);
+ if (!any_set)
+ continue;
+ }
+ }
+
+ /* Then divide `accepts' into DFA states, or create a new
+ state. Above, we make sure that accepts is not empty. */
+ for (j = 0; j < ndests; ++j)
+ {
+ bitset_t intersec; /* Intersection sets, see below. */
+ bitset_t remains;
+ /* Flags, see below. */
+ bitset_word_t has_intersec, not_subset, not_consumed;
+
+ /* Optimization, skip if this state doesn't accept the character. */
+ if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
+ continue;
+
+ /* Enumerate the intersection set of this state and `accepts'. */
+ has_intersec = 0;
+ for (k = 0; k < BITSET_WORDS; ++k)
+ has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
+ /* And skip if the intersection set is empty. */
+ if (!has_intersec)
+ continue;
+
+ /* Then check if this state is a subset of `accepts'. */
+ not_subset = not_consumed = 0;
+ for (k = 0; k < BITSET_WORDS; ++k)
+ {
+ not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
+ not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
+ }
+
+ /* If this state isn't a subset of `accepts', create a
+ new group state, which has the `remains'. */
+ if (not_subset)
+ {
+ bitset_copy (dests_ch[ndests], remains);
+ bitset_copy (dests_ch[j], intersec);
+ err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
+ if (BE (err != REG_NOERROR, 0))
+ goto error_return;
+ ++ndests;
+ }
+
+ /* Put the position in the current group. */
+ ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
+ if (BE (! ok, 0))
+ goto error_return;
+
+ /* If all characters are consumed, go to next node. */
+ if (!not_consumed)
+ break;
+ }
+ /* Some characters remain, create a new group. */
+ if (j == ndests)
+ {
+ bitset_copy (dests_ch[ndests], accepts);
+ err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
+ if (BE (err != REG_NOERROR, 0))
+ goto error_return;
+ ++ndests;
+ bitset_empty (accepts);
+ }
+ }
+ return ndests;
+ error_return:
+ for (j = 0; j < ndests; ++j)
+ re_node_set_free (dests_node + j);
+ return REG_MISSING;
+}
+
+#ifdef RE_ENABLE_I18N
+/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
+ Return the number of the bytes the node accepts.
+ STR_IDX is the current index of the input string.
+
+ This function handles the nodes which can accept one character, or
+ one collating element like '.', '[a-z]', opposite to the other nodes
+ can only accept one byte. */
+
+static int
+internal_function
+check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
+ const re_string_t *input, Idx str_idx)
+{
+ const re_token_t *node = dfa->nodes + node_idx;
+ int char_len, elem_len;
+ Idx i;
+
+ if (BE (node->type == OP_UTF8_PERIOD, 0))
+ {
+ unsigned char c = re_string_byte_at (input, str_idx), d;
+ if (BE (c < 0xc2, 1))
+ return 0;
+
+ if (str_idx + 2 > input->len)
+ return 0;
+
+ d = re_string_byte_at (input, str_idx + 1);
+ if (c < 0xe0)
+ return (d < 0x80 || d > 0xbf) ? 0 : 2;
+ else if (c < 0xf0)
+ {
+ char_len = 3;
+ if (c == 0xe0 && d < 0xa0)
+ return 0;
+ }
+ else if (c < 0xf8)
+ {
+ char_len = 4;
+ if (c == 0xf0 && d < 0x90)
+ return 0;
+ }
+ else if (c < 0xfc)
+ {
+ char_len = 5;
+ if (c == 0xf8 && d < 0x88)
+ return 0;
+ }
+ else if (c < 0xfe)
+ {
+ char_len = 6;
+ if (c == 0xfc && d < 0x84)
+ return 0;
+ }
+ else
+ return 0;
+
+ if (str_idx + char_len > input->len)
+ return 0;
+
+ for (i = 1; i < char_len; ++i)
+ {
+ d = re_string_byte_at (input, str_idx + i);
+ if (d < 0x80 || d > 0xbf)
+ return 0;
+ }
+ return char_len;
+ }
+
+ char_len = re_string_char_size_at (input, str_idx);
+ if (node->type == OP_PERIOD)
+ {
+ if (char_len <= 1)
+ return 0;
+ /* FIXME: I don't think this if is needed, as both '\n'
+ and '\0' are char_len == 1. */
+ /* '.' accepts any one character except the following two cases. */
+ if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
+ re_string_byte_at (input, str_idx) == '\n') ||
+ ((dfa->syntax & RE_DOT_NOT_NULL) &&
+ re_string_byte_at (input, str_idx) == '\0'))
+ return 0;
+ return char_len;
+ }
+
+ elem_len = re_string_elem_size_at (input, str_idx);
+ if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
+ return 0;
+
+ if (node->type == COMPLEX_BRACKET)
+ {
+ const re_charset_t *cset = node->opr.mbcset;
+# ifdef _LIBC
+ const unsigned char *pin
+ = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
+ Idx j;
+ uint32_t nrules;
+# endif /* _LIBC */
+ int match_len = 0;
+ wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
+ ? re_string_wchar_at (input, str_idx) : 0);
+
+ /* match with multibyte character? */
+ for (i = 0; i < cset->nmbchars; ++i)
+ if (wc == cset->mbchars[i])
+ {
+ match_len = char_len;
+ goto check_node_accept_bytes_match;
+ }
+ /* match with character_class? */
+ for (i = 0; i < cset->nchar_classes; ++i)
+ {
+ wctype_t wt = cset->char_classes[i];
+ if (__iswctype (wc, wt))
+ {
+ match_len = char_len;
+ goto check_node_accept_bytes_match;
+ }
+ }
+
+# ifdef _LIBC
+ nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+ if (nrules != 0)
+ {
+ unsigned int in_collseq = 0;
+ const int32_t *table, *indirect;
+ const unsigned char *weights, *extra;
+ const char *collseqwc;
+ int32_t idx;
+ /* This #include defines a local function! */
+# include <locale/weight.h>
+
+ /* match with collating_symbol? */
+ if (cset->ncoll_syms)
+ extra = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
+ for (i = 0; i < cset->ncoll_syms; ++i)
+ {
+ const unsigned char *coll_sym = extra + cset->coll_syms[i];
+ /* Compare the length of input collating element and
+ the length of current collating element. */
+ if (*coll_sym != elem_len)
+ continue;
+ /* Compare each bytes. */
+ for (j = 0; j < *coll_sym; j++)
+ if (pin[j] != coll_sym[1 + j])
+ break;
+ if (j == *coll_sym)
+ {
+ /* Match if every bytes is equal. */
+ match_len = j;
+ goto check_node_accept_bytes_match;
+ }
+ }
+
+ if (cset->nranges)
+ {
+ if (elem_len <= char_len)
+ {
+ collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
+ in_collseq = __collseq_table_lookup (collseqwc, wc);
+ }
+ else
+ in_collseq = find_collation_sequence_value (pin, elem_len);
+ }
+ /* match with range expression? */
+ for (i = 0; i < cset->nranges; ++i)
+ if (cset->range_starts[i] <= in_collseq
+ && in_collseq <= cset->range_ends[i])
+ {
+ match_len = elem_len;
+ goto check_node_accept_bytes_match;
+ }
+
+ /* match with equivalence_class? */
+ if (cset->nequiv_classes)
+ {
+ const unsigned char *cp = pin;
+ table = (const int32_t *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+ weights = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
+ extra = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
+ indirect = (const int32_t *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
+ idx = findidx (&cp);
+ if (idx > 0)
+ for (i = 0; i < cset->nequiv_classes; ++i)
+ {
+ int32_t equiv_class_idx = cset->equiv_classes[i];
+ size_t weight_len = weights[idx];
+ if (weight_len == weights[equiv_class_idx])
+ {
+ Idx cnt = 0;
+ while (cnt <= weight_len
+ && (weights[equiv_class_idx + 1 + cnt]
+ == weights[idx + 1 + cnt]))
+ ++cnt;
+ if (cnt > weight_len)
+ {
+ match_len = elem_len;
+ goto check_node_accept_bytes_match;
+ }
+ }
+ }
+ }
+ }
+ else
+# endif /* _LIBC */
+ {
+ /* match with range expression? */
+#if __GNUC__ >= 2
+ wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
+#else
+ wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
+ cmp_buf[2] = wc;
+#endif
+ for (i = 0; i < cset->nranges; ++i)
+ {
+ cmp_buf[0] = cset->range_starts[i];
+ cmp_buf[4] = cset->range_ends[i];
+ if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
+ && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
+ {
+ match_len = char_len;
+ goto check_node_accept_bytes_match;
+ }
+ }
+ }
+ check_node_accept_bytes_match:
+ if (!cset->non_match)
+ return match_len;
+ else
+ {
+ if (match_len > 0)
+ return 0;
+ else
+ return (elem_len > char_len) ? elem_len : char_len;
+ }
+ }
+ return 0;
+}
+
+# ifdef _LIBC
+static unsigned int
+internal_function
+find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
+{
+ uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+ if (nrules == 0)
+ {
+ if (mbs_len == 1)
+ {
+ /* No valid character. Match it as a single byte character. */
+ const unsigned char *collseq = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
+ return collseq[mbs[0]];
+ }
+ return UINT_MAX;
+ }
+ else
+ {
+ int32_t idx;
+ const unsigned char *extra = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
+ int32_t extrasize = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
+
+ for (idx = 0; idx < extrasize;)
+ {
+ int mbs_cnt;
+ bool found = false;
+ int32_t elem_mbs_len;
+ /* Skip the name of collating element name. */
+ idx = idx + extra[idx] + 1;
+ elem_mbs_len = extra[idx++];
+ if (mbs_len == elem_mbs_len)
+ {
+ for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
+ if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
+ break;
+ if (mbs_cnt == elem_mbs_len)
+ /* Found the entry. */
+ found = true;
+ }
+ /* Skip the byte sequence of the collating element. */
+ idx += elem_mbs_len;
+ /* Adjust for the alignment. */
+ idx = (idx + 3) & ~3;
+ /* Skip the collation sequence value. */
+ idx += sizeof (uint32_t);
+ /* Skip the wide char sequence of the collating element. */
+ idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
+ /* If we found the entry, return the sequence value. */
+ if (found)
+ return *(uint32_t *) (extra + idx);
+ /* Skip the collation sequence value. */
+ idx += sizeof (uint32_t);
+ }
+ return UINT_MAX;
+ }
+}
+# endif /* _LIBC */
+#endif /* RE_ENABLE_I18N */
+
+/* Check whether the node accepts the byte which is IDX-th
+ byte of the INPUT. */
+
+static bool
+internal_function
+check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
+ Idx idx)
+{
+ unsigned char ch;
+ ch = re_string_byte_at (&mctx->input, idx);
+ switch (node->type)
+ {
+ case CHARACTER:
+ if (node->opr.c != ch)
+ return false;
+ break;
+
+ case SIMPLE_BRACKET:
+ if (!bitset_contain (node->opr.sbcset, ch))
+ return false;
+ break;
+
+#ifdef RE_ENABLE_I18N
+ case OP_UTF8_PERIOD:
+ if (ch >= ASCII_CHARS)
+ return false;
+ /* FALLTHROUGH */
+#endif
+ case OP_PERIOD:
+ if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
+ || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
+ return false;
+ break;
+
+ default:
+ return false;
+ }
+
+ if (node->constraint)
+ {
+ /* The node has constraints. Check whether the current context
+ satisfies the constraints. */
+ unsigned int context = re_string_context_at (&mctx->input, idx,
+ mctx->eflags);
+ if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
+ return false;
+ }
+
+ return true;
+}
+
+/* Extend the buffers, if the buffers have run out. */
+
+static reg_errcode_t
+internal_function
+extend_buffers (re_match_context_t *mctx)
+{
+ reg_errcode_t ret;
+ re_string_t *pstr = &mctx->input;
+
+ /* Avoid overflow. */
+ if (BE (SIZE_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0))
+ return REG_ESPACE;
+
+ /* Double the lengthes of the buffers. */
+ ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
+ if (BE (ret != REG_NOERROR, 0))
+ return ret;
+
+ if (mctx->state_log != NULL)
+ {
+ /* And double the length of state_log. */
+ /* XXX We have no indication of the size of this buffer. If this
+ allocation fail we have no indication that the state_log array
+ does not have the right size. */
+ re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
+ pstr->bufs_len + 1);
+ if (BE (new_array == NULL, 0))
+ return REG_ESPACE;
+ mctx->state_log = new_array;
+ }
+
+ /* Then reconstruct the buffers. */
+ if (pstr->icase)
+ {
+#ifdef RE_ENABLE_I18N
+ if (pstr->mb_cur_max > 1)
+ {
+ ret = build_wcs_upper_buffer (pstr);
+ if (BE (ret != REG_NOERROR, 0))
+ return ret;
+ }
+ else
+#endif /* RE_ENABLE_I18N */
+ build_upper_buffer (pstr);
+ }
+ else
+ {
+#ifdef RE_ENABLE_I18N
+ if (pstr->mb_cur_max > 1)
+ build_wcs_buffer (pstr);
+ else
+#endif /* RE_ENABLE_I18N */
+ {
+ if (pstr->trans != NULL)
+ re_string_translate_buffer (pstr);
+ }
+ }
+ return REG_NOERROR;
+}
+
+
+/* Functions for matching context. */
+
+/* Initialize MCTX. */
+
+static reg_errcode_t
+internal_function
+match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
+{
+ mctx->eflags = eflags;
+ mctx->match_last = REG_MISSING;
+ if (n > 0)
+ {
+ /* Avoid overflow. */
+ size_t max_object_size =
+ MAX (sizeof (struct re_backref_cache_entry),
+ sizeof (re_sub_match_top_t *));
+ if (BE (SIZE_MAX / max_object_size < n, 0))
+ return REG_ESPACE;
+
+ mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
+ mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
+ if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
+ return REG_ESPACE;
+ }
+ /* Already zero-ed by the caller.
+ else
+ mctx->bkref_ents = NULL;
+ mctx->nbkref_ents = 0;
+ mctx->nsub_tops = 0; */
+ mctx->abkref_ents = n;
+ mctx->max_mb_elem_len = 1;
+ mctx->asub_tops = n;
+ return REG_NOERROR;
+}
+
+/* Clean the entries which depend on the current input in MCTX.
+ This function must be invoked when the matcher changes the start index
+ of the input, or changes the input string. */
+
+static void
+internal_function
+match_ctx_clean (re_match_context_t *mctx)
+{
+ Idx st_idx;
+ for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
+ {
+ Idx sl_idx;
+ re_sub_match_top_t *top = mctx->sub_tops[st_idx];
+ for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
+ {
+ re_sub_match_last_t *last = top->lasts[sl_idx];
+ re_free (last->path.array);
+ re_free (last);
+ }
+ re_free (top->lasts);
+ if (top->path)
+ {
+ re_free (top->path->array);
+ re_free (top->path);
+ }
+ free (top);
+ }
+
+ mctx->nsub_tops = 0;
+ mctx->nbkref_ents = 0;
+}
+
+/* Free all the memory associated with MCTX. */
+
+static void
+internal_function
+match_ctx_free (re_match_context_t *mctx)
+{
+ /* First, free all the memory associated with MCTX->SUB_TOPS. */
+ match_ctx_clean (mctx);
+ re_free (mctx->sub_tops);
+ re_free (mctx->bkref_ents);
+}
+
+/* Add a new backreference entry to MCTX.
+ Note that we assume that caller never call this function with duplicate
+ entry, and call with STR_IDX which isn't smaller than any existing entry.
+*/
+
+static reg_errcode_t
+internal_function
+match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
+ Idx to)
+{
+ if (mctx->nbkref_ents >= mctx->abkref_ents)
+ {
+ struct re_backref_cache_entry* new_entry;
+ new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
+ mctx->abkref_ents * 2);
+ if (BE (new_entry == NULL, 0))
+ {
+ re_free (mctx->bkref_ents);
+ return REG_ESPACE;
+ }
+ mctx->bkref_ents = new_entry;
+ memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
+ sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
+ mctx->abkref_ents *= 2;
+ }
+ if (mctx->nbkref_ents > 0
+ && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
+ mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
+
+ mctx->bkref_ents[mctx->nbkref_ents].node = node;
+ mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
+ mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
+ mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
+
+ /* This is a cache that saves negative results of check_dst_limits_calc_pos.
+ If bit N is clear, means that this entry won't epsilon-transition to
+ an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If
+ it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
+ such node.
+
+ A backreference does not epsilon-transition unless it is empty, so set
+ to all zeros if FROM != TO. */
+ mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
+ = (from == to ? -1 : 0);
+
+ mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
+ if (mctx->max_mb_elem_len < to - from)
+ mctx->max_mb_elem_len = to - from;
+ return REG_NOERROR;
+}
+
+/* Return the first entry with the same str_idx, or REG_MISSING if none is
+ found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */
+
+static Idx
+internal_function
+search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
+{
+ Idx left, right, mid, last;
+ last = right = mctx->nbkref_ents;
+ for (left = 0; left < right;)
+ {
+ mid = (left + right) / 2;
+ if (mctx->bkref_ents[mid].str_idx < str_idx)
+ left = mid + 1;
+ else
+ right = mid;
+ }
+ if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
+ return left;
+ else
+ return REG_MISSING;
+}
+
+/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
+ at STR_IDX. */
+
+static reg_errcode_t
+internal_function
+match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
+{
+#ifdef DEBUG
+ assert (mctx->sub_tops != NULL);
+ assert (mctx->asub_tops > 0);
+#endif
+ if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
+ {
+ Idx new_asub_tops = mctx->asub_tops * 2;
+ re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
+ re_sub_match_top_t *,
+ new_asub_tops);
+ if (BE (new_array == NULL, 0))
+ return REG_ESPACE;
+ mctx->sub_tops = new_array;
+ mctx->asub_tops = new_asub_tops;
+ }
+ mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
+ if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
+ return REG_ESPACE;
+ mctx->sub_tops[mctx->nsub_tops]->node = node;
+ mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
+ return REG_NOERROR;
+}
+
+/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
+ at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */
+
+static re_sub_match_last_t *
+internal_function
+match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
+{
+ re_sub_match_last_t *new_entry;
+ if (BE (subtop->nlasts == subtop->alasts, 0))
+ {
+ Idx new_alasts = 2 * subtop->alasts + 1;
+ re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
+ re_sub_match_last_t *,
+ new_alasts);
+ if (BE (new_array == NULL, 0))
+ return NULL;
+ subtop->lasts = new_array;
+ subtop->alasts = new_alasts;
+ }
+ new_entry = calloc (1, sizeof (re_sub_match_last_t));
+ if (BE (new_entry != NULL, 1))
+ {
+ subtop->lasts[subtop->nlasts] = new_entry;
+ new_entry->node = node;
+ new_entry->str_idx = str_idx;
+ ++subtop->nlasts;
+ }
+ return new_entry;
+}
+
+static void
+internal_function
+sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
+ re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
+{
+ sctx->sifted_states = sifted_sts;
+ sctx->limited_states = limited_sts;
+ sctx->last_node = last_node;
+ sctx->last_str_idx = last_str_idx;
+ re_node_set_init_empty (&sctx->limits);
+}
diff --git a/lib/stat-macros.h b/lib/stat-macros.h
new file mode 100644
index 0000000..829ddd3
--- /dev/null
+++ b/lib/stat-macros.h
@@ -0,0 +1,260 @@
+/* stat-related macros
+
+ Copyright (C) 1993, 1994, 2001, 2002, 2004, 2006 Free Software
+ Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* Written by Paul Eggert and Jim Meyering. */
+
+#ifndef STAT_MACROS_H
+# define STAT_MACROS_H 1
+
+# if ! defined S_ISREG && ! defined S_IFREG
+# error "you must include <sys/stat.h> before including this file"
+# endif
+
+# ifndef S_IFMT
+# define S_IFMT 0170000
+# endif
+
+# if STAT_MACROS_BROKEN
+# undef S_ISBLK
+# undef S_ISCHR
+# undef S_ISDIR
+# undef S_ISFIFO
+# undef S_ISLNK
+# undef S_ISNAM
+# undef S_ISMPB
+# undef S_ISMPC
+# undef S_ISNWK
+# undef S_ISREG
+# undef S_ISSOCK
+# endif
+
+
+# ifndef S_ISBLK
+# ifdef S_IFBLK
+# define S_ISBLK(m) (((m) & S_IFMT) == S_IFBLK)
+# else
+# define S_ISBLK(m) 0
+# endif
+# endif
+
+# ifndef S_ISCHR
+# ifdef S_IFCHR
+# define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR)
+# else
+# define S_ISCHR(m) 0
+# endif
+# endif
+
+# ifndef S_ISDIR
+# ifdef S_IFDIR
+# define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
+# else
+# define S_ISDIR(m) 0
+# endif
+# endif
+
+# ifndef S_ISDOOR /* Solaris 2.5 and up */
+# define S_ISDOOR(m) 0
+# endif
+
+# ifndef S_ISFIFO
+# ifdef S_IFIFO
+# define S_ISFIFO(m) (((m) & S_IFMT) == S_IFIFO)
+# else
+# define S_ISFIFO(m) 0
+# endif
+# endif
+
+# ifndef S_ISLNK
+# ifdef S_IFLNK
+# define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK)
+# else
+# define S_ISLNK(m) 0
+# endif
+# endif
+
+# ifndef S_ISMPB /* V7 */
+# ifdef S_IFMPB
+# define S_ISMPB(m) (((m) & S_IFMT) == S_IFMPB)
+# define S_ISMPC(m) (((m) & S_IFMT) == S_IFMPC)
+# else
+# define S_ISMPB(m) 0
+# define S_ISMPC(m) 0
+# endif
+# endif
+
+# ifndef S_ISNAM /* Xenix */
+# ifdef S_IFNAM
+# define S_ISNAM(m) (((m) & S_IFMT) == S_IFNAM)
+# else
+# define S_ISNAM(m) 0
+# endif
+# endif
+
+# ifndef S_ISNWK /* HP/UX */
+# ifdef S_IFNWK
+# define S_ISNWK(m) (((m) & S_IFMT) == S_IFNWK)
+# else
+# define S_ISNWK(m) 0
+# endif
+# endif
+
+# ifndef S_ISPORT /* Solaris 10 and up */
+# define S_ISPORT(m) 0
+# endif
+
+# ifndef S_ISREG
+# ifdef S_IFREG
+# define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
+# else
+# define S_ISREG(m) 0
+# endif
+# endif
+
+# ifndef S_ISSOCK
+# ifdef S_IFSOCK
+# define S_ISSOCK(m) (((m) & S_IFMT) == S_IFSOCK)
+# else
+# define S_ISSOCK(m) 0
+# endif
+# endif
+
+
+# ifndef S_TYPEISMQ
+# define S_TYPEISMQ(p) 0
+# endif
+
+# ifndef S_TYPEISTMO
+# define S_TYPEISTMO(p) 0
+# endif
+
+
+# ifndef S_TYPEISSEM
+# ifdef S_INSEM
+# define S_TYPEISSEM(p) (S_ISNAM ((p)->st_mode) && (p)->st_rdev == S_INSEM)
+# else
+# define S_TYPEISSEM(p) 0
+# endif
+# endif
+
+# ifndef S_TYPEISSHM
+# ifdef S_INSHD
+# define S_TYPEISSHM(p) (S_ISNAM ((p)->st_mode) && (p)->st_rdev == S_INSHD)
+# else
+# define S_TYPEISSHM(p) 0
+# endif
+# endif
+
+/* high performance ("contiguous data") */
+# ifndef S_ISCTG
+# define S_ISCTG(p) 0
+# endif
+
+/* Cray DMF (data migration facility): off line, with data */
+# ifndef S_ISOFD
+# define S_ISOFD(p) 0
+# endif
+
+/* Cray DMF (data migration facility): off line, with no data */
+# ifndef S_ISOFL
+# define S_ISOFL(p) 0
+# endif
+
+/* 4.4BSD whiteout */
+# ifndef S_ISWHT
+# define S_ISWHT(m) 0
+# endif
+
+/* If any of the following are undefined,
+ define them to their de facto standard values. */
+# if !S_ISUID
+# define S_ISUID 04000
+# endif
+# if !S_ISGID
+# define S_ISGID 02000
+# endif
+
+/* S_ISVTX is a common extension to POSIX. */
+# ifndef S_ISVTX
+# define S_ISVTX 01000
+# endif
+
+# if !S_IRUSR && S_IREAD
+# define S_IRUSR S_IREAD
+# endif
+# if !S_IRUSR
+# define S_IRUSR 00400
+# endif
+# if !S_IRGRP
+# define S_IRGRP (S_IRUSR >> 3)
+# endif
+# if !S_IROTH
+# define S_IROTH (S_IRUSR >> 6)
+# endif
+
+# if !S_IWUSR && S_IWRITE
+# define S_IWUSR S_IWRITE
+# endif
+# if !S_IWUSR
+# define S_IWUSR 00200
+# endif
+# if !S_IWGRP
+# define S_IWGRP (S_IWUSR >> 3)
+# endif
+# if !S_IWOTH
+# define S_IWOTH (S_IWUSR >> 6)
+# endif
+
+# if !S_IXUSR && S_IEXEC
+# define S_IXUSR S_IEXEC
+# endif
+# if !S_IXUSR
+# define S_IXUSR 00100
+# endif
+# if !S_IXGRP
+# define S_IXGRP (S_IXUSR >> 3)
+# endif
+# if !S_IXOTH
+# define S_IXOTH (S_IXUSR >> 6)
+# endif
+
+# if !S_IRWXU
+# define S_IRWXU (S_IRUSR | S_IWUSR | S_IXUSR)
+# endif
+# if !S_IRWXG
+# define S_IRWXG (S_IRGRP | S_IWGRP | S_IXGRP)
+# endif
+# if !S_IRWXO
+# define S_IRWXO (S_IROTH | S_IWOTH | S_IXOTH)
+# endif
+
+/* S_IXUGO is a common extension to POSIX. */
+# if !S_IXUGO
+# define S_IXUGO (S_IXUSR | S_IXGRP | S_IXOTH)
+# endif
+
+# ifndef S_IRWXUGO
+# define S_IRWXUGO (S_IRWXU | S_IRWXG | S_IRWXO)
+# endif
+
+/* All the mode bits that can be affected by chmod. */
+# define CHMOD_MODE_BITS \
+ (S_ISUID | S_ISGID | S_ISVTX | S_IRWXU | S_IRWXG | S_IRWXO)
+
+#endif /* STAT_MACROS_H */
diff --git a/lib/stat_.h b/lib/stat_.h
new file mode 100644
index 0000000..8c2e552
--- /dev/null
+++ b/lib/stat_.h
@@ -0,0 +1,48 @@
+/* Provide a complete sys/stat header file.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ Written by Eric Blake.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#ifndef _gl_SYS_STAT_H
+#define _gl_SYS_STAT_H
+
+/* This file is supposed to be used on platforms where <sys/stat.h> is
+ incomplete. It is intended to provide definitions and prototypes
+ needed by an application. Start with what the system provides. */
+#include @ABSOLUTE_SYS_STAT_H@
+
+/* mingw does not support symlinks, therefore it does not have lstat. But
+ without links, stat does just fine. */
+#if ! HAVE_LSTAT
+# define lstat stat
+#endif
+
+/* mingw's _mkdir() function has 1 argument, but we pass 2 arguments.
+ Additionally, it declares _mkdir (and depending on compile flags, an
+ alias mkdir), only in the nonstandard io.h. */
+#if ! HAVE_DECL_MKDIR && HAVE_IO_H
+# include <io.h>
+
+static inline int
+rpl_mkdir (char const *name, mode_t mode)
+{
+ return _mkdir (name);
+}
+
+# define mkdir rpl_mkdir
+#endif
+
+#endif /* _gl_SYS_STAT_H */
diff --git a/lib/stdbool_.h b/lib/stdbool_.h
new file mode 100644
index 0000000..efa80ba
--- /dev/null
+++ b/lib/stdbool_.h
@@ -0,0 +1,115 @@
+/* Copyright (C) 2001, 2002, 2003, 2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <haible@clisp.cons.org>, 2001.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#ifndef _STDBOOL_H
+#define _STDBOOL_H
+
+/* ISO C 99 <stdbool.h> for platforms that lack it. */
+
+/* Usage suggestions:
+
+ Programs that use <stdbool.h> should be aware of some limitations
+ and standards compliance issues.
+
+ Standards compliance:
+
+ - <stdbool.h> must be #included before 'bool', 'false', 'true'
+ can be used.
+
+ - You cannot assume that sizeof (bool) == 1.
+
+ - Programs should not undefine the macros bool, true, and false,
+ as C99 lists that as an "obsolescent feature".
+
+ Limitations of this substitute, when used in a C89 environment:
+
+ - <stdbool.h> must be #included before the '_Bool' type can be used.
+
+ - You cannot assume that _Bool is a typedef; it might be a macro.
+
+ - In C99, casts and automatic conversions to '_Bool' or 'bool' are
+ performed in such a way that every nonzero value gets converted
+ to 'true', and zero gets converted to 'false'. This doesn't work
+ with this substitute. With this substitute, only the values 0 and 1
+ give the expected result when converted to _Bool' or 'bool'.
+
+ Also, it is suggested that programs use 'bool' rather than '_Bool';
+ this isn't required, but 'bool' is more common. */
+
+
+/* 7.16. Boolean type and values */
+
+/* BeOS <sys/socket.h> already #defines false 0, true 1. We use the same
+ definitions below, but temporarily we have to #undef them. */
+#ifdef __BEOS__
+# include <OS.h> /* defines bool but not _Bool */
+# undef false
+# undef true
+#endif
+
+/* For the sake of symbolic names in gdb, we define true and false as
+ enum constants, not only as macros.
+ It is tempting to write
+ typedef enum { false = 0, true = 1 } _Bool;
+ so that gdb prints values of type 'bool' symbolically. But if we do
+ this, values of type '_Bool' may promote to 'int' or 'unsigned int'
+ (see ISO C 99 6.7.2.2.(4)); however, '_Bool' must promote to 'int'
+ (see ISO C 99 6.3.1.1.(2)). So we add a negative value to the
+ enum; this ensures that '_Bool' promotes to 'int'. */
+#if defined __cplusplus || defined __BEOS__
+ /* A compiler known to have 'bool'. */
+ /* If the compiler already has both 'bool' and '_Bool', we can assume they
+ are the same types. */
+# if !@HAVE__BOOL@
+typedef bool _Bool;
+# endif
+#else
+# if !defined __GNUC__
+ /* If @HAVE__BOOL@:
+ Some HP-UX cc and AIX IBM C compiler versions have compiler bugs when
+ the built-in _Bool type is used. See
+ http://gcc.gnu.org/ml/gcc-patches/2003-12/msg02303.html
+ http://lists.gnu.org/archive/html/bug-coreutils/2005-11/msg00161.html
+ http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html
+ Similar bugs are likely with other compilers as well; this file
+ wouldn't be used if <stdbool.h> was working.
+ So we override the _Bool type.
+ If !@HAVE__BOOL@:
+ Need to define _Bool ourselves. As 'signed char' or as an enum type?
+ Use of a typedef, with SunPRO C, leads to a stupid
+ "warning: _Bool is a keyword in ISO C99".
+ Use of an enum type, with IRIX cc, leads to a stupid
+ "warning(1185): enumerated type mixed with another type".
+ The only benefit of the enum type, debuggability, is not important
+ with these compilers. So use 'signed char' and no typedef. */
+# define _Bool signed char
+enum { false = 0, true = 1 };
+# else
+ /* With this compiler, trust the _Bool type if the compiler has it. */
+# if !@HAVE__BOOL@
+typedef enum { _Bool_must_promote_to_int = -1, false = 0, true = 1 } _Bool;
+# endif
+# endif
+#endif
+#define bool _Bool
+
+/* The other macros must be usable in preprocessor directives. */
+#define false 0
+#define true 1
+#define __bool_true_false_are_defined 1
+
+#endif /* _STDBOOL_H */
diff --git a/lib/stdint_.h b/lib/stdint_.h
new file mode 100644
index 0000000..1ada949
--- /dev/null
+++ b/lib/stdint_.h
@@ -0,0 +1,456 @@
+/* Copyright (C) 2001-2002, 2004-2006 Free Software Foundation, Inc.
+ Written by Paul Eggert, Bruno Haible, Sam Steingold, Peter Burwood.
+ This file is part of gnulib.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#ifndef _GL_STDINT_H
+#define _GL_STDINT_H
+
+/*
+ * ISO C 99 <stdint.h> for platforms that lack it.
+ * <http://www.opengroup.org/susv3xbd/stdint.h.html>
+ */
+
+/* Get those types that are already defined in other system include
+ files, so that we can "#define int8_t signed char" below without
+ worrying about a later system include file containing a "typedef
+ signed char int8_t;" that will get messed up by our macro. Our
+ macros should all be consistent with the system versions, except
+ for the "fast" types and macros, which we recommend against using
+ in public interfaces due to compiler differences. */
+
+#if @HAVE_STDINT_H@
+# if defined __sgi && ! defined __c99
+ /* Bypass IRIX's <stdint.h> if in C89 mode, since it merely annoys users
+ with "This header file is to be used only for c99 mode compilations"
+ diagnostics. */
+# define __STDINT_H__
+# endif
+ /* Other systems may have an incomplete or buggy <stdint.h>.
+ Include it before <inttypes.h>, since any "#include <stdint.h>"
+ in <inttypes.h> would reinclude us, skipping our contents because
+ _GL_STDINT_H is defined. */
+# include @ABSOLUTE_STDINT_H@
+#endif
+
+/* <sys/types.h> defines some of the stdint.h types as well, on glibc,
+ IRIX 6.5, and OpenBSD 3.8 (via <machine/types.h>).
+ MacOS X 10.4.6 <sys/types.h> includes <stdint.h> (which is us), but
+ relies on the system <stdint.h> definitions, so include
+ <sys/types.h> after @ABSOLUTE_STDINT_H@. */
+#if @HAVE_SYS_TYPES_H@
+# include <sys/types.h>
+#endif
+
+/* Get LONG_MIN, LONG_MAX, ULONG_MAX. */
+#include <limits.h>
+
+#if @HAVE_INTTYPES_H@
+ /* In OpenBSD 3.8, <inttypes.h> includes <machine/types.h>, which defines
+ int{8,16,32,64}_t, uint{8,16,32,64}_t and __BIT_TYPES_DEFINED__.
+ <inttypes.h> also defines intptr_t and uintptr_t. */
+# define _GL_JUST_INCLUDE_ABSOLUTE_INTTYPES_H
+# include <inttypes.h>
+# undef _GL_JUST_INCLUDE_ABSOLUTE_INTTYPES_H
+#elif @HAVE_SYS_INTTYPES_H@
+ /* Solaris 7 <sys/inttypes.h> has the types except the *_fast*_t types, and
+ the macros except for *_FAST*_*, INTPTR_MIN, PTRDIFF_MIN, PTRDIFF_MAX. */
+# include <sys/inttypes.h>
+#endif
+
+#if @HAVE_SYS_BITYPES_H@ && ! defined __BIT_TYPES_DEFINED__
+ /* Linux libc4 >= 4.6.7 and libc5 have a <sys/bitypes.h> that defines
+ int{8,16,32,64}_t and __BIT_TYPES_DEFINED__. In libc5 >= 5.2.2 it is
+ included by <sys/types.h>. */
+# include <sys/bitypes.h>
+#endif
+
+#if ! defined __cplusplus || defined __STDC_CONSTANT_MACROS
+
+/* Get WCHAR_MIN, WCHAR_MAX. */
+# if @HAVE_WCHAR_H@ && ! (defined WCHAR_MIN && defined WCHAR_MAX)
+ /* BSD/OS 4.1 has a bug: <stdio.h> and <time.h> must be included before
+ <wchar.h>. */
+# include <stdio.h>
+# include <time.h>
+# include <wchar.h>
+# endif
+
+#endif
+
+/* Minimum and maximum values for a integer type under the usual assumption.
+ Return an unspecified value if BITS == 0, adding a check to pacify
+ picky compilers. */
+
+#define _STDINT_MIN(signed, bits, zero) \
+ ((signed) ? (- ((zero) + 1) << ((bits) ? (bits) - 1 : 0)) : (zero))
+
+#define _STDINT_MAX(signed, bits, zero) \
+ ((signed) \
+ ? ~ _STDINT_MIN (signed, bits, zero) \
+ : ((((zero) + 1) << ((bits) ? (bits) - 1 : 0)) - 1) * 2 + 1)
+
+/* 7.18.1.1. Exact-width integer types */
+
+/* Here we assume a standard architecture where the hardware integer
+ types have 8, 16, 32, optionally 64 bits. */
+
+#undef int8_t
+#undef uint8_t
+#define int8_t signed char
+#define uint8_t unsigned char
+
+#undef int16_t
+#undef uint16_t
+#define int16_t short int
+#define uint16_t unsigned short int
+
+#undef int32_t
+#undef uint32_t
+#define int32_t int
+#define uint32_t unsigned int
+
+#undef int64_t
+#undef uint64_t
+#if LONG_MAX >> 31 >> 31 == 1
+# define int64_t long int
+# define uint64_t unsigned long int
+#elif defined _MSC_VER
+# define int64_t __int64
+# define uint64_t unsigned __int64
+#elif @HAVE_LONG_LONG_INT@
+# define int64_t long long int
+# define uint64_t unsigned long long int
+#endif
+
+/* Avoid collision with Solaris 2.5.1 <pthread.h> etc. */
+#define _UINT8_T
+#define _UINT32_T
+#define _UINT64_T
+
+
+/* 7.18.1.2. Minimum-width integer types */
+
+/* Here we assume a standard architecture where the hardware integer
+ types have 8, 16, 32, optionally 64 bits. Therefore the leastN_t types
+ are the same as the corresponding N_t types. */
+
+#undef int_least8_t
+#undef uint_least8_t
+#undef int_least16_t
+#undef uint_least16_t
+#undef int_least32_t
+#undef uint_least32_t
+#undef int_least64_t
+#undef uint_least64_t
+#define int_least8_t int8_t
+#define uint_least8_t uint8_t
+#define int_least16_t int16_t
+#define uint_least16_t uint16_t
+#define int_least32_t int32_t
+#define uint_least32_t uint32_t
+#ifdef int64_t
+# define int_least64_t int64_t
+# define uint_least64_t uint64_t
+#endif
+
+/* 7.18.1.3. Fastest minimum-width integer types */
+
+/* Note: Other <stdint.h> substitutes may define these types differently.
+ It is not recommended to use these types in public header files. */
+
+/* Here we assume a standard architecture where the hardware integer
+ types have 8, 16, 32, optionally 64 bits. Therefore the fastN_t types
+ are taken from the same list of types. Assume that 'long int'
+ is fast enough for all narrower integers. */
+
+#undef int_fast8_t
+#undef uint_fast8_t
+#undef int_fast16_t
+#undef uint_fast16_t
+#undef int_fast32_t
+#undef uint_fast32_t
+#undef int_fast64_t
+#undef uint_fast64_t
+#define int_fast8_t long int
+#define uint_fast8_t unsigned int_fast8_t
+#define int_fast16_t long int
+#define uint_fast16_t unsigned int_fast16_t
+#define int_fast32_t long int
+#define uint_fast32_t unsigned int_fast32_t
+#ifdef int64_t
+# define int_fast64_t int64_t
+# define uint_fast64_t uint64_t
+#endif
+
+/* 7.18.1.4. Integer types capable of holding object pointers */
+
+#undef intptr_t
+#undef uintptr_t
+#define intptr_t long int
+#define uintptr_t unsigned long int
+
+/* 7.18.1.5. Greatest-width integer types */
+
+/* Note: These types are compiler dependent. It may be unwise to use them in
+ public header files. */
+
+#undef intmax_t
+#undef uintmax_t
+#if @HAVE_LONG_LONG_INT@ && LONG_MAX >> 30 == 1
+# define intmax_t long long int
+# define uintmax_t unsigned long long int
+#elif defined int64_t
+# define intmax_t int64_t
+# define uintmax_t uint64_t
+#else
+# define intmax_t long int
+# define uintmax_t unsigned long int
+#endif
+
+/* 7.18.2. Limits of specified-width integer types */
+
+#if ! defined __cplusplus || defined __STDC_LIMIT_MACROS
+
+/* 7.18.2.1. Limits of exact-width integer types */
+
+/* Here we assume a standard architecture where the hardware integer
+ types have 8, 16, 32, optionally 64 bits. */
+
+#undef INT8_MIN
+#undef INT8_MAX
+#undef UINT8_MAX
+#define INT8_MIN (~ INT8_MAX)
+#define INT8_MAX 127
+#define UINT8_MAX 255
+
+#undef INT16_MIN
+#undef INT16_MAX
+#undef UINT16_MAX
+#define INT16_MIN (~ INT16_MAX)
+#define INT16_MAX 32767
+#define UINT16_MAX 65535
+
+#undef INT32_MIN
+#undef INT32_MAX
+#undef UINT32_MAX
+#define INT32_MIN (~ INT32_MAX)
+#define INT32_MAX 2147483647
+#define UINT32_MAX 4294967295U
+
+#undef INT64_MIN
+#undef INT64_MAX
+#undef UINT64_MAX
+#ifdef int64_t
+# define INT64_MIN (~ INT64_MAX)
+# define INT64_MAX INTMAX_C (9223372036854775807)
+# define UINT64_MAX UINTMAX_C (18446744073709551615)
+#endif
+
+/* 7.18.2.2. Limits of minimum-width integer types */
+
+/* Here we assume a standard architecture where the hardware integer
+ types have 8, 16, 32, optionally 64 bits. Therefore the leastN_t types
+ are the same as the corresponding N_t types. */
+
+#undef INT_LEAST8_MIN
+#undef INT_LEAST8_MAX
+#undef UINT_LEAST8_MAX
+#define INT_LEAST8_MIN INT8_MIN
+#define INT_LEAST8_MAX INT8_MAX
+#define UINT_LEAST8_MAX UINT8_MAX
+
+#undef INT_LEAST16_MIN
+#undef INT_LEAST16_MAX
+#undef UINT_LEAST16_MAX
+#define INT_LEAST16_MIN INT16_MIN
+#define INT_LEAST16_MAX INT16_MAX
+#define UINT_LEAST16_MAX UINT16_MAX
+
+#undef INT_LEAST32_MIN
+#undef INT_LEAST32_MAX
+#undef UINT_LEAST32_MAX
+#define INT_LEAST32_MIN INT32_MIN
+#define INT_LEAST32_MAX INT32_MAX
+#define UINT_LEAST32_MAX UINT32_MAX
+
+#undef INT_LEAST64_MIN
+#undef INT_LEAST64_MAX
+#undef UINT_LEAST64_MAX
+#ifdef int64_t
+# define INT_LEAST64_MIN INT64_MIN
+# define INT_LEAST64_MAX INT64_MAX
+# define UINT_LEAST64_MAX UINT64_MAX
+#endif
+
+/* 7.18.2.3. Limits of fastest minimum-width integer types */
+
+/* Here we assume a standard architecture where the hardware integer
+ types have 8, 16, 32, optionally 64 bits. Therefore the fastN_t types
+ are taken from the same list of types. */
+
+#undef INT_FAST8_MIN
+#undef INT_FAST8_MAX
+#undef UINT_FAST8_MAX
+#define INT_FAST8_MIN LONG_MIN
+#define INT_FAST8_MAX LONG_MAX
+#define UINT_FAST8_MAX ULONG_MAX
+
+#undef INT_FAST16_MIN
+#undef INT_FAST16_MAX
+#undef UINT_FAST16_MAX
+#define INT_FAST16_MIN LONG_MIN
+#define INT_FAST16_MAX LONG_MAX
+#define UINT_FAST16_MAX ULONG_MAX
+
+#undef INT_FAST32_MIN
+#undef INT_FAST32_MAX
+#undef UINT_FAST32_MAX
+#define INT_FAST32_MIN LONG_MIN
+#define INT_FAST32_MAX LONG_MAX
+#define UINT_FAST32_MAX ULONG_MAX
+
+#undef INT_FAST64_MIN
+#undef INT_FAST64_MAX
+#undef UINT_FAST64_MAX
+#ifdef int64_t
+# define INT_FAST64_MIN INT64_MIN
+# define INT_FAST64_MAX INT64_MAX
+# define UINT_FAST64_MAX UINT64_MAX
+#endif
+
+/* 7.18.2.4. Limits of integer types capable of holding object pointers */
+
+#undef INTPTR_MIN
+#undef INTPTR_MAX
+#undef UINTPTR_MAX
+#define INTPTR_MIN LONG_MIN
+#define INTPTR_MAX LONG_MAX
+#define UINTPTR_MAX ULONG_MAX
+
+/* 7.18.2.5. Limits of greatest-width integer types */
+
+#undef INTMAX_MIN
+#undef INTMAX_MAX
+#undef UINTMAX_MAX
+#define INTMAX_MIN (~ INTMAX_MAX)
+#ifdef INT64_MAX
+# define INTMAX_MAX INT64_MAX
+# define UINTMAX_MAX UINT64_MAX
+#else
+# define INTMAX_MAX INT32_MAX
+# define UINTMAX_MAX UINT32_MAX
+#endif
+
+/* 7.18.3. Limits of other integer types */
+
+/* ptrdiff_t limits */
+#undef PTRDIFF_MIN
+#undef PTRDIFF_MAX
+#define PTRDIFF_MIN \
+ _STDINT_MIN (1, @BITSIZEOF_PTRDIFF_T@, 0@PTRDIFF_T_SUFFIX@)
+#define PTRDIFF_MAX \
+ _STDINT_MAX (1, @BITSIZEOF_PTRDIFF_T@, 0@PTRDIFF_T_SUFFIX@)
+
+/* sig_atomic_t limits */
+#undef SIG_ATOMIC_MIN
+#undef SIG_ATOMIC_MAX
+#define SIG_ATOMIC_MIN \
+ _STDINT_MIN (@HAVE_SIGNED_SIG_ATOMIC_T@, @BITSIZEOF_SIG_ATOMIC_T@, \
+ 0@SIG_ATOMIC_T_SUFFIX@)
+#define SIG_ATOMIC_MAX \
+ _STDINT_MAX (@HAVE_SIGNED_SIG_ATOMIC_T@, @BITSIZEOF_SIG_ATOMIC_T@, \
+ 0@SIG_ATOMIC_T_SUFFIX@)
+
+
+/* size_t limit */
+#undef SIZE_MAX
+#define SIZE_MAX _STDINT_MAX (0, @BITSIZEOF_SIZE_T@, 0@SIZE_T_SUFFIX@)
+
+/* wchar_t limits */
+#undef WCHAR_MIN
+#undef WCHAR_MAX
+#define WCHAR_MIN \
+ _STDINT_MIN (@HAVE_SIGNED_WCHAR_T@, @BITSIZEOF_WCHAR_T@, 0@WCHAR_T_SUFFIX@)
+#define WCHAR_MAX \
+ _STDINT_MAX (@HAVE_SIGNED_WCHAR_T@, @BITSIZEOF_WCHAR_T@, 0@WCHAR_T_SUFFIX@)
+
+/* wint_t limits */
+#undef WINT_MIN
+#undef WINT_MAX
+#define WINT_MIN \
+ _STDINT_MIN (@HAVE_SIGNED_WINT_T@, @BITSIZEOF_WINT_T@, 0@WINT_T_SUFFIX@)
+#define WINT_MAX \
+ _STDINT_MAX (@HAVE_SIGNED_WINT_T@, @BITSIZEOF_WINT_T@, 0@WINT_T_SUFFIX@)
+
+#endif /* !defined __cplusplus || defined __STDC_LIMIT_MACROS */
+
+/* 7.18.4. Macros for integer constants */
+
+#if ! defined __cplusplus || defined __STDC_CONSTANT_MACROS
+
+/* 7.18.4.1. Macros for minimum-width integer constants */
+/* According to ISO C 99 Technical Corrigendum 1 */
+
+/* Here we assume a standard architecture where the hardware integer
+ types have 8, 16, 32, optionally 64 bits, and int is 32 bits. */
+
+#undef INT8_C
+#undef UINT8_C
+#define INT8_C(x) x
+#define UINT8_C(x) x
+
+#undef INT16_C
+#undef UINT16_C
+#define INT16_C(x) x
+#define UINT16_C(x) x
+
+#undef INT32_C
+#undef UINT32_C
+#define INT32_C(x) x
+#define UINT32_C(x) x ## U
+
+#undef INT64_C
+#undef UINT64_C
+#if LONG_MAX >> 31 >> 31 == 1
+# define INT64_C(x) x##L
+# define UINT64_C(x) x##UL
+#elif defined _MSC_VER
+# define INT64_C(x) x##i64
+# define UINT64_C(x) x##ui64
+#elif @HAVE_LONG_LONG_INT@
+# define INT64_C(x) x##LL
+# define UINT64_C(x) x##ULL
+#endif
+
+/* 7.18.4.2. Macros for greatest-width integer constants */
+
+#undef INTMAX_C
+#undef UINTMAX_C
+#if @HAVE_LONG_LONG_INT@ && LONG_MAX >> 30 == 1
+# define INTMAX_C(x) x##LL
+# define UINTMAX_C(x) x##ULL
+#elif defined int64_t
+# define INTMAX_C(x) INT64_C(x)
+# define UINTMAX_C(x) UINT64_C(x)
+#else
+# define INTMAX_C(x) x##L
+# define UINTMAX_C(x) x##UL
+#endif
+
+#endif /* !defined __cplusplus || defined __STDC_CONSTANT_MACROS */
+
+#endif /* _GL_STDINT_H */
diff --git a/lib/strcase.h b/lib/strcase.h
new file mode 100644
index 0000000..e420798
--- /dev/null
+++ b/lib/strcase.h
@@ -0,0 +1,48 @@
+/* Case-insensitive string comparison functions.
+ Copyright (C) 1995-1996, 2001, 2003, 2005 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#ifndef _STRCASE_H
+#define _STRCASE_H
+
+#include <stddef.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* Compare strings S1 and S2, ignoring case, returning less than, equal to or
+ greater than zero if S1 is lexicographically less than, equal to or greater
+ than S2.
+ Note: This function may, in multibyte locales, return 0 for strings of
+ different lengths! */
+extern int strcasecmp (const char *s1, const char *s2);
+
+/* Compare no more than N characters of strings S1 and S2, ignoring case,
+ returning less than, equal to or greater than zero if S1 is
+ lexicographically less than, equal to or greater than S2.
+ Note: This function can not work correctly in multibyte locales. */
+extern int strncasecmp (const char *s1, const char *s2, size_t n);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _STRCASE_H */
diff --git a/lib/strcasecmp.c b/lib/strcasecmp.c
new file mode 100644
index 0000000..99d5dd2
--- /dev/null
+++ b/lib/strcasecmp.c
@@ -0,0 +1,103 @@
+/* Case-insensitive string comparison function.
+ Copyright (C) 1998-1999, 2005-2006 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2005,
+ based on earlier glibc code.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#include <config.h>
+
+/* Specification. */
+#include "strcase.h"
+
+#include <ctype.h>
+#include <limits.h>
+
+#if HAVE_MBRTOWC
+# include "mbuiter.h"
+#endif
+
+#define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
+
+/* Compare strings S1 and S2, ignoring case, returning less than, equal to or
+ greater than zero if S1 is lexicographically less than, equal to or greater
+ than S2.
+ Note: This function may, in multibyte locales, return 0 for strings of
+ different lengths! */
+int
+strcasecmp (const char *s1, const char *s2)
+{
+ if (s1 == s2)
+ return 0;
+
+ /* Be careful not to look at the entire extent of s1 or s2 until needed.
+ This is useful because when two strings differ, the difference is
+ most often already in the very few first characters. */
+#if HAVE_MBRTOWC
+ if (MB_CUR_MAX > 1)
+ {
+ mbui_iterator_t iter1;
+ mbui_iterator_t iter2;
+
+ mbui_init (iter1, s1);
+ mbui_init (iter2, s2);
+
+ while (mbui_avail (iter1) && mbui_avail (iter2))
+ {
+ int cmp = mb_casecmp (mbui_cur (iter1), mbui_cur (iter2));
+
+ if (cmp != 0)
+ return cmp;
+
+ mbui_advance (iter1);
+ mbui_advance (iter2);
+ }
+ if (mbui_avail (iter1))
+ /* s2 terminated before s1. */
+ return 1;
+ if (mbui_avail (iter2))
+ /* s1 terminated before s2. */
+ return -1;
+ return 0;
+ }
+ else
+#endif
+ {
+ const unsigned char *p1 = (const unsigned char *) s1;
+ const unsigned char *p2 = (const unsigned char *) s2;
+ unsigned char c1, c2;
+
+ do
+ {
+ c1 = TOLOWER (*p1);
+ c2 = TOLOWER (*p2);
+
+ if (c1 == '\0')
+ break;
+
+ ++p1;
+ ++p2;
+ }
+ while (c1 == c2);
+
+ if (UCHAR_MAX <= INT_MAX)
+ return c1 - c2;
+ else
+ /* On machines where 'char' and 'int' are types of the same size, the
+ difference of two 'unsigned char' values - including the sign bit -
+ doesn't fit in an 'int'. */
+ return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0);
+ }
+}
diff --git a/lib/strerror.c b/lib/strerror.c
new file mode 100644
index 0000000..24e2e1b
--- /dev/null
+++ b/lib/strerror.c
@@ -0,0 +1,47 @@
+/* strerror.c --- ANSI C compatible system error routine
+
+ Copyright (C) 1986, 1988, 1989, 1991, 2002, 2003, 2006 Free
+ Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#include <config.h>
+
+#include <limits.h>
+
+/* Don't include <stdio.h>, since it may or may not declare
+ sys_errlist and its declarations may collide with ours. Just
+ declare the stuff that we need directly. Standard hosted C89
+ implementations define strerror and they don't need this strerror
+ function, so take some liberties with the standard to cater to
+ ancient or limited freestanding implementations. */
+int sprintf (char *, char const *, ...);
+extern int sys_nerr;
+extern char *sys_errlist[];
+
+char *
+strerror (int n)
+{
+ static char const fmt[] = "Unknown error (%d)";
+ static char mesg[sizeof fmt + sizeof n * CHAR_BIT / 3];
+
+ if (n < 0 || n >= sys_nerr)
+ {
+ sprintf (mesg, fmt, n);
+ return mesg;
+ }
+ else
+ return sys_errlist[n];
+}
diff --git a/lib/strncasecmp.c b/lib/strncasecmp.c
new file mode 100644
index 0000000..f59b953
--- /dev/null
+++ b/lib/strncasecmp.c
@@ -0,0 +1,63 @@
+/* strncasecmp.c -- case insensitive string comparator
+ Copyright (C) 1998-1999, 2005-2006 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#include <config.h>
+
+/* Specification. */
+#include "strcase.h"
+
+#include <ctype.h>
+#include <limits.h>
+
+#define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
+
+/* Compare no more than N bytes of strings S1 and S2,
+ ignoring case, returning less than, equal to or
+ greater than zero if S1 is lexicographically less
+ than, equal to or greater than S2. */
+
+int
+strncasecmp (const char *s1, const char *s2, size_t n)
+{
+ register const unsigned char *p1 = (const unsigned char *) s1;
+ register const unsigned char *p2 = (const unsigned char *) s2;
+ unsigned char c1, c2;
+
+ if (p1 == p2 || n == 0)
+ return 0;
+
+ do
+ {
+ c1 = TOLOWER (*p1);
+ c2 = TOLOWER (*p2);
+
+ if (--n == 0 || c1 == '\0')
+ break;
+
+ ++p1;
+ ++p2;
+ }
+ while (c1 == c2);
+
+ if (UCHAR_MAX <= INT_MAX)
+ return c1 - c2;
+ else
+ /* On machines where 'char' and 'int' are types of the same size, the
+ difference of two 'unsigned char' values - including the sign bit -
+ doesn't fit in an 'int'. */
+ return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0);
+}
diff --git a/lib/strnlen1.c b/lib/strnlen1.c
new file mode 100644
index 0000000..f74e427
--- /dev/null
+++ b/lib/strnlen1.c
@@ -0,0 +1,36 @@
+/* Find the length of STRING + 1, but scan at most MAXLEN bytes.
+ Copyright (C) 2005-2006 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#include <config.h>
+
+/* Specification. */
+#include "strnlen1.h"
+
+#include <string.h>
+
+/* Find the length of STRING + 1, but scan at most MAXLEN bytes.
+ If no '\0' terminator is found in that many characters, return MAXLEN. */
+/* This is the same as strnlen (string, maxlen - 1) + 1. */
+size_t
+strnlen1 (const char *string, size_t maxlen)
+{
+ const char *end = memchr (string, '\0', maxlen);
+ if (end != NULL)
+ return end - string + 1;
+ else
+ return maxlen;
+}
diff --git a/lib/strnlen1.h b/lib/strnlen1.h
new file mode 100644
index 0000000..7ce7d0c
--- /dev/null
+++ b/lib/strnlen1.h
@@ -0,0 +1,40 @@
+/* Find the length of STRING + 1, but scan at most MAXLEN bytes.
+ Copyright (C) 2005 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#ifndef _STRNLEN1_H
+#define _STRNLEN1_H
+
+#include <stddef.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* Find the length of STRING + 1, but scan at most MAXLEN bytes.
+ If no '\0' terminator is found in that many characters, return MAXLEN. */
+/* This is the same as strnlen (string, maxlen - 1) + 1. */
+extern size_t strnlen1 (const char *string, size_t maxlen);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _STRNLEN1_H */
diff --git a/lib/strverscmp.c b/lib/strverscmp.c
new file mode 100644
index 0000000..f077651
--- /dev/null
+++ b/lib/strverscmp.c
@@ -0,0 +1,131 @@
+/* Compare strings while treating digits characters numerically.
+ Copyright (C) 1997, 2000, 2002, 2004, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Jean-François Bignolles <bignolle@ecoledoc.ibp.fr>, 1997.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#if !_LIBC
+# include <config.h>
+#endif
+
+#include <string.h>
+#include <ctype.h>
+
+/* states: S_N: normal, S_I: comparing integral part, S_F: comparing
+ fractional parts, S_Z: idem but with leading Zeroes only */
+#define S_N 0x0
+#define S_I 0x4
+#define S_F 0x8
+#define S_Z 0xC
+
+/* result_type: CMP: return diff; LEN: compare using len_diff/diff */
+#define CMP 2
+#define LEN 3
+
+
+/* ISDIGIT differs from isdigit, as follows:
+ - Its arg may be any int or unsigned int; it need not be an unsigned char
+ or EOF.
+ - It's typically faster.
+ POSIX says that only '0' through '9' are digits. Prefer ISDIGIT to
+ isdigit unless it's important to use the locale's definition
+ of `digit' even when the host does not conform to POSIX. */
+#define ISDIGIT(c) ((unsigned int) (c) - '0' <= 9)
+
+#undef __strverscmp
+#undef strverscmp
+
+#ifndef weak_alias
+# define __strverscmp strverscmp
+#endif
+
+/* Compare S1 and S2 as strings holding indices/version numbers,
+ returning less than, equal to or greater than zero if S1 is less than,
+ equal to or greater than S2 (for more info, see the texinfo doc).
+*/
+
+int
+__strverscmp (const char *s1, const char *s2)
+{
+ const unsigned char *p1 = (const unsigned char *) s1;
+ const unsigned char *p2 = (const unsigned char *) s2;
+ unsigned char c1, c2;
+ int state;
+ int diff;
+
+ /* Symbol(s) 0 [1-9] others (padding)
+ Transition (10) 0 (01) d (00) x (11) - */
+ static const unsigned int next_state[] =
+ {
+ /* state x d 0 - */
+ /* S_N */ S_N, S_I, S_Z, S_N,
+ /* S_I */ S_N, S_I, S_I, S_I,
+ /* S_F */ S_N, S_F, S_F, S_F,
+ /* S_Z */ S_N, S_F, S_Z, S_Z
+ };
+
+ static const int result_type[] =
+ {
+ /* state x/x x/d x/0 x/- d/x d/d d/0 d/-
+ 0/x 0/d 0/0 0/- -/x -/d -/0 -/- */
+
+ /* S_N */ CMP, CMP, CMP, CMP, CMP, LEN, CMP, CMP,
+ CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP,
+ /* S_I */ CMP, -1, -1, CMP, 1, LEN, LEN, CMP,
+ 1, LEN, LEN, CMP, CMP, CMP, CMP, CMP,
+ /* S_F */ CMP, CMP, CMP, CMP, CMP, LEN, CMP, CMP,
+ CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP,
+ /* S_Z */ CMP, 1, 1, CMP, -1, CMP, CMP, CMP,
+ -1, CMP, CMP, CMP
+ };
+
+ if (p1 == p2)
+ return 0;
+
+ c1 = *p1++;
+ c2 = *p2++;
+ /* Hint: '0' is a digit too. */
+ state = S_N | ((c1 == '0') + (ISDIGIT (c1) != 0));
+
+ while ((diff = c1 - c2) == 0 && c1 != '\0')
+ {
+ state = next_state[state];
+ c1 = *p1++;
+ c2 = *p2++;
+ state |= (c1 == '0') + (ISDIGIT (c1) != 0);
+ }
+
+ state = result_type[state << 2 | ((c2 == '0') + (ISDIGIT (c2) != 0))];
+
+ switch (state)
+ {
+ case CMP:
+ return diff;
+
+ case LEN:
+ while (ISDIGIT (*p1++))
+ if (!ISDIGIT (*p2++))
+ return 1;
+
+ return ISDIGIT (*p2) ? -1 : diff;
+
+ default:
+ return state;
+ }
+}
+#ifdef weak_alias
+weak_alias (__strverscmp, strverscmp)
+#endif
diff --git a/lib/strverscmp.h b/lib/strverscmp.h
new file mode 100644
index 0000000..7edeac5
--- /dev/null
+++ b/lib/strverscmp.h
@@ -0,0 +1,24 @@
+/* Compare strings while treating digits characters numerically.
+
+ Copyright (C) 1997, 2003 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#ifndef STRVERSCMP_H_
+# define STRVERSCMP_H_
+
+int strverscmp (const char *, const char *);
+
+#endif /* not STRVERSCMP_H_ */
diff --git a/lib/tempname.c b/lib/tempname.c
new file mode 100644
index 0000000..6fef5e9
--- /dev/null
+++ b/lib/tempname.c
@@ -0,0 +1,317 @@
+/* tempname.c - generate the name of a temporary file.
+
+ Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+ 2000, 2001, 2002, 2003, 2005, 2006 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#if !_LIBC
+# include <config.h>
+#endif
+
+#include <sys/types.h>
+#include <assert.h>
+
+#include <errno.h>
+#ifndef __set_errno
+# define __set_errno(Val) errno = (Val)
+#endif
+
+#include <stdio.h>
+#ifndef P_tmpdir
+# define P_tmpdir "/tmp"
+#endif
+#ifndef TMP_MAX
+# define TMP_MAX 238328
+#endif
+#ifndef __GT_FILE
+# define __GT_FILE 0
+# define __GT_BIGFILE 1
+# define __GT_DIR 2
+# define __GT_NOCREATE 3
+#endif
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <fcntl.h>
+
+#if HAVE_SYS_TIME_H || _LIBC
+# include <sys/time.h>
+#endif
+
+#include <stdint.h>
+#include <unistd.h>
+
+#include <sys/stat.h>
+
+#if _LIBC
+# define struct_stat64 struct stat64
+# define small_open __open
+# define large_open __open64
+#else
+# include "stat-macros.h"
+# define struct_stat64 struct stat
+# define small_open open
+# define large_open open
+# define __getpid getpid
+# define __gettimeofday gettimeofday
+# define __mkdir mkdir
+# define __lxstat64(version, file, buf) lstat (file, buf)
+# define __xstat64(version, file, buf) stat (file, buf)
+#endif
+
+#if ! (HAVE___SECURE_GETENV || _LIBC)
+# define __secure_getenv getenv
+#endif
+
+#ifdef _LIBC
+# include <hp-timing.h>
+# if HP_TIMING_AVAIL
+# define RANDOM_BITS(Var) \
+ if (__builtin_expect (value == UINT64_C (0), 0)) \
+ { \
+ /* If this is the first time this function is used initialize \
+ the variable we accumulate the value in to some somewhat \
+ random value. If we'd not do this programs at startup time \
+ might have a reduced set of possible names, at least on slow \
+ machines. */ \
+ struct timeval tv; \
+ __gettimeofday (&tv, NULL); \
+ value = ((uint64_t) tv.tv_usec << 16) ^ tv.tv_sec; \
+ } \
+ HP_TIMING_NOW (Var)
+# endif
+#endif
+
+/* Use the widest available unsigned type if uint64_t is not
+ available. The algorithm below extracts a number less than 62**6
+ (approximately 2**35.725) from uint64_t, so ancient hosts where
+ uintmax_t is only 32 bits lose about 3.725 bits of randomness,
+ which is better than not having mkstemp at all. */
+#if !defined UINT64_MAX && !defined uint64_t
+# define uint64_t uintmax_t
+#endif
+
+/* Return nonzero if DIR is an existent directory. */
+static int
+direxists (const char *dir)
+{
+ struct_stat64 buf;
+ return __xstat64 (_STAT_VER, dir, &buf) == 0 && S_ISDIR (buf.st_mode);
+}
+
+/* Path search algorithm, for tmpnam, tmpfile, etc. If DIR is
+ non-null and exists, uses it; otherwise uses the first of $TMPDIR,
+ P_tmpdir, /tmp that exists. Copies into TMPL a template suitable
+ for use with mk[s]temp. Will fail (-1) if DIR is non-null and
+ doesn't exist, none of the searched dirs exists, or there's not
+ enough space in TMPL. */
+int
+__path_search (char *tmpl, size_t tmpl_len, const char *dir, const char *pfx,
+ int try_tmpdir)
+{
+ const char *d;
+ size_t dlen, plen;
+
+ if (!pfx || !pfx[0])
+ {
+ pfx = "file";
+ plen = 4;
+ }
+ else
+ {
+ plen = strlen (pfx);
+ if (plen > 5)
+ plen = 5;
+ }
+
+ if (try_tmpdir)
+ {
+ d = __secure_getenv ("TMPDIR");
+ if (d != NULL && direxists (d))
+ dir = d;
+ else if (dir != NULL && direxists (dir))
+ /* nothing */ ;
+ else
+ dir = NULL;
+ }
+ if (dir == NULL)
+ {
+ if (direxists (P_tmpdir))
+ dir = P_tmpdir;
+ else if (strcmp (P_tmpdir, "/tmp") != 0 && direxists ("/tmp"))
+ dir = "/tmp";
+ else
+ {
+ __set_errno (ENOENT);
+ return -1;
+ }
+ }
+
+ dlen = strlen (dir);
+ while (dlen > 1 && dir[dlen - 1] == '/')
+ dlen--; /* remove trailing slashes */
+
+ /* check we have room for "${dir}/${pfx}XXXXXX\0" */
+ if (tmpl_len < dlen + 1 + plen + 6 + 1)
+ {
+ __set_errno (EINVAL);
+ return -1;
+ }
+
+ sprintf (tmpl, "%.*s/%.*sXXXXXX", (int) dlen, dir, (int) plen, pfx);
+ return 0;
+}
+
+/* These are the characters used in temporary file names. */
+static const char letters[] =
+"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
+
+/* Generate a temporary file name based on TMPL. TMPL must match the
+ rules for mk[s]temp (i.e. end in "XXXXXX"). The name constructed
+ does not exist at the time of the call to __gen_tempname. TMPL is
+ overwritten with the result.
+
+ KIND may be one of:
+ __GT_NOCREATE: simply verify that the name does not exist
+ at the time of the call.
+ __GT_FILE: create the file using open(O_CREAT|O_EXCL)
+ and return a read-write fd. The file is mode 0600.
+ __GT_BIGFILE: same as __GT_FILE but use open64().
+ __GT_DIR: create a directory, which will be mode 0700.
+
+ We use a clever algorithm to get hard-to-predict names. */
+int
+__gen_tempname (char *tmpl, int kind)
+{
+ int len;
+ char *XXXXXX;
+ static uint64_t value;
+ uint64_t random_time_bits;
+ unsigned int count;
+ int fd = -1;
+ int save_errno = errno;
+ struct_stat64 st;
+
+ /* A lower bound on the number of temporary files to attempt to
+ generate. The maximum total number of temporary file names that
+ can exist for a given template is 62**6. It should never be
+ necessary to try all these combinations. Instead if a reasonable
+ number of names is tried (we define reasonable as 62**3) fail to
+ give the system administrator the chance to remove the problems. */
+#define ATTEMPTS_MIN (62 * 62 * 62)
+
+ /* The number of times to attempt to generate a temporary file. To
+ conform to POSIX, this must be no smaller than TMP_MAX. */
+#if ATTEMPTS_MIN < TMP_MAX
+ unsigned int attempts = TMP_MAX;
+#else
+ unsigned int attempts = ATTEMPTS_MIN;
+#endif
+
+ len = strlen (tmpl);
+ if (len < 6 || strcmp (&tmpl[len - 6], "XXXXXX"))
+ {
+ __set_errno (EINVAL);
+ return -1;
+ }
+
+ /* This is where the Xs start. */
+ XXXXXX = &tmpl[len - 6];
+
+ /* Get some more or less random data. */
+#ifdef RANDOM_BITS
+ RANDOM_BITS (random_time_bits);
+#else
+# if HAVE_GETTIMEOFDAY || _LIBC
+ {
+ struct timeval tv;
+ __gettimeofday (&tv, NULL);
+ random_time_bits = ((uint64_t) tv.tv_usec << 16) ^ tv.tv_sec;
+ }
+# else
+ random_time_bits = time (NULL);
+# endif
+#endif
+ value += random_time_bits ^ __getpid ();
+
+ for (count = 0; count < attempts; value += 7777, ++count)
+ {
+ uint64_t v = value;
+
+ /* Fill in the random bits. */
+ XXXXXX[0] = letters[v % 62];
+ v /= 62;
+ XXXXXX[1] = letters[v % 62];
+ v /= 62;
+ XXXXXX[2] = letters[v % 62];
+ v /= 62;
+ XXXXXX[3] = letters[v % 62];
+ v /= 62;
+ XXXXXX[4] = letters[v % 62];
+ v /= 62;
+ XXXXXX[5] = letters[v % 62];
+
+ switch (kind)
+ {
+ case __GT_FILE:
+ fd = small_open (tmpl, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
+ break;
+
+ case __GT_BIGFILE:
+ fd = large_open (tmpl, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
+ break;
+
+ case __GT_DIR:
+ fd = __mkdir (tmpl, S_IRUSR | S_IWUSR | S_IXUSR);
+ break;
+
+ case __GT_NOCREATE:
+ /* This case is backward from the other three. __gen_tempname
+ succeeds if __xstat fails because the name does not exist.
+ Note the continue to bypass the common logic at the bottom
+ of the loop. */
+ if (__lxstat64 (_STAT_VER, tmpl, &st) < 0)
+ {
+ if (errno == ENOENT)
+ {
+ __set_errno (save_errno);
+ return 0;
+ }
+ else
+ /* Give up now. */
+ return -1;
+ }
+ continue;
+
+ default:
+ assert (! "invalid KIND in __gen_tempname");
+ }
+
+ if (fd >= 0)
+ {
+ __set_errno (save_errno);
+ return fd;
+ }
+ else if (errno != EEXIST)
+ return -1;
+ }
+
+ /* We got out of the loop because we ran out of combinations to try. */
+ __set_errno (EEXIST);
+ return -1;
+}
diff --git a/lib/unlocked-io.h b/lib/unlocked-io.h
new file mode 100644
index 0000000..d009303
--- /dev/null
+++ b/lib/unlocked-io.h
@@ -0,0 +1,137 @@
+/* Prefer faster, non-thread-safe stdio functions if available.
+
+ Copyright (C) 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* Written by Jim Meyering. */
+
+#ifndef UNLOCKED_IO_H
+# define UNLOCKED_IO_H 1
+
+/* These are wrappers for functions/macros from the GNU C library, and
+ from other C libraries supporting POSIX's optional thread-safe functions.
+
+ The standard I/O functions are thread-safe. These *_unlocked ones are
+ more efficient but not thread-safe. That they're not thread-safe is
+ fine since all of the applications in this package are single threaded.
+
+ Also, some code that is shared with the GNU C library may invoke
+ the *_unlocked functions directly. On hosts that lack those
+ functions, invoke the non-thread-safe versions instead. */
+
+# include <stdio.h>
+
+# if HAVE_DECL_CLEARERR_UNLOCKED
+# undef clearerr
+# define clearerr(x) clearerr_unlocked (x)
+# else
+# define clearerr_unlocked(x) clearerr (x)
+# endif
+
+# if HAVE_DECL_FEOF_UNLOCKED
+# undef feof
+# define feof(x) feof_unlocked (x)
+# else
+# define feof_unlocked(x) feof (x)
+# endif
+
+# if HAVE_DECL_FERROR_UNLOCKED
+# undef ferror
+# define ferror(x) ferror_unlocked (x)
+# else
+# define ferror_unlocked(x) ferror (x)
+# endif
+
+# if HAVE_DECL_FFLUSH_UNLOCKED
+# undef fflush
+# define fflush(x) fflush_unlocked (x)
+# else
+# define fflush_unlocked(x) fflush (x)
+# endif
+
+# if HAVE_DECL_FGETS_UNLOCKED
+# undef fgets
+# define fgets(x,y,z) fgets_unlocked (x,y,z)
+# else
+# define fgets_unlocked(x,y,z) fgets (x,y,z)
+# endif
+
+# if HAVE_DECL_FPUTC_UNLOCKED
+# undef fputc
+# define fputc(x,y) fputc_unlocked (x,y)
+# else
+# define fputc_unlocked(x,y) fputc (x,y)
+# endif
+
+# if HAVE_DECL_FPUTS_UNLOCKED
+# undef fputs
+# define fputs(x,y) fputs_unlocked (x,y)
+# else
+# define fputs_unlocked(x,y) fputs (x,y)
+# endif
+
+# if HAVE_DECL_FREAD_UNLOCKED
+# undef fread
+# define fread(w,x,y,z) fread_unlocked (w,x,y,z)
+# else
+# define fread_unlocked(w,x,y,z) fread (w,x,y,z)
+# endif
+
+# if HAVE_DECL_FWRITE_UNLOCKED
+# undef fwrite
+# define fwrite(w,x,y,z) fwrite_unlocked (w,x,y,z)
+# else
+# define fwrite_unlocked(w,x,y,z) fwrite (w,x,y,z)
+# endif
+
+# if HAVE_DECL_GETC_UNLOCKED
+# undef getc
+# define getc(x) getc_unlocked (x)
+# else
+# define getc_unlocked(x) getc (x)
+# endif
+
+# if HAVE_DECL_GETCHAR_UNLOCKED
+# undef getchar
+# define getchar() getchar_unlocked ()
+# else
+# define getchar_unlocked() getchar ()
+# endif
+
+# if HAVE_DECL_PUTC_UNLOCKED
+# undef putc
+# define putc(x,y) putc_unlocked (x,y)
+# else
+# define putc_unlocked(x,y) putc (x,y)
+# endif
+
+# if HAVE_DECL_PUTCHAR_UNLOCKED
+# undef putchar
+# define putchar(x) putchar_unlocked (x)
+# else
+# define putchar_unlocked(x) putchar (x)
+# endif
+
+# undef flockfile
+# define flockfile(x) ((void) 0)
+
+# undef ftrylockfile
+# define ftrylockfile(x) 0
+
+# undef funlockfile
+# define funlockfile(x) ((void) 0)
+
+#endif /* UNLOCKED_IO_H */
diff --git a/lib/verify.h b/lib/verify.h
new file mode 100644
index 0000000..d603b17
--- /dev/null
+++ b/lib/verify.h
@@ -0,0 +1,141 @@
+/* Compile-time assert-like macros.
+
+ Copyright (C) 2005, 2006 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* Written by Paul Eggert, Bruno Haible, and Jim Meyering. */
+
+#ifndef VERIFY_H
+# define VERIFY_H 1
+
+/* Each of these macros verifies that its argument R is nonzero. To
+ be portable, R should be an integer constant expression. Unlike
+ assert (R), there is no run-time overhead.
+
+ There are two macros, since no single macro can be used in all
+ contexts in C. verify_true (R) is for scalar contexts, including
+ integer constant expression contexts. verify (R) is for declaration
+ contexts, e.g., the top level.
+
+ Symbols ending in "__" are private to this header.
+
+ The code below uses several ideas.
+
+ * The first step is ((R) ? 1 : -1). Given an expression R, of
+ integral or boolean or floating-point type, this yields an
+ expression of integral type, whose value is later verified to be
+ constant and nonnegative.
+
+ * Next this expression W is wrapped in a type
+ struct verify_type__ { unsigned int verify_error_if_negative_size__: W; }.
+ If W is negative, this yields a compile-time error. No compiler can
+ deal with a bit-field of negative size.
+
+ One might think that an array size check would have the same
+ effect, that is, that the type struct { unsigned int dummy[W]; }
+ would work as well. However, inside a function, some compilers
+ (such as C++ compilers and GNU C) allow local parameters and
+ variables inside array size expressions. With these compilers,
+ an array size check would not properly diagnose this misuse of
+ the verify macro:
+
+ void function (int n) { verify (n < 0); }
+
+ * For the verify macro, the struct verify_type__ will need to
+ somehow be embedded into a declaration. To be portable, this
+ declaration must declare an object, a constant, a function, or a
+ typedef name. If the declared entity uses the type directly,
+ such as in
+
+ struct dummy {...};
+ typedef struct {...} dummy;
+ extern struct {...} *dummy;
+ extern void dummy (struct {...} *);
+ extern struct {...} *dummy (void);
+
+ two uses of the verify macro would yield colliding declarations
+ if the entity names are not disambiguated. A workaround is to
+ attach the current line number to the entity name:
+
+ #define GL_CONCAT0(x, y) x##y
+ #define GL_CONCAT(x, y) GL_CONCAT0 (x, y)
+ extern struct {...} * GL_CONCAT(dummy,__LINE__);
+
+ But this has the problem that two invocations of verify from
+ within the same macro would collide, since the __LINE__ value
+ would be the same for both invocations.
+
+ A solution is to use the sizeof operator. It yields a number,
+ getting rid of the identity of the type. Declarations like
+
+ extern int dummy [sizeof (struct {...})];
+ extern void dummy (int [sizeof (struct {...})]);
+ extern int (*dummy (void)) [sizeof (struct {...})];
+
+ can be repeated.
+
+ * Should the implementation use a named struct or an unnamed struct?
+ Which of the following alternatives can be used?
+
+ extern int dummy [sizeof (struct {...})];
+ extern int dummy [sizeof (struct verify_type__ {...})];
+ extern void dummy (int [sizeof (struct {...})]);
+ extern void dummy (int [sizeof (struct verify_type__ {...})]);
+ extern int (*dummy (void)) [sizeof (struct {...})];
+ extern int (*dummy (void)) [sizeof (struct verify_type__ {...})];
+
+ In the second and sixth case, the struct type is exported to the
+ outer scope; two such declarations therefore collide. GCC warns
+ about the first, third, and fourth cases. So the only remaining
+ possibility is the fifth case:
+
+ extern int (*dummy (void)) [sizeof (struct {...})];
+
+ * This implementation exploits the fact that GCC does not warn about
+ the last declaration mentioned above. If a future version of GCC
+ introduces a warning for this, the problem could be worked around
+ by using code specialized to GCC, e.g.,:
+
+ #if 4 <= __GNUC__
+ # define verify(R) \
+ extern int (* verify_function__ (void)) \
+ [__builtin_constant_p (R) && (R) ? 1 : -1]
+ #endif
+
+ * In C++, any struct definition inside sizeof is invalid.
+ Use a template type to work around the problem. */
+
+
+/* Verify requirement R at compile-time, as an integer constant expression.
+ Return 1. */
+
+# ifdef __cplusplus
+template <int w>
+ struct verify_type__ { unsigned int verify_error_if_negative_size__: w; };
+# define verify_true(R) \
+ (!!sizeof (verify_type__<(R) ? 1 : -1>))
+# else
+# define verify_true(R) \
+ (!!sizeof \
+ (struct { unsigned int verify_error_if_negative_size__: (R) ? 1 : -1; }))
+# endif
+
+/* Verify requirement R at compile-time, as a declaration without a
+ trailing ';'. */
+
+# define verify(R) extern int (* verify_function__ (void)) [verify_true (R)]
+
+#endif
diff --git a/lib/wcwidth.h b/lib/wcwidth.h
new file mode 100644
index 0000000..37b83cb
--- /dev/null
+++ b/lib/wcwidth.h
@@ -0,0 +1,77 @@
+/* Determine the number of screen columns needed for a character.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#ifndef _gl_WCWIDTH_H
+#define _gl_WCWIDTH_H
+
+#if HAVE_WCHAR_T
+
+/* Get wcwidth if available, along with wchar_t. */
+# if HAVE_WCHAR_H
+/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
+ <wchar.h>.
+ BSD/OS 4.1 has a bug: <stdio.h> and <time.h> must be included before
+ <wchar.h>. */
+# include <stdio.h>
+# include <time.h>
+# include <wchar.h>
+# endif
+
+/* Get iswprint. */
+# if HAVE_WCTYPE_H
+# include <wctype.h>
+# endif
+# if !defined iswprint && !HAVE_ISWPRINT
+static inline int
+iswprint (wint_t wc)
+{
+ return (wc >= 0 && wc < 128
+ ? wc >= ' ' && wc <= '~'
+ : 1);
+}
+# define iswprint iswprint
+# endif
+
+# ifndef HAVE_DECL_WCWIDTH
+"this configure-time declaration test was not run"
+# endif
+# ifndef wcwidth
+# if !HAVE_WCWIDTH
+
+/* wcwidth doesn't exist, so assume all printable characters have
+ width 1. */
+static inline int
+wcwidth (wchar_t wc)
+{
+ return wc == 0 ? 0 : iswprint (wc) ? 1 : -1;
+}
+
+# elif !HAVE_DECL_WCWIDTH
+
+/* wcwidth exists but is not declared. */
+extern
+# ifdef __cplusplus
+"C"
+# endif
+int wcwidth (int /* actually wchar_t */);
+
+# endif
+# endif
+
+#endif /* HAVE_WCHAR_H */
+
+#endif /* _gl_WCWIDTH_H */
diff --git a/lib/xalloc-die.c b/lib/xalloc-die.c
new file mode 100644
index 0000000..090f060
--- /dev/null
+++ b/lib/xalloc-die.c
@@ -0,0 +1,42 @@
+/* Report a memory allocation failure and exit.
+
+ Copyright (C) 1997, 1998, 1999, 2000, 2002, 2003, 2004, 2006 Free
+ Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#include <config.h>
+
+#include "xalloc.h"
+
+#include <stdlib.h>
+
+#include "error.h"
+#include "exitfail.h"
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
+void
+xalloc_die (void)
+{
+ error (exit_failure, 0, "%s", _("memory exhausted"));
+
+ /* The `noreturn' cannot be given to error, since it may return if
+ its first argument is 0. To help compilers understand the
+ xalloc_die does not return, call abort. Also, the abort is a
+ safety feature if exit_failure is 0 (which shouldn't happen). */
+ abort ();
+}
diff --git a/lib/xalloc.h b/lib/xalloc.h
new file mode 100644
index 0000000..f80977e
--- /dev/null
+++ b/lib/xalloc.h
@@ -0,0 +1,79 @@
+/* xalloc.h -- malloc with out-of-memory checking
+
+ Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
+ 1999, 2000, 2003, 2004 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#ifndef XALLOC_H_
+# define XALLOC_H_
+
+# include <stddef.h>
+
+
+# ifdef __cplusplus
+extern "C" {
+# endif
+
+
+# ifndef __attribute__
+# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 8) || __STRICT_ANSI__
+# define __attribute__(x)
+# endif
+# endif
+
+# ifndef ATTRIBUTE_NORETURN
+# define ATTRIBUTE_NORETURN __attribute__ ((__noreturn__))
+# endif
+
+/* This function is always triggered when memory is exhausted.
+ It must be defined by the application, either explicitly
+ or by using gnulib's xalloc-die module. This is the
+ function to call when one wants the program to die because of a
+ memory allocation failure. */
+extern void xalloc_die (void) ATTRIBUTE_NORETURN;
+
+void *xmalloc (size_t s);
+void *xnmalloc (size_t n, size_t s);
+void *xzalloc (size_t s);
+void *xcalloc (size_t n, size_t s);
+void *xrealloc (void *p, size_t s);
+void *xnrealloc (void *p, size_t n, size_t s);
+void *x2realloc (void *p, size_t *pn);
+void *x2nrealloc (void *p, size_t *pn, size_t s);
+void *xmemdup (void const *p, size_t s);
+char *xstrdup (char const *str);
+
+/* Return 1 if an array of N objects, each of size S, cannot exist due
+ to size arithmetic overflow. S must be positive and N must be
+ nonnegative. This is a macro, not an inline function, so that it
+ works correctly even when SIZE_MAX < N.
+
+ By gnulib convention, SIZE_MAX represents overflow in size
+ calculations, so the conservative dividend to use here is
+ SIZE_MAX - 1, since SIZE_MAX might represent an overflowed value.
+ However, malloc (SIZE_MAX) fails on all known hosts where
+ sizeof (ptrdiff_t) <= sizeof (size_t), so do not bother to test for
+ exactly-SIZE_MAX allocations on such hosts; this avoids a test and
+ branch when S is known to be 1. */
+# define xalloc_oversized(n, s) \
+ ((size_t) (sizeof (ptrdiff_t) <= sizeof (size_t) ? -1 : -2) / (s) < (n))
+
+# ifdef __cplusplus
+}
+# endif
+
+
+#endif /* !XALLOC_H_ */
diff --git a/lib/xmalloc.c b/lib/xmalloc.c
new file mode 100644
index 0000000..fe84a0b
--- /dev/null
+++ b/lib/xmalloc.c
@@ -0,0 +1,240 @@
+/* xmalloc.c -- malloc with out of memory checking
+
+ Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
+ 1999, 2000, 2002, 2003, 2004, 2005, 2006 Free Software Foundation,
+ Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#include <config.h>
+
+#include "xalloc.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#ifndef SIZE_MAX
+# define SIZE_MAX ((size_t) -1)
+#endif
+
+/* 1 if calloc is known to be compatible with GNU calloc. This
+ matters if we are not also using the calloc module, which defines
+ HAVE_CALLOC and supports the GNU API even on non-GNU platforms. */
+#if defined HAVE_CALLOC || defined __GLIBC__
+enum { HAVE_GNU_CALLOC = 1 };
+#else
+enum { HAVE_GNU_CALLOC = 0 };
+#endif
+
+/* Allocate an array of N objects, each with S bytes of memory,
+ dynamically, with error checking. S must be nonzero. */
+
+static inline void *
+xnmalloc_inline (size_t n, size_t s)
+{
+ void *p;
+ if (xalloc_oversized (n, s) || (! (p = malloc (n * s)) && n != 0))
+ xalloc_die ();
+ return p;
+}
+
+void *
+xnmalloc (size_t n, size_t s)
+{
+ return xnmalloc_inline (n, s);
+}
+
+/* Allocate N bytes of memory dynamically, with error checking. */
+
+void *
+xmalloc (size_t n)
+{
+ return xnmalloc_inline (n, 1);
+}
+
+/* Change the size of an allocated block of memory P to an array of N
+ objects each of S bytes, with error checking. S must be nonzero. */
+
+static inline void *
+xnrealloc_inline (void *p, size_t n, size_t s)
+{
+ if (xalloc_oversized (n, s) || (! (p = realloc (p, n * s)) && n != 0))
+ xalloc_die ();
+ return p;
+}
+
+void *
+xnrealloc (void *p, size_t n, size_t s)
+{
+ return xnrealloc_inline (p, n, s);
+}
+
+/* Change the size of an allocated block of memory P to N bytes,
+ with error checking. */
+
+void *
+xrealloc (void *p, size_t n)
+{
+ return xnrealloc_inline (p, n, 1);
+}
+
+
+/* If P is null, allocate a block of at least *PN such objects;
+ otherwise, reallocate P so that it contains more than *PN objects
+ each of S bytes. *PN must be nonzero unless P is null, and S must
+ be nonzero. Set *PN to the new number of objects, and return the
+ pointer to the new block. *PN is never set to zero, and the
+ returned pointer is never null.
+
+ Repeated reallocations are guaranteed to make progress, either by
+ allocating an initial block with a nonzero size, or by allocating a
+ larger block.
+
+ In the following implementation, nonzero sizes are doubled so that
+ repeated reallocations have O(N log N) overall cost rather than
+ O(N**2) cost, but the specification for this function does not
+ guarantee that sizes are doubled.
+
+ Here is an example of use:
+
+ int *p = NULL;
+ size_t used = 0;
+ size_t allocated = 0;
+
+ void
+ append_int (int value)
+ {
+ if (used == allocated)
+ p = x2nrealloc (p, &allocated, sizeof *p);
+ p[used++] = value;
+ }
+
+ This causes x2nrealloc to allocate a block of some nonzero size the
+ first time it is called.
+
+ To have finer-grained control over the initial size, set *PN to a
+ nonzero value before calling this function with P == NULL. For
+ example:
+
+ int *p = NULL;
+ size_t used = 0;
+ size_t allocated = 0;
+ size_t allocated1 = 1000;
+
+ void
+ append_int (int value)
+ {
+ if (used == allocated)
+ {
+ p = x2nrealloc (p, &allocated1, sizeof *p);
+ allocated = allocated1;
+ }
+ p[used++] = value;
+ }
+
+ */
+
+static inline void *
+x2nrealloc_inline (void *p, size_t *pn, size_t s)
+{
+ size_t n = *pn;
+
+ if (! p)
+ {
+ if (! n)
+ {
+ /* The approximate size to use for initial small allocation
+ requests, when the invoking code specifies an old size of
+ zero. 64 bytes is the largest "small" request for the
+ GNU C library malloc. */
+ enum { DEFAULT_MXFAST = 64 };
+
+ n = DEFAULT_MXFAST / s;
+ n += !n;
+ }
+ }
+ else
+ {
+ if (SIZE_MAX / 2 / s < n)
+ xalloc_die ();
+ n *= 2;
+ }
+
+ *pn = n;
+ return xrealloc (p, n * s);
+}
+
+void *
+x2nrealloc (void *p, size_t *pn, size_t s)
+{
+ return x2nrealloc_inline (p, pn, s);
+}
+
+/* If P is null, allocate a block of at least *PN bytes; otherwise,
+ reallocate P so that it contains more than *PN bytes. *PN must be
+ nonzero unless P is null. Set *PN to the new block's size, and
+ return the pointer to the new block. *PN is never set to zero, and
+ the returned pointer is never null. */
+
+void *
+x2realloc (void *p, size_t *pn)
+{
+ return x2nrealloc_inline (p, pn, 1);
+}
+
+/* Allocate S bytes of zeroed memory dynamically, with error checking.
+ There's no need for xnzalloc (N, S), since it would be equivalent
+ to xcalloc (N, S). */
+
+void *
+xzalloc (size_t s)
+{
+ return memset (xmalloc (s), 0, s);
+}
+
+/* Allocate zeroed memory for N elements of S bytes, with error
+ checking. S must be nonzero. */
+
+void *
+xcalloc (size_t n, size_t s)
+{
+ void *p;
+ /* Test for overflow, since some calloc implementations don't have
+ proper overflow checks. But omit overflow and size-zero tests if
+ HAVE_GNU_CALLOC, since GNU calloc catches overflow and never
+ returns NULL if successful. */
+ if ((! HAVE_GNU_CALLOC && xalloc_oversized (n, s))
+ || (! (p = calloc (n, s)) && (HAVE_GNU_CALLOC || n != 0)))
+ xalloc_die ();
+ return p;
+}
+
+/* Clone an object P of size S, with error checking. There's no need
+ for xnmemdup (P, N, S), since xmemdup (P, N * S) works without any
+ need for an arithmetic overflow check. */
+
+void *
+xmemdup (void const *p, size_t s)
+{
+ return memcpy (xmalloc (s), p, s);
+}
+
+/* Clone STRING. */
+
+char *
+xstrdup (char const *string)
+{
+ return xmemdup (string, strlen (string) + 1);
+}