summary refs log tree commit diff
path: root/libuxre/re.h
diff options
context:
space:
mode:
Diffstat (limited to 'libuxre/re.h')
-rw-r--r-- libuxre/re.h 228
1 files changed, 228 insertions, 0 deletions
diff --git a/libuxre/re.h b/libuxre/re.h
new file mode 100644
index 0000000..2738a05
--- /dev/null
+++ b/libuxre/re.h
@@ -0,0 +1,228 @@
+/*
+ * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
+ *
+ * Sccsid @(#)re.h 1.15 (gritter) 2/6/05
+ */
+/* UNIX(R) Regular Expression Library
+ *
+ * Note: Code is released under the GNU LGPL
+ *
+ * Copyright (C) 2001 Caldera International, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef LIBUXRE_RE_H
+#define LIBUXRE_RE_H
+
+ /*
+ * Maps safe external tag to internal one
+ */
+#define re_coll_ lc_collate /* <regex.h> */
+/* #define __fnm_collate lc_collate */ /* <fnmatch.h> */
+
+#include <limits.h>
+#include <regex.h>
+/* #include <fnmatch.h> */
+#include <colldata.h>
+
+#define NBSHT (sizeof(unsigned short) * CHAR_BIT) /* number of bits in one unsigned short */
+#define NBYTE (((1 << CHAR_BIT) + NBSHT - 1) / NBSHT) /* unsigned shorts needed to hold 1<<CHAR_BIT bits, rounded up */
+#define NTYPE 4 /* length of Bracket.type[] */
+#define NWIDE 32 /* length of Bracket.wide[] */
+#define NQUIV 4 /* length of Bracket.quiv[] */
+
+typedef struct /* data for one bracket expression; pointed to by Info.bkt (ROP_BKT) */
+{
+ struct lc_collate *col; /* only member set by caller */
+ wctype_t *extype; /* NOTE(review): presumably overflow storage for type[] -- confirm */
+ wuchar_type *exquiv; /* NOTE(review): presumably overflow storage for quiv[] -- confirm */
+ wchar_t *exwide; /* NOTE(review): presumably overflow storage for wide[] -- confirm */
+ wctype_t type[NTYPE];
+ wuchar_type quiv[NQUIV];
+ wchar_t wide[NWIDE];
+ unsigned short byte[NBYTE]; /* sized to provide one bit per unsigned char value */
+ unsigned short ntype; /* NOTE(review): presumably entries in use in type -- confirm */
+ unsigned short nquiv; /* NOTE(review): presumably entries in use in quiv -- confirm */
+ unsigned short nwide; /* NOTE(review): presumably entries in use in wide -- confirm */
+ unsigned int flags; /* NOTE(review): presumably BKT_* bits -- confirm */
+} Bracket;
+
+#define BKT_NEGATED 0x001 /* complemented set */
+#define BKT_ONECASE 0x002 /* uppercase same as lowercase */
+#define BKT_NOTNL 0x004 /* do not match newline when BKT_NEGATED */
+#define BKT_BADRANGE 0x008 /* accept [m-a] ranges as [ma] */
+#define BKT_SEPRANGE 0x010 /* disallow [a-m-z] style ranges */
+#define BKT_NLBAD 0x020 /* newline disallowed */
+#define BKT_SLASHBAD 0x040 /* slash disallowed (for pathnames) */
+#define BKT_EMPTY 0x080 /* take leading ] as end (empty set) */
+#define BKT_ESCAPE 0x100 /* allow \ as quote for next anything */
+#define BKT_QUOTE 0x200 /* allow \ as quote for \\, \^, \- or \] */
+#define BKT_ESCNL 0x400 /* take \n as the newline character */
+#define BKT_ESCSEQ 0x800 /* otherwise, take \ as in C escapes */
+#define BKT_ODDRANGE 0x1000 /* oawk oddity: [m-a] means [m] */
+#define BKT_NOI18N 0x2000 /* disable [::] [==] [..] */
+#define BKT_OLDESC 0x4000 /* enable \b \f \n \r \t only */
+ /*
+ * These error returns for libuxre_bktmbcomp() are directly tied to
+ * the error returns for regcomp() for convenience: each is the negated REG_* value.
+ */
+#define BKT_BADPAT (-REG_BADPAT)
+#define BKT_ECOLLATE (-REG_ECOLLATE)
+#define BKT_ECTYPE (-REG_ECTYPE)
+#define BKT_EEQUIV (-REG_EEQUIV)
+#define BKT_BADCHAR (-REG_EBKTCHAR)
+#define BKT_EBRACK (-REG_EBRACK)
+#define BKT_EMPTYSUBBKT (-REG_EMPTYSUBBKT)
+#define BKT_ERANGE (-REG_ERANGE)
+#define BKT_ESPACE (-REG_ESPACE)
+#define BKT_BADESC (-REG_BADESC)
+#define BKT_ILLSEQ (-REG_ILLSEQ)
+
+ /*
+ * These must be distinct from the flags in <fnmatch.h>.
+ */
+#define FNM_COLLATE 0x2000 /* have collation information */
+#define FNM_CURRENT 0x4000 /* have full-sized fnm_t structure */
+
+ /*
+ * These must be distinct from the flags in <regex.h>.
+ */
+#define REG_NFA 0x20000000 /* NOTE(review): presumably marks use of the NFA engine -- confirm */
+#define REG_DFA 0x40000000 /* NOTE(review): presumably marks use of the DFA engine -- confirm */
+#define REG_GOTBKT 0x80000000 /* bit 31: exceeds the positive range of a 32-bit int; NOTE(review): presumably "pattern has a bracket" -- confirm */
+
+#define BRACE_INF USHRT_MAX /* fits Info.num[] (unsigned short); NOTE(review): presumably "no upper bound" -- confirm */
+#define BRACE_MAX 5100 /* arbitrary number < SHRT_MAX */
+#define BRACE_DFAMAX 255 /* max amount for r.e. duplication */
+
+typedef union /* extra info always kept for some tokens/nodes */
+{
+ Bracket *bkt; /* ROP_BKT */
+ size_t sub; /* ROP_LP (ROP_RP), ROP_REF */
+ unsigned short num[2]; /* ROP_BRACE: num[0]=low, num[1]=high */
+} Info;
+
+typedef struct /* lexical context while parsing */
+{
+ Info info; /* NOTE(review): presumably extra info for the token in tok -- confirm */
+ const unsigned char *pat; /* NOTE(review): presumably the pattern text being scanned -- confirm */
+ unsigned char *clist;
+ struct lc_collate *col;
+ unsigned long flags; /* wide enough for the REG_NFA/REG_DFA/REG_GOTBKT bits */
+ w_type tok;
+ size_t maxref; /* NOTE(review): presumably highest \digit reference seen (cf. ROP_REF) -- confirm */
+ size_t nleft;
+ size_t nright;
+ size_t nclist; /* NOTE(review): presumably length of clist -- confirm */
+ int bktflags; /* NOTE(review): presumably BKT_* bits for bracket compilation -- confirm */
+ int err; /* NOTE(review): presumably a REG_* error code -- confirm */
+ int mb_cur_max; /* NOTE(review): presumably a cached MB_CUR_MAX -- confirm */
+} Lex;
+
+typedef struct t_tree Tree; /* RE parse tree node */
+struct t_tree
+{
+ union
+ {
+ Tree *ptr; /* unary & binary nodes */
+ size_t pos; /* position for DFA leaves */
+ } left;
+ union
+ {
+ Tree *ptr; /* binary nodes */
+ Info info; /* extra per-node info (bracket, subexpression number or brace counts) */
+ } right;
+ Tree *parent;
+ w_type op; /* positive => char. to match; negative => one of the ROP_* operators */
+};
+
+typedef struct re_dfa_ Dfa; /* DFA engine description */
+typedef struct re_nfa_ Nfa; /* NFA engine description */
+
+typedef struct /* argument bundle for libuxre_regdfaexec() and libuxre_regnfaexec() */
+{
+ const unsigned char *str; /* NOTE(review): presumably the subject string -- confirm */
+ regmatch_t *match; /* NOTE(review): presumably the caller's match array -- confirm */
+ size_t nmatch; /* NOTE(review): presumably the number of entries in match -- confirm */
+ unsigned long flags;
+ int mb_cur_max; /* NOTE(review): presumably a cached MB_CUR_MAX -- confirm */
+} Exec;
+
+ /*
+ * Regular expression operators. Some only used internally.
+ * All are negative, to distinguish them from the regular
+ * "match this particular wide character" operation.
+ */
+#define BINARY_ROP 0x02 /* kind used by ROP_OR and ROP_CAT */
+#define UNARY_ROP 0x01 /* kind used by ROP_STAR through ROP_RP */
+#define LEAF_ROP 0x00 /* kind used by ROP_NOP through ROP_END */
+
+#define MAKE_ROP(k, v) (-((v) | ((k) << 4))) /* kind k above bit 3, v in the low 4 bits (so v must be < 16), then negated */
+#define KIND_ROP(v) ((-(v)) >> 4) /* recovers the kind (BINARY_ROP/UNARY_ROP/LEAF_ROP) from an ROP_* value */
+
+#define ROP_OR MAKE_ROP(BINARY_ROP, 1)
+#define ROP_CAT MAKE_ROP(BINARY_ROP, 2)
+
+#define ROP_STAR MAKE_ROP(UNARY_ROP, 1)
+#define ROP_PLUS MAKE_ROP(UNARY_ROP, 2)
+#define ROP_QUEST MAKE_ROP(UNARY_ROP, 3)
+#define ROP_BRACE MAKE_ROP(UNARY_ROP, 4)
+#define ROP_LP MAKE_ROP(UNARY_ROP, 5)
+#define ROP_RP MAKE_ROP(UNARY_ROP, 6)
+
+#define ROP_NOP MAKE_ROP(LEAF_ROP, 1) /* temporary */
+#define ROP_BOL MAKE_ROP(LEAF_ROP, 2) /* ^ anchor */
+#define ROP_EOL MAKE_ROP(LEAF_ROP, 3) /* $ anchor */
+#define ROP_ALL MAKE_ROP(LEAF_ROP, 4) /* anything (added) */
+#define ROP_ANYCH MAKE_ROP(LEAF_ROP, 5) /* . w/\n */
+#define ROP_NOTNL MAKE_ROP(LEAF_ROP, 6) /* . w/out \n */
+#define ROP_EMPTY MAKE_ROP(LEAF_ROP, 7) /* empty string */
+#define ROP_NONE MAKE_ROP(LEAF_ROP, 8) /* match failure */
+#define ROP_BKT MAKE_ROP(LEAF_ROP, 9) /* [...] */
+#define ROP_BKTCOPY MAKE_ROP(LEAF_ROP, 10) /* [...] (duplicated) */
+#define ROP_LT MAKE_ROP(LEAF_ROP, 11) /* \< word begin */
+#define ROP_GT MAKE_ROP(LEAF_ROP, 12) /* \> word end */
+#define ROP_REF MAKE_ROP(LEAF_ROP, 13) /* \digit */
+#define ROP_END MAKE_ROP(LEAF_ROP, 14) /* final (added) */
+
+ /*
+ * Return values:
+ * libuxre_bktmbcomp()
+ * <0 error (see BKT_* above); >0 #bytes scanned
+ * libuxre_bktmbexec()
+ * <0 doesn't match; >=0 matches, #extra bytes scanned
+ */
+LIBUXRE_STATIC void libuxre_bktfree(Bracket *);
+LIBUXRE_STATIC int libuxre_bktmbcomp(Bracket *, const unsigned char *,
+ int, int);
+LIBUXRE_STATIC int libuxre_bktmbexec(Bracket *, wchar_t,
+ const unsigned char *, int);
+
+LIBUXRE_STATIC void libuxre_regdeltree(Tree *, int);
+LIBUXRE_STATIC Tree *libuxre_reg1tree(w_type, Tree *);
+LIBUXRE_STATIC Tree *libuxre_reg2tree(w_type, Tree *, Tree *);
+LIBUXRE_STATIC Tree *libuxre_regparse(Lex *, const unsigned char *, int);
+
+extern void libuxre_regdeldfa(Dfa *);
+LIBUXRE_STATIC int libuxre_regdfacomp(regex_t *, Tree *, Lex *);
+LIBUXRE_STATIC int libuxre_regdfaexec(Dfa *, Exec *);
+
+extern void libuxre_regdelnfa(Nfa *);
+LIBUXRE_STATIC int libuxre_regnfacomp(regex_t *, Tree *, Lex *);
+LIBUXRE_STATIC int libuxre_regnfaexec(Nfa *, Exec *);
+#endif /* !LIBUXRE_RE_H */