summaryrefslogtreecommitdiff
path: root/libuxre/regparse.c
diff options
context:
space:
mode:
Diffstat (limited to 'libuxre/regparse.c')
-rw-r--r--libuxre/regparse.c1091
1 files changed, 1091 insertions, 0 deletions
diff --git a/libuxre/regparse.c b/libuxre/regparse.c
new file mode 100644
index 0000000..0a5c6b2
--- /dev/null
+++ b/libuxre/regparse.c
@@ -0,0 +1,1091 @@
+/*
+ * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
+ *
+ * Sccsid @(#)regparse.c 1.12 (gritter) 9/22/03
+ */
+/* UNIX(R) Regular Expresssion Library
+ *
+ * Note: Code is released under the GNU LGPL
+ *
+ * Copyright (C) 2001 Caldera International, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* #include "synonyms.h" */
+#include <stdlib.h>
+#include <ctype.h>
+#include "re.h"
+
+LIBUXRE_STATIC void
+libuxre_regdeltree(Tree *tp, int all)
+{
+ if (tp == 0)
+ return;
+ if (tp->op < 0)
+ {
+ switch (KIND_ROP(tp->op))
+ {
+ case BINARY_ROP:
+ libuxre_regdeltree(tp->right.ptr, all);
+ /*FALLTHROUGH*/
+ case UNARY_ROP:
+ libuxre_regdeltree(tp->left.ptr, all);
+ break;
+ default:
+ if (tp->op == ROP_BKT && all)
+ {
+ libuxre_bktfree(tp->right.info.bkt);
+ free(tp->right.info.bkt);
+ }
+ break;
+ }
+ }
+ free(tp);
+}
+
+LIBUXRE_STATIC Tree *
+libuxre_reg1tree(w_type op, Tree *lp)
+{
+ Tree *tp;
+
+ if ((tp = malloc(sizeof(Tree))) == 0)
+ {
+ if (lp != 0)
+ libuxre_regdeltree(lp, 1);
+ return 0;
+ }
+ tp->op = op;
+ tp->left.ptr = lp;
+ if (lp != 0)
+ lp->parent = tp;
+ return tp;
+}
+
+LIBUXRE_STATIC Tree *
+libuxre_reg2tree(w_type op, Tree *lp, Tree *rp)
+{
+ Tree *tp;
+
+ if ((tp = malloc(sizeof(Tree))) == 0)
+ {
+ libuxre_regdeltree(lp, 1);
+ libuxre_regdeltree(rp, 1);
+ return 0;
+ }
+ tp->op = op;
+ tp->left.ptr = lp;
+ lp->parent = tp;
+ tp->right.ptr = rp;
+ rp->parent = tp;
+ return tp;
+}
+
+static int
+lex(Lex *lxp)
+{
+ size_t num;
+ w_type wc;
+ int n, mb_cur_max;
+
+ mb_cur_max = lxp->mb_cur_max;
+nextc: switch (wc = *lxp->pat++) /* interesting ones are single bytes */
+ {
+ case '\0':
+ lxp->pat--; /* continue to report ROP_END */
+ wc = ROP_END;
+ break;
+ case '(':
+ if (lxp->flags & REG_PARENS)
+ {
+ leftparen:;
+ /*
+ * Must keep track of the closed and
+ * yet-to-be closed groups as a list.
+ * Consider (()a(()b(()c(()d... in which
+ * at each letter another even-numbered
+ * group is made available, but no
+ * odd-numbered ones are.
+ */
+ if ((lxp->flags & REG_NOBACKREF) == 0)
+ {
+ if (lxp->nleft >= lxp->nclist) /* grow it */
+ {
+ unsigned char *p;
+
+ lxp->nclist += 8; /* arbitrary */
+ if ((p = realloc(lxp->clist,
+ lxp->nclist)) == 0)
+ {
+ lxp->err = REG_ESPACE;
+ return -1;
+ }
+ lxp->clist = p;
+ }
+ lxp->clist[lxp->nleft] = 0; /* unavailable */
+ }
+ lxp->nleft++;
+ wc = ROP_LP;
+ }
+ break;
+ case ')':
+ /*
+ * For REG_PARENS, only take a right paren as a close
+ * if there is a matching left paren.
+ */
+ if (lxp->flags & REG_PARENS && lxp->nright < lxp->nleft)
+ {
+ lxp->nright++;
+ rightparen:;
+ /*
+ * The group that is being closed is the highest
+ * numbered as-yet-unclosed group.
+ */
+ if ((lxp->flags & REG_NOBACKREF) == 0)
+ {
+ num = lxp->nleft;
+ while (lxp->clist[--num] != 0)
+ ;
+ lxp->clist[num] = 1;
+ }
+ wc = ROP_RP;
+ }
+ break;
+ case '.':
+ wc = ROP_ANYCH;
+ if (lxp->flags & REG_NEWLINE)
+ wc = ROP_NOTNL;
+ break;
+ case '*':
+ if (lxp->flags & REG_ADDITIVE)
+ {
+ nxtstar: switch (*lxp->pat)
+ {
+ case '+':
+ if ((lxp->flags & REG_PLUS) == 0)
+ break;
+ lxp->pat++;
+ goto nxtstar;
+ case '?':
+ if ((lxp->flags & REG_QUEST) == 0)
+ break;
+ /*FALLTHRU*/
+ case '*':
+ lxp->pat++;
+ goto nxtstar;
+ }
+ }
+ wc = ROP_STAR;
+ break;
+ case '^':
+ /*
+ * Look "behind" to see if this is an anchor.
+ * Take it as an anchor if it follows an alternation
+ * operator. (lxp->tok is initially set to ROP_OR.)
+ */
+ if (lxp->flags & REG_ANCHORS || lxp->tok == ROP_OR) {
+ if (lxp->flags & REG_ADDITIVE)
+ {
+ int optional = 0;
+
+ nxtcar: switch (*lxp->pat)
+ {
+ case '+':
+ if ((lxp->flags & REG_PLUS) == 0)
+ break;
+ lxp->pat++;
+ goto nxtcar;
+ case '?':
+ if ((lxp->flags & REG_QUEST) == 0)
+ break;
+ /*FALLTHRU*/
+ case '*':
+ optional = 1;
+ lxp->pat++;
+ goto nxtcar;
+ }
+ if (optional)
+ goto nextc;
+ }
+ wc = ROP_BOL;
+ }
+ break;
+ case '$':
+ /*
+ * Look ahead to see if this is an anchor,
+ * unless any '$' is an anchor.
+ * Take it as an anchor if it occurs just before
+ * the pattern end or an alternation operator.
+ */
+ if (lxp->flags & REG_ANCHORS || *lxp->pat == '\0'
+ || (lxp->flags & REG_OR && *lxp->pat == '|')
+ || (lxp->flags & REG_NLALT && *lxp->pat == '\n'))
+ {
+ if (lxp->flags & REG_ADDITIVE)
+ {
+ int optional = 0;
+
+ nxtdol: switch (*lxp->pat)
+ {
+ case '+':
+ if ((lxp->flags & REG_PLUS) == 0)
+ break;
+ lxp->pat++;
+ goto nxtdol;
+ case '?':
+ if ((lxp->flags & REG_QUEST) == 0)
+ break;
+ /*FALLTHRU*/
+ case '*':
+ optional = 1;
+ lxp->pat++;
+ goto nxtdol;
+ }
+ if (optional)
+ goto nextc;
+ }
+ wc = ROP_EOL;
+ }
+ break;
+ case '+':
+ if (lxp->flags & REG_PLUS)
+ {
+ wc = ROP_PLUS;
+ if (lxp->flags & REG_ADDITIVE)
+ {
+ nxtplus: switch (*lxp->pat)
+ {
+ case '?':
+ if ((lxp->flags & REG_QUEST) == 0)
+ break;
+ case '*':
+ wc = ROP_STAR;
+ /*FALLTHRU*/
+ case '+':
+ lxp->pat++;
+ goto nxtplus;
+ }
+ }
+ }
+ break;
+ case '?':
+ if (lxp->flags & REG_QUEST)
+ {
+ wc = ROP_QUEST;
+ if (lxp->flags & REG_ADDITIVE)
+ {
+ nxtquest: switch (*lxp->pat)
+ {
+ case '+':
+ if ((lxp->flags & REG_PLUS) == 0)
+ break;
+ case '*':
+ wc = ROP_STAR;
+ /*FALLTHRU*/
+ case '?':
+ lxp->pat++;
+ goto nxtquest;
+ }
+ }
+ }
+ break;
+ case '\n':
+ if (lxp->flags & REG_NLALT)
+ {
+ /*
+ * Even when newline is an alternative separator,
+ * it doesn't permit parenthesized subexpressions
+ * to include it.
+ */
+ if (lxp->nleft != lxp->nright)
+ {
+ lxp->err = REG_EPAREN;
+ return -1;
+ }
+ wc = ROP_OR;
+ }
+ else if (lxp->flags & REG_NEWLINE)
+ lxp->flags |= REG_NFA;
+ break;
+ case '|':
+ if (lxp->flags & REG_OR)
+ wc = ROP_OR;
+ break;
+ case '[':
+ if ((lxp->info.bkt = malloc(sizeof(Bracket))) == 0)
+ {
+ lxp->err = REG_ESPACE;
+ return -1;
+ }
+ if ((lxp->flags & REG_GOTBKT) == 0) /* first time */
+ {
+ struct lc_collate *col;
+
+ lxp->flags |= REG_GOTBKT;
+ lxp->bktflags = 0;
+ if (lxp->flags & REG_ICASE)
+ lxp->bktflags |= BKT_ONECASE;
+ if (lxp->flags & REG_NEWLINE)
+ lxp->bktflags |= BKT_NOTNL;
+ if (lxp->flags & REG_BADRANGE)
+ lxp->bktflags |= BKT_BADRANGE;
+ if (lxp->flags & REG_ODDRANGE)
+ lxp->bktflags |= BKT_ODDRANGE;
+ if (lxp->flags & REG_SEPRANGE)
+ lxp->bktflags |= BKT_SEPRANGE;
+ if (lxp->flags & REG_BKTQUOTE)
+ lxp->bktflags |= BKT_QUOTE;
+ if (lxp->flags & REG_BKTEMPTY)
+ lxp->bktflags |= BKT_EMPTY;
+ if (lxp->flags & REG_ESCNL)
+ lxp->bktflags |= BKT_ESCNL;
+ if (lxp->flags & REG_NLALT)
+ lxp->bktflags |= BKT_NLBAD;
+ if (lxp->flags & REG_ESCSEQ)
+ lxp->bktflags |= BKT_ESCSEQ;
+ if (lxp->flags & REG_BKTESCAPE)
+ lxp->bktflags |= BKT_ESCAPE;
+ if (lxp->flags & REG_NOI18N)
+ lxp->bktflags |= BKT_NOI18N;
+ if (lxp->flags & REG_OLDESC)
+ lxp->bktflags |= BKT_OLDESC;
+ if ((col = libuxre_lc_collate(0)) != 0)
+ {
+ if (col->maintbl == 0
+ || col->flags & CHF_ENCODED)
+ {
+ (void)libuxre_lc_collate(col);
+ col = 0;
+ }
+ else if (col->flags & CHF_MULTICH)
+ lxp->flags |= REG_NFA;
+ }
+ lxp->col = col;
+ }
+ n = lxp->bktflags;
+ if (*lxp->pat == '^')
+ {
+ n |= BKT_NEGATED;
+ lxp->pat++;
+ }
+ lxp->info.bkt->col = lxp->col;
+ if ((n = libuxre_bktmbcomp(lxp->info.bkt, lxp->pat,
+ n, mb_cur_max)) < 0)
+ {
+ free(lxp->info.bkt);
+ lxp->err = -n; /* convert to REG_* errors */
+ return -1;
+ }
+ /*
+ * NFA forced if newline can be a match and REG_NEWLINE is set.
+ */
+ if ((lxp->flags & (REG_NFA | REG_NEWLINE)) == REG_NEWLINE
+ && lxp->pat[-1] == '[' /* i.e., not BKT_NEGATED */
+ && libuxre_bktmbexec(lxp->info.bkt, '\n', 0, 1) == 0)
+ {
+ lxp->flags |= REG_NFA;
+ }
+ lxp->pat += n;
+ wc = ROP_BKT;
+ break;
+ case '{':
+ if (lxp->flags & REG_NOBRACES || (lxp->flags & REG_BRACES) == 0)
+ break;
+ interval:;
+ if (!isdigit(num = *lxp->pat))
+ {
+ badbr:;
+ lxp->err = REG_BADBR;
+ if (*lxp->pat == '\0')
+ lxp->err = REG_EBRACE; /* more accurate */
+ return -1;
+ }
+ num -= '0';
+ while (isdigit(wc = *++lxp->pat))
+ {
+ num *= 10;
+ if ((num += wc - '0') > BRACE_MAX)
+ goto badbr;
+ }
+ lxp->info.num[0] = num;
+ lxp->info.num[1] = num;
+ if (wc == ',')
+ {
+ lxp->info.num[1] = BRACE_INF;
+ if (isdigit(wc = *++lxp->pat))
+ {
+ num = wc - '0';
+ while (isdigit(wc = *++lxp->pat))
+ {
+ num *= 10;
+ if ((num += wc - '0') > BRACE_MAX)
+ goto badbr;
+ }
+ if (num < lxp->info.num[0])
+ goto badbr;
+ lxp->info.num[1] = num;
+ }
+ }
+ if ((lxp->flags & REG_BRACES) == 0)
+ {
+ if (wc != '\\')
+ goto badbr;
+ wc = *++lxp->pat;
+ }
+ if (wc != '}')
+ goto badbr;
+ lxp->pat++;
+ wc = ROP_BRACE;
+ /*
+ * Replace interval with simpler equivalents where possible,
+ * even when the operators are not otherwise available.
+ */
+ if (lxp->info.num[1] <= 1)
+ {
+ if (lxp->info.num[0] == 1)
+ wc = ROP_NOP; /* {1,1} is noise */
+ else if (lxp->info.num[1] == 0)
+ wc = ROP_EMPTY; /* {0,0} is empty string */
+ else
+ wc = ROP_QUEST; /* {0,1} is ? */
+ }
+ else if (lxp->info.num[1] == BRACE_INF)
+ {
+ if (lxp->info.num[0] == 0)
+ wc = ROP_STAR;
+ else if (lxp->info.num[0] == 1)
+ wc = ROP_PLUS;
+ else if (lxp->info.num[0] > BRACE_DFAMAX)
+ lxp->flags |= REG_NFA;
+ }
+ else if (lxp->info.num[1] > BRACE_DFAMAX)
+ {
+ lxp->flags |= REG_NFA;
+ }
+ break;
+ case '\\':
+ switch (wc = *lxp->pat++)
+ {
+ case '\0':
+ lxp->err = REG_EESCAPE;
+ return -1;
+ case '<':
+ if (lxp->flags & REG_ANGLES)
+ {
+ lxp->flags |= REG_NFA;
+ wc = ROP_LT;
+ }
+ goto out;
+ case '>':
+ if (lxp->flags & REG_ANGLES)
+ {
+ lxp->flags |= REG_NFA;
+ wc = ROP_GT;
+ }
+ goto out;
+ case '(':
+ if ((lxp->flags & REG_PARENS) == 0)
+ goto leftparen;
+ goto out;
+ case ')':
+ if ((lxp->flags & REG_PARENS) == 0)
+ {
+ if (++lxp->nright > lxp->nleft)
+ {
+ lxp->err = REG_EPAREN;
+ return -1;
+ }
+ goto rightparen;
+ }
+ goto out;
+ case '{':
+ if (lxp->flags & (REG_BRACES|REG_NOBRACES))
+ goto out;
+ goto interval;
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ num = wc - '0';
+ if ((lxp->flags & REG_NOBACKREF) == 0)
+ {
+ backref:;
+ if (num > lxp->nleft
+ || lxp->clist[num - 1] == 0)
+ {
+ lxp->err = REG_ESUBREG;
+ return -1;
+ }
+ lxp->info.sub = num;
+ if (lxp->maxref < num)
+ lxp->maxref = num;
+ lxp->flags |= REG_NFA;
+ wc = ROP_REF;
+ goto out;
+ }
+ /*
+ * For compatibility (w/awk), permit "octal" 8 and 9.
+ * Already have the value of the first digit in num.
+ *
+ * If REG_OLDESC, exactly three digits must be present.
+ */
+ tryoctal:;
+ if ((lxp->flags & REG_ESCSEQ) == 0)
+ goto out;
+ if ((wc = *lxp->pat) >= '0' && wc <= '9')
+ {
+ num <<= 3;
+ num += wc - '0';
+ if ((wc = *++lxp->pat) >= '0' && wc <= '9')
+ {
+ num <<= 3;
+ num += wc - '0';
+ lxp->pat++;
+ }
+ else if (lxp->flags & REG_OLDESC)
+ {
+ lxp->pat--;
+ wc = lxp->pat[-1];
+ goto out;
+ }
+ }
+ else if (lxp->flags & REG_OLDESC)
+ {
+ wc = lxp->pat[-1];
+ goto out;
+ }
+ if ((wc = num) <= 0)
+ {
+ lxp->err = REG_BADESC;
+ return -1;
+ }
+ goto out;
+ case '0':
+ if ((lxp->flags & REG_NOBACKREF) == 0
+ && (num = *lxp->pat) >= '0' && num <= '9')
+ {
+ num -= '0';
+ /*
+ * This loop ignores wraparounds.
+ * Keep track of number of digits in n.
+ */
+ n = 1;
+ while ((wc = *++lxp->pat) >= '0' && wc <= '9')
+ {
+ num *= 10;
+ num += wc - '0';
+ n++;
+ }
+ if (num != 0)
+ goto backref;
+ lxp->pat -= n;
+ }
+ num = 0;
+ goto tryoctal;
+ case 'a':
+ if ((lxp->flags&(REG_ESCSEQ|REG_OLDESC)) == REG_ESCSEQ)
+ wc = '\a';
+ goto out;
+ case 'b':
+ if (lxp->flags & REG_ESCSEQ)
+ wc = '\b';
+ goto out;
+ case 'f':
+ if (lxp->flags & REG_ESCSEQ)
+ wc = '\f';
+ goto out;
+ case 'n':
+ if (lxp->flags & (REG_ESCSEQ | REG_ESCNL))
+ {
+ wc = '\n';
+ if (lxp->flags & REG_NEWLINE)
+ lxp->flags |= REG_NFA;
+ }
+ goto out;
+ case 'r':
+ if (lxp->flags & REG_ESCSEQ)
+ wc = '\r';
+ goto out;
+ case 't':
+ if (lxp->flags & REG_ESCSEQ)
+ wc = '\t';
+ goto out;
+ case 'v':
+ if ((lxp->flags&(REG_ESCSEQ|REG_OLDESC)) == REG_ESCSEQ)
+ wc = '\v';
+ goto out;
+ case 'x':
+ if ((lxp->flags&(REG_ESCSEQ|REG_OLDESC)) == REG_ESCSEQ
+ && isxdigit(num = *lxp->pat))
+ {
+ wc = num;
+ num = 0;
+ /*
+ * Take as many hex digits as possible,
+ * ignoring overflows.
+ * If the result (squeezed into a w_type)
+ * is positive, it's okay.
+ */
+ do
+ {
+ if (isdigit(wc))
+ wc -= '0';
+ else if (isupper(wc))
+ wc -= 'A' + 10;
+ else
+ wc -= 'a' + 10;
+ num <<= 4;
+ num |= wc;
+ } while (isxdigit(wc = *++lxp->pat));
+ if ((wc = num) <= 0)
+ {
+ lxp->err = REG_BADESC;
+ return -1;
+ }
+ }
+ goto out;
+ }
+ /*FALLTHROUGH*/
+ default:
+ if (!ISONEBYTE(wc))
+ {
+ if ((n = libuxre_mb2wc(&wc, lxp->pat)) > 0)
+ lxp->pat += n;
+ else if (n < 0)
+ {
+ lxp->err = REG_ILLSEQ;
+ return -1;
+ }
+ }
+ if (lxp->flags & REG_ICASE)
+ wc = to_lower(wc);
+ break;
+ }
+out:;
+ lxp->tok = wc;
+ return 0;
+}
+
+static Tree *alt(Lex *);
+
+static Tree *
+leaf(Lex *lxp)
+{
+ Tree *tp;
+
+ if ((tp = malloc(sizeof(Tree))) == 0)
+ {
+ lxp->err = REG_ESPACE;
+ return 0;
+ }
+ switch (tp->op = lxp->tok) /* covers most cases */
+ {
+ default:
+ if (tp->op < 0)
+ {
+ lxp->err = REG_BADPAT;
+ tp->right.ptr = 0;
+ goto badunary;
+ }
+ break;
+ case ROP_STAR:
+ case ROP_PLUS:
+ case ROP_QUEST:
+ if ((lxp->flags & REG_NOAUTOQUOTE) == 0
+ && lxp->pat[-1] != '}')
+ {
+ tp->op = lxp->pat[-1];
+ break;
+ }
+ /*FALLTHROUGH*/
+ case ROP_BRACE:
+ case ROP_EMPTY: /* was {0,0} ROP_BRACE */
+ case ROP_NOP: /* was {1,1} ROP_BRACE */
+ lxp->err = REG_BADRPT;
+ badunary:;
+ tp->left.ptr = 0;
+ goto err;
+ case ROP_ANYCH:
+ case ROP_NOTNL:
+ break;
+ case ROP_BOL:
+ case ROP_EOL:
+ case ROP_LT:
+ case ROP_GT:
+ /*
+ * Look ahead for what would have been taken to be
+ * postfix operators.
+ */
+ if (lex(lxp) != 0)
+ goto err;
+ switch (lxp->tok)
+ {
+ case ROP_STAR:
+ case ROP_PLUS:
+ case ROP_QUEST:
+ if ((lxp->flags & REG_NOAUTOQUOTE) == 0
+ && lxp->pat[-1] != '}')
+ {
+ lxp->tok = lxp->pat[-1];
+ break;
+ }
+ /*FALLTHROUGH*/
+ case ROP_BRACE:
+ case ROP_EMPTY: /* was {0,0} ROP_BRACE */
+ case ROP_NOP: /* was {1,1} ROP_BRACE */
+ lxp->err = REG_BADRPT;
+ goto err;
+ }
+ return tp;
+ case ROP_BKT:
+ tp->right.info.bkt = lxp->info.bkt;
+ break;
+ case ROP_REF:
+ tp->right.info.sub = lxp->info.sub;
+ break;
+ case ROP_LP:
+ tp->right.info.sub = lxp->nleft;
+ if (lex(lxp) != 0)
+ goto badunary;
+ if (lxp->tok == ROP_RP) /* empty parens; choice of meaning */
+ {
+ if (lxp->flags & REG_MTPARENBAD)
+ {
+ lxp->err = REG_EMPTYPAREN;
+ goto badunary;
+ }
+ lxp->tok = ROP_EMPTY;
+ if (lxp->flags & REG_MTPARENFAIL)
+ lxp->tok = ROP_NONE;
+ if ((tp->left.ptr = libuxre_reg1tree(lxp->tok, 0)) == 0)
+ goto badunary;
+ }
+ else if ((tp->left.ptr = alt(lxp)) == 0)
+ {
+ if (lxp->err == REG_BADPAT)
+ goto parenerr;
+ goto badunary;
+ }
+ else if (lxp->tok != ROP_RP)
+ {
+ lxp->err = REG_BADPAT;
+ parenerr:;
+ if (lxp->nleft != lxp->nright)
+ lxp->err = REG_EPAREN; /* better choice */
+ goto badunary;
+ }
+ tp->left.ptr->parent = tp;
+ break;
+ }
+ if (lex(lxp) != 0)
+ {
+ err:;
+ libuxre_regdeltree(tp, 1);
+ tp = 0;
+ }
+ return tp;
+}
+
+static Tree *
+post(Lex *lxp)
+{
+ Tree *lp;
+
+ if ((lp = leaf(lxp)) == 0)
+ return 0;
+ switch (lxp->tok)
+ {
+ case ROP_EMPTY: /* this was {0,0} ROP_BRACE */
+ libuxre_regdeltree(lp, 1);
+ lp = 0;
+ /*FALLTHROUGH*/
+ case ROP_BRACE:
+ case ROP_STAR:
+ case ROP_PLUS:
+ case ROP_QUEST:
+ if ((lp = libuxre_reg1tree(lxp->tok, lp)) == 0)
+ {
+ lxp->err = REG_ESPACE;
+ return 0;
+ }
+ if (lxp->tok == ROP_BRACE)
+ lp->right.info = lxp->info;
+ /*FALLTHROUGH*/
+ case ROP_NOP: /* this was {1,1} ROP_BRACE */
+ if (lex(lxp) != 0)
+ {
+ libuxre_regdeltree(lp, 1);
+ return 0;
+ }
+ break;
+ }
+ return lp;
+}
+
+static Tree *
+cat(Lex *lxp)
+{
+ Tree *lp, *rp;
+
+ if ((lp = post(lxp)) == 0)
+ return 0;
+ for (;;)
+ {
+ if (lxp->tok == ROP_OR || lxp->tok == ROP_RP
+ || lxp->tok == ROP_END)
+ {
+ return lp;
+ }
+ if ((rp = post(lxp)) == 0)
+ break;
+ if ((lp = libuxre_reg2tree(ROP_CAT, lp, rp)) == 0)
+ {
+ lxp->err = REG_ESPACE;
+ return 0;
+ }
+ }
+ libuxre_regdeltree(lp, 1);
+ return 0;
+}
+
+static Tree *
+alt(Lex *lxp)
+{
+ Tree *lp, *rp;
+
+ if ((lp = cat(lxp)) == 0)
+ return 0;
+ for (;;)
+ {
+ if (lxp->tok != ROP_OR)
+ return lp;
+ if (lex(lxp) != 0)
+ break;
+ if (lxp->tok == ROP_END)
+ return lp; /* ignore trailing '|' */
+ if ((rp = cat(lxp)) == 0)
+ break;
+ if ((lp = libuxre_reg2tree(ROP_OR, lp, rp)) == 0)
+ {
+ lxp->err = REG_ESPACE;
+ return 0;
+ }
+ }
+ libuxre_regdeltree(lp, 1);
+ return 0;
+}
+
+LIBUXRE_STATIC Tree *
+libuxre_regparse(Lex *lxp, const unsigned char *pat, int flags)
+{
+ Tree *lp, *rp;
+
+ lp = 0; /* in case of error */
+ lxp->clist = 0;
+ lxp->col = 0;
+ lxp->err = 0;
+ lxp->maxref = 0;
+ lxp->nleft = 0;
+ lxp->nright = 0;
+ lxp->nclist = 0;
+ lxp->mb_cur_max = MB_CUR_MAX;
+ if (flags & REG_OR && *pat == '|')
+ pat++; /* skip initial OR like egrep did */
+ lxp->pat = pat;
+ lxp->flags = flags;
+ lxp->tok = ROP_OR; /* enables ^ as anchor */
+ /*
+ * Get initial token.
+ */
+ if (lex(lxp) != 0)
+ {
+ err:;
+ if (lp != 0)
+ {
+ libuxre_regdeltree(lp, 1);
+ lp = 0;
+ }
+ if (lxp->err == 0)
+ lxp->err = REG_ESPACE;
+ goto ret;
+ }
+ if (lxp->tok == ROP_END)
+ {
+ lxp->err = REG_NOPAT;
+ goto err;
+ }
+ if ((lp = alt(lxp)) == 0) /* parse entire RE */
+ goto err;
+ if (lxp->maxref != 0 || (flags & REG_NOSUB) == 0)
+ {
+ if ((lp = libuxre_reg1tree(ROP_LP, lp)) == 0)
+ goto err;
+ lp->right.info.sub = 0;
+ }
+ if ((rp = libuxre_reg1tree(ROP_END, 0)) == 0)
+ goto err;
+ if ((lp = libuxre_reg2tree(ROP_CAT, lp, rp)) == 0)
+ goto err;
+ lp->parent = 0;
+ret:;
+ if (lxp->clist != 0)
+ free(lxp->clist);
+ return lp;
+}
+
+#ifdef REGDEBUG
+
+LIBUXRE_STATIC void
+libuxre_regtree(Tree *tp, int n)
+{
+ const char *opstr;
+ char buf[32];
+ int kind, next;
+
+ if (n < 0)
+ next = -n + 2;
+ else
+ next = n + 2;
+ switch (tp->op)
+ {
+ case ROP_OR:
+ opstr = "|";
+ kind = BINARY_ROP;
+ break;
+ case ROP_CAT:
+ opstr = "&";
+ kind = BINARY_ROP;
+ break;
+ case ROP_STAR:
+ opstr = "*";
+ kind = UNARY_ROP;
+ break;
+ case ROP_PLUS:
+ opstr = "+";
+ kind = UNARY_ROP;
+ break;
+ case ROP_QUEST:
+ opstr = "?";
+ kind = UNARY_ROP;
+ break;
+ case ROP_BRACE:
+ opstr = buf;
+ if (tp->right.info.num[1] == BRACE_INF)
+ {
+ sprintf(buf, "{%u,inf}",
+ (unsigned)tp->right.info.num[0]);
+ }
+ else
+ {
+ sprintf(buf, "{%u,%u}",
+ (unsigned)tp->right.info.num[0],
+ (unsigned)tp->right.info.num[1]);
+ }
+ kind = UNARY_ROP;
+ break;
+ case ROP_LP:
+ opstr = buf;
+ sprintf(buf, "%lu(", (unsigned long)tp->right.info.sub);
+ kind = UNARY_ROP;
+ break;
+ case ROP_RP:
+ opstr = buf;
+ sprintf(buf, ")%lu", (unsigned long)tp->right.info.sub);
+ kind = UNARY_ROP;
+ break;
+ case ROP_NOP:
+ opstr = "<NOP>";
+ kind = LEAF_ROP;
+ break;
+ case ROP_BOL:
+ opstr = "<BOL>";
+ kind = LEAF_ROP;
+ break;
+ case ROP_EOL:
+ opstr = "<EOL>";
+ kind = LEAF_ROP;
+ break;
+ case ROP_ALL:
+ opstr = "<ALL>";
+ kind = LEAF_ROP;
+ break;
+ case ROP_ANYCH:
+ opstr = "<ANYCH>";
+ kind = LEAF_ROP;
+ break;
+ case ROP_NOTNL:
+ opstr = "<NOTNL>";
+ kind = LEAF_ROP;
+ break;
+ case ROP_EMPTY:
+ opstr = "<MT>";
+ kind = LEAF_ROP;
+ break;
+ case ROP_NONE:
+ opstr = "<NONE>";
+ kind = LEAF_ROP;
+ break;
+ case ROP_BKT:
+ opstr = buf;
+ sprintf(buf, "[%#lx]", (unsigned long)tp->right.info.bkt);
+ kind = LEAF_ROP;
+ break;
+ case ROP_BKTCOPY:
+ opstr = buf;
+ sprintf(buf, "[%#lx]CPY", (unsigned long)tp->right.info.bkt);
+ kind = LEAF_ROP;
+ break;
+ case ROP_LT:
+ opstr = "\\<";
+ kind = LEAF_ROP;
+ break;
+ case ROP_GT:
+ opstr = "\\>";
+ kind = LEAF_ROP;
+ break;
+ case ROP_REF:
+ opstr = buf;
+ sprintf(buf, "\\%lu", (unsigned long)tp->right.info.sub);
+ kind = LEAF_ROP;
+ break;
+ case ROP_END:
+ opstr = "<END>";
+ kind = LEAF_ROP;
+ break;
+ default:
+ opstr = buf;
+ if (tp->op > UCHAR_MAX)
+ sprintf(buf, "W%#x", tp->op);
+ else if (tp->op <= 0)
+ sprintf(buf, "UNK=%u", tp->op);
+ else
+ sprintf(buf, "%c", tp->op);
+ kind = LEAF_ROP;
+ break;
+ }
+ if (kind == BINARY_ROP)
+ libuxre_regtree(tp->right.ptr, -next);
+ printf("%*c:%s\n", next - 1, n < 0 ? 'R' : n > 0 ? 'L' : 'T', opstr);
+ if (kind != LEAF_ROP)
+ libuxre_regtree(tp->left.ptr, next);
+}
+
+#endif /*REGDEBUG*/