summaryrefslogtreecommitdiff
path: root/lib/regex_impl.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/regex_impl.h')
-rw-r--r--lib/regex_impl.h380
1 files changed, 151 insertions, 229 deletions
diff --git a/lib/regex_impl.h b/lib/regex_impl.h
index 7ecfa21b..83ce046d 100644
--- a/lib/regex_impl.h
+++ b/lib/regex_impl.h
@@ -11,54 +11,52 @@
#include "src/dfa/dfa.h"
#include "src/dfa/determinization.h"
#include "src/nfa/nfa.h"
-
+#include "src/util/check.h"
namespace re2c {
namespace libre2c {
-typedef std::vector<tag_info_t> tag_path_t;
+using tag_path_t = std::vector<tag_info_t>;
-struct conf_t
-{
- nfa_state_t *state;
+struct conf_t {
+ TnfaState* state;
uint32_t origin;
int32_t thist;
- inline conf_t(): state(NULL), origin(0), thist(HROOT) {}
- inline conf_t(nfa_state_t *s, uint32_t o, int32_t h)
+ inline conf_t(): state(nullptr), origin(0), thist(HROOT) {}
+ inline conf_t(TnfaState* s, uint32_t o, int32_t h)
: state(s), origin(o), thist(h) {}
- inline conf_t(const conf_t &c, nfa_state_t *s)
+ inline conf_t(const conf_t& c, TnfaState* s)
: state(s), origin(c.origin), thist(c.thist) {}
- inline conf_t(const conf_t &c, nfa_state_t *s, int32_t h)
+ inline conf_t(const conf_t& c, TnfaState* s, int32_t h)
: state(s), origin(c.origin), thist(h) {}
};
-struct ran_or_fin_t
-{
- inline bool operator()(const conf_t &c);
+struct ran_or_fin_t {
+ inline bool operator()(const conf_t& c);
};
-typedef std::vector<conf_t> confset_t;
-typedef confset_t::iterator confiter_t;
-typedef confset_t::const_iterator cconfiter_t;
-typedef confset_t::const_reverse_iterator rcconfiter_t;
+using confset_t = std::vector<conf_t>;
+using confiter_t = confset_t::iterator;
+using cconfiter_t = confset_t::const_iterator;
+using rcconfiter_t = confset_t::const_reverse_iterator;
template<typename history_type_t>
-struct simctx_t
-{
- typedef libre2c::conf_t conf_t;
- typedef std::vector<conf_t> confset_t;
- typedef confset_t::iterator confiter_t;
- typedef confset_t::const_iterator cconfiter_t;
- typedef confset_t::reverse_iterator rconfiter_t;
- typedef confset_t::const_reverse_iterator rcconfiter_t;
- typedef history_type_t history_t;
-
- const nfa_t &nfa;
- const nfa_t *nfa0;
+struct simctx_t {
+ using conf_t = libre2c::conf_t;
+ using confset_t = std::vector<conf_t>;
+ using confiter_t = confset_t::iterator;
+ using cconfiter_t = confset_t::const_iterator;
+ using rconfiter_t = confset_t::reverse_iterator;
+ using rcconfiter_t = confset_t::const_reverse_iterator;
+ using history_t = history_type_t;
+
+ const Tnfa& nfa;
const size_t nsub;
const int flags;
+ const std::vector<Tag>& tags;
+
history_t history;
int32_t hidx;
@@ -66,18 +64,18 @@ struct simctx_t
size_t rule;
- const char *cursor;
- const char *marker;
+ const char* cursor;
+ const char* marker;
- regoff_t *offsets1;
- regoff_t *offsets2;
- regoff_t *offsets3;
- bool *done;
+ regoff_t* offsets1;
+ regoff_t* offsets2;
+ regoff_t* offsets3;
+ bool* done;
- int32_t *newprectbl;
- int32_t *oldprectbl;
+ int32_t* newprectbl;
+ int32_t* oldprectbl;
size_t oldprecdim;
- histleaf_t *histlevel;
+ histleaf_t* histlevel;
std::vector<uint32_t> sortcores;
std::vector<uint32_t> fincount;
std::vector<int32_t> worklist;
@@ -85,21 +83,17 @@ struct simctx_t
confset_t reach;
confset_t state;
- std::vector<nfa_state_t*> gor1_topsort;
- std::vector<nfa_state_t*> gor1_linear;
- std::vector<nfa_state_t*> gtop_heap_storage;
- cmp_gtop_t gtop_cmp;
- gtop_heap_t gtop_heap;
- closure_stats_t dc_clstats;
-
- simctx_t(const nfa_t &nfa, const nfa_t *nfa0, size_t re_nsub, int flags);
+ std::vector<TnfaState*> gor1_topsort;
+ std::vector<TnfaState*> gor1_linear;
+ closure_stats_t clstats;
+
+ simctx_t(const Tnfa& nfa, size_t re_nsub, int flags);
~simctx_t();
FORBID_COPY(simctx_t);
};
// tag history for lazy disambiguation (both POSIX and leftmost greedy)
-struct zhistory_t
-{
+struct zhistory_t {
struct node_t {
tag_info_t info;
hidx_t pred;
@@ -110,119 +104,86 @@ struct zhistory_t
: info(info), pred(pred), orig(orig), step(step) {}
};
- struct cache_entry_t
- {
+ struct cache_entry_t {
int32_t prec1;
int32_t prec2;
int32_t prec;
};
- typedef std::map<uint64_t, cache_entry_t> cache_t;
+ using cache_t = std::map<uint64_t, cache_entry_t>;
std::vector<node_t> nodes;
cache_t cache;
inline zhistory_t(): nodes(), cache() { init(); }
inline void init();
- inline node_t &node(hidx_t i) { return nodes[static_cast<uint32_t>(i)]; }
- inline const node_t &node(hidx_t i) const { return nodes[static_cast<uint32_t>(i)]; }
- template<typename ctx_t> inline hidx_t link(ctx_t &ctx
- , const typename ctx_t::conf_t &conf);
- template<typename ctx_t> static int32_t precedence(ctx_t &ctx
- , const typename ctx_t::conf_t &x, const typename ctx_t::conf_t &y
- , int32_t &prec1, int32_t &prec2);
+ inline node_t& node(hidx_t i) { return nodes[static_cast<uint32_t>(i)]; }
+ inline const node_t& node(hidx_t i) const { return nodes[static_cast<uint32_t>(i)]; }
+
+ template<typename ctx_t> inline hidx_t link(ctx_t& ctx, const typename ctx_t::conf_t& conf);
+ template<typename ctx_t> static int32_t precedence(ctx_t& ctx,
+ const typename ctx_t::conf_t& x,
+ const typename ctx_t::conf_t& y,
+ int32_t& prec1,
+ int32_t& prec2);
FORBID_COPY(zhistory_t);
};
-// tag history for Kuklewicz disambiguation (POSIX semantics)
-struct khistory_t
-{
- struct node_t {
- tag_info_t info;
- hidx_t pred;
-
- inline node_t(tag_info_t info, hidx_t pred)
- : info(info), pred(pred) {}
- };
-
- std::vector<node_t> nodes;
- std::vector<int32_t> path1;
- std::vector<int32_t> path2;
-
- inline khistory_t(): nodes(), path1(), path2() { init(); }
- inline void init();
- inline node_t &node(hidx_t i) { return nodes[static_cast<uint32_t>(i)]; }
- inline const node_t &node(hidx_t i) const { return nodes[static_cast<uint32_t>(i)]; }
- template<typename ctx_t> inline hidx_t link(ctx_t &ctx
- , const typename ctx_t::conf_t &conf);
- template<typename ctx_t> static int32_t precedence(ctx_t &ctx
- , const typename ctx_t::conf_t &x, const typename ctx_t::conf_t &y
- , int32_t &prec1, int32_t &prec2);
- FORBID_COPY(khistory_t);
-};
-
-typedef simctx_t<phistory_t> psimctx_t;
-typedef simctx_t<lhistory_t> lsimctx_t;
-typedef simctx_t<zhistory_t> pzsimctx_t;
-typedef simctx_t<zhistory_t> lzsimctx_t;
-typedef simctx_t<khistory_t> ksimctx_t;
+using psimctx_t = simctx_t<phistory_t>;
+using lsimctx_t = simctx_t<lhistory_t>;
+using pzsimctx_t = simctx_t<zhistory_t>;
+using lzsimctx_t = simctx_t<zhistory_t>;
// regexec functions
-typedef int (regexec_t)(const regex_t*, const char*, size_t, regmatch_t[], int);
-template<bool stadfa> regexec_t regexec_dfa;
-template<typename ctx_t> regexec_t regexec_dfa_regless;
+using regexec_t = int (const regex_t*, const char*, size_t, regmatch_t[], int);
+regexec_t regexec_dfa;
+template<typename ctx_t> regexec_t regexec_dfa_multipass;
regexec_t regexec_nfa_posix;
regexec_t regexec_nfa_posix_trie;
-regexec_t regexec_nfa_posix_backward;
-regexec_t regexec_nfa_posix_kuklewicz;
regexec_t regexec_nfa_leftmost;
regexec_t regexec_nfa_leftmost_trie;
// regparse functions (non-standard)
-typedef subhistory_t*(regparse_t)(const regex_t*, const char*, size_t);
-template<bool stadfa> regparse_t regparse_dfa;
-template<typename ctx_t> regparse_t regparse_dfa_regless;
+using regparse_t = subhistory_t* (const regex_t*, const char*, size_t);
+regparse_t regparse_dfa;
+template<typename ctx_t> regparse_t regparse_dfa_multipass;
// regtstring function (non-standard)
template<typename ctx_t>
-const tstring_t *regtstring_dfa_regless(const regex_t*, const char*);
+const tstring_t* regtstring_dfa_multipass(const regex_t*, const char*);
template<typename history_t>
-simctx_t<history_t>::simctx_t(const nfa_t &nfa, const nfa_t *nfa0, size_t re_nsub, int flags)
- : nfa(nfa)
- , nfa0(nfa0)
- , nsub(2 * (re_nsub - 1))
- , flags(flags)
- , history()
- , hidx(HROOT)
- , step(0)
- , rule(Rule::NONE)
- , cursor(NULL)
- , marker(NULL)
- , offsets1(NULL)
- , offsets2(NULL)
- , offsets3(NULL)
- , done(NULL)
- , newprectbl(NULL)
- , oldprectbl(NULL)
- , oldprecdim(0)
- , histlevel(NULL)
- , sortcores()
- , fincount()
- , worklist()
- , stateiters()
- , reach()
- , state()
- , gor1_topsort()
- , gor1_linear()
- , gtop_heap_storage()
- , gtop_cmp()
- , gtop_heap(gtop_cmp, gtop_heap_storage)
- , dc_clstats()
-{
+simctx_t<history_t>::simctx_t(const Tnfa& nfa, size_t re_nsub, int flags)
+ : nfa(nfa),
+ nsub(2 * (re_nsub - 1)),
+ flags(flags),
+ tags(nfa.tags),
+ history(),
+ hidx(HROOT),
+ step(0),
+ rule(Rule::NONE),
+ cursor(nullptr),
+ marker(nullptr),
+ offsets1(nullptr),
+ offsets2(nullptr),
+ offsets3(nullptr),
+ done(nullptr),
+ newprectbl(nullptr),
+ oldprectbl(nullptr),
+ oldprecdim(0),
+ histlevel(nullptr),
+ sortcores(),
+ fincount(),
+ worklist(),
+ stateiters(),
+ reach(),
+ state(),
+ gor1_topsort(),
+ gor1_linear(),
+ clstats() {
const size_t
- ntags = nfa.tags.size(),
- nstates = nfa.size,
- ncores = nfa.ncores;
+ ntags = nfa.tags.size(),
+ nstates = nfa.nstates,
+ ncores = nfa.ncores;
state.reserve(nstates);
reach.reserve(nstates);
@@ -235,7 +196,7 @@ simctx_t<history_t>::simctx_t(const nfa_t &nfa, const nfa_t *nfa0, size_t re_nsu
offsets2 = new regoff_t[ntags * ncores];
}
if (!(flags & REG_LEFTMOST) && !(flags & REG_TRIE)) {
- const size_t dim = (flags & REG_KUKLEWICZ) ? ntags : ncores;
+ const size_t dim = ncores;
newprectbl = new int32_t[ncores * dim];
oldprectbl = new int32_t[ncores * dim];
histlevel = new histleaf_t[ncores];
@@ -243,22 +204,13 @@ simctx_t<history_t>::simctx_t(const nfa_t &nfa, const nfa_t *nfa0, size_t re_nsu
fincount.resize(ncores + 1);
worklist.reserve(nstates);
}
- if (flags & REG_KUKLEWICZ) {
- stateiters.reserve(ncores);
- }
- if (flags & REG_GTOP) {
- gtop_heap_storage.reserve(nstates);
- }
- else {
- gor1_topsort.reserve(nstates);
- gor1_linear.reserve(nstates);
- }
+ gor1_topsort.reserve(nstates);
+ gor1_linear.reserve(nstates);
}
template<typename history_t>
-simctx_t<history_t>::~simctx_t()
-{
+simctx_t<history_t>::~simctx_t() {
delete[] done;
delete[] offsets3;
if (!(flags & REG_TRIE)) {
@@ -270,17 +222,10 @@ simctx_t<history_t>::~simctx_t()
delete[] oldprectbl;
delete[] histlevel;
}
- if (flags & REG_BACKWARD) {
- delete &nfa0->charset;
- delete &nfa0->rules;
- delete &nfa0->tags;
- delete nfa0;
- }
}
template<typename history_t>
-void init(simctx_t<history_t> &ctx, const char *string)
-{
+void init(simctx_t<history_t>& ctx, const char* string) {
ctx.reach.clear();
ctx.state.clear();
ctx.history.init();
@@ -289,33 +234,30 @@ void init(simctx_t<history_t> &ctx, const char *string)
ctx.rule = Rule::NONE;
ctx.cursor = ctx.marker = string;
ctx.sortcores.clear();
- DASSERT(ctx.worklist.empty());
- DASSERT(ctx.gor1_topsort.empty());
- DASSERT(ctx.gor1_linear.empty());
- DASSERT(ctx.gtop_heap.empty());
+ DCHECK(ctx.worklist.empty());
+ DCHECK(ctx.gor1_topsort.empty());
+ DCHECK(ctx.gor1_linear.empty());
}
-static inline regoff_t *offs_addr(regmatch_t pmatch[], size_t t)
-{
- regmatch_t *m = &pmatch[t / 2 + 1];
+static inline regoff_t* offs_addr(regmatch_t pmatch[], size_t t) {
+ regmatch_t* m = &pmatch[t / 2 + 1];
return t % 2 == 0 ? &m->rm_so : &m->rm_eo;
}
-struct getoff_nfa_t
-{
- const regoff_t *offsets;
- inline regoff_t operator()(size_t idx) const { return offsets[idx]; }
+struct getoff_nfa_t {
+ const regoff_t* offsets;
+ inline regoff_t operator()(size_t idx) const {
+ return offsets[idx];
+ }
};
-struct getoff_dfa_t
-{
- const dfa_t *dfa;
- const regoff_t *regs;
+struct getoff_dfa_t {
+ const Tdfa* dfa;
+ const regoff_t* regs;
const regoff_t len;
- regoff_t operator()(size_t idx) const
- {
- const Tag &tag = dfa->tags[idx];
+ regoff_t operator()(size_t idx) const {
+ const Tag& tag = dfa->tags[idx];
regoff_t off;
if (!fixed(tag)) {
off = regs[dfa->finvers[idx]];
@@ -330,24 +272,26 @@ struct getoff_dfa_t
};
template<typename getoff_t>
-void tags_to_submatch(const std::vector<Tag> &tags, size_t nmatch,
- regmatch_t pmatch[], regoff_t len, const getoff_t &getoff)
-{
+void tags_to_submatch(const std::vector<Tag>& tags,
+ size_t nmatch,
+ regmatch_t pmatch[],
+ regoff_t len,
+ const getoff_t& getoff) {
const size_t ntags = tags.size();
- regmatch_t *m = pmatch, *e = pmatch + nmatch;
+ regmatch_t* m = pmatch, *e = pmatch + nmatch;
m->rm_so = 0;
m->rm_eo = len;
++m;
for (size_t t = 0; t < ntags && m < e; t += 2) {
- const Tag &tag = tags[t];
+ const Tag& tag = tags[t];
if (!fictive(tag)) {
const regoff_t so = getoff(t);
const regoff_t eo = getoff(t + 1);
for (size_t j = tag.lsub; j <= tag.hsub && m < e; j += 2, ++m) {
- DASSERT(m - 1 == &pmatch[j / 2]);
+ DCHECK(m - 1 == &pmatch[j / 2]);
m->rm_so = so;
m->rm_eo = eo;
}
@@ -356,36 +300,35 @@ void tags_to_submatch(const std::vector<Tag> &tags, size_t nmatch,
}
template<typename history_t>
-int finalize(const simctx_t<history_t> &ctx, const char *string, size_t nmatch,
- regmatch_t pmatch[])
-{
+int finalize(const simctx_t<history_t>& ctx,
+ const char* string,
+ size_t nmatch,
+ regmatch_t pmatch[]) {
if (ctx.rule == Rule::NONE) {
return REG_NOMATCH;
}
- const std::vector<Tag> &tags = ctx.nfa.tags;
+ const std::vector<Tag>& tags = ctx.nfa.tags;
const size_t ntags = tags.size();
- bool *done = ctx.done;
+ bool* done = ctx.done;
memset(done, 0, ntags * sizeof(bool));
for (int32_t i = ctx.hidx; i != HROOT; ) {
- const typename history_t::node_t &n = ctx.history.node(i);
+ const typename history_t::node_t& n = ctx.history.node(i);
i = n.pred;
const size_t t = n.info.idx;
// If already updated, skip.
if (done[t]) continue;
- // Update positive tag.
if (!n.info.neg) {
+ // Update positive tag.
done[t] = true;
ctx.offsets3[t] = static_cast<regoff_t>(n.step);
- }
-
- // Update negative tag together with its sibling and nested tags (if any).
- else {
- const Tag &tag = tags[t];
+ } else {
+ // Update negative tag together with its sibling and nested tags (if any).
+ const Tag& tag = tags[t];
for (size_t l = tag.lnest; l < tag.hnest; ++l) {
if (!done[l]) {
done[l] = true;
@@ -402,20 +345,18 @@ int finalize(const simctx_t<history_t> &ctx, const char *string, size_t nmatch,
}
template<typename history_t>
-void update_offsets(simctx_t<history_t> &ctx, const conf_t &c, uint32_t id)
-{
- regoff_t *o;
- const std::vector<Tag> &tags = ctx.nfa.tags;
+void update_offsets(simctx_t<history_t>& ctx, const conf_t& c, uint32_t id) {
+ regoff_t* o;
+ const std::vector<Tag>& tags = ctx.nfa.tags;
const size_t ntags = tags.size();
- nfa_state_t *s = c.state;
- bool *done = ctx.done;
+ TnfaState* s = c.state;
+ bool* done = ctx.done;
- if (s->type == nfa_state_t::FIN) {
+ if (s->kind == TnfaState::Kind::FIN) {
ctx.marker = ctx.cursor;
ctx.rule = 0;
o = ctx.offsets3;
- }
- else {
+ } else {
o = ctx.offsets1 + id * ntags;
}
@@ -423,22 +364,20 @@ void update_offsets(simctx_t<history_t> &ctx, const conf_t &c, uint32_t id)
memset(done, 0, ntags * sizeof(bool));
for (int32_t i = c.thist; i != HROOT; ) {
- const typename history_t::node_t &n = ctx.history.node(i);
+ const typename history_t::node_t& n = ctx.history.node(i);
i = n.pred;
const size_t t = n.info.idx;
// If already updated, skip.
if (done[t]) continue;
- // Update positive tag.
if (!n.info.neg) {
+ // Update positive tag.
done[t] = true;
o[t] = static_cast<regoff_t>(ctx.step);
- }
-
- // Update negative tag together with its sibling and nested tags (if any).
- else {
- const Tag &tag = tags[t];
+ } else {
+ // Update negative tag together with its sibling and nested tags (if any).
+ const Tag& tag = tags[t];
for (size_t l = tag.lnest; l < tag.hnest; ++l) {
if (!done[l]) {
done[l] = true;
@@ -449,38 +388,21 @@ void update_offsets(simctx_t<history_t> &ctx, const conf_t &c, uint32_t id)
}
}
-bool ran_or_fin_t::operator()(const conf_t &c)
-{
- return c.state->type == nfa_state_t::RAN
- || c.state->type == nfa_state_t::FIN;
+bool ran_or_fin_t::operator()(const conf_t& c) {
+ return c.state->kind == TnfaState::Kind::RAN
+ || c.state->kind == TnfaState::Kind::FIN;
}
-void zhistory_t::init()
-{
+void zhistory_t::init() {
nodes.clear();
nodes.push_back(node_t(NOINFO, -1, 0, 0));
cache.clear();
}
-void khistory_t::init()
-{
- nodes.clear();
- nodes.push_back(node_t(NOINFO, -1));
-}
-
-template<typename ctx_t>
-hidx_t zhistory_t::link(ctx_t &ctx, const typename ctx_t::conf_t &conf)
-{
- const int32_t i = static_cast<int32_t>(nodes.size());
- nodes.push_back(node_t(conf.state->tag.info, conf.thist, conf.origin, ctx.step));
- return i;
-}
-
template<typename ctx_t>
-hidx_t khistory_t::link(ctx_t &/* ctx */, const typename ctx_t::conf_t &conf)
-{
+hidx_t zhistory_t::link(ctx_t& ctx, const typename ctx_t::conf_t& conf) {
const int32_t i = static_cast<int32_t>(nodes.size());
- nodes.push_back(node_t(conf.state->tag.info, conf.thist));
+ nodes.push_back(node_t(conf.state->tag, conf.thist, conf.origin, ctx.step));
return i;
}