diff options
Diffstat (limited to 'src/cfg/normalize.cc')
-rw-r--r-- | src/cfg/normalize.cc | 108 |
1 files changed, 72 insertions, 36 deletions
diff --git a/src/cfg/normalize.cc b/src/cfg/normalize.cc index 94879cfc..7ce6c3a1 100644 --- a/src/cfg/normalize.cc +++ b/src/cfg/normalize.cc @@ -6,6 +6,7 @@ #include "src/cfg/cfg.h" #include "src/dfa/dfa.h" #include "src/dfa/tcmd.h" +#include "src/options/opt.h" #include "src/regexp/tag.h" @@ -15,44 +16,63 @@ template<typename cmd_t> void normalize(cmd_t *cmd); /* note [tag normalization] * - * After optimizations different commands may become equal - * up to reordering and removing duplicates. Such commands - * should be recognized as equal by further optimizations - * like minimization, hoisting, tunnelling, etc. - * For that reason all commands are normalized. + * After optimizations different commands may become equal up to reordering + * and removing duplicates. Such commands should be deduplicated (duplicate + * "set" and "copy" commands are redundant and may prohibit further + * optimizations like minimization and tunneling, and duplicate "add" commands + * are incorrect altogether. + * + * Consequently, tag commands are "normalized": the sequence of commands is + * split into continuous subsequnces of "set", "copy" and "add" commands, and + * within each subsequence duplicate commands are removed. Additionally, "set" + * subsequences are sorted lexicographically (the relative order of "set" + * commands doesn't matter), and "copy" subsequences are first sorted + * lexicographically and then topologically (except for the case of staDFA, + * which allows loops in "copy" commands that cannot be topsorted and are + * resolved by using local temporary variables). */ -static tcmd_t **normalize(tcmd_t **ps, tcmd_t *e); +static void sort(tcmd_t *head, tcmd_t *end); +static void uniq(tcmd_t *head, tcmd_t *end); +static void dedup(tcmd_t *head, tcmd_t *end); -void cfg_t::normalization(cfg_t &cfg) +void cfg_t::normalization(cfg_t &cfg, const opt_t *opts) { const size_t nver = static_cast<size_t>(cfg.dfa.maxtagver) + 1; uint32_t *indeg = new uint32_t[nver]; - memset(indeg, 0, nver * sizeof(uint32_t)); cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbbfall; for (; b < e; ++b) { + for (tcmd_t **px = &b->cmd, *x; (x = *px);) { - // We cannot normalize the list of commands as a whole: the - // relative order of some commands might be significant. - // Therefore we split the list in continuous sublists of - // 'copy', 'save without history' and 'save with history' - // commands and normalize each sublist in a proper way. - tcmd_t **px, *x; - for (px = &b->cmd; (x = *px);) { if (tcmd_t::iscopy(x)) { - for (x = *px; x && tcmd_t::iscopy(x); x = x->next); - *normalize(px, x) = NULL; // topsort expects NULL terminator - tcmd_t::topsort(px, indeg); - for (; *px; px = &(*px)->next); // find tail - *px = x; // restore tail - } else if (tcmd_t::isset(x)) { - for (x = *px; x && tcmd_t::isset(x); x = x->next); - px = normalize(px, x); - } else { - for (; (x = *px) && tcmd_t::isadd(x); px = &x->next); - // don't normalize, histories may have complex dependencies + for (; x && tcmd_t::iscopy(x); x = x->next); + if (!opts->stadfa) { + // sort, remove adjacent duplicates, topsort + sort(*px, x); + uniq(*px, x); + tcmd_t::topsort(px, x, indeg); + } + else { + // remove duplicates + dedup(*px, x); + } + } + else if (tcmd_t::isset(x)) { + // sort, remove adjacent duplicates + for (; x && tcmd_t::isset(x); x = x->next); + sort(*px, x); + uniq(*px, x); } + else { + // remove duplicates + for (; x && tcmd_t::isadd(x); x = x->next); + dedup(*px, x); + } + + // Find tail of the current copy/set/add subsequence (need to + // reiterate it as the former tail might have been removed). + for (; *px != x; px = &(*px)->next); } } @@ -87,29 +107,45 @@ static bool less(const tcmd_t &x, const tcmd_t &y) return false; } -tcmd_t **normalize(tcmd_t **ps, tcmd_t *e) +void sort(tcmd_t *head, tcmd_t *end) { - // sort lexicographically - for (tcmd_t *p = *ps; p != e; p = p->next) { - for (tcmd_t *q = p->next; q != e; q = q->next) { + // sort: O(n^2) + for (tcmd_t *p = head; p != end; p = p->next) { + for (tcmd_t *q = p->next; q != end; q = q->next) { if (less(*q, *p)) { swap(*p, *q); } } } +} - // delete duplicates - for (tcmd_t *p = *ps; p != e;) { +void uniq(tcmd_t *head, tcmd_t *end) +{ + // remove adjacent duplicates: O(n) + for (tcmd_t *p = head; p != end;) { tcmd_t *q = p->next; - if (q != e && tcmd_t::equal(*p, *q)) { + if (q != end && tcmd_t::equal(*p, *q)) { p->next = q->next; - } else { + } + else { p = q; } } +} - for (; *ps != e; ps = &(*ps)->next); - return ps; +void dedup(tcmd_t *head, tcmd_t *end) +{ + // remove duplicates: O(n^2) + for (tcmd_t *p = head; p != end; p = p->next) { + for (tcmd_t **qq = &p->next; *qq != end; ) { + if (tcmd_t::equal(*p, **qq)) { + *qq = (*qq)->next; + } + else { + qq = &(*qq)->next; + } + } + } } } // namespace re2c |