diff options
Diffstat (limited to 'src/parse/parser.ypp')
-rw-r--r-- | src/parse/parser.ypp | 777 |
1 files changed, 777 insertions, 0 deletions
/* Provenance: diff --git a/src/parse/parser.ypp b/src/parse/parser.ypp
 * (new file mode 100644, index 00000000..1c476953, hunk @@ -0,0 +1,777 @@).
 */
%{

#include <assert.h>
#include <time.h>
#include <string.h>
#include <stdlib.h>
#include <iostream>
#include <set>

#include "config.h"
#include "src/codegen/skeleton/skeleton.h"
#include "src/ir/bytecode/bytecode.h"
#include "src/ir/regexp/encoding/enc.h"
#include "src/ir/regexp/encoding/range_suffix.h"
#include "src/ir/regexp/regexp_cat.h"
#include "src/ir/regexp/regexp_close.h"
#include "src/ir/regexp/regexp_null.h"
#include "src/globals.h"
#include "src/parse/code.h"
#include "src/parse/extop.h"
#include "src/parse/parser.h"
#include "src/util/c99_stdint.h"
#include "src/util/smart_ptr.h"

#define YYMALLOC malloc
#define YYFREE free

using namespace re2c;

extern "C"
{
int yylex();
void yyerror(const char*);
}

/* Parser state shared between the grammar actions and parse(). */

/* Source of rule ranks; reset before each yyparse() run in parse(). */
static counter_t<rule_rank_t> rank_counter;
/* Condition names in the order they were first seen by context_rule(). */
static std::vector<std::string> condnames;
/* Per-condition specs, keyed by condition name. */
static re2c::SpecMap specMap;
/* Spec collecting the rules that carry no condition. */
static Spec spec;
/* Rule given with NOCOND; parse() files it under condition "0". */
static RuleOp *specNone = NULL;
/* Rules given for <*>; parse() merges them into every condition. */
static RuleOpList specStar;
/* Default rule given as '<*> *'; parse() merges it into every condition. */
static RuleOp * star_default = NULL;
/* Scanner currently being parsed; set and cleared by parse(). */
static Scanner *in = NULL;
/* Mode of the current block, as returned by Scanner::echo(). */
static Scanner::ParseMode parseMode;
/* Setup code per condition name: (line, code text). */
static SetupMap ruleSetupMap;
/* Set by the grammar as soon as one rule has been reduced. */
static bool foundRules;
/* Named definitions: name -> regular expression. */
static symbol_table_t symbol_table;

/* Bison version 1.875 emits a definition that is not working
 * with several g++ versions. Hence we disable it here.
 */
#if defined(__GNUC__)
#define __attribute__(x)
#endif

/* Reports a fatal error when conditions are used without the -c switch.
 * In that case 'clist' (which may be NULL) is deleted first.
 * NOTE(review): callers keep using 'clist' afterwards, so in->fatal()
 * presumably does not return -- confirm against Scanner.
 */
void context_check(CondList *clist)
{
	if (!opts->cFlag)
	{
		delete clist;
		in->fatal("conditions are only allowed when using -c switch");
	}
}

/* Handles a condition rule that has no expression: releases 'clist',
 * runs the -c check and then reports the missing expression.
 */
void context_none(CondList *clist)
{
	delete clist;
	context_check(NULL);
	in->fatal("no expression specified");
}

/* Adds one rule to every condition named in 'clist'.
 *
 * clist   - condition names; deleted before returning
 * loc     - location recorded in each created RuleOp
 * expr    - the rule's regular expression
 * look    - the rule's trailing context
 * code    - the rule's code, may be NULL
 * newcond - condition to switch to, may be NULL; deleted before returning
 */
void context_rule
	( CondList * clist
	, const Loc & loc
	, RegExp * expr
	, RegExp * look
	, const Code * code
	, const std::string * newcond
	)
{
	context_check(clist);
	/* a regexp shared by several conditions is marked PRIVATE */
	const RegExp::InsAccess ins_access = clist->size() > 1
		? RegExp::PRIVATE
		: RegExp::SHARED;
	for(CondList::const_iterator it = clist->begin(); it != clist->end(); ++it)
	{
		/* remember each condition name the first time it is seen */
		if (specMap.find(*it) == specMap.end())
		{
			condnames.push_back (*it);
		}

		RuleOp * rule = new RuleOp
			( loc
			, expr
			, look
			, rank_counter.next ()
			, ins_access
			, code
			, newcond
			);
		specMap[*it].add (rule);
	}
	delete clist;
	delete newcond;
}

/* Registers 'code' as setup code for every condition in 'clist'.
 * A second setup for the same condition is a fatal error.
 * 'clist' is deleted before returning.
 */
void setup_rule(CondList *clist, const Code * code)
{
	assert(clist);
	assert(code);
	context_check(clist);
	for(CondList::const_iterator it = clist->begin(); it != clist->end(); ++it)
	{
		if (ruleSetupMap.find(*it) != ruleSetupMap.end())
		{
			in->fatalf_at(code->loc.line, "code to setup rule '%s' is already defined", it->c_str());
		}
		ruleSetupMap[*it] = std::make_pair(code->loc.line, code->text);
	}
	delete clist;
}

/* Installs 'code' as the default rule of every condition in 'clist'.
 * A second default rule for the same condition is a fatal error.
 * 'clist' is deleted before returning.
 */
void default_rule(CondList *clist, const Code * code)
{
	assert(clist);
	assert(code);
	context_check(clist);
	for(CondList::const_iterator it = clist->begin(); it != clist->end(); ++it)
	{
		RuleOp * def = new RuleOp
			( code->loc
			, in->mkDefault ()
			, new NullOp
			, rule_rank_t::def ()
			, RegExp::SHARED
			, code
			, NULL
			);
		if (!specMap[*it].add_def (def))
		{
			in->fatalf_at(code->loc.line, "code to default rule '%s' is already defined", it->c_str());
		}
	}
	delete clist;
}

%}

%start spec

%union {
	re2c::RegExp * regexp;
	const re2c::Code * code;
	char op;
	re2c::ExtOp extop;
	std::string * str;
	re2c::CondList * clist;
};

%token CLOSE
%token CLOSESIZE
%token CODE
%token CONF
%token ID
%token FID
%token FID_END
%token NOCOND
%token REGEXP
%token SETUP
%token STAR

%type <op> CLOSE STAR SETUP close
%type <extop> CLOSESIZE
%type <code> CODE
%type <regexp> REGEXP rule look expr diff term factor primary
%type <str> ID FID newcond
%type <clist> cond clist

%%

/* A specification is a sequence of rules and declarations. */
spec:
	/* empty */
	{
	}
	| spec rule
	{
		foundRules = true;
	}
	| spec decl
;

/* Named definitions (both spellings) and configurations. */
decl:
	ID '=' expr ';'
	{
		if (!symbol_table.insert (std::make_pair (* $1, $3)).second)
		{
			in->fatal("sym already defined");
		}
		delete $1;
		$3->ins_access = RegExp::PRIVATE;
	}
	| FID expr FID_END
	{
		if (!symbol_table.insert (std::make_pair (* $1, $2)).second)
		{
			in->fatal("sym already defined");
		}
		delete $1;
		$2->ins_access = RegExp::PRIVATE;
	}
	| ID '=' expr '/'
	{
		in->fatal("trailing contexts are not allowed in named definitions");
	}
	| FID expr '/'
	{
		in->fatal("trailing contexts are not allowed in named definitions");
	}
	| CONF {}
;

/* Rules: plain, per-condition (<c1,c2>), all-condition (<*>),
 * no-condition (NOCOND) and setup rules.
 */
rule:
	expr look CODE
	{
		if (opts->cFlag)
		{
			in->fatal("condition or '<*>' required when using -c switch");
		}
		RuleOp * rule = new RuleOp
			( $3->loc
			, $1
			, $2
			, rank_counter.next ()
			, RegExp::SHARED
			, $3
			, NULL
			);
		spec.add (rule);
	}
	| STAR CODE /* default rule */
	{
		if (opts->cFlag)
		{
			in->fatal("condition or '<*>' required when using -c switch");
		}
		RuleOp * def = new RuleOp
			( $2->loc
			, in->mkDefault ()
			, new NullOp
			, rule_rank_t::def ()
			, RegExp::SHARED
			, $2
			, NULL
			);
		if (!spec.add_def (def))
		{
			in->fatal("code to default rule is already defined");
		}
	}
	| '<' cond '>' expr look newcond CODE
	{
		context_rule ($2, $7->loc, $4, $5, $7, $6);
	}
	| '<' cond '>' expr look ':' newcond
	{
		assert($7);
		/* no code block: use the scanner's current position as location */
		Loc loc (in->get_fname (), in->get_cline ());
		context_rule ($2, loc, $4, $5, NULL, $7);
	}
	| '<' cond '>' look newcond CODE
	{
		context_none($2);
		delete $5;
	}
	| '<' cond '>' look ':' newcond
	{
		assert($6);
		context_none($2);
		delete $6;
	}
	| '<' cond '>' STAR CODE /* default rule for conditions */
	{
		default_rule($2, $5);
	}
	| '<' STAR '>' expr look newcond CODE
	{
		context_check(NULL);
		RuleOp * rule = new RuleOp
			( $7->loc
			, $4
			, $5
			, rank_counter.next ()
			, RegExp::PRIVATE
			, $7
			, $6
			);
		specStar.push_back (rule);
		delete $6;
	}
	| '<' STAR '>' expr look ':' newcond
	{
		assert($7);
		context_check(NULL);
		Loc loc (in->get_fname (), in->get_cline ());
		RuleOp * rule = new RuleOp
			( loc
			, $4
			, $5
			, rank_counter.next ()
			, RegExp::PRIVATE
			, NULL
			, $7
			);
		specStar.push_back (rule);
		delete $7;
	}
	| '<' STAR '>' look newcond CODE
	{
		context_none(NULL);
		delete $5;
	}
	| '<' STAR '>' look ':' newcond
	{
		assert($6);
		context_none(NULL);
		delete $6;
	}
	| '<' STAR '>' STAR CODE /* default rule for all conditions */
	{
		/* Like every other condition rule, this one needs -c: without
		 * it star_default is never merged by parse(), so the rule
		 * would be dropped without any diagnostic.
		 */
		context_check(NULL);
		if (star_default)
		{
			in->fatal ("code to default rule '*' is already defined");
		}
		star_default = new RuleOp
			( $5->loc
			, in->mkDefault ()
			, new NullOp
			, rule_rank_t::def ()
			, RegExp::PRIVATE
			, $5
			, NULL
			);
	}
	| NOCOND newcond CODE
	{
		context_check(NULL);
		if (specNone)
		{
			in->fatal("code to handle illegal condition already defined");
		}
		$$ = specNone = new RuleOp
			( $3->loc
			, new NullOp
			, new NullOp
			, rank_counter.next ()
			, RegExp::SHARED
			, $3
			, $2
			);
		delete $2;
	}
	| NOCOND ':' newcond
	{
		assert($3);
		context_check(NULL);
		if (specNone)
		{
			in->fatal("code to handle illegal condition already defined");
		}
		Loc loc (in->get_fname (), in->get_cline ());
		$$ = specNone = new RuleOp
			( loc
			, new NullOp
			, new NullOp
			, rank_counter.next ()
			, RegExp::SHARED
			, NULL
			, $3
			);
		delete $3;
	}
	| SETUP STAR '>' CODE
	{
		/* setup code for all conditions is stored under the name "*" */
		CondList *clist = new CondList();
		clist->insert("*");
		setup_rule(clist, $4);
	}
	| SETUP cond '>' CODE
	{
		setup_rule($2, $4);
	}
;

/* A condition list; an empty one is rejected. */
cond:
	/* empty */
	{
		in->fatal("unnamed condition not supported");
	}
	| clist
	{
		$$ = $1;
	}
	;

/* Comma-separated condition names collected into a CondList. */
clist:
	ID
	{
		$$ = new CondList();
		$$->insert(* $1);
		delete $1;
	}
	| clist ',' ID
	{
		$1->insert(* $3);
		delete $3;
		$$ = $1;
	}
;

/* Optional '=> ID' target condition; NULL when absent. */
newcond:
	/* empty */
	{
		$$ = NULL;
	}
	| '=' '>' ID
	{
		$$ = $3;
	}
;

/* Optional trailing context '/ expr'; a NullOp when absent. */
look:
	/* empty */
	{
		$$ = new NullOp;
	}
	| '/' expr
	{
		$$ = $2;
	}
;

/* Alternation. */
expr:
	diff
	{
		$$ = $1;
	}
	| expr '|' diff
	{
		$$ = mkAlt($1, $3);
	}
;

/* Difference. */
diff:
	term
	{
		$$ = $1;
	}
	| diff '\\' term
	{
		$$ = in->mkDiff($1, $3);
	}
;

/* Concatenation. */
term:
	factor
	{
		$$ = $1;
	}
	| term factor
	{
		$$ = new CatOp($1, $2);
	}
;

/* A primary with an optional repetition operator or counted repetition. */
factor:
	primary
	{
		$$ = $1;
	}
	| primary close
	{
		switch($2)
		{
			case '*':
				$$ = mkAlt(new CloseOp($1), new NullOp());
				break;
			case '+':
				$$ = new CloseOp($1);
				break;
			case '?':
				$$ = mkAlt($1, new NullOp());
				break;
		}
	}
	| primary CLOSESIZE
	{
		$1->ins_access = RegExp::PRIVATE;
		if ($2.max == UINT32_MAX)
		{
			$$ = repeat_from ($1, $2.min);
		}
		else if ($2.min == $2.max)
		{
			$$ = repeat ($1, $2.min);
		}
		else
		{
			$$ = repeat_from_to ($1, $2.min, $2.max);
		}
		/* a NULL result from the repeat helpers becomes a NullOp */
		$$ = $$ ? $$ : new NullOp;
	}
;

/* One or more repetition operators: a run of the same operator keeps
 * that operator, a mixed run collapses to '*'.
 */
close:
	CLOSE
	{
		$$ = $1;
	}
	| STAR
	{
		$$ = $1;
	}
	| close CLOSE
	{
		$$ = ($1 == $2) ? $1 : '*';
	}
	| close STAR
	{
		$$ = ($1 == $2) ? $1 : '*';
	}
;

/* A named definition, a literal regexp or a parenthesised expression. */
primary:
	ID
	{
		symbol_table_t::iterator i = symbol_table.find (* $1);
		delete $1;
		if (i == symbol_table.end ())
		{
			in->fatal("can't find symbol");
		}
		$$ = i->second;
	}
	| REGEXP
	{
		$$ = $1;
	}
	| '(' expr ')'
	{
		$$ = $2;
	}
;

%%

extern "C" {
/* Parser error hook: forwards the message to the scanner as fatal. */
void yyerror(const char* s)
{
	in->fatal(s);
}

/* Parser token hook: scans the next token, or returns 0 when no
 * scanner is installed.
 */
int yylex(){
	return in ? in->scan() : 0;
}
} // end extern "C"

namespace re2c
{

/* Parses the input behind scanner 'i' block by block and writes the
 * generated code to 'o'.
 *
 * For each block reported by Scanner::echo() the grammar is run, the
 * collected specs are compiled with genCode() and the resulting DFAs are
 * emitted. The compiled DFAs are kept in dfa_map so that a Reuse block
 * can emit them again. All parser state is released before returning.
 */
void parse(Scanner& i, Output & o)
{
	std::map<std::string, smart_ptr<DFA> > dfa_map;
	ScannerState rules_state;

	in = &i;

	o.source.write_version_time ();
	o.source.write_line_info (in->get_cline (), in->get_fname ().c_str ());
	if (opts->target == opt_t::SKELETON)
	{
		Skeleton::emit_prolog (o.source);
	}

	Enc encodingOld = opts->encoding;

	while ((parseMode = i.echo()) != Scanner::Stop)
	{
		o.source.new_block ();
		bool bPrologBrace = false;
		ScannerState curr_state;

		i.save_state(curr_state);
		foundRules = false;

		if (opts->rFlag && parseMode == Scanner::Rules && dfa_map.size())
		{
			in->fatal("cannot have a second 'rules:re2c' block");
		}
		if (parseMode == Scanner::Reuse)
		{
			if (dfa_map.empty())
			{
				in->fatal("got 'use:re2c' without 'rules:re2c'");
			}
		}
		else if (parseMode == Scanner::Rules)
		{
			/* remember where the rules block starts for later re-parsing */
			i.save_state(rules_state);
		}
		else
		{
			dfa_map.clear();
		}
		rank_counter.reset ();
		spec.clear ();
		in->set_in_parse(true);
		yyparse();
		in->set_in_parse(false);
		if (opts->rFlag && parseMode == Scanner::Reuse)
		{
			if (foundRules || opts->encoding != encodingOld)
			{
				// Re-parse rules
				parseMode = Scanner::Parse;
				i.restore_state(rules_state);
				i.reuse();
				dfa_map.clear();
				parse_cleanup();
				spec.clear ();
				rank_counter.reset ();
				in->set_in_parse(true);
				yyparse();
				in->set_in_parse(false);

				// Now append potential new rules
				i.restore_state(curr_state);
				parseMode = Scanner::Parse;
				in->set_in_parse(true);
				yyparse();
				in->set_in_parse(false);
			}
			encodingOld = opts->encoding;
		}
		o.source.set_block_line (in->get_cline ());
		uint32_t ind = opts->topIndent;
		if (opts->cFlag)
		{
			SpecMap::iterator it;
			SetupMap::const_iterator itRuleSetup;

			if (parseMode != Scanner::Reuse)
			{
				// <*> rules must have the lowest priority
				// now that all rules have been parsed, we can fix it
				for (RuleOpList::const_iterator itOp = specStar.begin(); itOp != specStar.end(); ++itOp)
				{
					(*itOp)->rank = rank_counter.next ();
				}
				// merge <*> rules to all conditions
				// note that all conditions use the same regexp for <*> rules,
				// but compile it separately because of RegExp::PRIVATE attribute
				for (it = specMap.begin(); it != specMap.end(); ++it)
				{
					for (RuleOpList::const_iterator itOp = specStar.begin(); itOp != specStar.end(); ++itOp)
					{
						it->second.addl (*itOp);
					}
					if (star_default)
					{
						it->second.addl (star_default);
					}
				}

				if (specNone)
				{
					specMap["0"].add (specNone);
					// Note that "0" inserts first, which is important.
					condnames.insert (condnames.begin (), "0");
				}
				o.types = condnames;
			}

			/* counts down so that the last emitted condition can be flagged */
			size_t nCount = specMap.size();

			for (it = specMap.begin(); it != specMap.end(); ++it)
			{
				if (parseMode != Scanner::Reuse)
				{
					/* setup code: the condition's own, else the "*" one, else none */
					itRuleSetup = ruleSetupMap.find(it->first);
					if (itRuleSetup != ruleSetupMap.end())
					{
						yySetupRule = itRuleSetup->second.second;
					}
					else
					{
						itRuleSetup = ruleSetupMap.find("*");
						if (itRuleSetup != ruleSetupMap.end())
						{
							yySetupRule = itRuleSetup->second.second;
						}
						else
						{
							yySetupRule = "";
						}
					}

					dfa_map[it->first] = genCode(it->second, o, it->first, opts->encoding.nCodeUnits ());
				}
				if (parseMode != Scanner::Rules && dfa_map.find(it->first) != dfa_map.end())
				{
					dfa_map[it->first]->emit(o, ind, !--nCount, bPrologBrace);
				}
			}
		}
		else
		{
			if (spec.re || !dfa_map.empty())
			{
				if (parseMode != Scanner::Reuse)
				{
					dfa_map[""] = genCode(spec, o, "", opts->encoding.nCodeUnits ());
				}
				if (parseMode != Scanner::Rules && dfa_map.find("") != dfa_map.end())
				{
					dfa_map[""]->emit(o, ind, 0, bPrologBrace);
				}
			}
		}
		o.source.write_line_info (in->get_cline (), in->get_fname ().c_str ());
		/* restore original char handling mode*/
		opts.reset_encoding (encodingOld);
	}

	if (opts->cFlag)
	{
		/* every setup block must name an existing condition (or "*") */
		SetupMap::const_iterator itRuleSetup;
		for (itRuleSetup = ruleSetupMap.begin(); itRuleSetup != ruleSetupMap.end(); ++itRuleSetup)
		{
			if (itRuleSetup->first != "*" && specMap.find(itRuleSetup->first) == specMap.end())
			{
				in->fatalf_at(itRuleSetup->second.first, "setup for non existing rule '%s' found", itRuleSetup->first.c_str());
			}
		}
		if (specMap.size() < ruleSetupMap.size())
		{
			uint32_t line = in->get_cline();
			itRuleSetup = ruleSetupMap.find("*");
			if (itRuleSetup != ruleSetupMap.end())
			{
				line = itRuleSetup->second.first;
			}
			in->fatalf_at(line, "setup for all rules with '*' not possible when all rules are setup explicitly");
		}
	}

	if (opts->target == opt_t::SKELETON)
	{
		Skeleton::emit_epilog (o.source, o.skeletons);
	}

	parse_cleanup();
	in = NULL;
}

/* Clears the free lists and the parser state built up by the grammar
 * actions (symbols, condition names, condition specs, <*> rules and the
 * '<*> *' / NOCOND rule pointers).
 */
void parse_cleanup()
{
	RegExp::vFreeList.clear();
	Range::vFreeList.clear();
	RangeSuffix::freeList.clear();
	Code::freelist.clear();
	symbol_table.clear ();
	condnames.clear ();
	specMap.clear();
	specStar.clear();
	star_default = NULL;
	specNone = NULL;
}

} // end namespace re2c