summary refs log tree commit diff
path: root/src/parse/parser.ypp
diff options
context:
space:
mode:
Diffstat (limited to 'src/parse/parser.ypp')
-rw-r--r-- src/parse/parser.ypp 777
1 files changed, 777 insertions, 0 deletions
diff --git a/src/parse/parser.ypp b/src/parse/parser.ypp
new file mode 100644
index 00000000..1c476953
--- /dev/null
+++ b/src/parse/parser.ypp
@@ -0,0 +1,777 @@
+%{
+
+#include <assert.h>
+#include <time.h>
+#include <string.h>
+#include <stdlib.h>
+#include <iostream>
+#include <set>
+
+#include "config.h"
+#include "src/codegen/skeleton/skeleton.h"
+#include "src/ir/bytecode/bytecode.h"
+#include "src/ir/regexp/encoding/enc.h"
+#include "src/ir/regexp/encoding/range_suffix.h"
+#include "src/ir/regexp/regexp_cat.h"
+#include "src/ir/regexp/regexp_close.h"
+#include "src/ir/regexp/regexp_null.h"
+#include "src/globals.h"
+#include "src/parse/code.h"
+#include "src/parse/extop.h"
+#include "src/parse/parser.h"
+#include "src/util/c99_stdint.h"
+#include "src/util/smart_ptr.h"
+
+#define YYMALLOC malloc
+#define YYFREE free
+
+using namespace re2c;
+
+extern "C"
+{
+int yylex();
+void yyerror(const char*);
+}
+
+static counter_t<rule_rank_t> rank_counter; // source of rule ranks; reset in parse() before a block is parsed
+static std::vector<std::string> condnames; // condition names in order of first use; copied to o.types in parse()
+static re2c::SpecMap specMap; // rules of each condition, keyed by condition name
+static Spec spec; // rules of the current block when conditions (-c) are not in use
+static RuleOp *specNone = NULL; // rule introduced by NOCOND; registered under condition "0" in parse()
+static RuleOpList specStar; // '<*>' rules; merged into every condition in parse()
+static RuleOp * star_default = NULL; // '<*> *' default rule; merged into every condition in parse()
+static Scanner *in = NULL; // scanner of the input being parsed; set at the start of parse(), reset at its end
+static Scanner::ParseMode parseMode; // mode of the block being processed, taken from Scanner::echo()
+static SetupMap ruleSetupMap; // condition name (or "*") -> (line, text) of its setup code
+static bool foundRules; // set by the grammar as soon as the current block contains a rule
+static symbol_table_t symbol_table; // named definitions: name -> regular expression
+
+/* Bison version 1.875 emits a definition that does not work
+ * with several g++ versions. Hence we disable it here.
+ */
+#if defined(__GNUC__)
+#define __attribute__(x)
+#endif
+
+/* Reject condition syntax unless conditions are enabled.
+ *
+ * When opts->cFlag is not set, the condition list (NULL is accepted) is
+ * deleted and a fatal error is reported through the scanner. Otherwise the
+ * function does nothing and clist is left untouched.
+ */
+void context_check(CondList *clist)
+{
+    if (opts->cFlag)
+    {
+        return;
+    }
+    delete clist;
+    in->fatal("conditions are only allowed when using -c switch");
+}
+
+/* Report a condition rule that has no regular expression.
+ *
+ * The condition list (NULL is accepted) is deleted first because every path
+ * below reports an error. Without -c the "conditions are only allowed" error
+ * from context_check() is reported before the "no expression" one.
+ */
+void context_none(CondList *clist)
+{
+    delete clist;
+    clist = NULL;
+    context_check(clist);
+    in->fatal("no expression specified");
+}
+
+/* Add one rule to every condition named in clist.
+ *
+ * clist   - condition names; deleted before returning
+ * loc     - location recorded in the created rules
+ * expr    - regular expression of the rule
+ * look    - trailing context of the rule
+ * code    - action code (callers pass NULL for the ':=> cond' rule form)
+ * newcond - name of the condition to switch to, or NULL; deleted before
+ *           returning
+ *
+ * A separate RuleOp is created per condition. When the list names more than
+ * one condition the rules are created with RegExp::PRIVATE access, otherwise
+ * with RegExp::SHARED. Conditions seen for the first time are appended to
+ * condnames.
+ */
+void context_rule
+    ( CondList * clist
+    , const Loc & loc
+    , RegExp * expr
+    , RegExp * look
+    , const Code * code
+    , const std::string * newcond
+    )
+{
+    context_check(clist);
+
+    RegExp::InsAccess ins_access = RegExp::SHARED;
+    if (clist->size() > 1)
+    {
+        ins_access = RegExp::PRIVATE;
+    }
+
+    const CondList::const_iterator last = clist->end();
+    for (CondList::const_iterator cond = clist->begin(); cond != last; ++cond)
+    {
+        // remember the order in which conditions first appear
+        const bool known = specMap.find(*cond) != specMap.end();
+        if (!known)
+        {
+            condnames.push_back (*cond);
+        }
+
+        RuleOp * const rule = new RuleOp
+            ( loc
+            , expr
+            , look
+            , rank_counter.next ()
+            , ins_access
+            , code
+            , newcond
+            );
+        specMap[*cond].add (rule);
+    }
+
+    delete clist;
+    delete newcond;
+}
+
+/* Register setup code for every condition named in clist.
+ *
+ * clist - condition names (the grammar passes "*" for the '<!*>' form);
+ *         must not be NULL; deleted before returning
+ * code  - setup code; its line and text are stored in ruleSetupMap
+ *
+ * A second setup for the same name is a fatal error reported at the line of
+ * the offending code.
+ */
+void setup_rule(CondList *clist, const Code * code)
+{
+    assert(clist);
+    assert(code);
+    context_check(clist);
+
+    const CondList::const_iterator stop = clist->end();
+    for (CondList::const_iterator name = clist->begin(); name != stop; ++name)
+    {
+        const bool duplicate = ruleSetupMap.find(*name) != ruleSetupMap.end();
+        if (duplicate)
+        {
+            in->fatalf_at(code->loc.line, "code to setup rule '%s' is already defined", name->c_str());
+        }
+        ruleSetupMap[*name] = std::make_pair(code->loc.line, code->text);
+    }
+
+    delete clist;
+}
+
+/* Register the default rule ('<cond> *') for every condition in clist.
+ *
+ * clist - condition names; must not be NULL; deleted before returning
+ * code  - action code of the default rule; must not be NULL
+ *
+ * Each condition gets its own RuleOp built from in->mkDefault() with the
+ * rank rule_rank_t::def(). A condition that already has a default rule is
+ * a fatal error reported at the line of the offending code.
+ */
+void default_rule(CondList *clist, const Code * code)
+{
+    assert(clist);
+    assert(code);
+    context_check(clist);
+
+    const CondList::const_iterator stop = clist->end();
+    for (CondList::const_iterator name = clist->begin(); name != stop; ++name)
+    {
+        RuleOp * const def = new RuleOp
+            ( code->loc
+            , in->mkDefault ()
+            , new NullOp
+            , rule_rank_t::def ()
+            , RegExp::SHARED
+            , code
+            , NULL
+            );
+        const bool added = specMap[*name].add_def (def);
+        if (!added)
+        {
+            in->fatalf_at(code->loc.line, "code to default rule '%s' is already defined", name->c_str());
+        }
+    }
+
+    delete clist;
+}
+
+%}
+
+%start spec /* a block is parsed as a sequence of rules and declarations */
+
+%union { /* semantic value types of tokens and nonterminals */
+ re2c::RegExp * regexp; /* regular expression node built by the expression rules */
+ const re2c::Code * code; /* code attached to a CODE token */
+ char op; /* closure operator character; '*', '+' and '?' are handled in factor */
+ re2c::ExtOp extop; /* repetition bounds (min, max) of a CLOSESIZE token */
+ std::string * str; /* heap-allocated name; deleted by the action or helper that consumes it */
+ re2c::CondList * clist; /* heap-allocated list of condition names */
+};
+
+%token CLOSE
+%token CLOSESIZE
+%token CODE
+%token CONF
+%token ID
+%token FID
+%token FID_END
+%token NOCOND
+%token REGEXP
+%token SETUP
+%token STAR
+
+%type <op> CLOSE STAR SETUP close
+%type <extop> CLOSESIZE
+%type <code> CODE
+%type <regexp> REGEXP rule look expr diff term factor primary
+%type <str> ID FID newcond
+%type <clist> cond clist
+
+%%
+
+spec: /* contents of one block: any sequence of rules and declarations */
+ /* empty */
+ {
+ }
+ | spec rule
+ {
+ foundRules = true; /* parse() checks this flag after parsing a block in reuse mode */
+ }
+ | spec decl
+;
+
+decl: /* named definitions and CONF tokens */
+ ID '=' expr ';'
+ {
+ if (!symbol_table.insert (std::make_pair (* $1, $3)).second) /* insert() leaves an existing entry alone and reports it via .second */
+ {
+ in->fatal("sym already defined");
+ }
+ delete $1; /* the name has been copied into the table */
+ $3->ins_access = RegExp::PRIVATE;
+ }
+ | FID expr FID_END /* same handling as above for the FID ... FID_END token form */
+ {
+ if (!symbol_table.insert (std::make_pair (* $1, $2)).second)
+ {
+ in->fatal("sym already defined");
+ }
+ delete $1;
+ $2->ins_access = RegExp::PRIVATE;
+ }
+ | ID '=' expr '/' /* a '/' after the expression would start a trailing context */
+ {
+ in->fatal("trailing contexts are not allowed in named definitions");
+ }
+ | FID expr '/'
+ {
+ in->fatal("trailing contexts are not allowed in named definitions");
+ }
+ | CONF {} /* the parser takes no action for a CONF token */
+;
+
+/*
+ * rule: a single rule of a block.
+ *
+ * Forms without a condition prefix are only accepted when -c is off and go
+ * into spec. Every form that uses condition syntax goes through
+ * context_check(), directly or via a helper, so that it is rejected when -c
+ * is off. Strings produced by newcond are owned by the action and deleted
+ * here or in context_rule().
+ */
+rule:
+        /* plain rule */
+        expr look CODE
+        {
+            if (opts->cFlag)
+            {
+                in->fatal("condition or '<*>' required when using -c switch");
+            }
+            RuleOp * rule = new RuleOp
+                ( $3->loc
+                , $1
+                , $2
+                , rank_counter.next ()
+                , RegExp::SHARED
+                , $3
+                , NULL
+                );
+            spec.add (rule);
+        }
+    |   STAR CODE /* default rule */
+        {
+            if (opts->cFlag)
+            {
+                in->fatal("condition or '<*>' required when using -c switch");
+            }
+            RuleOp * def = new RuleOp
+                ( $2->loc
+                , in->mkDefault ()
+                , new NullOp
+                , rule_rank_t::def ()
+                , RegExp::SHARED
+                , $2
+                , NULL
+                );
+            if (!spec.add_def (def))
+            {
+                in->fatal("code to default rule is already defined");
+            }
+        }
+    |   '<' cond '>' expr look newcond CODE /* rule for the listed conditions */
+        {
+            context_rule ($2, $7->loc, $4, $5, $7, $6);
+        }
+    |   '<' cond '>' expr look ':' newcond /* rule without code, only a condition change */
+        {
+            assert($7);
+            /* there is no CODE token to take the location from */
+            Loc loc (in->get_fname (), in->get_cline ());
+            context_rule ($2, loc, $4, $5, NULL, $7);
+        }
+    |   '<' cond '>' look newcond CODE /* error: expression is missing */
+        {
+            context_none($2);
+            delete $5;
+        }
+    |   '<' cond '>' look ':' newcond /* error: expression is missing */
+        {
+            assert($6);
+            context_none($2);
+            delete $6;
+        }
+    |   '<' cond '>' STAR CODE /* default rule for conditions */
+        {
+            default_rule($2, $5);
+        }
+    |   '<' STAR '>' expr look newcond CODE /* rule for all conditions */
+        {
+            context_check(NULL);
+            RuleOp * rule = new RuleOp
+                ( $7->loc
+                , $4
+                , $5
+                , rank_counter.next ()
+                , RegExp::PRIVATE
+                , $7
+                , $6
+                );
+            /* merged into every condition later, in parse() */
+            specStar.push_back (rule);
+            delete $6;
+        }
+    |   '<' STAR '>' expr look ':' newcond /* rule for all conditions, without code */
+        {
+            assert($7);
+            context_check(NULL);
+            Loc loc (in->get_fname (), in->get_cline ());
+            RuleOp * rule = new RuleOp
+                ( loc
+                , $4
+                , $5
+                , rank_counter.next ()
+                , RegExp::PRIVATE
+                , NULL
+                , $7
+                );
+            specStar.push_back (rule);
+            delete $7;
+        }
+    |   '<' STAR '>' look newcond CODE /* error: expression is missing */
+        {
+            context_none(NULL);
+            delete $5;
+        }
+    |   '<' STAR '>' look ':' newcond /* error: expression is missing */
+        {
+            assert($6);
+            context_none(NULL);
+            delete $6;
+        }
+    |   '<' STAR '>' STAR CODE /* default rule for all conditions */
+        {
+            /* like every other condition form, this one requires -c;
+             * without the check the rule was accepted and then ignored */
+            context_check(NULL);
+            if (star_default)
+            {
+                in->fatal ("code to default rule '*' is already defined");
+            }
+            star_default = new RuleOp
+                ( $5->loc
+                , in->mkDefault ()
+                , new NullOp
+                , rule_rank_t::def ()
+                , RegExp::PRIVATE
+                , $5
+                , NULL
+                );
+        }
+    |   NOCOND newcond CODE /* code for the case of no valid condition */
+        {
+            context_check(NULL);
+            if (specNone)
+            {
+                in->fatal("code to handle illegal condition already defined");
+            }
+            $$ = specNone = new RuleOp
+                ( $3->loc
+                , new NullOp
+                , new NullOp
+                , rank_counter.next ()
+                , RegExp::SHARED
+                , $3
+                , $2
+                );
+            delete $2;
+        }
+    |   NOCOND ':' newcond /* same, without code */
+        {
+            assert($3);
+            context_check(NULL);
+            if (specNone)
+            {
+                in->fatal("code to handle illegal condition already defined");
+            }
+            Loc loc (in->get_fname (), in->get_cline ());
+            $$ = specNone = new RuleOp
+                ( loc
+                , new NullOp
+                , new NullOp
+                , rank_counter.next ()
+                , RegExp::SHARED
+                , NULL
+                , $3
+                );
+            delete $3;
+        }
+    |   SETUP STAR '>' CODE /* setup code registered under the name "*" */
+        {
+            CondList *clist = new CondList();
+            clist->insert("*");
+            setup_rule(clist, $4);
+        }
+    |   SETUP cond '>' CODE /* setup code for the listed conditions */
+        {
+            setup_rule($2, $4);
+        }
+;
+
+cond: /* condition list of a rule; an empty list is rejected */
+ /* empty */
+ {
+ in->fatal("unnamed condition not supported");
+ }
+ | clist
+ {
+ $$ = $1;
+ }
+ ;
+
+clist: /* comma-separated condition names collected into one CondList */
+ ID
+ {
+ $$ = new CondList(); /* the first name creates the list */
+ $$->insert(* $1);
+ delete $1; /* the name has been copied into the list */
+ }
+ | clist ',' ID
+ {
+ $1->insert(* $3);
+ delete $3;
+ $$ = $1; /* further names extend the same list object */
+ }
+;
+
+newcond: /* optional "=> ID" part of a rule */
+ /* empty */
+ {
+ $$ = NULL; /* no name given */
+ }
+ | '=' '>' ID
+ {
+ $$ = $3; /* the string is handed on; the using rule action or context_rule() deletes it */
+ }
+;
+
+look: /* optional trailing context introduced by '/' */
+ /* empty */
+ {
+ $$ = new NullOp; /* an absent trailing context is represented by a NullOp */
+ }
+ | '/' expr
+ {
+ $$ = $2;
+ }
+;
+
+expr: /* alternation with '|'; binds weaker than difference and concatenation */
+ diff
+ {
+ $$ = $1;
+ }
+ | expr '|' diff
+ {
+ $$ = mkAlt($1, $3);
+ }
+;
+
+diff: /* difference with a backslash; the result is built by the scanner's mkDiff() */
+ term
+ {
+ $$ = $1;
+ }
+ | diff '\\' term
+ {
+ $$ = in->mkDiff($1, $3);
+ }
+;
+
+term: /* juxtaposed factors are combined left to right into CatOp nodes */
+ factor
+ {
+ $$ = $1;
+ }
+ | term factor
+ {
+ $$ = new CatOp($1, $2);
+ }
+;
+
+factor: /* a primary with an optional repetition suffix */
+ primary
+ {
+ $$ = $1;
+ }
+ | primary close
+ {
+ switch($2) /* NOTE(review): there is no default case, so the result stays unset for any other operator character; confirm the scanner only produces the three below */
+ {
+ case '*': /* a CloseOp of the primary, or nothing */
+ $$ = mkAlt(new CloseOp($1), new NullOp());
+ break;
+ case '+': /* a plain CloseOp of the primary */
+ $$ = new CloseOp($1);
+ break;
+ case '?': /* the primary itself, or nothing */
+ $$ = mkAlt($1, new NullOp());
+ break;
+ }
+ }
+ | primary CLOSESIZE
+ {
+ $1->ins_access = RegExp::PRIVATE;
+ if ($2.max == UINT32_MAX) /* presumably UINT32_MAX marks a missing upper bound; TODO confirm against the scanner */
+ {
+ $$ = repeat_from ($1, $2.min);
+ }
+ else if ($2.min == $2.max)
+ {
+ $$ = repeat ($1, $2.min);
+ }
+ else
+ {
+ $$ = repeat_from_to ($1, $2.min, $2.max);
+ }
+ $$ = $$ ? $$ : new NullOp; /* a NULL result of the repeat helpers is replaced by a NullOp */
+ }
+;
+
+close: /* a run of consecutive closure operators folded into a single operator character */
+ CLOSE
+ {
+ $$ = $1;
+ }
+ | STAR
+ {
+ $$ = $1;
+ }
+ | close CLOSE
+ {
+ $$ = ($1 == $2) ? $1 : '*'; /* the same operator twice is kept as is; two different operators become '*' */
+ }
+ | close STAR
+ {
+ $$ = ($1 == $2) ? $1 : '*'; /* same folding as above for a STAR token */
+ }
+;
+
+primary: /* atomic expressions: a named definition, a REGEXP token or a parenthesised expression */
+ ID
+ {
+ symbol_table_t::iterator i = symbol_table.find (* $1);
+ delete $1; /* the name is only needed for the lookup above */
+ if (i == symbol_table.end ())
+ {
+ in->fatal("can't find symbol");
+ }
+ $$ = i->second; /* the stored regexp pointer itself is used, no copy is made here */
+ }
+ | REGEXP
+ {
+ $$ = $1;
+ }
+ | '(' expr ')'
+ {
+ $$ = $2;
+ }
+;
+
+%%
+
+extern "C" {
+
+/* Error callback of the generated parser: the message is reported as a
+ * fatal error through the current scanner.
+ */
+void yyerror(const char* s)
+{
+    in->fatal(s);
+}
+
+/* Token callback of the generated parser: returns the next token of the
+ * current scanner, or 0 when no scanner is set.
+ */
+int yylex()
+{
+    if (!in)
+    {
+        return 0;
+    }
+    return in->scan();
+}
+
+} // end extern "C"
+
+namespace re2c
+{
+/* Parse every block of the input and write the generated code to `o`.
+ *
+ * `i` is installed as the file-level scanner `in` for the duration of the
+ * call. Each iteration of the main loop handles one block whose mode is
+ * reported by Scanner::echo(): the block is parsed with yyparse(), a DFA is
+ * generated per condition (or a single one when -c is off) and emitted
+ * unless the block is a 'rules:re2c' block. After the loop the setup code
+ * registered with -c is validated, and parser state is released with
+ * parse_cleanup().
+ */
+void parse(Scanner& i, Output & o)
+{
+ std::map<std::string, smart_ptr<DFA> > dfa_map; // generated DFAs keyed by condition name ("" when -c is off)
+ ScannerState rules_state; // scanner state saved at a 'rules:re2c' block, used to re-parse it
+
+ in = &i;
+
+ o.source.write_version_time ();
+ o.source.write_line_info (in->get_cline (), in->get_fname ().c_str ());
+ if (opts->target == opt_t::SKELETON)
+ {
+ Skeleton::emit_prolog (o.source);
+ }
+
+ Enc encodingOld = opts->encoding; // encoding in effect before the block; restored after each block
+
+ while ((parseMode = i.echo()) != Scanner::Stop)
+ {
+ o.source.new_block ();
+ bool bPrologBrace = false;
+ ScannerState curr_state; // scanner state at the start of this block
+
+ i.save_state(curr_state);
+ foundRules = false;
+
+ if (opts->rFlag && parseMode == Scanner::Rules && dfa_map.size())
+ {
+ in->fatal("cannot have a second 'rules:re2c' block");
+ }
+ if (parseMode == Scanner::Reuse)
+ {
+ if (dfa_map.empty())
+ {
+ in->fatal("got 'use:re2c' without 'rules:re2c'");
+ }
+ }
+ else if (parseMode == Scanner::Rules)
+ {
+ i.save_state(rules_state);
+ }
+ else
+ {
+ dfa_map.clear(); // any other block starts without DFAs of earlier blocks
+ }
+ rank_counter.reset ();
+ spec.clear ();
+ in->set_in_parse(true);
+ yyparse();
+ in->set_in_parse(false);
+ if (opts->rFlag && parseMode == Scanner::Reuse)
+ {
+ if (foundRules || opts->encoding != encodingOld) // the reused rules cannot be taken as is: new rules were added or the encoding changed
+ {
+ // Re-parse rules
+ parseMode = Scanner::Parse;
+ i.restore_state(rules_state);
+ i.reuse();
+ dfa_map.clear();
+ parse_cleanup();
+ spec.clear ();
+ rank_counter.reset ();
+ in->set_in_parse(true);
+ yyparse();
+ in->set_in_parse(false);
+
+ // Now append potential new rules
+ i.restore_state(curr_state);
+ parseMode = Scanner::Parse;
+ in->set_in_parse(true);
+ yyparse();
+ in->set_in_parse(false);
+ }
+ encodingOld = opts->encoding;
+ }
+ o.source.set_block_line (in->get_cline ());
+ uint32_t ind = opts->topIndent;
+ if (opts->cFlag)
+ {
+ SpecMap::iterator it;
+ SetupMap::const_iterator itRuleSetup;
+
+ if (parseMode != Scanner::Reuse)
+ {
+ // <*> rules must have the lowest priority
+ // now that all rules have been parsed, we can fix it
+ for (RuleOpList::const_iterator itOp = specStar.begin(); itOp != specStar.end(); ++itOp)
+ {
+ (*itOp)->rank = rank_counter.next ();
+ }
+ // merge <*> rules to all conditions
+ // note that all conditions use the same regexp for <*> rules,
+ // but compile it separately because of RegExp::PRIVATE attribute
+ for (it = specMap.begin(); it != specMap.end(); ++it)
+ {
+ for (RuleOpList::const_iterator itOp = specStar.begin(); itOp != specStar.end(); ++itOp)
+ {
+ it->second.addl (*itOp);
+ }
+ if (star_default)
+ {
+ it->second.addl (star_default);
+ }
+ }
+
+ if (specNone)
+ {
+ specMap["0"].add (specNone);
+ // Note that "0" inserts first, which is important.
+ condnames.insert (condnames.begin (), "0");
+ }
+ o.types = condnames;
+ }
+
+ size_t nCount = specMap.size(); // counted down per emitted condition; the emit() call receives true when it reaches zero
+
+ for (it = specMap.begin(); it != specMap.end(); ++it)
+ {
+ if (parseMode != Scanner::Reuse)
+ {
+ // pick the setup code: the one of this condition, else the "*" one, else none
+ itRuleSetup = ruleSetupMap.find(it->first);
+ if (itRuleSetup != ruleSetupMap.end())
+ {
+ yySetupRule = itRuleSetup->second.second;
+ }
+ else
+ {
+ itRuleSetup = ruleSetupMap.find("*");
+ if (itRuleSetup != ruleSetupMap.end())
+ {
+ yySetupRule = itRuleSetup->second.second;
+ }
+ else
+ {
+ yySetupRule = "";
+ }
+ }
+
+ dfa_map[it->first] = genCode(it->second, o, it->first, opts->encoding.nCodeUnits ());
+ }
+ if (parseMode != Scanner::Rules && dfa_map.find(it->first) != dfa_map.end()) // a 'rules:re2c' block only defines rules, nothing is emitted for it
+ {
+ dfa_map[it->first]->emit(o, ind, !--nCount, bPrologBrace);
+ }
+ }
+ }
+ else
+ {
+ if (spec.re || !dfa_map.empty())
+ {
+ if (parseMode != Scanner::Reuse)
+ {
+ dfa_map[""] = genCode(spec, o, "", opts->encoding.nCodeUnits ());
+ }
+ if (parseMode != Scanner::Rules && dfa_map.find("") != dfa_map.end())
+ {
+ dfa_map[""]->emit(o, ind, 0, bPrologBrace);
+ }
+ }
+ }
+ o.source.write_line_info (in->get_cline (), in->get_fname ().c_str ());
+ /* restore original char handling mode*/
+ opts.reset_encoding (encodingOld);
+ }
+
+ if (opts->cFlag)
+ {
+ // every setup except the "*" one must name a condition that has rules
+ SetupMap::const_iterator itRuleSetup;
+ for (itRuleSetup = ruleSetupMap.begin(); itRuleSetup != ruleSetupMap.end(); ++itRuleSetup)
+ {
+ if (itRuleSetup->first != "*" && specMap.find(itRuleSetup->first) == specMap.end())
+ {
+ in->fatalf_at(itRuleSetup->second.first, "setup for non existing rule '%s' found", itRuleSetup->first.c_str());
+ }
+ }
+ if (specMap.size() < ruleSetupMap.size()) // more setups than conditions: reported as a redundant "*" setup
+ {
+ uint32_t line = in->get_cline();
+ itRuleSetup = ruleSetupMap.find("*");
+ if (itRuleSetup != ruleSetupMap.end())
+ {
+ line = itRuleSetup->second.first;
+ }
+ in->fatalf_at(line, "setup for all rules with '*' not possible when all rules are setup explicitly");
+ }
+ }
+
+ if (opts->target == opt_t::SKELETON)
+ {
+ Skeleton::emit_epilog (o.source, o.skeletons);
+ }
+
+ parse_cleanup();
+ in = NULL;
+}
+
+/* Reset the state accumulated while parsing.
+ *
+ * Called by parse() before the rules of a 'rules:re2c' block are parsed
+ * again and once more at the end of parse(). Clears the free lists of the
+ * regexp, range, range suffix and code classes together with every
+ * file-level table filled by the grammar actions.
+ */
+void parse_cleanup()
+{
+    RegExp::vFreeList.clear();
+    Range::vFreeList.clear();
+    RangeSuffix::freeList.clear();
+    Code::freelist.clear();
+
+    symbol_table.clear ();
+    condnames.clear ();
+    specMap.clear();
+    specStar.clear();
+    // Setup code is registered by the same rules that get parsed again after
+    // a cleanup; entries left over from the first pass would make
+    // setup_rule() fail with "already defined".
+    ruleSetupMap.clear();
+    star_default = NULL;
+    specNone = NULL;
+}
+
+} // end namespace re2c